{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 5813, "global_step": 11625, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.718213058419244e-07, "loss": 0.0635, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.436426116838488e-07, "loss": 0.0656, "step": 2 }, { "epoch": 0.0, "learning_rate": 5.154639175257732e-07, "loss": 0.0849, "step": 3 }, { "epoch": 0.0, "learning_rate": 6.872852233676976e-07, "loss": 0.0733, "step": 4 }, { "epoch": 0.0, "learning_rate": 8.591065292096222e-07, "loss": 0.0551, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.0309278350515464e-06, "loss": 0.0653, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.202749140893471e-06, "loss": 0.0729, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.3745704467353952e-06, "loss": 0.0607, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.5463917525773197e-06, "loss": 0.0751, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.7182130584192443e-06, "loss": 0.0645, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.8900343642611683e-06, "loss": 0.0638, "step": 11 }, { "epoch": 0.0, "learning_rate": 2.061855670103093e-06, "loss": 0.0755, "step": 12 }, { "epoch": 0.0, "learning_rate": 2.233676975945017e-06, "loss": 0.0661, "step": 13 }, { "epoch": 0.0, "learning_rate": 2.405498281786942e-06, "loss": 0.0866, "step": 14 }, { "epoch": 0.0, "learning_rate": 2.577319587628866e-06, "loss": 0.0791, "step": 15 }, { "epoch": 0.0, "learning_rate": 2.7491408934707903e-06, "loss": 0.0597, "step": 16 }, { "epoch": 0.0, "learning_rate": 2.920962199312715e-06, "loss": 0.0671, "step": 17 }, { "epoch": 0.0, "learning_rate": 3.0927835051546395e-06, "loss": 0.0562, "step": 18 }, { "epoch": 0.0, "learning_rate": 3.264604810996564e-06, "loss": 0.083, "step": 19 }, { "epoch": 0.01, "learning_rate": 3.4364261168384886e-06, "loss": 0.061, "step": 20 }, { "epoch": 0.01, "learning_rate": 3.608247422680412e-06, "loss": 0.0591, "step": 21 }, { "epoch": 0.01, "learning_rate": 3.7800687285223365e-06, "loss": 0.0611, "step": 22 }, { "epoch": 0.01, "learning_rate": 3.951890034364261e-06, "loss": 0.0623, "step": 23 }, { "epoch": 0.01, "learning_rate": 4.123711340206186e-06, "loss": 0.0746, "step": 24 }, { "epoch": 0.01, "learning_rate": 4.2955326460481105e-06, "loss": 0.0665, "step": 25 }, { "epoch": 0.01, "learning_rate": 4.467353951890034e-06, "loss": 0.0556, "step": 26 }, { "epoch": 0.01, "learning_rate": 4.639175257731959e-06, "loss": 0.0642, "step": 27 }, { "epoch": 0.01, "learning_rate": 4.810996563573884e-06, "loss": 0.0546, "step": 28 }, { "epoch": 0.01, "learning_rate": 4.982817869415808e-06, "loss": 0.0683, "step": 29 }, { "epoch": 0.01, "learning_rate": 5.154639175257732e-06, "loss": 0.0511, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.326460481099657e-06, "loss": 0.0795, "step": 31 }, { "epoch": 0.01, "learning_rate": 5.498281786941581e-06, "loss": 0.0668, "step": 32 }, { "epoch": 0.01, "learning_rate": 5.670103092783505e-06, "loss": 0.0426, "step": 33 }, { "epoch": 0.01, "learning_rate": 5.84192439862543e-06, "loss": 0.0648, "step": 34 }, { "epoch": 0.01, "learning_rate": 6.013745704467354e-06, "loss": 0.0576, "step": 35 }, { "epoch": 0.01, "learning_rate": 6.185567010309279e-06, "loss": 0.0532, "step": 36 }, { "epoch": 0.01, "learning_rate": 6.357388316151203e-06, "loss": 0.064, "step": 37 }, { "epoch": 0.01, "learning_rate": 6.529209621993128e-06, "loss": 0.0695, "step": 38 }, { "epoch": 0.01, "learning_rate": 6.701030927835052e-06, "loss": 0.0576, "step": 39 }, { "epoch": 0.01, "learning_rate": 6.872852233676977e-06, "loss": 0.0607, "step": 40 }, { "epoch": 0.01, "learning_rate": 7.0446735395189e-06, "loss": 0.044, "step": 41 }, { "epoch": 0.01, "learning_rate": 7.216494845360824e-06, "loss": 0.0719, "step": 42 }, { "epoch": 0.01, "learning_rate": 7.38831615120275e-06, "loss": 0.0656, "step": 43 }, { "epoch": 0.01, "learning_rate": 7.560137457044673e-06, "loss": 0.0732, "step": 44 }, { "epoch": 0.01, "learning_rate": 7.731958762886599e-06, "loss": 0.0666, "step": 45 }, { "epoch": 0.01, "learning_rate": 7.903780068728522e-06, "loss": 0.0475, "step": 46 }, { "epoch": 0.01, "learning_rate": 8.075601374570448e-06, "loss": 0.0494, "step": 47 }, { "epoch": 0.01, "learning_rate": 8.247422680412371e-06, "loss": 0.0593, "step": 48 }, { "epoch": 0.01, "learning_rate": 8.419243986254296e-06, "loss": 0.0453, "step": 49 }, { "epoch": 0.01, "learning_rate": 8.591065292096221e-06, "loss": 0.0651, "step": 50 }, { "epoch": 0.01, "learning_rate": 8.762886597938144e-06, "loss": 0.0623, "step": 51 }, { "epoch": 0.01, "learning_rate": 8.934707903780069e-06, "loss": 0.0478, "step": 52 }, { "epoch": 0.01, "learning_rate": 9.106529209621994e-06, "loss": 0.087, "step": 53 }, { "epoch": 0.01, "learning_rate": 9.278350515463918e-06, "loss": 0.077, "step": 54 }, { "epoch": 0.01, "learning_rate": 9.450171821305841e-06, "loss": 0.0468, "step": 55 }, { "epoch": 0.01, "learning_rate": 9.621993127147768e-06, "loss": 0.0628, "step": 56 }, { "epoch": 0.01, "learning_rate": 9.793814432989691e-06, "loss": 0.0696, "step": 57 }, { "epoch": 0.01, "learning_rate": 9.965635738831616e-06, "loss": 0.0582, "step": 58 }, { "epoch": 0.02, "learning_rate": 1.013745704467354e-05, "loss": 0.051, "step": 59 }, { "epoch": 0.02, "learning_rate": 1.0309278350515464e-05, "loss": 0.0579, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.0481099656357389e-05, "loss": 0.0497, "step": 61 }, { "epoch": 0.02, "learning_rate": 1.0652920962199313e-05, "loss": 0.054, "step": 62 }, { "epoch": 0.02, "learning_rate": 1.0824742268041238e-05, "loss": 0.0656, "step": 63 }, { "epoch": 0.02, "learning_rate": 1.0996563573883161e-05, "loss": 0.0558, "step": 64 }, { "epoch": 0.02, "learning_rate": 1.1168384879725088e-05, "loss": 0.0412, "step": 65 }, { "epoch": 0.02, "learning_rate": 1.134020618556701e-05, "loss": 0.0617, "step": 66 }, { "epoch": 0.02, "learning_rate": 1.1512027491408934e-05, "loss": 0.0697, "step": 67 }, { "epoch": 0.02, "learning_rate": 1.168384879725086e-05, "loss": 0.0671, "step": 68 }, { "epoch": 0.02, "learning_rate": 1.1855670103092783e-05, "loss": 0.0555, "step": 69 }, { "epoch": 0.02, "learning_rate": 1.2027491408934708e-05, "loss": 0.0606, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.2199312714776633e-05, "loss": 0.0574, "step": 71 }, { "epoch": 0.02, "learning_rate": 1.2371134020618558e-05, "loss": 0.0508, "step": 72 }, { "epoch": 0.02, "learning_rate": 1.2542955326460481e-05, "loss": 0.0515, "step": 73 }, { "epoch": 0.02, "learning_rate": 1.2714776632302406e-05, "loss": 0.0786, "step": 74 }, { "epoch": 0.02, "learning_rate": 1.2886597938144329e-05, "loss": 0.0625, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.3058419243986255e-05, "loss": 0.0682, "step": 76 }, { "epoch": 0.02, "learning_rate": 1.323024054982818e-05, "loss": 0.0502, "step": 77 }, { "epoch": 0.02, "learning_rate": 1.3402061855670103e-05, "loss": 0.0746, "step": 78 }, { "epoch": 0.02, "learning_rate": 1.3573883161512026e-05, "loss": 0.0555, "step": 79 }, { "epoch": 0.02, "learning_rate": 1.3745704467353954e-05, "loss": 0.0582, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.3917525773195878e-05, "loss": 0.0654, "step": 81 }, { "epoch": 0.02, "learning_rate": 1.40893470790378e-05, "loss": 0.0512, "step": 82 }, { "epoch": 0.02, "learning_rate": 1.4261168384879725e-05, "loss": 0.0547, "step": 83 }, { "epoch": 0.02, "learning_rate": 1.4432989690721649e-05, "loss": 0.0746, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.4604810996563575e-05, "loss": 0.0573, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.47766323024055e-05, "loss": 0.0576, "step": 86 }, { "epoch": 0.02, "learning_rate": 1.4948453608247423e-05, "loss": 0.0477, "step": 87 }, { "epoch": 0.02, "learning_rate": 1.5120274914089346e-05, "loss": 0.0543, "step": 88 }, { "epoch": 0.02, "learning_rate": 1.5292096219931273e-05, "loss": 0.0599, "step": 89 }, { "epoch": 0.02, "learning_rate": 1.5463917525773197e-05, "loss": 0.0453, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.5635738831615122e-05, "loss": 0.0594, "step": 91 }, { "epoch": 0.02, "learning_rate": 1.5807560137457044e-05, "loss": 0.0567, "step": 92 }, { "epoch": 0.02, "learning_rate": 1.5979381443298968e-05, "loss": 0.0438, "step": 93 }, { "epoch": 0.02, "learning_rate": 1.6151202749140896e-05, "loss": 0.0549, "step": 94 }, { "epoch": 0.02, "learning_rate": 1.6323024054982818e-05, "loss": 0.0728, "step": 95 }, { "epoch": 0.02, "learning_rate": 1.6494845360824743e-05, "loss": 0.0791, "step": 96 }, { "epoch": 0.03, "learning_rate": 1.6666666666666667e-05, "loss": 0.0371, "step": 97 }, { "epoch": 0.03, "learning_rate": 1.6838487972508592e-05, "loss": 0.0606, "step": 98 }, { "epoch": 0.03, "learning_rate": 1.7010309278350517e-05, "loss": 0.0787, "step": 99 }, { "epoch": 0.03, "learning_rate": 1.7182130584192442e-05, "loss": 0.0539, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.7353951890034363e-05, "loss": 0.0672, "step": 101 }, { "epoch": 0.03, "learning_rate": 1.7525773195876288e-05, "loss": 0.0831, "step": 102 }, { "epoch": 0.03, "learning_rate": 1.7697594501718216e-05, "loss": 0.0602, "step": 103 }, { "epoch": 0.03, "learning_rate": 1.7869415807560138e-05, "loss": 0.0575, "step": 104 }, { "epoch": 0.03, "learning_rate": 1.8041237113402062e-05, "loss": 0.0475, "step": 105 }, { "epoch": 0.03, "learning_rate": 1.8213058419243987e-05, "loss": 0.0873, "step": 106 }, { "epoch": 0.03, "learning_rate": 1.8384879725085912e-05, "loss": 0.075, "step": 107 }, { "epoch": 0.03, "learning_rate": 1.8556701030927837e-05, "loss": 0.0703, "step": 108 }, { "epoch": 0.03, "learning_rate": 1.872852233676976e-05, "loss": 0.0712, "step": 109 }, { "epoch": 0.03, "learning_rate": 1.8900343642611683e-05, "loss": 0.0535, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.9072164948453608e-05, "loss": 0.0455, "step": 111 }, { "epoch": 0.03, "learning_rate": 1.9243986254295536e-05, "loss": 0.057, "step": 112 }, { "epoch": 0.03, "learning_rate": 1.9415807560137457e-05, "loss": 0.0776, "step": 113 }, { "epoch": 0.03, "learning_rate": 1.9587628865979382e-05, "loss": 0.0598, "step": 114 }, { "epoch": 0.03, "learning_rate": 1.9759450171821307e-05, "loss": 0.0824, "step": 115 }, { "epoch": 0.03, "learning_rate": 1.9931271477663232e-05, "loss": 0.0475, "step": 116 }, { "epoch": 0.03, "learning_rate": 2.0103092783505157e-05, "loss": 0.0535, "step": 117 }, { "epoch": 0.03, "learning_rate": 2.027491408934708e-05, "loss": 0.061, "step": 118 }, { "epoch": 0.03, "learning_rate": 2.0446735395189003e-05, "loss": 0.0637, "step": 119 }, { "epoch": 0.03, "learning_rate": 2.0618556701030927e-05, "loss": 0.0422, "step": 120 }, { "epoch": 0.03, "learning_rate": 2.0790378006872856e-05, "loss": 0.0876, "step": 121 }, { "epoch": 0.03, "learning_rate": 2.0962199312714777e-05, "loss": 0.0584, "step": 122 }, { "epoch": 0.03, "learning_rate": 2.1134020618556702e-05, "loss": 0.0814, "step": 123 }, { "epoch": 0.03, "learning_rate": 2.1305841924398627e-05, "loss": 0.0499, "step": 124 }, { "epoch": 0.03, "learning_rate": 2.1477663230240548e-05, "loss": 0.0726, "step": 125 }, { "epoch": 0.03, "learning_rate": 2.1649484536082476e-05, "loss": 0.0712, "step": 126 }, { "epoch": 0.03, "learning_rate": 2.18213058419244e-05, "loss": 0.0535, "step": 127 }, { "epoch": 0.03, "learning_rate": 2.1993127147766322e-05, "loss": 0.0527, "step": 128 }, { "epoch": 0.03, "learning_rate": 2.2164948453608247e-05, "loss": 0.0682, "step": 129 }, { "epoch": 0.03, "learning_rate": 2.2336769759450175e-05, "loss": 0.0628, "step": 130 }, { "epoch": 0.03, "learning_rate": 2.2508591065292097e-05, "loss": 0.0489, "step": 131 }, { "epoch": 0.03, "learning_rate": 2.268041237113402e-05, "loss": 0.0624, "step": 132 }, { "epoch": 0.03, "learning_rate": 2.2852233676975946e-05, "loss": 0.0606, "step": 133 }, { "epoch": 0.03, "learning_rate": 2.3024054982817868e-05, "loss": 0.0587, "step": 134 }, { "epoch": 0.03, "learning_rate": 2.3195876288659796e-05, "loss": 0.0876, "step": 135 }, { "epoch": 0.04, "learning_rate": 2.336769759450172e-05, "loss": 0.0476, "step": 136 }, { "epoch": 0.04, "learning_rate": 2.3539518900343642e-05, "loss": 0.0479, "step": 137 }, { "epoch": 0.04, "learning_rate": 2.3711340206185567e-05, "loss": 0.0668, "step": 138 }, { "epoch": 0.04, "learning_rate": 2.3883161512027495e-05, "loss": 0.0649, "step": 139 }, { "epoch": 0.04, "learning_rate": 2.4054982817869417e-05, "loss": 0.0786, "step": 140 }, { "epoch": 0.04, "learning_rate": 2.422680412371134e-05, "loss": 0.0508, "step": 141 }, { "epoch": 0.04, "learning_rate": 2.4398625429553266e-05, "loss": 0.0517, "step": 142 }, { "epoch": 0.04, "learning_rate": 2.4570446735395188e-05, "loss": 0.0572, "step": 143 }, { "epoch": 0.04, "learning_rate": 2.4742268041237116e-05, "loss": 0.0648, "step": 144 }, { "epoch": 0.04, "learning_rate": 2.491408934707904e-05, "loss": 0.0597, "step": 145 }, { "epoch": 0.04, "learning_rate": 2.5085910652920962e-05, "loss": 0.0515, "step": 146 }, { "epoch": 0.04, "learning_rate": 2.5257731958762887e-05, "loss": 0.0704, "step": 147 }, { "epoch": 0.04, "learning_rate": 2.542955326460481e-05, "loss": 0.0438, "step": 148 }, { "epoch": 0.04, "learning_rate": 2.5601374570446733e-05, "loss": 0.0588, "step": 149 }, { "epoch": 0.04, "learning_rate": 2.5773195876288658e-05, "loss": 0.0426, "step": 150 }, { "epoch": 0.04, "learning_rate": 2.594501718213059e-05, "loss": 0.0675, "step": 151 }, { "epoch": 0.04, "learning_rate": 2.611683848797251e-05, "loss": 0.0606, "step": 152 }, { "epoch": 0.04, "learning_rate": 2.6288659793814435e-05, "loss": 0.0553, "step": 153 }, { "epoch": 0.04, "learning_rate": 2.646048109965636e-05, "loss": 0.0531, "step": 154 }, { "epoch": 0.04, "learning_rate": 2.663230240549828e-05, "loss": 0.0485, "step": 155 }, { "epoch": 0.04, "learning_rate": 2.6804123711340206e-05, "loss": 0.0698, "step": 156 }, { "epoch": 0.04, "learning_rate": 2.697594501718213e-05, "loss": 0.0381, "step": 157 }, { "epoch": 0.04, "learning_rate": 2.7147766323024053e-05, "loss": 0.0585, "step": 158 }, { "epoch": 0.04, "learning_rate": 2.7319587628865977e-05, "loss": 0.0495, "step": 159 }, { "epoch": 0.04, "learning_rate": 2.749140893470791e-05, "loss": 0.0575, "step": 160 }, { "epoch": 0.04, "learning_rate": 2.766323024054983e-05, "loss": 0.0521, "step": 161 }, { "epoch": 0.04, "learning_rate": 2.7835051546391755e-05, "loss": 0.0558, "step": 162 }, { "epoch": 0.04, "learning_rate": 2.800687285223368e-05, "loss": 0.0842, "step": 163 }, { "epoch": 0.04, "learning_rate": 2.81786941580756e-05, "loss": 0.0437, "step": 164 }, { "epoch": 0.04, "learning_rate": 2.8350515463917526e-05, "loss": 0.0488, "step": 165 }, { "epoch": 0.04, "learning_rate": 2.852233676975945e-05, "loss": 0.0769, "step": 166 }, { "epoch": 0.04, "learning_rate": 2.8694158075601372e-05, "loss": 0.0478, "step": 167 }, { "epoch": 0.04, "learning_rate": 2.8865979381443297e-05, "loss": 0.0691, "step": 168 }, { "epoch": 0.04, "learning_rate": 2.903780068728523e-05, "loss": 0.049, "step": 169 }, { "epoch": 0.04, "learning_rate": 2.920962199312715e-05, "loss": 0.0657, "step": 170 }, { "epoch": 0.04, "learning_rate": 2.9381443298969075e-05, "loss": 0.0345, "step": 171 }, { "epoch": 0.04, "learning_rate": 2.9553264604811e-05, "loss": 0.0551, "step": 172 }, { "epoch": 0.04, "learning_rate": 2.972508591065292e-05, "loss": 0.0558, "step": 173 }, { "epoch": 0.04, "learning_rate": 2.9896907216494846e-05, "loss": 0.0487, "step": 174 }, { "epoch": 0.05, "learning_rate": 3.006872852233677e-05, "loss": 0.0468, "step": 175 }, { "epoch": 0.05, "learning_rate": 3.0240549828178692e-05, "loss": 0.06, "step": 176 }, { "epoch": 0.05, "learning_rate": 3.0412371134020617e-05, "loss": 0.0527, "step": 177 }, { "epoch": 0.05, "learning_rate": 3.0584192439862545e-05, "loss": 0.0634, "step": 178 }, { "epoch": 0.05, "learning_rate": 3.075601374570447e-05, "loss": 0.0599, "step": 179 }, { "epoch": 0.05, "learning_rate": 3.0927835051546395e-05, "loss": 0.069, "step": 180 }, { "epoch": 0.05, "learning_rate": 3.1099656357388316e-05, "loss": 0.0615, "step": 181 }, { "epoch": 0.05, "learning_rate": 3.1271477663230244e-05, "loss": 0.0665, "step": 182 }, { "epoch": 0.05, "learning_rate": 3.1443298969072166e-05, "loss": 0.0471, "step": 183 }, { "epoch": 0.05, "learning_rate": 3.161512027491409e-05, "loss": 0.0502, "step": 184 }, { "epoch": 0.05, "learning_rate": 3.1786941580756015e-05, "loss": 0.071, "step": 185 }, { "epoch": 0.05, "learning_rate": 3.1958762886597937e-05, "loss": 0.0552, "step": 186 }, { "epoch": 0.05, "learning_rate": 3.2130584192439865e-05, "loss": 0.0716, "step": 187 }, { "epoch": 0.05, "learning_rate": 3.230240549828179e-05, "loss": 0.0552, "step": 188 }, { "epoch": 0.05, "learning_rate": 3.2474226804123714e-05, "loss": 0.06, "step": 189 }, { "epoch": 0.05, "learning_rate": 3.2646048109965636e-05, "loss": 0.0542, "step": 190 }, { "epoch": 0.05, "learning_rate": 3.2817869415807564e-05, "loss": 0.0565, "step": 191 }, { "epoch": 0.05, "learning_rate": 3.2989690721649485e-05, "loss": 0.0545, "step": 192 }, { "epoch": 0.05, "learning_rate": 3.316151202749141e-05, "loss": 0.0696, "step": 193 }, { "epoch": 0.05, "learning_rate": 3.3333333333333335e-05, "loss": 0.06, "step": 194 }, { "epoch": 0.05, "learning_rate": 3.3505154639175256e-05, "loss": 0.0422, "step": 195 }, { "epoch": 0.05, "learning_rate": 3.3676975945017185e-05, "loss": 0.0767, "step": 196 }, { "epoch": 0.05, "learning_rate": 3.384879725085911e-05, "loss": 0.0551, "step": 197 }, { "epoch": 0.05, "learning_rate": 3.4020618556701034e-05, "loss": 0.0658, "step": 198 }, { "epoch": 0.05, "learning_rate": 3.4192439862542955e-05, "loss": 0.0654, "step": 199 }, { "epoch": 0.05, "learning_rate": 3.4364261168384884e-05, "loss": 0.0764, "step": 200 }, { "epoch": 0.05, "learning_rate": 3.4536082474226805e-05, "loss": 0.051, "step": 201 }, { "epoch": 0.05, "learning_rate": 3.4707903780068726e-05, "loss": 0.0315, "step": 202 }, { "epoch": 0.05, "learning_rate": 3.4879725085910655e-05, "loss": 0.0739, "step": 203 }, { "epoch": 0.05, "learning_rate": 3.5051546391752576e-05, "loss": 0.0958, "step": 204 }, { "epoch": 0.05, "learning_rate": 3.5223367697594504e-05, "loss": 0.0663, "step": 205 }, { "epoch": 0.05, "learning_rate": 3.539518900343643e-05, "loss": 0.0594, "step": 206 }, { "epoch": 0.05, "learning_rate": 3.5567010309278354e-05, "loss": 0.0565, "step": 207 }, { "epoch": 0.05, "learning_rate": 3.5738831615120275e-05, "loss": 0.0554, "step": 208 }, { "epoch": 0.05, "learning_rate": 3.5910652920962203e-05, "loss": 0.0479, "step": 209 }, { "epoch": 0.05, "learning_rate": 3.6082474226804125e-05, "loss": 0.0841, "step": 210 }, { "epoch": 0.05, "learning_rate": 3.6254295532646046e-05, "loss": 0.0548, "step": 211 }, { "epoch": 0.05, "learning_rate": 3.6426116838487974e-05, "loss": 0.0381, "step": 212 }, { "epoch": 0.05, "learning_rate": 3.6597938144329896e-05, "loss": 0.0474, "step": 213 }, { "epoch": 0.06, "learning_rate": 3.6769759450171824e-05, "loss": 0.0496, "step": 214 }, { "epoch": 0.06, "learning_rate": 3.694158075601375e-05, "loss": 0.0521, "step": 215 }, { "epoch": 0.06, "learning_rate": 3.7113402061855674e-05, "loss": 0.0615, "step": 216 }, { "epoch": 0.06, "learning_rate": 3.7285223367697595e-05, "loss": 0.0726, "step": 217 }, { "epoch": 0.06, "learning_rate": 3.745704467353952e-05, "loss": 0.0728, "step": 218 }, { "epoch": 0.06, "learning_rate": 3.7628865979381445e-05, "loss": 0.0647, "step": 219 }, { "epoch": 0.06, "learning_rate": 3.7800687285223366e-05, "loss": 0.0577, "step": 220 }, { "epoch": 0.06, "learning_rate": 3.7972508591065294e-05, "loss": 0.0499, "step": 221 }, { "epoch": 0.06, "learning_rate": 3.8144329896907216e-05, "loss": 0.0485, "step": 222 }, { "epoch": 0.06, "learning_rate": 3.8316151202749144e-05, "loss": 0.06, "step": 223 }, { "epoch": 0.06, "learning_rate": 3.848797250859107e-05, "loss": 0.0472, "step": 224 }, { "epoch": 0.06, "learning_rate": 3.865979381443299e-05, "loss": 0.0595, "step": 225 }, { "epoch": 0.06, "learning_rate": 3.8831615120274915e-05, "loss": 0.0609, "step": 226 }, { "epoch": 0.06, "learning_rate": 3.900343642611684e-05, "loss": 0.0576, "step": 227 }, { "epoch": 0.06, "learning_rate": 3.9175257731958764e-05, "loss": 0.0787, "step": 228 }, { "epoch": 0.06, "learning_rate": 3.9347079037800686e-05, "loss": 0.0536, "step": 229 }, { "epoch": 0.06, "learning_rate": 3.9518900343642614e-05, "loss": 0.0643, "step": 230 }, { "epoch": 0.06, "learning_rate": 3.9690721649484535e-05, "loss": 0.0314, "step": 231 }, { "epoch": 0.06, "learning_rate": 3.9862542955326463e-05, "loss": 0.0573, "step": 232 }, { "epoch": 0.06, "learning_rate": 4.003436426116839e-05, "loss": 0.0383, "step": 233 }, { "epoch": 0.06, "learning_rate": 4.020618556701031e-05, "loss": 0.0358, "step": 234 }, { "epoch": 0.06, "learning_rate": 4.0378006872852234e-05, "loss": 0.0634, "step": 235 }, { "epoch": 0.06, "learning_rate": 4.054982817869416e-05, "loss": 0.0612, "step": 236 }, { "epoch": 0.06, "learning_rate": 4.0721649484536084e-05, "loss": 0.0744, "step": 237 }, { "epoch": 0.06, "learning_rate": 4.0893470790378005e-05, "loss": 0.0528, "step": 238 }, { "epoch": 0.06, "learning_rate": 4.1065292096219934e-05, "loss": 0.0532, "step": 239 }, { "epoch": 0.06, "learning_rate": 4.1237113402061855e-05, "loss": 0.0676, "step": 240 }, { "epoch": 0.06, "learning_rate": 4.140893470790378e-05, "loss": 0.0416, "step": 241 }, { "epoch": 0.06, "learning_rate": 4.158075601374571e-05, "loss": 0.0687, "step": 242 }, { "epoch": 0.06, "learning_rate": 4.175257731958763e-05, "loss": 0.0404, "step": 243 }, { "epoch": 0.06, "learning_rate": 4.1924398625429554e-05, "loss": 0.0479, "step": 244 }, { "epoch": 0.06, "learning_rate": 4.209621993127148e-05, "loss": 0.0459, "step": 245 }, { "epoch": 0.06, "learning_rate": 4.2268041237113404e-05, "loss": 0.06, "step": 246 }, { "epoch": 0.06, "learning_rate": 4.2439862542955325e-05, "loss": 0.0457, "step": 247 }, { "epoch": 0.06, "learning_rate": 4.261168384879725e-05, "loss": 0.0405, "step": 248 }, { "epoch": 0.06, "learning_rate": 4.2783505154639175e-05, "loss": 0.0353, "step": 249 }, { "epoch": 0.06, "learning_rate": 4.2955326460481096e-05, "loss": 0.0473, "step": 250 }, { "epoch": 0.06, "learning_rate": 4.312714776632303e-05, "loss": 0.0687, "step": 251 }, { "epoch": 0.07, "learning_rate": 4.329896907216495e-05, "loss": 0.0494, "step": 252 }, { "epoch": 0.07, "learning_rate": 4.3470790378006874e-05, "loss": 0.0564, "step": 253 }, { "epoch": 0.07, "learning_rate": 4.36426116838488e-05, "loss": 0.0402, "step": 254 }, { "epoch": 0.07, "learning_rate": 4.3814432989690723e-05, "loss": 0.0675, "step": 255 }, { "epoch": 0.07, "learning_rate": 4.3986254295532645e-05, "loss": 0.0332, "step": 256 }, { "epoch": 0.07, "learning_rate": 4.415807560137457e-05, "loss": 0.0516, "step": 257 }, { "epoch": 0.07, "learning_rate": 4.4329896907216494e-05, "loss": 0.0784, "step": 258 }, { "epoch": 0.07, "learning_rate": 4.4501718213058416e-05, "loss": 0.0594, "step": 259 }, { "epoch": 0.07, "learning_rate": 4.467353951890035e-05, "loss": 0.0586, "step": 260 }, { "epoch": 0.07, "learning_rate": 4.484536082474227e-05, "loss": 0.0575, "step": 261 }, { "epoch": 0.07, "learning_rate": 4.5017182130584194e-05, "loss": 0.0605, "step": 262 }, { "epoch": 0.07, "learning_rate": 4.518900343642612e-05, "loss": 0.0396, "step": 263 }, { "epoch": 0.07, "learning_rate": 4.536082474226804e-05, "loss": 0.0588, "step": 264 }, { "epoch": 0.07, "learning_rate": 4.5532646048109965e-05, "loss": 0.0586, "step": 265 }, { "epoch": 0.07, "learning_rate": 4.570446735395189e-05, "loss": 0.0691, "step": 266 }, { "epoch": 0.07, "learning_rate": 4.5876288659793814e-05, "loss": 0.0699, "step": 267 }, { "epoch": 0.07, "learning_rate": 4.6048109965635736e-05, "loss": 0.0579, "step": 268 }, { "epoch": 0.07, "learning_rate": 4.621993127147767e-05, "loss": 0.0729, "step": 269 }, { "epoch": 0.07, "learning_rate": 4.639175257731959e-05, "loss": 0.0569, "step": 270 }, { "epoch": 0.07, "learning_rate": 4.656357388316151e-05, "loss": 0.039, "step": 271 }, { "epoch": 0.07, "learning_rate": 4.673539518900344e-05, "loss": 0.0517, "step": 272 }, { "epoch": 0.07, "learning_rate": 4.690721649484536e-05, "loss": 0.0465, "step": 273 }, { "epoch": 0.07, "learning_rate": 4.7079037800687284e-05, "loss": 0.0537, "step": 274 }, { "epoch": 0.07, "learning_rate": 4.725085910652921e-05, "loss": 0.0482, "step": 275 }, { "epoch": 0.07, "learning_rate": 4.7422680412371134e-05, "loss": 0.0642, "step": 276 }, { "epoch": 0.07, "learning_rate": 4.7594501718213055e-05, "loss": 0.0397, "step": 277 }, { "epoch": 0.07, "learning_rate": 4.776632302405499e-05, "loss": 0.0665, "step": 278 }, { "epoch": 0.07, "learning_rate": 4.793814432989691e-05, "loss": 0.0801, "step": 279 }, { "epoch": 0.07, "learning_rate": 4.810996563573883e-05, "loss": 0.0532, "step": 280 }, { "epoch": 0.07, "learning_rate": 4.828178694158076e-05, "loss": 0.0607, "step": 281 }, { "epoch": 0.07, "learning_rate": 4.845360824742268e-05, "loss": 0.056, "step": 282 }, { "epoch": 0.07, "learning_rate": 4.8625429553264604e-05, "loss": 0.0464, "step": 283 }, { "epoch": 0.07, "learning_rate": 4.879725085910653e-05, "loss": 0.0457, "step": 284 }, { "epoch": 0.07, "learning_rate": 4.8969072164948454e-05, "loss": 0.0499, "step": 285 }, { "epoch": 0.07, "learning_rate": 4.9140893470790375e-05, "loss": 0.0477, "step": 286 }, { "epoch": 0.07, "learning_rate": 4.931271477663231e-05, "loss": 0.0654, "step": 287 }, { "epoch": 0.07, "learning_rate": 4.948453608247423e-05, "loss": 0.052, "step": 288 }, { "epoch": 0.07, "learning_rate": 4.965635738831615e-05, "loss": 0.0576, "step": 289 }, { "epoch": 0.07, "learning_rate": 4.982817869415808e-05, "loss": 0.0489, "step": 290 }, { "epoch": 0.08, "learning_rate": 5e-05, "loss": 0.0612, "step": 291 }, { "epoch": 0.08, "learning_rate": 5.0171821305841924e-05, "loss": 0.064, "step": 292 }, { "epoch": 0.08, "learning_rate": 5.034364261168385e-05, "loss": 0.0599, "step": 293 }, { "epoch": 0.08, "learning_rate": 5.051546391752577e-05, "loss": 0.054, "step": 294 }, { "epoch": 0.08, "learning_rate": 5.0687285223367695e-05, "loss": 0.0714, "step": 295 }, { "epoch": 0.08, "learning_rate": 5.085910652920962e-05, "loss": 0.0654, "step": 296 }, { "epoch": 0.08, "learning_rate": 5.1030927835051544e-05, "loss": 0.0752, "step": 297 }, { "epoch": 0.08, "learning_rate": 5.1202749140893466e-05, "loss": 0.0593, "step": 298 }, { "epoch": 0.08, "learning_rate": 5.1374570446735394e-05, "loss": 0.0583, "step": 299 }, { "epoch": 0.08, "learning_rate": 5.1546391752577315e-05, "loss": 0.0539, "step": 300 }, { "epoch": 0.08, "learning_rate": 5.171821305841925e-05, "loss": 0.1083, "step": 301 }, { "epoch": 0.08, "learning_rate": 5.189003436426118e-05, "loss": 0.066, "step": 302 }, { "epoch": 0.08, "learning_rate": 5.20618556701031e-05, "loss": 0.0686, "step": 303 }, { "epoch": 0.08, "learning_rate": 5.223367697594502e-05, "loss": 0.0609, "step": 304 }, { "epoch": 0.08, "learning_rate": 5.240549828178695e-05, "loss": 0.0801, "step": 305 }, { "epoch": 0.08, "learning_rate": 5.257731958762887e-05, "loss": 0.0548, "step": 306 }, { "epoch": 0.08, "learning_rate": 5.274914089347079e-05, "loss": 0.0569, "step": 307 }, { "epoch": 0.08, "learning_rate": 5.292096219931272e-05, "loss": 0.0713, "step": 308 }, { "epoch": 0.08, "learning_rate": 5.309278350515464e-05, "loss": 0.0717, "step": 309 }, { "epoch": 0.08, "learning_rate": 5.326460481099656e-05, "loss": 0.0745, "step": 310 }, { "epoch": 0.08, "learning_rate": 5.343642611683849e-05, "loss": 0.0837, "step": 311 }, { "epoch": 0.08, "learning_rate": 5.360824742268041e-05, "loss": 0.0855, "step": 312 }, { "epoch": 0.08, "learning_rate": 5.3780068728522334e-05, "loss": 0.0717, "step": 313 }, { "epoch": 0.08, "learning_rate": 5.395189003436426e-05, "loss": 0.078, "step": 314 }, { "epoch": 0.08, "learning_rate": 5.4123711340206184e-05, "loss": 0.0676, "step": 315 }, { "epoch": 0.08, "learning_rate": 5.4295532646048105e-05, "loss": 0.0474, "step": 316 }, { "epoch": 0.08, "learning_rate": 5.4467353951890033e-05, "loss": 0.0526, "step": 317 }, { "epoch": 0.08, "learning_rate": 5.4639175257731955e-05, "loss": 0.0713, "step": 318 }, { "epoch": 0.08, "learning_rate": 5.481099656357389e-05, "loss": 0.0541, "step": 319 }, { "epoch": 0.08, "learning_rate": 5.498281786941582e-05, "loss": 0.0549, "step": 320 }, { "epoch": 0.08, "learning_rate": 5.515463917525774e-05, "loss": 0.0627, "step": 321 }, { "epoch": 0.08, "learning_rate": 5.532646048109966e-05, "loss": 0.0607, "step": 322 }, { "epoch": 0.08, "learning_rate": 5.549828178694159e-05, "loss": 0.0814, "step": 323 }, { "epoch": 0.08, "learning_rate": 5.567010309278351e-05, "loss": 0.0576, "step": 324 }, { "epoch": 0.08, "learning_rate": 5.584192439862543e-05, "loss": 0.0362, "step": 325 }, { "epoch": 0.08, "learning_rate": 5.601374570446736e-05, "loss": 0.0699, "step": 326 }, { "epoch": 0.08, "learning_rate": 5.618556701030928e-05, "loss": 0.0477, "step": 327 }, { "epoch": 0.08, "learning_rate": 5.63573883161512e-05, "loss": 0.0575, "step": 328 }, { "epoch": 0.08, "learning_rate": 5.652920962199313e-05, "loss": 0.0753, "step": 329 }, { "epoch": 0.09, "learning_rate": 5.670103092783505e-05, "loss": 0.0662, "step": 330 }, { "epoch": 0.09, "learning_rate": 5.6872852233676974e-05, "loss": 0.064, "step": 331 }, { "epoch": 0.09, "learning_rate": 5.70446735395189e-05, "loss": 0.0653, "step": 332 }, { "epoch": 0.09, "learning_rate": 5.721649484536082e-05, "loss": 0.0553, "step": 333 }, { "epoch": 0.09, "learning_rate": 5.7388316151202745e-05, "loss": 0.0578, "step": 334 }, { "epoch": 0.09, "learning_rate": 5.756013745704467e-05, "loss": 0.0508, "step": 335 }, { "epoch": 0.09, "learning_rate": 5.7731958762886594e-05, "loss": 0.0552, "step": 336 }, { "epoch": 0.09, "learning_rate": 5.790378006872853e-05, "loss": 0.0556, "step": 337 }, { "epoch": 0.09, "learning_rate": 5.807560137457046e-05, "loss": 0.07, "step": 338 }, { "epoch": 0.09, "learning_rate": 5.824742268041238e-05, "loss": 0.0456, "step": 339 }, { "epoch": 0.09, "learning_rate": 5.84192439862543e-05, "loss": 0.0569, "step": 340 }, { "epoch": 0.09, "learning_rate": 5.859106529209623e-05, "loss": 0.0547, "step": 341 }, { "epoch": 0.09, "learning_rate": 5.876288659793815e-05, "loss": 0.0522, "step": 342 }, { "epoch": 0.09, "learning_rate": 5.893470790378007e-05, "loss": 0.066, "step": 343 }, { "epoch": 0.09, "learning_rate": 5.9106529209622e-05, "loss": 0.049, "step": 344 }, { "epoch": 0.09, "learning_rate": 5.927835051546392e-05, "loss": 0.0552, "step": 345 }, { "epoch": 0.09, "learning_rate": 5.945017182130584e-05, "loss": 0.0556, "step": 346 }, { "epoch": 0.09, "learning_rate": 5.962199312714777e-05, "loss": 0.0719, "step": 347 }, { "epoch": 0.09, "learning_rate": 5.979381443298969e-05, "loss": 0.0403, "step": 348 }, { "epoch": 0.09, "learning_rate": 5.996563573883161e-05, "loss": 0.0451, "step": 349 }, { "epoch": 0.09, "learning_rate": 6.013745704467354e-05, "loss": 0.0659, "step": 350 }, { "epoch": 0.09, "learning_rate": 6.030927835051546e-05, "loss": 0.0598, "step": 351 }, { "epoch": 0.09, "learning_rate": 6.0481099656357384e-05, "loss": 0.0732, "step": 352 }, { "epoch": 0.09, "learning_rate": 6.065292096219931e-05, "loss": 0.0549, "step": 353 }, { "epoch": 0.09, "learning_rate": 6.0824742268041234e-05, "loss": 0.073, "step": 354 }, { "epoch": 0.09, "learning_rate": 6.099656357388317e-05, "loss": 0.0553, "step": 355 }, { "epoch": 0.09, "learning_rate": 6.116838487972509e-05, "loss": 0.0764, "step": 356 }, { "epoch": 0.09, "learning_rate": 6.134020618556701e-05, "loss": 0.0504, "step": 357 }, { "epoch": 0.09, "learning_rate": 6.151202749140895e-05, "loss": 0.047, "step": 358 }, { "epoch": 0.09, "learning_rate": 6.168384879725087e-05, "loss": 0.0595, "step": 359 }, { "epoch": 0.09, "learning_rate": 6.185567010309279e-05, "loss": 0.074, "step": 360 }, { "epoch": 0.09, "learning_rate": 6.202749140893471e-05, "loss": 0.0578, "step": 361 }, { "epoch": 0.09, "learning_rate": 6.219931271477663e-05, "loss": 0.0614, "step": 362 }, { "epoch": 0.09, "learning_rate": 6.237113402061855e-05, "loss": 0.0518, "step": 363 }, { "epoch": 0.09, "learning_rate": 6.254295532646049e-05, "loss": 0.0665, "step": 364 }, { "epoch": 0.09, "learning_rate": 6.271477663230241e-05, "loss": 0.0418, "step": 365 }, { "epoch": 0.09, "learning_rate": 6.288659793814433e-05, "loss": 0.0603, "step": 366 }, { "epoch": 0.09, "learning_rate": 6.305841924398625e-05, "loss": 0.0714, "step": 367 }, { "epoch": 0.09, "learning_rate": 6.323024054982817e-05, "loss": 0.0621, "step": 368 }, { "epoch": 0.1, "learning_rate": 6.340206185567011e-05, "loss": 0.0497, "step": 369 }, { "epoch": 0.1, "learning_rate": 6.357388316151203e-05, "loss": 0.0611, "step": 370 }, { "epoch": 0.1, "learning_rate": 6.374570446735395e-05, "loss": 0.055, "step": 371 }, { "epoch": 0.1, "learning_rate": 6.391752577319587e-05, "loss": 0.0684, "step": 372 }, { "epoch": 0.1, "learning_rate": 6.408934707903781e-05, "loss": 0.0496, "step": 373 }, { "epoch": 0.1, "learning_rate": 6.426116838487973e-05, "loss": 0.0456, "step": 374 }, { "epoch": 0.1, "learning_rate": 6.443298969072165e-05, "loss": 0.0487, "step": 375 }, { "epoch": 0.1, "learning_rate": 6.460481099656359e-05, "loss": 0.0599, "step": 376 }, { "epoch": 0.1, "learning_rate": 6.477663230240551e-05, "loss": 0.0431, "step": 377 }, { "epoch": 0.1, "learning_rate": 6.494845360824743e-05, "loss": 0.0469, "step": 378 }, { "epoch": 0.1, "learning_rate": 6.512027491408935e-05, "loss": 0.0657, "step": 379 }, { "epoch": 0.1, "learning_rate": 6.529209621993127e-05, "loss": 0.0489, "step": 380 }, { "epoch": 0.1, "learning_rate": 6.546391752577319e-05, "loss": 0.0629, "step": 381 }, { "epoch": 0.1, "learning_rate": 6.563573883161513e-05, "loss": 0.06, "step": 382 }, { "epoch": 0.1, "learning_rate": 6.580756013745705e-05, "loss": 0.0607, "step": 383 }, { "epoch": 0.1, "learning_rate": 6.597938144329897e-05, "loss": 0.0565, "step": 384 }, { "epoch": 0.1, "learning_rate": 6.615120274914089e-05, "loss": 0.0654, "step": 385 }, { "epoch": 0.1, "learning_rate": 6.632302405498281e-05, "loss": 0.0876, "step": 386 }, { "epoch": 0.1, "learning_rate": 6.649484536082475e-05, "loss": 0.057, "step": 387 }, { "epoch": 0.1, "learning_rate": 6.666666666666667e-05, "loss": 0.0583, "step": 388 }, { "epoch": 0.1, "learning_rate": 6.683848797250859e-05, "loss": 0.0571, "step": 389 }, { "epoch": 0.1, "learning_rate": 6.701030927835051e-05, "loss": 0.0642, "step": 390 }, { "epoch": 0.1, "learning_rate": 6.718213058419243e-05, "loss": 0.0598, "step": 391 }, { "epoch": 0.1, "learning_rate": 6.735395189003437e-05, "loss": 0.0838, "step": 392 }, { "epoch": 0.1, "learning_rate": 6.752577319587629e-05, "loss": 0.0761, "step": 393 }, { "epoch": 0.1, "learning_rate": 6.769759450171823e-05, "loss": 0.0541, "step": 394 }, { "epoch": 0.1, "learning_rate": 6.786941580756015e-05, "loss": 0.031, "step": 395 }, { "epoch": 0.1, "learning_rate": 6.804123711340207e-05, "loss": 0.0609, "step": 396 }, { "epoch": 0.1, "learning_rate": 6.821305841924399e-05, "loss": 0.0627, "step": 397 }, { "epoch": 0.1, "learning_rate": 6.838487972508591e-05, "loss": 0.0649, "step": 398 }, { "epoch": 0.1, "learning_rate": 6.855670103092783e-05, "loss": 0.0548, "step": 399 }, { "epoch": 0.1, "learning_rate": 6.872852233676977e-05, "loss": 0.041, "step": 400 }, { "epoch": 0.1, "learning_rate": 6.890034364261169e-05, "loss": 0.0473, "step": 401 }, { "epoch": 0.1, "learning_rate": 6.907216494845361e-05, "loss": 0.073, "step": 402 }, { "epoch": 0.1, "learning_rate": 6.924398625429553e-05, "loss": 0.0742, "step": 403 }, { "epoch": 0.1, "learning_rate": 6.941580756013745e-05, "loss": 0.0601, "step": 404 }, { "epoch": 0.1, "learning_rate": 6.958762886597939e-05, "loss": 0.0657, "step": 405 }, { "epoch": 0.1, "learning_rate": 6.975945017182131e-05, "loss": 0.0756, "step": 406 }, { "epoch": 0.11, "learning_rate": 6.993127147766323e-05, "loss": 0.0799, "step": 407 }, { "epoch": 0.11, "learning_rate": 7.010309278350515e-05, "loss": 0.075, "step": 408 }, { "epoch": 0.11, "learning_rate": 7.027491408934707e-05, "loss": 0.065, "step": 409 }, { "epoch": 0.11, "learning_rate": 7.044673539518901e-05, "loss": 0.0609, "step": 410 }, { "epoch": 0.11, "learning_rate": 7.061855670103093e-05, "loss": 0.0754, "step": 411 }, { "epoch": 0.11, "learning_rate": 7.079037800687286e-05, "loss": 0.0704, "step": 412 }, { "epoch": 0.11, "learning_rate": 7.096219931271479e-05, "loss": 0.0798, "step": 413 }, { "epoch": 0.11, "learning_rate": 7.113402061855671e-05, "loss": 0.0646, "step": 414 }, { "epoch": 0.11, "learning_rate": 7.130584192439863e-05, "loss": 0.0606, "step": 415 }, { "epoch": 0.11, "learning_rate": 7.147766323024055e-05, "loss": 0.0626, "step": 416 }, { "epoch": 0.11, "learning_rate": 7.164948453608247e-05, "loss": 0.0697, "step": 417 }, { "epoch": 0.11, "learning_rate": 7.182130584192441e-05, "loss": 0.0597, "step": 418 }, { "epoch": 0.11, "learning_rate": 7.199312714776633e-05, "loss": 0.0653, "step": 419 }, { "epoch": 0.11, "learning_rate": 7.216494845360825e-05, "loss": 0.0789, "step": 420 }, { "epoch": 0.11, "learning_rate": 7.233676975945017e-05, "loss": 0.0524, "step": 421 }, { "epoch": 0.11, "learning_rate": 7.250859106529209e-05, "loss": 0.052, "step": 422 }, { "epoch": 0.11, "learning_rate": 7.268041237113403e-05, "loss": 0.0514, "step": 423 }, { "epoch": 0.11, "learning_rate": 7.285223367697595e-05, "loss": 0.0716, "step": 424 }, { "epoch": 0.11, "learning_rate": 7.302405498281787e-05, "loss": 0.0429, "step": 425 }, { "epoch": 0.11, "learning_rate": 7.319587628865979e-05, "loss": 0.0548, "step": 426 }, { "epoch": 0.11, "learning_rate": 7.336769759450171e-05, "loss": 0.0505, "step": 427 }, { "epoch": 0.11, "learning_rate": 7.353951890034365e-05, "loss": 0.0699, "step": 428 }, { "epoch": 0.11, "learning_rate": 7.371134020618557e-05, "loss": 0.0682, "step": 429 }, { "epoch": 0.11, "learning_rate": 7.38831615120275e-05, "loss": 0.0786, "step": 430 }, { "epoch": 0.11, "learning_rate": 7.405498281786943e-05, "loss": 0.0684, "step": 431 }, { "epoch": 0.11, "learning_rate": 7.422680412371135e-05, "loss": 0.0868, "step": 432 }, { "epoch": 0.11, "learning_rate": 7.439862542955327e-05, "loss": 0.0586, "step": 433 }, { "epoch": 0.11, "learning_rate": 7.457044673539519e-05, "loss": 0.054, "step": 434 }, { "epoch": 0.11, "learning_rate": 7.474226804123711e-05, "loss": 0.0596, "step": 435 }, { "epoch": 0.11, "learning_rate": 7.491408934707905e-05, "loss": 0.063, "step": 436 }, { "epoch": 0.11, "learning_rate": 7.508591065292097e-05, "loss": 0.0551, "step": 437 }, { "epoch": 0.11, "learning_rate": 7.525773195876289e-05, "loss": 0.0569, "step": 438 }, { "epoch": 0.11, "learning_rate": 7.542955326460481e-05, "loss": 0.0508, "step": 439 }, { "epoch": 0.11, "learning_rate": 7.560137457044673e-05, "loss": 0.0857, "step": 440 }, { "epoch": 0.11, "learning_rate": 7.577319587628867e-05, "loss": 0.0601, "step": 441 }, { "epoch": 0.11, "learning_rate": 7.594501718213059e-05, "loss": 0.0809, "step": 442 }, { "epoch": 0.11, "learning_rate": 7.611683848797251e-05, "loss": 0.0687, "step": 443 }, { "epoch": 0.11, "learning_rate": 7.628865979381443e-05, "loss": 0.0615, "step": 444 }, { "epoch": 0.11, "learning_rate": 7.646048109965635e-05, "loss": 0.06, "step": 445 }, { "epoch": 0.12, "learning_rate": 7.663230240549829e-05, "loss": 0.0716, "step": 446 }, { "epoch": 0.12, "learning_rate": 7.680412371134021e-05, "loss": 0.0688, "step": 447 }, { "epoch": 0.12, "learning_rate": 7.697594501718214e-05, "loss": 0.0447, "step": 448 }, { "epoch": 0.12, "learning_rate": 7.714776632302407e-05, "loss": 0.0556, "step": 449 }, { "epoch": 0.12, "learning_rate": 7.731958762886599e-05, "loss": 0.0634, "step": 450 }, { "epoch": 0.12, "learning_rate": 7.749140893470791e-05, "loss": 0.0569, "step": 451 }, { "epoch": 0.12, "learning_rate": 7.766323024054983e-05, "loss": 0.0686, "step": 452 }, { "epoch": 0.12, "learning_rate": 7.783505154639175e-05, "loss": 0.0478, "step": 453 }, { "epoch": 0.12, "learning_rate": 7.800687285223369e-05, "loss": 0.0797, "step": 454 }, { "epoch": 0.12, "learning_rate": 7.817869415807561e-05, "loss": 0.0486, "step": 455 }, { "epoch": 0.12, "learning_rate": 7.835051546391753e-05, "loss": 0.057, "step": 456 }, { "epoch": 0.12, "learning_rate": 7.852233676975945e-05, "loss": 0.0481, "step": 457 }, { "epoch": 0.12, "learning_rate": 7.869415807560137e-05, "loss": 0.076, "step": 458 }, { "epoch": 0.12, "learning_rate": 7.88659793814433e-05, "loss": 0.0705, "step": 459 }, { "epoch": 0.12, "learning_rate": 7.903780068728523e-05, "loss": 0.0518, "step": 460 }, { "epoch": 0.12, "learning_rate": 7.920962199312715e-05, "loss": 0.0795, "step": 461 }, { "epoch": 0.12, "learning_rate": 7.938144329896907e-05, "loss": 0.0667, "step": 462 }, { "epoch": 0.12, "learning_rate": 7.955326460481099e-05, "loss": 0.0551, "step": 463 }, { "epoch": 0.12, "learning_rate": 7.972508591065293e-05, "loss": 0.0508, "step": 464 }, { "epoch": 0.12, "learning_rate": 7.989690721649485e-05, "loss": 0.0639, "step": 465 }, { "epoch": 0.12, "learning_rate": 8.006872852233678e-05, "loss": 0.0576, "step": 466 }, { "epoch": 0.12, "learning_rate": 8.02405498281787e-05, "loss": 0.067, "step": 467 }, { "epoch": 0.12, "learning_rate": 8.041237113402063e-05, "loss": 0.0424, "step": 468 }, { "epoch": 0.12, "learning_rate": 8.058419243986255e-05, "loss": 0.05, "step": 469 }, { "epoch": 0.12, "learning_rate": 8.075601374570447e-05, "loss": 0.0585, "step": 470 }, { "epoch": 0.12, "learning_rate": 8.092783505154639e-05, "loss": 0.0628, "step": 471 }, { "epoch": 0.12, "learning_rate": 8.109965635738833e-05, "loss": 0.0612, "step": 472 }, { "epoch": 0.12, "learning_rate": 8.127147766323025e-05, "loss": 0.0495, "step": 473 }, { "epoch": 0.12, "learning_rate": 8.144329896907217e-05, "loss": 0.0721, "step": 474 }, { "epoch": 0.12, "learning_rate": 8.161512027491409e-05, "loss": 0.0561, "step": 475 }, { "epoch": 0.12, "learning_rate": 8.178694158075601e-05, "loss": 0.0691, "step": 476 }, { "epoch": 0.12, "learning_rate": 8.195876288659795e-05, "loss": 0.0646, "step": 477 }, { "epoch": 0.12, "learning_rate": 8.213058419243987e-05, "loss": 0.0683, "step": 478 }, { "epoch": 0.12, "learning_rate": 8.230240549828179e-05, "loss": 0.0513, "step": 479 }, { "epoch": 0.12, "learning_rate": 8.247422680412371e-05, "loss": 0.0663, "step": 480 }, { "epoch": 0.12, "learning_rate": 8.264604810996563e-05, "loss": 0.052, "step": 481 }, { "epoch": 0.12, "learning_rate": 8.281786941580757e-05, "loss": 0.08, "step": 482 }, { "epoch": 0.12, "learning_rate": 8.298969072164949e-05, "loss": 0.0609, "step": 483 }, { "epoch": 0.12, "learning_rate": 8.316151202749142e-05, "loss": 0.0586, "step": 484 }, { "epoch": 0.13, "learning_rate": 8.333333333333334e-05, "loss": 0.0604, "step": 485 }, { "epoch": 0.13, "learning_rate": 8.350515463917527e-05, "loss": 0.0613, "step": 486 }, { "epoch": 0.13, "learning_rate": 8.367697594501719e-05, "loss": 0.0763, "step": 487 }, { "epoch": 0.13, "learning_rate": 8.384879725085911e-05, "loss": 0.0654, "step": 488 }, { "epoch": 0.13, "learning_rate": 8.402061855670103e-05, "loss": 0.0587, "step": 489 }, { "epoch": 0.13, "learning_rate": 8.419243986254296e-05, "loss": 0.0579, "step": 490 }, { "epoch": 0.13, "learning_rate": 8.436426116838489e-05, "loss": 0.0656, "step": 491 }, { "epoch": 0.13, "learning_rate": 8.453608247422681e-05, "loss": 0.0636, "step": 492 }, { "epoch": 0.13, "learning_rate": 8.470790378006873e-05, "loss": 0.0516, "step": 493 }, { "epoch": 0.13, "learning_rate": 8.487972508591065e-05, "loss": 0.054, "step": 494 }, { "epoch": 0.13, "learning_rate": 8.505154639175259e-05, "loss": 0.0599, "step": 495 }, { "epoch": 0.13, "learning_rate": 8.52233676975945e-05, "loss": 0.0651, "step": 496 }, { "epoch": 0.13, "learning_rate": 8.539518900343643e-05, "loss": 0.05, "step": 497 }, { "epoch": 0.13, "learning_rate": 8.556701030927835e-05, "loss": 0.0733, "step": 498 }, { "epoch": 0.13, "learning_rate": 8.573883161512027e-05, "loss": 0.0542, "step": 499 }, { "epoch": 0.13, "learning_rate": 8.591065292096219e-05, "loss": 0.0565, "step": 500 }, { "epoch": 0.13, "learning_rate": 8.608247422680413e-05, "loss": 0.0837, "step": 501 }, { "epoch": 0.13, "learning_rate": 8.625429553264606e-05, "loss": 0.079, "step": 502 }, { "epoch": 0.13, "learning_rate": 8.642611683848798e-05, "loss": 0.0694, "step": 503 }, { "epoch": 0.13, "learning_rate": 8.65979381443299e-05, "loss": 0.0622, "step": 504 }, { "epoch": 0.13, "learning_rate": 8.676975945017183e-05, "loss": 0.0848, "step": 505 }, { "epoch": 0.13, "learning_rate": 8.694158075601375e-05, "loss": 0.0729, "step": 506 }, { "epoch": 0.13, "learning_rate": 8.711340206185567e-05, "loss": 0.0604, "step": 507 }, { "epoch": 0.13, "learning_rate": 8.72852233676976e-05, "loss": 0.0665, "step": 508 }, { "epoch": 0.13, "learning_rate": 8.745704467353953e-05, "loss": 0.0659, "step": 509 }, { "epoch": 0.13, "learning_rate": 8.762886597938145e-05, "loss": 0.0721, "step": 510 }, { "epoch": 0.13, "learning_rate": 8.780068728522337e-05, "loss": 0.0699, "step": 511 }, { "epoch": 0.13, "learning_rate": 8.797250859106529e-05, "loss": 0.0426, "step": 512 }, { "epoch": 0.13, "learning_rate": 8.814432989690722e-05, "loss": 0.0519, "step": 513 }, { "epoch": 0.13, "learning_rate": 8.831615120274915e-05, "loss": 0.0555, "step": 514 }, { "epoch": 0.13, "learning_rate": 8.848797250859107e-05, "loss": 0.0508, "step": 515 }, { "epoch": 0.13, "learning_rate": 8.865979381443299e-05, "loss": 0.0879, "step": 516 }, { "epoch": 0.13, "learning_rate": 8.883161512027491e-05, "loss": 0.069, "step": 517 }, { "epoch": 0.13, "learning_rate": 8.900343642611683e-05, "loss": 0.0619, "step": 518 }, { "epoch": 0.13, "learning_rate": 8.917525773195877e-05, "loss": 0.0633, "step": 519 }, { "epoch": 0.13, "learning_rate": 8.93470790378007e-05, "loss": 0.0697, "step": 520 }, { "epoch": 0.13, "learning_rate": 8.951890034364262e-05, "loss": 0.056, "step": 521 }, { "epoch": 0.13, "learning_rate": 8.969072164948454e-05, "loss": 0.0814, "step": 522 }, { "epoch": 0.13, "learning_rate": 8.986254295532647e-05, "loss": 0.0576, "step": 523 }, { "epoch": 0.14, "learning_rate": 9.003436426116839e-05, "loss": 0.0717, "step": 524 }, { "epoch": 0.14, "learning_rate": 9.020618556701031e-05, "loss": 0.0714, "step": 525 }, { "epoch": 0.14, "learning_rate": 9.037800687285224e-05, "loss": 0.0477, "step": 526 }, { "epoch": 0.14, "learning_rate": 9.054982817869416e-05, "loss": 0.0827, "step": 527 }, { "epoch": 0.14, "learning_rate": 9.072164948453609e-05, "loss": 0.0679, "step": 528 }, { "epoch": 0.14, "learning_rate": 9.089347079037801e-05, "loss": 0.0541, "step": 529 }, { "epoch": 0.14, "learning_rate": 9.106529209621993e-05, "loss": 0.064, "step": 530 }, { "epoch": 0.14, "learning_rate": 9.123711340206186e-05, "loss": 0.0783, "step": 531 }, { "epoch": 0.14, "learning_rate": 9.140893470790379e-05, "loss": 0.0743, "step": 532 }, { "epoch": 0.14, "learning_rate": 9.158075601374571e-05, "loss": 0.0725, "step": 533 }, { "epoch": 0.14, "learning_rate": 9.175257731958763e-05, "loss": 0.0551, "step": 534 }, { "epoch": 0.14, "learning_rate": 9.192439862542955e-05, "loss": 0.053, "step": 535 }, { "epoch": 0.14, "learning_rate": 9.209621993127147e-05, "loss": 0.0763, "step": 536 }, { "epoch": 0.14, "learning_rate": 9.22680412371134e-05, "loss": 0.0674, "step": 537 }, { "epoch": 0.14, "learning_rate": 9.243986254295534e-05, "loss": 0.0691, "step": 538 }, { "epoch": 0.14, "learning_rate": 9.261168384879726e-05, "loss": 0.0569, "step": 539 }, { "epoch": 0.14, "learning_rate": 9.278350515463918e-05, "loss": 0.0717, "step": 540 }, { "epoch": 0.14, "learning_rate": 9.29553264604811e-05, "loss": 0.0566, "step": 541 }, { "epoch": 0.14, "learning_rate": 9.312714776632303e-05, "loss": 0.0724, "step": 542 }, { "epoch": 0.14, "learning_rate": 9.329896907216495e-05, "loss": 0.0489, "step": 543 }, { "epoch": 0.14, "learning_rate": 9.347079037800688e-05, "loss": 0.0762, "step": 544 }, { "epoch": 0.14, "learning_rate": 9.36426116838488e-05, "loss": 0.0594, "step": 545 }, { "epoch": 0.14, "learning_rate": 9.381443298969073e-05, "loss": 0.0594, "step": 546 }, { "epoch": 0.14, "learning_rate": 9.398625429553265e-05, "loss": 0.0691, "step": 547 }, { "epoch": 0.14, "learning_rate": 9.415807560137457e-05, "loss": 0.0715, "step": 548 }, { "epoch": 0.14, "learning_rate": 9.43298969072165e-05, "loss": 0.0647, "step": 549 }, { "epoch": 0.14, "learning_rate": 9.450171821305843e-05, "loss": 0.051, "step": 550 }, { "epoch": 0.14, "learning_rate": 9.467353951890035e-05, "loss": 0.0678, "step": 551 }, { "epoch": 0.14, "learning_rate": 9.484536082474227e-05, "loss": 0.0542, "step": 552 }, { "epoch": 0.14, "learning_rate": 9.501718213058419e-05, "loss": 0.064, "step": 553 }, { "epoch": 0.14, "learning_rate": 9.518900343642611e-05, "loss": 0.0634, "step": 554 }, { "epoch": 0.14, "learning_rate": 9.536082474226805e-05, "loss": 0.0566, "step": 555 }, { "epoch": 0.14, "learning_rate": 9.553264604810998e-05, "loss": 0.0592, "step": 556 }, { "epoch": 0.14, "learning_rate": 9.57044673539519e-05, "loss": 0.053, "step": 557 }, { "epoch": 0.14, "learning_rate": 9.587628865979382e-05, "loss": 0.0712, "step": 558 }, { "epoch": 0.14, "learning_rate": 9.604810996563574e-05, "loss": 0.0727, "step": 559 }, { "epoch": 0.14, "learning_rate": 9.621993127147767e-05, "loss": 0.0558, "step": 560 }, { "epoch": 0.14, "learning_rate": 9.639175257731959e-05, "loss": 0.0524, "step": 561 }, { "epoch": 0.15, "learning_rate": 9.656357388316152e-05, "loss": 0.0688, "step": 562 }, { "epoch": 0.15, "learning_rate": 9.673539518900344e-05, "loss": 0.0656, "step": 563 }, { "epoch": 0.15, "learning_rate": 9.690721649484537e-05, "loss": 0.0567, "step": 564 }, { "epoch": 0.15, "learning_rate": 9.707903780068729e-05, "loss": 0.0737, "step": 565 }, { "epoch": 0.15, "learning_rate": 9.725085910652921e-05, "loss": 0.0633, "step": 566 }, { "epoch": 0.15, "learning_rate": 9.742268041237114e-05, "loss": 0.0676, "step": 567 }, { "epoch": 0.15, "learning_rate": 9.759450171821306e-05, "loss": 0.0818, "step": 568 }, { "epoch": 0.15, "learning_rate": 9.776632302405499e-05, "loss": 0.0552, "step": 569 }, { "epoch": 0.15, "learning_rate": 9.793814432989691e-05, "loss": 0.0467, "step": 570 }, { "epoch": 0.15, "learning_rate": 9.810996563573883e-05, "loss": 0.0475, "step": 571 }, { "epoch": 0.15, "learning_rate": 9.828178694158075e-05, "loss": 0.058, "step": 572 }, { "epoch": 0.15, "learning_rate": 9.845360824742269e-05, "loss": 0.0535, "step": 573 }, { "epoch": 0.15, "learning_rate": 9.862542955326462e-05, "loss": 0.0564, "step": 574 }, { "epoch": 0.15, "learning_rate": 9.879725085910654e-05, "loss": 0.0515, "step": 575 }, { "epoch": 0.15, "learning_rate": 9.896907216494846e-05, "loss": 0.0713, "step": 576 }, { "epoch": 0.15, "learning_rate": 9.914089347079038e-05, "loss": 0.0664, "step": 577 }, { "epoch": 0.15, "learning_rate": 9.93127147766323e-05, "loss": 0.0673, "step": 578 }, { "epoch": 0.15, "learning_rate": 9.948453608247423e-05, "loss": 0.0615, "step": 579 }, { "epoch": 0.15, "learning_rate": 9.965635738831616e-05, "loss": 0.0678, "step": 580 }, { "epoch": 0.15, "learning_rate": 9.982817869415808e-05, "loss": 0.0534, "step": 581 }, { "epoch": 0.15, "learning_rate": 0.0001, "loss": 0.0516, "step": 582 }, { "epoch": 0.15, "learning_rate": 9.999999797667519e-05, "loss": 0.0556, "step": 583 }, { "epoch": 0.15, "learning_rate": 9.999999190670093e-05, "loss": 0.0462, "step": 584 }, { "epoch": 0.15, "learning_rate": 9.99999817900777e-05, "loss": 0.0519, "step": 585 }, { "epoch": 0.15, "learning_rate": 9.999996762680632e-05, "loss": 0.066, "step": 586 }, { "epoch": 0.15, "learning_rate": 9.999994941688795e-05, "loss": 0.0608, "step": 587 }, { "epoch": 0.15, "learning_rate": 9.999992716032405e-05, "loss": 0.0629, "step": 588 }, { "epoch": 0.15, "learning_rate": 9.999990085711643e-05, "loss": 0.0692, "step": 589 }, { "epoch": 0.15, "learning_rate": 9.99998705072672e-05, "loss": 0.056, "step": 590 }, { "epoch": 0.15, "learning_rate": 9.999983611077885e-05, "loss": 0.0722, "step": 591 }, { "epoch": 0.15, "learning_rate": 9.999979766765414e-05, "loss": 0.0378, "step": 592 }, { "epoch": 0.15, "learning_rate": 9.999975517789619e-05, "loss": 0.0596, "step": 593 }, { "epoch": 0.15, "learning_rate": 9.999970864150843e-05, "loss": 0.0705, "step": 594 }, { "epoch": 0.15, "learning_rate": 9.999965805849462e-05, "loss": 0.0608, "step": 595 }, { "epoch": 0.15, "learning_rate": 9.999960342885889e-05, "loss": 0.072, "step": 596 }, { "epoch": 0.15, "learning_rate": 9.99995447526056e-05, "loss": 0.0673, "step": 597 }, { "epoch": 0.15, "learning_rate": 9.999948202973957e-05, "loss": 0.0614, "step": 598 }, { "epoch": 0.15, "learning_rate": 9.999941526026582e-05, "loss": 0.0848, "step": 599 }, { "epoch": 0.15, "learning_rate": 9.999934444418978e-05, "loss": 0.0638, "step": 600 }, { "epoch": 0.16, "learning_rate": 9.999926958151717e-05, "loss": 0.1093, "step": 601 }, { "epoch": 0.16, "learning_rate": 9.999919067225407e-05, "loss": 0.0655, "step": 602 }, { "epoch": 0.16, "learning_rate": 9.999910771640686e-05, "loss": 0.105, "step": 603 }, { "epoch": 0.16, "learning_rate": 9.999902071398222e-05, "loss": 0.0564, "step": 604 }, { "epoch": 0.16, "learning_rate": 9.999892966498723e-05, "loss": 0.0673, "step": 605 }, { "epoch": 0.16, "learning_rate": 9.999883456942924e-05, "loss": 0.0675, "step": 606 }, { "epoch": 0.16, "learning_rate": 9.999873542731597e-05, "loss": 0.0572, "step": 607 }, { "epoch": 0.16, "learning_rate": 9.999863223865542e-05, "loss": 0.0834, "step": 608 }, { "epoch": 0.16, "learning_rate": 9.999852500345594e-05, "loss": 0.0601, "step": 609 }, { "epoch": 0.16, "learning_rate": 9.999841372172623e-05, "loss": 0.0832, "step": 610 }, { "epoch": 0.16, "learning_rate": 9.999829839347527e-05, "loss": 0.0757, "step": 611 }, { "epoch": 0.16, "learning_rate": 9.99981790187124e-05, "loss": 0.0552, "step": 612 }, { "epoch": 0.16, "learning_rate": 9.99980555974473e-05, "loss": 0.0773, "step": 613 }, { "epoch": 0.16, "learning_rate": 9.999792812968995e-05, "loss": 0.0522, "step": 614 }, { "epoch": 0.16, "learning_rate": 9.999779661545066e-05, "loss": 0.0792, "step": 615 }, { "epoch": 0.16, "learning_rate": 9.999766105474007e-05, "loss": 0.0664, "step": 616 }, { "epoch": 0.16, "learning_rate": 9.999752144756916e-05, "loss": 0.0567, "step": 617 }, { "epoch": 0.16, "learning_rate": 9.999737779394924e-05, "loss": 0.0648, "step": 618 }, { "epoch": 0.16, "learning_rate": 9.999723009389191e-05, "loss": 0.064, "step": 619 }, { "epoch": 0.16, "learning_rate": 9.999707834740915e-05, "loss": 0.0487, "step": 620 }, { "epoch": 0.16, "learning_rate": 9.999692255451322e-05, "loss": 0.0515, "step": 621 }, { "epoch": 0.16, "learning_rate": 9.999676271521675e-05, "loss": 0.0547, "step": 622 }, { "epoch": 0.16, "learning_rate": 9.999659882953264e-05, "loss": 0.0718, "step": 623 }, { "epoch": 0.16, "learning_rate": 9.999643089747419e-05, "loss": 0.0594, "step": 624 }, { "epoch": 0.16, "learning_rate": 9.999625891905497e-05, "loss": 0.0651, "step": 625 }, { "epoch": 0.16, "learning_rate": 9.999608289428892e-05, "loss": 0.0602, "step": 626 }, { "epoch": 0.16, "learning_rate": 9.999590282319028e-05, "loss": 0.0729, "step": 627 }, { "epoch": 0.16, "learning_rate": 9.99957187057736e-05, "loss": 0.0668, "step": 628 }, { "epoch": 0.16, "learning_rate": 9.999553054205382e-05, "loss": 0.0751, "step": 629 }, { "epoch": 0.16, "learning_rate": 9.999533833204613e-05, "loss": 0.0653, "step": 630 }, { "epoch": 0.16, "learning_rate": 9.99951420757661e-05, "loss": 0.0954, "step": 631 }, { "epoch": 0.16, "learning_rate": 9.999494177322964e-05, "loss": 0.0708, "step": 632 }, { "epoch": 0.16, "learning_rate": 9.999473742445291e-05, "loss": 0.0588, "step": 633 }, { "epoch": 0.16, "learning_rate": 9.999452902945249e-05, "loss": 0.0791, "step": 634 }, { "epoch": 0.16, "learning_rate": 9.999431658824524e-05, "loss": 0.0601, "step": 635 }, { "epoch": 0.16, "learning_rate": 9.999410010084834e-05, "loss": 0.0629, "step": 636 }, { "epoch": 0.16, "learning_rate": 9.999387956727931e-05, "loss": 0.0784, "step": 637 }, { "epoch": 0.16, "learning_rate": 9.999365498755602e-05, "loss": 0.0702, "step": 638 }, { "epoch": 0.16, "learning_rate": 9.999342636169662e-05, "loss": 0.0633, "step": 639 }, { "epoch": 0.17, "learning_rate": 9.999319368971963e-05, "loss": 0.0579, "step": 640 }, { "epoch": 0.17, "learning_rate": 9.999295697164388e-05, "loss": 0.0558, "step": 641 }, { "epoch": 0.17, "learning_rate": 9.999271620748851e-05, "loss": 0.0753, "step": 642 }, { "epoch": 0.17, "learning_rate": 9.999247139727304e-05, "loss": 0.0599, "step": 643 }, { "epoch": 0.17, "learning_rate": 9.999222254101724e-05, "loss": 0.0791, "step": 644 }, { "epoch": 0.17, "learning_rate": 9.999196963874129e-05, "loss": 0.0676, "step": 645 }, { "epoch": 0.17, "learning_rate": 9.999171269046563e-05, "loss": 0.0614, "step": 646 }, { "epoch": 0.17, "learning_rate": 9.999145169621109e-05, "loss": 0.0622, "step": 647 }, { "epoch": 0.17, "learning_rate": 9.999118665599875e-05, "loss": 0.08, "step": 648 }, { "epoch": 0.17, "learning_rate": 9.999091756985008e-05, "loss": 0.0827, "step": 649 }, { "epoch": 0.17, "learning_rate": 9.999064443778688e-05, "loss": 0.0859, "step": 650 }, { "epoch": 0.17, "learning_rate": 9.999036725983121e-05, "loss": 0.0625, "step": 651 }, { "epoch": 0.17, "learning_rate": 9.999008603600553e-05, "loss": 0.0592, "step": 652 }, { "epoch": 0.17, "learning_rate": 9.99898007663326e-05, "loss": 0.0677, "step": 653 }, { "epoch": 0.17, "learning_rate": 9.998951145083552e-05, "loss": 0.0534, "step": 654 }, { "epoch": 0.17, "learning_rate": 9.998921808953767e-05, "loss": 0.0546, "step": 655 }, { "epoch": 0.17, "learning_rate": 9.998892068246282e-05, "loss": 0.0701, "step": 656 }, { "epoch": 0.17, "learning_rate": 9.998861922963505e-05, "loss": 0.0671, "step": 657 }, { "epoch": 0.17, "learning_rate": 9.998831373107872e-05, "loss": 0.0654, "step": 658 }, { "epoch": 0.17, "learning_rate": 9.998800418681858e-05, "loss": 0.0586, "step": 659 }, { "epoch": 0.17, "learning_rate": 9.99876905968797e-05, "loss": 0.078, "step": 660 }, { "epoch": 0.17, "learning_rate": 9.99873729612874e-05, "loss": 0.0579, "step": 661 }, { "epoch": 0.17, "learning_rate": 9.998705128006745e-05, "loss": 0.0847, "step": 662 }, { "epoch": 0.17, "learning_rate": 9.998672555324584e-05, "loss": 0.0526, "step": 663 }, { "epoch": 0.17, "learning_rate": 9.998639578084896e-05, "loss": 0.047, "step": 664 }, { "epoch": 0.17, "learning_rate": 9.99860619629035e-05, "loss": 0.0648, "step": 665 }, { "epoch": 0.17, "learning_rate": 9.998572409943646e-05, "loss": 0.0425, "step": 666 }, { "epoch": 0.17, "learning_rate": 9.99853821904752e-05, "loss": 0.0759, "step": 667 }, { "epoch": 0.17, "learning_rate": 9.998503623604738e-05, "loss": 0.0587, "step": 668 }, { "epoch": 0.17, "learning_rate": 9.998468623618101e-05, "loss": 0.0569, "step": 669 }, { "epoch": 0.17, "learning_rate": 9.998433219090438e-05, "loss": 0.0472, "step": 670 }, { "epoch": 0.17, "learning_rate": 9.99839741002462e-05, "loss": 0.0698, "step": 671 }, { "epoch": 0.17, "learning_rate": 9.998361196423541e-05, "loss": 0.0673, "step": 672 }, { "epoch": 0.17, "learning_rate": 9.998324578290135e-05, "loss": 0.0648, "step": 673 }, { "epoch": 0.17, "learning_rate": 9.998287555627363e-05, "loss": 0.0787, "step": 674 }, { "epoch": 0.17, "learning_rate": 9.998250128438223e-05, "loss": 0.0592, "step": 675 }, { "epoch": 0.17, "learning_rate": 9.998212296725742e-05, "loss": 0.0661, "step": 676 }, { "epoch": 0.17, "learning_rate": 9.998174060492984e-05, "loss": 0.0654, "step": 677 }, { "epoch": 0.17, "learning_rate": 9.998135419743044e-05, "loss": 0.0546, "step": 678 }, { "epoch": 0.18, "learning_rate": 9.998096374479046e-05, "loss": 0.0484, "step": 679 }, { "epoch": 0.18, "learning_rate": 9.998056924704153e-05, "loss": 0.0899, "step": 680 }, { "epoch": 0.18, "learning_rate": 9.998017070421558e-05, "loss": 0.038, "step": 681 }, { "epoch": 0.18, "learning_rate": 9.997976811634483e-05, "loss": 0.0649, "step": 682 }, { "epoch": 0.18, "learning_rate": 9.997936148346192e-05, "loss": 0.0749, "step": 683 }, { "epoch": 0.18, "learning_rate": 9.997895080559971e-05, "loss": 0.061, "step": 684 }, { "epoch": 0.18, "learning_rate": 9.997853608279146e-05, "loss": 0.0676, "step": 685 }, { "epoch": 0.18, "learning_rate": 9.997811731507071e-05, "loss": 0.0774, "step": 686 }, { "epoch": 0.18, "learning_rate": 9.997769450247141e-05, "loss": 0.0755, "step": 687 }, { "epoch": 0.18, "learning_rate": 9.997726764502772e-05, "loss": 0.0583, "step": 688 }, { "epoch": 0.18, "learning_rate": 9.997683674277421e-05, "loss": 0.0648, "step": 689 }, { "epoch": 0.18, "learning_rate": 9.997640179574574e-05, "loss": 0.0461, "step": 690 }, { "epoch": 0.18, "learning_rate": 9.997596280397754e-05, "loss": 0.0571, "step": 691 }, { "epoch": 0.18, "learning_rate": 9.997551976750512e-05, "loss": 0.0557, "step": 692 }, { "epoch": 0.18, "learning_rate": 9.997507268636434e-05, "loss": 0.0609, "step": 693 }, { "epoch": 0.18, "learning_rate": 9.997462156059137e-05, "loss": 0.069, "step": 694 }, { "epoch": 0.18, "learning_rate": 9.997416639022274e-05, "loss": 0.0638, "step": 695 }, { "epoch": 0.18, "learning_rate": 9.997370717529529e-05, "loss": 0.0506, "step": 696 }, { "epoch": 0.18, "learning_rate": 9.997324391584617e-05, "loss": 0.0696, "step": 697 }, { "epoch": 0.18, "learning_rate": 9.997277661191289e-05, "loss": 0.0725, "step": 698 }, { "epoch": 0.18, "learning_rate": 9.997230526353327e-05, "loss": 0.0726, "step": 699 }, { "epoch": 0.18, "learning_rate": 9.997182987074543e-05, "loss": 0.0554, "step": 700 }, { "epoch": 0.18, "learning_rate": 9.997135043358786e-05, "loss": 0.0648, "step": 701 }, { "epoch": 0.18, "learning_rate": 9.997086695209938e-05, "loss": 0.101, "step": 702 }, { "epoch": 0.18, "learning_rate": 9.997037942631909e-05, "loss": 0.0782, "step": 703 }, { "epoch": 0.18, "learning_rate": 9.996988785628647e-05, "loss": 0.0749, "step": 704 }, { "epoch": 0.18, "learning_rate": 9.99693922420413e-05, "loss": 0.0925, "step": 705 }, { "epoch": 0.18, "learning_rate": 9.996889258362369e-05, "loss": 0.0799, "step": 706 }, { "epoch": 0.18, "learning_rate": 9.996838888107407e-05, "loss": 0.0645, "step": 707 }, { "epoch": 0.18, "learning_rate": 9.996788113443321e-05, "loss": 0.0635, "step": 708 }, { "epoch": 0.18, "learning_rate": 9.996736934374222e-05, "loss": 0.0598, "step": 709 }, { "epoch": 0.18, "learning_rate": 9.99668535090425e-05, "loss": 0.055, "step": 710 }, { "epoch": 0.18, "learning_rate": 9.99663336303758e-05, "loss": 0.0583, "step": 711 }, { "epoch": 0.18, "learning_rate": 9.996580970778422e-05, "loss": 0.0605, "step": 712 }, { "epoch": 0.18, "learning_rate": 9.996528174131013e-05, "loss": 0.0712, "step": 713 }, { "epoch": 0.18, "learning_rate": 9.996474973099626e-05, "loss": 0.0656, "step": 714 }, { "epoch": 0.18, "learning_rate": 9.99642136768857e-05, "loss": 0.0857, "step": 715 }, { "epoch": 0.18, "learning_rate": 9.996367357902182e-05, "loss": 0.0611, "step": 716 }, { "epoch": 0.19, "learning_rate": 9.99631294374483e-05, "loss": 0.0609, "step": 717 }, { "epoch": 0.19, "learning_rate": 9.996258125220924e-05, "loss": 0.0643, "step": 718 }, { "epoch": 0.19, "learning_rate": 9.996202902334895e-05, "loss": 0.0693, "step": 719 }, { "epoch": 0.19, "learning_rate": 9.996147275091215e-05, "loss": 0.0739, "step": 720 }, { "epoch": 0.19, "learning_rate": 9.996091243494384e-05, "loss": 0.0745, "step": 721 }, { "epoch": 0.19, "learning_rate": 9.99603480754894e-05, "loss": 0.0836, "step": 722 }, { "epoch": 0.19, "learning_rate": 9.99597796725945e-05, "loss": 0.0872, "step": 723 }, { "epoch": 0.19, "learning_rate": 9.995920722630512e-05, "loss": 0.0765, "step": 724 }, { "epoch": 0.19, "learning_rate": 9.99586307366676e-05, "loss": 0.0776, "step": 725 }, { "epoch": 0.19, "learning_rate": 9.995805020372859e-05, "loss": 0.0778, "step": 726 }, { "epoch": 0.19, "learning_rate": 9.99574656275351e-05, "loss": 0.0537, "step": 727 }, { "epoch": 0.19, "learning_rate": 9.995687700813443e-05, "loss": 0.0761, "step": 728 }, { "epoch": 0.19, "learning_rate": 9.99562843455742e-05, "loss": 0.0592, "step": 729 }, { "epoch": 0.19, "learning_rate": 9.995568763990239e-05, "loss": 0.0744, "step": 730 }, { "epoch": 0.19, "learning_rate": 9.995508689116729e-05, "loss": 0.0644, "step": 731 }, { "epoch": 0.19, "learning_rate": 9.995448209941753e-05, "loss": 0.0576, "step": 732 }, { "epoch": 0.19, "learning_rate": 9.995387326470204e-05, "loss": 0.0613, "step": 733 }, { "epoch": 0.19, "learning_rate": 9.995326038707012e-05, "loss": 0.0882, "step": 734 }, { "epoch": 0.19, "learning_rate": 9.995264346657136e-05, "loss": 0.0775, "step": 735 }, { "epoch": 0.19, "learning_rate": 9.995202250325568e-05, "loss": 0.0652, "step": 736 }, { "epoch": 0.19, "learning_rate": 9.995139749717334e-05, "loss": 0.0624, "step": 737 }, { "epoch": 0.19, "learning_rate": 9.995076844837492e-05, "loss": 0.0549, "step": 738 }, { "epoch": 0.19, "learning_rate": 9.995013535691135e-05, "loss": 0.0859, "step": 739 }, { "epoch": 0.19, "learning_rate": 9.994949822283385e-05, "loss": 0.0677, "step": 740 }, { "epoch": 0.19, "learning_rate": 9.9948857046194e-05, "loss": 0.0516, "step": 741 }, { "epoch": 0.19, "learning_rate": 9.994821182704368e-05, "loss": 0.0678, "step": 742 }, { "epoch": 0.19, "learning_rate": 9.994756256543511e-05, "loss": 0.0746, "step": 743 }, { "epoch": 0.19, "learning_rate": 9.994690926142083e-05, "loss": 0.0868, "step": 744 }, { "epoch": 0.19, "learning_rate": 9.994625191505374e-05, "loss": 0.0744, "step": 745 }, { "epoch": 0.19, "learning_rate": 9.9945590526387e-05, "loss": 0.0797, "step": 746 }, { "epoch": 0.19, "learning_rate": 9.994492509547417e-05, "loss": 0.0772, "step": 747 }, { "epoch": 0.19, "learning_rate": 9.994425562236911e-05, "loss": 0.0634, "step": 748 }, { "epoch": 0.19, "learning_rate": 9.994358210712599e-05, "loss": 0.0699, "step": 749 }, { "epoch": 0.19, "learning_rate": 9.994290454979931e-05, "loss": 0.0767, "step": 750 }, { "epoch": 0.19, "learning_rate": 9.994222295044393e-05, "loss": 0.079, "step": 751 }, { "epoch": 0.19, "learning_rate": 9.994153730911499e-05, "loss": 0.0701, "step": 752 }, { "epoch": 0.19, "learning_rate": 9.994084762586798e-05, "loss": 0.0637, "step": 753 }, { "epoch": 0.19, "learning_rate": 9.994015390075875e-05, "loss": 0.0758, "step": 754 }, { "epoch": 0.19, "learning_rate": 9.993945613384342e-05, "loss": 0.0653, "step": 755 }, { "epoch": 0.2, "learning_rate": 9.993875432517846e-05, "loss": 0.0792, "step": 756 }, { "epoch": 0.2, "learning_rate": 9.993804847482067e-05, "loss": 0.0458, "step": 757 }, { "epoch": 0.2, "learning_rate": 9.993733858282718e-05, "loss": 0.0688, "step": 758 }, { "epoch": 0.2, "learning_rate": 9.993662464925548e-05, "loss": 0.0742, "step": 759 }, { "epoch": 0.2, "learning_rate": 9.993590667416329e-05, "loss": 0.0644, "step": 760 }, { "epoch": 0.2, "learning_rate": 9.993518465760875e-05, "loss": 0.065, "step": 761 }, { "epoch": 0.2, "learning_rate": 9.993445859965029e-05, "loss": 0.0725, "step": 762 }, { "epoch": 0.2, "learning_rate": 9.993372850034668e-05, "loss": 0.0696, "step": 763 }, { "epoch": 0.2, "learning_rate": 9.9932994359757e-05, "loss": 0.0617, "step": 764 }, { "epoch": 0.2, "learning_rate": 9.993225617794067e-05, "loss": 0.0605, "step": 765 }, { "epoch": 0.2, "learning_rate": 9.993151395495741e-05, "loss": 0.0598, "step": 766 }, { "epoch": 0.2, "learning_rate": 9.993076769086734e-05, "loss": 0.0713, "step": 767 }, { "epoch": 0.2, "learning_rate": 9.99300173857308e-05, "loss": 0.0595, "step": 768 }, { "epoch": 0.2, "learning_rate": 9.992926303960857e-05, "loss": 0.07, "step": 769 }, { "epoch": 0.2, "learning_rate": 9.992850465256165e-05, "loss": 0.0566, "step": 770 }, { "epoch": 0.2, "learning_rate": 9.992774222465147e-05, "loss": 0.0934, "step": 771 }, { "epoch": 0.2, "learning_rate": 9.992697575593969e-05, "loss": 0.0614, "step": 772 }, { "epoch": 0.2, "learning_rate": 9.992620524648838e-05, "loss": 0.0542, "step": 773 }, { "epoch": 0.2, "learning_rate": 9.992543069635985e-05, "loss": 0.0691, "step": 774 }, { "epoch": 0.2, "learning_rate": 9.992465210561683e-05, "loss": 0.0833, "step": 775 }, { "epoch": 0.2, "learning_rate": 9.992386947432233e-05, "loss": 0.0698, "step": 776 }, { "epoch": 0.2, "learning_rate": 9.992308280253969e-05, "loss": 0.0665, "step": 777 }, { "epoch": 0.2, "learning_rate": 9.992229209033255e-05, "loss": 0.0772, "step": 778 }, { "epoch": 0.2, "learning_rate": 9.992149733776492e-05, "loss": 0.0676, "step": 779 }, { "epoch": 0.2, "learning_rate": 9.992069854490114e-05, "loss": 0.0728, "step": 780 }, { "epoch": 0.2, "learning_rate": 9.991989571180585e-05, "loss": 0.0674, "step": 781 }, { "epoch": 0.2, "learning_rate": 9.991908883854401e-05, "loss": 0.06, "step": 782 }, { "epoch": 0.2, "learning_rate": 9.991827792518093e-05, "loss": 0.0825, "step": 783 }, { "epoch": 0.2, "learning_rate": 9.991746297178226e-05, "loss": 0.0706, "step": 784 }, { "epoch": 0.2, "learning_rate": 9.991664397841391e-05, "loss": 0.0702, "step": 785 }, { "epoch": 0.2, "learning_rate": 9.991582094514222e-05, "loss": 0.0543, "step": 786 }, { "epoch": 0.2, "learning_rate": 9.991499387203376e-05, "loss": 0.0599, "step": 787 }, { "epoch": 0.2, "learning_rate": 9.99141627591555e-05, "loss": 0.0703, "step": 788 }, { "epoch": 0.2, "learning_rate": 9.991332760657466e-05, "loss": 0.0745, "step": 789 }, { "epoch": 0.2, "learning_rate": 9.991248841435887e-05, "loss": 0.0576, "step": 790 }, { "epoch": 0.2, "learning_rate": 9.991164518257602e-05, "loss": 0.0609, "step": 791 }, { "epoch": 0.2, "learning_rate": 9.991079791129441e-05, "loss": 0.0759, "step": 792 }, { "epoch": 0.2, "learning_rate": 9.990994660058254e-05, "loss": 0.0698, "step": 793 }, { "epoch": 0.2, "learning_rate": 9.990909125050934e-05, "loss": 0.0366, "step": 794 }, { "epoch": 0.21, "learning_rate": 9.990823186114405e-05, "loss": 0.0493, "step": 795 }, { "epoch": 0.21, "learning_rate": 9.990736843255621e-05, "loss": 0.0694, "step": 796 }, { "epoch": 0.21, "learning_rate": 9.99065009648157e-05, "loss": 0.0583, "step": 797 }, { "epoch": 0.21, "learning_rate": 9.990562945799273e-05, "loss": 0.0707, "step": 798 }, { "epoch": 0.21, "learning_rate": 9.990475391215782e-05, "loss": 0.0413, "step": 799 }, { "epoch": 0.21, "learning_rate": 9.990387432738184e-05, "loss": 0.0668, "step": 800 }, { "epoch": 0.21, "learning_rate": 9.990299070373599e-05, "loss": 0.0806, "step": 801 }, { "epoch": 0.21, "learning_rate": 9.990210304129177e-05, "loss": 0.0684, "step": 802 }, { "epoch": 0.21, "learning_rate": 9.990121134012103e-05, "loss": 0.0869, "step": 803 }, { "epoch": 0.21, "learning_rate": 9.990031560029594e-05, "loss": 0.0796, "step": 804 }, { "epoch": 0.21, "learning_rate": 9.989941582188897e-05, "loss": 0.0732, "step": 805 }, { "epoch": 0.21, "learning_rate": 9.989851200497295e-05, "loss": 0.0767, "step": 806 }, { "epoch": 0.21, "learning_rate": 9.989760414962106e-05, "loss": 0.054, "step": 807 }, { "epoch": 0.21, "learning_rate": 9.989669225590674e-05, "loss": 0.0814, "step": 808 }, { "epoch": 0.21, "learning_rate": 9.989577632390381e-05, "loss": 0.0587, "step": 809 }, { "epoch": 0.21, "learning_rate": 9.989485635368641e-05, "loss": 0.0599, "step": 810 }, { "epoch": 0.21, "learning_rate": 9.989393234532895e-05, "loss": 0.0751, "step": 811 }, { "epoch": 0.21, "learning_rate": 9.989300429890627e-05, "loss": 0.0959, "step": 812 }, { "epoch": 0.21, "learning_rate": 9.989207221449344e-05, "loss": 0.0855, "step": 813 }, { "epoch": 0.21, "learning_rate": 9.989113609216592e-05, "loss": 0.0809, "step": 814 }, { "epoch": 0.21, "learning_rate": 9.989019593199945e-05, "loss": 0.0482, "step": 815 }, { "epoch": 0.21, "learning_rate": 9.988925173407013e-05, "loss": 0.0711, "step": 816 }, { "epoch": 0.21, "learning_rate": 9.98883034984544e-05, "loss": 0.0527, "step": 817 }, { "epoch": 0.21, "learning_rate": 9.988735122522897e-05, "loss": 0.0685, "step": 818 }, { "epoch": 0.21, "learning_rate": 9.988639491447092e-05, "loss": 0.0527, "step": 819 }, { "epoch": 0.21, "learning_rate": 9.988543456625765e-05, "loss": 0.0712, "step": 820 }, { "epoch": 0.21, "learning_rate": 9.988447018066688e-05, "loss": 0.0812, "step": 821 }, { "epoch": 0.21, "learning_rate": 9.988350175777668e-05, "loss": 0.0728, "step": 822 }, { "epoch": 0.21, "learning_rate": 9.98825292976654e-05, "loss": 0.0585, "step": 823 }, { "epoch": 0.21, "learning_rate": 9.988155280041176e-05, "loss": 0.0641, "step": 824 }, { "epoch": 0.21, "learning_rate": 9.98805722660948e-05, "loss": 0.0601, "step": 825 }, { "epoch": 0.21, "learning_rate": 9.987958769479384e-05, "loss": 0.0687, "step": 826 }, { "epoch": 0.21, "learning_rate": 9.987859908658861e-05, "loss": 0.0596, "step": 827 }, { "epoch": 0.21, "learning_rate": 9.987760644155909e-05, "loss": 0.0666, "step": 828 }, { "epoch": 0.21, "learning_rate": 9.987660975978562e-05, "loss": 0.0611, "step": 829 }, { "epoch": 0.21, "learning_rate": 9.987560904134888e-05, "loss": 0.0498, "step": 830 }, { "epoch": 0.21, "learning_rate": 9.987460428632985e-05, "loss": 0.0644, "step": 831 }, { "epoch": 0.21, "learning_rate": 9.987359549480986e-05, "loss": 0.0565, "step": 832 }, { "epoch": 0.21, "learning_rate": 9.987258266687054e-05, "loss": 0.0726, "step": 833 }, { "epoch": 0.22, "learning_rate": 9.987156580259387e-05, "loss": 0.0641, "step": 834 }, { "epoch": 0.22, "learning_rate": 9.987054490206213e-05, "loss": 0.0474, "step": 835 }, { "epoch": 0.22, "learning_rate": 9.986951996535798e-05, "loss": 0.0678, "step": 836 }, { "epoch": 0.22, "learning_rate": 9.986849099256434e-05, "loss": 0.0675, "step": 837 }, { "epoch": 0.22, "learning_rate": 9.98674579837645e-05, "loss": 0.0637, "step": 838 }, { "epoch": 0.22, "learning_rate": 9.986642093904205e-05, "loss": 0.0713, "step": 839 }, { "epoch": 0.22, "learning_rate": 9.986537985848097e-05, "loss": 0.0689, "step": 840 }, { "epoch": 0.22, "learning_rate": 9.986433474216546e-05, "loss": 0.0504, "step": 841 }, { "epoch": 0.22, "learning_rate": 9.986328559018012e-05, "loss": 0.0779, "step": 842 }, { "epoch": 0.22, "learning_rate": 9.986223240260985e-05, "loss": 0.0579, "step": 843 }, { "epoch": 0.22, "learning_rate": 9.986117517953994e-05, "loss": 0.0464, "step": 844 }, { "epoch": 0.22, "learning_rate": 9.98601139210559e-05, "loss": 0.0578, "step": 845 }, { "epoch": 0.22, "learning_rate": 9.985904862724365e-05, "loss": 0.0624, "step": 846 }, { "epoch": 0.22, "learning_rate": 9.985797929818939e-05, "loss": 0.0555, "step": 847 }, { "epoch": 0.22, "learning_rate": 9.985690593397968e-05, "loss": 0.0827, "step": 848 }, { "epoch": 0.22, "learning_rate": 9.985582853470136e-05, "loss": 0.0584, "step": 849 }, { "epoch": 0.22, "learning_rate": 9.985474710044167e-05, "loss": 0.0777, "step": 850 }, { "epoch": 0.22, "learning_rate": 9.985366163128811e-05, "loss": 0.0821, "step": 851 }, { "epoch": 0.22, "learning_rate": 9.985257212732852e-05, "loss": 0.0716, "step": 852 }, { "epoch": 0.22, "learning_rate": 9.98514785886511e-05, "loss": 0.0522, "step": 853 }, { "epoch": 0.22, "learning_rate": 9.985038101534434e-05, "loss": 0.0704, "step": 854 }, { "epoch": 0.22, "learning_rate": 9.984927940749707e-05, "loss": 0.0783, "step": 855 }, { "epoch": 0.22, "learning_rate": 9.984817376519846e-05, "loss": 0.0723, "step": 856 }, { "epoch": 0.22, "learning_rate": 9.984706408853797e-05, "loss": 0.0682, "step": 857 }, { "epoch": 0.22, "learning_rate": 9.984595037760542e-05, "loss": 0.0522, "step": 858 }, { "epoch": 0.22, "learning_rate": 9.984483263249096e-05, "loss": 0.07, "step": 859 }, { "epoch": 0.22, "learning_rate": 9.984371085328503e-05, "loss": 0.0492, "step": 860 }, { "epoch": 0.22, "learning_rate": 9.984258504007845e-05, "loss": 0.0443, "step": 861 }, { "epoch": 0.22, "learning_rate": 9.984145519296228e-05, "loss": 0.0715, "step": 862 }, { "epoch": 0.22, "learning_rate": 9.984032131202803e-05, "loss": 0.0587, "step": 863 }, { "epoch": 0.22, "learning_rate": 9.98391833973674e-05, "loss": 0.0659, "step": 864 }, { "epoch": 0.22, "learning_rate": 9.983804144907256e-05, "loss": 0.0525, "step": 865 }, { "epoch": 0.22, "learning_rate": 9.983689546723585e-05, "loss": 0.0688, "step": 866 }, { "epoch": 0.22, "learning_rate": 9.98357454519501e-05, "loss": 0.0705, "step": 867 }, { "epoch": 0.22, "learning_rate": 9.98345914033083e-05, "loss": 0.0688, "step": 868 }, { "epoch": 0.22, "learning_rate": 9.983343332140392e-05, "loss": 0.0827, "step": 869 }, { "epoch": 0.22, "learning_rate": 9.983227120633066e-05, "loss": 0.0644, "step": 870 }, { "epoch": 0.22, "learning_rate": 9.983110505818256e-05, "loss": 0.0759, "step": 871 }, { "epoch": 0.23, "learning_rate": 9.982993487705404e-05, "loss": 0.0412, "step": 872 }, { "epoch": 0.23, "learning_rate": 9.982876066303975e-05, "loss": 0.0469, "step": 873 }, { "epoch": 0.23, "learning_rate": 9.982758241623477e-05, "loss": 0.0542, "step": 874 }, { "epoch": 0.23, "learning_rate": 9.982640013673443e-05, "loss": 0.0618, "step": 875 }, { "epoch": 0.23, "learning_rate": 9.982521382463443e-05, "loss": 0.0602, "step": 876 }, { "epoch": 0.23, "learning_rate": 9.98240234800308e-05, "loss": 0.0427, "step": 877 }, { "epoch": 0.23, "learning_rate": 9.982282910301984e-05, "loss": 0.0657, "step": 878 }, { "epoch": 0.23, "learning_rate": 9.982163069369823e-05, "loss": 0.0605, "step": 879 }, { "epoch": 0.23, "learning_rate": 9.982042825216296e-05, "loss": 0.0599, "step": 880 }, { "epoch": 0.23, "learning_rate": 9.981922177851136e-05, "loss": 0.0617, "step": 881 }, { "epoch": 0.23, "learning_rate": 9.981801127284104e-05, "loss": 0.0735, "step": 882 }, { "epoch": 0.23, "learning_rate": 9.981679673525002e-05, "loss": 0.0638, "step": 883 }, { "epoch": 0.23, "learning_rate": 9.981557816583656e-05, "loss": 0.0584, "step": 884 }, { "epoch": 0.23, "learning_rate": 9.98143555646993e-05, "loss": 0.0643, "step": 885 }, { "epoch": 0.23, "learning_rate": 9.981312893193718e-05, "loss": 0.0616, "step": 886 }, { "epoch": 0.23, "learning_rate": 9.981189826764946e-05, "loss": 0.069, "step": 887 }, { "epoch": 0.23, "learning_rate": 9.981066357193576e-05, "loss": 0.0605, "step": 888 }, { "epoch": 0.23, "learning_rate": 9.9809424844896e-05, "loss": 0.0672, "step": 889 }, { "epoch": 0.23, "learning_rate": 9.980818208663046e-05, "loss": 0.0526, "step": 890 }, { "epoch": 0.23, "learning_rate": 9.980693529723968e-05, "loss": 0.081, "step": 891 }, { "epoch": 0.23, "learning_rate": 9.98056844768246e-05, "loss": 0.0558, "step": 892 }, { "epoch": 0.23, "learning_rate": 9.980442962548641e-05, "loss": 0.094, "step": 893 }, { "epoch": 0.23, "learning_rate": 9.980317074332671e-05, "loss": 0.0713, "step": 894 }, { "epoch": 0.23, "learning_rate": 9.980190783044739e-05, "loss": 0.0811, "step": 895 }, { "epoch": 0.23, "learning_rate": 9.980064088695062e-05, "loss": 0.0767, "step": 896 }, { "epoch": 0.23, "learning_rate": 9.979936991293897e-05, "loss": 0.0587, "step": 897 }, { "epoch": 0.23, "learning_rate": 9.979809490851529e-05, "loss": 0.0723, "step": 898 }, { "epoch": 0.23, "learning_rate": 9.979681587378278e-05, "loss": 0.0626, "step": 899 }, { "epoch": 0.23, "learning_rate": 9.979553280884494e-05, "loss": 0.0661, "step": 900 }, { "epoch": 0.23, "learning_rate": 9.979424571380563e-05, "loss": 0.0799, "step": 901 }, { "epoch": 0.23, "learning_rate": 9.9792954588769e-05, "loss": 0.068, "step": 902 }, { "epoch": 0.23, "learning_rate": 9.979165943383957e-05, "loss": 0.0865, "step": 903 }, { "epoch": 0.23, "learning_rate": 9.979036024912214e-05, "loss": 0.0826, "step": 904 }, { "epoch": 0.23, "learning_rate": 9.978905703472186e-05, "loss": 0.0534, "step": 905 }, { "epoch": 0.23, "learning_rate": 9.978774979074421e-05, "loss": 0.1218, "step": 906 }, { "epoch": 0.23, "learning_rate": 9.978643851729498e-05, "loss": 0.0956, "step": 907 }, { "epoch": 0.23, "learning_rate": 9.978512321448032e-05, "loss": 0.0594, "step": 908 }, { "epoch": 0.23, "learning_rate": 9.978380388240665e-05, "loss": 0.0912, "step": 909 }, { "epoch": 0.23, "learning_rate": 9.978248052118075e-05, "loss": 0.074, "step": 910 }, { "epoch": 0.24, "learning_rate": 9.978115313090974e-05, "loss": 0.0709, "step": 911 }, { "epoch": 0.24, "learning_rate": 9.977982171170105e-05, "loss": 0.0605, "step": 912 }, { "epoch": 0.24, "learning_rate": 9.977848626366241e-05, "loss": 0.0629, "step": 913 }, { "epoch": 0.24, "learning_rate": 9.977714678690195e-05, "loss": 0.0636, "step": 914 }, { "epoch": 0.24, "learning_rate": 9.977580328152802e-05, "loss": 0.0854, "step": 915 }, { "epoch": 0.24, "learning_rate": 9.97744557476494e-05, "loss": 0.0585, "step": 916 }, { "epoch": 0.24, "learning_rate": 9.977310418537513e-05, "loss": 0.0795, "step": 917 }, { "epoch": 0.24, "learning_rate": 9.977174859481459e-05, "loss": 0.0684, "step": 918 }, { "epoch": 0.24, "learning_rate": 9.977038897607749e-05, "loss": 0.0807, "step": 919 }, { "epoch": 0.24, "learning_rate": 9.97690253292739e-05, "loss": 0.0626, "step": 920 }, { "epoch": 0.24, "learning_rate": 9.976765765451415e-05, "loss": 0.0718, "step": 921 }, { "epoch": 0.24, "learning_rate": 9.976628595190894e-05, "loss": 0.0752, "step": 922 }, { "epoch": 0.24, "learning_rate": 9.97649102215693e-05, "loss": 0.0917, "step": 923 }, { "epoch": 0.24, "learning_rate": 9.976353046360655e-05, "loss": 0.0692, "step": 924 }, { "epoch": 0.24, "learning_rate": 9.976214667813238e-05, "loss": 0.0613, "step": 925 }, { "epoch": 0.24, "learning_rate": 9.976075886525875e-05, "loss": 0.0519, "step": 926 }, { "epoch": 0.24, "learning_rate": 9.975936702509801e-05, "loss": 0.0774, "step": 927 }, { "epoch": 0.24, "learning_rate": 9.97579711577628e-05, "loss": 0.045, "step": 928 }, { "epoch": 0.24, "learning_rate": 9.975657126336609e-05, "loss": 0.0616, "step": 929 }, { "epoch": 0.24, "learning_rate": 9.975516734202118e-05, "loss": 0.0806, "step": 930 }, { "epoch": 0.24, "learning_rate": 9.975375939384168e-05, "loss": 0.0798, "step": 931 }, { "epoch": 0.24, "learning_rate": 9.975234741894157e-05, "loss": 0.0723, "step": 932 }, { "epoch": 0.24, "learning_rate": 9.97509314174351e-05, "loss": 0.0476, "step": 933 }, { "epoch": 0.24, "learning_rate": 9.974951138943686e-05, "loss": 0.0619, "step": 934 }, { "epoch": 0.24, "learning_rate": 9.97480873350618e-05, "loss": 0.0806, "step": 935 }, { "epoch": 0.24, "learning_rate": 9.974665925442517e-05, "loss": 0.0495, "step": 936 }, { "epoch": 0.24, "learning_rate": 9.974522714764253e-05, "loss": 0.0753, "step": 937 }, { "epoch": 0.24, "learning_rate": 9.974379101482982e-05, "loss": 0.0548, "step": 938 }, { "epoch": 0.24, "learning_rate": 9.974235085610325e-05, "loss": 0.0893, "step": 939 }, { "epoch": 0.24, "learning_rate": 9.974090667157939e-05, "loss": 0.0686, "step": 940 }, { "epoch": 0.24, "learning_rate": 9.973945846137507e-05, "loss": 0.0687, "step": 941 }, { "epoch": 0.24, "learning_rate": 9.973800622560757e-05, "loss": 0.0858, "step": 942 }, { "epoch": 0.24, "learning_rate": 9.973654996439436e-05, "loss": 0.0838, "step": 943 }, { "epoch": 0.24, "learning_rate": 9.973508967785337e-05, "loss": 0.0573, "step": 944 }, { "epoch": 0.24, "learning_rate": 9.973362536610271e-05, "loss": 0.0814, "step": 945 }, { "epoch": 0.24, "learning_rate": 9.973215702926096e-05, "loss": 0.0816, "step": 946 }, { "epoch": 0.24, "learning_rate": 9.973068466744689e-05, "loss": 0.0546, "step": 947 }, { "epoch": 0.24, "learning_rate": 9.972920828077972e-05, "loss": 0.0575, "step": 948 }, { "epoch": 0.24, "learning_rate": 9.972772786937891e-05, "loss": 0.059, "step": 949 }, { "epoch": 0.25, "learning_rate": 9.972624343336427e-05, "loss": 0.0425, "step": 950 }, { "epoch": 0.25, "learning_rate": 9.972475497285596e-05, "loss": 0.0452, "step": 951 }, { "epoch": 0.25, "learning_rate": 9.972326248797444e-05, "loss": 0.0612, "step": 952 }, { "epoch": 0.25, "learning_rate": 9.972176597884047e-05, "loss": 0.0829, "step": 953 }, { "epoch": 0.25, "learning_rate": 9.972026544557522e-05, "loss": 0.0665, "step": 954 }, { "epoch": 0.25, "learning_rate": 9.971876088830008e-05, "loss": 0.0835, "step": 955 }, { "epoch": 0.25, "learning_rate": 9.971725230713686e-05, "loss": 0.0635, "step": 956 }, { "epoch": 0.25, "learning_rate": 9.971573970220764e-05, "loss": 0.0783, "step": 957 }, { "epoch": 0.25, "learning_rate": 9.971422307363483e-05, "loss": 0.0561, "step": 958 }, { "epoch": 0.25, "learning_rate": 9.971270242154118e-05, "loss": 0.0613, "step": 959 }, { "epoch": 0.25, "learning_rate": 9.971117774604977e-05, "loss": 0.0749, "step": 960 }, { "epoch": 0.25, "learning_rate": 9.9709649047284e-05, "loss": 0.065, "step": 961 }, { "epoch": 0.25, "learning_rate": 9.970811632536757e-05, "loss": 0.0717, "step": 962 }, { "epoch": 0.25, "learning_rate": 9.970657958042453e-05, "loss": 0.0467, "step": 963 }, { "epoch": 0.25, "learning_rate": 9.970503881257927e-05, "loss": 0.0502, "step": 964 }, { "epoch": 0.25, "learning_rate": 9.970349402195649e-05, "loss": 0.0651, "step": 965 }, { "epoch": 0.25, "learning_rate": 9.970194520868121e-05, "loss": 0.0541, "step": 966 }, { "epoch": 0.25, "learning_rate": 9.970039237287876e-05, "loss": 0.0399, "step": 967 }, { "epoch": 0.25, "learning_rate": 9.969883551467485e-05, "loss": 0.0676, "step": 968 }, { "epoch": 0.25, "learning_rate": 9.969727463419545e-05, "loss": 0.0722, "step": 969 }, { "epoch": 0.25, "learning_rate": 9.96957097315669e-05, "loss": 0.0647, "step": 970 }, { "epoch": 0.25, "learning_rate": 9.969414080691586e-05, "loss": 0.0513, "step": 971 }, { "epoch": 0.25, "learning_rate": 9.96925678603693e-05, "loss": 0.0709, "step": 972 }, { "epoch": 0.25, "learning_rate": 9.969099089205453e-05, "loss": 0.0671, "step": 973 }, { "epoch": 0.25, "learning_rate": 9.968940990209916e-05, "loss": 0.0708, "step": 974 }, { "epoch": 0.25, "learning_rate": 9.968782489063115e-05, "loss": 0.0604, "step": 975 }, { "epoch": 0.25, "learning_rate": 9.96862358577788e-05, "loss": 0.0554, "step": 976 }, { "epoch": 0.25, "learning_rate": 9.968464280367071e-05, "loss": 0.0588, "step": 977 }, { "epoch": 0.25, "learning_rate": 9.968304572843578e-05, "loss": 0.074, "step": 978 }, { "epoch": 0.25, "learning_rate": 9.968144463220331e-05, "loss": 0.0689, "step": 979 }, { "epoch": 0.25, "learning_rate": 9.967983951510284e-05, "loss": 0.082, "step": 980 }, { "epoch": 0.25, "learning_rate": 9.967823037726432e-05, "loss": 0.0529, "step": 981 }, { "epoch": 0.25, "learning_rate": 9.967661721881795e-05, "loss": 0.0628, "step": 982 }, { "epoch": 0.25, "learning_rate": 9.96750000398943e-05, "loss": 0.0598, "step": 983 }, { "epoch": 0.25, "learning_rate": 9.967337884062424e-05, "loss": 0.0549, "step": 984 }, { "epoch": 0.25, "learning_rate": 9.967175362113899e-05, "loss": 0.0594, "step": 985 }, { "epoch": 0.25, "learning_rate": 9.967012438157008e-05, "loss": 0.0767, "step": 986 }, { "epoch": 0.25, "learning_rate": 9.966849112204938e-05, "loss": 0.0546, "step": 987 }, { "epoch": 0.25, "learning_rate": 9.966685384270907e-05, "loss": 0.0618, "step": 988 }, { "epoch": 0.26, "learning_rate": 9.966521254368164e-05, "loss": 0.0739, "step": 989 }, { "epoch": 0.26, "learning_rate": 9.966356722509995e-05, "loss": 0.0768, "step": 990 }, { "epoch": 0.26, "learning_rate": 9.966191788709716e-05, "loss": 0.0787, "step": 991 }, { "epoch": 0.26, "learning_rate": 9.966026452980673e-05, "loss": 0.0618, "step": 992 }, { "epoch": 0.26, "learning_rate": 9.96586071533625e-05, "loss": 0.0805, "step": 993 }, { "epoch": 0.26, "learning_rate": 9.965694575789859e-05, "loss": 0.0638, "step": 994 }, { "epoch": 0.26, "learning_rate": 9.965528034354948e-05, "loss": 0.0693, "step": 995 }, { "epoch": 0.26, "learning_rate": 9.965361091044993e-05, "loss": 0.0757, "step": 996 }, { "epoch": 0.26, "learning_rate": 9.965193745873508e-05, "loss": 0.0662, "step": 997 }, { "epoch": 0.26, "learning_rate": 9.965025998854034e-05, "loss": 0.0768, "step": 998 }, { "epoch": 0.26, "learning_rate": 9.964857850000149e-05, "loss": 0.0671, "step": 999 }, { "epoch": 0.26, "learning_rate": 9.964689299325462e-05, "loss": 0.0763, "step": 1000 }, { "epoch": 0.26, "learning_rate": 9.964520346843614e-05, "loss": 0.0572, "step": 1001 }, { "epoch": 0.26, "learning_rate": 9.964350992568277e-05, "loss": 0.078, "step": 1002 }, { "epoch": 0.26, "learning_rate": 9.96418123651316e-05, "loss": 0.0834, "step": 1003 }, { "epoch": 0.26, "learning_rate": 9.964011078692002e-05, "loss": 0.0821, "step": 1004 }, { "epoch": 0.26, "learning_rate": 9.96384051911857e-05, "loss": 0.0557, "step": 1005 }, { "epoch": 0.26, "learning_rate": 9.963669557806674e-05, "loss": 0.0882, "step": 1006 }, { "epoch": 0.26, "learning_rate": 9.963498194770146e-05, "loss": 0.0821, "step": 1007 }, { "epoch": 0.26, "learning_rate": 9.963326430022857e-05, "loss": 0.0781, "step": 1008 }, { "epoch": 0.26, "learning_rate": 9.963154263578706e-05, "loss": 0.053, "step": 1009 }, { "epoch": 0.26, "learning_rate": 9.962981695451632e-05, "loss": 0.0827, "step": 1010 }, { "epoch": 0.26, "learning_rate": 9.962808725655597e-05, "loss": 0.0709, "step": 1011 }, { "epoch": 0.26, "learning_rate": 9.962635354204599e-05, "loss": 0.0825, "step": 1012 }, { "epoch": 0.26, "learning_rate": 9.962461581112675e-05, "loss": 0.0896, "step": 1013 }, { "epoch": 0.26, "learning_rate": 9.962287406393884e-05, "loss": 0.0571, "step": 1014 }, { "epoch": 0.26, "learning_rate": 9.962112830062323e-05, "loss": 0.0905, "step": 1015 }, { "epoch": 0.26, "learning_rate": 9.961937852132124e-05, "loss": 0.0607, "step": 1016 }, { "epoch": 0.26, "learning_rate": 9.961762472617447e-05, "loss": 0.0783, "step": 1017 }, { "epoch": 0.26, "learning_rate": 9.961586691532483e-05, "loss": 0.0811, "step": 1018 }, { "epoch": 0.26, "learning_rate": 9.961410508891462e-05, "loss": 0.0647, "step": 1019 }, { "epoch": 0.26, "learning_rate": 9.961233924708644e-05, "loss": 0.0647, "step": 1020 }, { "epoch": 0.26, "learning_rate": 9.961056938998316e-05, "loss": 0.0733, "step": 1021 }, { "epoch": 0.26, "learning_rate": 9.960879551774807e-05, "loss": 0.0863, "step": 1022 }, { "epoch": 0.26, "learning_rate": 9.96070176305247e-05, "loss": 0.0649, "step": 1023 }, { "epoch": 0.26, "learning_rate": 9.960523572845694e-05, "loss": 0.0662, "step": 1024 }, { "epoch": 0.26, "learning_rate": 9.960344981168902e-05, "loss": 0.056, "step": 1025 }, { "epoch": 0.26, "learning_rate": 9.960165988036548e-05, "loss": 0.0528, "step": 1026 }, { "epoch": 0.27, "learning_rate": 9.959986593463118e-05, "loss": 0.0728, "step": 1027 }, { "epoch": 0.27, "learning_rate": 9.95980679746313e-05, "loss": 0.0537, "step": 1028 }, { "epoch": 0.27, "learning_rate": 9.959626600051136e-05, "loss": 0.0598, "step": 1029 }, { "epoch": 0.27, "learning_rate": 9.959446001241722e-05, "loss": 0.0561, "step": 1030 }, { "epoch": 0.27, "learning_rate": 9.9592650010495e-05, "loss": 0.052, "step": 1031 }, { "epoch": 0.27, "learning_rate": 9.959083599489123e-05, "loss": 0.0695, "step": 1032 }, { "epoch": 0.27, "learning_rate": 9.958901796575269e-05, "loss": 0.0884, "step": 1033 }, { "epoch": 0.27, "learning_rate": 9.958719592322656e-05, "loss": 0.0684, "step": 1034 }, { "epoch": 0.27, "learning_rate": 9.958536986746027e-05, "loss": 0.0776, "step": 1035 }, { "epoch": 0.27, "learning_rate": 9.958353979860162e-05, "loss": 0.0592, "step": 1036 }, { "epoch": 0.27, "learning_rate": 9.958170571679873e-05, "loss": 0.0621, "step": 1037 }, { "epoch": 0.27, "learning_rate": 9.957986762220001e-05, "loss": 0.0641, "step": 1038 }, { "epoch": 0.27, "learning_rate": 9.957802551495426e-05, "loss": 0.068, "step": 1039 }, { "epoch": 0.27, "learning_rate": 9.957617939521052e-05, "loss": 0.0613, "step": 1040 }, { "epoch": 0.27, "learning_rate": 9.957432926311825e-05, "loss": 0.0756, "step": 1041 }, { "epoch": 0.27, "learning_rate": 9.957247511882716e-05, "loss": 0.0817, "step": 1042 }, { "epoch": 0.27, "learning_rate": 9.957061696248733e-05, "loss": 0.0855, "step": 1043 }, { "epoch": 0.27, "learning_rate": 9.956875479424914e-05, "loss": 0.0668, "step": 1044 }, { "epoch": 0.27, "learning_rate": 9.956688861426328e-05, "loss": 0.0431, "step": 1045 }, { "epoch": 0.27, "learning_rate": 9.95650184226808e-05, "loss": 0.0769, "step": 1046 }, { "epoch": 0.27, "learning_rate": 9.956314421965307e-05, "loss": 0.0665, "step": 1047 }, { "epoch": 0.27, "learning_rate": 9.956126600533177e-05, "loss": 0.0626, "step": 1048 }, { "epoch": 0.27, "learning_rate": 9.95593837798689e-05, "loss": 0.0628, "step": 1049 }, { "epoch": 0.27, "learning_rate": 9.95574975434168e-05, "loss": 0.0646, "step": 1050 }, { "epoch": 0.27, "learning_rate": 9.955560729612813e-05, "loss": 0.0734, "step": 1051 }, { "epoch": 0.27, "learning_rate": 9.955371303815586e-05, "loss": 0.0708, "step": 1052 }, { "epoch": 0.27, "learning_rate": 9.955181476965333e-05, "loss": 0.0829, "step": 1053 }, { "epoch": 0.27, "learning_rate": 9.954991249077415e-05, "loss": 0.0443, "step": 1054 }, { "epoch": 0.27, "learning_rate": 9.954800620167226e-05, "loss": 0.0582, "step": 1055 }, { "epoch": 0.27, "learning_rate": 9.954609590250198e-05, "loss": 0.0792, "step": 1056 }, { "epoch": 0.27, "learning_rate": 9.95441815934179e-05, "loss": 0.0825, "step": 1057 }, { "epoch": 0.27, "learning_rate": 9.954226327457493e-05, "loss": 0.0759, "step": 1058 }, { "epoch": 0.27, "learning_rate": 9.954034094612836e-05, "loss": 0.0524, "step": 1059 }, { "epoch": 0.27, "learning_rate": 9.953841460823374e-05, "loss": 0.0742, "step": 1060 }, { "epoch": 0.27, "learning_rate": 9.9536484261047e-05, "loss": 0.059, "step": 1061 }, { "epoch": 0.27, "learning_rate": 9.953454990472435e-05, "loss": 0.0494, "step": 1062 }, { "epoch": 0.27, "learning_rate": 9.953261153942235e-05, "loss": 0.0775, "step": 1063 }, { "epoch": 0.27, "learning_rate": 9.953066916529787e-05, "loss": 0.0554, "step": 1064 }, { "epoch": 0.27, "learning_rate": 9.952872278250813e-05, "loss": 0.0595, "step": 1065 }, { "epoch": 0.28, "learning_rate": 9.952677239121064e-05, "loss": 0.0605, "step": 1066 }, { "epoch": 0.28, "learning_rate": 9.952481799156328e-05, "loss": 0.0414, "step": 1067 }, { "epoch": 0.28, "learning_rate": 9.952285958372419e-05, "loss": 0.067, "step": 1068 }, { "epoch": 0.28, "learning_rate": 9.952089716785187e-05, "loss": 0.085, "step": 1069 }, { "epoch": 0.28, "learning_rate": 9.951893074410515e-05, "loss": 0.0671, "step": 1070 }, { "epoch": 0.28, "learning_rate": 9.951696031264321e-05, "loss": 0.0694, "step": 1071 }, { "epoch": 0.28, "learning_rate": 9.951498587362549e-05, "loss": 0.0586, "step": 1072 }, { "epoch": 0.28, "learning_rate": 9.95130074272118e-05, "loss": 0.0493, "step": 1073 }, { "epoch": 0.28, "learning_rate": 9.951102497356224e-05, "loss": 0.0572, "step": 1074 }, { "epoch": 0.28, "learning_rate": 9.950903851283729e-05, "loss": 0.0637, "step": 1075 }, { "epoch": 0.28, "learning_rate": 9.950704804519768e-05, "loss": 0.0724, "step": 1076 }, { "epoch": 0.28, "learning_rate": 9.950505357080453e-05, "loss": 0.059, "step": 1077 }, { "epoch": 0.28, "learning_rate": 9.950305508981928e-05, "loss": 0.0621, "step": 1078 }, { "epoch": 0.28, "learning_rate": 9.950105260240363e-05, "loss": 0.0622, "step": 1079 }, { "epoch": 0.28, "learning_rate": 9.949904610871968e-05, "loss": 0.0554, "step": 1080 }, { "epoch": 0.28, "learning_rate": 9.94970356089298e-05, "loss": 0.0659, "step": 1081 }, { "epoch": 0.28, "learning_rate": 9.949502110319673e-05, "loss": 0.0712, "step": 1082 }, { "epoch": 0.28, "learning_rate": 9.949300259168346e-05, "loss": 0.0574, "step": 1083 }, { "epoch": 0.28, "learning_rate": 9.949098007455341e-05, "loss": 0.0633, "step": 1084 }, { "epoch": 0.28, "learning_rate": 9.948895355197026e-05, "loss": 0.0693, "step": 1085 }, { "epoch": 0.28, "learning_rate": 9.9486923024098e-05, "loss": 0.0736, "step": 1086 }, { "epoch": 0.28, "learning_rate": 9.948488849110096e-05, "loss": 0.0687, "step": 1087 }, { "epoch": 0.28, "learning_rate": 9.948284995314383e-05, "loss": 0.0524, "step": 1088 }, { "epoch": 0.28, "learning_rate": 9.948080741039157e-05, "loss": 0.0746, "step": 1089 }, { "epoch": 0.28, "learning_rate": 9.947876086300952e-05, "loss": 0.0572, "step": 1090 }, { "epoch": 0.28, "learning_rate": 9.947671031116328e-05, "loss": 0.0467, "step": 1091 }, { "epoch": 0.28, "learning_rate": 9.947465575501882e-05, "loss": 0.072, "step": 1092 }, { "epoch": 0.28, "learning_rate": 9.947259719474243e-05, "loss": 0.0565, "step": 1093 }, { "epoch": 0.28, "learning_rate": 9.94705346305007e-05, "loss": 0.0541, "step": 1094 }, { "epoch": 0.28, "learning_rate": 9.946846806246058e-05, "loss": 0.0617, "step": 1095 }, { "epoch": 0.28, "learning_rate": 9.946639749078929e-05, "loss": 0.0748, "step": 1096 }, { "epoch": 0.28, "learning_rate": 9.946432291565444e-05, "loss": 0.0647, "step": 1097 }, { "epoch": 0.28, "learning_rate": 9.946224433722393e-05, "loss": 0.0719, "step": 1098 }, { "epoch": 0.28, "learning_rate": 9.946016175566598e-05, "loss": 0.0683, "step": 1099 }, { "epoch": 0.28, "learning_rate": 9.945807517114911e-05, "loss": 0.0606, "step": 1100 }, { "epoch": 0.28, "learning_rate": 9.945598458384222e-05, "loss": 0.0778, "step": 1101 }, { "epoch": 0.28, "learning_rate": 9.945388999391453e-05, "loss": 0.0882, "step": 1102 }, { "epoch": 0.28, "learning_rate": 9.945179140153553e-05, "loss": 0.0903, "step": 1103 }, { "epoch": 0.28, "learning_rate": 9.944968880687507e-05, "loss": 0.0769, "step": 1104 }, { "epoch": 0.29, "learning_rate": 9.94475822101033e-05, "loss": 0.077, "step": 1105 }, { "epoch": 0.29, "learning_rate": 9.944547161139078e-05, "loss": 0.0501, "step": 1106 }, { "epoch": 0.29, "learning_rate": 9.944335701090824e-05, "loss": 0.092, "step": 1107 }, { "epoch": 0.29, "learning_rate": 9.944123840882689e-05, "loss": 0.0779, "step": 1108 }, { "epoch": 0.29, "learning_rate": 9.943911580531816e-05, "loss": 0.0744, "step": 1109 }, { "epoch": 0.29, "learning_rate": 9.943698920055384e-05, "loss": 0.0934, "step": 1110 }, { "epoch": 0.29, "learning_rate": 9.943485859470606e-05, "loss": 0.0682, "step": 1111 }, { "epoch": 0.29, "learning_rate": 9.943272398794725e-05, "loss": 0.0762, "step": 1112 }, { "epoch": 0.29, "learning_rate": 9.943058538045016e-05, "loss": 0.0698, "step": 1113 }, { "epoch": 0.29, "learning_rate": 9.942844277238787e-05, "loss": 0.0735, "step": 1114 }, { "epoch": 0.29, "learning_rate": 9.942629616393381e-05, "loss": 0.0708, "step": 1115 }, { "epoch": 0.29, "learning_rate": 9.94241455552617e-05, "loss": 0.0744, "step": 1116 }, { "epoch": 0.29, "learning_rate": 9.942199094654559e-05, "loss": 0.0669, "step": 1117 }, { "epoch": 0.29, "learning_rate": 9.941983233795984e-05, "loss": 0.0785, "step": 1118 }, { "epoch": 0.29, "learning_rate": 9.941766972967921e-05, "loss": 0.0714, "step": 1119 }, { "epoch": 0.29, "learning_rate": 9.941550312187868e-05, "loss": 0.067, "step": 1120 }, { "epoch": 0.29, "learning_rate": 9.941333251473362e-05, "loss": 0.0687, "step": 1121 }, { "epoch": 0.29, "learning_rate": 9.941115790841969e-05, "loss": 0.057, "step": 1122 }, { "epoch": 0.29, "learning_rate": 9.940897930311288e-05, "loss": 0.067, "step": 1123 }, { "epoch": 0.29, "learning_rate": 9.940679669898954e-05, "loss": 0.0567, "step": 1124 }, { "epoch": 0.29, "learning_rate": 9.940461009622628e-05, "loss": 0.0865, "step": 1125 }, { "epoch": 0.29, "learning_rate": 9.94024194950001e-05, "loss": 0.055, "step": 1126 }, { "epoch": 0.29, "learning_rate": 9.940022489548829e-05, "loss": 0.0618, "step": 1127 }, { "epoch": 0.29, "learning_rate": 9.939802629786845e-05, "loss": 0.0574, "step": 1128 }, { "epoch": 0.29, "learning_rate": 9.939582370231852e-05, "loss": 0.0926, "step": 1129 }, { "epoch": 0.29, "learning_rate": 9.939361710901676e-05, "loss": 0.0604, "step": 1130 }, { "epoch": 0.29, "learning_rate": 9.939140651814176e-05, "loss": 0.0529, "step": 1131 }, { "epoch": 0.29, "learning_rate": 9.938919192987245e-05, "loss": 0.0573, "step": 1132 }, { "epoch": 0.29, "learning_rate": 9.938697334438804e-05, "loss": 0.0684, "step": 1133 }, { "epoch": 0.29, "learning_rate": 9.938475076186809e-05, "loss": 0.0741, "step": 1134 }, { "epoch": 0.29, "learning_rate": 9.938252418249247e-05, "loss": 0.075, "step": 1135 }, { "epoch": 0.29, "learning_rate": 9.93802936064414e-05, "loss": 0.0598, "step": 1136 }, { "epoch": 0.29, "learning_rate": 9.937805903389543e-05, "loss": 0.0733, "step": 1137 }, { "epoch": 0.29, "learning_rate": 9.937582046503535e-05, "loss": 0.0426, "step": 1138 }, { "epoch": 0.29, "learning_rate": 9.93735779000424e-05, "loss": 0.055, "step": 1139 }, { "epoch": 0.29, "learning_rate": 9.937133133909801e-05, "loss": 0.0697, "step": 1140 }, { "epoch": 0.29, "learning_rate": 9.936908078238407e-05, "loss": 0.0851, "step": 1141 }, { "epoch": 0.29, "learning_rate": 9.936682623008268e-05, "loss": 0.0543, "step": 1142 }, { "epoch": 0.29, "learning_rate": 9.936456768237632e-05, "loss": 0.0656, "step": 1143 }, { "epoch": 0.3, "learning_rate": 9.936230513944777e-05, "loss": 0.061, "step": 1144 }, { "epoch": 0.3, "learning_rate": 9.936003860148015e-05, "loss": 0.0554, "step": 1145 }, { "epoch": 0.3, "learning_rate": 9.935776806865692e-05, "loss": 0.0705, "step": 1146 }, { "epoch": 0.3, "learning_rate": 9.935549354116181e-05, "loss": 0.0553, "step": 1147 }, { "epoch": 0.3, "learning_rate": 9.935321501917893e-05, "loss": 0.0558, "step": 1148 }, { "epoch": 0.3, "learning_rate": 9.935093250289265e-05, "loss": 0.059, "step": 1149 }, { "epoch": 0.3, "learning_rate": 9.934864599248774e-05, "loss": 0.0714, "step": 1150 }, { "epoch": 0.3, "learning_rate": 9.934635548814926e-05, "loss": 0.0748, "step": 1151 }, { "epoch": 0.3, "learning_rate": 9.934406099006253e-05, "loss": 0.0795, "step": 1152 }, { "epoch": 0.3, "learning_rate": 9.934176249841331e-05, "loss": 0.0788, "step": 1153 }, { "epoch": 0.3, "learning_rate": 9.93394600133876e-05, "loss": 0.0893, "step": 1154 }, { "epoch": 0.3, "learning_rate": 9.933715353517175e-05, "loss": 0.0699, "step": 1155 }, { "epoch": 0.3, "learning_rate": 9.933484306395242e-05, "loss": 0.0748, "step": 1156 }, { "epoch": 0.3, "learning_rate": 9.933252859991662e-05, "loss": 0.0523, "step": 1157 }, { "epoch": 0.3, "learning_rate": 9.933021014325166e-05, "loss": 0.0689, "step": 1158 }, { "epoch": 0.3, "learning_rate": 9.932788769414517e-05, "loss": 0.0654, "step": 1159 }, { "epoch": 0.3, "learning_rate": 9.932556125278512e-05, "loss": 0.0807, "step": 1160 }, { "epoch": 0.3, "learning_rate": 9.93232308193598e-05, "loss": 0.0617, "step": 1161 }, { "epoch": 0.3, "learning_rate": 9.932089639405782e-05, "loss": 0.0573, "step": 1162 }, { "epoch": 0.3, "learning_rate": 9.93185579770681e-05, "loss": 0.0761, "step": 1163 }, { "epoch": 0.3, "learning_rate": 9.931621556857991e-05, "loss": 0.0666, "step": 1164 }, { "epoch": 0.3, "learning_rate": 9.931386916878281e-05, "loss": 0.0742, "step": 1165 }, { "epoch": 0.3, "learning_rate": 9.931151877786672e-05, "loss": 0.058, "step": 1166 }, { "epoch": 0.3, "learning_rate": 9.930916439602184e-05, "loss": 0.0611, "step": 1167 }, { "epoch": 0.3, "learning_rate": 9.930680602343876e-05, "loss": 0.0672, "step": 1168 }, { "epoch": 0.3, "learning_rate": 9.930444366030832e-05, "loss": 0.0594, "step": 1169 }, { "epoch": 0.3, "learning_rate": 9.93020773068217e-05, "loss": 0.0694, "step": 1170 }, { "epoch": 0.3, "learning_rate": 9.929970696317044e-05, "loss": 0.0468, "step": 1171 }, { "epoch": 0.3, "learning_rate": 9.929733262954638e-05, "loss": 0.0755, "step": 1172 }, { "epoch": 0.3, "learning_rate": 9.929495430614167e-05, "loss": 0.0586, "step": 1173 }, { "epoch": 0.3, "learning_rate": 9.929257199314879e-05, "loss": 0.0564, "step": 1174 }, { "epoch": 0.3, "learning_rate": 9.929018569076056e-05, "loss": 0.0411, "step": 1175 }, { "epoch": 0.3, "learning_rate": 9.928779539917011e-05, "loss": 0.0584, "step": 1176 }, { "epoch": 0.3, "learning_rate": 9.92854011185709e-05, "loss": 0.0672, "step": 1177 }, { "epoch": 0.3, "learning_rate": 9.928300284915668e-05, "loss": 0.0638, "step": 1178 }, { "epoch": 0.3, "learning_rate": 9.928060059112157e-05, "loss": 0.08, "step": 1179 }, { "epoch": 0.3, "learning_rate": 9.927819434465999e-05, "loss": 0.0672, "step": 1180 }, { "epoch": 0.3, "learning_rate": 9.927578410996667e-05, "loss": 0.058, "step": 1181 }, { "epoch": 0.31, "learning_rate": 9.927336988723671e-05, "loss": 0.0869, "step": 1182 }, { "epoch": 0.31, "learning_rate": 9.927095167666547e-05, "loss": 0.0657, "step": 1183 }, { "epoch": 0.31, "learning_rate": 9.926852947844867e-05, "loss": 0.0872, "step": 1184 }, { "epoch": 0.31, "learning_rate": 9.926610329278236e-05, "loss": 0.0706, "step": 1185 }, { "epoch": 0.31, "learning_rate": 9.926367311986288e-05, "loss": 0.0811, "step": 1186 }, { "epoch": 0.31, "learning_rate": 9.926123895988692e-05, "loss": 0.0514, "step": 1187 }, { "epoch": 0.31, "learning_rate": 9.925880081305148e-05, "loss": 0.0587, "step": 1188 }, { "epoch": 0.31, "learning_rate": 9.92563586795539e-05, "loss": 0.0851, "step": 1189 }, { "epoch": 0.31, "learning_rate": 9.92539125595918e-05, "loss": 0.0651, "step": 1190 }, { "epoch": 0.31, "learning_rate": 9.925146245336318e-05, "loss": 0.0696, "step": 1191 }, { "epoch": 0.31, "learning_rate": 9.924900836106633e-05, "loss": 0.0603, "step": 1192 }, { "epoch": 0.31, "learning_rate": 9.924655028289985e-05, "loss": 0.062, "step": 1193 }, { "epoch": 0.31, "learning_rate": 9.92440882190627e-05, "loss": 0.0817, "step": 1194 }, { "epoch": 0.31, "learning_rate": 9.924162216975412e-05, "loss": 0.0772, "step": 1195 }, { "epoch": 0.31, "learning_rate": 9.923915213517372e-05, "loss": 0.0598, "step": 1196 }, { "epoch": 0.31, "learning_rate": 9.923667811552139e-05, "loss": 0.0526, "step": 1197 }, { "epoch": 0.31, "learning_rate": 9.923420011099736e-05, "loss": 0.0806, "step": 1198 }, { "epoch": 0.31, "learning_rate": 9.923171812180219e-05, "loss": 0.0713, "step": 1199 }, { "epoch": 0.31, "learning_rate": 9.922923214813675e-05, "loss": 0.0652, "step": 1200 }, { "epoch": 0.31, "learning_rate": 9.922674219020224e-05, "loss": 0.086, "step": 1201 }, { "epoch": 0.31, "learning_rate": 9.92242482482002e-05, "loss": 0.0802, "step": 1202 }, { "epoch": 0.31, "learning_rate": 9.922175032233243e-05, "loss": 0.0802, "step": 1203 }, { "epoch": 0.31, "learning_rate": 9.921924841280113e-05, "loss": 0.098, "step": 1204 }, { "epoch": 0.31, "learning_rate": 9.921674251980876e-05, "loss": 0.0756, "step": 1205 }, { "epoch": 0.31, "learning_rate": 9.921423264355815e-05, "loss": 0.0805, "step": 1206 }, { "epoch": 0.31, "learning_rate": 9.921171878425241e-05, "loss": 0.0733, "step": 1207 }, { "epoch": 0.31, "learning_rate": 9.920920094209503e-05, "loss": 0.0746, "step": 1208 }, { "epoch": 0.31, "learning_rate": 9.920667911728976e-05, "loss": 0.0634, "step": 1209 }, { "epoch": 0.31, "learning_rate": 9.92041533100407e-05, "loss": 0.0477, "step": 1210 }, { "epoch": 0.31, "learning_rate": 9.920162352055226e-05, "loss": 0.0749, "step": 1211 }, { "epoch": 0.31, "learning_rate": 9.919908974902922e-05, "loss": 0.094, "step": 1212 }, { "epoch": 0.31, "learning_rate": 9.919655199567662e-05, "loss": 0.0683, "step": 1213 }, { "epoch": 0.31, "learning_rate": 9.919401026069983e-05, "loss": 0.0929, "step": 1214 }, { "epoch": 0.31, "learning_rate": 9.919146454430461e-05, "loss": 0.087, "step": 1215 }, { "epoch": 0.31, "learning_rate": 9.918891484669695e-05, "loss": 0.0689, "step": 1216 }, { "epoch": 0.31, "learning_rate": 9.918636116808322e-05, "loss": 0.0806, "step": 1217 }, { "epoch": 0.31, "learning_rate": 9.91838035086701e-05, "loss": 0.0768, "step": 1218 }, { "epoch": 0.31, "learning_rate": 9.91812418686646e-05, "loss": 0.0652, "step": 1219 }, { "epoch": 0.31, "learning_rate": 9.917867624827399e-05, "loss": 0.0648, "step": 1220 }, { "epoch": 0.32, "learning_rate": 9.917610664770598e-05, "loss": 0.0637, "step": 1221 }, { "epoch": 0.32, "learning_rate": 9.91735330671685e-05, "loss": 0.0533, "step": 1222 }, { "epoch": 0.32, "learning_rate": 9.917095550686983e-05, "loss": 0.057, "step": 1223 }, { "epoch": 0.32, "learning_rate": 9.91683739670186e-05, "loss": 0.0615, "step": 1224 }, { "epoch": 0.32, "learning_rate": 9.916578844782374e-05, "loss": 0.0688, "step": 1225 }, { "epoch": 0.32, "learning_rate": 9.916319894949448e-05, "loss": 0.0622, "step": 1226 }, { "epoch": 0.32, "learning_rate": 9.916060547224044e-05, "loss": 0.0647, "step": 1227 }, { "epoch": 0.32, "learning_rate": 9.915800801627147e-05, "loss": 0.061, "step": 1228 }, { "epoch": 0.32, "learning_rate": 9.915540658179783e-05, "loss": 0.0853, "step": 1229 }, { "epoch": 0.32, "learning_rate": 9.915280116903003e-05, "loss": 0.0753, "step": 1230 }, { "epoch": 0.32, "learning_rate": 9.915019177817895e-05, "loss": 0.051, "step": 1231 }, { "epoch": 0.32, "learning_rate": 9.914757840945577e-05, "loss": 0.0577, "step": 1232 }, { "epoch": 0.32, "learning_rate": 9.9144961063072e-05, "loss": 0.0714, "step": 1233 }, { "epoch": 0.32, "learning_rate": 9.914233973923947e-05, "loss": 0.0515, "step": 1234 }, { "epoch": 0.32, "learning_rate": 9.913971443817034e-05, "loss": 0.0532, "step": 1235 }, { "epoch": 0.32, "learning_rate": 9.913708516007708e-05, "loss": 0.0803, "step": 1236 }, { "epoch": 0.32, "learning_rate": 9.913445190517248e-05, "loss": 0.0468, "step": 1237 }, { "epoch": 0.32, "learning_rate": 9.913181467366964e-05, "loss": 0.0573, "step": 1238 }, { "epoch": 0.32, "learning_rate": 9.912917346578203e-05, "loss": 0.086, "step": 1239 }, { "epoch": 0.32, "learning_rate": 9.912652828172339e-05, "loss": 0.0404, "step": 1240 }, { "epoch": 0.32, "learning_rate": 9.912387912170782e-05, "loss": 0.072, "step": 1241 }, { "epoch": 0.32, "learning_rate": 9.912122598594972e-05, "loss": 0.0671, "step": 1242 }, { "epoch": 0.32, "learning_rate": 9.91185688746638e-05, "loss": 0.0581, "step": 1243 }, { "epoch": 0.32, "learning_rate": 9.911590778806513e-05, "loss": 0.0742, "step": 1244 }, { "epoch": 0.32, "learning_rate": 9.911324272636906e-05, "loss": 0.0655, "step": 1245 }, { "epoch": 0.32, "learning_rate": 9.911057368979129e-05, "loss": 0.0704, "step": 1246 }, { "epoch": 0.32, "learning_rate": 9.910790067854783e-05, "loss": 0.0688, "step": 1247 }, { "epoch": 0.32, "learning_rate": 9.910522369285503e-05, "loss": 0.0588, "step": 1248 }, { "epoch": 0.32, "learning_rate": 9.910254273292952e-05, "loss": 0.0798, "step": 1249 }, { "epoch": 0.32, "learning_rate": 9.909985779898832e-05, "loss": 0.0583, "step": 1250 }, { "epoch": 0.32, "learning_rate": 9.909716889124867e-05, "loss": 0.071, "step": 1251 }, { "epoch": 0.32, "learning_rate": 9.909447600992825e-05, "loss": 0.06, "step": 1252 }, { "epoch": 0.32, "learning_rate": 9.909177915524495e-05, "loss": 0.0624, "step": 1253 }, { "epoch": 0.32, "learning_rate": 9.908907832741709e-05, "loss": 0.0791, "step": 1254 }, { "epoch": 0.32, "learning_rate": 9.90863735266632e-05, "loss": 0.0808, "step": 1255 }, { "epoch": 0.32, "learning_rate": 9.908366475320223e-05, "loss": 0.0553, "step": 1256 }, { "epoch": 0.32, "learning_rate": 9.90809520072534e-05, "loss": 0.0605, "step": 1257 }, { "epoch": 0.32, "learning_rate": 9.907823528903626e-05, "loss": 0.0631, "step": 1258 }, { "epoch": 0.32, "learning_rate": 9.907551459877066e-05, "loss": 0.0716, "step": 1259 }, { "epoch": 0.33, "learning_rate": 9.907278993667681e-05, "loss": 0.0876, "step": 1260 }, { "epoch": 0.33, "learning_rate": 9.907006130297524e-05, "loss": 0.0654, "step": 1261 }, { "epoch": 0.33, "learning_rate": 9.906732869788676e-05, "loss": 0.0645, "step": 1262 }, { "epoch": 0.33, "learning_rate": 9.906459212163255e-05, "loss": 0.072, "step": 1263 }, { "epoch": 0.33, "learning_rate": 9.906185157443406e-05, "loss": 0.0696, "step": 1264 }, { "epoch": 0.33, "learning_rate": 9.905910705651314e-05, "loss": 0.0662, "step": 1265 }, { "epoch": 0.33, "learning_rate": 9.905635856809185e-05, "loss": 0.0594, "step": 1266 }, { "epoch": 0.33, "learning_rate": 9.90536061093927e-05, "loss": 0.0411, "step": 1267 }, { "epoch": 0.33, "learning_rate": 9.90508496806384e-05, "loss": 0.0765, "step": 1268 }, { "epoch": 0.33, "learning_rate": 9.904808928205205e-05, "loss": 0.0532, "step": 1269 }, { "epoch": 0.33, "learning_rate": 9.904532491385707e-05, "loss": 0.0565, "step": 1270 }, { "epoch": 0.33, "learning_rate": 9.904255657627719e-05, "loss": 0.0573, "step": 1271 }, { "epoch": 0.33, "learning_rate": 9.903978426953644e-05, "loss": 0.0606, "step": 1272 }, { "epoch": 0.33, "learning_rate": 9.903700799385922e-05, "loss": 0.0627, "step": 1273 }, { "epoch": 0.33, "learning_rate": 9.903422774947019e-05, "loss": 0.0773, "step": 1274 }, { "epoch": 0.33, "learning_rate": 9.903144353659438e-05, "loss": 0.0832, "step": 1275 }, { "epoch": 0.33, "learning_rate": 9.902865535545713e-05, "loss": 0.0483, "step": 1276 }, { "epoch": 0.33, "learning_rate": 9.902586320628409e-05, "loss": 0.0753, "step": 1277 }, { "epoch": 0.33, "learning_rate": 9.902306708930123e-05, "loss": 0.0421, "step": 1278 }, { "epoch": 0.33, "learning_rate": 9.902026700473486e-05, "loss": 0.0795, "step": 1279 }, { "epoch": 0.33, "learning_rate": 9.901746295281159e-05, "loss": 0.0495, "step": 1280 }, { "epoch": 0.33, "learning_rate": 9.901465493375836e-05, "loss": 0.0412, "step": 1281 }, { "epoch": 0.33, "learning_rate": 9.901184294780245e-05, "loss": 0.0679, "step": 1282 }, { "epoch": 0.33, "learning_rate": 9.900902699517141e-05, "loss": 0.0732, "step": 1283 }, { "epoch": 0.33, "learning_rate": 9.900620707609318e-05, "loss": 0.0727, "step": 1284 }, { "epoch": 0.33, "learning_rate": 9.900338319079595e-05, "loss": 0.0807, "step": 1285 }, { "epoch": 0.33, "learning_rate": 9.90005553395083e-05, "loss": 0.068, "step": 1286 }, { "epoch": 0.33, "learning_rate": 9.899772352245907e-05, "loss": 0.0817, "step": 1287 }, { "epoch": 0.33, "learning_rate": 9.899488773987744e-05, "loss": 0.0834, "step": 1288 }, { "epoch": 0.33, "learning_rate": 9.899204799199294e-05, "loss": 0.0798, "step": 1289 }, { "epoch": 0.33, "learning_rate": 9.89892042790354e-05, "loss": 0.0664, "step": 1290 }, { "epoch": 0.33, "learning_rate": 9.898635660123497e-05, "loss": 0.0688, "step": 1291 }, { "epoch": 0.33, "learning_rate": 9.89835049588221e-05, "loss": 0.0671, "step": 1292 }, { "epoch": 0.33, "learning_rate": 9.89806493520276e-05, "loss": 0.0584, "step": 1293 }, { "epoch": 0.33, "learning_rate": 9.897778978108257e-05, "loss": 0.0608, "step": 1294 }, { "epoch": 0.33, "learning_rate": 9.897492624621847e-05, "loss": 0.0696, "step": 1295 }, { "epoch": 0.33, "learning_rate": 9.897205874766702e-05, "loss": 0.0643, "step": 1296 }, { "epoch": 0.33, "learning_rate": 9.896918728566032e-05, "loss": 0.0733, "step": 1297 }, { "epoch": 0.33, "learning_rate": 9.896631186043078e-05, "loss": 0.0663, "step": 1298 }, { "epoch": 0.34, "learning_rate": 9.896343247221107e-05, "loss": 0.0667, "step": 1299 }, { "epoch": 0.34, "learning_rate": 9.896054912123424e-05, "loss": 0.0557, "step": 1300 }, { "epoch": 0.34, "learning_rate": 9.895766180773369e-05, "loss": 0.0743, "step": 1301 }, { "epoch": 0.34, "learning_rate": 9.895477053194306e-05, "loss": 0.1035, "step": 1302 }, { "epoch": 0.34, "learning_rate": 9.895187529409635e-05, "loss": 0.0905, "step": 1303 }, { "epoch": 0.34, "learning_rate": 9.89489760944279e-05, "loss": 0.0761, "step": 1304 }, { "epoch": 0.34, "learning_rate": 9.894607293317234e-05, "loss": 0.0779, "step": 1305 }, { "epoch": 0.34, "learning_rate": 9.894316581056464e-05, "loss": 0.0932, "step": 1306 }, { "epoch": 0.34, "learning_rate": 9.894025472684006e-05, "loss": 0.0861, "step": 1307 }, { "epoch": 0.34, "learning_rate": 9.893733968223422e-05, "loss": 0.0864, "step": 1308 }, { "epoch": 0.34, "learning_rate": 9.893442067698305e-05, "loss": 0.0671, "step": 1309 }, { "epoch": 0.34, "learning_rate": 9.893149771132275e-05, "loss": 0.0886, "step": 1310 }, { "epoch": 0.34, "learning_rate": 9.892857078548994e-05, "loss": 0.0786, "step": 1311 }, { "epoch": 0.34, "learning_rate": 9.89256398997215e-05, "loss": 0.0766, "step": 1312 }, { "epoch": 0.34, "learning_rate": 9.89227050542546e-05, "loss": 0.0852, "step": 1313 }, { "epoch": 0.34, "learning_rate": 9.89197662493268e-05, "loss": 0.0554, "step": 1314 }, { "epoch": 0.34, "learning_rate": 9.891682348517591e-05, "loss": 0.074, "step": 1315 }, { "epoch": 0.34, "learning_rate": 9.891387676204013e-05, "loss": 0.082, "step": 1316 }, { "epoch": 0.34, "learning_rate": 9.891092608015793e-05, "loss": 0.0551, "step": 1317 }, { "epoch": 0.34, "learning_rate": 9.890797143976813e-05, "loss": 0.0677, "step": 1318 }, { "epoch": 0.34, "learning_rate": 9.890501284110984e-05, "loss": 0.0716, "step": 1319 }, { "epoch": 0.34, "learning_rate": 9.890205028442252e-05, "loss": 0.0817, "step": 1320 }, { "epoch": 0.34, "learning_rate": 9.889908376994594e-05, "loss": 0.0719, "step": 1321 }, { "epoch": 0.34, "learning_rate": 9.889611329792018e-05, "loss": 0.0697, "step": 1322 }, { "epoch": 0.34, "learning_rate": 9.889313886858566e-05, "loss": 0.0698, "step": 1323 }, { "epoch": 0.34, "learning_rate": 9.889016048218311e-05, "loss": 0.0743, "step": 1324 }, { "epoch": 0.34, "learning_rate": 9.888717813895356e-05, "loss": 0.068, "step": 1325 }, { "epoch": 0.34, "learning_rate": 9.888419183913839e-05, "loss": 0.059, "step": 1326 }, { "epoch": 0.34, "learning_rate": 9.888120158297932e-05, "loss": 0.0606, "step": 1327 }, { "epoch": 0.34, "learning_rate": 9.887820737071832e-05, "loss": 0.0478, "step": 1328 }, { "epoch": 0.34, "learning_rate": 9.887520920259773e-05, "loss": 0.0753, "step": 1329 }, { "epoch": 0.34, "learning_rate": 9.88722070788602e-05, "loss": 0.0979, "step": 1330 }, { "epoch": 0.34, "learning_rate": 9.886920099974872e-05, "loss": 0.0648, "step": 1331 }, { "epoch": 0.34, "learning_rate": 9.886619096550658e-05, "loss": 0.0592, "step": 1332 }, { "epoch": 0.34, "learning_rate": 9.886317697637735e-05, "loss": 0.0654, "step": 1333 }, { "epoch": 0.34, "learning_rate": 9.886015903260501e-05, "loss": 0.0806, "step": 1334 }, { "epoch": 0.34, "learning_rate": 9.885713713443379e-05, "loss": 0.0564, "step": 1335 }, { "epoch": 0.34, "learning_rate": 9.885411128210824e-05, "loss": 0.0749, "step": 1336 }, { "epoch": 0.35, "learning_rate": 9.88510814758733e-05, "loss": 0.0626, "step": 1337 }, { "epoch": 0.35, "learning_rate": 9.884804771597413e-05, "loss": 0.0715, "step": 1338 }, { "epoch": 0.35, "learning_rate": 9.884501000265632e-05, "loss": 0.0746, "step": 1339 }, { "epoch": 0.35, "learning_rate": 9.884196833616566e-05, "loss": 0.0491, "step": 1340 }, { "epoch": 0.35, "learning_rate": 9.883892271674836e-05, "loss": 0.0888, "step": 1341 }, { "epoch": 0.35, "learning_rate": 9.88358731446509e-05, "loss": 0.0596, "step": 1342 }, { "epoch": 0.35, "learning_rate": 9.883281962012007e-05, "loss": 0.0668, "step": 1343 }, { "epoch": 0.35, "learning_rate": 9.882976214340305e-05, "loss": 0.0721, "step": 1344 }, { "epoch": 0.35, "learning_rate": 9.882670071474726e-05, "loss": 0.0698, "step": 1345 }, { "epoch": 0.35, "learning_rate": 9.882363533440047e-05, "loss": 0.0622, "step": 1346 }, { "epoch": 0.35, "learning_rate": 9.882056600261075e-05, "loss": 0.0785, "step": 1347 }, { "epoch": 0.35, "learning_rate": 9.881749271962656e-05, "loss": 0.0577, "step": 1348 }, { "epoch": 0.35, "learning_rate": 9.881441548569658e-05, "loss": 0.057, "step": 1349 }, { "epoch": 0.35, "learning_rate": 9.88113343010699e-05, "loss": 0.0637, "step": 1350 }, { "epoch": 0.35, "learning_rate": 9.880824916599587e-05, "loss": 0.0687, "step": 1351 }, { "epoch": 0.35, "learning_rate": 9.880516008072419e-05, "loss": 0.0685, "step": 1352 }, { "epoch": 0.35, "learning_rate": 9.880206704550486e-05, "loss": 0.0699, "step": 1353 }, { "epoch": 0.35, "learning_rate": 9.87989700605882e-05, "loss": 0.0702, "step": 1354 }, { "epoch": 0.35, "learning_rate": 9.879586912622487e-05, "loss": 0.0649, "step": 1355 }, { "epoch": 0.35, "learning_rate": 9.879276424266584e-05, "loss": 0.0649, "step": 1356 }, { "epoch": 0.35, "learning_rate": 9.878965541016239e-05, "loss": 0.07, "step": 1357 }, { "epoch": 0.35, "learning_rate": 9.878654262896612e-05, "loss": 0.0721, "step": 1358 }, { "epoch": 0.35, "learning_rate": 9.878342589932898e-05, "loss": 0.0619, "step": 1359 }, { "epoch": 0.35, "learning_rate": 9.878030522150318e-05, "loss": 0.0595, "step": 1360 }, { "epoch": 0.35, "learning_rate": 9.877718059574134e-05, "loss": 0.0579, "step": 1361 }, { "epoch": 0.35, "learning_rate": 9.87740520222963e-05, "loss": 0.0534, "step": 1362 }, { "epoch": 0.35, "learning_rate": 9.877091950142128e-05, "loss": 0.0737, "step": 1363 }, { "epoch": 0.35, "learning_rate": 9.87677830333698e-05, "loss": 0.0739, "step": 1364 }, { "epoch": 0.35, "learning_rate": 9.876464261839572e-05, "loss": 0.0582, "step": 1365 }, { "epoch": 0.35, "learning_rate": 9.876149825675318e-05, "loss": 0.0742, "step": 1366 }, { "epoch": 0.35, "learning_rate": 9.875834994869668e-05, "loss": 0.0715, "step": 1367 }, { "epoch": 0.35, "learning_rate": 9.875519769448101e-05, "loss": 0.0701, "step": 1368 }, { "epoch": 0.35, "learning_rate": 9.87520414943613e-05, "loss": 0.0918, "step": 1369 }, { "epoch": 0.35, "learning_rate": 9.874888134859297e-05, "loss": 0.05, "step": 1370 }, { "epoch": 0.35, "learning_rate": 9.874571725743181e-05, "loss": 0.0949, "step": 1371 }, { "epoch": 0.35, "learning_rate": 9.874254922113389e-05, "loss": 0.0648, "step": 1372 }, { "epoch": 0.35, "learning_rate": 9.873937723995561e-05, "loss": 0.0677, "step": 1373 }, { "epoch": 0.35, "learning_rate": 9.873620131415366e-05, "loss": 0.0561, "step": 1374 }, { "epoch": 0.35, "learning_rate": 9.873302144398513e-05, "loss": 0.0548, "step": 1375 }, { "epoch": 0.36, "learning_rate": 9.872983762970732e-05, "loss": 0.0534, "step": 1376 }, { "epoch": 0.36, "learning_rate": 9.872664987157794e-05, "loss": 0.0831, "step": 1377 }, { "epoch": 0.36, "learning_rate": 9.872345816985498e-05, "loss": 0.0488, "step": 1378 }, { "epoch": 0.36, "learning_rate": 9.872026252479676e-05, "loss": 0.0614, "step": 1379 }, { "epoch": 0.36, "learning_rate": 9.87170629366619e-05, "loss": 0.0635, "step": 1380 }, { "epoch": 0.36, "learning_rate": 9.871385940570935e-05, "loss": 0.0623, "step": 1381 }, { "epoch": 0.36, "learning_rate": 9.871065193219838e-05, "loss": 0.0722, "step": 1382 }, { "epoch": 0.36, "learning_rate": 9.87074405163886e-05, "loss": 0.0677, "step": 1383 }, { "epoch": 0.36, "learning_rate": 9.870422515853989e-05, "loss": 0.0596, "step": 1384 }, { "epoch": 0.36, "learning_rate": 9.870100585891253e-05, "loss": 0.0637, "step": 1385 }, { "epoch": 0.36, "learning_rate": 9.869778261776701e-05, "loss": 0.0626, "step": 1386 }, { "epoch": 0.36, "learning_rate": 9.869455543536423e-05, "loss": 0.0581, "step": 1387 }, { "epoch": 0.36, "learning_rate": 9.869132431196535e-05, "loss": 0.0535, "step": 1388 }, { "epoch": 0.36, "learning_rate": 9.86880892478319e-05, "loss": 0.0575, "step": 1389 }, { "epoch": 0.36, "learning_rate": 9.86848502432257e-05, "loss": 0.0601, "step": 1390 }, { "epoch": 0.36, "learning_rate": 9.868160729840888e-05, "loss": 0.0599, "step": 1391 }, { "epoch": 0.36, "learning_rate": 9.867836041364391e-05, "loss": 0.0705, "step": 1392 }, { "epoch": 0.36, "learning_rate": 9.867510958919357e-05, "loss": 0.0736, "step": 1393 }, { "epoch": 0.36, "learning_rate": 9.867185482532095e-05, "loss": 0.0716, "step": 1394 }, { "epoch": 0.36, "learning_rate": 9.866859612228947e-05, "loss": 0.0905, "step": 1395 }, { "epoch": 0.36, "learning_rate": 9.866533348036288e-05, "loss": 0.0703, "step": 1396 }, { "epoch": 0.36, "learning_rate": 9.866206689980522e-05, "loss": 0.073, "step": 1397 }, { "epoch": 0.36, "learning_rate": 9.865879638088087e-05, "loss": 0.0742, "step": 1398 }, { "epoch": 0.36, "learning_rate": 9.865552192385453e-05, "loss": 0.0599, "step": 1399 }, { "epoch": 0.36, "learning_rate": 9.865224352899119e-05, "loss": 0.0689, "step": 1400 }, { "epoch": 0.36, "learning_rate": 9.864896119655621e-05, "loss": 0.0759, "step": 1401 }, { "epoch": 0.36, "learning_rate": 9.864567492681523e-05, "loss": 0.0691, "step": 1402 }, { "epoch": 0.36, "learning_rate": 9.86423847200342e-05, "loss": 0.095, "step": 1403 }, { "epoch": 0.36, "learning_rate": 9.863909057647941e-05, "loss": 0.0769, "step": 1404 }, { "epoch": 0.36, "learning_rate": 9.863579249641748e-05, "loss": 0.0759, "step": 1405 }, { "epoch": 0.36, "learning_rate": 9.863249048011533e-05, "loss": 0.0658, "step": 1406 }, { "epoch": 0.36, "learning_rate": 9.862918452784019e-05, "loss": 0.1046, "step": 1407 }, { "epoch": 0.36, "learning_rate": 9.862587463985964e-05, "loss": 0.0695, "step": 1408 }, { "epoch": 0.36, "learning_rate": 9.862256081644155e-05, "loss": 0.0397, "step": 1409 }, { "epoch": 0.36, "learning_rate": 9.861924305785409e-05, "loss": 0.0926, "step": 1410 }, { "epoch": 0.36, "learning_rate": 9.861592136436583e-05, "loss": 0.0665, "step": 1411 }, { "epoch": 0.36, "learning_rate": 9.861259573624556e-05, "loss": 0.0731, "step": 1412 }, { "epoch": 0.36, "learning_rate": 9.860926617376245e-05, "loss": 0.0896, "step": 1413 }, { "epoch": 0.36, "learning_rate": 9.860593267718598e-05, "loss": 0.0689, "step": 1414 }, { "epoch": 0.37, "learning_rate": 9.860259524678592e-05, "loss": 0.0678, "step": 1415 }, { "epoch": 0.37, "learning_rate": 9.859925388283241e-05, "loss": 0.0801, "step": 1416 }, { "epoch": 0.37, "learning_rate": 9.859590858559584e-05, "loss": 0.0522, "step": 1417 }, { "epoch": 0.37, "learning_rate": 9.859255935534697e-05, "loss": 0.0796, "step": 1418 }, { "epoch": 0.37, "learning_rate": 9.858920619235688e-05, "loss": 0.0589, "step": 1419 }, { "epoch": 0.37, "learning_rate": 9.858584909689691e-05, "loss": 0.0712, "step": 1420 }, { "epoch": 0.37, "learning_rate": 9.85824880692388e-05, "loss": 0.0819, "step": 1421 }, { "epoch": 0.37, "learning_rate": 9.857912310965454e-05, "loss": 0.082, "step": 1422 }, { "epoch": 0.37, "learning_rate": 9.857575421841649e-05, "loss": 0.0818, "step": 1423 }, { "epoch": 0.37, "learning_rate": 9.85723813957973e-05, "loss": 0.0633, "step": 1424 }, { "epoch": 0.37, "learning_rate": 9.856900464206993e-05, "loss": 0.0414, "step": 1425 }, { "epoch": 0.37, "learning_rate": 9.856562395750767e-05, "loss": 0.0796, "step": 1426 }, { "epoch": 0.37, "learning_rate": 9.856223934238414e-05, "loss": 0.0886, "step": 1427 }, { "epoch": 0.37, "learning_rate": 9.855885079697327e-05, "loss": 0.0901, "step": 1428 }, { "epoch": 0.37, "learning_rate": 9.855545832154929e-05, "loss": 0.0628, "step": 1429 }, { "epoch": 0.37, "learning_rate": 9.855206191638677e-05, "loss": 0.0753, "step": 1430 }, { "epoch": 0.37, "learning_rate": 9.85486615817606e-05, "loss": 0.0677, "step": 1431 }, { "epoch": 0.37, "learning_rate": 9.854525731794598e-05, "loss": 0.0808, "step": 1432 }, { "epoch": 0.37, "learning_rate": 9.85418491252184e-05, "loss": 0.0804, "step": 1433 }, { "epoch": 0.37, "learning_rate": 9.853843700385373e-05, "loss": 0.0562, "step": 1434 }, { "epoch": 0.37, "learning_rate": 9.85350209541281e-05, "loss": 0.0657, "step": 1435 }, { "epoch": 0.37, "learning_rate": 9.853160097631797e-05, "loss": 0.0631, "step": 1436 }, { "epoch": 0.37, "learning_rate": 9.852817707070017e-05, "loss": 0.0547, "step": 1437 }, { "epoch": 0.37, "learning_rate": 9.85247492375518e-05, "loss": 0.0921, "step": 1438 }, { "epoch": 0.37, "learning_rate": 9.852131747715023e-05, "loss": 0.0565, "step": 1439 }, { "epoch": 0.37, "learning_rate": 9.851788178977327e-05, "loss": 0.0731, "step": 1440 }, { "epoch": 0.37, "learning_rate": 9.851444217569894e-05, "loss": 0.0798, "step": 1441 }, { "epoch": 0.37, "learning_rate": 9.851099863520565e-05, "loss": 0.0687, "step": 1442 }, { "epoch": 0.37, "learning_rate": 9.850755116857206e-05, "loss": 0.0729, "step": 1443 }, { "epoch": 0.37, "learning_rate": 9.85040997760772e-05, "loss": 0.0518, "step": 1444 }, { "epoch": 0.37, "learning_rate": 9.85006444580004e-05, "loss": 0.0572, "step": 1445 }, { "epoch": 0.37, "learning_rate": 9.849718521462133e-05, "loss": 0.0583, "step": 1446 }, { "epoch": 0.37, "learning_rate": 9.849372204621991e-05, "loss": 0.0871, "step": 1447 }, { "epoch": 0.37, "learning_rate": 9.849025495307647e-05, "loss": 0.0705, "step": 1448 }, { "epoch": 0.37, "learning_rate": 9.84867839354716e-05, "loss": 0.0566, "step": 1449 }, { "epoch": 0.37, "learning_rate": 9.848330899368621e-05, "loss": 0.0635, "step": 1450 }, { "epoch": 0.37, "learning_rate": 9.847983012800154e-05, "loss": 0.0668, "step": 1451 }, { "epoch": 0.37, "learning_rate": 9.847634733869915e-05, "loss": 0.068, "step": 1452 }, { "epoch": 0.37, "learning_rate": 9.84728606260609e-05, "loss": 0.059, "step": 1453 }, { "epoch": 0.38, "learning_rate": 9.8469369990369e-05, "loss": 0.0947, "step": 1454 }, { "epoch": 0.38, "learning_rate": 9.846587543190595e-05, "loss": 0.0425, "step": 1455 }, { "epoch": 0.38, "learning_rate": 9.846237695095457e-05, "loss": 0.0567, "step": 1456 }, { "epoch": 0.38, "learning_rate": 9.8458874547798e-05, "loss": 0.0707, "step": 1457 }, { "epoch": 0.38, "learning_rate": 9.845536822271971e-05, "loss": 0.0587, "step": 1458 }, { "epoch": 0.38, "learning_rate": 9.845185797600347e-05, "loss": 0.0747, "step": 1459 }, { "epoch": 0.38, "learning_rate": 9.844834380793338e-05, "loss": 0.0796, "step": 1460 }, { "epoch": 0.38, "learning_rate": 9.844482571879386e-05, "loss": 0.0598, "step": 1461 }, { "epoch": 0.38, "learning_rate": 9.844130370886962e-05, "loss": 0.0746, "step": 1462 }, { "epoch": 0.38, "learning_rate": 9.84377777784457e-05, "loss": 0.0556, "step": 1463 }, { "epoch": 0.38, "learning_rate": 9.843424792780749e-05, "loss": 0.0656, "step": 1464 }, { "epoch": 0.38, "learning_rate": 9.843071415724067e-05, "loss": 0.0484, "step": 1465 }, { "epoch": 0.38, "learning_rate": 9.842717646703123e-05, "loss": 0.0613, "step": 1466 }, { "epoch": 0.38, "learning_rate": 9.842363485746547e-05, "loss": 0.0729, "step": 1467 }, { "epoch": 0.38, "learning_rate": 9.842008932883006e-05, "loss": 0.0596, "step": 1468 }, { "epoch": 0.38, "learning_rate": 9.84165398814119e-05, "loss": 0.0605, "step": 1469 }, { "epoch": 0.38, "learning_rate": 9.841298651549831e-05, "loss": 0.0748, "step": 1470 }, { "epoch": 0.38, "learning_rate": 9.840942923137685e-05, "loss": 0.0746, "step": 1471 }, { "epoch": 0.38, "learning_rate": 9.840586802933543e-05, "loss": 0.0845, "step": 1472 }, { "epoch": 0.38, "learning_rate": 9.840230290966223e-05, "loss": 0.0729, "step": 1473 }, { "epoch": 0.38, "learning_rate": 9.839873387264584e-05, "loss": 0.0647, "step": 1474 }, { "epoch": 0.38, "learning_rate": 9.839516091857509e-05, "loss": 0.0704, "step": 1475 }, { "epoch": 0.38, "learning_rate": 9.839158404773915e-05, "loss": 0.0812, "step": 1476 }, { "epoch": 0.38, "learning_rate": 9.83880032604275e-05, "loss": 0.0507, "step": 1477 }, { "epoch": 0.38, "learning_rate": 9.838441855692996e-05, "loss": 0.0762, "step": 1478 }, { "epoch": 0.38, "learning_rate": 9.838082993753664e-05, "loss": 0.0703, "step": 1479 }, { "epoch": 0.38, "learning_rate": 9.837723740253797e-05, "loss": 0.0502, "step": 1480 }, { "epoch": 0.38, "learning_rate": 9.837364095222473e-05, "loss": 0.0639, "step": 1481 }, { "epoch": 0.38, "learning_rate": 9.837004058688794e-05, "loss": 0.0704, "step": 1482 }, { "epoch": 0.38, "learning_rate": 9.836643630681906e-05, "loss": 0.0716, "step": 1483 }, { "epoch": 0.38, "learning_rate": 9.836282811230974e-05, "loss": 0.058, "step": 1484 }, { "epoch": 0.38, "learning_rate": 9.835921600365203e-05, "loss": 0.0576, "step": 1485 }, { "epoch": 0.38, "learning_rate": 9.835559998113826e-05, "loss": 0.0586, "step": 1486 }, { "epoch": 0.38, "learning_rate": 9.835198004506108e-05, "loss": 0.0499, "step": 1487 }, { "epoch": 0.38, "learning_rate": 9.834835619571346e-05, "loss": 0.0752, "step": 1488 }, { "epoch": 0.38, "learning_rate": 9.834472843338871e-05, "loss": 0.0989, "step": 1489 }, { "epoch": 0.38, "learning_rate": 9.834109675838041e-05, "loss": 0.0803, "step": 1490 }, { "epoch": 0.38, "learning_rate": 9.833746117098251e-05, "loss": 0.0727, "step": 1491 }, { "epoch": 0.39, "learning_rate": 9.833382167148921e-05, "loss": 0.0502, "step": 1492 }, { "epoch": 0.39, "learning_rate": 9.83301782601951e-05, "loss": 0.0666, "step": 1493 }, { "epoch": 0.39, "learning_rate": 9.832653093739505e-05, "loss": 0.0701, "step": 1494 }, { "epoch": 0.39, "learning_rate": 9.832287970338423e-05, "loss": 0.0662, "step": 1495 }, { "epoch": 0.39, "learning_rate": 9.831922455845815e-05, "loss": 0.0478, "step": 1496 }, { "epoch": 0.39, "learning_rate": 9.831556550291265e-05, "loss": 0.0773, "step": 1497 }, { "epoch": 0.39, "learning_rate": 9.831190253704385e-05, "loss": 0.0757, "step": 1498 }, { "epoch": 0.39, "learning_rate": 9.830823566114821e-05, "loss": 0.0539, "step": 1499 }, { "epoch": 0.39, "learning_rate": 9.83045648755225e-05, "loss": 0.0621, "step": 1500 }, { "epoch": 0.39, "learning_rate": 9.83008901804638e-05, "loss": 0.0689, "step": 1501 }, { "epoch": 0.39, "learning_rate": 9.829721157626955e-05, "loss": 0.0552, "step": 1502 }, { "epoch": 0.39, "learning_rate": 9.829352906323741e-05, "loss": 0.0794, "step": 1503 }, { "epoch": 0.39, "learning_rate": 9.828984264166548e-05, "loss": 0.0784, "step": 1504 }, { "epoch": 0.39, "learning_rate": 9.828615231185206e-05, "loss": 0.0816, "step": 1505 }, { "epoch": 0.39, "learning_rate": 9.828245807409584e-05, "loss": 0.0678, "step": 1506 }, { "epoch": 0.39, "learning_rate": 9.827875992869581e-05, "loss": 0.0789, "step": 1507 }, { "epoch": 0.39, "learning_rate": 9.82750578759513e-05, "loss": 0.0813, "step": 1508 }, { "epoch": 0.39, "learning_rate": 9.827135191616188e-05, "loss": 0.0764, "step": 1509 }, { "epoch": 0.39, "learning_rate": 9.82676420496275e-05, "loss": 0.0543, "step": 1510 }, { "epoch": 0.39, "learning_rate": 9.826392827664841e-05, "loss": 0.0623, "step": 1511 }, { "epoch": 0.39, "learning_rate": 9.826021059752519e-05, "loss": 0.0771, "step": 1512 }, { "epoch": 0.39, "learning_rate": 9.825648901255871e-05, "loss": 0.0822, "step": 1513 }, { "epoch": 0.39, "learning_rate": 9.825276352205016e-05, "loss": 0.0871, "step": 1514 }, { "epoch": 0.39, "learning_rate": 9.824903412630107e-05, "loss": 0.0709, "step": 1515 }, { "epoch": 0.39, "learning_rate": 9.824530082561327e-05, "loss": 0.0735, "step": 1516 }, { "epoch": 0.39, "learning_rate": 9.824156362028892e-05, "loss": 0.057, "step": 1517 }, { "epoch": 0.39, "learning_rate": 9.823782251063046e-05, "loss": 0.0673, "step": 1518 }, { "epoch": 0.39, "learning_rate": 9.823407749694068e-05, "loss": 0.0686, "step": 1519 }, { "epoch": 0.39, "learning_rate": 9.823032857952268e-05, "loss": 0.0734, "step": 1520 }, { "epoch": 0.39, "learning_rate": 9.822657575867985e-05, "loss": 0.0793, "step": 1521 }, { "epoch": 0.39, "learning_rate": 9.822281903471593e-05, "loss": 0.0724, "step": 1522 }, { "epoch": 0.39, "learning_rate": 9.821905840793497e-05, "loss": 0.0394, "step": 1523 }, { "epoch": 0.39, "learning_rate": 9.821529387864133e-05, "loss": 0.0427, "step": 1524 }, { "epoch": 0.39, "learning_rate": 9.821152544713967e-05, "loss": 0.0952, "step": 1525 }, { "epoch": 0.39, "learning_rate": 9.8207753113735e-05, "loss": 0.0536, "step": 1526 }, { "epoch": 0.39, "learning_rate": 9.820397687873259e-05, "loss": 0.0815, "step": 1527 }, { "epoch": 0.39, "learning_rate": 9.820019674243811e-05, "loss": 0.0578, "step": 1528 }, { "epoch": 0.39, "learning_rate": 9.819641270515748e-05, "loss": 0.0726, "step": 1529 }, { "epoch": 0.39, "learning_rate": 9.819262476719692e-05, "loss": 0.0738, "step": 1530 }, { "epoch": 0.4, "learning_rate": 9.818883292886304e-05, "loss": 0.0691, "step": 1531 }, { "epoch": 0.4, "learning_rate": 9.81850371904627e-05, "loss": 0.0874, "step": 1532 }, { "epoch": 0.4, "learning_rate": 9.818123755230313e-05, "loss": 0.0674, "step": 1533 }, { "epoch": 0.4, "learning_rate": 9.817743401469181e-05, "loss": 0.0505, "step": 1534 }, { "epoch": 0.4, "learning_rate": 9.81736265779366e-05, "loss": 0.0869, "step": 1535 }, { "epoch": 0.4, "learning_rate": 9.816981524234565e-05, "loss": 0.0499, "step": 1536 }, { "epoch": 0.4, "learning_rate": 9.81660000082274e-05, "loss": 0.077, "step": 1537 }, { "epoch": 0.4, "learning_rate": 9.816218087589063e-05, "loss": 0.0823, "step": 1538 }, { "epoch": 0.4, "learning_rate": 9.815835784564444e-05, "loss": 0.082, "step": 1539 }, { "epoch": 0.4, "learning_rate": 9.815453091779826e-05, "loss": 0.0777, "step": 1540 }, { "epoch": 0.4, "learning_rate": 9.815070009266178e-05, "loss": 0.0598, "step": 1541 }, { "epoch": 0.4, "learning_rate": 9.814686537054506e-05, "loss": 0.0621, "step": 1542 }, { "epoch": 0.4, "learning_rate": 9.814302675175845e-05, "loss": 0.0656, "step": 1543 }, { "epoch": 0.4, "learning_rate": 9.813918423661264e-05, "loss": 0.0724, "step": 1544 }, { "epoch": 0.4, "learning_rate": 9.813533782541857e-05, "loss": 0.0484, "step": 1545 }, { "epoch": 0.4, "learning_rate": 9.813148751848759e-05, "loss": 0.0696, "step": 1546 }, { "epoch": 0.4, "learning_rate": 9.812763331613129e-05, "loss": 0.0606, "step": 1547 }, { "epoch": 0.4, "learning_rate": 9.812377521866161e-05, "loss": 0.0515, "step": 1548 }, { "epoch": 0.4, "learning_rate": 9.811991322639079e-05, "loss": 0.0522, "step": 1549 }, { "epoch": 0.4, "learning_rate": 9.81160473396314e-05, "loss": 0.0719, "step": 1550 }, { "epoch": 0.4, "learning_rate": 9.811217755869633e-05, "loss": 0.0585, "step": 1551 }, { "epoch": 0.4, "learning_rate": 9.810830388389876e-05, "loss": 0.0713, "step": 1552 }, { "epoch": 0.4, "learning_rate": 9.810442631555219e-05, "loss": 0.0738, "step": 1553 }, { "epoch": 0.4, "learning_rate": 9.810054485397044e-05, "loss": 0.0722, "step": 1554 }, { "epoch": 0.4, "learning_rate": 9.809665949946768e-05, "loss": 0.0625, "step": 1555 }, { "epoch": 0.4, "learning_rate": 9.809277025235834e-05, "loss": 0.057, "step": 1556 }, { "epoch": 0.4, "learning_rate": 9.808887711295718e-05, "loss": 0.0576, "step": 1557 }, { "epoch": 0.4, "learning_rate": 9.808498008157931e-05, "loss": 0.062, "step": 1558 }, { "epoch": 0.4, "learning_rate": 9.80810791585401e-05, "loss": 0.0632, "step": 1559 }, { "epoch": 0.4, "learning_rate": 9.807717434415527e-05, "loss": 0.0714, "step": 1560 }, { "epoch": 0.4, "learning_rate": 9.807326563874087e-05, "loss": 0.0902, "step": 1561 }, { "epoch": 0.4, "learning_rate": 9.806935304261322e-05, "loss": 0.0513, "step": 1562 }, { "epoch": 0.4, "learning_rate": 9.8065436556089e-05, "loss": 0.0783, "step": 1563 }, { "epoch": 0.4, "learning_rate": 9.806151617948516e-05, "loss": 0.0619, "step": 1564 }, { "epoch": 0.4, "learning_rate": 9.8057591913119e-05, "loss": 0.0661, "step": 1565 }, { "epoch": 0.4, "learning_rate": 9.805366375730811e-05, "loss": 0.0662, "step": 1566 }, { "epoch": 0.4, "learning_rate": 9.804973171237042e-05, "loss": 0.0768, "step": 1567 }, { "epoch": 0.4, "learning_rate": 9.804579577862416e-05, "loss": 0.0596, "step": 1568 }, { "epoch": 0.4, "learning_rate": 9.804185595638789e-05, "loss": 0.0524, "step": 1569 }, { "epoch": 0.41, "learning_rate": 9.803791224598044e-05, "loss": 0.0637, "step": 1570 }, { "epoch": 0.41, "learning_rate": 9.803396464772101e-05, "loss": 0.0509, "step": 1571 }, { "epoch": 0.41, "learning_rate": 9.803001316192907e-05, "loss": 0.0727, "step": 1572 }, { "epoch": 0.41, "learning_rate": 9.802605778892447e-05, "loss": 0.0667, "step": 1573 }, { "epoch": 0.41, "learning_rate": 9.802209852902728e-05, "loss": 0.0603, "step": 1574 }, { "epoch": 0.41, "learning_rate": 9.801813538255797e-05, "loss": 0.0438, "step": 1575 }, { "epoch": 0.41, "learning_rate": 9.801416834983727e-05, "loss": 0.0627, "step": 1576 }, { "epoch": 0.41, "learning_rate": 9.801019743118625e-05, "loss": 0.0787, "step": 1577 }, { "epoch": 0.41, "learning_rate": 9.800622262692628e-05, "loss": 0.0652, "step": 1578 }, { "epoch": 0.41, "learning_rate": 9.800224393737905e-05, "loss": 0.0709, "step": 1579 }, { "epoch": 0.41, "learning_rate": 9.799826136286659e-05, "loss": 0.0486, "step": 1580 }, { "epoch": 0.41, "learning_rate": 9.79942749037112e-05, "loss": 0.0558, "step": 1581 }, { "epoch": 0.41, "learning_rate": 9.799028456023554e-05, "loss": 0.0759, "step": 1582 }, { "epoch": 0.41, "learning_rate": 9.798629033276254e-05, "loss": 0.0576, "step": 1583 }, { "epoch": 0.41, "learning_rate": 9.798229222161546e-05, "loss": 0.0492, "step": 1584 }, { "epoch": 0.41, "learning_rate": 9.797829022711791e-05, "loss": 0.0796, "step": 1585 }, { "epoch": 0.41, "learning_rate": 9.797428434959374e-05, "loss": 0.0664, "step": 1586 }, { "epoch": 0.41, "learning_rate": 9.797027458936718e-05, "loss": 0.0645, "step": 1587 }, { "epoch": 0.41, "learning_rate": 9.796626094676276e-05, "loss": 0.0619, "step": 1588 }, { "epoch": 0.41, "learning_rate": 9.796224342210532e-05, "loss": 0.0584, "step": 1589 }, { "epoch": 0.41, "learning_rate": 9.795822201571998e-05, "loss": 0.073, "step": 1590 }, { "epoch": 0.41, "learning_rate": 9.795419672793222e-05, "loss": 0.0524, "step": 1591 }, { "epoch": 0.41, "learning_rate": 9.795016755906784e-05, "loss": 0.0532, "step": 1592 }, { "epoch": 0.41, "learning_rate": 9.794613450945292e-05, "loss": 0.0605, "step": 1593 }, { "epoch": 0.41, "learning_rate": 9.794209757941383e-05, "loss": 0.0623, "step": 1594 }, { "epoch": 0.41, "learning_rate": 9.793805676927735e-05, "loss": 0.0458, "step": 1595 }, { "epoch": 0.41, "learning_rate": 9.793401207937046e-05, "loss": 0.0473, "step": 1596 }, { "epoch": 0.41, "learning_rate": 9.792996351002055e-05, "loss": 0.0761, "step": 1597 }, { "epoch": 0.41, "learning_rate": 9.792591106155527e-05, "loss": 0.0704, "step": 1598 }, { "epoch": 0.41, "learning_rate": 9.79218547343026e-05, "loss": 0.0562, "step": 1599 }, { "epoch": 0.41, "learning_rate": 9.79177945285908e-05, "loss": 0.0586, "step": 1600 }, { "epoch": 0.41, "learning_rate": 9.791373044474852e-05, "loss": 0.1038, "step": 1601 }, { "epoch": 0.41, "learning_rate": 9.790966248310467e-05, "loss": 0.0763, "step": 1602 }, { "epoch": 0.41, "learning_rate": 9.790559064398845e-05, "loss": 0.0917, "step": 1603 }, { "epoch": 0.41, "learning_rate": 9.790151492772943e-05, "loss": 0.0866, "step": 1604 }, { "epoch": 0.41, "learning_rate": 9.789743533465747e-05, "loss": 0.0694, "step": 1605 }, { "epoch": 0.41, "learning_rate": 9.789335186510276e-05, "loss": 0.0553, "step": 1606 }, { "epoch": 0.41, "learning_rate": 9.788926451939575e-05, "loss": 0.0832, "step": 1607 }, { "epoch": 0.41, "learning_rate": 9.788517329786726e-05, "loss": 0.0797, "step": 1608 }, { "epoch": 0.42, "learning_rate": 9.788107820084839e-05, "loss": 0.1034, "step": 1609 }, { "epoch": 0.42, "learning_rate": 9.787697922867061e-05, "loss": 0.0424, "step": 1610 }, { "epoch": 0.42, "learning_rate": 9.787287638166563e-05, "loss": 0.0484, "step": 1611 }, { "epoch": 0.42, "learning_rate": 9.78687696601655e-05, "loss": 0.0988, "step": 1612 }, { "epoch": 0.42, "learning_rate": 9.78646590645026e-05, "loss": 0.0841, "step": 1613 }, { "epoch": 0.42, "learning_rate": 9.78605445950096e-05, "loss": 0.0711, "step": 1614 }, { "epoch": 0.42, "learning_rate": 9.785642625201954e-05, "loss": 0.0872, "step": 1615 }, { "epoch": 0.42, "learning_rate": 9.785230403586567e-05, "loss": 0.0935, "step": 1616 }, { "epoch": 0.42, "learning_rate": 9.784817794688165e-05, "loss": 0.0809, "step": 1617 }, { "epoch": 0.42, "learning_rate": 9.784404798540143e-05, "loss": 0.0714, "step": 1618 }, { "epoch": 0.42, "learning_rate": 9.783991415175921e-05, "loss": 0.05, "step": 1619 }, { "epoch": 0.42, "learning_rate": 9.78357764462896e-05, "loss": 0.0847, "step": 1620 }, { "epoch": 0.42, "learning_rate": 9.783163486932744e-05, "loss": 0.0576, "step": 1621 }, { "epoch": 0.42, "learning_rate": 9.782748942120796e-05, "loss": 0.0566, "step": 1622 }, { "epoch": 0.42, "learning_rate": 9.782334010226664e-05, "loss": 0.0585, "step": 1623 }, { "epoch": 0.42, "learning_rate": 9.781918691283928e-05, "loss": 0.0665, "step": 1624 }, { "epoch": 0.42, "learning_rate": 9.781502985326206e-05, "loss": 0.0647, "step": 1625 }, { "epoch": 0.42, "learning_rate": 9.781086892387137e-05, "loss": 0.064, "step": 1626 }, { "epoch": 0.42, "learning_rate": 9.7806704125004e-05, "loss": 0.0764, "step": 1627 }, { "epoch": 0.42, "learning_rate": 9.7802535456997e-05, "loss": 0.0712, "step": 1628 }, { "epoch": 0.42, "learning_rate": 9.779836292018776e-05, "loss": 0.0437, "step": 1629 }, { "epoch": 0.42, "learning_rate": 9.779418651491399e-05, "loss": 0.0531, "step": 1630 }, { "epoch": 0.42, "learning_rate": 9.779000624151369e-05, "loss": 0.0823, "step": 1631 }, { "epoch": 0.42, "learning_rate": 9.778582210032517e-05, "loss": 0.0833, "step": 1632 }, { "epoch": 0.42, "learning_rate": 9.778163409168707e-05, "loss": 0.0723, "step": 1633 }, { "epoch": 0.42, "learning_rate": 9.777744221593834e-05, "loss": 0.0694, "step": 1634 }, { "epoch": 0.42, "learning_rate": 9.777324647341825e-05, "loss": 0.075, "step": 1635 }, { "epoch": 0.42, "learning_rate": 9.776904686446637e-05, "loss": 0.0837, "step": 1636 }, { "epoch": 0.42, "learning_rate": 9.776484338942257e-05, "loss": 0.0799, "step": 1637 }, { "epoch": 0.42, "learning_rate": 9.776063604862708e-05, "loss": 0.0603, "step": 1638 }, { "epoch": 0.42, "learning_rate": 9.775642484242039e-05, "loss": 0.069, "step": 1639 }, { "epoch": 0.42, "learning_rate": 9.775220977114334e-05, "loss": 0.0698, "step": 1640 }, { "epoch": 0.42, "learning_rate": 9.774799083513704e-05, "loss": 0.0593, "step": 1641 }, { "epoch": 0.42, "learning_rate": 9.774376803474297e-05, "loss": 0.0744, "step": 1642 }, { "epoch": 0.42, "learning_rate": 9.77395413703029e-05, "loss": 0.0499, "step": 1643 }, { "epoch": 0.42, "learning_rate": 9.773531084215888e-05, "loss": 0.0638, "step": 1644 }, { "epoch": 0.42, "learning_rate": 9.773107645065332e-05, "loss": 0.0498, "step": 1645 }, { "epoch": 0.42, "learning_rate": 9.772683819612888e-05, "loss": 0.0471, "step": 1646 }, { "epoch": 0.43, "learning_rate": 9.772259607892865e-05, "loss": 0.0593, "step": 1647 }, { "epoch": 0.43, "learning_rate": 9.77183500993959e-05, "loss": 0.0754, "step": 1648 }, { "epoch": 0.43, "learning_rate": 9.771410025787427e-05, "loss": 0.0483, "step": 1649 }, { "epoch": 0.43, "learning_rate": 9.770984655470775e-05, "loss": 0.0591, "step": 1650 }, { "epoch": 0.43, "learning_rate": 9.770558899024056e-05, "loss": 0.0735, "step": 1651 }, { "epoch": 0.43, "learning_rate": 9.770132756481732e-05, "loss": 0.0621, "step": 1652 }, { "epoch": 0.43, "learning_rate": 9.769706227878288e-05, "loss": 0.0646, "step": 1653 }, { "epoch": 0.43, "learning_rate": 9.769279313248247e-05, "loss": 0.0737, "step": 1654 }, { "epoch": 0.43, "learning_rate": 9.76885201262616e-05, "loss": 0.0686, "step": 1655 }, { "epoch": 0.43, "learning_rate": 9.76842432604661e-05, "loss": 0.0583, "step": 1656 }, { "epoch": 0.43, "learning_rate": 9.76799625354421e-05, "loss": 0.0609, "step": 1657 }, { "epoch": 0.43, "learning_rate": 9.767567795153604e-05, "loss": 0.0568, "step": 1658 }, { "epoch": 0.43, "learning_rate": 9.767138950909472e-05, "loss": 0.0881, "step": 1659 }, { "epoch": 0.43, "learning_rate": 9.766709720846518e-05, "loss": 0.0692, "step": 1660 }, { "epoch": 0.43, "learning_rate": 9.766280104999484e-05, "loss": 0.0514, "step": 1661 }, { "epoch": 0.43, "learning_rate": 9.765850103403137e-05, "loss": 0.0599, "step": 1662 }, { "epoch": 0.43, "learning_rate": 9.76541971609228e-05, "loss": 0.0576, "step": 1663 }, { "epoch": 0.43, "learning_rate": 9.764988943101746e-05, "loss": 0.0618, "step": 1664 }, { "epoch": 0.43, "learning_rate": 9.764557784466398e-05, "loss": 0.0508, "step": 1665 }, { "epoch": 0.43, "learning_rate": 9.764126240221132e-05, "loss": 0.0667, "step": 1666 }, { "epoch": 0.43, "learning_rate": 9.763694310400872e-05, "loss": 0.0744, "step": 1667 }, { "epoch": 0.43, "learning_rate": 9.763261995040577e-05, "loss": 0.0573, "step": 1668 }, { "epoch": 0.43, "learning_rate": 9.762829294175235e-05, "loss": 0.0601, "step": 1669 }, { "epoch": 0.43, "learning_rate": 9.762396207839866e-05, "loss": 0.0464, "step": 1670 }, { "epoch": 0.43, "learning_rate": 9.761962736069521e-05, "loss": 0.0538, "step": 1671 }, { "epoch": 0.43, "learning_rate": 9.761528878899283e-05, "loss": 0.0816, "step": 1672 }, { "epoch": 0.43, "learning_rate": 9.761094636364264e-05, "loss": 0.0556, "step": 1673 }, { "epoch": 0.43, "learning_rate": 9.760660008499609e-05, "loss": 0.0642, "step": 1674 }, { "epoch": 0.43, "learning_rate": 9.760224995340493e-05, "loss": 0.0612, "step": 1675 }, { "epoch": 0.43, "learning_rate": 9.759789596922125e-05, "loss": 0.0599, "step": 1676 }, { "epoch": 0.43, "learning_rate": 9.759353813279741e-05, "loss": 0.0731, "step": 1677 }, { "epoch": 0.43, "learning_rate": 9.758917644448611e-05, "loss": 0.0651, "step": 1678 }, { "epoch": 0.43, "learning_rate": 9.758481090464037e-05, "loss": 0.0614, "step": 1679 }, { "epoch": 0.43, "learning_rate": 9.758044151361347e-05, "loss": 0.0599, "step": 1680 }, { "epoch": 0.43, "learning_rate": 9.757606827175907e-05, "loss": 0.0526, "step": 1681 }, { "epoch": 0.43, "learning_rate": 9.75716911794311e-05, "loss": 0.068, "step": 1682 }, { "epoch": 0.43, "learning_rate": 9.756731023698382e-05, "loss": 0.0662, "step": 1683 }, { "epoch": 0.43, "learning_rate": 9.756292544477176e-05, "loss": 0.0529, "step": 1684 }, { "epoch": 0.43, "learning_rate": 9.755853680314984e-05, "loss": 0.081, "step": 1685 }, { "epoch": 0.44, "learning_rate": 9.755414431247321e-05, "loss": 0.0518, "step": 1686 }, { "epoch": 0.44, "learning_rate": 9.754974797309737e-05, "loss": 0.0607, "step": 1687 }, { "epoch": 0.44, "learning_rate": 9.754534778537815e-05, "loss": 0.0627, "step": 1688 }, { "epoch": 0.44, "learning_rate": 9.754094374967166e-05, "loss": 0.0692, "step": 1689 }, { "epoch": 0.44, "learning_rate": 9.753653586633433e-05, "loss": 0.0605, "step": 1690 }, { "epoch": 0.44, "learning_rate": 9.753212413572291e-05, "loss": 0.0659, "step": 1691 }, { "epoch": 0.44, "learning_rate": 9.752770855819445e-05, "loss": 0.0381, "step": 1692 }, { "epoch": 0.44, "learning_rate": 9.752328913410631e-05, "loss": 0.0633, "step": 1693 }, { "epoch": 0.44, "learning_rate": 9.751886586381618e-05, "loss": 0.0574, "step": 1694 }, { "epoch": 0.44, "learning_rate": 9.751443874768203e-05, "loss": 0.0384, "step": 1695 }, { "epoch": 0.44, "learning_rate": 9.751000778606217e-05, "loss": 0.067, "step": 1696 }, { "epoch": 0.44, "learning_rate": 9.750557297931522e-05, "loss": 0.0677, "step": 1697 }, { "epoch": 0.44, "learning_rate": 9.750113432780008e-05, "loss": 0.0404, "step": 1698 }, { "epoch": 0.44, "learning_rate": 9.749669183187602e-05, "loss": 0.0638, "step": 1699 }, { "epoch": 0.44, "learning_rate": 9.749224549190254e-05, "loss": 0.065, "step": 1700 }, { "epoch": 0.44, "learning_rate": 9.748779530823952e-05, "loss": 0.1004, "step": 1701 }, { "epoch": 0.44, "learning_rate": 9.748334128124715e-05, "loss": 0.0658, "step": 1702 }, { "epoch": 0.44, "learning_rate": 9.747888341128586e-05, "loss": 0.0422, "step": 1703 }, { "epoch": 0.44, "learning_rate": 9.747442169871646e-05, "loss": 0.0693, "step": 1704 }, { "epoch": 0.44, "learning_rate": 9.746995614390004e-05, "loss": 0.0971, "step": 1705 }, { "epoch": 0.44, "learning_rate": 9.746548674719804e-05, "loss": 0.0754, "step": 1706 }, { "epoch": 0.44, "learning_rate": 9.746101350897216e-05, "loss": 0.0942, "step": 1707 }, { "epoch": 0.44, "learning_rate": 9.745653642958444e-05, "loss": 0.0807, "step": 1708 }, { "epoch": 0.44, "learning_rate": 9.74520555093972e-05, "loss": 0.0785, "step": 1709 }, { "epoch": 0.44, "learning_rate": 9.744757074877312e-05, "loss": 0.0824, "step": 1710 }, { "epoch": 0.44, "learning_rate": 9.744308214807516e-05, "loss": 0.0746, "step": 1711 }, { "epoch": 0.44, "learning_rate": 9.74385897076666e-05, "loss": 0.066, "step": 1712 }, { "epoch": 0.44, "learning_rate": 9.743409342791101e-05, "loss": 0.0614, "step": 1713 }, { "epoch": 0.44, "learning_rate": 9.742959330917229e-05, "loss": 0.0572, "step": 1714 }, { "epoch": 0.44, "learning_rate": 9.742508935181468e-05, "loss": 0.0805, "step": 1715 }, { "epoch": 0.44, "learning_rate": 9.742058155620266e-05, "loss": 0.0717, "step": 1716 }, { "epoch": 0.44, "learning_rate": 9.741606992270108e-05, "loss": 0.0524, "step": 1717 }, { "epoch": 0.44, "learning_rate": 9.741155445167506e-05, "loss": 0.0684, "step": 1718 }, { "epoch": 0.44, "learning_rate": 9.740703514349009e-05, "loss": 0.0759, "step": 1719 }, { "epoch": 0.44, "learning_rate": 9.740251199851189e-05, "loss": 0.0619, "step": 1720 }, { "epoch": 0.44, "learning_rate": 9.739798501710655e-05, "loss": 0.0583, "step": 1721 }, { "epoch": 0.44, "learning_rate": 9.739345419964044e-05, "loss": 0.0689, "step": 1722 }, { "epoch": 0.44, "learning_rate": 9.738891954648028e-05, "loss": 0.0847, "step": 1723 }, { "epoch": 0.44, "learning_rate": 9.738438105799304e-05, "loss": 0.0853, "step": 1724 }, { "epoch": 0.45, "learning_rate": 9.737983873454605e-05, "loss": 0.0626, "step": 1725 }, { "epoch": 0.45, "learning_rate": 9.737529257650695e-05, "loss": 0.0862, "step": 1726 }, { "epoch": 0.45, "learning_rate": 9.737074258424363e-05, "loss": 0.0531, "step": 1727 }, { "epoch": 0.45, "learning_rate": 9.736618875812438e-05, "loss": 0.0632, "step": 1728 }, { "epoch": 0.45, "learning_rate": 9.736163109851773e-05, "loss": 0.0734, "step": 1729 }, { "epoch": 0.45, "learning_rate": 9.735706960579255e-05, "loss": 0.0593, "step": 1730 }, { "epoch": 0.45, "learning_rate": 9.735250428031801e-05, "loss": 0.0618, "step": 1731 }, { "epoch": 0.45, "learning_rate": 9.734793512246362e-05, "loss": 0.0664, "step": 1732 }, { "epoch": 0.45, "learning_rate": 9.734336213259914e-05, "loss": 0.0754, "step": 1733 }, { "epoch": 0.45, "learning_rate": 9.73387853110947e-05, "loss": 0.0835, "step": 1734 }, { "epoch": 0.45, "learning_rate": 9.73342046583207e-05, "loss": 0.0867, "step": 1735 }, { "epoch": 0.45, "learning_rate": 9.73296201746479e-05, "loss": 0.0693, "step": 1736 }, { "epoch": 0.45, "learning_rate": 9.732503186044728e-05, "loss": 0.0617, "step": 1737 }, { "epoch": 0.45, "learning_rate": 9.732043971609024e-05, "loss": 0.0651, "step": 1738 }, { "epoch": 0.45, "learning_rate": 9.731584374194839e-05, "loss": 0.0746, "step": 1739 }, { "epoch": 0.45, "learning_rate": 9.731124393839375e-05, "loss": 0.0582, "step": 1740 }, { "epoch": 0.45, "learning_rate": 9.730664030579854e-05, "loss": 0.0535, "step": 1741 }, { "epoch": 0.45, "learning_rate": 9.730203284453538e-05, "loss": 0.0493, "step": 1742 }, { "epoch": 0.45, "learning_rate": 9.729742155497715e-05, "loss": 0.0723, "step": 1743 }, { "epoch": 0.45, "learning_rate": 9.729280643749707e-05, "loss": 0.0382, "step": 1744 }, { "epoch": 0.45, "learning_rate": 9.728818749246865e-05, "loss": 0.0627, "step": 1745 }, { "epoch": 0.45, "learning_rate": 9.728356472026569e-05, "loss": 0.036, "step": 1746 }, { "epoch": 0.45, "learning_rate": 9.727893812126237e-05, "loss": 0.074, "step": 1747 }, { "epoch": 0.45, "learning_rate": 9.727430769583309e-05, "loss": 0.0698, "step": 1748 }, { "epoch": 0.45, "learning_rate": 9.726967344435263e-05, "loss": 0.0653, "step": 1749 }, { "epoch": 0.45, "learning_rate": 9.726503536719607e-05, "loss": 0.0715, "step": 1750 }, { "epoch": 0.45, "learning_rate": 9.726039346473874e-05, "loss": 0.0615, "step": 1751 }, { "epoch": 0.45, "learning_rate": 9.725574773735636e-05, "loss": 0.0513, "step": 1752 }, { "epoch": 0.45, "learning_rate": 9.72510981854249e-05, "loss": 0.0665, "step": 1753 }, { "epoch": 0.45, "learning_rate": 9.724644480932065e-05, "loss": 0.0711, "step": 1754 }, { "epoch": 0.45, "learning_rate": 9.724178760942028e-05, "loss": 0.0694, "step": 1755 }, { "epoch": 0.45, "learning_rate": 9.723712658610064e-05, "loss": 0.0564, "step": 1756 }, { "epoch": 0.45, "learning_rate": 9.723246173973902e-05, "loss": 0.0551, "step": 1757 }, { "epoch": 0.45, "learning_rate": 9.722779307071292e-05, "loss": 0.0668, "step": 1758 }, { "epoch": 0.45, "learning_rate": 9.722312057940021e-05, "loss": 0.0403, "step": 1759 }, { "epoch": 0.45, "learning_rate": 9.721844426617904e-05, "loss": 0.0435, "step": 1760 }, { "epoch": 0.45, "learning_rate": 9.721376413142788e-05, "loss": 0.0613, "step": 1761 }, { "epoch": 0.45, "learning_rate": 9.72090801755255e-05, "loss": 0.0636, "step": 1762 }, { "epoch": 0.45, "learning_rate": 9.7204392398851e-05, "loss": 0.0575, "step": 1763 }, { "epoch": 0.46, "learning_rate": 9.719970080178378e-05, "loss": 0.0622, "step": 1764 }, { "epoch": 0.46, "learning_rate": 9.719500538470351e-05, "loss": 0.0368, "step": 1765 }, { "epoch": 0.46, "learning_rate": 9.719030614799026e-05, "loss": 0.059, "step": 1766 }, { "epoch": 0.46, "learning_rate": 9.718560309202429e-05, "loss": 0.0771, "step": 1767 }, { "epoch": 0.46, "learning_rate": 9.71808962171863e-05, "loss": 0.0679, "step": 1768 }, { "epoch": 0.46, "learning_rate": 9.717618552385716e-05, "loss": 0.0494, "step": 1769 }, { "epoch": 0.46, "learning_rate": 9.717147101241816e-05, "loss": 0.0749, "step": 1770 }, { "epoch": 0.46, "learning_rate": 9.716675268325087e-05, "loss": 0.0571, "step": 1771 }, { "epoch": 0.46, "learning_rate": 9.716203053673714e-05, "loss": 0.0572, "step": 1772 }, { "epoch": 0.46, "learning_rate": 9.715730457325916e-05, "loss": 0.0743, "step": 1773 }, { "epoch": 0.46, "learning_rate": 9.715257479319939e-05, "loss": 0.068, "step": 1774 }, { "epoch": 0.46, "learning_rate": 9.714784119694066e-05, "loss": 0.0568, "step": 1775 }, { "epoch": 0.46, "learning_rate": 9.714310378486604e-05, "loss": 0.0663, "step": 1776 }, { "epoch": 0.46, "learning_rate": 9.713836255735898e-05, "loss": 0.06, "step": 1777 }, { "epoch": 0.46, "learning_rate": 9.713361751480316e-05, "loss": 0.0684, "step": 1778 }, { "epoch": 0.46, "learning_rate": 9.712886865758265e-05, "loss": 0.0771, "step": 1779 }, { "epoch": 0.46, "learning_rate": 9.712411598608178e-05, "loss": 0.0711, "step": 1780 }, { "epoch": 0.46, "learning_rate": 9.711935950068516e-05, "loss": 0.056, "step": 1781 }, { "epoch": 0.46, "learning_rate": 9.71145992017778e-05, "loss": 0.0669, "step": 1782 }, { "epoch": 0.46, "learning_rate": 9.710983508974494e-05, "loss": 0.0507, "step": 1783 }, { "epoch": 0.46, "learning_rate": 9.710506716497214e-05, "loss": 0.0645, "step": 1784 }, { "epoch": 0.46, "learning_rate": 9.710029542784532e-05, "loss": 0.0561, "step": 1785 }, { "epoch": 0.46, "learning_rate": 9.709551987875064e-05, "loss": 0.0444, "step": 1786 }, { "epoch": 0.46, "learning_rate": 9.70907405180746e-05, "loss": 0.0574, "step": 1787 }, { "epoch": 0.46, "learning_rate": 9.708595734620403e-05, "loss": 0.0652, "step": 1788 }, { "epoch": 0.46, "learning_rate": 9.708117036352602e-05, "loss": 0.064, "step": 1789 }, { "epoch": 0.46, "learning_rate": 9.707637957042802e-05, "loss": 0.0723, "step": 1790 }, { "epoch": 0.46, "learning_rate": 9.707158496729772e-05, "loss": 0.0611, "step": 1791 }, { "epoch": 0.46, "learning_rate": 9.706678655452321e-05, "loss": 0.063, "step": 1792 }, { "epoch": 0.46, "learning_rate": 9.706198433249285e-05, "loss": 0.0531, "step": 1793 }, { "epoch": 0.46, "learning_rate": 9.705717830159524e-05, "loss": 0.0683, "step": 1794 }, { "epoch": 0.46, "learning_rate": 9.705236846221937e-05, "loss": 0.0799, "step": 1795 }, { "epoch": 0.46, "learning_rate": 9.704755481475454e-05, "loss": 0.0588, "step": 1796 }, { "epoch": 0.46, "learning_rate": 9.704273735959031e-05, "loss": 0.0528, "step": 1797 }, { "epoch": 0.46, "learning_rate": 9.703791609711658e-05, "loss": 0.0549, "step": 1798 }, { "epoch": 0.46, "learning_rate": 9.703309102772353e-05, "loss": 0.0655, "step": 1799 }, { "epoch": 0.46, "learning_rate": 9.70282621518017e-05, "loss": 0.0649, "step": 1800 }, { "epoch": 0.46, "learning_rate": 9.702342946974188e-05, "loss": 0.1078, "step": 1801 }, { "epoch": 0.47, "learning_rate": 9.70185929819352e-05, "loss": 0.0851, "step": 1802 }, { "epoch": 0.47, "learning_rate": 9.701375268877309e-05, "loss": 0.0868, "step": 1803 }, { "epoch": 0.47, "learning_rate": 9.700890859064729e-05, "loss": 0.1087, "step": 1804 }, { "epoch": 0.47, "learning_rate": 9.700406068794986e-05, "loss": 0.0915, "step": 1805 }, { "epoch": 0.47, "learning_rate": 9.699920898107313e-05, "loss": 0.0904, "step": 1806 }, { "epoch": 0.47, "learning_rate": 9.699435347040979e-05, "loss": 0.0682, "step": 1807 }, { "epoch": 0.47, "learning_rate": 9.698949415635279e-05, "loss": 0.106, "step": 1808 }, { "epoch": 0.47, "learning_rate": 9.698463103929542e-05, "loss": 0.0869, "step": 1809 }, { "epoch": 0.47, "learning_rate": 9.697976411963127e-05, "loss": 0.0748, "step": 1810 }, { "epoch": 0.47, "learning_rate": 9.697489339775422e-05, "loss": 0.0837, "step": 1811 }, { "epoch": 0.47, "learning_rate": 9.697001887405847e-05, "loss": 0.0834, "step": 1812 }, { "epoch": 0.47, "learning_rate": 9.696514054893857e-05, "loss": 0.0681, "step": 1813 }, { "epoch": 0.47, "learning_rate": 9.696025842278929e-05, "loss": 0.0671, "step": 1814 }, { "epoch": 0.47, "learning_rate": 9.695537249600577e-05, "loss": 0.0842, "step": 1815 }, { "epoch": 0.47, "learning_rate": 9.695048276898344e-05, "loss": 0.0841, "step": 1816 }, { "epoch": 0.47, "learning_rate": 9.694558924211806e-05, "loss": 0.0458, "step": 1817 }, { "epoch": 0.47, "learning_rate": 9.694069191580566e-05, "loss": 0.0559, "step": 1818 }, { "epoch": 0.47, "learning_rate": 9.693579079044259e-05, "loss": 0.0723, "step": 1819 }, { "epoch": 0.47, "learning_rate": 9.693088586642554e-05, "loss": 0.0685, "step": 1820 }, { "epoch": 0.47, "learning_rate": 9.692597714415144e-05, "loss": 0.0696, "step": 1821 }, { "epoch": 0.47, "learning_rate": 9.692106462401759e-05, "loss": 0.0713, "step": 1822 }, { "epoch": 0.47, "learning_rate": 9.691614830642159e-05, "loss": 0.0718, "step": 1823 }, { "epoch": 0.47, "learning_rate": 9.691122819176131e-05, "loss": 0.0624, "step": 1824 }, { "epoch": 0.47, "learning_rate": 9.690630428043495e-05, "loss": 0.05, "step": 1825 }, { "epoch": 0.47, "learning_rate": 9.690137657284102e-05, "loss": 0.0876, "step": 1826 }, { "epoch": 0.47, "learning_rate": 9.689644506937837e-05, "loss": 0.0606, "step": 1827 }, { "epoch": 0.47, "learning_rate": 9.689150977044605e-05, "loss": 0.0525, "step": 1828 }, { "epoch": 0.47, "learning_rate": 9.688657067644353e-05, "loss": 0.0624, "step": 1829 }, { "epoch": 0.47, "learning_rate": 9.688162778777056e-05, "loss": 0.0527, "step": 1830 }, { "epoch": 0.47, "learning_rate": 9.687668110482715e-05, "loss": 0.0974, "step": 1831 }, { "epoch": 0.47, "learning_rate": 9.687173062801368e-05, "loss": 0.0556, "step": 1832 }, { "epoch": 0.47, "learning_rate": 9.686677635773078e-05, "loss": 0.0603, "step": 1833 }, { "epoch": 0.47, "learning_rate": 9.686181829437943e-05, "loss": 0.0735, "step": 1834 }, { "epoch": 0.47, "learning_rate": 9.68568564383609e-05, "loss": 0.0605, "step": 1835 }, { "epoch": 0.47, "learning_rate": 9.685189079007676e-05, "loss": 0.0881, "step": 1836 }, { "epoch": 0.47, "learning_rate": 9.68469213499289e-05, "loss": 0.0601, "step": 1837 }, { "epoch": 0.47, "learning_rate": 9.684194811831951e-05, "loss": 0.0645, "step": 1838 }, { "epoch": 0.47, "learning_rate": 9.68369710956511e-05, "loss": 0.0512, "step": 1839 }, { "epoch": 0.47, "learning_rate": 9.683199028232646e-05, "loss": 0.048, "step": 1840 }, { "epoch": 0.48, "learning_rate": 9.682700567874869e-05, "loss": 0.0746, "step": 1841 }, { "epoch": 0.48, "learning_rate": 9.682201728532125e-05, "loss": 0.0925, "step": 1842 }, { "epoch": 0.48, "learning_rate": 9.681702510244784e-05, "loss": 0.0713, "step": 1843 }, { "epoch": 0.48, "learning_rate": 9.681202913053247e-05, "loss": 0.0688, "step": 1844 }, { "epoch": 0.48, "learning_rate": 9.680702936997952e-05, "loss": 0.064, "step": 1845 }, { "epoch": 0.48, "learning_rate": 9.680202582119363e-05, "loss": 0.0766, "step": 1846 }, { "epoch": 0.48, "learning_rate": 9.679701848457971e-05, "loss": 0.0718, "step": 1847 }, { "epoch": 0.48, "learning_rate": 9.679200736054307e-05, "loss": 0.077, "step": 1848 }, { "epoch": 0.48, "learning_rate": 9.678699244948925e-05, "loss": 0.0596, "step": 1849 }, { "epoch": 0.48, "learning_rate": 9.678197375182414e-05, "loss": 0.0682, "step": 1850 }, { "epoch": 0.48, "learning_rate": 9.677695126795388e-05, "loss": 0.0584, "step": 1851 }, { "epoch": 0.48, "learning_rate": 9.677192499828498e-05, "loss": 0.074, "step": 1852 }, { "epoch": 0.48, "learning_rate": 9.676689494322427e-05, "loss": 0.0643, "step": 1853 }, { "epoch": 0.48, "learning_rate": 9.676186110317877e-05, "loss": 0.0829, "step": 1854 }, { "epoch": 0.48, "learning_rate": 9.675682347855593e-05, "loss": 0.0691, "step": 1855 }, { "epoch": 0.48, "learning_rate": 9.675178206976345e-05, "loss": 0.0635, "step": 1856 }, { "epoch": 0.48, "learning_rate": 9.674673687720934e-05, "loss": 0.0658, "step": 1857 }, { "epoch": 0.48, "learning_rate": 9.674168790130195e-05, "loss": 0.0753, "step": 1858 }, { "epoch": 0.48, "learning_rate": 9.673663514244988e-05, "loss": 0.0596, "step": 1859 }, { "epoch": 0.48, "learning_rate": 9.673157860106206e-05, "loss": 0.0718, "step": 1860 }, { "epoch": 0.48, "learning_rate": 9.672651827754776e-05, "loss": 0.0578, "step": 1861 }, { "epoch": 0.48, "learning_rate": 9.672145417231652e-05, "loss": 0.0688, "step": 1862 }, { "epoch": 0.48, "learning_rate": 9.671638628577817e-05, "loss": 0.0521, "step": 1863 }, { "epoch": 0.48, "learning_rate": 9.671131461834288e-05, "loss": 0.0561, "step": 1864 }, { "epoch": 0.48, "learning_rate": 9.670623917042114e-05, "loss": 0.0608, "step": 1865 }, { "epoch": 0.48, "learning_rate": 9.67011599424237e-05, "loss": 0.0656, "step": 1866 }, { "epoch": 0.48, "learning_rate": 9.669607693476162e-05, "loss": 0.0506, "step": 1867 }, { "epoch": 0.48, "learning_rate": 9.66909901478463e-05, "loss": 0.0562, "step": 1868 }, { "epoch": 0.48, "learning_rate": 9.668589958208944e-05, "loss": 0.0625, "step": 1869 }, { "epoch": 0.48, "learning_rate": 9.668080523790302e-05, "loss": 0.0757, "step": 1870 }, { "epoch": 0.48, "learning_rate": 9.667570711569936e-05, "loss": 0.0537, "step": 1871 }, { "epoch": 0.48, "learning_rate": 9.667060521589104e-05, "loss": 0.0575, "step": 1872 }, { "epoch": 0.48, "learning_rate": 9.666549953889098e-05, "loss": 0.0681, "step": 1873 }, { "epoch": 0.48, "learning_rate": 9.66603900851124e-05, "loss": 0.0709, "step": 1874 }, { "epoch": 0.48, "learning_rate": 9.665527685496883e-05, "loss": 0.0698, "step": 1875 }, { "epoch": 0.48, "learning_rate": 9.66501598488741e-05, "loss": 0.0649, "step": 1876 }, { "epoch": 0.48, "learning_rate": 9.664503906724233e-05, "loss": 0.0679, "step": 1877 }, { "epoch": 0.48, "learning_rate": 9.663991451048797e-05, "loss": 0.0675, "step": 1878 }, { "epoch": 0.48, "learning_rate": 9.663478617902577e-05, "loss": 0.0532, "step": 1879 }, { "epoch": 0.49, "learning_rate": 9.662965407327076e-05, "loss": 0.0758, "step": 1880 }, { "epoch": 0.49, "learning_rate": 9.662451819363832e-05, "loss": 0.0611, "step": 1881 }, { "epoch": 0.49, "learning_rate": 9.66193785405441e-05, "loss": 0.0641, "step": 1882 }, { "epoch": 0.49, "learning_rate": 9.661423511440408e-05, "loss": 0.0754, "step": 1883 }, { "epoch": 0.49, "learning_rate": 9.660908791563453e-05, "loss": 0.0693, "step": 1884 }, { "epoch": 0.49, "learning_rate": 9.6603936944652e-05, "loss": 0.0593, "step": 1885 }, { "epoch": 0.49, "learning_rate": 9.659878220187342e-05, "loss": 0.0638, "step": 1886 }, { "epoch": 0.49, "learning_rate": 9.659362368771594e-05, "loss": 0.0533, "step": 1887 }, { "epoch": 0.49, "learning_rate": 9.658846140259707e-05, "loss": 0.071, "step": 1888 }, { "epoch": 0.49, "learning_rate": 9.658329534693462e-05, "loss": 0.0602, "step": 1889 }, { "epoch": 0.49, "learning_rate": 9.657812552114667e-05, "loss": 0.0633, "step": 1890 }, { "epoch": 0.49, "learning_rate": 9.657295192565165e-05, "loss": 0.0512, "step": 1891 }, { "epoch": 0.49, "learning_rate": 9.656777456086826e-05, "loss": 0.0461, "step": 1892 }, { "epoch": 0.49, "learning_rate": 9.656259342721552e-05, "loss": 0.0738, "step": 1893 }, { "epoch": 0.49, "learning_rate": 9.655740852511276e-05, "loss": 0.0852, "step": 1894 }, { "epoch": 0.49, "learning_rate": 9.655221985497965e-05, "loss": 0.0692, "step": 1895 }, { "epoch": 0.49, "learning_rate": 9.654702741723604e-05, "loss": 0.0657, "step": 1896 }, { "epoch": 0.49, "learning_rate": 9.654183121230223e-05, "loss": 0.0549, "step": 1897 }, { "epoch": 0.49, "learning_rate": 9.653663124059876e-05, "loss": 0.074, "step": 1898 }, { "epoch": 0.49, "learning_rate": 9.653142750254644e-05, "loss": 0.065, "step": 1899 }, { "epoch": 0.49, "learning_rate": 9.652621999856647e-05, "loss": 0.0542, "step": 1900 }, { "epoch": 0.49, "learning_rate": 9.652100872908029e-05, "loss": 0.0483, "step": 1901 }, { "epoch": 0.49, "learning_rate": 9.651579369450966e-05, "loss": 0.0987, "step": 1902 }, { "epoch": 0.49, "learning_rate": 9.651057489527666e-05, "loss": 0.0922, "step": 1903 }, { "epoch": 0.49, "learning_rate": 9.650535233180365e-05, "loss": 0.0652, "step": 1904 }, { "epoch": 0.49, "learning_rate": 9.650012600451333e-05, "loss": 0.0735, "step": 1905 }, { "epoch": 0.49, "learning_rate": 9.649489591382864e-05, "loss": 0.0924, "step": 1906 }, { "epoch": 0.49, "learning_rate": 9.648966206017291e-05, "loss": 0.0569, "step": 1907 }, { "epoch": 0.49, "learning_rate": 9.64844244439697e-05, "loss": 0.0971, "step": 1908 }, { "epoch": 0.49, "learning_rate": 9.647918306564294e-05, "loss": 0.0575, "step": 1909 }, { "epoch": 0.49, "learning_rate": 9.647393792561681e-05, "loss": 0.0746, "step": 1910 }, { "epoch": 0.49, "learning_rate": 9.64686890243158e-05, "loss": 0.0675, "step": 1911 }, { "epoch": 0.49, "learning_rate": 9.646343636216475e-05, "loss": 0.0858, "step": 1912 }, { "epoch": 0.49, "learning_rate": 9.645817993958875e-05, "loss": 0.0816, "step": 1913 }, { "epoch": 0.49, "learning_rate": 9.645291975701323e-05, "loss": 0.0757, "step": 1914 }, { "epoch": 0.49, "learning_rate": 9.64476558148639e-05, "loss": 0.0601, "step": 1915 }, { "epoch": 0.49, "learning_rate": 9.644238811356681e-05, "loss": 0.0811, "step": 1916 }, { "epoch": 0.49, "learning_rate": 9.643711665354827e-05, "loss": 0.0779, "step": 1917 }, { "epoch": 0.49, "learning_rate": 9.643184143523491e-05, "loss": 0.0518, "step": 1918 }, { "epoch": 0.5, "learning_rate": 9.64265624590537e-05, "loss": 0.0608, "step": 1919 }, { "epoch": 0.5, "learning_rate": 9.642127972543184e-05, "loss": 0.0654, "step": 1920 }, { "epoch": 0.5, "learning_rate": 9.641599323479693e-05, "loss": 0.0487, "step": 1921 }, { "epoch": 0.5, "learning_rate": 9.641070298757677e-05, "loss": 0.0694, "step": 1922 }, { "epoch": 0.5, "learning_rate": 9.640540898419956e-05, "loss": 0.0512, "step": 1923 }, { "epoch": 0.5, "learning_rate": 9.640011122509371e-05, "loss": 0.0553, "step": 1924 }, { "epoch": 0.5, "learning_rate": 9.639480971068804e-05, "loss": 0.0821, "step": 1925 }, { "epoch": 0.5, "learning_rate": 9.638950444141158e-05, "loss": 0.0606, "step": 1926 }, { "epoch": 0.5, "learning_rate": 9.63841954176937e-05, "loss": 0.0556, "step": 1927 }, { "epoch": 0.5, "learning_rate": 9.637888263996409e-05, "loss": 0.0682, "step": 1928 }, { "epoch": 0.5, "learning_rate": 9.637356610865273e-05, "loss": 0.0754, "step": 1929 }, { "epoch": 0.5, "learning_rate": 9.636824582418991e-05, "loss": 0.0597, "step": 1930 }, { "epoch": 0.5, "learning_rate": 9.636292178700618e-05, "loss": 0.0573, "step": 1931 }, { "epoch": 0.5, "learning_rate": 9.635759399753246e-05, "loss": 0.0496, "step": 1932 }, { "epoch": 0.5, "learning_rate": 9.635226245619995e-05, "loss": 0.0609, "step": 1933 }, { "epoch": 0.5, "learning_rate": 9.634692716344011e-05, "loss": 0.0531, "step": 1934 }, { "epoch": 0.5, "learning_rate": 9.634158811968479e-05, "loss": 0.0679, "step": 1935 }, { "epoch": 0.5, "learning_rate": 9.633624532536607e-05, "loss": 0.0678, "step": 1936 }, { "epoch": 0.5, "learning_rate": 9.633089878091634e-05, "loss": 0.0827, "step": 1937 }, { "epoch": 0.5, "learning_rate": 9.632554848676836e-05, "loss": 0.065, "step": 1938 }, { "epoch": 0.5, "learning_rate": 9.63201944433551e-05, "loss": 0.0619, "step": 1939 }, { "epoch": 0.5, "learning_rate": 9.63148366511099e-05, "loss": 0.0607, "step": 1940 }, { "epoch": 0.5, "learning_rate": 9.630947511046638e-05, "loss": 0.0694, "step": 1941 }, { "epoch": 0.5, "learning_rate": 9.630410982185844e-05, "loss": 0.0784, "step": 1942 }, { "epoch": 0.5, "learning_rate": 9.629874078572037e-05, "loss": 0.0666, "step": 1943 }, { "epoch": 0.5, "learning_rate": 9.629336800248665e-05, "loss": 0.0511, "step": 1944 }, { "epoch": 0.5, "learning_rate": 9.628799147259212e-05, "loss": 0.0636, "step": 1945 }, { "epoch": 0.5, "learning_rate": 9.628261119647193e-05, "loss": 0.0654, "step": 1946 }, { "epoch": 0.5, "learning_rate": 9.627722717456154e-05, "loss": 0.0771, "step": 1947 }, { "epoch": 0.5, "learning_rate": 9.627183940729666e-05, "loss": 0.0669, "step": 1948 }, { "epoch": 0.5, "learning_rate": 9.626644789511335e-05, "loss": 0.0766, "step": 1949 }, { "epoch": 0.5, "learning_rate": 9.626105263844797e-05, "loss": 0.0748, "step": 1950 }, { "epoch": 0.5, "learning_rate": 9.625565363773716e-05, "loss": 0.0663, "step": 1951 }, { "epoch": 0.5, "learning_rate": 9.625025089341791e-05, "loss": 0.0542, "step": 1952 }, { "epoch": 0.5, "learning_rate": 9.624484440592743e-05, "loss": 0.0483, "step": 1953 }, { "epoch": 0.5, "learning_rate": 9.623943417570334e-05, "loss": 0.0675, "step": 1954 }, { "epoch": 0.5, "learning_rate": 9.623402020318345e-05, "loss": 0.0601, "step": 1955 }, { "epoch": 0.5, "learning_rate": 9.622860248880596e-05, "loss": 0.0571, "step": 1956 }, { "epoch": 0.51, "learning_rate": 9.622318103300933e-05, "loss": 0.079, "step": 1957 }, { "epoch": 0.51, "learning_rate": 9.621775583623236e-05, "loss": 0.0578, "step": 1958 }, { "epoch": 0.51, "learning_rate": 9.621232689891411e-05, "loss": 0.0519, "step": 1959 }, { "epoch": 0.51, "learning_rate": 9.620689422149394e-05, "loss": 0.0749, "step": 1960 }, { "epoch": 0.51, "learning_rate": 9.620145780441156e-05, "loss": 0.0646, "step": 1961 }, { "epoch": 0.51, "learning_rate": 9.619601764810695e-05, "loss": 0.0621, "step": 1962 }, { "epoch": 0.51, "learning_rate": 9.61905737530204e-05, "loss": 0.0796, "step": 1963 }, { "epoch": 0.51, "learning_rate": 9.618512611959249e-05, "loss": 0.0594, "step": 1964 }, { "epoch": 0.51, "learning_rate": 9.617967474826411e-05, "loss": 0.0884, "step": 1965 }, { "epoch": 0.51, "learning_rate": 9.617421963947648e-05, "loss": 0.0718, "step": 1966 }, { "epoch": 0.51, "learning_rate": 9.616876079367108e-05, "loss": 0.0593, "step": 1967 }, { "epoch": 0.51, "learning_rate": 9.616329821128971e-05, "loss": 0.0648, "step": 1968 }, { "epoch": 0.51, "learning_rate": 9.615783189277447e-05, "loss": 0.048, "step": 1969 }, { "epoch": 0.51, "learning_rate": 9.61523618385678e-05, "loss": 0.0653, "step": 1970 }, { "epoch": 0.51, "learning_rate": 9.614688804911237e-05, "loss": 0.0744, "step": 1971 }, { "epoch": 0.51, "learning_rate": 9.614141052485117e-05, "loss": 0.0592, "step": 1972 }, { "epoch": 0.51, "learning_rate": 9.613592926622758e-05, "loss": 0.0675, "step": 1973 }, { "epoch": 0.51, "learning_rate": 9.613044427368516e-05, "loss": 0.0473, "step": 1974 }, { "epoch": 0.51, "learning_rate": 9.612495554766785e-05, "loss": 0.0541, "step": 1975 }, { "epoch": 0.51, "learning_rate": 9.611946308861987e-05, "loss": 0.0623, "step": 1976 }, { "epoch": 0.51, "learning_rate": 9.611396689698573e-05, "loss": 0.0742, "step": 1977 }, { "epoch": 0.51, "learning_rate": 9.610846697321026e-05, "loss": 0.0703, "step": 1978 }, { "epoch": 0.51, "learning_rate": 9.610296331773858e-05, "loss": 0.0724, "step": 1979 }, { "epoch": 0.51, "learning_rate": 9.609745593101611e-05, "loss": 0.0764, "step": 1980 }, { "epoch": 0.51, "learning_rate": 9.609194481348861e-05, "loss": 0.0515, "step": 1981 }, { "epoch": 0.51, "learning_rate": 9.608642996560209e-05, "loss": 0.0899, "step": 1982 }, { "epoch": 0.51, "learning_rate": 9.608091138780286e-05, "loss": 0.0695, "step": 1983 }, { "epoch": 0.51, "learning_rate": 9.607538908053761e-05, "loss": 0.064, "step": 1984 }, { "epoch": 0.51, "learning_rate": 9.606986304425323e-05, "loss": 0.0638, "step": 1985 }, { "epoch": 0.51, "learning_rate": 9.606433327939698e-05, "loss": 0.0756, "step": 1986 }, { "epoch": 0.51, "learning_rate": 9.60587997864164e-05, "loss": 0.0549, "step": 1987 }, { "epoch": 0.51, "learning_rate": 9.605326256575932e-05, "loss": 0.0602, "step": 1988 }, { "epoch": 0.51, "learning_rate": 9.604772161787389e-05, "loss": 0.0655, "step": 1989 }, { "epoch": 0.51, "learning_rate": 9.604217694320857e-05, "loss": 0.0595, "step": 1990 }, { "epoch": 0.51, "learning_rate": 9.603662854221207e-05, "loss": 0.0552, "step": 1991 }, { "epoch": 0.51, "learning_rate": 9.603107641533348e-05, "loss": 0.0629, "step": 1992 }, { "epoch": 0.51, "learning_rate": 9.602552056302215e-05, "loss": 0.0833, "step": 1993 }, { "epoch": 0.51, "learning_rate": 9.601996098572767e-05, "loss": 0.0608, "step": 1994 }, { "epoch": 0.51, "learning_rate": 9.601439768390007e-05, "loss": 0.0907, "step": 1995 }, { "epoch": 0.52, "learning_rate": 9.600883065798957e-05, "loss": 0.0768, "step": 1996 }, { "epoch": 0.52, "learning_rate": 9.600325990844672e-05, "loss": 0.0697, "step": 1997 }, { "epoch": 0.52, "learning_rate": 9.599768543572239e-05, "loss": 0.0788, "step": 1998 }, { "epoch": 0.52, "learning_rate": 9.599210724026773e-05, "loss": 0.0389, "step": 1999 }, { "epoch": 0.52, "learning_rate": 9.598652532253422e-05, "loss": 0.0643, "step": 2000 }, { "epoch": 0.52, "learning_rate": 9.598093968297359e-05, "loss": 0.0364, "step": 2001 }, { "epoch": 0.52, "learning_rate": 9.597535032203793e-05, "loss": 0.0652, "step": 2002 }, { "epoch": 0.52, "learning_rate": 9.59697572401796e-05, "loss": 0.0868, "step": 2003 }, { "epoch": 0.52, "learning_rate": 9.596416043785124e-05, "loss": 0.081, "step": 2004 }, { "epoch": 0.52, "learning_rate": 9.595855991550584e-05, "loss": 0.0737, "step": 2005 }, { "epoch": 0.52, "learning_rate": 9.595295567359666e-05, "loss": 0.073, "step": 2006 }, { "epoch": 0.52, "learning_rate": 9.594734771257725e-05, "loss": 0.0626, "step": 2007 }, { "epoch": 0.52, "learning_rate": 9.594173603290153e-05, "loss": 0.0737, "step": 2008 }, { "epoch": 0.52, "learning_rate": 9.593612063502362e-05, "loss": 0.0715, "step": 2009 }, { "epoch": 0.52, "learning_rate": 9.5930501519398e-05, "loss": 0.0673, "step": 2010 }, { "epoch": 0.52, "learning_rate": 9.592487868647947e-05, "loss": 0.051, "step": 2011 }, { "epoch": 0.52, "learning_rate": 9.591925213672307e-05, "loss": 0.0591, "step": 2012 }, { "epoch": 0.52, "learning_rate": 9.591362187058419e-05, "loss": 0.0719, "step": 2013 }, { "epoch": 0.52, "learning_rate": 9.590798788851851e-05, "loss": 0.0777, "step": 2014 }, { "epoch": 0.52, "learning_rate": 9.590235019098197e-05, "loss": 0.0885, "step": 2015 }, { "epoch": 0.52, "learning_rate": 9.589670877843089e-05, "loss": 0.069, "step": 2016 }, { "epoch": 0.52, "learning_rate": 9.589106365132184e-05, "loss": 0.0587, "step": 2017 }, { "epoch": 0.52, "learning_rate": 9.588541481011167e-05, "loss": 0.0764, "step": 2018 }, { "epoch": 0.52, "learning_rate": 9.587976225525757e-05, "loss": 0.0437, "step": 2019 }, { "epoch": 0.52, "learning_rate": 9.587410598721702e-05, "loss": 0.0418, "step": 2020 }, { "epoch": 0.52, "learning_rate": 9.58684460064478e-05, "loss": 0.0869, "step": 2021 }, { "epoch": 0.52, "learning_rate": 9.5862782313408e-05, "loss": 0.0618, "step": 2022 }, { "epoch": 0.52, "learning_rate": 9.585711490855598e-05, "loss": 0.0829, "step": 2023 }, { "epoch": 0.52, "learning_rate": 9.585144379235043e-05, "loss": 0.087, "step": 2024 }, { "epoch": 0.52, "learning_rate": 9.584576896525032e-05, "loss": 0.0539, "step": 2025 }, { "epoch": 0.52, "learning_rate": 9.584009042771496e-05, "loss": 0.0773, "step": 2026 }, { "epoch": 0.52, "learning_rate": 9.583440818020389e-05, "loss": 0.0779, "step": 2027 }, { "epoch": 0.52, "learning_rate": 9.582872222317702e-05, "loss": 0.0672, "step": 2028 }, { "epoch": 0.52, "learning_rate": 9.582303255709453e-05, "loss": 0.0657, "step": 2029 }, { "epoch": 0.52, "learning_rate": 9.581733918241689e-05, "loss": 0.0463, "step": 2030 }, { "epoch": 0.52, "learning_rate": 9.581164209960487e-05, "loss": 0.0902, "step": 2031 }, { "epoch": 0.52, "learning_rate": 9.58059413091196e-05, "loss": 0.0492, "step": 2032 }, { "epoch": 0.52, "learning_rate": 9.580023681142241e-05, "loss": 0.0682, "step": 2033 }, { "epoch": 0.52, "learning_rate": 9.5794528606975e-05, "loss": 0.0629, "step": 2034 }, { "epoch": 0.53, "learning_rate": 9.578881669623936e-05, "loss": 0.0643, "step": 2035 }, { "epoch": 0.53, "learning_rate": 9.578310107967778e-05, "loss": 0.0748, "step": 2036 }, { "epoch": 0.53, "learning_rate": 9.577738175775281e-05, "loss": 0.0593, "step": 2037 }, { "epoch": 0.53, "learning_rate": 9.577165873092736e-05, "loss": 0.0619, "step": 2038 }, { "epoch": 0.53, "learning_rate": 9.57659319996646e-05, "loss": 0.0561, "step": 2039 }, { "epoch": 0.53, "learning_rate": 9.576020156442802e-05, "loss": 0.0612, "step": 2040 }, { "epoch": 0.53, "learning_rate": 9.575446742568139e-05, "loss": 0.081, "step": 2041 }, { "epoch": 0.53, "learning_rate": 9.574872958388879e-05, "loss": 0.0508, "step": 2042 }, { "epoch": 0.53, "learning_rate": 9.574298803951462e-05, "loss": 0.0363, "step": 2043 }, { "epoch": 0.53, "learning_rate": 9.573724279302354e-05, "loss": 0.0678, "step": 2044 }, { "epoch": 0.53, "learning_rate": 9.573149384488054e-05, "loss": 0.074, "step": 2045 }, { "epoch": 0.53, "learning_rate": 9.57257411955509e-05, "loss": 0.0753, "step": 2046 }, { "epoch": 0.53, "learning_rate": 9.571998484550018e-05, "loss": 0.0605, "step": 2047 }, { "epoch": 0.53, "learning_rate": 9.57142247951943e-05, "loss": 0.0501, "step": 2048 }, { "epoch": 0.53, "learning_rate": 9.570846104509938e-05, "loss": 0.0691, "step": 2049 }, { "epoch": 0.53, "learning_rate": 9.570269359568196e-05, "loss": 0.0601, "step": 2050 }, { "epoch": 0.53, "learning_rate": 9.569692244740878e-05, "loss": 0.0621, "step": 2051 }, { "epoch": 0.53, "learning_rate": 9.569114760074691e-05, "loss": 0.0576, "step": 2052 }, { "epoch": 0.53, "learning_rate": 9.568536905616374e-05, "loss": 0.0815, "step": 2053 }, { "epoch": 0.53, "learning_rate": 9.567958681412697e-05, "loss": 0.086, "step": 2054 }, { "epoch": 0.53, "learning_rate": 9.567380087510452e-05, "loss": 0.0659, "step": 2055 }, { "epoch": 0.53, "learning_rate": 9.56680112395647e-05, "loss": 0.0651, "step": 2056 }, { "epoch": 0.53, "learning_rate": 9.56622179079761e-05, "loss": 0.0755, "step": 2057 }, { "epoch": 0.53, "learning_rate": 9.565642088080753e-05, "loss": 0.0794, "step": 2058 }, { "epoch": 0.53, "learning_rate": 9.56506201585282e-05, "loss": 0.0739, "step": 2059 }, { "epoch": 0.53, "learning_rate": 9.56448157416076e-05, "loss": 0.0661, "step": 2060 }, { "epoch": 0.53, "learning_rate": 9.563900763051545e-05, "loss": 0.0763, "step": 2061 }, { "epoch": 0.53, "learning_rate": 9.563319582572186e-05, "loss": 0.0638, "step": 2062 }, { "epoch": 0.53, "learning_rate": 9.562738032769716e-05, "loss": 0.065, "step": 2063 }, { "epoch": 0.53, "learning_rate": 9.562156113691205e-05, "loss": 0.0603, "step": 2064 }, { "epoch": 0.53, "learning_rate": 9.561573825383748e-05, "loss": 0.0594, "step": 2065 }, { "epoch": 0.53, "learning_rate": 9.560991167894472e-05, "loss": 0.0592, "step": 2066 }, { "epoch": 0.53, "learning_rate": 9.560408141270531e-05, "loss": 0.0606, "step": 2067 }, { "epoch": 0.53, "learning_rate": 9.559824745559114e-05, "loss": 0.0497, "step": 2068 }, { "epoch": 0.53, "learning_rate": 9.559240980807435e-05, "loss": 0.0498, "step": 2069 }, { "epoch": 0.53, "learning_rate": 9.55865684706274e-05, "loss": 0.0874, "step": 2070 }, { "epoch": 0.53, "learning_rate": 9.558072344372305e-05, "loss": 0.0542, "step": 2071 }, { "epoch": 0.53, "learning_rate": 9.557487472783437e-05, "loss": 0.0544, "step": 2072 }, { "epoch": 0.53, "learning_rate": 9.556902232343469e-05, "loss": 0.0547, "step": 2073 }, { "epoch": 0.54, "learning_rate": 9.556316623099768e-05, "loss": 0.0513, "step": 2074 }, { "epoch": 0.54, "learning_rate": 9.555730645099728e-05, "loss": 0.057, "step": 2075 }, { "epoch": 0.54, "learning_rate": 9.555144298390774e-05, "loss": 0.0779, "step": 2076 }, { "epoch": 0.54, "learning_rate": 9.554557583020361e-05, "loss": 0.0576, "step": 2077 }, { "epoch": 0.54, "learning_rate": 9.553970499035973e-05, "loss": 0.0402, "step": 2078 }, { "epoch": 0.54, "learning_rate": 9.553383046485129e-05, "loss": 0.0746, "step": 2079 }, { "epoch": 0.54, "learning_rate": 9.552795225415366e-05, "loss": 0.0599, "step": 2080 }, { "epoch": 0.54, "learning_rate": 9.552207035874263e-05, "loss": 0.0511, "step": 2081 }, { "epoch": 0.54, "learning_rate": 9.551618477909423e-05, "loss": 0.052, "step": 2082 }, { "epoch": 0.54, "learning_rate": 9.551029551568478e-05, "loss": 0.0598, "step": 2083 }, { "epoch": 0.54, "learning_rate": 9.550440256899093e-05, "loss": 0.0759, "step": 2084 }, { "epoch": 0.54, "learning_rate": 9.549850593948963e-05, "loss": 0.0669, "step": 2085 }, { "epoch": 0.54, "learning_rate": 9.549260562765809e-05, "loss": 0.0706, "step": 2086 }, { "epoch": 0.54, "learning_rate": 9.548670163397384e-05, "loss": 0.0773, "step": 2087 }, { "epoch": 0.54, "learning_rate": 9.548079395891471e-05, "loss": 0.0585, "step": 2088 }, { "epoch": 0.54, "learning_rate": 9.547488260295885e-05, "loss": 0.0696, "step": 2089 }, { "epoch": 0.54, "learning_rate": 9.546896756658463e-05, "loss": 0.057, "step": 2090 }, { "epoch": 0.54, "learning_rate": 9.546304885027082e-05, "loss": 0.0525, "step": 2091 }, { "epoch": 0.54, "learning_rate": 9.545712645449644e-05, "loss": 0.0476, "step": 2092 }, { "epoch": 0.54, "learning_rate": 9.545120037974077e-05, "loss": 0.052, "step": 2093 }, { "epoch": 0.54, "learning_rate": 9.544527062648345e-05, "loss": 0.0604, "step": 2094 }, { "epoch": 0.54, "learning_rate": 9.54393371952044e-05, "loss": 0.0763, "step": 2095 }, { "epoch": 0.54, "learning_rate": 9.54334000863838e-05, "loss": 0.0436, "step": 2096 }, { "epoch": 0.54, "learning_rate": 9.542745930050219e-05, "loss": 0.075, "step": 2097 }, { "epoch": 0.54, "learning_rate": 9.542151483804036e-05, "loss": 0.0732, "step": 2098 }, { "epoch": 0.54, "learning_rate": 9.54155666994794e-05, "loss": 0.0649, "step": 2099 }, { "epoch": 0.54, "learning_rate": 9.540961488530074e-05, "loss": 0.0715, "step": 2100 }, { "epoch": 0.54, "learning_rate": 9.540365939598606e-05, "loss": 0.0562, "step": 2101 }, { "epoch": 0.54, "learning_rate": 9.539770023201737e-05, "loss": 0.0621, "step": 2102 }, { "epoch": 0.54, "learning_rate": 9.539173739387694e-05, "loss": 0.1075, "step": 2103 }, { "epoch": 0.54, "learning_rate": 9.538577088204739e-05, "loss": 0.062, "step": 2104 }, { "epoch": 0.54, "learning_rate": 9.537980069701158e-05, "loss": 0.0559, "step": 2105 }, { "epoch": 0.54, "learning_rate": 9.53738268392527e-05, "loss": 0.0726, "step": 2106 }, { "epoch": 0.54, "learning_rate": 9.536784930925424e-05, "loss": 0.0695, "step": 2107 }, { "epoch": 0.54, "learning_rate": 9.536186810749999e-05, "loss": 0.0636, "step": 2108 }, { "epoch": 0.54, "learning_rate": 9.5355883234474e-05, "loss": 0.0912, "step": 2109 }, { "epoch": 0.54, "learning_rate": 9.534989469066067e-05, "loss": 0.0761, "step": 2110 }, { "epoch": 0.54, "learning_rate": 9.534390247654464e-05, "loss": 0.0844, "step": 2111 }, { "epoch": 0.55, "learning_rate": 9.53379065926109e-05, "loss": 0.0613, "step": 2112 }, { "epoch": 0.55, "learning_rate": 9.533190703934473e-05, "loss": 0.0808, "step": 2113 }, { "epoch": 0.55, "learning_rate": 9.532590381723165e-05, "loss": 0.0832, "step": 2114 }, { "epoch": 0.55, "learning_rate": 9.531989692675756e-05, "loss": 0.0737, "step": 2115 }, { "epoch": 0.55, "learning_rate": 9.531388636840859e-05, "loss": 0.0612, "step": 2116 }, { "epoch": 0.55, "learning_rate": 9.53078721426712e-05, "loss": 0.0714, "step": 2117 }, { "epoch": 0.55, "learning_rate": 9.530185425003214e-05, "loss": 0.0444, "step": 2118 }, { "epoch": 0.55, "learning_rate": 9.529583269097845e-05, "loss": 0.0758, "step": 2119 }, { "epoch": 0.55, "learning_rate": 9.528980746599747e-05, "loss": 0.0529, "step": 2120 }, { "epoch": 0.55, "learning_rate": 9.528377857557687e-05, "loss": 0.068, "step": 2121 }, { "epoch": 0.55, "learning_rate": 9.527774602020454e-05, "loss": 0.0566, "step": 2122 }, { "epoch": 0.55, "learning_rate": 9.527170980036875e-05, "loss": 0.0527, "step": 2123 }, { "epoch": 0.55, "learning_rate": 9.526566991655801e-05, "loss": 0.0731, "step": 2124 }, { "epoch": 0.55, "learning_rate": 9.525962636926116e-05, "loss": 0.0629, "step": 2125 }, { "epoch": 0.55, "learning_rate": 9.525357915896732e-05, "loss": 0.0629, "step": 2126 }, { "epoch": 0.55, "learning_rate": 9.524752828616588e-05, "loss": 0.0501, "step": 2127 }, { "epoch": 0.55, "learning_rate": 9.524147375134658e-05, "loss": 0.0633, "step": 2128 }, { "epoch": 0.55, "learning_rate": 9.523541555499945e-05, "loss": 0.0592, "step": 2129 }, { "epoch": 0.55, "learning_rate": 9.522935369761475e-05, "loss": 0.0596, "step": 2130 }, { "epoch": 0.55, "learning_rate": 9.522328817968314e-05, "loss": 0.0592, "step": 2131 }, { "epoch": 0.55, "learning_rate": 9.521721900169547e-05, "loss": 0.0407, "step": 2132 }, { "epoch": 0.55, "learning_rate": 9.521114616414297e-05, "loss": 0.055, "step": 2133 }, { "epoch": 0.55, "learning_rate": 9.520506966751712e-05, "loss": 0.0765, "step": 2134 }, { "epoch": 0.55, "learning_rate": 9.51989895123097e-05, "loss": 0.0634, "step": 2135 }, { "epoch": 0.55, "learning_rate": 9.519290569901282e-05, "loss": 0.0529, "step": 2136 }, { "epoch": 0.55, "learning_rate": 9.518681822811885e-05, "loss": 0.0456, "step": 2137 }, { "epoch": 0.55, "learning_rate": 9.518072710012045e-05, "loss": 0.0676, "step": 2138 }, { "epoch": 0.55, "learning_rate": 9.517463231551061e-05, "loss": 0.0627, "step": 2139 }, { "epoch": 0.55, "learning_rate": 9.51685338747826e-05, "loss": 0.0693, "step": 2140 }, { "epoch": 0.55, "learning_rate": 9.516243177842998e-05, "loss": 0.0569, "step": 2141 }, { "epoch": 0.55, "learning_rate": 9.515632602694663e-05, "loss": 0.054, "step": 2142 }, { "epoch": 0.55, "learning_rate": 9.515021662082667e-05, "loss": 0.0528, "step": 2143 }, { "epoch": 0.55, "learning_rate": 9.514410356056458e-05, "loss": 0.0479, "step": 2144 }, { "epoch": 0.55, "learning_rate": 9.513798684665509e-05, "loss": 0.0693, "step": 2145 }, { "epoch": 0.55, "learning_rate": 9.513186647959327e-05, "loss": 0.0567, "step": 2146 }, { "epoch": 0.55, "learning_rate": 9.512574245987443e-05, "loss": 0.0894, "step": 2147 }, { "epoch": 0.55, "learning_rate": 9.511961478799423e-05, "loss": 0.0472, "step": 2148 }, { "epoch": 0.55, "learning_rate": 9.511348346444859e-05, "loss": 0.0578, "step": 2149 }, { "epoch": 0.55, "learning_rate": 9.510734848973373e-05, "loss": 0.0681, "step": 2150 }, { "epoch": 0.56, "learning_rate": 9.510120986434618e-05, "loss": 0.0901, "step": 2151 }, { "epoch": 0.56, "learning_rate": 9.509506758878275e-05, "loss": 0.0514, "step": 2152 }, { "epoch": 0.56, "learning_rate": 9.508892166354058e-05, "loss": 0.0639, "step": 2153 }, { "epoch": 0.56, "learning_rate": 9.508277208911703e-05, "loss": 0.0816, "step": 2154 }, { "epoch": 0.56, "learning_rate": 9.507661886600984e-05, "loss": 0.0515, "step": 2155 }, { "epoch": 0.56, "learning_rate": 9.507046199471699e-05, "loss": 0.0488, "step": 2156 }, { "epoch": 0.56, "learning_rate": 9.50643014757368e-05, "loss": 0.0465, "step": 2157 }, { "epoch": 0.56, "learning_rate": 9.505813730956783e-05, "loss": 0.0633, "step": 2158 }, { "epoch": 0.56, "learning_rate": 9.505196949670898e-05, "loss": 0.0675, "step": 2159 }, { "epoch": 0.56, "learning_rate": 9.504579803765943e-05, "loss": 0.0462, "step": 2160 }, { "epoch": 0.56, "learning_rate": 9.503962293291864e-05, "loss": 0.0631, "step": 2161 }, { "epoch": 0.56, "learning_rate": 9.503344418298638e-05, "loss": 0.0577, "step": 2162 }, { "epoch": 0.56, "learning_rate": 9.502726178836274e-05, "loss": 0.0633, "step": 2163 }, { "epoch": 0.56, "learning_rate": 9.502107574954808e-05, "loss": 0.0587, "step": 2164 }, { "epoch": 0.56, "learning_rate": 9.501488606704302e-05, "loss": 0.0648, "step": 2165 }, { "epoch": 0.56, "learning_rate": 9.500869274134853e-05, "loss": 0.0757, "step": 2166 }, { "epoch": 0.56, "learning_rate": 9.500249577296585e-05, "loss": 0.0692, "step": 2167 }, { "epoch": 0.56, "learning_rate": 9.499629516239654e-05, "loss": 0.0381, "step": 2168 }, { "epoch": 0.56, "learning_rate": 9.49900909101424e-05, "loss": 0.0598, "step": 2169 }, { "epoch": 0.56, "learning_rate": 9.498388301670558e-05, "loss": 0.0671, "step": 2170 }, { "epoch": 0.56, "learning_rate": 9.49776714825885e-05, "loss": 0.0529, "step": 2171 }, { "epoch": 0.56, "learning_rate": 9.497145630829387e-05, "loss": 0.0494, "step": 2172 }, { "epoch": 0.56, "learning_rate": 9.496523749432471e-05, "loss": 0.0778, "step": 2173 }, { "epoch": 0.56, "learning_rate": 9.495901504118433e-05, "loss": 0.0569, "step": 2174 }, { "epoch": 0.56, "learning_rate": 9.495278894937632e-05, "loss": 0.0676, "step": 2175 }, { "epoch": 0.56, "learning_rate": 9.49465592194046e-05, "loss": 0.0672, "step": 2176 }, { "epoch": 0.56, "learning_rate": 9.494032585177333e-05, "loss": 0.0459, "step": 2177 }, { "epoch": 0.56, "learning_rate": 9.493408884698703e-05, "loss": 0.0732, "step": 2178 }, { "epoch": 0.56, "learning_rate": 9.492784820555044e-05, "loss": 0.056, "step": 2179 }, { "epoch": 0.56, "learning_rate": 9.492160392796865e-05, "loss": 0.0518, "step": 2180 }, { "epoch": 0.56, "learning_rate": 9.491535601474704e-05, "loss": 0.061, "step": 2181 }, { "epoch": 0.56, "learning_rate": 9.490910446639128e-05, "loss": 0.066, "step": 2182 }, { "epoch": 0.56, "learning_rate": 9.490284928340729e-05, "loss": 0.0606, "step": 2183 }, { "epoch": 0.56, "learning_rate": 9.489659046630134e-05, "loss": 0.0627, "step": 2184 }, { "epoch": 0.56, "learning_rate": 9.489032801557997e-05, "loss": 0.0437, "step": 2185 }, { "epoch": 0.56, "learning_rate": 9.488406193175005e-05, "loss": 0.0653, "step": 2186 }, { "epoch": 0.56, "learning_rate": 9.487779221531868e-05, "loss": 0.0851, "step": 2187 }, { "epoch": 0.56, "learning_rate": 9.487151886679327e-05, "loss": 0.0663, "step": 2188 }, { "epoch": 0.56, "learning_rate": 9.486524188668159e-05, "loss": 0.0616, "step": 2189 }, { "epoch": 0.57, "learning_rate": 9.485896127549163e-05, "loss": 0.052, "step": 2190 }, { "epoch": 0.57, "learning_rate": 9.485267703373168e-05, "loss": 0.0688, "step": 2191 }, { "epoch": 0.57, "learning_rate": 9.484638916191037e-05, "loss": 0.0668, "step": 2192 }, { "epoch": 0.57, "learning_rate": 9.484009766053659e-05, "loss": 0.0623, "step": 2193 }, { "epoch": 0.57, "learning_rate": 9.483380253011953e-05, "loss": 0.0738, "step": 2194 }, { "epoch": 0.57, "learning_rate": 9.482750377116867e-05, "loss": 0.0708, "step": 2195 }, { "epoch": 0.57, "learning_rate": 9.482120138419379e-05, "loss": 0.0553, "step": 2196 }, { "epoch": 0.57, "learning_rate": 9.481489536970496e-05, "loss": 0.073, "step": 2197 }, { "epoch": 0.57, "learning_rate": 9.480858572821255e-05, "loss": 0.0512, "step": 2198 }, { "epoch": 0.57, "learning_rate": 9.48022724602272e-05, "loss": 0.0603, "step": 2199 }, { "epoch": 0.57, "learning_rate": 9.479595556625987e-05, "loss": 0.0564, "step": 2200 }, { "epoch": 0.57, "learning_rate": 9.478963504682181e-05, "loss": 0.0569, "step": 2201 }, { "epoch": 0.57, "learning_rate": 9.478331090242456e-05, "loss": 0.0642, "step": 2202 }, { "epoch": 0.57, "learning_rate": 9.477698313357996e-05, "loss": 0.0896, "step": 2203 }, { "epoch": 0.57, "learning_rate": 9.477065174080012e-05, "loss": 0.0983, "step": 2204 }, { "epoch": 0.57, "learning_rate": 9.476431672459747e-05, "loss": 0.0749, "step": 2205 }, { "epoch": 0.57, "learning_rate": 9.47579780854847e-05, "loss": 0.0745, "step": 2206 }, { "epoch": 0.57, "learning_rate": 9.475163582397484e-05, "loss": 0.0967, "step": 2207 }, { "epoch": 0.57, "learning_rate": 9.474528994058119e-05, "loss": 0.0633, "step": 2208 }, { "epoch": 0.57, "learning_rate": 9.47389404358173e-05, "loss": 0.0671, "step": 2209 }, { "epoch": 0.57, "learning_rate": 9.473258731019711e-05, "loss": 0.0671, "step": 2210 }, { "epoch": 0.57, "learning_rate": 9.472623056423477e-05, "loss": 0.0615, "step": 2211 }, { "epoch": 0.57, "learning_rate": 9.471987019844474e-05, "loss": 0.0695, "step": 2212 }, { "epoch": 0.57, "learning_rate": 9.47135062133418e-05, "loss": 0.0709, "step": 2213 }, { "epoch": 0.57, "learning_rate": 9.470713860944101e-05, "loss": 0.0609, "step": 2214 }, { "epoch": 0.57, "learning_rate": 9.47007673872577e-05, "loss": 0.0843, "step": 2215 }, { "epoch": 0.57, "learning_rate": 9.469439254730754e-05, "loss": 0.0477, "step": 2216 }, { "epoch": 0.57, "learning_rate": 9.468801409010643e-05, "loss": 0.0625, "step": 2217 }, { "epoch": 0.57, "learning_rate": 9.468163201617062e-05, "loss": 0.0626, "step": 2218 }, { "epoch": 0.57, "learning_rate": 9.467524632601663e-05, "loss": 0.048, "step": 2219 }, { "epoch": 0.57, "learning_rate": 9.466885702016127e-05, "loss": 0.0629, "step": 2220 }, { "epoch": 0.57, "learning_rate": 9.466246409912164e-05, "loss": 0.07, "step": 2221 }, { "epoch": 0.57, "learning_rate": 9.465606756341515e-05, "loss": 0.0747, "step": 2222 }, { "epoch": 0.57, "learning_rate": 9.464966741355947e-05, "loss": 0.0812, "step": 2223 }, { "epoch": 0.57, "learning_rate": 9.464326365007261e-05, "loss": 0.0672, "step": 2224 }, { "epoch": 0.57, "learning_rate": 9.463685627347283e-05, "loss": 0.0608, "step": 2225 }, { "epoch": 0.57, "learning_rate": 9.463044528427871e-05, "loss": 0.0644, "step": 2226 }, { "epoch": 0.57, "learning_rate": 9.462403068300909e-05, "loss": 0.0635, "step": 2227 }, { "epoch": 0.57, "learning_rate": 9.461761247018314e-05, "loss": 0.0584, "step": 2228 }, { "epoch": 0.58, "learning_rate": 9.46111906463203e-05, "loss": 0.0798, "step": 2229 }, { "epoch": 0.58, "learning_rate": 9.46047652119403e-05, "loss": 0.0723, "step": 2230 }, { "epoch": 0.58, "learning_rate": 9.459833616756318e-05, "loss": 0.0415, "step": 2231 }, { "epoch": 0.58, "learning_rate": 9.459190351370926e-05, "loss": 0.038, "step": 2232 }, { "epoch": 0.58, "learning_rate": 9.458546725089915e-05, "loss": 0.0751, "step": 2233 }, { "epoch": 0.58, "learning_rate": 9.457902737965376e-05, "loss": 0.0644, "step": 2234 }, { "epoch": 0.58, "learning_rate": 9.45725839004943e-05, "loss": 0.0607, "step": 2235 }, { "epoch": 0.58, "learning_rate": 9.456613681394222e-05, "loss": 0.0712, "step": 2236 }, { "epoch": 0.58, "learning_rate": 9.455968612051935e-05, "loss": 0.0648, "step": 2237 }, { "epoch": 0.58, "learning_rate": 9.455323182074774e-05, "loss": 0.0393, "step": 2238 }, { "epoch": 0.58, "learning_rate": 9.454677391514974e-05, "loss": 0.0634, "step": 2239 }, { "epoch": 0.58, "learning_rate": 9.454031240424806e-05, "loss": 0.0548, "step": 2240 }, { "epoch": 0.58, "learning_rate": 9.453384728856559e-05, "loss": 0.0518, "step": 2241 }, { "epoch": 0.58, "learning_rate": 9.45273785686256e-05, "loss": 0.0562, "step": 2242 }, { "epoch": 0.58, "learning_rate": 9.452090624495161e-05, "loss": 0.0661, "step": 2243 }, { "epoch": 0.58, "learning_rate": 9.451443031806746e-05, "loss": 0.0394, "step": 2244 }, { "epoch": 0.58, "learning_rate": 9.450795078849729e-05, "loss": 0.0728, "step": 2245 }, { "epoch": 0.58, "learning_rate": 9.450146765676544e-05, "loss": 0.052, "step": 2246 }, { "epoch": 0.58, "learning_rate": 9.449498092339666e-05, "loss": 0.0528, "step": 2247 }, { "epoch": 0.58, "learning_rate": 9.448849058891591e-05, "loss": 0.0811, "step": 2248 }, { "epoch": 0.58, "learning_rate": 9.448199665384851e-05, "loss": 0.0475, "step": 2249 }, { "epoch": 0.58, "learning_rate": 9.447549911872002e-05, "loss": 0.0471, "step": 2250 }, { "epoch": 0.58, "learning_rate": 9.446899798405628e-05, "loss": 0.062, "step": 2251 }, { "epoch": 0.58, "learning_rate": 9.446249325038348e-05, "loss": 0.0585, "step": 2252 }, { "epoch": 0.58, "learning_rate": 9.445598491822805e-05, "loss": 0.049, "step": 2253 }, { "epoch": 0.58, "learning_rate": 9.444947298811672e-05, "loss": 0.0853, "step": 2254 }, { "epoch": 0.58, "learning_rate": 9.444295746057654e-05, "loss": 0.0599, "step": 2255 }, { "epoch": 0.58, "learning_rate": 9.443643833613482e-05, "loss": 0.0605, "step": 2256 }, { "epoch": 0.58, "learning_rate": 9.442991561531917e-05, "loss": 0.0857, "step": 2257 }, { "epoch": 0.58, "learning_rate": 9.44233892986575e-05, "loss": 0.0364, "step": 2258 }, { "epoch": 0.58, "learning_rate": 9.441685938667802e-05, "loss": 0.0516, "step": 2259 }, { "epoch": 0.58, "learning_rate": 9.441032587990918e-05, "loss": 0.071, "step": 2260 }, { "epoch": 0.58, "learning_rate": 9.440378877887977e-05, "loss": 0.0708, "step": 2261 }, { "epoch": 0.58, "learning_rate": 9.439724808411888e-05, "loss": 0.0476, "step": 2262 }, { "epoch": 0.58, "learning_rate": 9.439070379615582e-05, "loss": 0.0749, "step": 2263 }, { "epoch": 0.58, "learning_rate": 9.43841559155203e-05, "loss": 0.0653, "step": 2264 }, { "epoch": 0.58, "learning_rate": 9.437760444274219e-05, "loss": 0.0695, "step": 2265 }, { "epoch": 0.58, "learning_rate": 9.43710493783518e-05, "loss": 0.0575, "step": 2266 }, { "epoch": 0.59, "learning_rate": 9.436449072287959e-05, "loss": 0.0597, "step": 2267 }, { "epoch": 0.59, "learning_rate": 9.435792847685638e-05, "loss": 0.0562, "step": 2268 }, { "epoch": 0.59, "learning_rate": 9.435136264081328e-05, "loss": 0.0494, "step": 2269 }, { "epoch": 0.59, "learning_rate": 9.43447932152817e-05, "loss": 0.0731, "step": 2270 }, { "epoch": 0.59, "learning_rate": 9.43382202007933e-05, "loss": 0.0624, "step": 2271 }, { "epoch": 0.59, "learning_rate": 9.433164359788007e-05, "loss": 0.0798, "step": 2272 }, { "epoch": 0.59, "learning_rate": 9.432506340707426e-05, "loss": 0.0556, "step": 2273 }, { "epoch": 0.59, "learning_rate": 9.431847962890844e-05, "loss": 0.0808, "step": 2274 }, { "epoch": 0.59, "learning_rate": 9.431189226391543e-05, "loss": 0.045, "step": 2275 }, { "epoch": 0.59, "learning_rate": 9.43053013126284e-05, "loss": 0.0715, "step": 2276 }, { "epoch": 0.59, "learning_rate": 9.429870677558074e-05, "loss": 0.0705, "step": 2277 }, { "epoch": 0.59, "learning_rate": 9.429210865330617e-05, "loss": 0.0658, "step": 2278 }, { "epoch": 0.59, "learning_rate": 9.428550694633873e-05, "loss": 0.0737, "step": 2279 }, { "epoch": 0.59, "learning_rate": 9.427890165521269e-05, "loss": 0.0648, "step": 2280 }, { "epoch": 0.59, "learning_rate": 9.427229278046264e-05, "loss": 0.0834, "step": 2281 }, { "epoch": 0.59, "learning_rate": 9.426568032262347e-05, "loss": 0.0687, "step": 2282 }, { "epoch": 0.59, "learning_rate": 9.42590642822303e-05, "loss": 0.0532, "step": 2283 }, { "epoch": 0.59, "learning_rate": 9.425244465981864e-05, "loss": 0.0572, "step": 2284 }, { "epoch": 0.59, "learning_rate": 9.42458214559242e-05, "loss": 0.0698, "step": 2285 }, { "epoch": 0.59, "learning_rate": 9.423919467108304e-05, "loss": 0.0831, "step": 2286 }, { "epoch": 0.59, "learning_rate": 9.423256430583146e-05, "loss": 0.0497, "step": 2287 }, { "epoch": 0.59, "learning_rate": 9.42259303607061e-05, "loss": 0.0512, "step": 2288 }, { "epoch": 0.59, "learning_rate": 9.421929283624386e-05, "loss": 0.0648, "step": 2289 }, { "epoch": 0.59, "learning_rate": 9.421265173298191e-05, "loss": 0.0677, "step": 2290 }, { "epoch": 0.59, "learning_rate": 9.420600705145776e-05, "loss": 0.0744, "step": 2291 }, { "epoch": 0.59, "learning_rate": 9.41993587922092e-05, "loss": 0.057, "step": 2292 }, { "epoch": 0.59, "learning_rate": 9.419270695577426e-05, "loss": 0.046, "step": 2293 }, { "epoch": 0.59, "learning_rate": 9.41860515426913e-05, "loss": 0.0483, "step": 2294 }, { "epoch": 0.59, "learning_rate": 9.417939255349896e-05, "loss": 0.0658, "step": 2295 }, { "epoch": 0.59, "learning_rate": 9.417272998873619e-05, "loss": 0.0557, "step": 2296 }, { "epoch": 0.59, "learning_rate": 9.41660638489422e-05, "loss": 0.0472, "step": 2297 }, { "epoch": 0.59, "learning_rate": 9.41593941346565e-05, "loss": 0.0595, "step": 2298 }, { "epoch": 0.59, "learning_rate": 9.415272084641888e-05, "loss": 0.0523, "step": 2299 }, { "epoch": 0.59, "learning_rate": 9.414604398476946e-05, "loss": 0.0633, "step": 2300 }, { "epoch": 0.59, "learning_rate": 9.413936355024858e-05, "loss": 0.0641, "step": 2301 }, { "epoch": 0.59, "learning_rate": 9.413267954339693e-05, "loss": 0.0612, "step": 2302 }, { "epoch": 0.59, "learning_rate": 9.412599196475547e-05, "loss": 0.0747, "step": 2303 }, { "epoch": 0.59, "learning_rate": 9.411930081486543e-05, "loss": 0.0761, "step": 2304 }, { "epoch": 0.59, "learning_rate": 9.411260609426836e-05, "loss": 0.0779, "step": 2305 }, { "epoch": 0.6, "learning_rate": 9.410590780350608e-05, "loss": 0.1106, "step": 2306 }, { "epoch": 0.6, "learning_rate": 9.409920594312069e-05, "loss": 0.0912, "step": 2307 }, { "epoch": 0.6, "learning_rate": 9.40925005136546e-05, "loss": 0.0925, "step": 2308 }, { "epoch": 0.6, "learning_rate": 9.408579151565051e-05, "loss": 0.0918, "step": 2309 }, { "epoch": 0.6, "learning_rate": 9.407907894965137e-05, "loss": 0.0631, "step": 2310 }, { "epoch": 0.6, "learning_rate": 9.40723628162005e-05, "loss": 0.0643, "step": 2311 }, { "epoch": 0.6, "learning_rate": 9.406564311584141e-05, "loss": 0.0674, "step": 2312 }, { "epoch": 0.6, "learning_rate": 9.405891984911796e-05, "loss": 0.077, "step": 2313 }, { "epoch": 0.6, "learning_rate": 9.40521930165743e-05, "loss": 0.0536, "step": 2314 }, { "epoch": 0.6, "learning_rate": 9.404546261875483e-05, "loss": 0.0645, "step": 2315 }, { "epoch": 0.6, "learning_rate": 9.403872865620426e-05, "loss": 0.061, "step": 2316 }, { "epoch": 0.6, "learning_rate": 9.40319911294676e-05, "loss": 0.0603, "step": 2317 }, { "epoch": 0.6, "learning_rate": 9.402525003909015e-05, "loss": 0.0693, "step": 2318 }, { "epoch": 0.6, "learning_rate": 9.401850538561746e-05, "loss": 0.0818, "step": 2319 }, { "epoch": 0.6, "learning_rate": 9.401175716959542e-05, "loss": 0.0503, "step": 2320 }, { "epoch": 0.6, "learning_rate": 9.400500539157016e-05, "loss": 0.0623, "step": 2321 }, { "epoch": 0.6, "learning_rate": 9.399825005208813e-05, "loss": 0.0859, "step": 2322 }, { "epoch": 0.6, "learning_rate": 9.399149115169608e-05, "loss": 0.0817, "step": 2323 }, { "epoch": 0.6, "learning_rate": 9.398472869094102e-05, "loss": 0.0795, "step": 2324 }, { "epoch": 0.6, "learning_rate": 9.397796267037023e-05, "loss": 0.0419, "step": 2325 }, { "epoch": 0.6, "learning_rate": 9.397119309053132e-05, "loss": 0.0668, "step": 2326 }, { "epoch": 0.6, "learning_rate": 9.396441995197219e-05, "loss": 0.1011, "step": 2327 }, { "epoch": 0.6, "learning_rate": 9.395764325524097e-05, "loss": 0.0686, "step": 2328 }, { "epoch": 0.6, "learning_rate": 9.395086300088616e-05, "loss": 0.0536, "step": 2329 }, { "epoch": 0.6, "learning_rate": 9.394407918945649e-05, "loss": 0.0696, "step": 2330 }, { "epoch": 0.6, "learning_rate": 9.3937291821501e-05, "loss": 0.0593, "step": 2331 }, { "epoch": 0.6, "learning_rate": 9.393050089756899e-05, "loss": 0.0613, "step": 2332 }, { "epoch": 0.6, "learning_rate": 9.392370641821009e-05, "loss": 0.0493, "step": 2333 }, { "epoch": 0.6, "learning_rate": 9.39169083839742e-05, "loss": 0.0839, "step": 2334 }, { "epoch": 0.6, "learning_rate": 9.391010679541149e-05, "loss": 0.0872, "step": 2335 }, { "epoch": 0.6, "learning_rate": 9.390330165307243e-05, "loss": 0.0578, "step": 2336 }, { "epoch": 0.6, "learning_rate": 9.389649295750782e-05, "loss": 0.0775, "step": 2337 }, { "epoch": 0.6, "learning_rate": 9.388968070926866e-05, "loss": 0.0697, "step": 2338 }, { "epoch": 0.6, "learning_rate": 9.388286490890632e-05, "loss": 0.0624, "step": 2339 }, { "epoch": 0.6, "learning_rate": 9.387604555697239e-05, "loss": 0.0593, "step": 2340 }, { "epoch": 0.6, "learning_rate": 9.38692226540188e-05, "loss": 0.0552, "step": 2341 }, { "epoch": 0.6, "learning_rate": 9.386239620059776e-05, "loss": 0.0575, "step": 2342 }, { "epoch": 0.6, "learning_rate": 9.385556619726175e-05, "loss": 0.0768, "step": 2343 }, { "epoch": 0.6, "learning_rate": 9.384873264456352e-05, "loss": 0.0534, "step": 2344 }, { "epoch": 0.61, "learning_rate": 9.384189554305613e-05, "loss": 0.0637, "step": 2345 }, { "epoch": 0.61, "learning_rate": 9.383505489329297e-05, "loss": 0.0794, "step": 2346 }, { "epoch": 0.61, "learning_rate": 9.382821069582763e-05, "loss": 0.0671, "step": 2347 }, { "epoch": 0.61, "learning_rate": 9.382136295121407e-05, "loss": 0.0659, "step": 2348 }, { "epoch": 0.61, "learning_rate": 9.381451166000646e-05, "loss": 0.0732, "step": 2349 }, { "epoch": 0.61, "learning_rate": 9.38076568227593e-05, "loss": 0.0558, "step": 2350 }, { "epoch": 0.61, "learning_rate": 9.38007984400274e-05, "loss": 0.0654, "step": 2351 }, { "epoch": 0.61, "learning_rate": 9.37939365123658e-05, "loss": 0.051, "step": 2352 }, { "epoch": 0.61, "learning_rate": 9.378707104032987e-05, "loss": 0.0898, "step": 2353 }, { "epoch": 0.61, "learning_rate": 9.378020202447526e-05, "loss": 0.0632, "step": 2354 }, { "epoch": 0.61, "learning_rate": 9.377332946535791e-05, "loss": 0.0545, "step": 2355 }, { "epoch": 0.61, "learning_rate": 9.376645336353401e-05, "loss": 0.0655, "step": 2356 }, { "epoch": 0.61, "learning_rate": 9.375957371956005e-05, "loss": 0.0559, "step": 2357 }, { "epoch": 0.61, "learning_rate": 9.375269053399285e-05, "loss": 0.0565, "step": 2358 }, { "epoch": 0.61, "learning_rate": 9.37458038073895e-05, "loss": 0.0438, "step": 2359 }, { "epoch": 0.61, "learning_rate": 9.373891354030733e-05, "loss": 0.0758, "step": 2360 }, { "epoch": 0.61, "learning_rate": 9.373201973330401e-05, "loss": 0.0612, "step": 2361 }, { "epoch": 0.61, "learning_rate": 9.372512238693747e-05, "loss": 0.0762, "step": 2362 }, { "epoch": 0.61, "learning_rate": 9.371822150176593e-05, "loss": 0.05, "step": 2363 }, { "epoch": 0.61, "learning_rate": 9.371131707834791e-05, "loss": 0.0672, "step": 2364 }, { "epoch": 0.61, "learning_rate": 9.370440911724218e-05, "loss": 0.0542, "step": 2365 }, { "epoch": 0.61, "learning_rate": 9.369749761900784e-05, "loss": 0.0706, "step": 2366 }, { "epoch": 0.61, "learning_rate": 9.369058258420427e-05, "loss": 0.0671, "step": 2367 }, { "epoch": 0.61, "learning_rate": 9.368366401339111e-05, "loss": 0.0693, "step": 2368 }, { "epoch": 0.61, "learning_rate": 9.367674190712829e-05, "loss": 0.09, "step": 2369 }, { "epoch": 0.61, "learning_rate": 9.366981626597608e-05, "loss": 0.055, "step": 2370 }, { "epoch": 0.61, "learning_rate": 9.366288709049493e-05, "loss": 0.0589, "step": 2371 }, { "epoch": 0.61, "learning_rate": 9.365595438124569e-05, "loss": 0.0456, "step": 2372 }, { "epoch": 0.61, "learning_rate": 9.364901813878943e-05, "loss": 0.0488, "step": 2373 }, { "epoch": 0.61, "learning_rate": 9.36420783636875e-05, "loss": 0.0674, "step": 2374 }, { "epoch": 0.61, "learning_rate": 9.36351350565016e-05, "loss": 0.091, "step": 2375 }, { "epoch": 0.61, "learning_rate": 9.362818821779362e-05, "loss": 0.0674, "step": 2376 }, { "epoch": 0.61, "learning_rate": 9.362123784812583e-05, "loss": 0.0465, "step": 2377 }, { "epoch": 0.61, "learning_rate": 9.361428394806073e-05, "loss": 0.0691, "step": 2378 }, { "epoch": 0.61, "learning_rate": 9.360732651816111e-05, "loss": 0.072, "step": 2379 }, { "epoch": 0.61, "learning_rate": 9.360036555899007e-05, "loss": 0.043, "step": 2380 }, { "epoch": 0.61, "learning_rate": 9.359340107111097e-05, "loss": 0.0442, "step": 2381 }, { "epoch": 0.61, "learning_rate": 9.358643305508748e-05, "loss": 0.0526, "step": 2382 }, { "epoch": 0.61, "learning_rate": 9.357946151148354e-05, "loss": 0.0601, "step": 2383 }, { "epoch": 0.62, "learning_rate": 9.357248644086336e-05, "loss": 0.0513, "step": 2384 }, { "epoch": 0.62, "learning_rate": 9.356550784379148e-05, "loss": 0.0848, "step": 2385 }, { "epoch": 0.62, "learning_rate": 9.355852572083269e-05, "loss": 0.0634, "step": 2386 }, { "epoch": 0.62, "learning_rate": 9.355154007255204e-05, "loss": 0.0635, "step": 2387 }, { "epoch": 0.62, "learning_rate": 9.354455089951495e-05, "loss": 0.0715, "step": 2388 }, { "epoch": 0.62, "learning_rate": 9.353755820228707e-05, "loss": 0.067, "step": 2389 }, { "epoch": 0.62, "learning_rate": 9.353056198143431e-05, "loss": 0.0494, "step": 2390 }, { "epoch": 0.62, "learning_rate": 9.35235622375229e-05, "loss": 0.0569, "step": 2391 }, { "epoch": 0.62, "learning_rate": 9.351655897111935e-05, "loss": 0.0747, "step": 2392 }, { "epoch": 0.62, "learning_rate": 9.350955218279049e-05, "loss": 0.0482, "step": 2393 }, { "epoch": 0.62, "learning_rate": 9.350254187310335e-05, "loss": 0.0618, "step": 2394 }, { "epoch": 0.62, "learning_rate": 9.349552804262533e-05, "loss": 0.0678, "step": 2395 }, { "epoch": 0.62, "learning_rate": 9.348851069192408e-05, "loss": 0.0459, "step": 2396 }, { "epoch": 0.62, "learning_rate": 9.348148982156751e-05, "loss": 0.0663, "step": 2397 }, { "epoch": 0.62, "learning_rate": 9.347446543212387e-05, "loss": 0.0578, "step": 2398 }, { "epoch": 0.62, "learning_rate": 9.346743752416164e-05, "loss": 0.0637, "step": 2399 }, { "epoch": 0.62, "learning_rate": 9.346040609824962e-05, "loss": 0.0581, "step": 2400 }, { "epoch": 0.62, "learning_rate": 9.34533711549569e-05, "loss": 0.0892, "step": 2401 }, { "epoch": 0.62, "learning_rate": 9.34463326948528e-05, "loss": 0.0743, "step": 2402 }, { "epoch": 0.62, "learning_rate": 9.343929071850699e-05, "loss": 0.0917, "step": 2403 }, { "epoch": 0.62, "learning_rate": 9.34322452264894e-05, "loss": 0.0581, "step": 2404 }, { "epoch": 0.62, "learning_rate": 9.342519621937025e-05, "loss": 0.0773, "step": 2405 }, { "epoch": 0.62, "learning_rate": 9.341814369772002e-05, "loss": 0.1012, "step": 2406 }, { "epoch": 0.62, "learning_rate": 9.341108766210948e-05, "loss": 0.0505, "step": 2407 }, { "epoch": 0.62, "learning_rate": 9.340402811310973e-05, "loss": 0.0484, "step": 2408 }, { "epoch": 0.62, "learning_rate": 9.339696505129209e-05, "loss": 0.0712, "step": 2409 }, { "epoch": 0.62, "learning_rate": 9.338989847722821e-05, "loss": 0.0525, "step": 2410 }, { "epoch": 0.62, "learning_rate": 9.338282839149002e-05, "loss": 0.0459, "step": 2411 }, { "epoch": 0.62, "learning_rate": 9.337575479464971e-05, "loss": 0.0759, "step": 2412 }, { "epoch": 0.62, "learning_rate": 9.336867768727975e-05, "loss": 0.0534, "step": 2413 }, { "epoch": 0.62, "learning_rate": 9.336159706995294e-05, "loss": 0.043, "step": 2414 }, { "epoch": 0.62, "learning_rate": 9.335451294324234e-05, "loss": 0.0609, "step": 2415 }, { "epoch": 0.62, "learning_rate": 9.334742530772125e-05, "loss": 0.0779, "step": 2416 }, { "epoch": 0.62, "learning_rate": 9.334033416396333e-05, "loss": 0.0777, "step": 2417 }, { "epoch": 0.62, "learning_rate": 9.333323951254247e-05, "loss": 0.0699, "step": 2418 }, { "epoch": 0.62, "learning_rate": 9.332614135403286e-05, "loss": 0.0756, "step": 2419 }, { "epoch": 0.62, "learning_rate": 9.331903968900899e-05, "loss": 0.0654, "step": 2420 }, { "epoch": 0.62, "learning_rate": 9.33119345180456e-05, "loss": 0.0838, "step": 2421 }, { "epoch": 0.63, "learning_rate": 9.330482584171776e-05, "loss": 0.0379, "step": 2422 }, { "epoch": 0.63, "learning_rate": 9.329771366060077e-05, "loss": 0.0678, "step": 2423 }, { "epoch": 0.63, "learning_rate": 9.329059797527025e-05, "loss": 0.0857, "step": 2424 }, { "epoch": 0.63, "learning_rate": 9.32834787863021e-05, "loss": 0.063, "step": 2425 }, { "epoch": 0.63, "learning_rate": 9.327635609427248e-05, "loss": 0.042, "step": 2426 }, { "epoch": 0.63, "learning_rate": 9.326922989975789e-05, "loss": 0.0746, "step": 2427 }, { "epoch": 0.63, "learning_rate": 9.326210020333501e-05, "loss": 0.0683, "step": 2428 }, { "epoch": 0.63, "learning_rate": 9.325496700558091e-05, "loss": 0.0713, "step": 2429 }, { "epoch": 0.63, "learning_rate": 9.324783030707292e-05, "loss": 0.0824, "step": 2430 }, { "epoch": 0.63, "learning_rate": 9.324069010838859e-05, "loss": 0.0443, "step": 2431 }, { "epoch": 0.63, "learning_rate": 9.323354641010582e-05, "loss": 0.0531, "step": 2432 }, { "epoch": 0.63, "learning_rate": 9.322639921280277e-05, "loss": 0.0784, "step": 2433 }, { "epoch": 0.63, "learning_rate": 9.321924851705787e-05, "loss": 0.046, "step": 2434 }, { "epoch": 0.63, "learning_rate": 9.321209432344986e-05, "loss": 0.0514, "step": 2435 }, { "epoch": 0.63, "learning_rate": 9.320493663255777e-05, "loss": 0.0643, "step": 2436 }, { "epoch": 0.63, "learning_rate": 9.319777544496084e-05, "loss": 0.0751, "step": 2437 }, { "epoch": 0.63, "learning_rate": 9.319061076123871e-05, "loss": 0.0809, "step": 2438 }, { "epoch": 0.63, "learning_rate": 9.31834425819712e-05, "loss": 0.0553, "step": 2439 }, { "epoch": 0.63, "learning_rate": 9.317627090773847e-05, "loss": 0.0639, "step": 2440 }, { "epoch": 0.63, "learning_rate": 9.31690957391209e-05, "loss": 0.0713, "step": 2441 }, { "epoch": 0.63, "learning_rate": 9.316191707669926e-05, "loss": 0.0549, "step": 2442 }, { "epoch": 0.63, "learning_rate": 9.315473492105451e-05, "loss": 0.0663, "step": 2443 }, { "epoch": 0.63, "learning_rate": 9.314754927276792e-05, "loss": 0.0686, "step": 2444 }, { "epoch": 0.63, "learning_rate": 9.314036013242107e-05, "loss": 0.0494, "step": 2445 }, { "epoch": 0.63, "learning_rate": 9.313316750059577e-05, "loss": 0.0432, "step": 2446 }, { "epoch": 0.63, "learning_rate": 9.312597137787416e-05, "loss": 0.0667, "step": 2447 }, { "epoch": 0.63, "learning_rate": 9.311877176483863e-05, "loss": 0.0677, "step": 2448 }, { "epoch": 0.63, "learning_rate": 9.311156866207187e-05, "loss": 0.0601, "step": 2449 }, { "epoch": 0.63, "learning_rate": 9.310436207015685e-05, "loss": 0.0697, "step": 2450 }, { "epoch": 0.63, "learning_rate": 9.309715198967683e-05, "loss": 0.0708, "step": 2451 }, { "epoch": 0.63, "learning_rate": 9.308993842121534e-05, "loss": 0.105, "step": 2452 }, { "epoch": 0.63, "learning_rate": 9.308272136535619e-05, "loss": 0.0579, "step": 2453 }, { "epoch": 0.63, "learning_rate": 9.307550082268347e-05, "loss": 0.0583, "step": 2454 }, { "epoch": 0.63, "learning_rate": 9.306827679378158e-05, "loss": 0.0413, "step": 2455 }, { "epoch": 0.63, "learning_rate": 9.306104927923516e-05, "loss": 0.068, "step": 2456 }, { "epoch": 0.63, "learning_rate": 9.305381827962917e-05, "loss": 0.0724, "step": 2457 }, { "epoch": 0.63, "learning_rate": 9.304658379554885e-05, "loss": 0.0745, "step": 2458 }, { "epoch": 0.63, "learning_rate": 9.303934582757967e-05, "loss": 0.054, "step": 2459 }, { "epoch": 0.63, "learning_rate": 9.303210437630746e-05, "loss": 0.0694, "step": 2460 }, { "epoch": 0.64, "learning_rate": 9.302485944231827e-05, "loss": 0.0621, "step": 2461 }, { "epoch": 0.64, "learning_rate": 9.301761102619846e-05, "loss": 0.0615, "step": 2462 }, { "epoch": 0.64, "learning_rate": 9.301035912853464e-05, "loss": 0.0623, "step": 2463 }, { "epoch": 0.64, "learning_rate": 9.300310374991379e-05, "loss": 0.0578, "step": 2464 }, { "epoch": 0.64, "learning_rate": 9.299584489092304e-05, "loss": 0.0786, "step": 2465 }, { "epoch": 0.64, "learning_rate": 9.298858255214992e-05, "loss": 0.0554, "step": 2466 }, { "epoch": 0.64, "learning_rate": 9.298131673418217e-05, "loss": 0.0495, "step": 2467 }, { "epoch": 0.64, "learning_rate": 9.297404743760785e-05, "loss": 0.0478, "step": 2468 }, { "epoch": 0.64, "learning_rate": 9.296677466301527e-05, "loss": 0.0417, "step": 2469 }, { "epoch": 0.64, "learning_rate": 9.295949841099304e-05, "loss": 0.0688, "step": 2470 }, { "epoch": 0.64, "learning_rate": 9.295221868213005e-05, "loss": 0.0838, "step": 2471 }, { "epoch": 0.64, "learning_rate": 9.294493547701545e-05, "loss": 0.0664, "step": 2472 }, { "epoch": 0.64, "learning_rate": 9.293764879623876e-05, "loss": 0.0637, "step": 2473 }, { "epoch": 0.64, "learning_rate": 9.293035864038964e-05, "loss": 0.0646, "step": 2474 }, { "epoch": 0.64, "learning_rate": 9.292306501005815e-05, "loss": 0.0526, "step": 2475 }, { "epoch": 0.64, "learning_rate": 9.291576790583454e-05, "loss": 0.0532, "step": 2476 }, { "epoch": 0.64, "learning_rate": 9.290846732830942e-05, "loss": 0.0608, "step": 2477 }, { "epoch": 0.64, "learning_rate": 9.290116327807365e-05, "loss": 0.0653, "step": 2478 }, { "epoch": 0.64, "learning_rate": 9.289385575571836e-05, "loss": 0.068, "step": 2479 }, { "epoch": 0.64, "learning_rate": 9.288654476183496e-05, "loss": 0.0524, "step": 2480 }, { "epoch": 0.64, "learning_rate": 9.287923029701517e-05, "loss": 0.0851, "step": 2481 }, { "epoch": 0.64, "learning_rate": 9.287191236185096e-05, "loss": 0.0679, "step": 2482 }, { "epoch": 0.64, "learning_rate": 9.286459095693459e-05, "loss": 0.0768, "step": 2483 }, { "epoch": 0.64, "learning_rate": 9.285726608285861e-05, "loss": 0.0709, "step": 2484 }, { "epoch": 0.64, "learning_rate": 9.284993774021582e-05, "loss": 0.0605, "step": 2485 }, { "epoch": 0.64, "learning_rate": 9.284260592959937e-05, "loss": 0.0472, "step": 2486 }, { "epoch": 0.64, "learning_rate": 9.28352706516026e-05, "loss": 0.0685, "step": 2487 }, { "epoch": 0.64, "learning_rate": 9.282793190681922e-05, "loss": 0.063, "step": 2488 }, { "epoch": 0.64, "learning_rate": 9.282058969584314e-05, "loss": 0.063, "step": 2489 }, { "epoch": 0.64, "learning_rate": 9.28132440192686e-05, "loss": 0.0518, "step": 2490 }, { "epoch": 0.64, "learning_rate": 9.28058948776901e-05, "loss": 0.0552, "step": 2491 }, { "epoch": 0.64, "learning_rate": 9.279854227170245e-05, "loss": 0.0585, "step": 2492 }, { "epoch": 0.64, "learning_rate": 9.27911862019007e-05, "loss": 0.0555, "step": 2493 }, { "epoch": 0.64, "learning_rate": 9.27838266688802e-05, "loss": 0.0588, "step": 2494 }, { "epoch": 0.64, "learning_rate": 9.277646367323659e-05, "loss": 0.0441, "step": 2495 }, { "epoch": 0.64, "learning_rate": 9.276909721556575e-05, "loss": 0.0586, "step": 2496 }, { "epoch": 0.64, "learning_rate": 9.276172729646391e-05, "loss": 0.0723, "step": 2497 }, { "epoch": 0.64, "learning_rate": 9.275435391652751e-05, "loss": 0.07, "step": 2498 }, { "epoch": 0.64, "learning_rate": 9.274697707635332e-05, "loss": 0.0512, "step": 2499 }, { "epoch": 0.65, "learning_rate": 9.273959677653836e-05, "loss": 0.0687, "step": 2500 }, { "epoch": 0.65, "learning_rate": 9.273221301767993e-05, "loss": 0.0499, "step": 2501 }, { "epoch": 0.65, "learning_rate": 9.272482580037563e-05, "loss": 0.0697, "step": 2502 }, { "epoch": 0.65, "learning_rate": 9.271743512522336e-05, "loss": 0.0599, "step": 2503 }, { "epoch": 0.65, "learning_rate": 9.27100409928212e-05, "loss": 0.0765, "step": 2504 }, { "epoch": 0.65, "learning_rate": 9.270264340376762e-05, "loss": 0.089, "step": 2505 }, { "epoch": 0.65, "learning_rate": 9.269524235866134e-05, "loss": 0.0635, "step": 2506 }, { "epoch": 0.65, "learning_rate": 9.268783785810133e-05, "loss": 0.048, "step": 2507 }, { "epoch": 0.65, "learning_rate": 9.268042990268685e-05, "loss": 0.0635, "step": 2508 }, { "epoch": 0.65, "learning_rate": 9.267301849301749e-05, "loss": 0.082, "step": 2509 }, { "epoch": 0.65, "learning_rate": 9.266560362969303e-05, "loss": 0.0707, "step": 2510 }, { "epoch": 0.65, "learning_rate": 9.265818531331361e-05, "loss": 0.0768, "step": 2511 }, { "epoch": 0.65, "learning_rate": 9.265076354447958e-05, "loss": 0.0791, "step": 2512 }, { "epoch": 0.65, "learning_rate": 9.264333832379166e-05, "loss": 0.0473, "step": 2513 }, { "epoch": 0.65, "learning_rate": 9.263590965185074e-05, "loss": 0.041, "step": 2514 }, { "epoch": 0.65, "learning_rate": 9.262847752925807e-05, "loss": 0.0712, "step": 2515 }, { "epoch": 0.65, "learning_rate": 9.262104195661518e-05, "loss": 0.0776, "step": 2516 }, { "epoch": 0.65, "learning_rate": 9.261360293452379e-05, "loss": 0.0703, "step": 2517 }, { "epoch": 0.65, "learning_rate": 9.260616046358601e-05, "loss": 0.0424, "step": 2518 }, { "epoch": 0.65, "learning_rate": 9.259871454440419e-05, "loss": 0.0664, "step": 2519 }, { "epoch": 0.65, "learning_rate": 9.25912651775809e-05, "loss": 0.0871, "step": 2520 }, { "epoch": 0.65, "learning_rate": 9.258381236371909e-05, "loss": 0.0748, "step": 2521 }, { "epoch": 0.65, "learning_rate": 9.25763561034219e-05, "loss": 0.0519, "step": 2522 }, { "epoch": 0.65, "learning_rate": 9.256889639729283e-05, "loss": 0.0516, "step": 2523 }, { "epoch": 0.65, "learning_rate": 9.256143324593558e-05, "loss": 0.0649, "step": 2524 }, { "epoch": 0.65, "learning_rate": 9.255396664995417e-05, "loss": 0.0418, "step": 2525 }, { "epoch": 0.65, "learning_rate": 9.25464966099529e-05, "loss": 0.09, "step": 2526 }, { "epoch": 0.65, "learning_rate": 9.253902312653635e-05, "loss": 0.0492, "step": 2527 }, { "epoch": 0.65, "learning_rate": 9.253154620030937e-05, "loss": 0.0603, "step": 2528 }, { "epoch": 0.65, "learning_rate": 9.252406583187708e-05, "loss": 0.0581, "step": 2529 }, { "epoch": 0.65, "learning_rate": 9.25165820218449e-05, "loss": 0.0564, "step": 2530 }, { "epoch": 0.65, "learning_rate": 9.25090947708185e-05, "loss": 0.0668, "step": 2531 }, { "epoch": 0.65, "learning_rate": 9.250160407940386e-05, "loss": 0.0525, "step": 2532 }, { "epoch": 0.65, "learning_rate": 9.249410994820724e-05, "loss": 0.0659, "step": 2533 }, { "epoch": 0.65, "learning_rate": 9.248661237783511e-05, "loss": 0.0591, "step": 2534 }, { "epoch": 0.65, "learning_rate": 9.247911136889434e-05, "loss": 0.0589, "step": 2535 }, { "epoch": 0.65, "learning_rate": 9.247160692199196e-05, "loss": 0.0584, "step": 2536 }, { "epoch": 0.65, "learning_rate": 9.246409903773532e-05, "loss": 0.0545, "step": 2537 }, { "epoch": 0.65, "learning_rate": 9.245658771673209e-05, "loss": 0.0544, "step": 2538 }, { "epoch": 0.66, "learning_rate": 9.244907295959016e-05, "loss": 0.0561, "step": 2539 }, { "epoch": 0.66, "learning_rate": 9.244155476691775e-05, "loss": 0.0704, "step": 2540 }, { "epoch": 0.66, "learning_rate": 9.24340331393233e-05, "loss": 0.0479, "step": 2541 }, { "epoch": 0.66, "learning_rate": 9.242650807741557e-05, "loss": 0.0475, "step": 2542 }, { "epoch": 0.66, "learning_rate": 9.24189795818036e-05, "loss": 0.0675, "step": 2543 }, { "epoch": 0.66, "learning_rate": 9.241144765309665e-05, "loss": 0.0621, "step": 2544 }, { "epoch": 0.66, "learning_rate": 9.240391229190435e-05, "loss": 0.0585, "step": 2545 }, { "epoch": 0.66, "learning_rate": 9.239637349883653e-05, "loss": 0.0449, "step": 2546 }, { "epoch": 0.66, "learning_rate": 9.238883127450334e-05, "loss": 0.0646, "step": 2547 }, { "epoch": 0.66, "learning_rate": 9.23812856195152e-05, "loss": 0.0482, "step": 2548 }, { "epoch": 0.66, "learning_rate": 9.237373653448278e-05, "loss": 0.0817, "step": 2549 }, { "epoch": 0.66, "learning_rate": 9.236618402001707e-05, "loss": 0.0646, "step": 2550 }, { "epoch": 0.66, "learning_rate": 9.235862807672931e-05, "loss": 0.0352, "step": 2551 }, { "epoch": 0.66, "learning_rate": 9.235106870523102e-05, "loss": 0.0543, "step": 2552 }, { "epoch": 0.66, "learning_rate": 9.234350590613402e-05, "loss": 0.0552, "step": 2553 }, { "epoch": 0.66, "learning_rate": 9.233593968005037e-05, "loss": 0.0762, "step": 2554 }, { "epoch": 0.66, "learning_rate": 9.232837002759244e-05, "loss": 0.0664, "step": 2555 }, { "epoch": 0.66, "learning_rate": 9.232079694937287e-05, "loss": 0.0556, "step": 2556 }, { "epoch": 0.66, "learning_rate": 9.231322044600454e-05, "loss": 0.055, "step": 2557 }, { "epoch": 0.66, "learning_rate": 9.23056405181007e-05, "loss": 0.0659, "step": 2558 }, { "epoch": 0.66, "learning_rate": 9.229805716627474e-05, "loss": 0.0704, "step": 2559 }, { "epoch": 0.66, "learning_rate": 9.229047039114045e-05, "loss": 0.0554, "step": 2560 }, { "epoch": 0.66, "learning_rate": 9.228288019331185e-05, "loss": 0.0768, "step": 2561 }, { "epoch": 0.66, "learning_rate": 9.227528657340323e-05, "loss": 0.0587, "step": 2562 }, { "epoch": 0.66, "learning_rate": 9.226768953202916e-05, "loss": 0.0384, "step": 2563 }, { "epoch": 0.66, "learning_rate": 9.22600890698045e-05, "loss": 0.0474, "step": 2564 }, { "epoch": 0.66, "learning_rate": 9.225248518734437e-05, "loss": 0.0683, "step": 2565 }, { "epoch": 0.66, "learning_rate": 9.224487788526416e-05, "loss": 0.0693, "step": 2566 }, { "epoch": 0.66, "learning_rate": 9.223726716417959e-05, "loss": 0.0605, "step": 2567 }, { "epoch": 0.66, "learning_rate": 9.22296530247066e-05, "loss": 0.07, "step": 2568 }, { "epoch": 0.66, "learning_rate": 9.222203546746142e-05, "loss": 0.0684, "step": 2569 }, { "epoch": 0.66, "learning_rate": 9.221441449306057e-05, "loss": 0.0858, "step": 2570 }, { "epoch": 0.66, "learning_rate": 9.220679010212083e-05, "loss": 0.0487, "step": 2571 }, { "epoch": 0.66, "learning_rate": 9.219916229525927e-05, "loss": 0.0809, "step": 2572 }, { "epoch": 0.66, "learning_rate": 9.219153107309322e-05, "loss": 0.0669, "step": 2573 }, { "epoch": 0.66, "learning_rate": 9.21838964362403e-05, "loss": 0.065, "step": 2574 }, { "epoch": 0.66, "learning_rate": 9.217625838531842e-05, "loss": 0.056, "step": 2575 }, { "epoch": 0.66, "learning_rate": 9.216861692094575e-05, "loss": 0.0448, "step": 2576 }, { "epoch": 0.67, "learning_rate": 9.216097204374072e-05, "loss": 0.0773, "step": 2577 }, { "epoch": 0.67, "learning_rate": 9.215332375432207e-05, "loss": 0.0779, "step": 2578 }, { "epoch": 0.67, "learning_rate": 9.214567205330878e-05, "loss": 0.0438, "step": 2579 }, { "epoch": 0.67, "learning_rate": 9.213801694132012e-05, "loss": 0.0618, "step": 2580 }, { "epoch": 0.67, "learning_rate": 9.213035841897568e-05, "loss": 0.0547, "step": 2581 }, { "epoch": 0.67, "learning_rate": 9.212269648689525e-05, "loss": 0.0726, "step": 2582 }, { "epoch": 0.67, "learning_rate": 9.211503114569894e-05, "loss": 0.0579, "step": 2583 }, { "epoch": 0.67, "learning_rate": 9.210736239600717e-05, "loss": 0.0522, "step": 2584 }, { "epoch": 0.67, "learning_rate": 9.209969023844051e-05, "loss": 0.0631, "step": 2585 }, { "epoch": 0.67, "learning_rate": 9.209201467361998e-05, "loss": 0.0753, "step": 2586 }, { "epoch": 0.67, "learning_rate": 9.208433570216673e-05, "loss": 0.0857, "step": 2587 }, { "epoch": 0.67, "learning_rate": 9.207665332470227e-05, "loss": 0.0643, "step": 2588 }, { "epoch": 0.67, "learning_rate": 9.206896754184833e-05, "loss": 0.0647, "step": 2589 }, { "epoch": 0.67, "learning_rate": 9.206127835422698e-05, "loss": 0.0577, "step": 2590 }, { "epoch": 0.67, "learning_rate": 9.205358576246049e-05, "loss": 0.0491, "step": 2591 }, { "epoch": 0.67, "learning_rate": 9.204588976717147e-05, "loss": 0.0667, "step": 2592 }, { "epoch": 0.67, "learning_rate": 9.203819036898277e-05, "loss": 0.0545, "step": 2593 }, { "epoch": 0.67, "learning_rate": 9.203048756851754e-05, "loss": 0.0625, "step": 2594 }, { "epoch": 0.67, "learning_rate": 9.202278136639916e-05, "loss": 0.0597, "step": 2595 }, { "epoch": 0.67, "learning_rate": 9.201507176325137e-05, "loss": 0.0625, "step": 2596 }, { "epoch": 0.67, "learning_rate": 9.200735875969808e-05, "loss": 0.0596, "step": 2597 }, { "epoch": 0.67, "learning_rate": 9.199964235636353e-05, "loss": 0.0588, "step": 2598 }, { "epoch": 0.67, "learning_rate": 9.199192255387225e-05, "loss": 0.0812, "step": 2599 }, { "epoch": 0.67, "learning_rate": 9.198419935284903e-05, "loss": 0.0706, "step": 2600 }, { "epoch": 0.67, "learning_rate": 9.197647275391891e-05, "loss": 0.0753, "step": 2601 }, { "epoch": 0.67, "learning_rate": 9.196874275770727e-05, "loss": 0.0639, "step": 2602 }, { "epoch": 0.67, "learning_rate": 9.196100936483967e-05, "loss": 0.0731, "step": 2603 }, { "epoch": 0.67, "learning_rate": 9.195327257594202e-05, "loss": 0.0878, "step": 2604 }, { "epoch": 0.67, "learning_rate": 9.194553239164047e-05, "loss": 0.0742, "step": 2605 }, { "epoch": 0.67, "learning_rate": 9.193778881256149e-05, "loss": 0.0821, "step": 2606 }, { "epoch": 0.67, "learning_rate": 9.193004183933175e-05, "loss": 0.0776, "step": 2607 }, { "epoch": 0.67, "learning_rate": 9.192229147257826e-05, "loss": 0.0734, "step": 2608 }, { "epoch": 0.67, "learning_rate": 9.191453771292828e-05, "loss": 0.0714, "step": 2609 }, { "epoch": 0.67, "learning_rate": 9.190678056100933e-05, "loss": 0.0807, "step": 2610 }, { "epoch": 0.67, "learning_rate": 9.189902001744921e-05, "loss": 0.0731, "step": 2611 }, { "epoch": 0.67, "learning_rate": 9.189125608287606e-05, "loss": 0.0902, "step": 2612 }, { "epoch": 0.67, "learning_rate": 9.188348875791817e-05, "loss": 0.0689, "step": 2613 }, { "epoch": 0.67, "learning_rate": 9.187571804320421e-05, "loss": 0.0821, "step": 2614 }, { "epoch": 0.67, "learning_rate": 9.186794393936308e-05, "loss": 0.0637, "step": 2615 }, { "epoch": 0.68, "learning_rate": 9.186016644702397e-05, "loss": 0.0738, "step": 2616 }, { "epoch": 0.68, "learning_rate": 9.185238556681632e-05, "loss": 0.0491, "step": 2617 }, { "epoch": 0.68, "learning_rate": 9.184460129936987e-05, "loss": 0.061, "step": 2618 }, { "epoch": 0.68, "learning_rate": 9.183681364531462e-05, "loss": 0.035, "step": 2619 }, { "epoch": 0.68, "learning_rate": 9.182902260528085e-05, "loss": 0.0659, "step": 2620 }, { "epoch": 0.68, "learning_rate": 9.182122817989913e-05, "loss": 0.0659, "step": 2621 }, { "epoch": 0.68, "learning_rate": 9.181343036980026e-05, "loss": 0.0639, "step": 2622 }, { "epoch": 0.68, "learning_rate": 9.180562917561533e-05, "loss": 0.0693, "step": 2623 }, { "epoch": 0.68, "learning_rate": 9.179782459797575e-05, "loss": 0.0528, "step": 2624 }, { "epoch": 0.68, "learning_rate": 9.179001663751314e-05, "loss": 0.0609, "step": 2625 }, { "epoch": 0.68, "learning_rate": 9.178220529485944e-05, "loss": 0.0566, "step": 2626 }, { "epoch": 0.68, "learning_rate": 9.177439057064683e-05, "loss": 0.034, "step": 2627 }, { "epoch": 0.68, "learning_rate": 9.176657246550778e-05, "loss": 0.0794, "step": 2628 }, { "epoch": 0.68, "learning_rate": 9.175875098007505e-05, "loss": 0.059, "step": 2629 }, { "epoch": 0.68, "learning_rate": 9.175092611498164e-05, "loss": 0.0843, "step": 2630 }, { "epoch": 0.68, "learning_rate": 9.174309787086085e-05, "loss": 0.0859, "step": 2631 }, { "epoch": 0.68, "learning_rate": 9.173526624834623e-05, "loss": 0.0855, "step": 2632 }, { "epoch": 0.68, "learning_rate": 9.172743124807163e-05, "loss": 0.0569, "step": 2633 }, { "epoch": 0.68, "learning_rate": 9.171959287067116e-05, "loss": 0.0686, "step": 2634 }, { "epoch": 0.68, "learning_rate": 9.171175111677918e-05, "loss": 0.0702, "step": 2635 }, { "epoch": 0.68, "learning_rate": 9.170390598703038e-05, "loss": 0.0637, "step": 2636 }, { "epoch": 0.68, "learning_rate": 9.169605748205968e-05, "loss": 0.0626, "step": 2637 }, { "epoch": 0.68, "learning_rate": 9.168820560250226e-05, "loss": 0.0704, "step": 2638 }, { "epoch": 0.68, "learning_rate": 9.168035034899362e-05, "loss": 0.0738, "step": 2639 }, { "epoch": 0.68, "learning_rate": 9.16724917221695e-05, "loss": 0.0585, "step": 2640 }, { "epoch": 0.68, "learning_rate": 9.166462972266593e-05, "loss": 0.0678, "step": 2641 }, { "epoch": 0.68, "learning_rate": 9.16567643511192e-05, "loss": 0.0601, "step": 2642 }, { "epoch": 0.68, "learning_rate": 9.164889560816588e-05, "loss": 0.0776, "step": 2643 }, { "epoch": 0.68, "learning_rate": 9.16410234944428e-05, "loss": 0.078, "step": 2644 }, { "epoch": 0.68, "learning_rate": 9.163314801058709e-05, "loss": 0.0829, "step": 2645 }, { "epoch": 0.68, "learning_rate": 9.162526915723612e-05, "loss": 0.0585, "step": 2646 }, { "epoch": 0.68, "learning_rate": 9.161738693502757e-05, "loss": 0.0501, "step": 2647 }, { "epoch": 0.68, "learning_rate": 9.160950134459936e-05, "loss": 0.0588, "step": 2648 }, { "epoch": 0.68, "learning_rate": 9.160161238658969e-05, "loss": 0.0747, "step": 2649 }, { "epoch": 0.68, "learning_rate": 9.159372006163704e-05, "loss": 0.0547, "step": 2650 }, { "epoch": 0.68, "learning_rate": 9.158582437038017e-05, "loss": 0.051, "step": 2651 }, { "epoch": 0.68, "learning_rate": 9.157792531345808e-05, "loss": 0.0568, "step": 2652 }, { "epoch": 0.68, "learning_rate": 9.157002289151008e-05, "loss": 0.0518, "step": 2653 }, { "epoch": 0.68, "learning_rate": 9.156211710517572e-05, "loss": 0.047, "step": 2654 }, { "epoch": 0.69, "learning_rate": 9.155420795509489e-05, "loss": 0.0631, "step": 2655 }, { "epoch": 0.69, "learning_rate": 9.154629544190764e-05, "loss": 0.0825, "step": 2656 }, { "epoch": 0.69, "learning_rate": 9.153837956625437e-05, "loss": 0.0536, "step": 2657 }, { "epoch": 0.69, "learning_rate": 9.153046032877576e-05, "loss": 0.0723, "step": 2658 }, { "epoch": 0.69, "learning_rate": 9.152253773011271e-05, "loss": 0.0666, "step": 2659 }, { "epoch": 0.69, "learning_rate": 9.151461177090643e-05, "loss": 0.0895, "step": 2660 }, { "epoch": 0.69, "learning_rate": 9.15066824517984e-05, "loss": 0.0507, "step": 2661 }, { "epoch": 0.69, "learning_rate": 9.149874977343036e-05, "loss": 0.0429, "step": 2662 }, { "epoch": 0.69, "learning_rate": 9.149081373644431e-05, "loss": 0.0611, "step": 2663 }, { "epoch": 0.69, "learning_rate": 9.148287434148253e-05, "loss": 0.0775, "step": 2664 }, { "epoch": 0.69, "learning_rate": 9.147493158918763e-05, "loss": 0.0466, "step": 2665 }, { "epoch": 0.69, "learning_rate": 9.146698548020239e-05, "loss": 0.0508, "step": 2666 }, { "epoch": 0.69, "learning_rate": 9.145903601516992e-05, "loss": 0.0724, "step": 2667 }, { "epoch": 0.69, "learning_rate": 9.145108319473363e-05, "loss": 0.0672, "step": 2668 }, { "epoch": 0.69, "learning_rate": 9.144312701953713e-05, "loss": 0.0513, "step": 2669 }, { "epoch": 0.69, "learning_rate": 9.143516749022433e-05, "loss": 0.0726, "step": 2670 }, { "epoch": 0.69, "learning_rate": 9.142720460743945e-05, "loss": 0.0628, "step": 2671 }, { "epoch": 0.69, "learning_rate": 9.141923837182693e-05, "loss": 0.0665, "step": 2672 }, { "epoch": 0.69, "learning_rate": 9.141126878403153e-05, "loss": 0.0548, "step": 2673 }, { "epoch": 0.69, "learning_rate": 9.14032958446982e-05, "loss": 0.0595, "step": 2674 }, { "epoch": 0.69, "learning_rate": 9.139531955447225e-05, "loss": 0.0656, "step": 2675 }, { "epoch": 0.69, "learning_rate": 9.138733991399922e-05, "loss": 0.0448, "step": 2676 }, { "epoch": 0.69, "learning_rate": 9.137935692392493e-05, "loss": 0.0814, "step": 2677 }, { "epoch": 0.69, "learning_rate": 9.137137058489544e-05, "loss": 0.0762, "step": 2678 }, { "epoch": 0.69, "learning_rate": 9.136338089755713e-05, "loss": 0.0736, "step": 2679 }, { "epoch": 0.69, "learning_rate": 9.135538786255666e-05, "loss": 0.0699, "step": 2680 }, { "epoch": 0.69, "learning_rate": 9.134739148054087e-05, "loss": 0.0554, "step": 2681 }, { "epoch": 0.69, "learning_rate": 9.133939175215697e-05, "loss": 0.0657, "step": 2682 }, { "epoch": 0.69, "learning_rate": 9.13313886780524e-05, "loss": 0.0672, "step": 2683 }, { "epoch": 0.69, "learning_rate": 9.132338225887486e-05, "loss": 0.0433, "step": 2684 }, { "epoch": 0.69, "learning_rate": 9.131537249527233e-05, "loss": 0.0617, "step": 2685 }, { "epoch": 0.69, "learning_rate": 9.130735938789307e-05, "loss": 0.0534, "step": 2686 }, { "epoch": 0.69, "learning_rate": 9.129934293738562e-05, "loss": 0.0634, "step": 2687 }, { "epoch": 0.69, "learning_rate": 9.129132314439876e-05, "loss": 0.0621, "step": 2688 }, { "epoch": 0.69, "learning_rate": 9.128330000958156e-05, "loss": 0.0492, "step": 2689 }, { "epoch": 0.69, "learning_rate": 9.127527353358337e-05, "loss": 0.0618, "step": 2690 }, { "epoch": 0.69, "learning_rate": 9.126724371705375e-05, "loss": 0.0737, "step": 2691 }, { "epoch": 0.69, "learning_rate": 9.125921056064261e-05, "loss": 0.067, "step": 2692 }, { "epoch": 0.69, "learning_rate": 9.125117406500011e-05, "loss": 0.0507, "step": 2693 }, { "epoch": 0.7, "learning_rate": 9.124313423077665e-05, "loss": 0.0712, "step": 2694 }, { "epoch": 0.7, "learning_rate": 9.123509105862293e-05, "loss": 0.0817, "step": 2695 }, { "epoch": 0.7, "learning_rate": 9.122704454918988e-05, "loss": 0.0665, "step": 2696 }, { "epoch": 0.7, "learning_rate": 9.121899470312876e-05, "loss": 0.0862, "step": 2697 }, { "epoch": 0.7, "learning_rate": 9.121094152109106e-05, "loss": 0.0448, "step": 2698 }, { "epoch": 0.7, "learning_rate": 9.120288500372853e-05, "loss": 0.0751, "step": 2699 }, { "epoch": 0.7, "learning_rate": 9.119482515169324e-05, "loss": 0.0735, "step": 2700 }, { "epoch": 0.7, "learning_rate": 9.118676196563747e-05, "loss": 0.0724, "step": 2701 }, { "epoch": 0.7, "learning_rate": 9.11786954462138e-05, "loss": 0.0626, "step": 2702 }, { "epoch": 0.7, "learning_rate": 9.11706255940751e-05, "loss": 0.0833, "step": 2703 }, { "epoch": 0.7, "learning_rate": 9.116255240987445e-05, "loss": 0.0629, "step": 2704 }, { "epoch": 0.7, "learning_rate": 9.115447589426528e-05, "loss": 0.0698, "step": 2705 }, { "epoch": 0.7, "learning_rate": 9.114639604790123e-05, "loss": 0.0683, "step": 2706 }, { "epoch": 0.7, "learning_rate": 9.113831287143619e-05, "loss": 0.0654, "step": 2707 }, { "epoch": 0.7, "learning_rate": 9.113022636552442e-05, "loss": 0.0716, "step": 2708 }, { "epoch": 0.7, "learning_rate": 9.112213653082034e-05, "loss": 0.0732, "step": 2709 }, { "epoch": 0.7, "learning_rate": 9.11140433679787e-05, "loss": 0.0574, "step": 2710 }, { "epoch": 0.7, "learning_rate": 9.11059468776545e-05, "loss": 0.0659, "step": 2711 }, { "epoch": 0.7, "learning_rate": 9.109784706050301e-05, "loss": 0.0827, "step": 2712 }, { "epoch": 0.7, "learning_rate": 9.108974391717979e-05, "loss": 0.0667, "step": 2713 }, { "epoch": 0.7, "learning_rate": 9.108163744834063e-05, "loss": 0.044, "step": 2714 }, { "epoch": 0.7, "learning_rate": 9.10735276546416e-05, "loss": 0.0453, "step": 2715 }, { "epoch": 0.7, "learning_rate": 9.10654145367391e-05, "loss": 0.0589, "step": 2716 }, { "epoch": 0.7, "learning_rate": 9.105729809528971e-05, "loss": 0.0663, "step": 2717 }, { "epoch": 0.7, "learning_rate": 9.104917833095032e-05, "loss": 0.0458, "step": 2718 }, { "epoch": 0.7, "learning_rate": 9.10410552443781e-05, "loss": 0.0926, "step": 2719 }, { "epoch": 0.7, "learning_rate": 9.103292883623046e-05, "loss": 0.0694, "step": 2720 }, { "epoch": 0.7, "learning_rate": 9.10247991071651e-05, "loss": 0.0536, "step": 2721 }, { "epoch": 0.7, "learning_rate": 9.101666605784e-05, "loss": 0.0549, "step": 2722 }, { "epoch": 0.7, "learning_rate": 9.100852968891337e-05, "loss": 0.0557, "step": 2723 }, { "epoch": 0.7, "learning_rate": 9.100039000104372e-05, "loss": 0.0557, "step": 2724 }, { "epoch": 0.7, "learning_rate": 9.099224699488982e-05, "loss": 0.0489, "step": 2725 }, { "epoch": 0.7, "learning_rate": 9.09841006711107e-05, "loss": 0.0594, "step": 2726 }, { "epoch": 0.7, "learning_rate": 9.097595103036567e-05, "loss": 0.0755, "step": 2727 }, { "epoch": 0.7, "learning_rate": 9.096779807331431e-05, "loss": 0.0519, "step": 2728 }, { "epoch": 0.7, "learning_rate": 9.095964180061647e-05, "loss": 0.0772, "step": 2729 }, { "epoch": 0.7, "learning_rate": 9.095148221293225e-05, "loss": 0.0469, "step": 2730 }, { "epoch": 0.7, "learning_rate": 9.094331931092201e-05, "loss": 0.0676, "step": 2731 }, { "epoch": 0.71, "learning_rate": 9.093515309524645e-05, "loss": 0.0588, "step": 2732 }, { "epoch": 0.71, "learning_rate": 9.092698356656644e-05, "loss": 0.0701, "step": 2733 }, { "epoch": 0.71, "learning_rate": 9.091881072554319e-05, "loss": 0.0802, "step": 2734 }, { "epoch": 0.71, "learning_rate": 9.091063457283813e-05, "loss": 0.0482, "step": 2735 }, { "epoch": 0.71, "learning_rate": 9.090245510911301e-05, "loss": 0.0544, "step": 2736 }, { "epoch": 0.71, "learning_rate": 9.08942723350298e-05, "loss": 0.075, "step": 2737 }, { "epoch": 0.71, "learning_rate": 9.088608625125074e-05, "loss": 0.0698, "step": 2738 }, { "epoch": 0.71, "learning_rate": 9.087789685843839e-05, "loss": 0.0519, "step": 2739 }, { "epoch": 0.71, "learning_rate": 9.086970415725552e-05, "loss": 0.076, "step": 2740 }, { "epoch": 0.71, "learning_rate": 9.086150814836519e-05, "loss": 0.0509, "step": 2741 }, { "epoch": 0.71, "learning_rate": 9.085330883243074e-05, "loss": 0.0451, "step": 2742 }, { "epoch": 0.71, "learning_rate": 9.084510621011575e-05, "loss": 0.0704, "step": 2743 }, { "epoch": 0.71, "learning_rate": 9.08369002820841e-05, "loss": 0.0481, "step": 2744 }, { "epoch": 0.71, "learning_rate": 9.08286910489999e-05, "loss": 0.0637, "step": 2745 }, { "epoch": 0.71, "learning_rate": 9.082047851152756e-05, "loss": 0.0631, "step": 2746 }, { "epoch": 0.71, "learning_rate": 9.081226267033176e-05, "loss": 0.0621, "step": 2747 }, { "epoch": 0.71, "learning_rate": 9.080404352607739e-05, "loss": 0.0486, "step": 2748 }, { "epoch": 0.71, "learning_rate": 9.07958210794297e-05, "loss": 0.0738, "step": 2749 }, { "epoch": 0.71, "learning_rate": 9.078759533105412e-05, "loss": 0.0443, "step": 2750 }, { "epoch": 0.71, "learning_rate": 9.077936628161639e-05, "loss": 0.0637, "step": 2751 }, { "epoch": 0.71, "learning_rate": 9.077113393178254e-05, "loss": 0.0523, "step": 2752 }, { "epoch": 0.71, "learning_rate": 9.07628982822188e-05, "loss": 0.0643, "step": 2753 }, { "epoch": 0.71, "learning_rate": 9.075465933359173e-05, "loss": 0.061, "step": 2754 }, { "epoch": 0.71, "learning_rate": 9.074641708656812e-05, "loss": 0.084, "step": 2755 }, { "epoch": 0.71, "learning_rate": 9.073817154181505e-05, "loss": 0.0512, "step": 2756 }, { "epoch": 0.71, "learning_rate": 9.072992269999985e-05, "loss": 0.0546, "step": 2757 }, { "epoch": 0.71, "learning_rate": 9.072167056179015e-05, "loss": 0.0631, "step": 2758 }, { "epoch": 0.71, "learning_rate": 9.071341512785377e-05, "loss": 0.0537, "step": 2759 }, { "epoch": 0.71, "learning_rate": 9.070515639885888e-05, "loss": 0.0715, "step": 2760 }, { "epoch": 0.71, "learning_rate": 9.069689437547388e-05, "loss": 0.0401, "step": 2761 }, { "epoch": 0.71, "learning_rate": 9.068862905836743e-05, "loss": 0.0942, "step": 2762 }, { "epoch": 0.71, "learning_rate": 9.068036044820849e-05, "loss": 0.078, "step": 2763 }, { "epoch": 0.71, "learning_rate": 9.067208854566623e-05, "loss": 0.0609, "step": 2764 }, { "epoch": 0.71, "learning_rate": 9.066381335141016e-05, "loss": 0.0568, "step": 2765 }, { "epoch": 0.71, "learning_rate": 9.065553486610997e-05, "loss": 0.0835, "step": 2766 }, { "epoch": 0.71, "learning_rate": 9.06472530904357e-05, "loss": 0.0709, "step": 2767 }, { "epoch": 0.71, "learning_rate": 9.06389680250576e-05, "loss": 0.0599, "step": 2768 }, { "epoch": 0.71, "learning_rate": 9.06306796706462e-05, "loss": 0.071, "step": 2769 }, { "epoch": 0.71, "learning_rate": 9.062238802787233e-05, "loss": 0.0467, "step": 2770 }, { "epoch": 0.72, "learning_rate": 9.061409309740703e-05, "loss": 0.0498, "step": 2771 }, { "epoch": 0.72, "learning_rate": 9.060579487992164e-05, "loss": 0.063, "step": 2772 }, { "epoch": 0.72, "learning_rate": 9.059749337608777e-05, "loss": 0.0581, "step": 2773 }, { "epoch": 0.72, "learning_rate": 9.058918858657727e-05, "loss": 0.0692, "step": 2774 }, { "epoch": 0.72, "learning_rate": 9.058088051206229e-05, "loss": 0.0352, "step": 2775 }, { "epoch": 0.72, "learning_rate": 9.057256915321521e-05, "loss": 0.059, "step": 2776 }, { "epoch": 0.72, "learning_rate": 9.05642545107087e-05, "loss": 0.0481, "step": 2777 }, { "epoch": 0.72, "learning_rate": 9.055593658521569e-05, "loss": 0.051, "step": 2778 }, { "epoch": 0.72, "learning_rate": 9.054761537740936e-05, "loss": 0.0653, "step": 2779 }, { "epoch": 0.72, "learning_rate": 9.053929088796321e-05, "loss": 0.0571, "step": 2780 }, { "epoch": 0.72, "learning_rate": 9.053096311755091e-05, "loss": 0.0669, "step": 2781 }, { "epoch": 0.72, "learning_rate": 9.05226320668465e-05, "loss": 0.0493, "step": 2782 }, { "epoch": 0.72, "learning_rate": 9.051429773652422e-05, "loss": 0.0754, "step": 2783 }, { "epoch": 0.72, "learning_rate": 9.050596012725858e-05, "loss": 0.0607, "step": 2784 }, { "epoch": 0.72, "learning_rate": 9.049761923972437e-05, "loss": 0.0642, "step": 2785 }, { "epoch": 0.72, "learning_rate": 9.048927507459665e-05, "loss": 0.0562, "step": 2786 }, { "epoch": 0.72, "learning_rate": 9.048092763255076e-05, "loss": 0.0614, "step": 2787 }, { "epoch": 0.72, "learning_rate": 9.047257691426225e-05, "loss": 0.0652, "step": 2788 }, { "epoch": 0.72, "learning_rate": 9.046422292040698e-05, "loss": 0.0673, "step": 2789 }, { "epoch": 0.72, "learning_rate": 9.045586565166107e-05, "loss": 0.0665, "step": 2790 }, { "epoch": 0.72, "learning_rate": 9.044750510870089e-05, "loss": 0.0698, "step": 2791 }, { "epoch": 0.72, "learning_rate": 9.043914129220308e-05, "loss": 0.0615, "step": 2792 }, { "epoch": 0.72, "learning_rate": 9.043077420284457e-05, "loss": 0.0623, "step": 2793 }, { "epoch": 0.72, "learning_rate": 9.04224038413025e-05, "loss": 0.0566, "step": 2794 }, { "epoch": 0.72, "learning_rate": 9.041403020825435e-05, "loss": 0.0479, "step": 2795 }, { "epoch": 0.72, "learning_rate": 9.040565330437779e-05, "loss": 0.0626, "step": 2796 }, { "epoch": 0.72, "learning_rate": 9.03972731303508e-05, "loss": 0.0529, "step": 2797 }, { "epoch": 0.72, "learning_rate": 9.03888896868516e-05, "loss": 0.0645, "step": 2798 }, { "epoch": 0.72, "learning_rate": 9.038050297455872e-05, "loss": 0.0675, "step": 2799 }, { "epoch": 0.72, "learning_rate": 9.037211299415088e-05, "loss": 0.062, "step": 2800 }, { "epoch": 0.72, "learning_rate": 9.036371974630714e-05, "loss": 0.044, "step": 2801 }, { "epoch": 0.72, "learning_rate": 9.035532323170677e-05, "loss": 0.0458, "step": 2802 }, { "epoch": 0.72, "learning_rate": 9.034692345102933e-05, "loss": 0.0696, "step": 2803 }, { "epoch": 0.72, "learning_rate": 9.033852040495466e-05, "loss": 0.0756, "step": 2804 }, { "epoch": 0.72, "learning_rate": 9.033011409416281e-05, "loss": 0.0598, "step": 2805 }, { "epoch": 0.72, "learning_rate": 9.032170451933414e-05, "loss": 0.0775, "step": 2806 }, { "epoch": 0.72, "learning_rate": 9.031329168114926e-05, "loss": 0.0551, "step": 2807 }, { "epoch": 0.72, "learning_rate": 9.030487558028907e-05, "loss": 0.1026, "step": 2808 }, { "epoch": 0.72, "learning_rate": 9.029645621743468e-05, "loss": 0.0805, "step": 2809 }, { "epoch": 0.73, "learning_rate": 9.028803359326752e-05, "loss": 0.0584, "step": 2810 }, { "epoch": 0.73, "learning_rate": 9.027960770846922e-05, "loss": 0.0601, "step": 2811 }, { "epoch": 0.73, "learning_rate": 9.027117856372176e-05, "loss": 0.0605, "step": 2812 }, { "epoch": 0.73, "learning_rate": 9.026274615970729e-05, "loss": 0.0683, "step": 2813 }, { "epoch": 0.73, "learning_rate": 9.025431049710831e-05, "loss": 0.064, "step": 2814 }, { "epoch": 0.73, "learning_rate": 9.02458715766075e-05, "loss": 0.0827, "step": 2815 }, { "epoch": 0.73, "learning_rate": 9.023742939888789e-05, "loss": 0.0652, "step": 2816 }, { "epoch": 0.73, "learning_rate": 9.022898396463273e-05, "loss": 0.0663, "step": 2817 }, { "epoch": 0.73, "learning_rate": 9.02205352745255e-05, "loss": 0.0533, "step": 2818 }, { "epoch": 0.73, "learning_rate": 9.021208332924999e-05, "loss": 0.0849, "step": 2819 }, { "epoch": 0.73, "learning_rate": 9.020362812949024e-05, "loss": 0.04, "step": 2820 }, { "epoch": 0.73, "learning_rate": 9.019516967593059e-05, "loss": 0.0498, "step": 2821 }, { "epoch": 0.73, "learning_rate": 9.018670796925555e-05, "loss": 0.065, "step": 2822 }, { "epoch": 0.73, "learning_rate": 9.017824301014998e-05, "loss": 0.0549, "step": 2823 }, { "epoch": 0.73, "learning_rate": 9.016977479929899e-05, "loss": 0.0565, "step": 2824 }, { "epoch": 0.73, "learning_rate": 9.016130333738792e-05, "loss": 0.0531, "step": 2825 }, { "epoch": 0.73, "learning_rate": 9.015282862510239e-05, "loss": 0.0747, "step": 2826 }, { "epoch": 0.73, "learning_rate": 9.014435066312828e-05, "loss": 0.0466, "step": 2827 }, { "epoch": 0.73, "learning_rate": 9.013586945215176e-05, "loss": 0.0648, "step": 2828 }, { "epoch": 0.73, "learning_rate": 9.012738499285919e-05, "loss": 0.0487, "step": 2829 }, { "epoch": 0.73, "learning_rate": 9.01188972859373e-05, "loss": 0.0723, "step": 2830 }, { "epoch": 0.73, "learning_rate": 9.0110406332073e-05, "loss": 0.0484, "step": 2831 }, { "epoch": 0.73, "learning_rate": 9.010191213195349e-05, "loss": 0.0703, "step": 2832 }, { "epoch": 0.73, "learning_rate": 9.009341468626625e-05, "loss": 0.0558, "step": 2833 }, { "epoch": 0.73, "learning_rate": 9.008491399569896e-05, "loss": 0.0581, "step": 2834 }, { "epoch": 0.73, "learning_rate": 9.007641006093963e-05, "loss": 0.0606, "step": 2835 }, { "epoch": 0.73, "learning_rate": 9.006790288267652e-05, "loss": 0.0418, "step": 2836 }, { "epoch": 0.73, "learning_rate": 9.005939246159812e-05, "loss": 0.0621, "step": 2837 }, { "epoch": 0.73, "learning_rate": 9.005087879839322e-05, "loss": 0.0879, "step": 2838 }, { "epoch": 0.73, "learning_rate": 9.004236189375086e-05, "loss": 0.0608, "step": 2839 }, { "epoch": 0.73, "learning_rate": 9.003384174836031e-05, "loss": 0.0956, "step": 2840 }, { "epoch": 0.73, "learning_rate": 9.002531836291117e-05, "loss": 0.0471, "step": 2841 }, { "epoch": 0.73, "learning_rate": 9.001679173809323e-05, "loss": 0.0776, "step": 2842 }, { "epoch": 0.73, "learning_rate": 9.00082618745966e-05, "loss": 0.0623, "step": 2843 }, { "epoch": 0.73, "learning_rate": 8.99997287731116e-05, "loss": 0.0742, "step": 2844 }, { "epoch": 0.73, "learning_rate": 8.999119243432887e-05, "loss": 0.0652, "step": 2845 }, { "epoch": 0.73, "learning_rate": 8.998265285893928e-05, "loss": 0.0686, "step": 2846 }, { "epoch": 0.73, "learning_rate": 8.997411004763393e-05, "loss": 0.0901, "step": 2847 }, { "epoch": 0.73, "learning_rate": 8.996556400110424e-05, "loss": 0.059, "step": 2848 }, { "epoch": 0.74, "learning_rate": 8.995701472004187e-05, "loss": 0.0553, "step": 2849 }, { "epoch": 0.74, "learning_rate": 8.994846220513872e-05, "loss": 0.0488, "step": 2850 }, { "epoch": 0.74, "learning_rate": 8.993990645708698e-05, "loss": 0.0769, "step": 2851 }, { "epoch": 0.74, "learning_rate": 8.993134747657912e-05, "loss": 0.0444, "step": 2852 }, { "epoch": 0.74, "learning_rate": 8.99227852643078e-05, "loss": 0.0609, "step": 2853 }, { "epoch": 0.74, "learning_rate": 8.991421982096599e-05, "loss": 0.0446, "step": 2854 }, { "epoch": 0.74, "learning_rate": 8.990565114724696e-05, "loss": 0.0718, "step": 2855 }, { "epoch": 0.74, "learning_rate": 8.989707924384416e-05, "loss": 0.074, "step": 2856 }, { "epoch": 0.74, "learning_rate": 8.988850411145134e-05, "loss": 0.0627, "step": 2857 }, { "epoch": 0.74, "learning_rate": 8.987992575076253e-05, "loss": 0.0711, "step": 2858 }, { "epoch": 0.74, "learning_rate": 8.987134416247198e-05, "loss": 0.0629, "step": 2859 }, { "epoch": 0.74, "learning_rate": 8.986275934727426e-05, "loss": 0.0571, "step": 2860 }, { "epoch": 0.74, "learning_rate": 8.985417130586413e-05, "loss": 0.067, "step": 2861 }, { "epoch": 0.74, "learning_rate": 8.984558003893665e-05, "loss": 0.0743, "step": 2862 }, { "epoch": 0.74, "learning_rate": 8.983698554718715e-05, "loss": 0.056, "step": 2863 }, { "epoch": 0.74, "learning_rate": 8.98283878313112e-05, "loss": 0.0549, "step": 2864 }, { "epoch": 0.74, "learning_rate": 8.981978689200464e-05, "loss": 0.0575, "step": 2865 }, { "epoch": 0.74, "learning_rate": 8.981118272996358e-05, "loss": 0.0727, "step": 2866 }, { "epoch": 0.74, "learning_rate": 8.980257534588437e-05, "loss": 0.0656, "step": 2867 }, { "epoch": 0.74, "learning_rate": 8.979396474046362e-05, "loss": 0.0598, "step": 2868 }, { "epoch": 0.74, "learning_rate": 8.978535091439823e-05, "loss": 0.0672, "step": 2869 }, { "epoch": 0.74, "learning_rate": 8.977673386838533e-05, "loss": 0.0697, "step": 2870 }, { "epoch": 0.74, "learning_rate": 8.976811360312234e-05, "loss": 0.0437, "step": 2871 }, { "epoch": 0.74, "learning_rate": 8.975949011930691e-05, "loss": 0.0816, "step": 2872 }, { "epoch": 0.74, "learning_rate": 8.975086341763699e-05, "loss": 0.0591, "step": 2873 }, { "epoch": 0.74, "learning_rate": 8.974223349881072e-05, "loss": 0.0485, "step": 2874 }, { "epoch": 0.74, "learning_rate": 8.973360036352658e-05, "loss": 0.0703, "step": 2875 }, { "epoch": 0.74, "learning_rate": 8.972496401248325e-05, "loss": 0.0755, "step": 2876 }, { "epoch": 0.74, "learning_rate": 8.971632444637972e-05, "loss": 0.0581, "step": 2877 }, { "epoch": 0.74, "learning_rate": 8.970768166591522e-05, "loss": 0.0729, "step": 2878 }, { "epoch": 0.74, "learning_rate": 8.96990356717892e-05, "loss": 0.0576, "step": 2879 }, { "epoch": 0.74, "learning_rate": 8.969038646470143e-05, "loss": 0.0583, "step": 2880 }, { "epoch": 0.74, "learning_rate": 8.968173404535191e-05, "loss": 0.0666, "step": 2881 }, { "epoch": 0.74, "learning_rate": 8.967307841444094e-05, "loss": 0.0563, "step": 2882 }, { "epoch": 0.74, "learning_rate": 8.9664419572669e-05, "loss": 0.0588, "step": 2883 }, { "epoch": 0.74, "learning_rate": 8.965575752073689e-05, "loss": 0.06, "step": 2884 }, { "epoch": 0.74, "learning_rate": 8.964709225934566e-05, "loss": 0.064, "step": 2885 }, { "epoch": 0.74, "learning_rate": 8.963842378919662e-05, "loss": 0.0646, "step": 2886 }, { "epoch": 0.75, "learning_rate": 8.962975211099132e-05, "loss": 0.0382, "step": 2887 }, { "epoch": 0.75, "learning_rate": 8.962107722543159e-05, "loss": 0.0655, "step": 2888 }, { "epoch": 0.75, "learning_rate": 8.961239913321953e-05, "loss": 0.0571, "step": 2889 }, { "epoch": 0.75, "learning_rate": 8.960371783505746e-05, "loss": 0.0625, "step": 2890 }, { "epoch": 0.75, "learning_rate": 8.9595033331648e-05, "loss": 0.0687, "step": 2891 }, { "epoch": 0.75, "learning_rate": 8.958634562369402e-05, "loss": 0.0677, "step": 2892 }, { "epoch": 0.75, "learning_rate": 8.957765471189863e-05, "loss": 0.0665, "step": 2893 }, { "epoch": 0.75, "learning_rate": 8.95689605969652e-05, "loss": 0.0461, "step": 2894 }, { "epoch": 0.75, "learning_rate": 8.956026327959739e-05, "loss": 0.0509, "step": 2895 }, { "epoch": 0.75, "learning_rate": 8.95515627604991e-05, "loss": 0.0608, "step": 2896 }, { "epoch": 0.75, "learning_rate": 8.954285904037447e-05, "loss": 0.0504, "step": 2897 }, { "epoch": 0.75, "learning_rate": 8.953415211992794e-05, "loss": 0.0743, "step": 2898 }, { "epoch": 0.75, "learning_rate": 8.952544199986417e-05, "loss": 0.0735, "step": 2899 }, { "epoch": 0.75, "learning_rate": 8.95167286808881e-05, "loss": 0.0595, "step": 2900 }, { "epoch": 0.75, "learning_rate": 8.950801216370495e-05, "loss": 0.0184, "step": 2901 }, { "epoch": 0.75, "learning_rate": 8.949929244902014e-05, "loss": 0.0597, "step": 2902 }, { "epoch": 0.75, "learning_rate": 8.949056953753938e-05, "loss": 0.0675, "step": 2903 }, { "epoch": 0.75, "learning_rate": 8.948184342996868e-05, "loss": 0.0945, "step": 2904 }, { "epoch": 0.75, "learning_rate": 8.947311412701422e-05, "loss": 0.0632, "step": 2905 }, { "epoch": 0.75, "learning_rate": 8.946438162938254e-05, "loss": 0.069, "step": 2906 }, { "epoch": 0.75, "learning_rate": 8.945564593778036e-05, "loss": 0.0661, "step": 2907 }, { "epoch": 0.75, "learning_rate": 8.944690705291467e-05, "loss": 0.0667, "step": 2908 }, { "epoch": 0.75, "learning_rate": 8.943816497549276e-05, "loss": 0.0601, "step": 2909 }, { "epoch": 0.75, "learning_rate": 8.942941970622216e-05, "loss": 0.0293, "step": 2910 }, { "epoch": 0.75, "learning_rate": 8.942067124581061e-05, "loss": 0.0803, "step": 2911 }, { "epoch": 0.75, "learning_rate": 8.94119195949662e-05, "loss": 0.0522, "step": 2912 }, { "epoch": 0.75, "learning_rate": 8.940316475439719e-05, "loss": 0.0757, "step": 2913 }, { "epoch": 0.75, "learning_rate": 8.939440672481213e-05, "loss": 0.0689, "step": 2914 }, { "epoch": 0.75, "learning_rate": 8.938564550691987e-05, "loss": 0.0654, "step": 2915 }, { "epoch": 0.75, "learning_rate": 8.937688110142945e-05, "loss": 0.0626, "step": 2916 }, { "epoch": 0.75, "learning_rate": 8.936811350905024e-05, "loss": 0.082, "step": 2917 }, { "epoch": 0.75, "learning_rate": 8.935934273049176e-05, "loss": 0.055, "step": 2918 }, { "epoch": 0.75, "learning_rate": 8.935056876646392e-05, "loss": 0.0804, "step": 2919 }, { "epoch": 0.75, "learning_rate": 8.934179161767679e-05, "loss": 0.0571, "step": 2920 }, { "epoch": 0.75, "learning_rate": 8.933301128484073e-05, "loss": 0.0729, "step": 2921 }, { "epoch": 0.75, "learning_rate": 8.932422776866638e-05, "loss": 0.0668, "step": 2922 }, { "epoch": 0.75, "learning_rate": 8.93154410698646e-05, "loss": 0.049, "step": 2923 }, { "epoch": 0.75, "learning_rate": 8.930665118914653e-05, "loss": 0.0692, "step": 2924 }, { "epoch": 0.75, "learning_rate": 8.929785812722356e-05, "loss": 0.0692, "step": 2925 }, { "epoch": 0.76, "learning_rate": 8.928906188480733e-05, "loss": 0.0403, "step": 2926 }, { "epoch": 0.76, "learning_rate": 8.928026246260974e-05, "loss": 0.0614, "step": 2927 }, { "epoch": 0.76, "learning_rate": 8.9271459861343e-05, "loss": 0.0543, "step": 2928 }, { "epoch": 0.76, "learning_rate": 8.926265408171948e-05, "loss": 0.0584, "step": 2929 }, { "epoch": 0.76, "learning_rate": 8.925384512445187e-05, "loss": 0.068, "step": 2930 }, { "epoch": 0.76, "learning_rate": 8.924503299025312e-05, "loss": 0.0634, "step": 2931 }, { "epoch": 0.76, "learning_rate": 8.923621767983642e-05, "loss": 0.0631, "step": 2932 }, { "epoch": 0.76, "learning_rate": 8.92273991939152e-05, "loss": 0.0658, "step": 2933 }, { "epoch": 0.76, "learning_rate": 8.92185775332032e-05, "loss": 0.0539, "step": 2934 }, { "epoch": 0.76, "learning_rate": 8.920975269841435e-05, "loss": 0.0534, "step": 2935 }, { "epoch": 0.76, "learning_rate": 8.920092469026288e-05, "loss": 0.0703, "step": 2936 }, { "epoch": 0.76, "learning_rate": 8.919209350946327e-05, "loss": 0.0417, "step": 2937 }, { "epoch": 0.76, "learning_rate": 8.918325915673026e-05, "loss": 0.053, "step": 2938 }, { "epoch": 0.76, "learning_rate": 8.917442163277883e-05, "loss": 0.0537, "step": 2939 }, { "epoch": 0.76, "learning_rate": 8.916558093832424e-05, "loss": 0.0658, "step": 2940 }, { "epoch": 0.76, "learning_rate": 8.915673707408197e-05, "loss": 0.0627, "step": 2941 }, { "epoch": 0.76, "learning_rate": 8.914789004076782e-05, "loss": 0.0672, "step": 2942 }, { "epoch": 0.76, "learning_rate": 8.913903983909777e-05, "loss": 0.066, "step": 2943 }, { "epoch": 0.76, "learning_rate": 8.91301864697881e-05, "loss": 0.0419, "step": 2944 }, { "epoch": 0.76, "learning_rate": 8.912132993355536e-05, "loss": 0.0631, "step": 2945 }, { "epoch": 0.76, "learning_rate": 8.91124702311163e-05, "loss": 0.0663, "step": 2946 }, { "epoch": 0.76, "learning_rate": 8.910360736318801e-05, "loss": 0.0769, "step": 2947 }, { "epoch": 0.76, "learning_rate": 8.909474133048776e-05, "loss": 0.0654, "step": 2948 }, { "epoch": 0.76, "learning_rate": 8.908587213373309e-05, "loss": 0.0711, "step": 2949 }, { "epoch": 0.76, "learning_rate": 8.907699977364186e-05, "loss": 0.0784, "step": 2950 }, { "epoch": 0.76, "learning_rate": 8.906812425093207e-05, "loss": 0.0524, "step": 2951 }, { "epoch": 0.76, "learning_rate": 8.905924556632211e-05, "loss": 0.0518, "step": 2952 }, { "epoch": 0.76, "learning_rate": 8.90503637205305e-05, "loss": 0.0616, "step": 2953 }, { "epoch": 0.76, "learning_rate": 8.904147871427611e-05, "loss": 0.0842, "step": 2954 }, { "epoch": 0.76, "learning_rate": 8.903259054827801e-05, "loss": 0.0709, "step": 2955 }, { "epoch": 0.76, "learning_rate": 8.902369922325558e-05, "loss": 0.0616, "step": 2956 }, { "epoch": 0.76, "learning_rate": 8.901480473992838e-05, "loss": 0.0516, "step": 2957 }, { "epoch": 0.76, "learning_rate": 8.900590709901628e-05, "loss": 0.0634, "step": 2958 }, { "epoch": 0.76, "learning_rate": 8.899700630123941e-05, "loss": 0.0775, "step": 2959 }, { "epoch": 0.76, "learning_rate": 8.898810234731813e-05, "loss": 0.0288, "step": 2960 }, { "epoch": 0.76, "learning_rate": 8.897919523797305e-05, "loss": 0.0491, "step": 2961 }, { "epoch": 0.76, "learning_rate": 8.897028497392506e-05, "loss": 0.0589, "step": 2962 }, { "epoch": 0.76, "learning_rate": 8.89613715558953e-05, "loss": 0.0826, "step": 2963 }, { "epoch": 0.76, "learning_rate": 8.895245498460514e-05, "loss": 0.0602, "step": 2964 }, { "epoch": 0.77, "learning_rate": 8.894353526077623e-05, "loss": 0.0645, "step": 2965 }, { "epoch": 0.77, "learning_rate": 8.89346123851305e-05, "loss": 0.0732, "step": 2966 }, { "epoch": 0.77, "learning_rate": 8.892568635839007e-05, "loss": 0.0657, "step": 2967 }, { "epoch": 0.77, "learning_rate": 8.891675718127737e-05, "loss": 0.0699, "step": 2968 }, { "epoch": 0.77, "learning_rate": 8.890782485451505e-05, "loss": 0.0813, "step": 2969 }, { "epoch": 0.77, "learning_rate": 8.889888937882604e-05, "loss": 0.0741, "step": 2970 }, { "epoch": 0.77, "learning_rate": 8.888995075493352e-05, "loss": 0.0569, "step": 2971 }, { "epoch": 0.77, "learning_rate": 8.88810089835609e-05, "loss": 0.0589, "step": 2972 }, { "epoch": 0.77, "learning_rate": 8.887206406543189e-05, "loss": 0.0563, "step": 2973 }, { "epoch": 0.77, "learning_rate": 8.886311600127039e-05, "loss": 0.0797, "step": 2974 }, { "epoch": 0.77, "learning_rate": 8.885416479180065e-05, "loss": 0.0489, "step": 2975 }, { "epoch": 0.77, "learning_rate": 8.884521043774706e-05, "loss": 0.0645, "step": 2976 }, { "epoch": 0.77, "learning_rate": 8.883625293983435e-05, "loss": 0.063, "step": 2977 }, { "epoch": 0.77, "learning_rate": 8.88272922987875e-05, "loss": 0.0545, "step": 2978 }, { "epoch": 0.77, "learning_rate": 8.881832851533167e-05, "loss": 0.079, "step": 2979 }, { "epoch": 0.77, "learning_rate": 8.880936159019237e-05, "loss": 0.0791, "step": 2980 }, { "epoch": 0.77, "learning_rate": 8.880039152409532e-05, "loss": 0.0413, "step": 2981 }, { "epoch": 0.77, "learning_rate": 8.879141831776645e-05, "loss": 0.0685, "step": 2982 }, { "epoch": 0.77, "learning_rate": 8.878244197193203e-05, "loss": 0.0562, "step": 2983 }, { "epoch": 0.77, "learning_rate": 8.877346248731853e-05, "loss": 0.0654, "step": 2984 }, { "epoch": 0.77, "learning_rate": 8.876447986465269e-05, "loss": 0.057, "step": 2985 }, { "epoch": 0.77, "learning_rate": 8.875549410466147e-05, "loss": 0.0844, "step": 2986 }, { "epoch": 0.77, "learning_rate": 8.874650520807216e-05, "loss": 0.0665, "step": 2987 }, { "epoch": 0.77, "learning_rate": 8.873751317561223e-05, "loss": 0.0436, "step": 2988 }, { "epoch": 0.77, "learning_rate": 8.872851800800945e-05, "loss": 0.045, "step": 2989 }, { "epoch": 0.77, "learning_rate": 8.871951970599183e-05, "loss": 0.0707, "step": 2990 }, { "epoch": 0.77, "learning_rate": 8.87105182702876e-05, "loss": 0.0877, "step": 2991 }, { "epoch": 0.77, "learning_rate": 8.87015137016253e-05, "loss": 0.0559, "step": 2992 }, { "epoch": 0.77, "learning_rate": 8.869250600073366e-05, "loss": 0.0702, "step": 2993 }, { "epoch": 0.77, "learning_rate": 8.868349516834176e-05, "loss": 0.068, "step": 2994 }, { "epoch": 0.77, "learning_rate": 8.867448120517881e-05, "loss": 0.0839, "step": 2995 }, { "epoch": 0.77, "learning_rate": 8.866546411197439e-05, "loss": 0.06, "step": 2996 }, { "epoch": 0.77, "learning_rate": 8.865644388945826e-05, "loss": 0.0783, "step": 2997 }, { "epoch": 0.77, "learning_rate": 8.864742053836044e-05, "loss": 0.0649, "step": 2998 }, { "epoch": 0.77, "learning_rate": 8.863839405941123e-05, "loss": 0.0546, "step": 2999 }, { "epoch": 0.77, "learning_rate": 8.862936445334116e-05, "loss": 0.0566, "step": 3000 }, { "epoch": 0.77, "learning_rate": 8.862033172088103e-05, "loss": 0.0672, "step": 3001 }, { "epoch": 0.77, "learning_rate": 8.86112958627619e-05, "loss": 0.0631, "step": 3002 }, { "epoch": 0.77, "learning_rate": 8.860225687971505e-05, "loss": 0.0676, "step": 3003 }, { "epoch": 0.78, "learning_rate": 8.859321477247205e-05, "loss": 0.0924, "step": 3004 }, { "epoch": 0.78, "learning_rate": 8.858416954176468e-05, "loss": 0.0525, "step": 3005 }, { "epoch": 0.78, "learning_rate": 8.857512118832501e-05, "loss": 0.0619, "step": 3006 }, { "epoch": 0.78, "learning_rate": 8.856606971288535e-05, "loss": 0.0595, "step": 3007 }, { "epoch": 0.78, "learning_rate": 8.855701511617826e-05, "loss": 0.0345, "step": 3008 }, { "epoch": 0.78, "learning_rate": 8.854795739893656e-05, "loss": 0.0895, "step": 3009 }, { "epoch": 0.78, "learning_rate": 8.853889656189333e-05, "loss": 0.0512, "step": 3010 }, { "epoch": 0.78, "learning_rate": 8.852983260578187e-05, "loss": 0.0686, "step": 3011 }, { "epoch": 0.78, "learning_rate": 8.852076553133575e-05, "loss": 0.0722, "step": 3012 }, { "epoch": 0.78, "learning_rate": 8.851169533928883e-05, "loss": 0.0832, "step": 3013 }, { "epoch": 0.78, "learning_rate": 8.850262203037514e-05, "loss": 0.0447, "step": 3014 }, { "epoch": 0.78, "learning_rate": 8.849354560532906e-05, "loss": 0.0444, "step": 3015 }, { "epoch": 0.78, "learning_rate": 8.848446606488512e-05, "loss": 0.0761, "step": 3016 }, { "epoch": 0.78, "learning_rate": 8.847538340977819e-05, "loss": 0.0577, "step": 3017 }, { "epoch": 0.78, "learning_rate": 8.846629764074335e-05, "loss": 0.0577, "step": 3018 }, { "epoch": 0.78, "learning_rate": 8.845720875851595e-05, "loss": 0.0476, "step": 3019 }, { "epoch": 0.78, "learning_rate": 8.844811676383155e-05, "loss": 0.0668, "step": 3020 }, { "epoch": 0.78, "learning_rate": 8.843902165742601e-05, "loss": 0.0607, "step": 3021 }, { "epoch": 0.78, "learning_rate": 8.842992344003542e-05, "loss": 0.0634, "step": 3022 }, { "epoch": 0.78, "learning_rate": 8.842082211239614e-05, "loss": 0.05, "step": 3023 }, { "epoch": 0.78, "learning_rate": 8.841171767524474e-05, "loss": 0.0676, "step": 3024 }, { "epoch": 0.78, "learning_rate": 8.84026101293181e-05, "loss": 0.0714, "step": 3025 }, { "epoch": 0.78, "learning_rate": 8.839349947535331e-05, "loss": 0.0603, "step": 3026 }, { "epoch": 0.78, "learning_rate": 8.838438571408772e-05, "loss": 0.0705, "step": 3027 }, { "epoch": 0.78, "learning_rate": 8.837526884625891e-05, "loss": 0.0539, "step": 3028 }, { "epoch": 0.78, "learning_rate": 8.836614887260478e-05, "loss": 0.0555, "step": 3029 }, { "epoch": 0.78, "learning_rate": 8.835702579386342e-05, "loss": 0.0618, "step": 3030 }, { "epoch": 0.78, "learning_rate": 8.834789961077316e-05, "loss": 0.0547, "step": 3031 }, { "epoch": 0.78, "learning_rate": 8.833877032407264e-05, "loss": 0.0687, "step": 3032 }, { "epoch": 0.78, "learning_rate": 8.832963793450073e-05, "loss": 0.0522, "step": 3033 }, { "epoch": 0.78, "learning_rate": 8.832050244279652e-05, "loss": 0.0612, "step": 3034 }, { "epoch": 0.78, "learning_rate": 8.831136384969937e-05, "loss": 0.0934, "step": 3035 }, { "epoch": 0.78, "learning_rate": 8.83022221559489e-05, "loss": 0.0659, "step": 3036 }, { "epoch": 0.78, "learning_rate": 8.829307736228498e-05, "loss": 0.0451, "step": 3037 }, { "epoch": 0.78, "learning_rate": 8.828392946944772e-05, "loss": 0.0747, "step": 3038 }, { "epoch": 0.78, "learning_rate": 8.827477847817749e-05, "loss": 0.0598, "step": 3039 }, { "epoch": 0.78, "learning_rate": 8.82656243892149e-05, "loss": 0.0655, "step": 3040 }, { "epoch": 0.78, "learning_rate": 8.825646720330083e-05, "loss": 0.0509, "step": 3041 }, { "epoch": 0.79, "learning_rate": 8.824730692117639e-05, "loss": 0.056, "step": 3042 }, { "epoch": 0.79, "learning_rate": 8.823814354358295e-05, "loss": 0.0714, "step": 3043 }, { "epoch": 0.79, "learning_rate": 8.822897707126212e-05, "loss": 0.0648, "step": 3044 }, { "epoch": 0.79, "learning_rate": 8.82198075049558e-05, "loss": 0.0361, "step": 3045 }, { "epoch": 0.79, "learning_rate": 8.821063484540607e-05, "loss": 0.0509, "step": 3046 }, { "epoch": 0.79, "learning_rate": 8.820145909335534e-05, "loss": 0.054, "step": 3047 }, { "epoch": 0.79, "learning_rate": 8.819228024954618e-05, "loss": 0.0858, "step": 3048 }, { "epoch": 0.79, "learning_rate": 8.818309831472152e-05, "loss": 0.0534, "step": 3049 }, { "epoch": 0.79, "learning_rate": 8.817391328962445e-05, "loss": 0.0443, "step": 3050 }, { "epoch": 0.79, "learning_rate": 8.816472517499834e-05, "loss": 0.0494, "step": 3051 }, { "epoch": 0.79, "learning_rate": 8.815553397158683e-05, "loss": 0.0821, "step": 3052 }, { "epoch": 0.79, "learning_rate": 8.814633968013377e-05, "loss": 0.0674, "step": 3053 }, { "epoch": 0.79, "learning_rate": 8.81371423013833e-05, "loss": 0.0813, "step": 3054 }, { "epoch": 0.79, "learning_rate": 8.812794183607978e-05, "loss": 0.0701, "step": 3055 }, { "epoch": 0.79, "learning_rate": 8.811873828496783e-05, "loss": 0.0542, "step": 3056 }, { "epoch": 0.79, "learning_rate": 8.810953164879233e-05, "loss": 0.0686, "step": 3057 }, { "epoch": 0.79, "learning_rate": 8.810032192829838e-05, "loss": 0.0377, "step": 3058 }, { "epoch": 0.79, "learning_rate": 8.809110912423137e-05, "loss": 0.0588, "step": 3059 }, { "epoch": 0.79, "learning_rate": 8.808189323733693e-05, "loss": 0.0496, "step": 3060 }, { "epoch": 0.79, "learning_rate": 8.80726742683609e-05, "loss": 0.0369, "step": 3061 }, { "epoch": 0.79, "learning_rate": 8.806345221804943e-05, "loss": 0.049, "step": 3062 }, { "epoch": 0.79, "learning_rate": 8.805422708714886e-05, "loss": 0.0695, "step": 3063 }, { "epoch": 0.79, "learning_rate": 8.804499887640581e-05, "loss": 0.0538, "step": 3064 }, { "epoch": 0.79, "learning_rate": 8.803576758656716e-05, "loss": 0.0559, "step": 3065 }, { "epoch": 0.79, "learning_rate": 8.802653321838004e-05, "loss": 0.0793, "step": 3066 }, { "epoch": 0.79, "learning_rate": 8.801729577259177e-05, "loss": 0.0479, "step": 3067 }, { "epoch": 0.79, "learning_rate": 8.800805524995e-05, "loss": 0.0692, "step": 3068 }, { "epoch": 0.79, "learning_rate": 8.79988116512026e-05, "loss": 0.0709, "step": 3069 }, { "epoch": 0.79, "learning_rate": 8.798956497709764e-05, "loss": 0.0612, "step": 3070 }, { "epoch": 0.79, "learning_rate": 8.798031522838352e-05, "loss": 0.0458, "step": 3071 }, { "epoch": 0.79, "learning_rate": 8.797106240580885e-05, "loss": 0.0425, "step": 3072 }, { "epoch": 0.79, "learning_rate": 8.796180651012247e-05, "loss": 0.052, "step": 3073 }, { "epoch": 0.79, "learning_rate": 8.795254754207348e-05, "loss": 0.0524, "step": 3074 }, { "epoch": 0.79, "learning_rate": 8.794328550241124e-05, "loss": 0.0676, "step": 3075 }, { "epoch": 0.79, "learning_rate": 8.793402039188538e-05, "loss": 0.0734, "step": 3076 }, { "epoch": 0.79, "learning_rate": 8.792475221124575e-05, "loss": 0.057, "step": 3077 }, { "epoch": 0.79, "learning_rate": 8.791548096124242e-05, "loss": 0.0526, "step": 3078 }, { "epoch": 0.79, "learning_rate": 8.790620664262575e-05, "loss": 0.07, "step": 3079 }, { "epoch": 0.79, "learning_rate": 8.789692925614637e-05, "loss": 0.0566, "step": 3080 }, { "epoch": 0.8, "learning_rate": 8.788764880255509e-05, "loss": 0.038, "step": 3081 }, { "epoch": 0.8, "learning_rate": 8.787836528260301e-05, "loss": 0.0587, "step": 3082 }, { "epoch": 0.8, "learning_rate": 8.786907869704149e-05, "loss": 0.0707, "step": 3083 }, { "epoch": 0.8, "learning_rate": 8.785978904662211e-05, "loss": 0.0557, "step": 3084 }, { "epoch": 0.8, "learning_rate": 8.785049633209671e-05, "loss": 0.071, "step": 3085 }, { "epoch": 0.8, "learning_rate": 8.784120055421739e-05, "loss": 0.0646, "step": 3086 }, { "epoch": 0.8, "learning_rate": 8.783190171373645e-05, "loss": 0.0543, "step": 3087 }, { "epoch": 0.8, "learning_rate": 8.782259981140653e-05, "loss": 0.0623, "step": 3088 }, { "epoch": 0.8, "learning_rate": 8.78132948479804e-05, "loss": 0.0625, "step": 3089 }, { "epoch": 0.8, "learning_rate": 8.780398682421117e-05, "loss": 0.0728, "step": 3090 }, { "epoch": 0.8, "learning_rate": 8.779467574085217e-05, "loss": 0.0664, "step": 3091 }, { "epoch": 0.8, "learning_rate": 8.778536159865696e-05, "loss": 0.046, "step": 3092 }, { "epoch": 0.8, "learning_rate": 8.777604439837938e-05, "loss": 0.0698, "step": 3093 }, { "epoch": 0.8, "learning_rate": 8.776672414077347e-05, "loss": 0.059, "step": 3094 }, { "epoch": 0.8, "learning_rate": 8.775740082659358e-05, "loss": 0.0564, "step": 3095 }, { "epoch": 0.8, "learning_rate": 8.774807445659422e-05, "loss": 0.0481, "step": 3096 }, { "epoch": 0.8, "learning_rate": 8.773874503153028e-05, "loss": 0.0491, "step": 3097 }, { "epoch": 0.8, "learning_rate": 8.772941255215677e-05, "loss": 0.0616, "step": 3098 }, { "epoch": 0.8, "learning_rate": 8.772007701922897e-05, "loss": 0.0501, "step": 3099 }, { "epoch": 0.8, "learning_rate": 8.77107384335025e-05, "loss": 0.0756, "step": 3100 }, { "epoch": 0.8, "learning_rate": 8.77013967957331e-05, "loss": 0.057, "step": 3101 }, { "epoch": 0.8, "learning_rate": 8.769205210667686e-05, "loss": 0.0646, "step": 3102 }, { "epoch": 0.8, "learning_rate": 8.768270436709002e-05, "loss": 0.0766, "step": 3103 }, { "epoch": 0.8, "learning_rate": 8.767335357772919e-05, "loss": 0.0669, "step": 3104 }, { "epoch": 0.8, "learning_rate": 8.766399973935111e-05, "loss": 0.0545, "step": 3105 }, { "epoch": 0.8, "learning_rate": 8.765464285271282e-05, "loss": 0.0805, "step": 3106 }, { "epoch": 0.8, "learning_rate": 8.764528291857162e-05, "loss": 0.0921, "step": 3107 }, { "epoch": 0.8, "learning_rate": 8.763591993768501e-05, "loss": 0.0509, "step": 3108 }, { "epoch": 0.8, "learning_rate": 8.762655391081078e-05, "loss": 0.0546, "step": 3109 }, { "epoch": 0.8, "learning_rate": 8.761718483870696e-05, "loss": 0.0568, "step": 3110 }, { "epoch": 0.8, "learning_rate": 8.760781272213178e-05, "loss": 0.0768, "step": 3111 }, { "epoch": 0.8, "learning_rate": 8.759843756184381e-05, "loss": 0.063, "step": 3112 }, { "epoch": 0.8, "learning_rate": 8.758905935860177e-05, "loss": 0.0874, "step": 3113 }, { "epoch": 0.8, "learning_rate": 8.757967811316467e-05, "loss": 0.0653, "step": 3114 }, { "epoch": 0.8, "learning_rate": 8.757029382629176e-05, "loss": 0.0725, "step": 3115 }, { "epoch": 0.8, "learning_rate": 8.756090649874256e-05, "loss": 0.0729, "step": 3116 }, { "epoch": 0.8, "learning_rate": 8.755151613127679e-05, "loss": 0.0527, "step": 3117 }, { "epoch": 0.8, "learning_rate": 8.754212272465445e-05, "loss": 0.0681, "step": 3118 }, { "epoch": 0.8, "learning_rate": 8.75327262796358e-05, "loss": 0.0671, "step": 3119 }, { "epoch": 0.81, "learning_rate": 8.752332679698128e-05, "loss": 0.0699, "step": 3120 }, { "epoch": 0.81, "learning_rate": 8.751392427745164e-05, "loss": 0.0677, "step": 3121 }, { "epoch": 0.81, "learning_rate": 8.750451872180785e-05, "loss": 0.0713, "step": 3122 }, { "epoch": 0.81, "learning_rate": 8.749511013081113e-05, "loss": 0.0612, "step": 3123 }, { "epoch": 0.81, "learning_rate": 8.748569850522296e-05, "loss": 0.0562, "step": 3124 }, { "epoch": 0.81, "learning_rate": 8.747628384580503e-05, "loss": 0.0709, "step": 3125 }, { "epoch": 0.81, "learning_rate": 8.746686615331931e-05, "loss": 0.0815, "step": 3126 }, { "epoch": 0.81, "learning_rate": 8.745744542852798e-05, "loss": 0.063, "step": 3127 }, { "epoch": 0.81, "learning_rate": 8.744802167219352e-05, "loss": 0.056, "step": 3128 }, { "epoch": 0.81, "learning_rate": 8.74385948850786e-05, "loss": 0.0608, "step": 3129 }, { "epoch": 0.81, "learning_rate": 8.742916506794619e-05, "loss": 0.053, "step": 3130 }, { "epoch": 0.81, "learning_rate": 8.741973222155941e-05, "loss": 0.0653, "step": 3131 }, { "epoch": 0.81, "learning_rate": 8.741029634668175e-05, "loss": 0.0611, "step": 3132 }, { "epoch": 0.81, "learning_rate": 8.740085744407685e-05, "loss": 0.0448, "step": 3133 }, { "epoch": 0.81, "learning_rate": 8.739141551450866e-05, "loss": 0.079, "step": 3134 }, { "epoch": 0.81, "learning_rate": 8.73819705587413e-05, "loss": 0.0669, "step": 3135 }, { "epoch": 0.81, "learning_rate": 8.73725225775392e-05, "loss": 0.0351, "step": 3136 }, { "epoch": 0.81, "learning_rate": 8.736307157166703e-05, "loss": 0.0528, "step": 3137 }, { "epoch": 0.81, "learning_rate": 8.735361754188967e-05, "loss": 0.0532, "step": 3138 }, { "epoch": 0.81, "learning_rate": 8.734416048897227e-05, "loss": 0.0634, "step": 3139 }, { "epoch": 0.81, "learning_rate": 8.73347004136802e-05, "loss": 0.0613, "step": 3140 }, { "epoch": 0.81, "learning_rate": 8.732523731677911e-05, "loss": 0.0691, "step": 3141 }, { "epoch": 0.81, "learning_rate": 8.731577119903486e-05, "loss": 0.0658, "step": 3142 }, { "epoch": 0.81, "learning_rate": 8.730630206121361e-05, "loss": 0.0636, "step": 3143 }, { "epoch": 0.81, "learning_rate": 8.729682990408171e-05, "loss": 0.0812, "step": 3144 }, { "epoch": 0.81, "learning_rate": 8.728735472840573e-05, "loss": 0.0786, "step": 3145 }, { "epoch": 0.81, "learning_rate": 8.727787653495256e-05, "loss": 0.0814, "step": 3146 }, { "epoch": 0.81, "learning_rate": 8.72683953244893e-05, "loss": 0.0638, "step": 3147 }, { "epoch": 0.81, "learning_rate": 8.725891109778326e-05, "loss": 0.0616, "step": 3148 }, { "epoch": 0.81, "learning_rate": 8.724942385560207e-05, "loss": 0.046, "step": 3149 }, { "epoch": 0.81, "learning_rate": 8.723993359871354e-05, "loss": 0.0478, "step": 3150 }, { "epoch": 0.81, "learning_rate": 8.723044032788576e-05, "loss": 0.038, "step": 3151 }, { "epoch": 0.81, "learning_rate": 8.722094404388702e-05, "loss": 0.0994, "step": 3152 }, { "epoch": 0.81, "learning_rate": 8.72114447474859e-05, "loss": 0.0415, "step": 3153 }, { "epoch": 0.81, "learning_rate": 8.720194243945121e-05, "loss": 0.0624, "step": 3154 }, { "epoch": 0.81, "learning_rate": 8.719243712055201e-05, "loss": 0.0721, "step": 3155 }, { "epoch": 0.81, "learning_rate": 8.718292879155756e-05, "loss": 0.0745, "step": 3156 }, { "epoch": 0.81, "learning_rate": 8.717341745323742e-05, "loss": 0.0659, "step": 3157 }, { "epoch": 0.81, "learning_rate": 8.716390310636137e-05, "loss": 0.0466, "step": 3158 }, { "epoch": 0.82, "learning_rate": 8.715438575169945e-05, "loss": 0.0448, "step": 3159 }, { "epoch": 0.82, "learning_rate": 8.71448653900219e-05, "loss": 0.0541, "step": 3160 }, { "epoch": 0.82, "learning_rate": 8.713534202209923e-05, "loss": 0.0554, "step": 3161 }, { "epoch": 0.82, "learning_rate": 8.712581564870222e-05, "loss": 0.0438, "step": 3162 }, { "epoch": 0.82, "learning_rate": 8.711628627060184e-05, "loss": 0.0594, "step": 3163 }, { "epoch": 0.82, "learning_rate": 8.710675388856936e-05, "loss": 0.0477, "step": 3164 }, { "epoch": 0.82, "learning_rate": 8.709721850337624e-05, "loss": 0.0308, "step": 3165 }, { "epoch": 0.82, "learning_rate": 8.708768011579422e-05, "loss": 0.0602, "step": 3166 }, { "epoch": 0.82, "learning_rate": 8.707813872659528e-05, "loss": 0.0386, "step": 3167 }, { "epoch": 0.82, "learning_rate": 8.706859433655161e-05, "loss": 0.0542, "step": 3168 }, { "epoch": 0.82, "learning_rate": 8.705904694643568e-05, "loss": 0.0689, "step": 3169 }, { "epoch": 0.82, "learning_rate": 8.704949655702019e-05, "loss": 0.0905, "step": 3170 }, { "epoch": 0.82, "learning_rate": 8.703994316907807e-05, "loss": 0.0665, "step": 3171 }, { "epoch": 0.82, "learning_rate": 8.703038678338252e-05, "loss": 0.0615, "step": 3172 }, { "epoch": 0.82, "learning_rate": 8.702082740070694e-05, "loss": 0.0654, "step": 3173 }, { "epoch": 0.82, "learning_rate": 8.701126502182504e-05, "loss": 0.0641, "step": 3174 }, { "epoch": 0.82, "learning_rate": 8.700169964751069e-05, "loss": 0.058, "step": 3175 }, { "epoch": 0.82, "learning_rate": 8.699213127853809e-05, "loss": 0.0461, "step": 3176 }, { "epoch": 0.82, "learning_rate": 8.698255991568158e-05, "loss": 0.0533, "step": 3177 }, { "epoch": 0.82, "learning_rate": 8.697298555971585e-05, "loss": 0.0703, "step": 3178 }, { "epoch": 0.82, "learning_rate": 8.696340821141575e-05, "loss": 0.0491, "step": 3179 }, { "epoch": 0.82, "learning_rate": 8.695382787155643e-05, "loss": 0.0557, "step": 3180 }, { "epoch": 0.82, "learning_rate": 8.694424454091322e-05, "loss": 0.0507, "step": 3181 }, { "epoch": 0.82, "learning_rate": 8.693465822026176e-05, "loss": 0.0792, "step": 3182 }, { "epoch": 0.82, "learning_rate": 8.692506891037788e-05, "loss": 0.0599, "step": 3183 }, { "epoch": 0.82, "learning_rate": 8.69154766120377e-05, "loss": 0.0543, "step": 3184 }, { "epoch": 0.82, "learning_rate": 8.690588132601752e-05, "loss": 0.0507, "step": 3185 }, { "epoch": 0.82, "learning_rate": 8.689628305309392e-05, "loss": 0.0636, "step": 3186 }, { "epoch": 0.82, "learning_rate": 8.688668179404373e-05, "loss": 0.0534, "step": 3187 }, { "epoch": 0.82, "learning_rate": 8.687707754964401e-05, "loss": 0.049, "step": 3188 }, { "epoch": 0.82, "learning_rate": 8.686747032067204e-05, "loss": 0.0547, "step": 3189 }, { "epoch": 0.82, "learning_rate": 8.685786010790537e-05, "loss": 0.0633, "step": 3190 }, { "epoch": 0.82, "learning_rate": 8.684824691212182e-05, "loss": 0.0506, "step": 3191 }, { "epoch": 0.82, "learning_rate": 8.683863073409935e-05, "loss": 0.0436, "step": 3192 }, { "epoch": 0.82, "learning_rate": 8.682901157461627e-05, "loss": 0.0795, "step": 3193 }, { "epoch": 0.82, "learning_rate": 8.681938943445109e-05, "loss": 0.082, "step": 3194 }, { "epoch": 0.82, "learning_rate": 8.680976431438252e-05, "loss": 0.0666, "step": 3195 }, { "epoch": 0.82, "learning_rate": 8.68001362151896e-05, "loss": 0.0666, "step": 3196 }, { "epoch": 0.83, "learning_rate": 8.679050513765153e-05, "loss": 0.0583, "step": 3197 }, { "epoch": 0.83, "learning_rate": 8.678087108254777e-05, "loss": 0.0891, "step": 3198 }, { "epoch": 0.83, "learning_rate": 8.677123405065807e-05, "loss": 0.0671, "step": 3199 }, { "epoch": 0.83, "learning_rate": 8.676159404276236e-05, "loss": 0.0725, "step": 3200 }, { "epoch": 0.83, "learning_rate": 8.675195105964083e-05, "loss": 0.0428, "step": 3201 }, { "epoch": 0.83, "learning_rate": 8.674230510207394e-05, "loss": 0.0635, "step": 3202 }, { "epoch": 0.83, "learning_rate": 8.673265617084235e-05, "loss": 0.0655, "step": 3203 }, { "epoch": 0.83, "learning_rate": 8.672300426672698e-05, "loss": 0.0555, "step": 3204 }, { "epoch": 0.83, "learning_rate": 8.671334939050896e-05, "loss": 0.0713, "step": 3205 }, { "epoch": 0.83, "learning_rate": 8.670369154296974e-05, "loss": 0.0946, "step": 3206 }, { "epoch": 0.83, "learning_rate": 8.669403072489091e-05, "loss": 0.0661, "step": 3207 }, { "epoch": 0.83, "learning_rate": 8.668436693705439e-05, "loss": 0.0623, "step": 3208 }, { "epoch": 0.83, "learning_rate": 8.667470018024228e-05, "loss": 0.0737, "step": 3209 }, { "epoch": 0.83, "learning_rate": 8.666503045523692e-05, "loss": 0.0837, "step": 3210 }, { "epoch": 0.83, "learning_rate": 8.665535776282094e-05, "loss": 0.054, "step": 3211 }, { "epoch": 0.83, "learning_rate": 8.664568210377717e-05, "loss": 0.0568, "step": 3212 }, { "epoch": 0.83, "learning_rate": 8.663600347888869e-05, "loss": 0.0695, "step": 3213 }, { "epoch": 0.83, "learning_rate": 8.662632188893882e-05, "loss": 0.073, "step": 3214 }, { "epoch": 0.83, "learning_rate": 8.661663733471113e-05, "loss": 0.0483, "step": 3215 }, { "epoch": 0.83, "learning_rate": 8.66069498169894e-05, "loss": 0.0614, "step": 3216 }, { "epoch": 0.83, "learning_rate": 8.659725933655767e-05, "loss": 0.0826, "step": 3217 }, { "epoch": 0.83, "learning_rate": 8.658756589420025e-05, "loss": 0.0404, "step": 3218 }, { "epoch": 0.83, "learning_rate": 8.657786949070161e-05, "loss": 0.055, "step": 3219 }, { "epoch": 0.83, "learning_rate": 8.656817012684656e-05, "loss": 0.0703, "step": 3220 }, { "epoch": 0.83, "learning_rate": 8.655846780342007e-05, "loss": 0.0652, "step": 3221 }, { "epoch": 0.83, "learning_rate": 8.654876252120739e-05, "loss": 0.0448, "step": 3222 }, { "epoch": 0.83, "learning_rate": 8.653905428099397e-05, "loss": 0.075, "step": 3223 }, { "epoch": 0.83, "learning_rate": 8.652934308356556e-05, "loss": 0.0716, "step": 3224 }, { "epoch": 0.83, "learning_rate": 8.651962892970811e-05, "loss": 0.0433, "step": 3225 }, { "epoch": 0.83, "learning_rate": 8.650991182020781e-05, "loss": 0.0615, "step": 3226 }, { "epoch": 0.83, "learning_rate": 8.650019175585108e-05, "loss": 0.0672, "step": 3227 }, { "epoch": 0.83, "learning_rate": 8.649046873742461e-05, "loss": 0.051, "step": 3228 }, { "epoch": 0.83, "learning_rate": 8.648074276571533e-05, "loss": 0.0626, "step": 3229 }, { "epoch": 0.83, "learning_rate": 8.647101384151037e-05, "loss": 0.0526, "step": 3230 }, { "epoch": 0.83, "learning_rate": 8.646128196559711e-05, "loss": 0.0439, "step": 3231 }, { "epoch": 0.83, "learning_rate": 8.64515471387632e-05, "loss": 0.0576, "step": 3232 }, { "epoch": 0.83, "learning_rate": 8.64418093617965e-05, "loss": 0.0553, "step": 3233 }, { "epoch": 0.83, "learning_rate": 8.643206863548513e-05, "loss": 0.0493, "step": 3234 }, { "epoch": 0.83, "learning_rate": 8.642232496061742e-05, "loss": 0.0618, "step": 3235 }, { "epoch": 0.84, "learning_rate": 8.641257833798197e-05, "loss": 0.052, "step": 3236 }, { "epoch": 0.84, "learning_rate": 8.640282876836758e-05, "loss": 0.0913, "step": 3237 }, { "epoch": 0.84, "learning_rate": 8.639307625256334e-05, "loss": 0.0541, "step": 3238 }, { "epoch": 0.84, "learning_rate": 8.638332079135854e-05, "loss": 0.0557, "step": 3239 }, { "epoch": 0.84, "learning_rate": 8.63735623855427e-05, "loss": 0.0815, "step": 3240 }, { "epoch": 0.84, "learning_rate": 8.636380103590562e-05, "loss": 0.0552, "step": 3241 }, { "epoch": 0.84, "learning_rate": 8.63540367432373e-05, "loss": 0.0429, "step": 3242 }, { "epoch": 0.84, "learning_rate": 8.634426950832801e-05, "loss": 0.0571, "step": 3243 }, { "epoch": 0.84, "learning_rate": 8.633449933196822e-05, "loss": 0.057, "step": 3244 }, { "epoch": 0.84, "learning_rate": 8.632472621494869e-05, "loss": 0.0599, "step": 3245 }, { "epoch": 0.84, "learning_rate": 8.631495015806036e-05, "loss": 0.0589, "step": 3246 }, { "epoch": 0.84, "learning_rate": 8.630517116209445e-05, "loss": 0.0612, "step": 3247 }, { "epoch": 0.84, "learning_rate": 8.62953892278424e-05, "loss": 0.059, "step": 3248 }, { "epoch": 0.84, "learning_rate": 8.628560435609588e-05, "loss": 0.0371, "step": 3249 }, { "epoch": 0.84, "learning_rate": 8.627581654764682e-05, "loss": 0.0955, "step": 3250 }, { "epoch": 0.84, "learning_rate": 8.626602580328738e-05, "loss": 0.0578, "step": 3251 }, { "epoch": 0.84, "learning_rate": 8.625623212380996e-05, "loss": 0.0641, "step": 3252 }, { "epoch": 0.84, "learning_rate": 8.624643551000717e-05, "loss": 0.0913, "step": 3253 }, { "epoch": 0.84, "learning_rate": 8.62366359626719e-05, "loss": 0.0661, "step": 3254 }, { "epoch": 0.84, "learning_rate": 8.622683348259724e-05, "loss": 0.0603, "step": 3255 }, { "epoch": 0.84, "learning_rate": 8.621702807057655e-05, "loss": 0.0675, "step": 3256 }, { "epoch": 0.84, "learning_rate": 8.620721972740338e-05, "loss": 0.0496, "step": 3257 }, { "epoch": 0.84, "learning_rate": 8.619740845387158e-05, "loss": 0.0524, "step": 3258 }, { "epoch": 0.84, "learning_rate": 8.618759425077522e-05, "loss": 0.0794, "step": 3259 }, { "epoch": 0.84, "learning_rate": 8.617777711890856e-05, "loss": 0.0321, "step": 3260 }, { "epoch": 0.84, "learning_rate": 8.616795705906613e-05, "loss": 0.0508, "step": 3261 }, { "epoch": 0.84, "learning_rate": 8.61581340720427e-05, "loss": 0.0635, "step": 3262 }, { "epoch": 0.84, "learning_rate": 8.614830815863331e-05, "loss": 0.0708, "step": 3263 }, { "epoch": 0.84, "learning_rate": 8.613847931963313e-05, "loss": 0.0458, "step": 3264 }, { "epoch": 0.84, "learning_rate": 8.612864755583771e-05, "loss": 0.0584, "step": 3265 }, { "epoch": 0.84, "learning_rate": 8.611881286804272e-05, "loss": 0.0664, "step": 3266 }, { "epoch": 0.84, "learning_rate": 8.610897525704413e-05, "loss": 0.0597, "step": 3267 }, { "epoch": 0.84, "learning_rate": 8.609913472363812e-05, "loss": 0.0371, "step": 3268 }, { "epoch": 0.84, "learning_rate": 8.608929126862112e-05, "loss": 0.0484, "step": 3269 }, { "epoch": 0.84, "learning_rate": 8.607944489278976e-05, "loss": 0.0752, "step": 3270 }, { "epoch": 0.84, "learning_rate": 8.6069595596941e-05, "loss": 0.0551, "step": 3271 }, { "epoch": 0.84, "learning_rate": 8.60597433818719e-05, "loss": 0.0547, "step": 3272 }, { "epoch": 0.84, "learning_rate": 8.604988824837987e-05, "loss": 0.0697, "step": 3273 }, { "epoch": 0.84, "learning_rate": 8.60400301972625e-05, "loss": 0.0466, "step": 3274 }, { "epoch": 0.85, "learning_rate": 8.603016922931765e-05, "loss": 0.0521, "step": 3275 }, { "epoch": 0.85, "learning_rate": 8.60203053453434e-05, "loss": 0.0577, "step": 3276 }, { "epoch": 0.85, "learning_rate": 8.601043854613806e-05, "loss": 0.0603, "step": 3277 }, { "epoch": 0.85, "learning_rate": 8.600056883250015e-05, "loss": 0.0684, "step": 3278 }, { "epoch": 0.85, "learning_rate": 8.599069620522849e-05, "loss": 0.0724, "step": 3279 }, { "epoch": 0.85, "learning_rate": 8.598082066512208e-05, "loss": 0.0568, "step": 3280 }, { "epoch": 0.85, "learning_rate": 8.597094221298018e-05, "loss": 0.0517, "step": 3281 }, { "epoch": 0.85, "learning_rate": 8.596106084960229e-05, "loss": 0.0872, "step": 3282 }, { "epoch": 0.85, "learning_rate": 8.595117657578813e-05, "loss": 0.0779, "step": 3283 }, { "epoch": 0.85, "learning_rate": 8.594128939233769e-05, "loss": 0.0657, "step": 3284 }, { "epoch": 0.85, "learning_rate": 8.593139930005114e-05, "loss": 0.0607, "step": 3285 }, { "epoch": 0.85, "learning_rate": 8.592150629972891e-05, "loss": 0.0588, "step": 3286 }, { "epoch": 0.85, "learning_rate": 8.59116103921717e-05, "loss": 0.0765, "step": 3287 }, { "epoch": 0.85, "learning_rate": 8.59017115781804e-05, "loss": 0.0527, "step": 3288 }, { "epoch": 0.85, "learning_rate": 8.589180985855615e-05, "loss": 0.0569, "step": 3289 }, { "epoch": 0.85, "learning_rate": 8.588190523410031e-05, "loss": 0.0572, "step": 3290 }, { "epoch": 0.85, "learning_rate": 8.587199770561454e-05, "loss": 0.0598, "step": 3291 }, { "epoch": 0.85, "learning_rate": 8.586208727390062e-05, "loss": 0.0682, "step": 3292 }, { "epoch": 0.85, "learning_rate": 8.585217393976068e-05, "loss": 0.0652, "step": 3293 }, { "epoch": 0.85, "learning_rate": 8.584225770399701e-05, "loss": 0.0465, "step": 3294 }, { "epoch": 0.85, "learning_rate": 8.583233856741218e-05, "loss": 0.0676, "step": 3295 }, { "epoch": 0.85, "learning_rate": 8.582241653080896e-05, "loss": 0.0826, "step": 3296 }, { "epoch": 0.85, "learning_rate": 8.581249159499038e-05, "loss": 0.0866, "step": 3297 }, { "epoch": 0.85, "learning_rate": 8.580256376075968e-05, "loss": 0.0596, "step": 3298 }, { "epoch": 0.85, "learning_rate": 8.579263302892037e-05, "loss": 0.0591, "step": 3299 }, { "epoch": 0.85, "learning_rate": 8.578269940027616e-05, "loss": 0.0609, "step": 3300 }, { "epoch": 0.85, "learning_rate": 8.577276287563103e-05, "loss": 0.0631, "step": 3301 }, { "epoch": 0.85, "learning_rate": 8.576282345578912e-05, "loss": 0.0737, "step": 3302 }, { "epoch": 0.85, "learning_rate": 8.57528811415549e-05, "loss": 0.0463, "step": 3303 }, { "epoch": 0.85, "learning_rate": 8.574293593373304e-05, "loss": 0.0596, "step": 3304 }, { "epoch": 0.85, "learning_rate": 8.573298783312842e-05, "loss": 0.0694, "step": 3305 }, { "epoch": 0.85, "learning_rate": 8.572303684054615e-05, "loss": 0.054, "step": 3306 }, { "epoch": 0.85, "learning_rate": 8.571308295679162e-05, "loss": 0.0691, "step": 3307 }, { "epoch": 0.85, "learning_rate": 8.570312618267042e-05, "loss": 0.0614, "step": 3308 }, { "epoch": 0.85, "learning_rate": 8.569316651898837e-05, "loss": 0.069, "step": 3309 }, { "epoch": 0.85, "learning_rate": 8.568320396655156e-05, "loss": 0.053, "step": 3310 }, { "epoch": 0.85, "learning_rate": 8.567323852616627e-05, "loss": 0.0555, "step": 3311 }, { "epoch": 0.85, "learning_rate": 8.566327019863904e-05, "loss": 0.0496, "step": 3312 }, { "epoch": 0.85, "learning_rate": 8.565329898477664e-05, "loss": 0.0776, "step": 3313 }, { "epoch": 0.86, "learning_rate": 8.564332488538604e-05, "loss": 0.075, "step": 3314 }, { "epoch": 0.86, "learning_rate": 8.563334790127453e-05, "loss": 0.0599, "step": 3315 }, { "epoch": 0.86, "learning_rate": 8.562336803324952e-05, "loss": 0.0639, "step": 3316 }, { "epoch": 0.86, "learning_rate": 8.561338528211876e-05, "loss": 0.0643, "step": 3317 }, { "epoch": 0.86, "learning_rate": 8.560339964869015e-05, "loss": 0.083, "step": 3318 }, { "epoch": 0.86, "learning_rate": 8.559341113377186e-05, "loss": 0.0546, "step": 3319 }, { "epoch": 0.86, "learning_rate": 8.55834197381723e-05, "loss": 0.0564, "step": 3320 }, { "epoch": 0.86, "learning_rate": 8.557342546270011e-05, "loss": 0.0515, "step": 3321 }, { "epoch": 0.86, "learning_rate": 8.556342830816413e-05, "loss": 0.0599, "step": 3322 }, { "epoch": 0.86, "learning_rate": 8.55534282753735e-05, "loss": 0.0746, "step": 3323 }, { "epoch": 0.86, "learning_rate": 8.554342536513751e-05, "loss": 0.0732, "step": 3324 }, { "epoch": 0.86, "learning_rate": 8.553341957826576e-05, "loss": 0.0627, "step": 3325 }, { "epoch": 0.86, "learning_rate": 8.552341091556804e-05, "loss": 0.0572, "step": 3326 }, { "epoch": 0.86, "learning_rate": 8.551339937785435e-05, "loss": 0.0539, "step": 3327 }, { "epoch": 0.86, "learning_rate": 8.5503384965935e-05, "loss": 0.0575, "step": 3328 }, { "epoch": 0.86, "learning_rate": 8.549336768062045e-05, "loss": 0.0713, "step": 3329 }, { "epoch": 0.86, "learning_rate": 8.548334752272146e-05, "loss": 0.0492, "step": 3330 }, { "epoch": 0.86, "learning_rate": 8.547332449304897e-05, "loss": 0.0383, "step": 3331 }, { "epoch": 0.86, "learning_rate": 8.546329859241416e-05, "loss": 0.0866, "step": 3332 }, { "epoch": 0.86, "learning_rate": 8.545326982162848e-05, "loss": 0.0624, "step": 3333 }, { "epoch": 0.86, "learning_rate": 8.54432381815036e-05, "loss": 0.0725, "step": 3334 }, { "epoch": 0.86, "learning_rate": 8.543320367285139e-05, "loss": 0.0533, "step": 3335 }, { "epoch": 0.86, "learning_rate": 8.542316629648399e-05, "loss": 0.0747, "step": 3336 }, { "epoch": 0.86, "learning_rate": 8.541312605321371e-05, "loss": 0.0607, "step": 3337 }, { "epoch": 0.86, "learning_rate": 8.54030829438532e-05, "loss": 0.0558, "step": 3338 }, { "epoch": 0.86, "learning_rate": 8.539303696921523e-05, "loss": 0.082, "step": 3339 }, { "epoch": 0.86, "learning_rate": 8.538298813011288e-05, "loss": 0.0482, "step": 3340 }, { "epoch": 0.86, "learning_rate": 8.53729364273594e-05, "loss": 0.0757, "step": 3341 }, { "epoch": 0.86, "learning_rate": 8.536288186176834e-05, "loss": 0.0536, "step": 3342 }, { "epoch": 0.86, "learning_rate": 8.535282443415342e-05, "loss": 0.0958, "step": 3343 }, { "epoch": 0.86, "learning_rate": 8.534276414532865e-05, "loss": 0.0641, "step": 3344 }, { "epoch": 0.86, "learning_rate": 8.533270099610821e-05, "loss": 0.0636, "step": 3345 }, { "epoch": 0.86, "learning_rate": 8.532263498730654e-05, "loss": 0.0564, "step": 3346 }, { "epoch": 0.86, "learning_rate": 8.531256611973833e-05, "loss": 0.0745, "step": 3347 }, { "epoch": 0.86, "learning_rate": 8.530249439421847e-05, "loss": 0.0572, "step": 3348 }, { "epoch": 0.86, "learning_rate": 8.529241981156211e-05, "loss": 0.0514, "step": 3349 }, { "epoch": 0.86, "learning_rate": 8.528234237258459e-05, "loss": 0.0686, "step": 3350 }, { "epoch": 0.86, "learning_rate": 8.527226207810154e-05, "loss": 0.0416, "step": 3351 }, { "epoch": 0.87, "learning_rate": 8.526217892892876e-05, "loss": 0.0553, "step": 3352 }, { "epoch": 0.87, "learning_rate": 8.525209292588234e-05, "loss": 0.0584, "step": 3353 }, { "epoch": 0.87, "learning_rate": 8.524200406977854e-05, "loss": 0.0406, "step": 3354 }, { "epoch": 0.87, "learning_rate": 8.523191236143389e-05, "loss": 0.0628, "step": 3355 }, { "epoch": 0.87, "learning_rate": 8.522181780166514e-05, "loss": 0.0431, "step": 3356 }, { "epoch": 0.87, "learning_rate": 8.521172039128929e-05, "loss": 0.0657, "step": 3357 }, { "epoch": 0.87, "learning_rate": 8.520162013112352e-05, "loss": 0.0821, "step": 3358 }, { "epoch": 0.87, "learning_rate": 8.519151702198532e-05, "loss": 0.0868, "step": 3359 }, { "epoch": 0.87, "learning_rate": 8.518141106469233e-05, "loss": 0.055, "step": 3360 }, { "epoch": 0.87, "learning_rate": 8.517130226006247e-05, "loss": 0.0497, "step": 3361 }, { "epoch": 0.87, "learning_rate": 8.516119060891387e-05, "loss": 0.077, "step": 3362 }, { "epoch": 0.87, "learning_rate": 8.51510761120649e-05, "loss": 0.0464, "step": 3363 }, { "epoch": 0.87, "learning_rate": 8.514095877033415e-05, "loss": 0.0455, "step": 3364 }, { "epoch": 0.87, "learning_rate": 8.513083858454045e-05, "loss": 0.0774, "step": 3365 }, { "epoch": 0.87, "learning_rate": 8.512071555550287e-05, "loss": 0.0572, "step": 3366 }, { "epoch": 0.87, "learning_rate": 8.511058968404068e-05, "loss": 0.0641, "step": 3367 }, { "epoch": 0.87, "learning_rate": 8.51004609709734e-05, "loss": 0.088, "step": 3368 }, { "epoch": 0.87, "learning_rate": 8.509032941712078e-05, "loss": 0.0576, "step": 3369 }, { "epoch": 0.87, "learning_rate": 8.50801950233028e-05, "loss": 0.0528, "step": 3370 }, { "epoch": 0.87, "learning_rate": 8.507005779033965e-05, "loss": 0.0519, "step": 3371 }, { "epoch": 0.87, "learning_rate": 8.50599177190518e-05, "loss": 0.0697, "step": 3372 }, { "epoch": 0.87, "learning_rate": 8.504977481025989e-05, "loss": 0.0557, "step": 3373 }, { "epoch": 0.87, "learning_rate": 8.503962906478481e-05, "loss": 0.0615, "step": 3374 }, { "epoch": 0.87, "learning_rate": 8.502948048344772e-05, "loss": 0.0377, "step": 3375 }, { "epoch": 0.87, "learning_rate": 8.501932906706993e-05, "loss": 0.0569, "step": 3376 }, { "epoch": 0.87, "learning_rate": 8.500917481647306e-05, "loss": 0.0596, "step": 3377 }, { "epoch": 0.87, "learning_rate": 8.49990177324789e-05, "loss": 0.0603, "step": 3378 }, { "epoch": 0.87, "learning_rate": 8.498885781590951e-05, "loss": 0.0614, "step": 3379 }, { "epoch": 0.87, "learning_rate": 8.497869506758717e-05, "loss": 0.0379, "step": 3380 }, { "epoch": 0.87, "learning_rate": 8.496852948833435e-05, "loss": 0.0477, "step": 3381 }, { "epoch": 0.87, "learning_rate": 8.495836107897382e-05, "loss": 0.061, "step": 3382 }, { "epoch": 0.87, "learning_rate": 8.494818984032849e-05, "loss": 0.0919, "step": 3383 }, { "epoch": 0.87, "learning_rate": 8.49380157732216e-05, "loss": 0.0534, "step": 3384 }, { "epoch": 0.87, "learning_rate": 8.492783887847653e-05, "loss": 0.0922, "step": 3385 }, { "epoch": 0.87, "learning_rate": 8.491765915691696e-05, "loss": 0.0605, "step": 3386 }, { "epoch": 0.87, "learning_rate": 8.490747660936674e-05, "loss": 0.0651, "step": 3387 }, { "epoch": 0.87, "learning_rate": 8.489729123664997e-05, "loss": 0.0597, "step": 3388 }, { "epoch": 0.87, "learning_rate": 8.488710303959101e-05, "loss": 0.0726, "step": 3389 }, { "epoch": 0.87, "learning_rate": 8.487691201901439e-05, "loss": 0.055, "step": 3390 }, { "epoch": 0.88, "learning_rate": 8.486671817574493e-05, "loss": 0.0661, "step": 3391 }, { "epoch": 0.88, "learning_rate": 8.485652151060764e-05, "loss": 0.0836, "step": 3392 }, { "epoch": 0.88, "learning_rate": 8.484632202442773e-05, "loss": 0.0375, "step": 3393 }, { "epoch": 0.88, "learning_rate": 8.483611971803072e-05, "loss": 0.0747, "step": 3394 }, { "epoch": 0.88, "learning_rate": 8.48259145922423e-05, "loss": 0.0686, "step": 3395 }, { "epoch": 0.88, "learning_rate": 8.48157066478884e-05, "loss": 0.0707, "step": 3396 }, { "epoch": 0.88, "learning_rate": 8.480549588579518e-05, "loss": 0.0665, "step": 3397 }, { "epoch": 0.88, "learning_rate": 8.479528230678903e-05, "loss": 0.0609, "step": 3398 }, { "epoch": 0.88, "learning_rate": 8.478506591169656e-05, "loss": 0.0769, "step": 3399 }, { "epoch": 0.88, "learning_rate": 8.477484670134461e-05, "loss": 0.0536, "step": 3400 }, { "epoch": 0.88, "learning_rate": 8.476462467656027e-05, "loss": 0.0698, "step": 3401 }, { "epoch": 0.88, "learning_rate": 8.475439983817081e-05, "loss": 0.0645, "step": 3402 }, { "epoch": 0.88, "learning_rate": 8.474417218700376e-05, "loss": 0.0741, "step": 3403 }, { "epoch": 0.88, "learning_rate": 8.473394172388692e-05, "loss": 0.1029, "step": 3404 }, { "epoch": 0.88, "learning_rate": 8.472370844964824e-05, "loss": 0.0507, "step": 3405 }, { "epoch": 0.88, "learning_rate": 8.47134723651159e-05, "loss": 0.0919, "step": 3406 }, { "epoch": 0.88, "learning_rate": 8.470323347111837e-05, "loss": 0.084, "step": 3407 }, { "epoch": 0.88, "learning_rate": 8.469299176848432e-05, "loss": 0.0815, "step": 3408 }, { "epoch": 0.88, "learning_rate": 8.468274725804262e-05, "loss": 0.0588, "step": 3409 }, { "epoch": 0.88, "learning_rate": 8.467249994062243e-05, "loss": 0.0506, "step": 3410 }, { "epoch": 0.88, "learning_rate": 8.466224981705304e-05, "loss": 0.0482, "step": 3411 }, { "epoch": 0.88, "learning_rate": 8.465199688816406e-05, "loss": 0.0774, "step": 3412 }, { "epoch": 0.88, "learning_rate": 8.464174115478527e-05, "loss": 0.0655, "step": 3413 }, { "epoch": 0.88, "learning_rate": 8.463148261774672e-05, "loss": 0.0339, "step": 3414 }, { "epoch": 0.88, "learning_rate": 8.462122127787863e-05, "loss": 0.061, "step": 3415 }, { "epoch": 0.88, "learning_rate": 8.461095713601151e-05, "loss": 0.0859, "step": 3416 }, { "epoch": 0.88, "learning_rate": 8.460069019297608e-05, "loss": 0.0666, "step": 3417 }, { "epoch": 0.88, "learning_rate": 8.459042044960322e-05, "loss": 0.0614, "step": 3418 }, { "epoch": 0.88, "learning_rate": 8.458014790672415e-05, "loss": 0.0754, "step": 3419 }, { "epoch": 0.88, "learning_rate": 8.456987256517023e-05, "loss": 0.0789, "step": 3420 }, { "epoch": 0.88, "learning_rate": 8.455959442577306e-05, "loss": 0.0645, "step": 3421 }, { "epoch": 0.88, "learning_rate": 8.454931348936452e-05, "loss": 0.0666, "step": 3422 }, { "epoch": 0.88, "learning_rate": 8.453902975677666e-05, "loss": 0.0921, "step": 3423 }, { "epoch": 0.88, "learning_rate": 8.452874322884174e-05, "loss": 0.0392, "step": 3424 }, { "epoch": 0.88, "learning_rate": 8.451845390639231e-05, "loss": 0.0603, "step": 3425 }, { "epoch": 0.88, "learning_rate": 8.450816179026113e-05, "loss": 0.0587, "step": 3426 }, { "epoch": 0.88, "learning_rate": 8.449786688128114e-05, "loss": 0.059, "step": 3427 }, { "epoch": 0.88, "learning_rate": 8.448756918028558e-05, "loss": 0.0533, "step": 3428 }, { "epoch": 0.88, "learning_rate": 8.447726868810781e-05, "loss": 0.0499, "step": 3429 }, { "epoch": 0.89, "learning_rate": 8.446696540558153e-05, "loss": 0.0479, "step": 3430 }, { "epoch": 0.89, "learning_rate": 8.445665933354061e-05, "loss": 0.0602, "step": 3431 }, { "epoch": 0.89, "learning_rate": 8.444635047281914e-05, "loss": 0.0568, "step": 3432 }, { "epoch": 0.89, "learning_rate": 8.443603882425143e-05, "loss": 0.06, "step": 3433 }, { "epoch": 0.89, "learning_rate": 8.442572438867207e-05, "loss": 0.0828, "step": 3434 }, { "epoch": 0.89, "learning_rate": 8.441540716691582e-05, "loss": 0.0609, "step": 3435 }, { "epoch": 0.89, "learning_rate": 8.440508715981767e-05, "loss": 0.0574, "step": 3436 }, { "epoch": 0.89, "learning_rate": 8.439476436821288e-05, "loss": 0.066, "step": 3437 }, { "epoch": 0.89, "learning_rate": 8.438443879293689e-05, "loss": 0.0485, "step": 3438 }, { "epoch": 0.89, "learning_rate": 8.437411043482538e-05, "loss": 0.053, "step": 3439 }, { "epoch": 0.89, "learning_rate": 8.436377929471421e-05, "loss": 0.0697, "step": 3440 }, { "epoch": 0.89, "learning_rate": 8.43534453734396e-05, "loss": 0.0656, "step": 3441 }, { "epoch": 0.89, "learning_rate": 8.434310867183783e-05, "loss": 0.0728, "step": 3442 }, { "epoch": 0.89, "learning_rate": 8.433276919074553e-05, "loss": 0.0466, "step": 3443 }, { "epoch": 0.89, "learning_rate": 8.432242693099946e-05, "loss": 0.0599, "step": 3444 }, { "epoch": 0.89, "learning_rate": 8.43120818934367e-05, "loss": 0.0565, "step": 3445 }, { "epoch": 0.89, "learning_rate": 8.430173407889445e-05, "loss": 0.0418, "step": 3446 }, { "epoch": 0.89, "learning_rate": 8.429138348821022e-05, "loss": 0.0453, "step": 3447 }, { "epoch": 0.89, "learning_rate": 8.428103012222172e-05, "loss": 0.0688, "step": 3448 }, { "epoch": 0.89, "learning_rate": 8.427067398176687e-05, "loss": 0.0686, "step": 3449 }, { "epoch": 0.89, "learning_rate": 8.426031506768381e-05, "loss": 0.0557, "step": 3450 }, { "epoch": 0.89, "learning_rate": 8.424995338081094e-05, "loss": 0.0547, "step": 3451 }, { "epoch": 0.89, "learning_rate": 8.423958892198687e-05, "loss": 0.0624, "step": 3452 }, { "epoch": 0.89, "learning_rate": 8.422922169205039e-05, "loss": 0.047, "step": 3453 }, { "epoch": 0.89, "learning_rate": 8.421885169184059e-05, "loss": 0.0557, "step": 3454 }, { "epoch": 0.89, "learning_rate": 8.42084789221967e-05, "loss": 0.0559, "step": 3455 }, { "epoch": 0.89, "learning_rate": 8.419810338395826e-05, "loss": 0.0722, "step": 3456 }, { "epoch": 0.89, "learning_rate": 8.418772507796498e-05, "loss": 0.0434, "step": 3457 }, { "epoch": 0.89, "learning_rate": 8.417734400505681e-05, "loss": 0.0515, "step": 3458 }, { "epoch": 0.89, "learning_rate": 8.416696016607392e-05, "loss": 0.0575, "step": 3459 }, { "epoch": 0.89, "learning_rate": 8.415657356185669e-05, "loss": 0.0496, "step": 3460 }, { "epoch": 0.89, "learning_rate": 8.414618419324576e-05, "loss": 0.0595, "step": 3461 }, { "epoch": 0.89, "learning_rate": 8.413579206108197e-05, "loss": 0.0669, "step": 3462 }, { "epoch": 0.89, "learning_rate": 8.412539716620637e-05, "loss": 0.0532, "step": 3463 }, { "epoch": 0.89, "learning_rate": 8.411499950946027e-05, "loss": 0.065, "step": 3464 }, { "epoch": 0.89, "learning_rate": 8.410459909168518e-05, "loss": 0.0544, "step": 3465 }, { "epoch": 0.89, "learning_rate": 8.409419591372282e-05, "loss": 0.0582, "step": 3466 }, { "epoch": 0.89, "learning_rate": 8.408378997641515e-05, "loss": 0.0478, "step": 3467 }, { "epoch": 0.89, "learning_rate": 8.407338128060439e-05, "loss": 0.0494, "step": 3468 }, { "epoch": 0.9, "learning_rate": 8.40629698271329e-05, "loss": 0.0483, "step": 3469 }, { "epoch": 0.9, "learning_rate": 8.405255561684335e-05, "loss": 0.0557, "step": 3470 }, { "epoch": 0.9, "learning_rate": 8.404213865057857e-05, "loss": 0.0849, "step": 3471 }, { "epoch": 0.9, "learning_rate": 8.403171892918164e-05, "loss": 0.0607, "step": 3472 }, { "epoch": 0.9, "learning_rate": 8.402129645349587e-05, "loss": 0.0554, "step": 3473 }, { "epoch": 0.9, "learning_rate": 8.401087122436476e-05, "loss": 0.0647, "step": 3474 }, { "epoch": 0.9, "learning_rate": 8.400044324263207e-05, "loss": 0.0435, "step": 3475 }, { "epoch": 0.9, "learning_rate": 8.399001250914178e-05, "loss": 0.0563, "step": 3476 }, { "epoch": 0.9, "learning_rate": 8.397957902473806e-05, "loss": 0.0618, "step": 3477 }, { "epoch": 0.9, "learning_rate": 8.396914279026532e-05, "loss": 0.0626, "step": 3478 }, { "epoch": 0.9, "learning_rate": 8.395870380656821e-05, "loss": 0.0619, "step": 3479 }, { "epoch": 0.9, "learning_rate": 8.394826207449157e-05, "loss": 0.067, "step": 3480 }, { "epoch": 0.9, "learning_rate": 8.393781759488052e-05, "loss": 0.0659, "step": 3481 }, { "epoch": 0.9, "learning_rate": 8.392737036858033e-05, "loss": 0.0671, "step": 3482 }, { "epoch": 0.9, "learning_rate": 8.391692039643653e-05, "loss": 0.0558, "step": 3483 }, { "epoch": 0.9, "learning_rate": 8.390646767929486e-05, "loss": 0.0569, "step": 3484 }, { "epoch": 0.9, "learning_rate": 8.389601221800131e-05, "loss": 0.0691, "step": 3485 }, { "epoch": 0.9, "learning_rate": 8.388555401340206e-05, "loss": 0.0626, "step": 3486 }, { "epoch": 0.9, "learning_rate": 8.387509306634352e-05, "loss": 0.0861, "step": 3487 }, { "epoch": 0.9, "learning_rate": 8.386462937767234e-05, "loss": 0.0803, "step": 3488 }, { "epoch": 0.9, "learning_rate": 8.385416294823536e-05, "loss": 0.0632, "step": 3489 }, { "epoch": 0.9, "learning_rate": 8.384369377887966e-05, "loss": 0.0342, "step": 3490 }, { "epoch": 0.9, "learning_rate": 8.383322187045257e-05, "loss": 0.0661, "step": 3491 }, { "epoch": 0.9, "learning_rate": 8.382274722380159e-05, "loss": 0.0666, "step": 3492 }, { "epoch": 0.9, "learning_rate": 8.381226983977444e-05, "loss": 0.052, "step": 3493 }, { "epoch": 0.9, "learning_rate": 8.380178971921914e-05, "loss": 0.0649, "step": 3494 }, { "epoch": 0.9, "learning_rate": 8.379130686298385e-05, "loss": 0.0734, "step": 3495 }, { "epoch": 0.9, "learning_rate": 8.378082127191696e-05, "loss": 0.0634, "step": 3496 }, { "epoch": 0.9, "learning_rate": 8.377033294686713e-05, "loss": 0.0581, "step": 3497 }, { "epoch": 0.9, "learning_rate": 8.375984188868319e-05, "loss": 0.0583, "step": 3498 }, { "epoch": 0.9, "learning_rate": 8.374934809821425e-05, "loss": 0.083, "step": 3499 }, { "epoch": 0.9, "learning_rate": 8.373885157630954e-05, "loss": 0.0474, "step": 3500 }, { "epoch": 0.9, "learning_rate": 8.372835232381865e-05, "loss": 0.0744, "step": 3501 }, { "epoch": 0.9, "learning_rate": 8.371785034159125e-05, "loss": 0.0535, "step": 3502 }, { "epoch": 0.9, "learning_rate": 8.370734563047734e-05, "loss": 0.0803, "step": 3503 }, { "epoch": 0.9, "learning_rate": 8.369683819132708e-05, "loss": 0.08, "step": 3504 }, { "epoch": 0.9, "learning_rate": 8.368632802499088e-05, "loss": 0.0688, "step": 3505 }, { "epoch": 0.9, "learning_rate": 8.367581513231932e-05, "loss": 0.064, "step": 3506 }, { "epoch": 0.91, "learning_rate": 8.366529951416331e-05, "loss": 0.074, "step": 3507 }, { "epoch": 0.91, "learning_rate": 8.365478117137384e-05, "loss": 0.0561, "step": 3508 }, { "epoch": 0.91, "learning_rate": 8.364426010480223e-05, "loss": 0.0477, "step": 3509 }, { "epoch": 0.91, "learning_rate": 8.363373631529998e-05, "loss": 0.0887, "step": 3510 }, { "epoch": 0.91, "learning_rate": 8.36232098037188e-05, "loss": 0.0535, "step": 3511 }, { "epoch": 0.91, "learning_rate": 8.361268057091063e-05, "loss": 0.0613, "step": 3512 }, { "epoch": 0.91, "learning_rate": 8.360214861772767e-05, "loss": 0.0681, "step": 3513 }, { "epoch": 0.91, "learning_rate": 8.359161394502224e-05, "loss": 0.0639, "step": 3514 }, { "epoch": 0.91, "learning_rate": 8.358107655364699e-05, "loss": 0.0469, "step": 3515 }, { "epoch": 0.91, "learning_rate": 8.357053644445472e-05, "loss": 0.0581, "step": 3516 }, { "epoch": 0.91, "learning_rate": 8.355999361829847e-05, "loss": 0.0624, "step": 3517 }, { "epoch": 0.91, "learning_rate": 8.354944807603153e-05, "loss": 0.0835, "step": 3518 }, { "epoch": 0.91, "learning_rate": 8.353889981850736e-05, "loss": 0.037, "step": 3519 }, { "epoch": 0.91, "learning_rate": 8.352834884657965e-05, "loss": 0.0442, "step": 3520 }, { "epoch": 0.91, "learning_rate": 8.351779516110236e-05, "loss": 0.0523, "step": 3521 }, { "epoch": 0.91, "learning_rate": 8.350723876292959e-05, "loss": 0.069, "step": 3522 }, { "epoch": 0.91, "learning_rate": 8.349667965291575e-05, "loss": 0.0437, "step": 3523 }, { "epoch": 0.91, "learning_rate": 8.348611783191536e-05, "loss": 0.0492, "step": 3524 }, { "epoch": 0.91, "learning_rate": 8.347555330078327e-05, "loss": 0.0643, "step": 3525 }, { "epoch": 0.91, "learning_rate": 8.346498606037447e-05, "loss": 0.04, "step": 3526 }, { "epoch": 0.91, "learning_rate": 8.34544161115442e-05, "loss": 0.0879, "step": 3527 }, { "epoch": 0.91, "learning_rate": 8.344384345514794e-05, "loss": 0.044, "step": 3528 }, { "epoch": 0.91, "learning_rate": 8.343326809204135e-05, "loss": 0.0541, "step": 3529 }, { "epoch": 0.91, "learning_rate": 8.342269002308032e-05, "loss": 0.0552, "step": 3530 }, { "epoch": 0.91, "learning_rate": 8.341210924912097e-05, "loss": 0.0638, "step": 3531 }, { "epoch": 0.91, "learning_rate": 8.340152577101967e-05, "loss": 0.0525, "step": 3532 }, { "epoch": 0.91, "learning_rate": 8.33909395896329e-05, "loss": 0.0915, "step": 3533 }, { "epoch": 0.91, "learning_rate": 8.338035070581749e-05, "loss": 0.0706, "step": 3534 }, { "epoch": 0.91, "learning_rate": 8.33697591204304e-05, "loss": 0.0707, "step": 3535 }, { "epoch": 0.91, "learning_rate": 8.335916483432886e-05, "loss": 0.0776, "step": 3536 }, { "epoch": 0.91, "learning_rate": 8.334856784837028e-05, "loss": 0.041, "step": 3537 }, { "epoch": 0.91, "learning_rate": 8.33379681634123e-05, "loss": 0.0709, "step": 3538 }, { "epoch": 0.91, "learning_rate": 8.332736578031281e-05, "loss": 0.0446, "step": 3539 }, { "epoch": 0.91, "learning_rate": 8.331676069992989e-05, "loss": 0.0453, "step": 3540 }, { "epoch": 0.91, "learning_rate": 8.330615292312183e-05, "loss": 0.0461, "step": 3541 }, { "epoch": 0.91, "learning_rate": 8.329554245074713e-05, "loss": 0.0665, "step": 3542 }, { "epoch": 0.91, "learning_rate": 8.328492928366454e-05, "loss": 0.0512, "step": 3543 }, { "epoch": 0.91, "learning_rate": 8.327431342273304e-05, "loss": 0.0436, "step": 3544 }, { "epoch": 0.91, "learning_rate": 8.326369486881179e-05, "loss": 0.0645, "step": 3545 }, { "epoch": 0.92, "learning_rate": 8.325307362276018e-05, "loss": 0.0647, "step": 3546 }, { "epoch": 0.92, "learning_rate": 8.32424496854378e-05, "loss": 0.0562, "step": 3547 }, { "epoch": 0.92, "learning_rate": 8.323182305770449e-05, "loss": 0.0696, "step": 3548 }, { "epoch": 0.92, "learning_rate": 8.32211937404203e-05, "loss": 0.043, "step": 3549 }, { "epoch": 0.92, "learning_rate": 8.32105617344455e-05, "loss": 0.0668, "step": 3550 }, { "epoch": 0.92, "learning_rate": 8.319992704064054e-05, "loss": 0.0812, "step": 3551 }, { "epoch": 0.92, "learning_rate": 8.318928965986615e-05, "loss": 0.0961, "step": 3552 }, { "epoch": 0.92, "learning_rate": 8.317864959298325e-05, "loss": 0.0748, "step": 3553 }, { "epoch": 0.92, "learning_rate": 8.316800684085293e-05, "loss": 0.0455, "step": 3554 }, { "epoch": 0.92, "learning_rate": 8.315736140433657e-05, "loss": 0.0542, "step": 3555 }, { "epoch": 0.92, "learning_rate": 8.314671328429574e-05, "loss": 0.0625, "step": 3556 }, { "epoch": 0.92, "learning_rate": 8.31360624815922e-05, "loss": 0.0443, "step": 3557 }, { "epoch": 0.92, "learning_rate": 8.312540899708799e-05, "loss": 0.0684, "step": 3558 }, { "epoch": 0.92, "learning_rate": 8.31147528316453e-05, "loss": 0.0659, "step": 3559 }, { "epoch": 0.92, "learning_rate": 8.310409398612656e-05, "loss": 0.0622, "step": 3560 }, { "epoch": 0.92, "learning_rate": 8.309343246139445e-05, "loss": 0.0611, "step": 3561 }, { "epoch": 0.92, "learning_rate": 8.30827682583118e-05, "loss": 0.0483, "step": 3562 }, { "epoch": 0.92, "learning_rate": 8.307210137774174e-05, "loss": 0.0702, "step": 3563 }, { "epoch": 0.92, "learning_rate": 8.306143182054754e-05, "loss": 0.0413, "step": 3564 }, { "epoch": 0.92, "learning_rate": 8.305075958759273e-05, "loss": 0.0458, "step": 3565 }, { "epoch": 0.92, "learning_rate": 8.304008467974105e-05, "loss": 0.0537, "step": 3566 }, { "epoch": 0.92, "learning_rate": 8.302940709785645e-05, "loss": 0.0799, "step": 3567 }, { "epoch": 0.92, "learning_rate": 8.301872684280309e-05, "loss": 0.0719, "step": 3568 }, { "epoch": 0.92, "learning_rate": 8.300804391544537e-05, "loss": 0.0618, "step": 3569 }, { "epoch": 0.92, "learning_rate": 8.299735831664788e-05, "loss": 0.047, "step": 3570 }, { "epoch": 0.92, "learning_rate": 8.298667004727544e-05, "loss": 0.0522, "step": 3571 }, { "epoch": 0.92, "learning_rate": 8.297597910819308e-05, "loss": 0.0536, "step": 3572 }, { "epoch": 0.92, "learning_rate": 8.296528550026605e-05, "loss": 0.0666, "step": 3573 }, { "epoch": 0.92, "learning_rate": 8.295458922435983e-05, "loss": 0.0562, "step": 3574 }, { "epoch": 0.92, "learning_rate": 8.294389028134009e-05, "loss": 0.0552, "step": 3575 }, { "epoch": 0.92, "learning_rate": 8.293318867207274e-05, "loss": 0.0499, "step": 3576 }, { "epoch": 0.92, "learning_rate": 8.292248439742385e-05, "loss": 0.0454, "step": 3577 }, { "epoch": 0.92, "learning_rate": 8.29117774582598e-05, "loss": 0.0453, "step": 3578 }, { "epoch": 0.92, "learning_rate": 8.290106785544713e-05, "loss": 0.0488, "step": 3579 }, { "epoch": 0.92, "learning_rate": 8.289035558985257e-05, "loss": 0.0658, "step": 3580 }, { "epoch": 0.92, "learning_rate": 8.28796406623431e-05, "loss": 0.0631, "step": 3581 }, { "epoch": 0.92, "learning_rate": 8.286892307378594e-05, "loss": 0.0511, "step": 3582 }, { "epoch": 0.92, "learning_rate": 8.285820282504848e-05, "loss": 0.0584, "step": 3583 }, { "epoch": 0.92, "learning_rate": 8.284747991699833e-05, "loss": 0.0462, "step": 3584 }, { "epoch": 0.93, "learning_rate": 8.283675435050336e-05, "loss": 0.0572, "step": 3585 }, { "epoch": 0.93, "learning_rate": 8.282602612643157e-05, "loss": 0.0614, "step": 3586 }, { "epoch": 0.93, "learning_rate": 8.281529524565126e-05, "loss": 0.0457, "step": 3587 }, { "epoch": 0.93, "learning_rate": 8.280456170903093e-05, "loss": 0.0512, "step": 3588 }, { "epoch": 0.93, "learning_rate": 8.279382551743926e-05, "loss": 0.0636, "step": 3589 }, { "epoch": 0.93, "learning_rate": 8.278308667174514e-05, "loss": 0.0502, "step": 3590 }, { "epoch": 0.93, "learning_rate": 8.277234517281771e-05, "loss": 0.0595, "step": 3591 }, { "epoch": 0.93, "learning_rate": 8.276160102152635e-05, "loss": 0.0682, "step": 3592 }, { "epoch": 0.93, "learning_rate": 8.275085421874056e-05, "loss": 0.0665, "step": 3593 }, { "epoch": 0.93, "learning_rate": 8.274010476533014e-05, "loss": 0.0461, "step": 3594 }, { "epoch": 0.93, "learning_rate": 8.272935266216506e-05, "loss": 0.0412, "step": 3595 }, { "epoch": 0.93, "learning_rate": 8.271859791011553e-05, "loss": 0.0716, "step": 3596 }, { "epoch": 0.93, "learning_rate": 8.270784051005199e-05, "loss": 0.066, "step": 3597 }, { "epoch": 0.93, "learning_rate": 8.269708046284503e-05, "loss": 0.0557, "step": 3598 }, { "epoch": 0.93, "learning_rate": 8.268631776936551e-05, "loss": 0.0673, "step": 3599 }, { "epoch": 0.93, "learning_rate": 8.267555243048446e-05, "loss": 0.0544, "step": 3600 }, { "epoch": 0.93, "learning_rate": 8.26647844470732e-05, "loss": 0.0615, "step": 3601 }, { "epoch": 0.93, "learning_rate": 8.265401382000318e-05, "loss": 0.0738, "step": 3602 }, { "epoch": 0.93, "learning_rate": 8.26432405501461e-05, "loss": 0.0747, "step": 3603 }, { "epoch": 0.93, "learning_rate": 8.263246463837388e-05, "loss": 0.0659, "step": 3604 }, { "epoch": 0.93, "learning_rate": 8.262168608555867e-05, "loss": 0.0913, "step": 3605 }, { "epoch": 0.93, "learning_rate": 8.261090489257278e-05, "loss": 0.0802, "step": 3606 }, { "epoch": 0.93, "learning_rate": 8.260012106028876e-05, "loss": 0.0561, "step": 3607 }, { "epoch": 0.93, "learning_rate": 8.258933458957941e-05, "loss": 0.0471, "step": 3608 }, { "epoch": 0.93, "learning_rate": 8.257854548131768e-05, "loss": 0.0693, "step": 3609 }, { "epoch": 0.93, "learning_rate": 8.256775373637678e-05, "loss": 0.0826, "step": 3610 }, { "epoch": 0.93, "learning_rate": 8.255695935563011e-05, "loss": 0.046, "step": 3611 }, { "epoch": 0.93, "learning_rate": 8.254616233995129e-05, "loss": 0.0562, "step": 3612 }, { "epoch": 0.93, "learning_rate": 8.253536269021416e-05, "loss": 0.0434, "step": 3613 }, { "epoch": 0.93, "learning_rate": 8.25245604072928e-05, "loss": 0.0571, "step": 3614 }, { "epoch": 0.93, "learning_rate": 8.251375549206141e-05, "loss": 0.0667, "step": 3615 }, { "epoch": 0.93, "learning_rate": 8.25029479453945e-05, "loss": 0.0756, "step": 3616 }, { "epoch": 0.93, "learning_rate": 8.249213776816676e-05, "loss": 0.0734, "step": 3617 }, { "epoch": 0.93, "learning_rate": 8.248132496125305e-05, "loss": 0.0578, "step": 3618 }, { "epoch": 0.93, "learning_rate": 8.247050952552856e-05, "loss": 0.0729, "step": 3619 }, { "epoch": 0.93, "learning_rate": 8.245969146186853e-05, "loss": 0.0597, "step": 3620 }, { "epoch": 0.93, "learning_rate": 8.244887077114858e-05, "loss": 0.0771, "step": 3621 }, { "epoch": 0.93, "learning_rate": 8.243804745424438e-05, "loss": 0.053, "step": 3622 }, { "epoch": 0.93, "learning_rate": 8.242722151203196e-05, "loss": 0.0636, "step": 3623 }, { "epoch": 0.94, "learning_rate": 8.241639294538746e-05, "loss": 0.0757, "step": 3624 }, { "epoch": 0.94, "learning_rate": 8.240556175518727e-05, "loss": 0.0625, "step": 3625 }, { "epoch": 0.94, "learning_rate": 8.2394727942308e-05, "loss": 0.0544, "step": 3626 }, { "epoch": 0.94, "learning_rate": 8.238389150762647e-05, "loss": 0.0464, "step": 3627 }, { "epoch": 0.94, "learning_rate": 8.237305245201968e-05, "loss": 0.0481, "step": 3628 }, { "epoch": 0.94, "learning_rate": 8.236221077636489e-05, "loss": 0.0559, "step": 3629 }, { "epoch": 0.94, "learning_rate": 8.235136648153954e-05, "loss": 0.0572, "step": 3630 }, { "epoch": 0.94, "learning_rate": 8.234051956842129e-05, "loss": 0.0508, "step": 3631 }, { "epoch": 0.94, "learning_rate": 8.232967003788802e-05, "loss": 0.0482, "step": 3632 }, { "epoch": 0.94, "learning_rate": 8.231881789081781e-05, "loss": 0.0382, "step": 3633 }, { "epoch": 0.94, "learning_rate": 8.230796312808896e-05, "loss": 0.0492, "step": 3634 }, { "epoch": 0.94, "learning_rate": 8.229710575057997e-05, "loss": 0.0682, "step": 3635 }, { "epoch": 0.94, "learning_rate": 8.228624575916958e-05, "loss": 0.0473, "step": 3636 }, { "epoch": 0.94, "learning_rate": 8.227538315473669e-05, "loss": 0.0606, "step": 3637 }, { "epoch": 0.94, "learning_rate": 8.226451793816048e-05, "loss": 0.0499, "step": 3638 }, { "epoch": 0.94, "learning_rate": 8.225365011032027e-05, "loss": 0.0792, "step": 3639 }, { "epoch": 0.94, "learning_rate": 8.224277967209565e-05, "loss": 0.0584, "step": 3640 }, { "epoch": 0.94, "learning_rate": 8.223190662436637e-05, "loss": 0.0686, "step": 3641 }, { "epoch": 0.94, "learning_rate": 8.222103096801245e-05, "loss": 0.051, "step": 3642 }, { "epoch": 0.94, "learning_rate": 8.221015270391408e-05, "loss": 0.0756, "step": 3643 }, { "epoch": 0.94, "learning_rate": 8.219927183295166e-05, "loss": 0.0584, "step": 3644 }, { "epoch": 0.94, "learning_rate": 8.218838835600583e-05, "loss": 0.0606, "step": 3645 }, { "epoch": 0.94, "learning_rate": 8.21775022739574e-05, "loss": 0.0416, "step": 3646 }, { "epoch": 0.94, "learning_rate": 8.216661358768743e-05, "loss": 0.0507, "step": 3647 }, { "epoch": 0.94, "learning_rate": 8.215572229807716e-05, "loss": 0.0557, "step": 3648 }, { "epoch": 0.94, "learning_rate": 8.214482840600806e-05, "loss": 0.0656, "step": 3649 }, { "epoch": 0.94, "learning_rate": 8.213393191236182e-05, "loss": 0.0658, "step": 3650 }, { "epoch": 0.94, "learning_rate": 8.21230328180203e-05, "loss": 0.0773, "step": 3651 }, { "epoch": 0.94, "learning_rate": 8.211213112386562e-05, "loss": 0.0617, "step": 3652 }, { "epoch": 0.94, "learning_rate": 8.210122683078008e-05, "loss": 0.0496, "step": 3653 }, { "epoch": 0.94, "learning_rate": 8.209031993964618e-05, "loss": 0.0656, "step": 3654 }, { "epoch": 0.94, "learning_rate": 8.207941045134666e-05, "loss": 0.063, "step": 3655 }, { "epoch": 0.94, "learning_rate": 8.206849836676447e-05, "loss": 0.0814, "step": 3656 }, { "epoch": 0.94, "learning_rate": 8.205758368678274e-05, "loss": 0.0826, "step": 3657 }, { "epoch": 0.94, "learning_rate": 8.204666641228482e-05, "loss": 0.0536, "step": 3658 }, { "epoch": 0.94, "learning_rate": 8.203574654415432e-05, "loss": 0.0849, "step": 3659 }, { "epoch": 0.94, "learning_rate": 8.202482408327497e-05, "loss": 0.0602, "step": 3660 }, { "epoch": 0.94, "learning_rate": 8.201389903053076e-05, "loss": 0.0489, "step": 3661 }, { "epoch": 0.95, "learning_rate": 8.200297138680593e-05, "loss": 0.0655, "step": 3662 }, { "epoch": 0.95, "learning_rate": 8.199204115298487e-05, "loss": 0.0683, "step": 3663 }, { "epoch": 0.95, "learning_rate": 8.198110832995216e-05, "loss": 0.0386, "step": 3664 }, { "epoch": 0.95, "learning_rate": 8.197017291859266e-05, "loss": 0.068, "step": 3665 }, { "epoch": 0.95, "learning_rate": 8.19592349197914e-05, "loss": 0.0437, "step": 3666 }, { "epoch": 0.95, "learning_rate": 8.194829433443363e-05, "loss": 0.0697, "step": 3667 }, { "epoch": 0.95, "learning_rate": 8.193735116340479e-05, "loss": 0.0617, "step": 3668 }, { "epoch": 0.95, "learning_rate": 8.192640540759056e-05, "loss": 0.0676, "step": 3669 }, { "epoch": 0.95, "learning_rate": 8.19154570678768e-05, "loss": 0.0537, "step": 3670 }, { "epoch": 0.95, "learning_rate": 8.190450614514959e-05, "loss": 0.0924, "step": 3671 }, { "epoch": 0.95, "learning_rate": 8.189355264029523e-05, "loss": 0.059, "step": 3672 }, { "epoch": 0.95, "learning_rate": 8.188259655420023e-05, "loss": 0.0725, "step": 3673 }, { "epoch": 0.95, "learning_rate": 8.187163788775128e-05, "loss": 0.0408, "step": 3674 }, { "epoch": 0.95, "learning_rate": 8.186067664183529e-05, "loss": 0.0859, "step": 3675 }, { "epoch": 0.95, "learning_rate": 8.18497128173394e-05, "loss": 0.059, "step": 3676 }, { "epoch": 0.95, "learning_rate": 8.183874641515096e-05, "loss": 0.0753, "step": 3677 }, { "epoch": 0.95, "learning_rate": 8.18277774361575e-05, "loss": 0.0577, "step": 3678 }, { "epoch": 0.95, "learning_rate": 8.181680588124676e-05, "loss": 0.0547, "step": 3679 }, { "epoch": 0.95, "learning_rate": 8.180583175130671e-05, "loss": 0.0694, "step": 3680 }, { "epoch": 0.95, "learning_rate": 8.179485504722552e-05, "loss": 0.073, "step": 3681 }, { "epoch": 0.95, "learning_rate": 8.178387576989158e-05, "loss": 0.064, "step": 3682 }, { "epoch": 0.95, "learning_rate": 8.177289392019346e-05, "loss": 0.0815, "step": 3683 }, { "epoch": 0.95, "learning_rate": 8.176190949901997e-05, "loss": 0.045, "step": 3684 }, { "epoch": 0.95, "learning_rate": 8.175092250726008e-05, "loss": 0.05, "step": 3685 }, { "epoch": 0.95, "learning_rate": 8.173993294580302e-05, "loss": 0.0476, "step": 3686 }, { "epoch": 0.95, "learning_rate": 8.172894081553823e-05, "loss": 0.0606, "step": 3687 }, { "epoch": 0.95, "learning_rate": 8.171794611735531e-05, "loss": 0.0776, "step": 3688 }, { "epoch": 0.95, "learning_rate": 8.170694885214407e-05, "loss": 0.0592, "step": 3689 }, { "epoch": 0.95, "learning_rate": 8.16959490207946e-05, "loss": 0.0447, "step": 3690 }, { "epoch": 0.95, "learning_rate": 8.168494662419715e-05, "loss": 0.0556, "step": 3691 }, { "epoch": 0.95, "learning_rate": 8.167394166324211e-05, "loss": 0.0654, "step": 3692 }, { "epoch": 0.95, "learning_rate": 8.166293413882023e-05, "loss": 0.0733, "step": 3693 }, { "epoch": 0.95, "learning_rate": 8.165192405182233e-05, "loss": 0.0547, "step": 3694 }, { "epoch": 0.95, "learning_rate": 8.164091140313949e-05, "loss": 0.0791, "step": 3695 }, { "epoch": 0.95, "learning_rate": 8.162989619366302e-05, "loss": 0.0691, "step": 3696 }, { "epoch": 0.95, "learning_rate": 8.161887842428442e-05, "loss": 0.0713, "step": 3697 }, { "epoch": 0.95, "learning_rate": 8.160785809589534e-05, "loss": 0.0778, "step": 3698 }, { "epoch": 0.95, "learning_rate": 8.159683520938773e-05, "loss": 0.0741, "step": 3699 }, { "epoch": 0.95, "learning_rate": 8.158580976565371e-05, "loss": 0.0662, "step": 3700 }, { "epoch": 0.96, "learning_rate": 8.157478176558558e-05, "loss": 0.0693, "step": 3701 }, { "epoch": 0.96, "learning_rate": 8.156375121007589e-05, "loss": 0.0502, "step": 3702 }, { "epoch": 0.96, "learning_rate": 8.155271810001735e-05, "loss": 0.0583, "step": 3703 }, { "epoch": 0.96, "learning_rate": 8.154168243630291e-05, "loss": 0.0634, "step": 3704 }, { "epoch": 0.96, "learning_rate": 8.153064421982573e-05, "loss": 0.0744, "step": 3705 }, { "epoch": 0.96, "learning_rate": 8.151960345147918e-05, "loss": 0.0507, "step": 3706 }, { "epoch": 0.96, "learning_rate": 8.150856013215677e-05, "loss": 0.0648, "step": 3707 }, { "epoch": 0.96, "learning_rate": 8.149751426275233e-05, "loss": 0.0511, "step": 3708 }, { "epoch": 0.96, "learning_rate": 8.14864658441598e-05, "loss": 0.0809, "step": 3709 }, { "epoch": 0.96, "learning_rate": 8.147541487727337e-05, "loss": 0.0719, "step": 3710 }, { "epoch": 0.96, "learning_rate": 8.146436136298743e-05, "loss": 0.08, "step": 3711 }, { "epoch": 0.96, "learning_rate": 8.145330530219656e-05, "loss": 0.0604, "step": 3712 }, { "epoch": 0.96, "learning_rate": 8.144224669579558e-05, "loss": 0.055, "step": 3713 }, { "epoch": 0.96, "learning_rate": 8.14311855446795e-05, "loss": 0.0532, "step": 3714 }, { "epoch": 0.96, "learning_rate": 8.14201218497435e-05, "loss": 0.0522, "step": 3715 }, { "epoch": 0.96, "learning_rate": 8.140905561188304e-05, "loss": 0.0583, "step": 3716 }, { "epoch": 0.96, "learning_rate": 8.139798683199369e-05, "loss": 0.0678, "step": 3717 }, { "epoch": 0.96, "learning_rate": 8.138691551097132e-05, "loss": 0.0539, "step": 3718 }, { "epoch": 0.96, "learning_rate": 8.137584164971199e-05, "loss": 0.0592, "step": 3719 }, { "epoch": 0.96, "learning_rate": 8.136476524911187e-05, "loss": 0.0726, "step": 3720 }, { "epoch": 0.96, "learning_rate": 8.135368631006746e-05, "loss": 0.0621, "step": 3721 }, { "epoch": 0.96, "learning_rate": 8.134260483347537e-05, "loss": 0.0489, "step": 3722 }, { "epoch": 0.96, "learning_rate": 8.133152082023252e-05, "loss": 0.0517, "step": 3723 }, { "epoch": 0.96, "learning_rate": 8.13204342712359e-05, "loss": 0.0633, "step": 3724 }, { "epoch": 0.96, "learning_rate": 8.130934518738282e-05, "loss": 0.0466, "step": 3725 }, { "epoch": 0.96, "learning_rate": 8.129825356957074e-05, "loss": 0.0321, "step": 3726 }, { "epoch": 0.96, "learning_rate": 8.128715941869738e-05, "loss": 0.0577, "step": 3727 }, { "epoch": 0.96, "learning_rate": 8.127606273566055e-05, "loss": 0.0819, "step": 3728 }, { "epoch": 0.96, "learning_rate": 8.126496352135838e-05, "loss": 0.0483, "step": 3729 }, { "epoch": 0.96, "learning_rate": 8.125386177668914e-05, "loss": 0.075, "step": 3730 }, { "epoch": 0.96, "learning_rate": 8.124275750255137e-05, "loss": 0.0513, "step": 3731 }, { "epoch": 0.96, "learning_rate": 8.123165069984373e-05, "loss": 0.0608, "step": 3732 }, { "epoch": 0.96, "learning_rate": 8.122054136946514e-05, "loss": 0.0721, "step": 3733 }, { "epoch": 0.96, "learning_rate": 8.120942951231471e-05, "loss": 0.0381, "step": 3734 }, { "epoch": 0.96, "learning_rate": 8.119831512929177e-05, "loss": 0.0634, "step": 3735 }, { "epoch": 0.96, "learning_rate": 8.118719822129583e-05, "loss": 0.0577, "step": 3736 }, { "epoch": 0.96, "learning_rate": 8.117607878922662e-05, "loss": 0.0557, "step": 3737 }, { "epoch": 0.96, "learning_rate": 8.116495683398404e-05, "loss": 0.0436, "step": 3738 }, { "epoch": 0.96, "learning_rate": 8.115383235646826e-05, "loss": 0.0557, "step": 3739 }, { "epoch": 0.97, "learning_rate": 8.114270535757961e-05, "loss": 0.0615, "step": 3740 }, { "epoch": 0.97, "learning_rate": 8.113157583821861e-05, "loss": 0.0386, "step": 3741 }, { "epoch": 0.97, "learning_rate": 8.112044379928603e-05, "loss": 0.0958, "step": 3742 }, { "epoch": 0.97, "learning_rate": 8.11093092416828e-05, "loss": 0.0499, "step": 3743 }, { "epoch": 0.97, "learning_rate": 8.109817216631009e-05, "loss": 0.059, "step": 3744 }, { "epoch": 0.97, "learning_rate": 8.108703257406925e-05, "loss": 0.0645, "step": 3745 }, { "epoch": 0.97, "learning_rate": 8.107589046586182e-05, "loss": 0.0685, "step": 3746 }, { "epoch": 0.97, "learning_rate": 8.10647458425896e-05, "loss": 0.0578, "step": 3747 }, { "epoch": 0.97, "learning_rate": 8.105359870515453e-05, "loss": 0.0332, "step": 3748 }, { "epoch": 0.97, "learning_rate": 8.104244905445881e-05, "loss": 0.0521, "step": 3749 }, { "epoch": 0.97, "learning_rate": 8.103129689140479e-05, "loss": 0.034, "step": 3750 }, { "epoch": 0.97, "learning_rate": 8.102014221689505e-05, "loss": 0.0594, "step": 3751 }, { "epoch": 0.97, "learning_rate": 8.100898503183238e-05, "loss": 0.0691, "step": 3752 }, { "epoch": 0.97, "learning_rate": 8.099782533711977e-05, "loss": 0.0571, "step": 3753 }, { "epoch": 0.97, "learning_rate": 8.098666313366038e-05, "loss": 0.054, "step": 3754 }, { "epoch": 0.97, "learning_rate": 8.097549842235763e-05, "loss": 0.0626, "step": 3755 }, { "epoch": 0.97, "learning_rate": 8.09643312041151e-05, "loss": 0.0587, "step": 3756 }, { "epoch": 0.97, "learning_rate": 8.095316147983658e-05, "loss": 0.0645, "step": 3757 }, { "epoch": 0.97, "learning_rate": 8.094198925042609e-05, "loss": 0.0832, "step": 3758 }, { "epoch": 0.97, "learning_rate": 8.093081451678781e-05, "loss": 0.0728, "step": 3759 }, { "epoch": 0.97, "learning_rate": 8.091963727982616e-05, "loss": 0.0469, "step": 3760 }, { "epoch": 0.97, "learning_rate": 8.090845754044574e-05, "loss": 0.0447, "step": 3761 }, { "epoch": 0.97, "learning_rate": 8.089727529955137e-05, "loss": 0.0843, "step": 3762 }, { "epoch": 0.97, "learning_rate": 8.088609055804804e-05, "loss": 0.0695, "step": 3763 }, { "epoch": 0.97, "learning_rate": 8.087490331684098e-05, "loss": 0.0563, "step": 3764 }, { "epoch": 0.97, "learning_rate": 8.086371357683562e-05, "loss": 0.0477, "step": 3765 }, { "epoch": 0.97, "learning_rate": 8.085252133893753e-05, "loss": 0.0567, "step": 3766 }, { "epoch": 0.97, "learning_rate": 8.084132660405261e-05, "loss": 0.0762, "step": 3767 }, { "epoch": 0.97, "learning_rate": 8.08301293730868e-05, "loss": 0.0626, "step": 3768 }, { "epoch": 0.97, "learning_rate": 8.081892964694639e-05, "loss": 0.0577, "step": 3769 }, { "epoch": 0.97, "learning_rate": 8.080772742653778e-05, "loss": 0.0622, "step": 3770 }, { "epoch": 0.97, "learning_rate": 8.079652271276759e-05, "loss": 0.0748, "step": 3771 }, { "epoch": 0.97, "learning_rate": 8.078531550654265e-05, "loss": 0.0436, "step": 3772 }, { "epoch": 0.97, "learning_rate": 8.077410580877003e-05, "loss": 0.0369, "step": 3773 }, { "epoch": 0.97, "learning_rate": 8.076289362035692e-05, "loss": 0.0777, "step": 3774 }, { "epoch": 0.97, "learning_rate": 8.075167894221079e-05, "loss": 0.0603, "step": 3775 }, { "epoch": 0.97, "learning_rate": 8.074046177523927e-05, "loss": 0.0514, "step": 3776 }, { "epoch": 0.97, "learning_rate": 8.072924212035016e-05, "loss": 0.0432, "step": 3777 }, { "epoch": 0.97, "learning_rate": 8.071801997845155e-05, "loss": 0.0486, "step": 3778 }, { "epoch": 0.98, "learning_rate": 8.070679535045165e-05, "loss": 0.0619, "step": 3779 }, { "epoch": 0.98, "learning_rate": 8.069556823725895e-05, "loss": 0.0658, "step": 3780 }, { "epoch": 0.98, "learning_rate": 8.068433863978202e-05, "loss": 0.0556, "step": 3781 }, { "epoch": 0.98, "learning_rate": 8.067310655892976e-05, "loss": 0.0637, "step": 3782 }, { "epoch": 0.98, "learning_rate": 8.06618719956112e-05, "loss": 0.062, "step": 3783 }, { "epoch": 0.98, "learning_rate": 8.06506349507356e-05, "loss": 0.0588, "step": 3784 }, { "epoch": 0.98, "learning_rate": 8.063939542521237e-05, "loss": 0.0635, "step": 3785 }, { "epoch": 0.98, "learning_rate": 8.06281534199512e-05, "loss": 0.0623, "step": 3786 }, { "epoch": 0.98, "learning_rate": 8.061690893586191e-05, "loss": 0.0449, "step": 3787 }, { "epoch": 0.98, "learning_rate": 8.06056619738546e-05, "loss": 0.0497, "step": 3788 }, { "epoch": 0.98, "learning_rate": 8.059441253483944e-05, "loss": 0.047, "step": 3789 }, { "epoch": 0.98, "learning_rate": 8.058316061972693e-05, "loss": 0.0762, "step": 3790 }, { "epoch": 0.98, "learning_rate": 8.057190622942772e-05, "loss": 0.0481, "step": 3791 }, { "epoch": 0.98, "learning_rate": 8.056064936485266e-05, "loss": 0.0659, "step": 3792 }, { "epoch": 0.98, "learning_rate": 8.05493900269128e-05, "loss": 0.0527, "step": 3793 }, { "epoch": 0.98, "learning_rate": 8.053812821651939e-05, "loss": 0.0649, "step": 3794 }, { "epoch": 0.98, "learning_rate": 8.052686393458387e-05, "loss": 0.0399, "step": 3795 }, { "epoch": 0.98, "learning_rate": 8.05155971820179e-05, "loss": 0.0523, "step": 3796 }, { "epoch": 0.98, "learning_rate": 8.050432795973336e-05, "loss": 0.0528, "step": 3797 }, { "epoch": 0.98, "learning_rate": 8.049305626864227e-05, "loss": 0.0569, "step": 3798 }, { "epoch": 0.98, "learning_rate": 8.048178210965687e-05, "loss": 0.0858, "step": 3799 }, { "epoch": 0.98, "learning_rate": 8.047050548368964e-05, "loss": 0.0705, "step": 3800 }, { "epoch": 0.98, "learning_rate": 8.045922639165324e-05, "loss": 0.0617, "step": 3801 }, { "epoch": 0.98, "learning_rate": 8.044794483446048e-05, "loss": 0.0839, "step": 3802 }, { "epoch": 0.98, "learning_rate": 8.043666081302443e-05, "loss": 0.069, "step": 3803 }, { "epoch": 0.98, "learning_rate": 8.042537432825836e-05, "loss": 0.075, "step": 3804 }, { "epoch": 0.98, "learning_rate": 8.041408538107568e-05, "loss": 0.0717, "step": 3805 }, { "epoch": 0.98, "learning_rate": 8.040279397239009e-05, "loss": 0.0721, "step": 3806 }, { "epoch": 0.98, "learning_rate": 8.03915001031154e-05, "loss": 0.0511, "step": 3807 }, { "epoch": 0.98, "learning_rate": 8.038020377416565e-05, "loss": 0.0703, "step": 3808 }, { "epoch": 0.98, "learning_rate": 8.036890498645508e-05, "loss": 0.0591, "step": 3809 }, { "epoch": 0.98, "learning_rate": 8.03576037408982e-05, "loss": 0.0635, "step": 3810 }, { "epoch": 0.98, "learning_rate": 8.034630003840957e-05, "loss": 0.0667, "step": 3811 }, { "epoch": 0.98, "learning_rate": 8.033499387990408e-05, "loss": 0.0802, "step": 3812 }, { "epoch": 0.98, "learning_rate": 8.032368526629676e-05, "loss": 0.0757, "step": 3813 }, { "epoch": 0.98, "learning_rate": 8.031237419850289e-05, "loss": 0.0663, "step": 3814 }, { "epoch": 0.98, "learning_rate": 8.030106067743781e-05, "loss": 0.0495, "step": 3815 }, { "epoch": 0.98, "learning_rate": 8.028974470401725e-05, "loss": 0.0836, "step": 3816 }, { "epoch": 0.99, "learning_rate": 8.0278426279157e-05, "loss": 0.0671, "step": 3817 }, { "epoch": 0.99, "learning_rate": 8.026710540377311e-05, "loss": 0.0392, "step": 3818 }, { "epoch": 0.99, "learning_rate": 8.025578207878182e-05, "loss": 0.0454, "step": 3819 }, { "epoch": 0.99, "learning_rate": 8.024445630509954e-05, "loss": 0.0525, "step": 3820 }, { "epoch": 0.99, "learning_rate": 8.023312808364291e-05, "loss": 0.0554, "step": 3821 }, { "epoch": 0.99, "learning_rate": 8.022179741532874e-05, "loss": 0.0464, "step": 3822 }, { "epoch": 0.99, "learning_rate": 8.02104643010741e-05, "loss": 0.0635, "step": 3823 }, { "epoch": 0.99, "learning_rate": 8.019912874179616e-05, "loss": 0.0873, "step": 3824 }, { "epoch": 0.99, "learning_rate": 8.018779073841237e-05, "loss": 0.058, "step": 3825 }, { "epoch": 0.99, "learning_rate": 8.017645029184034e-05, "loss": 0.0473, "step": 3826 }, { "epoch": 0.99, "learning_rate": 8.01651074029979e-05, "loss": 0.0521, "step": 3827 }, { "epoch": 0.99, "learning_rate": 8.015376207280305e-05, "loss": 0.0509, "step": 3828 }, { "epoch": 0.99, "learning_rate": 8.014241430217398e-05, "loss": 0.0673, "step": 3829 }, { "epoch": 0.99, "learning_rate": 8.013106409202915e-05, "loss": 0.0637, "step": 3830 }, { "epoch": 0.99, "learning_rate": 8.011971144328712e-05, "loss": 0.0551, "step": 3831 }, { "epoch": 0.99, "learning_rate": 8.010835635686673e-05, "loss": 0.0708, "step": 3832 }, { "epoch": 0.99, "learning_rate": 8.009699883368696e-05, "loss": 0.0692, "step": 3833 }, { "epoch": 0.99, "learning_rate": 8.0085638874667e-05, "loss": 0.0526, "step": 3834 }, { "epoch": 0.99, "learning_rate": 8.007427648072626e-05, "loss": 0.0671, "step": 3835 }, { "epoch": 0.99, "learning_rate": 8.006291165278434e-05, "loss": 0.0585, "step": 3836 }, { "epoch": 0.99, "learning_rate": 8.005154439176102e-05, "loss": 0.0496, "step": 3837 }, { "epoch": 0.99, "learning_rate": 8.004017469857627e-05, "loss": 0.0475, "step": 3838 }, { "epoch": 0.99, "learning_rate": 8.002880257415031e-05, "loss": 0.0805, "step": 3839 }, { "epoch": 0.99, "learning_rate": 8.001742801940349e-05, "loss": 0.061, "step": 3840 }, { "epoch": 0.99, "learning_rate": 8.000605103525642e-05, "loss": 0.0593, "step": 3841 }, { "epoch": 0.99, "learning_rate": 7.999467162262982e-05, "loss": 0.061, "step": 3842 }, { "epoch": 0.99, "learning_rate": 7.998328978244469e-05, "loss": 0.0488, "step": 3843 }, { "epoch": 0.99, "learning_rate": 7.997190551562221e-05, "loss": 0.0373, "step": 3844 }, { "epoch": 0.99, "learning_rate": 7.996051882308372e-05, "loss": 0.0577, "step": 3845 }, { "epoch": 0.99, "learning_rate": 7.994912970575081e-05, "loss": 0.0434, "step": 3846 }, { "epoch": 0.99, "learning_rate": 7.99377381645452e-05, "loss": 0.0737, "step": 3847 }, { "epoch": 0.99, "learning_rate": 7.992634420038885e-05, "loss": 0.0582, "step": 3848 }, { "epoch": 0.99, "learning_rate": 7.991494781420392e-05, "loss": 0.0743, "step": 3849 }, { "epoch": 0.99, "learning_rate": 7.990354900691274e-05, "loss": 0.0578, "step": 3850 }, { "epoch": 0.99, "learning_rate": 7.989214777943786e-05, "loss": 0.0433, "step": 3851 }, { "epoch": 0.99, "learning_rate": 7.9880744132702e-05, "loss": 0.0592, "step": 3852 }, { "epoch": 0.99, "learning_rate": 7.986933806762811e-05, "loss": 0.0542, "step": 3853 }, { "epoch": 0.99, "learning_rate": 7.985792958513931e-05, "loss": 0.0701, "step": 3854 }, { "epoch": 0.99, "learning_rate": 7.984651868615893e-05, "loss": 0.0542, "step": 3855 }, { "epoch": 1.0, "learning_rate": 7.983510537161047e-05, "loss": 0.0493, "step": 3856 }, { "epoch": 1.0, "learning_rate": 7.982368964241765e-05, "loss": 0.0776, "step": 3857 }, { "epoch": 1.0, "learning_rate": 7.981227149950441e-05, "loss": 0.0672, "step": 3858 }, { "epoch": 1.0, "learning_rate": 7.98008509437948e-05, "loss": 0.044, "step": 3859 }, { "epoch": 1.0, "learning_rate": 7.978942797621316e-05, "loss": 0.0654, "step": 3860 }, { "epoch": 1.0, "learning_rate": 7.977800259768397e-05, "loss": 0.0661, "step": 3861 }, { "epoch": 1.0, "learning_rate": 7.976657480913193e-05, "loss": 0.0718, "step": 3862 }, { "epoch": 1.0, "learning_rate": 7.97551446114819e-05, "loss": 0.0475, "step": 3863 }, { "epoch": 1.0, "learning_rate": 7.974371200565898e-05, "loss": 0.069, "step": 3864 }, { "epoch": 1.0, "learning_rate": 7.973227699258845e-05, "loss": 0.0814, "step": 3865 }, { "epoch": 1.0, "learning_rate": 7.972083957319577e-05, "loss": 0.0549, "step": 3866 }, { "epoch": 1.0, "learning_rate": 7.970939974840661e-05, "loss": 0.0358, "step": 3867 }, { "epoch": 1.0, "learning_rate": 7.969795751914684e-05, "loss": 0.0608, "step": 3868 }, { "epoch": 1.0, "learning_rate": 7.968651288634248e-05, "loss": 0.0586, "step": 3869 }, { "epoch": 1.0, "learning_rate": 7.96750658509198e-05, "loss": 0.0684, "step": 3870 }, { "epoch": 1.0, "learning_rate": 7.966361641380525e-05, "loss": 0.0631, "step": 3871 }, { "epoch": 1.0, "learning_rate": 7.965216457592545e-05, "loss": 0.0505, "step": 3872 }, { "epoch": 1.0, "learning_rate": 7.964071033820724e-05, "loss": 0.0743, "step": 3873 }, { "epoch": 1.0, "learning_rate": 7.962925370157766e-05, "loss": 0.0711, "step": 3874 }, { "epoch": 1.0, "learning_rate": 7.961779466696391e-05, "loss": 0.05, "step": 3875 }, { "epoch": 1.0, "learning_rate": 7.96063332352934e-05, "loss": 0.0512, "step": 3876 }, { "epoch": 1.0, "learning_rate": 7.959486940749375e-05, "loss": 0.0513, "step": 3877 }, { "epoch": 1.0, "learning_rate": 7.958340318449276e-05, "loss": 0.0449, "step": 3878 }, { "epoch": 1.0, "learning_rate": 7.957193456721843e-05, "loss": 0.0604, "step": 3879 }, { "epoch": 1.0, "learning_rate": 7.956046355659895e-05, "loss": 0.0679, "step": 3880 }, { "epoch": 1.0, "learning_rate": 7.95489901535627e-05, "loss": 0.0607, "step": 3881 }, { "epoch": 1.0, "learning_rate": 7.953751435903824e-05, "loss": 0.0504, "step": 3882 }, { "epoch": 1.0, "learning_rate": 7.952603617395436e-05, "loss": 0.0454, "step": 3883 }, { "epoch": 1.0, "learning_rate": 7.951455559924002e-05, "loss": 0.0401, "step": 3884 }, { "epoch": 1.0, "learning_rate": 7.950307263582439e-05, "loss": 0.063, "step": 3885 }, { "epoch": 1.0, "learning_rate": 7.949158728463679e-05, "loss": 0.0399, "step": 3886 }, { "epoch": 1.0, "learning_rate": 7.948009954660678e-05, "loss": 0.0365, "step": 3887 }, { "epoch": 1.0, "learning_rate": 7.94686094226641e-05, "loss": 0.0449, "step": 3888 }, { "epoch": 1.0, "learning_rate": 7.94571169137387e-05, "loss": 0.0331, "step": 3889 }, { "epoch": 1.0, "learning_rate": 7.944562202076065e-05, "loss": 0.0518, "step": 3890 }, { "epoch": 1.0, "learning_rate": 7.943412474466029e-05, "loss": 0.0399, "step": 3891 }, { "epoch": 1.0, "learning_rate": 7.942262508636818e-05, "loss": 0.0399, "step": 3892 }, { "epoch": 1.0, "learning_rate": 7.941112304681495e-05, "loss": 0.0422, "step": 3893 }, { "epoch": 1.0, "learning_rate": 7.939961862693153e-05, "loss": 0.0525, "step": 3894 }, { "epoch": 1.01, "learning_rate": 7.938811182764898e-05, "loss": 0.0512, "step": 3895 }, { "epoch": 1.01, "learning_rate": 7.937660264989862e-05, "loss": 0.0511, "step": 3896 }, { "epoch": 1.01, "learning_rate": 7.93650910946119e-05, "loss": 0.0444, "step": 3897 }, { "epoch": 1.01, "learning_rate": 7.93535771627205e-05, "loss": 0.065, "step": 3898 }, { "epoch": 1.01, "learning_rate": 7.934206085515623e-05, "loss": 0.0337, "step": 3899 }, { "epoch": 1.01, "learning_rate": 7.933054217285118e-05, "loss": 0.0463, "step": 3900 }, { "epoch": 1.01, "learning_rate": 7.93190211167376e-05, "loss": 0.0456, "step": 3901 }, { "epoch": 1.01, "learning_rate": 7.93074976877479e-05, "loss": 0.0419, "step": 3902 }, { "epoch": 1.01, "learning_rate": 7.92959718868147e-05, "loss": 0.0479, "step": 3903 }, { "epoch": 1.01, "learning_rate": 7.928444371487084e-05, "loss": 0.042, "step": 3904 }, { "epoch": 1.01, "learning_rate": 7.927291317284933e-05, "loss": 0.0589, "step": 3905 }, { "epoch": 1.01, "learning_rate": 7.926138026168335e-05, "loss": 0.0588, "step": 3906 }, { "epoch": 1.01, "learning_rate": 7.92498449823063e-05, "loss": 0.0439, "step": 3907 }, { "epoch": 1.01, "learning_rate": 7.923830733565179e-05, "loss": 0.0451, "step": 3908 }, { "epoch": 1.01, "learning_rate": 7.922676732265355e-05, "loss": 0.039, "step": 3909 }, { "epoch": 1.01, "learning_rate": 7.92152249442456e-05, "loss": 0.042, "step": 3910 }, { "epoch": 1.01, "learning_rate": 7.920368020136204e-05, "loss": 0.0353, "step": 3911 }, { "epoch": 1.01, "learning_rate": 7.919213309493728e-05, "loss": 0.048, "step": 3912 }, { "epoch": 1.01, "learning_rate": 7.91805836259058e-05, "loss": 0.0488, "step": 3913 }, { "epoch": 1.01, "learning_rate": 7.91690317952024e-05, "loss": 0.0498, "step": 3914 }, { "epoch": 1.01, "learning_rate": 7.915747760376197e-05, "loss": 0.0447, "step": 3915 }, { "epoch": 1.01, "learning_rate": 7.914592105251962e-05, "loss": 0.055, "step": 3916 }, { "epoch": 1.01, "learning_rate": 7.913436214241066e-05, "loss": 0.0412, "step": 3917 }, { "epoch": 1.01, "learning_rate": 7.912280087437058e-05, "loss": 0.0393, "step": 3918 }, { "epoch": 1.01, "learning_rate": 7.911123724933511e-05, "loss": 0.0519, "step": 3919 }, { "epoch": 1.01, "learning_rate": 7.909967126824007e-05, "loss": 0.0388, "step": 3920 }, { "epoch": 1.01, "learning_rate": 7.908810293202157e-05, "loss": 0.0581, "step": 3921 }, { "epoch": 1.01, "learning_rate": 7.907653224161585e-05, "loss": 0.049, "step": 3922 }, { "epoch": 1.01, "learning_rate": 7.906495919795938e-05, "loss": 0.0381, "step": 3923 }, { "epoch": 1.01, "learning_rate": 7.905338380198877e-05, "loss": 0.0284, "step": 3924 }, { "epoch": 1.01, "learning_rate": 7.904180605464087e-05, "loss": 0.0404, "step": 3925 }, { "epoch": 1.01, "learning_rate": 7.903022595685272e-05, "loss": 0.0569, "step": 3926 }, { "epoch": 1.01, "learning_rate": 7.90186435095615e-05, "loss": 0.0377, "step": 3927 }, { "epoch": 1.01, "learning_rate": 7.900705871370464e-05, "loss": 0.0347, "step": 3928 }, { "epoch": 1.01, "learning_rate": 7.899547157021971e-05, "loss": 0.0523, "step": 3929 }, { "epoch": 1.01, "learning_rate": 7.898388208004449e-05, "loss": 0.0433, "step": 3930 }, { "epoch": 1.01, "learning_rate": 7.897229024411696e-05, "loss": 0.0269, "step": 3931 }, { "epoch": 1.01, "learning_rate": 7.89606960633753e-05, "loss": 0.0425, "step": 3932 }, { "epoch": 1.01, "learning_rate": 7.894909953875784e-05, "loss": 0.0348, "step": 3933 }, { "epoch": 1.02, "learning_rate": 7.893750067120314e-05, "loss": 0.0346, "step": 3934 }, { "epoch": 1.02, "learning_rate": 7.89258994616499e-05, "loss": 0.0386, "step": 3935 }, { "epoch": 1.02, "learning_rate": 7.891429591103707e-05, "loss": 0.0416, "step": 3936 }, { "epoch": 1.02, "learning_rate": 7.890269002030375e-05, "loss": 0.0521, "step": 3937 }, { "epoch": 1.02, "learning_rate": 7.889108179038923e-05, "loss": 0.048, "step": 3938 }, { "epoch": 1.02, "learning_rate": 7.887947122223302e-05, "loss": 0.0379, "step": 3939 }, { "epoch": 1.02, "learning_rate": 7.886785831677477e-05, "loss": 0.0332, "step": 3940 }, { "epoch": 1.02, "learning_rate": 7.885624307495436e-05, "loss": 0.028, "step": 3941 }, { "epoch": 1.02, "learning_rate": 7.884462549771186e-05, "loss": 0.0374, "step": 3942 }, { "epoch": 1.02, "learning_rate": 7.88330055859875e-05, "loss": 0.0399, "step": 3943 }, { "epoch": 1.02, "learning_rate": 7.882138334072174e-05, "loss": 0.0515, "step": 3944 }, { "epoch": 1.02, "learning_rate": 7.880975876285516e-05, "loss": 0.0362, "step": 3945 }, { "epoch": 1.02, "learning_rate": 7.879813185332859e-05, "loss": 0.0354, "step": 3946 }, { "epoch": 1.02, "learning_rate": 7.878650261308303e-05, "loss": 0.0481, "step": 3947 }, { "epoch": 1.02, "learning_rate": 7.877487104305969e-05, "loss": 0.0369, "step": 3948 }, { "epoch": 1.02, "learning_rate": 7.876323714419993e-05, "loss": 0.0468, "step": 3949 }, { "epoch": 1.02, "learning_rate": 7.875160091744531e-05, "loss": 0.0333, "step": 3950 }, { "epoch": 1.02, "learning_rate": 7.87399623637376e-05, "loss": 0.046, "step": 3951 }, { "epoch": 1.02, "learning_rate": 7.872832148401872e-05, "loss": 0.0337, "step": 3952 }, { "epoch": 1.02, "learning_rate": 7.871667827923083e-05, "loss": 0.0475, "step": 3953 }, { "epoch": 1.02, "learning_rate": 7.870503275031623e-05, "loss": 0.0468, "step": 3954 }, { "epoch": 1.02, "learning_rate": 7.869338489821742e-05, "loss": 0.0438, "step": 3955 }, { "epoch": 1.02, "learning_rate": 7.868173472387712e-05, "loss": 0.0354, "step": 3956 }, { "epoch": 1.02, "learning_rate": 7.86700822282382e-05, "loss": 0.0393, "step": 3957 }, { "epoch": 1.02, "learning_rate": 7.865842741224375e-05, "loss": 0.0447, "step": 3958 }, { "epoch": 1.02, "learning_rate": 7.8646770276837e-05, "loss": 0.043, "step": 3959 }, { "epoch": 1.02, "learning_rate": 7.86351108229614e-05, "loss": 0.0481, "step": 3960 }, { "epoch": 1.02, "learning_rate": 7.862344905156061e-05, "loss": 0.0291, "step": 3961 }, { "epoch": 1.02, "learning_rate": 7.861178496357844e-05, "loss": 0.0419, "step": 3962 }, { "epoch": 1.02, "learning_rate": 7.860011855995887e-05, "loss": 0.0573, "step": 3963 }, { "epoch": 1.02, "learning_rate": 7.858844984164614e-05, "loss": 0.0302, "step": 3964 }, { "epoch": 1.02, "learning_rate": 7.85767788095846e-05, "loss": 0.0423, "step": 3965 }, { "epoch": 1.02, "learning_rate": 7.856510546471886e-05, "loss": 0.0387, "step": 3966 }, { "epoch": 1.02, "learning_rate": 7.855342980799364e-05, "loss": 0.0289, "step": 3967 }, { "epoch": 1.02, "learning_rate": 7.854175184035392e-05, "loss": 0.0535, "step": 3968 }, { "epoch": 1.02, "learning_rate": 7.85300715627448e-05, "loss": 0.0474, "step": 3969 }, { "epoch": 1.02, "learning_rate": 7.851838897611162e-05, "loss": 0.0396, "step": 3970 }, { "epoch": 1.02, "learning_rate": 7.85067040813999e-05, "loss": 0.0376, "step": 3971 }, { "epoch": 1.03, "learning_rate": 7.849501687955529e-05, "loss": 0.0406, "step": 3972 }, { "epoch": 1.03, "learning_rate": 7.84833273715237e-05, "loss": 0.0448, "step": 3973 }, { "epoch": 1.03, "learning_rate": 7.84716355582512e-05, "loss": 0.0407, "step": 3974 }, { "epoch": 1.03, "learning_rate": 7.845994144068405e-05, "loss": 0.0416, "step": 3975 }, { "epoch": 1.03, "learning_rate": 7.844824501976865e-05, "loss": 0.0814, "step": 3976 }, { "epoch": 1.03, "learning_rate": 7.843654629645166e-05, "loss": 0.0475, "step": 3977 }, { "epoch": 1.03, "learning_rate": 7.842484527167989e-05, "loss": 0.0427, "step": 3978 }, { "epoch": 1.03, "learning_rate": 7.841314194640032e-05, "loss": 0.0389, "step": 3979 }, { "epoch": 1.03, "learning_rate": 7.840143632156018e-05, "loss": 0.0472, "step": 3980 }, { "epoch": 1.03, "learning_rate": 7.838972839810679e-05, "loss": 0.0531, "step": 3981 }, { "epoch": 1.03, "learning_rate": 7.837801817698772e-05, "loss": 0.0523, "step": 3982 }, { "epoch": 1.03, "learning_rate": 7.836630565915073e-05, "loss": 0.0514, "step": 3983 }, { "epoch": 1.03, "learning_rate": 7.835459084554376e-05, "loss": 0.0501, "step": 3984 }, { "epoch": 1.03, "learning_rate": 7.834287373711486e-05, "loss": 0.0535, "step": 3985 }, { "epoch": 1.03, "learning_rate": 7.833115433481242e-05, "loss": 0.0418, "step": 3986 }, { "epoch": 1.03, "learning_rate": 7.831943263958485e-05, "loss": 0.0413, "step": 3987 }, { "epoch": 1.03, "learning_rate": 7.830770865238086e-05, "loss": 0.0301, "step": 3988 }, { "epoch": 1.03, "learning_rate": 7.829598237414932e-05, "loss": 0.0467, "step": 3989 }, { "epoch": 1.03, "learning_rate": 7.828425380583923e-05, "loss": 0.0603, "step": 3990 }, { "epoch": 1.03, "learning_rate": 7.827252294839986e-05, "loss": 0.0385, "step": 3991 }, { "epoch": 1.03, "learning_rate": 7.82607898027806e-05, "loss": 0.0523, "step": 3992 }, { "epoch": 1.03, "learning_rate": 7.824905436993106e-05, "loss": 0.0351, "step": 3993 }, { "epoch": 1.03, "learning_rate": 7.8237316650801e-05, "loss": 0.052, "step": 3994 }, { "epoch": 1.03, "learning_rate": 7.822557664634042e-05, "loss": 0.0432, "step": 3995 }, { "epoch": 1.03, "learning_rate": 7.821383435749948e-05, "loss": 0.0506, "step": 3996 }, { "epoch": 1.03, "learning_rate": 7.820208978522847e-05, "loss": 0.0361, "step": 3997 }, { "epoch": 1.03, "learning_rate": 7.819034293047794e-05, "loss": 0.0372, "step": 3998 }, { "epoch": 1.03, "learning_rate": 7.817859379419861e-05, "loss": 0.047, "step": 3999 }, { "epoch": 1.03, "learning_rate": 7.816684237734137e-05, "loss": 0.0532, "step": 4000 }, { "epoch": 1.03, "learning_rate": 7.815508868085728e-05, "loss": 0.0351, "step": 4001 }, { "epoch": 1.03, "learning_rate": 7.814333270569761e-05, "loss": 0.0358, "step": 4002 }, { "epoch": 1.03, "learning_rate": 7.813157445281381e-05, "loss": 0.0293, "step": 4003 }, { "epoch": 1.03, "learning_rate": 7.811981392315753e-05, "loss": 0.0311, "step": 4004 }, { "epoch": 1.03, "learning_rate": 7.810805111768054e-05, "loss": 0.0465, "step": 4005 }, { "epoch": 1.03, "learning_rate": 7.809628603733488e-05, "loss": 0.0429, "step": 4006 }, { "epoch": 1.03, "learning_rate": 7.80845186830727e-05, "loss": 0.0472, "step": 4007 }, { "epoch": 1.03, "learning_rate": 7.807274905584638e-05, "loss": 0.0359, "step": 4008 }, { "epoch": 1.03, "learning_rate": 7.806097715660847e-05, "loss": 0.036, "step": 4009 }, { "epoch": 1.03, "learning_rate": 7.804920298631173e-05, "loss": 0.0364, "step": 4010 }, { "epoch": 1.04, "learning_rate": 7.803742654590907e-05, "loss": 0.0539, "step": 4011 }, { "epoch": 1.04, "learning_rate": 7.802564783635356e-05, "loss": 0.0308, "step": 4012 }, { "epoch": 1.04, "learning_rate": 7.80138668585985e-05, "loss": 0.0532, "step": 4013 }, { "epoch": 1.04, "learning_rate": 7.800208361359737e-05, "loss": 0.0349, "step": 4014 }, { "epoch": 1.04, "learning_rate": 7.799029810230383e-05, "loss": 0.0332, "step": 4015 }, { "epoch": 1.04, "learning_rate": 7.79785103256717e-05, "loss": 0.0392, "step": 4016 }, { "epoch": 1.04, "learning_rate": 7.796672028465502e-05, "loss": 0.0375, "step": 4017 }, { "epoch": 1.04, "learning_rate": 7.795492798020795e-05, "loss": 0.0437, "step": 4018 }, { "epoch": 1.04, "learning_rate": 7.794313341328495e-05, "loss": 0.0453, "step": 4019 }, { "epoch": 1.04, "learning_rate": 7.793133658484051e-05, "loss": 0.0456, "step": 4020 }, { "epoch": 1.04, "learning_rate": 7.791953749582943e-05, "loss": 0.0489, "step": 4021 }, { "epoch": 1.04, "learning_rate": 7.790773614720664e-05, "loss": 0.0302, "step": 4022 }, { "epoch": 1.04, "learning_rate": 7.789593253992724e-05, "loss": 0.0526, "step": 4023 }, { "epoch": 1.04, "learning_rate": 7.788412667494656e-05, "loss": 0.0573, "step": 4024 }, { "epoch": 1.04, "learning_rate": 7.787231855322006e-05, "loss": 0.0412, "step": 4025 }, { "epoch": 1.04, "learning_rate": 7.786050817570341e-05, "loss": 0.0433, "step": 4026 }, { "epoch": 1.04, "learning_rate": 7.784869554335246e-05, "loss": 0.0357, "step": 4027 }, { "epoch": 1.04, "learning_rate": 7.783688065712327e-05, "loss": 0.0298, "step": 4028 }, { "epoch": 1.04, "learning_rate": 7.7825063517972e-05, "loss": 0.0422, "step": 4029 }, { "epoch": 1.04, "learning_rate": 7.78132441268551e-05, "loss": 0.0387, "step": 4030 }, { "epoch": 1.04, "learning_rate": 7.780142248472912e-05, "loss": 0.0444, "step": 4031 }, { "epoch": 1.04, "learning_rate": 7.778959859255082e-05, "loss": 0.0343, "step": 4032 }, { "epoch": 1.04, "learning_rate": 7.777777245127715e-05, "loss": 0.0431, "step": 4033 }, { "epoch": 1.04, "learning_rate": 7.776594406186523e-05, "loss": 0.0399, "step": 4034 }, { "epoch": 1.04, "learning_rate": 7.775411342527238e-05, "loss": 0.055, "step": 4035 }, { "epoch": 1.04, "learning_rate": 7.774228054245609e-05, "loss": 0.0355, "step": 4036 }, { "epoch": 1.04, "learning_rate": 7.773044541437399e-05, "loss": 0.0341, "step": 4037 }, { "epoch": 1.04, "learning_rate": 7.771860804198398e-05, "loss": 0.0416, "step": 4038 }, { "epoch": 1.04, "learning_rate": 7.770676842624407e-05, "loss": 0.0463, "step": 4039 }, { "epoch": 1.04, "learning_rate": 7.769492656811248e-05, "loss": 0.0345, "step": 4040 }, { "epoch": 1.04, "learning_rate": 7.768308246854762e-05, "loss": 0.0261, "step": 4041 }, { "epoch": 1.04, "learning_rate": 7.767123612850806e-05, "loss": 0.0541, "step": 4042 }, { "epoch": 1.04, "learning_rate": 7.765938754895255e-05, "loss": 0.0448, "step": 4043 }, { "epoch": 1.04, "learning_rate": 7.764753673084003e-05, "loss": 0.0459, "step": 4044 }, { "epoch": 1.04, "learning_rate": 7.763568367512965e-05, "loss": 0.0385, "step": 4045 }, { "epoch": 1.04, "learning_rate": 7.762382838278068e-05, "loss": 0.0488, "step": 4046 }, { "epoch": 1.04, "learning_rate": 7.76119708547526e-05, "loss": 0.0485, "step": 4047 }, { "epoch": 1.04, "learning_rate": 7.760011109200512e-05, "loss": 0.0417, "step": 4048 }, { "epoch": 1.04, "learning_rate": 7.758824909549806e-05, "loss": 0.0501, "step": 4049 }, { "epoch": 1.05, "learning_rate": 7.757638486619144e-05, "loss": 0.0347, "step": 4050 }, { "epoch": 1.05, "learning_rate": 7.756451840504546e-05, "loss": 0.0231, "step": 4051 }, { "epoch": 1.05, "learning_rate": 7.755264971302053e-05, "loss": 0.0335, "step": 4052 }, { "epoch": 1.05, "learning_rate": 7.754077879107722e-05, "loss": 0.0391, "step": 4053 }, { "epoch": 1.05, "learning_rate": 7.752890564017625e-05, "loss": 0.0491, "step": 4054 }, { "epoch": 1.05, "learning_rate": 7.751703026127859e-05, "loss": 0.0322, "step": 4055 }, { "epoch": 1.05, "learning_rate": 7.750515265534533e-05, "loss": 0.0329, "step": 4056 }, { "epoch": 1.05, "learning_rate": 7.749327282333773e-05, "loss": 0.0449, "step": 4057 }, { "epoch": 1.05, "learning_rate": 7.748139076621732e-05, "loss": 0.0275, "step": 4058 }, { "epoch": 1.05, "learning_rate": 7.746950648494571e-05, "loss": 0.0448, "step": 4059 }, { "epoch": 1.05, "learning_rate": 7.745761998048476e-05, "loss": 0.0398, "step": 4060 }, { "epoch": 1.05, "learning_rate": 7.744573125379642e-05, "loss": 0.0672, "step": 4061 }, { "epoch": 1.05, "learning_rate": 7.743384030584295e-05, "loss": 0.0428, "step": 4062 }, { "epoch": 1.05, "learning_rate": 7.742194713758669e-05, "loss": 0.0358, "step": 4063 }, { "epoch": 1.05, "learning_rate": 7.741005174999019e-05, "loss": 0.0372, "step": 4064 }, { "epoch": 1.05, "learning_rate": 7.739815414401618e-05, "loss": 0.0484, "step": 4065 }, { "epoch": 1.05, "learning_rate": 7.738625432062757e-05, "loss": 0.0424, "step": 4066 }, { "epoch": 1.05, "learning_rate": 7.737435228078745e-05, "loss": 0.0377, "step": 4067 }, { "epoch": 1.05, "learning_rate": 7.736244802545907e-05, "loss": 0.032, "step": 4068 }, { "epoch": 1.05, "learning_rate": 7.73505415556059e-05, "loss": 0.0427, "step": 4069 }, { "epoch": 1.05, "learning_rate": 7.733863287219157e-05, "loss": 0.045, "step": 4070 }, { "epoch": 1.05, "learning_rate": 7.732672197617986e-05, "loss": 0.0437, "step": 4071 }, { "epoch": 1.05, "learning_rate": 7.731480886853477e-05, "loss": 0.0448, "step": 4072 }, { "epoch": 1.05, "learning_rate": 7.730289355022047e-05, "loss": 0.0367, "step": 4073 }, { "epoch": 1.05, "learning_rate": 7.729097602220128e-05, "loss": 0.0383, "step": 4074 }, { "epoch": 1.05, "learning_rate": 7.727905628544174e-05, "loss": 0.0468, "step": 4075 }, { "epoch": 1.05, "learning_rate": 7.726713434090657e-05, "loss": 0.0198, "step": 4076 }, { "epoch": 1.05, "learning_rate": 7.725521018956059e-05, "loss": 0.0385, "step": 4077 }, { "epoch": 1.05, "learning_rate": 7.724328383236891e-05, "loss": 0.0496, "step": 4078 }, { "epoch": 1.05, "learning_rate": 7.723135527029675e-05, "loss": 0.0485, "step": 4079 }, { "epoch": 1.05, "learning_rate": 7.72194245043095e-05, "loss": 0.0667, "step": 4080 }, { "epoch": 1.05, "learning_rate": 7.720749153537278e-05, "loss": 0.0434, "step": 4081 }, { "epoch": 1.05, "learning_rate": 7.719555636445237e-05, "loss": 0.0351, "step": 4082 }, { "epoch": 1.05, "learning_rate": 7.718361899251421e-05, "loss": 0.0429, "step": 4083 }, { "epoch": 1.05, "learning_rate": 7.717167942052441e-05, "loss": 0.0563, "step": 4084 }, { "epoch": 1.05, "learning_rate": 7.715973764944928e-05, "loss": 0.0468, "step": 4085 }, { "epoch": 1.05, "learning_rate": 7.71477936802553e-05, "loss": 0.0351, "step": 4086 }, { "epoch": 1.05, "learning_rate": 7.713584751390916e-05, "loss": 0.0362, "step": 4087 }, { "epoch": 1.05, "learning_rate": 7.712389915137768e-05, "loss": 0.0452, "step": 4088 }, { "epoch": 1.06, "learning_rate": 7.711194859362787e-05, "loss": 0.0422, "step": 4089 }, { "epoch": 1.06, "learning_rate": 7.709999584162692e-05, "loss": 0.0487, "step": 4090 }, { "epoch": 1.06, "learning_rate": 7.708804089634223e-05, "loss": 0.0657, "step": 4091 }, { "epoch": 1.06, "learning_rate": 7.707608375874132e-05, "loss": 0.0479, "step": 4092 }, { "epoch": 1.06, "learning_rate": 7.706412442979194e-05, "loss": 0.0402, "step": 4093 }, { "epoch": 1.06, "learning_rate": 7.705216291046198e-05, "loss": 0.0447, "step": 4094 }, { "epoch": 1.06, "learning_rate": 7.704019920171953e-05, "loss": 0.0418, "step": 4095 }, { "epoch": 1.06, "learning_rate": 7.702823330453283e-05, "loss": 0.0513, "step": 4096 }, { "epoch": 1.06, "learning_rate": 7.701626521987035e-05, "loss": 0.042, "step": 4097 }, { "epoch": 1.06, "learning_rate": 7.700429494870066e-05, "loss": 0.0398, "step": 4098 }, { "epoch": 1.06, "learning_rate": 7.699232249199258e-05, "loss": 0.045, "step": 4099 }, { "epoch": 1.06, "learning_rate": 7.698034785071507e-05, "loss": 0.0429, "step": 4100 }, { "epoch": 1.06, "learning_rate": 7.696837102583728e-05, "loss": 0.0479, "step": 4101 }, { "epoch": 1.06, "learning_rate": 7.695639201832852e-05, "loss": 0.062, "step": 4102 }, { "epoch": 1.06, "learning_rate": 7.694441082915829e-05, "loss": 0.0546, "step": 4103 }, { "epoch": 1.06, "learning_rate": 7.693242745929626e-05, "loss": 0.0364, "step": 4104 }, { "epoch": 1.06, "learning_rate": 7.692044190971228e-05, "loss": 0.0429, "step": 4105 }, { "epoch": 1.06, "learning_rate": 7.69084541813764e-05, "loss": 0.0383, "step": 4106 }, { "epoch": 1.06, "learning_rate": 7.689646427525879e-05, "loss": 0.058, "step": 4107 }, { "epoch": 1.06, "learning_rate": 7.688447219232984e-05, "loss": 0.0338, "step": 4108 }, { "epoch": 1.06, "learning_rate": 7.68724779335601e-05, "loss": 0.0637, "step": 4109 }, { "epoch": 1.06, "learning_rate": 7.686048149992032e-05, "loss": 0.0472, "step": 4110 }, { "epoch": 1.06, "learning_rate": 7.684848289238139e-05, "loss": 0.0415, "step": 4111 }, { "epoch": 1.06, "learning_rate": 7.683648211191439e-05, "loss": 0.0338, "step": 4112 }, { "epoch": 1.06, "learning_rate": 7.68244791594906e-05, "loss": 0.0342, "step": 4113 }, { "epoch": 1.06, "learning_rate": 7.681247403608143e-05, "loss": 0.0557, "step": 4114 }, { "epoch": 1.06, "learning_rate": 7.680046674265853e-05, "loss": 0.0504, "step": 4115 }, { "epoch": 1.06, "learning_rate": 7.678845728019363e-05, "loss": 0.0441, "step": 4116 }, { "epoch": 1.06, "learning_rate": 7.677644564965873e-05, "loss": 0.0391, "step": 4117 }, { "epoch": 1.06, "learning_rate": 7.676443185202597e-05, "loss": 0.0304, "step": 4118 }, { "epoch": 1.06, "learning_rate": 7.676443185202597e-05, "loss": 0.0355, "step": 4119 }, { "epoch": 1.06, "learning_rate": 7.675241588826763e-05, "loss": 0.0341, "step": 4120 }, { "epoch": 1.06, "learning_rate": 7.674039775935624e-05, "loss": 0.049, "step": 4121 }, { "epoch": 1.06, "learning_rate": 7.672837746626442e-05, "loss": 0.0489, "step": 4122 }, { "epoch": 1.06, "learning_rate": 7.671635500996504e-05, "loss": 0.0537, "step": 4123 }, { "epoch": 1.06, "learning_rate": 7.670433039143112e-05, "loss": 0.0432, "step": 4124 }, { "epoch": 1.06, "learning_rate": 7.669230361163582e-05, "loss": 0.0321, "step": 4125 }, { "epoch": 1.06, "learning_rate": 7.66802746715525e-05, "loss": 0.0352, "step": 4126 }, { "epoch": 1.07, "learning_rate": 7.666824357215472e-05, "loss": 0.0439, "step": 4127 }, { "epoch": 1.07, "learning_rate": 7.665621031441618e-05, "loss": 0.0445, "step": 4128 }, { "epoch": 1.07, "learning_rate": 7.66441748993108e-05, "loss": 0.049, "step": 4129 }, { "epoch": 1.07, "learning_rate": 7.663213732781261e-05, "loss": 0.0519, "step": 4130 }, { "epoch": 1.07, "learning_rate": 7.662009760089584e-05, "loss": 0.0482, "step": 4131 }, { "epoch": 1.07, "learning_rate": 7.660805571953491e-05, "loss": 0.0355, "step": 4132 }, { "epoch": 1.07, "learning_rate": 7.659601168470442e-05, "loss": 0.0345, "step": 4133 }, { "epoch": 1.07, "learning_rate": 7.65839654973791e-05, "loss": 0.0423, "step": 4134 }, { "epoch": 1.07, "learning_rate": 7.657191715853393e-05, "loss": 0.044, "step": 4135 }, { "epoch": 1.07, "learning_rate": 7.655986666914396e-05, "loss": 0.0419, "step": 4136 }, { "epoch": 1.07, "learning_rate": 7.654781403018452e-05, "loss": 0.0509, "step": 4137 }, { "epoch": 1.07, "learning_rate": 7.653575924263107e-05, "loss": 0.0452, "step": 4138 }, { "epoch": 1.07, "learning_rate": 7.65237023074592e-05, "loss": 0.0448, "step": 4139 }, { "epoch": 1.07, "learning_rate": 7.651164322564473e-05, "loss": 0.0294, "step": 4140 }, { "epoch": 1.07, "learning_rate": 7.649958199816363e-05, "loss": 0.0435, "step": 4141 }, { "epoch": 1.07, "learning_rate": 7.648751862599209e-05, "loss": 0.0471, "step": 4142 }, { "epoch": 1.07, "learning_rate": 7.647545311010639e-05, "loss": 0.041, "step": 4143 }, { "epoch": 1.07, "learning_rate": 7.646338545148304e-05, "loss": 0.0368, "step": 4144 }, { "epoch": 1.07, "learning_rate": 7.645131565109871e-05, "loss": 0.0329, "step": 4145 }, { "epoch": 1.07, "learning_rate": 7.643924370993028e-05, "loss": 0.0459, "step": 4146 }, { "epoch": 1.07, "learning_rate": 7.642716962895473e-05, "loss": 0.0479, "step": 4147 }, { "epoch": 1.07, "learning_rate": 7.641509340914925e-05, "loss": 0.0274, "step": 4148 }, { "epoch": 1.07, "learning_rate": 7.640301505149123e-05, "loss": 0.0428, "step": 4149 }, { "epoch": 1.07, "learning_rate": 7.639093455695818e-05, "loss": 0.0437, "step": 4150 }, { "epoch": 1.07, "learning_rate": 7.637885192652784e-05, "loss": 0.0439, "step": 4151 }, { "epoch": 1.07, "learning_rate": 7.636676716117807e-05, "loss": 0.048, "step": 4152 }, { "epoch": 1.07, "learning_rate": 7.635468026188692e-05, "loss": 0.0327, "step": 4153 }, { "epoch": 1.07, "learning_rate": 7.634259122963266e-05, "loss": 0.058, "step": 4154 }, { "epoch": 1.07, "learning_rate": 7.633050006539364e-05, "loss": 0.0398, "step": 4155 }, { "epoch": 1.07, "learning_rate": 7.631840677014847e-05, "loss": 0.0558, "step": 4156 }, { "epoch": 1.07, "learning_rate": 7.630631134487589e-05, "loss": 0.0316, "step": 4157 }, { "epoch": 1.07, "learning_rate": 7.629421379055481e-05, "loss": 0.0389, "step": 4158 }, { "epoch": 1.07, "learning_rate": 7.628211410816432e-05, "loss": 0.0421, "step": 4159 }, { "epoch": 1.07, "learning_rate": 7.627001229868372e-05, "loss": 0.0391, "step": 4160 }, { "epoch": 1.07, "learning_rate": 7.625790836309237e-05, "loss": 0.0493, "step": 4161 }, { "epoch": 1.07, "learning_rate": 7.624580230236994e-05, "loss": 0.0492, "step": 4162 }, { "epoch": 1.07, "learning_rate": 7.623369411749619e-05, "loss": 0.0353, "step": 4163 }, { "epoch": 1.07, "learning_rate": 7.622158380945109e-05, "loss": 0.0388, "step": 4164 }, { "epoch": 1.07, "learning_rate": 7.620947137921473e-05, "loss": 0.0418, "step": 4165 }, { "epoch": 1.08, "learning_rate": 7.619735682776744e-05, "loss": 0.0452, "step": 4166 }, { "epoch": 1.08, "learning_rate": 7.618524015608964e-05, "loss": 0.0397, "step": 4167 }, { "epoch": 1.08, "learning_rate": 7.617312136516201e-05, "loss": 0.0442, "step": 4168 }, { "epoch": 1.08, "learning_rate": 7.616100045596536e-05, "loss": 0.0392, "step": 4169 }, { "epoch": 1.08, "learning_rate": 7.614887742948065e-05, "loss": 0.0408, "step": 4170 }, { "epoch": 1.08, "learning_rate": 7.613675228668904e-05, "loss": 0.0407, "step": 4171 }, { "epoch": 1.08, "learning_rate": 7.612462502857187e-05, "loss": 0.0366, "step": 4172 }, { "epoch": 1.08, "learning_rate": 7.61124956561106e-05, "loss": 0.0287, "step": 4173 }, { "epoch": 1.08, "learning_rate": 7.610036417028694e-05, "loss": 0.0497, "step": 4174 }, { "epoch": 1.08, "learning_rate": 7.608823057208269e-05, "loss": 0.0437, "step": 4175 }, { "epoch": 1.08, "learning_rate": 7.607609486247986e-05, "loss": 0.0225, "step": 4176 }, { "epoch": 1.08, "learning_rate": 7.606395704246068e-05, "loss": 0.0751, "step": 4177 }, { "epoch": 1.08, "learning_rate": 7.605181711300745e-05, "loss": 0.0502, "step": 4178 }, { "epoch": 1.08, "learning_rate": 7.603967507510268e-05, "loss": 0.0462, "step": 4179 }, { "epoch": 1.08, "learning_rate": 7.60275309297291e-05, "loss": 0.0553, "step": 4180 }, { "epoch": 1.08, "learning_rate": 7.601538467786958e-05, "loss": 0.0673, "step": 4181 }, { "epoch": 1.08, "learning_rate": 7.600323632050709e-05, "loss": 0.0488, "step": 4182 }, { "epoch": 1.08, "learning_rate": 7.599108585862491e-05, "loss": 0.042, "step": 4183 }, { "epoch": 1.08, "learning_rate": 7.597893329320636e-05, "loss": 0.0467, "step": 4184 }, { "epoch": 1.08, "learning_rate": 7.5966778625235e-05, "loss": 0.0453, "step": 4185 }, { "epoch": 1.08, "learning_rate": 7.595462185569454e-05, "loss": 0.0372, "step": 4186 }, { "epoch": 1.08, "learning_rate": 7.594246298556887e-05, "loss": 0.0439, "step": 4187 }, { "epoch": 1.08, "learning_rate": 7.593030201584204e-05, "loss": 0.0585, "step": 4188 }, { "epoch": 1.08, "learning_rate": 7.591813894749827e-05, "loss": 0.0361, "step": 4189 }, { "epoch": 1.08, "learning_rate": 7.590597378152198e-05, "loss": 0.0398, "step": 4190 }, { "epoch": 1.08, "learning_rate": 7.589380651889769e-05, "loss": 0.0446, "step": 4191 }, { "epoch": 1.08, "learning_rate": 7.588163716061014e-05, "loss": 0.0352, "step": 4192 }, { "epoch": 1.08, "learning_rate": 7.586946570764427e-05, "loss": 0.0585, "step": 4193 }, { "epoch": 1.08, "learning_rate": 7.585729216098515e-05, "loss": 0.0278, "step": 4194 }, { "epoch": 1.08, "learning_rate": 7.584511652161796e-05, "loss": 0.0426, "step": 4195 }, { "epoch": 1.08, "learning_rate": 7.583293879052815e-05, "loss": 0.0365, "step": 4196 }, { "epoch": 1.08, "learning_rate": 7.582075896870132e-05, "loss": 0.0376, "step": 4197 }, { "epoch": 1.08, "learning_rate": 7.580857705712321e-05, "loss": 0.0279, "step": 4198 }, { "epoch": 1.08, "learning_rate": 7.579639305677971e-05, "loss": 0.0598, "step": 4199 }, { "epoch": 1.08, "learning_rate": 7.578420696865694e-05, "loss": 0.0433, "step": 4200 }, { "epoch": 1.08, "learning_rate": 7.577201879374115e-05, "loss": 0.0322, "step": 4201 }, { "epoch": 1.08, "learning_rate": 7.575982853301873e-05, "loss": 0.0386, "step": 4202 }, { "epoch": 1.08, "learning_rate": 7.574763618747633e-05, "loss": 0.0498, "step": 4203 }, { "epoch": 1.08, "learning_rate": 7.573544175810067e-05, "loss": 0.0446, "step": 4204 }, { "epoch": 1.09, "learning_rate": 7.57232452458787e-05, "loss": 0.0401, "step": 4205 }, { "epoch": 1.09, "learning_rate": 7.571104665179753e-05, "loss": 0.0461, "step": 4206 }, { "epoch": 1.09, "learning_rate": 7.569884597684442e-05, "loss": 0.0613, "step": 4207 }, { "epoch": 1.09, "learning_rate": 7.568664322200679e-05, "loss": 0.0531, "step": 4208 }, { "epoch": 1.09, "learning_rate": 7.567443838827227e-05, "loss": 0.0369, "step": 4209 }, { "epoch": 1.09, "learning_rate": 7.56622314766286e-05, "loss": 0.0584, "step": 4210 }, { "epoch": 1.09, "learning_rate": 7.565002248806377e-05, "loss": 0.0472, "step": 4211 }, { "epoch": 1.09, "learning_rate": 7.563781142356588e-05, "loss": 0.0547, "step": 4212 }, { "epoch": 1.09, "learning_rate": 7.562559828412317e-05, "loss": 0.0371, "step": 4213 }, { "epoch": 1.09, "learning_rate": 7.56133830707241e-05, "loss": 0.0328, "step": 4214 }, { "epoch": 1.09, "learning_rate": 7.560116578435731e-05, "loss": 0.0406, "step": 4215 }, { "epoch": 1.09, "learning_rate": 7.558894642601156e-05, "loss": 0.0309, "step": 4216 }, { "epoch": 1.09, "learning_rate": 7.557672499667581e-05, "loss": 0.0377, "step": 4217 }, { "epoch": 1.09, "learning_rate": 7.556450149733916e-05, "loss": 0.0572, "step": 4218 }, { "epoch": 1.09, "learning_rate": 7.55522759289909e-05, "loss": 0.0615, "step": 4219 }, { "epoch": 1.09, "learning_rate": 7.554004829262051e-05, "loss": 0.0289, "step": 4220 }, { "epoch": 1.09, "learning_rate": 7.552781858921757e-05, "loss": 0.0353, "step": 4221 }, { "epoch": 1.09, "learning_rate": 7.551558681977188e-05, "loss": 0.0304, "step": 4222 }, { "epoch": 1.09, "learning_rate": 7.55033529852734e-05, "loss": 0.0297, "step": 4223 }, { "epoch": 1.09, "learning_rate": 7.549111708671223e-05, "loss": 0.0321, "step": 4224 }, { "epoch": 1.09, "learning_rate": 7.547887912507872e-05, "loss": 0.0481, "step": 4225 }, { "epoch": 1.09, "learning_rate": 7.546663910136325e-05, "loss": 0.0389, "step": 4226 }, { "epoch": 1.09, "learning_rate": 7.545439701655647e-05, "loss": 0.0527, "step": 4227 }, { "epoch": 1.09, "learning_rate": 7.544215287164918e-05, "loss": 0.0411, "step": 4228 }, { "epoch": 1.09, "learning_rate": 7.542990666763232e-05, "loss": 0.0517, "step": 4229 }, { "epoch": 1.09, "learning_rate": 7.5417658405497e-05, "loss": 0.0531, "step": 4230 }, { "epoch": 1.09, "learning_rate": 7.540540808623455e-05, "loss": 0.0246, "step": 4231 }, { "epoch": 1.09, "learning_rate": 7.539315571083638e-05, "loss": 0.0565, "step": 4232 }, { "epoch": 1.09, "learning_rate": 7.538090128029416e-05, "loss": 0.046, "step": 4233 }, { "epoch": 1.09, "learning_rate": 7.536864479559965e-05, "loss": 0.0359, "step": 4234 }, { "epoch": 1.09, "learning_rate": 7.535638625774477e-05, "loss": 0.044, "step": 4235 }, { "epoch": 1.09, "learning_rate": 7.534412566772169e-05, "loss": 0.0389, "step": 4236 }, { "epoch": 1.09, "learning_rate": 7.533186302652268e-05, "loss": 0.0316, "step": 4237 }, { "epoch": 1.09, "learning_rate": 7.531959833514021e-05, "loss": 0.0344, "step": 4238 }, { "epoch": 1.09, "learning_rate": 7.530733159456687e-05, "loss": 0.0663, "step": 4239 }, { "epoch": 1.09, "learning_rate": 7.529506280579544e-05, "loss": 0.0295, "step": 4240 }, { "epoch": 1.09, "learning_rate": 7.528279196981888e-05, "loss": 0.037, "step": 4241 }, { "epoch": 1.09, "learning_rate": 7.527051908763034e-05, "loss": 0.0445, "step": 4242 }, { "epoch": 1.09, "learning_rate": 7.525824416022306e-05, "loss": 0.0514, "step": 4243 }, { "epoch": 1.1, "learning_rate": 7.524596718859049e-05, "loss": 0.0369, "step": 4244 }, { "epoch": 1.1, "learning_rate": 7.523368817372626e-05, "loss": 0.0473, "step": 4245 }, { "epoch": 1.1, "learning_rate": 7.522140711662413e-05, "loss": 0.0453, "step": 4246 }, { "epoch": 1.1, "learning_rate": 7.520912401827806e-05, "loss": 0.0459, "step": 4247 }, { "epoch": 1.1, "learning_rate": 7.519683887968214e-05, "loss": 0.039, "step": 4248 }, { "epoch": 1.1, "learning_rate": 7.518455170183065e-05, "loss": 0.0351, "step": 4249 }, { "epoch": 1.1, "learning_rate": 7.517226248571802e-05, "loss": 0.0423, "step": 4250 }, { "epoch": 1.1, "learning_rate": 7.515997123233889e-05, "loss": 0.0434, "step": 4251 }, { "epoch": 1.1, "learning_rate": 7.514767794268798e-05, "loss": 0.0307, "step": 4252 }, { "epoch": 1.1, "learning_rate": 7.513538261776023e-05, "loss": 0.0508, "step": 4253 }, { "epoch": 1.1, "learning_rate": 7.512308525855077e-05, "loss": 0.0393, "step": 4254 }, { "epoch": 1.1, "learning_rate": 7.511078586605484e-05, "loss": 0.0465, "step": 4255 }, { "epoch": 1.1, "learning_rate": 7.509848444126785e-05, "loss": 0.0457, "step": 4256 }, { "epoch": 1.1, "learning_rate": 7.508618098518542e-05, "loss": 0.0392, "step": 4257 }, { "epoch": 1.1, "learning_rate": 7.507387549880331e-05, "loss": 0.0361, "step": 4258 }, { "epoch": 1.1, "learning_rate": 7.506156798311743e-05, "loss": 0.0385, "step": 4259 }, { "epoch": 1.1, "learning_rate": 7.504925843912383e-05, "loss": 0.0455, "step": 4260 }, { "epoch": 1.1, "learning_rate": 7.50369468678188e-05, "loss": 0.0451, "step": 4261 }, { "epoch": 1.1, "learning_rate": 7.502463327019874e-05, "loss": 0.0367, "step": 4262 }, { "epoch": 1.1, "learning_rate": 7.501231764726023e-05, "loss": 0.0421, "step": 4263 }, { "epoch": 1.1, "learning_rate": 7.500000000000001e-05, "loss": 0.0394, "step": 4264 }, { "epoch": 1.1, "learning_rate": 7.498768032941496e-05, "loss": 0.0375, "step": 4265 }, { "epoch": 1.1, "learning_rate": 7.497535863650219e-05, "loss": 0.0498, "step": 4266 }, { "epoch": 1.1, "learning_rate": 7.49630349222589e-05, "loss": 0.0361, "step": 4267 }, { "epoch": 1.1, "learning_rate": 7.49507091876825e-05, "loss": 0.0507, "step": 4268 }, { "epoch": 1.1, "learning_rate": 7.493838143377055e-05, "loss": 0.0418, "step": 4269 }, { "epoch": 1.1, "learning_rate": 7.492605166152074e-05, "loss": 0.0365, "step": 4270 }, { "epoch": 1.1, "learning_rate": 7.491371987193099e-05, "loss": 0.0384, "step": 4271 }, { "epoch": 1.1, "learning_rate": 7.490138606599937e-05, "loss": 0.0406, "step": 4272 }, { "epoch": 1.1, "learning_rate": 7.488905024472402e-05, "loss": 0.0485, "step": 4273 }, { "epoch": 1.1, "learning_rate": 7.487671240910338e-05, "loss": 0.0387, "step": 4274 }, { "epoch": 1.1, "learning_rate": 7.486437256013595e-05, "loss": 0.05, "step": 4275 }, { "epoch": 1.1, "learning_rate": 7.485203069882045e-05, "loss": 0.0567, "step": 4276 }, { "epoch": 1.1, "learning_rate": 7.483968682615576e-05, "loss": 0.0508, "step": 4277 }, { "epoch": 1.1, "learning_rate": 7.482734094314086e-05, "loss": 0.0337, "step": 4278 }, { "epoch": 1.1, "learning_rate": 7.481499305077496e-05, "loss": 0.0458, "step": 4279 }, { "epoch": 1.1, "learning_rate": 7.480264315005743e-05, "loss": 0.0539, "step": 4280 }, { "epoch": 1.1, "learning_rate": 7.479029124198778e-05, "loss": 0.0319, "step": 4281 }, { "epoch": 1.11, "learning_rate": 7.477793732756565e-05, "loss": 0.0645, "step": 4282 }, { "epoch": 1.11, "learning_rate": 7.476558140779092e-05, "loss": 0.0519, "step": 4283 }, { "epoch": 1.11, "learning_rate": 7.475322348366358e-05, "loss": 0.0285, "step": 4284 }, { "epoch": 1.11, "learning_rate": 7.474086355618379e-05, "loss": 0.04, "step": 4285 }, { "epoch": 1.11, "learning_rate": 7.472850162635189e-05, "loss": 0.0367, "step": 4286 }, { "epoch": 1.11, "learning_rate": 7.471613769516834e-05, "loss": 0.0382, "step": 4287 }, { "epoch": 1.11, "learning_rate": 7.470377176363381e-05, "loss": 0.0496, "step": 4288 }, { "epoch": 1.11, "learning_rate": 7.469140383274911e-05, "loss": 0.0626, "step": 4289 }, { "epoch": 1.11, "learning_rate": 7.467903390351523e-05, "loss": 0.0444, "step": 4290 }, { "epoch": 1.11, "learning_rate": 7.466666197693326e-05, "loss": 0.0332, "step": 4291 }, { "epoch": 1.11, "learning_rate": 7.465428805400453e-05, "loss": 0.0389, "step": 4292 }, { "epoch": 1.11, "learning_rate": 7.464191213573049e-05, "loss": 0.0453, "step": 4293 }, { "epoch": 1.11, "learning_rate": 7.462953422311279e-05, "loss": 0.0362, "step": 4294 }, { "epoch": 1.11, "learning_rate": 7.461715431715315e-05, "loss": 0.0346, "step": 4295 }, { "epoch": 1.11, "learning_rate": 7.460477241885356e-05, "loss": 0.0405, "step": 4296 }, { "epoch": 1.11, "learning_rate": 7.45923885292161e-05, "loss": 0.0471, "step": 4297 }, { "epoch": 1.11, "learning_rate": 7.458000264924307e-05, "loss": 0.0423, "step": 4298 }, { "epoch": 1.11, "learning_rate": 7.456761477993687e-05, "loss": 0.0482, "step": 4299 }, { "epoch": 1.11, "learning_rate": 7.455522492230007e-05, "loss": 0.0404, "step": 4300 }, { "epoch": 1.11, "learning_rate": 7.454283307733545e-05, "loss": 0.0422, "step": 4301 }, { "epoch": 1.11, "learning_rate": 7.45304392460459e-05, "loss": 0.0262, "step": 4302 }, { "epoch": 1.11, "learning_rate": 7.451804342943451e-05, "loss": 0.0371, "step": 4303 }, { "epoch": 1.11, "learning_rate": 7.45056456285045e-05, "loss": 0.0499, "step": 4304 }, { "epoch": 1.11, "learning_rate": 7.449324584425925e-05, "loss": 0.0476, "step": 4305 }, { "epoch": 1.11, "learning_rate": 7.448084407770231e-05, "loss": 0.0492, "step": 4306 }, { "epoch": 1.11, "learning_rate": 7.446844032983743e-05, "loss": 0.0388, "step": 4307 }, { "epoch": 1.11, "learning_rate": 7.445603460166843e-05, "loss": 0.0387, "step": 4308 }, { "epoch": 1.11, "learning_rate": 7.444362689419939e-05, "loss": 0.0438, "step": 4309 }, { "epoch": 1.11, "learning_rate": 7.443121720843446e-05, "loss": 0.0404, "step": 4310 }, { "epoch": 1.11, "learning_rate": 7.4418805545378e-05, "loss": 0.0485, "step": 4311 }, { "epoch": 1.11, "learning_rate": 7.440639190603458e-05, "loss": 0.0491, "step": 4312 }, { "epoch": 1.11, "learning_rate": 7.43939762914088e-05, "loss": 0.049, "step": 4313 }, { "epoch": 1.11, "learning_rate": 7.438155870250554e-05, "loss": 0.0546, "step": 4314 }, { "epoch": 1.11, "learning_rate": 7.436913914032976e-05, "loss": 0.0577, "step": 4315 }, { "epoch": 1.11, "learning_rate": 7.435671760588664e-05, "loss": 0.0471, "step": 4316 }, { "epoch": 1.11, "learning_rate": 7.434429410018147e-05, "loss": 0.0484, "step": 4317 }, { "epoch": 1.11, "learning_rate": 7.433186862421973e-05, "loss": 0.0349, "step": 4318 }, { "epoch": 1.11, "learning_rate": 7.431944117900705e-05, "loss": 0.0408, "step": 4319 }, { "epoch": 1.11, "learning_rate": 7.430701176554923e-05, "loss": 0.0361, "step": 4320 }, { "epoch": 1.12, "learning_rate": 7.429458038485222e-05, "loss": 0.0384, "step": 4321 }, { "epoch": 1.12, "learning_rate": 7.42821470379221e-05, "loss": 0.0437, "step": 4322 }, { "epoch": 1.12, "learning_rate": 7.426971172576517e-05, "loss": 0.0458, "step": 4323 }, { "epoch": 1.12, "learning_rate": 7.425727444938783e-05, "loss": 0.0235, "step": 4324 }, { "epoch": 1.12, "learning_rate": 7.424483520979671e-05, "loss": 0.0448, "step": 4325 }, { "epoch": 1.12, "learning_rate": 7.423239400799851e-05, "loss": 0.0364, "step": 4326 }, { "epoch": 1.12, "learning_rate": 7.421995084500015e-05, "loss": 0.0481, "step": 4327 }, { "epoch": 1.12, "learning_rate": 7.42075057218087e-05, "loss": 0.0464, "step": 4328 }, { "epoch": 1.12, "learning_rate": 7.419505863943138e-05, "loss": 0.048, "step": 4329 }, { "epoch": 1.12, "learning_rate": 7.418260959887556e-05, "loss": 0.0398, "step": 4330 }, { "epoch": 1.12, "learning_rate": 7.417015860114875e-05, "loss": 0.0386, "step": 4331 }, { "epoch": 1.12, "learning_rate": 7.415770564725871e-05, "loss": 0.0456, "step": 4332 }, { "epoch": 1.12, "learning_rate": 7.414525073821326e-05, "loss": 0.0322, "step": 4333 }, { "epoch": 1.12, "learning_rate": 7.413279387502041e-05, "loss": 0.0447, "step": 4334 }, { "epoch": 1.12, "learning_rate": 7.412033505868834e-05, "loss": 0.0389, "step": 4335 }, { "epoch": 1.12, "learning_rate": 7.410787429022537e-05, "loss": 0.0482, "step": 4336 }, { "epoch": 1.12, "learning_rate": 7.409541157064e-05, "loss": 0.052, "step": 4337 }, { "epoch": 1.12, "learning_rate": 7.408294690094088e-05, "loss": 0.0379, "step": 4338 }, { "epoch": 1.12, "learning_rate": 7.407048028213677e-05, "loss": 0.0278, "step": 4339 }, { "epoch": 1.12, "learning_rate": 7.405801171523669e-05, "loss": 0.045, "step": 4340 }, { "epoch": 1.12, "learning_rate": 7.404554120124971e-05, "loss": 0.0439, "step": 4341 }, { "epoch": 1.12, "learning_rate": 7.403306874118515e-05, "loss": 0.047, "step": 4342 }, { "epoch": 1.12, "learning_rate": 7.402059433605242e-05, "loss": 0.0331, "step": 4343 }, { "epoch": 1.12, "learning_rate": 7.400811798686109e-05, "loss": 0.0634, "step": 4344 }, { "epoch": 1.12, "learning_rate": 7.399563969462095e-05, "loss": 0.0361, "step": 4345 }, { "epoch": 1.12, "learning_rate": 7.398315946034188e-05, "loss": 0.0391, "step": 4346 }, { "epoch": 1.12, "learning_rate": 7.397067728503394e-05, "loss": 0.0438, "step": 4347 }, { "epoch": 1.12, "learning_rate": 7.395819316970736e-05, "loss": 0.047, "step": 4348 }, { "epoch": 1.12, "learning_rate": 7.394570711537252e-05, "loss": 0.0367, "step": 4349 }, { "epoch": 1.12, "learning_rate": 7.393321912303994e-05, "loss": 0.0407, "step": 4350 }, { "epoch": 1.12, "learning_rate": 7.392072919372032e-05, "loss": 0.0341, "step": 4351 }, { "epoch": 1.12, "learning_rate": 7.390823732842452e-05, "loss": 0.0412, "step": 4352 }, { "epoch": 1.12, "learning_rate": 7.389574352816351e-05, "loss": 0.0419, "step": 4353 }, { "epoch": 1.12, "learning_rate": 7.388324779394848e-05, "loss": 0.03, "step": 4354 }, { "epoch": 1.12, "learning_rate": 7.387075012679076e-05, "loss": 0.0399, "step": 4355 }, { "epoch": 1.12, "learning_rate": 7.385825052770178e-05, "loss": 0.044, "step": 4356 }, { "epoch": 1.12, "learning_rate": 7.384574899769318e-05, "loss": 0.036, "step": 4357 }, { "epoch": 1.12, "learning_rate": 7.383324553777678e-05, "loss": 0.0389, "step": 4358 }, { "epoch": 1.12, "learning_rate": 7.382074014896448e-05, "loss": 0.039, "step": 4359 }, { "epoch": 1.13, "learning_rate": 7.380823283226843e-05, "loss": 0.0388, "step": 4360 }, { "epoch": 1.13, "learning_rate": 7.379572358870083e-05, "loss": 0.0449, "step": 4361 }, { "epoch": 1.13, "learning_rate": 7.378321241927411e-05, "loss": 0.0634, "step": 4362 }, { "epoch": 1.13, "learning_rate": 7.377069932500084e-05, "loss": 0.0461, "step": 4363 }, { "epoch": 1.13, "learning_rate": 7.375818430689376e-05, "loss": 0.0435, "step": 4364 }, { "epoch": 1.13, "learning_rate": 7.374566736596573e-05, "loss": 0.0457, "step": 4365 }, { "epoch": 1.13, "learning_rate": 7.373314850322978e-05, "loss": 0.0275, "step": 4366 }, { "epoch": 1.13, "learning_rate": 7.372062771969909e-05, "loss": 0.0431, "step": 4367 }, { "epoch": 1.13, "learning_rate": 7.370810501638703e-05, "loss": 0.0378, "step": 4368 }, { "epoch": 1.13, "learning_rate": 7.369558039430708e-05, "loss": 0.039, "step": 4369 }, { "epoch": 1.13, "learning_rate": 7.36830538544729e-05, "loss": 0.0384, "step": 4370 }, { "epoch": 1.13, "learning_rate": 7.367052539789832e-05, "loss": 0.0554, "step": 4371 }, { "epoch": 1.13, "learning_rate": 7.365799502559727e-05, "loss": 0.0528, "step": 4372 }, { "epoch": 1.13, "learning_rate": 7.36454627385839e-05, "loss": 0.0426, "step": 4373 }, { "epoch": 1.13, "learning_rate": 7.363292853787248e-05, "loss": 0.0479, "step": 4374 }, { "epoch": 1.13, "learning_rate": 7.362039242447741e-05, "loss": 0.0455, "step": 4375 }, { "epoch": 1.13, "learning_rate": 7.360785439941331e-05, "loss": 0.0502, "step": 4376 }, { "epoch": 1.13, "learning_rate": 7.359531446369491e-05, "loss": 0.043, "step": 4377 }, { "epoch": 1.13, "learning_rate": 7.358277261833712e-05, "loss": 0.0356, "step": 4378 }, { "epoch": 1.13, "learning_rate": 7.357022886435496e-05, "loss": 0.0508, "step": 4379 }, { "epoch": 1.13, "learning_rate": 7.355768320276364e-05, "loss": 0.0381, "step": 4380 }, { "epoch": 1.13, "learning_rate": 7.354513563457855e-05, "loss": 0.0422, "step": 4381 }, { "epoch": 1.13, "learning_rate": 7.353258616081516e-05, "loss": 0.0437, "step": 4382 }, { "epoch": 1.13, "learning_rate": 7.352003478248915e-05, "loss": 0.0507, "step": 4383 }, { "epoch": 1.13, "learning_rate": 7.350748150061634e-05, "loss": 0.0585, "step": 4384 }, { "epoch": 1.13, "learning_rate": 7.349492631621271e-05, "loss": 0.0397, "step": 4385 }, { "epoch": 1.13, "learning_rate": 7.34823692302944e-05, "loss": 0.053, "step": 4386 }, { "epoch": 1.13, "learning_rate": 7.346981024387767e-05, "loss": 0.0392, "step": 4387 }, { "epoch": 1.13, "learning_rate": 7.345724935797898e-05, "loss": 0.061, "step": 4388 }, { "epoch": 1.13, "learning_rate": 7.344468657361488e-05, "loss": 0.0392, "step": 4389 }, { "epoch": 1.13, "learning_rate": 7.343212189180216e-05, "loss": 0.0266, "step": 4390 }, { "epoch": 1.13, "learning_rate": 7.34195553135577e-05, "loss": 0.0419, "step": 4391 }, { "epoch": 1.13, "learning_rate": 7.340698683989853e-05, "loss": 0.0515, "step": 4392 }, { "epoch": 1.13, "learning_rate": 7.339441647184189e-05, "loss": 0.0545, "step": 4393 }, { "epoch": 1.13, "learning_rate": 7.338184421040512e-05, "loss": 0.0592, "step": 4394 }, { "epoch": 1.13, "learning_rate": 7.336927005660572e-05, "loss": 0.0487, "step": 4395 }, { "epoch": 1.13, "learning_rate": 7.335669401146137e-05, "loss": 0.0493, "step": 4396 }, { "epoch": 1.13, "learning_rate": 7.334411607598988e-05, "loss": 0.042, "step": 4397 }, { "epoch": 1.13, "learning_rate": 7.333153625120922e-05, "loss": 0.0401, "step": 4398 }, { "epoch": 1.14, "learning_rate": 7.33189545381375e-05, "loss": 0.0479, "step": 4399 }, { "epoch": 1.14, "learning_rate": 7.330637093779303e-05, "loss": 0.0383, "step": 4400 }, { "epoch": 1.14, "learning_rate": 7.329378545119422e-05, "loss": 0.0374, "step": 4401 }, { "epoch": 1.14, "learning_rate": 7.328119807935964e-05, "loss": 0.0361, "step": 4402 }, { "epoch": 1.14, "learning_rate": 7.326860882330804e-05, "loss": 0.0463, "step": 4403 }, { "epoch": 1.14, "learning_rate": 7.32560176840583e-05, "loss": 0.0461, "step": 4404 }, { "epoch": 1.14, "learning_rate": 7.324342466262945e-05, "loss": 0.052, "step": 4405 }, { "epoch": 1.14, "learning_rate": 7.323082976004068e-05, "loss": 0.0383, "step": 4406 }, { "epoch": 1.14, "learning_rate": 7.321823297731137e-05, "loss": 0.0315, "step": 4407 }, { "epoch": 1.14, "learning_rate": 7.320563431546097e-05, "loss": 0.0454, "step": 4408 }, { "epoch": 1.14, "learning_rate": 7.319303377550915e-05, "loss": 0.0365, "step": 4409 }, { "epoch": 1.14, "learning_rate": 7.318043135847569e-05, "loss": 0.0349, "step": 4410 }, { "epoch": 1.14, "learning_rate": 7.316782706538056e-05, "loss": 0.046, "step": 4411 }, { "epoch": 1.14, "learning_rate": 7.315522089724387e-05, "loss": 0.0391, "step": 4412 }, { "epoch": 1.14, "learning_rate": 7.314261285508585e-05, "loss": 0.0465, "step": 4413 }, { "epoch": 1.14, "learning_rate": 7.313000293992693e-05, "loss": 0.0365, "step": 4414 }, { "epoch": 1.14, "learning_rate": 7.311739115278766e-05, "loss": 0.0534, "step": 4415 }, { "epoch": 1.14, "learning_rate": 7.310477749468874e-05, "loss": 0.033, "step": 4416 }, { "epoch": 1.14, "learning_rate": 7.309216196665105e-05, "loss": 0.049, "step": 4417 }, { "epoch": 1.14, "learning_rate": 7.307954456969558e-05, "loss": 0.0448, "step": 4418 }, { "epoch": 1.14, "learning_rate": 7.306692530484351e-05, "loss": 0.0436, "step": 4419 }, { "epoch": 1.14, "learning_rate": 7.305430417311616e-05, "loss": 0.0361, "step": 4420 }, { "epoch": 1.14, "learning_rate": 7.304168117553497e-05, "loss": 0.0513, "step": 4421 }, { "epoch": 1.14, "learning_rate": 7.302905631312157e-05, "loss": 0.0428, "step": 4422 }, { "epoch": 1.14, "learning_rate": 7.301642958689775e-05, "loss": 0.038, "step": 4423 }, { "epoch": 1.14, "learning_rate": 7.300380099788539e-05, "loss": 0.0477, "step": 4424 }, { "epoch": 1.14, "learning_rate": 7.29911705471066e-05, "loss": 0.0396, "step": 4425 }, { "epoch": 1.14, "learning_rate": 7.297853823558356e-05, "loss": 0.0517, "step": 4426 }, { "epoch": 1.14, "learning_rate": 7.296590406433865e-05, "loss": 0.0555, "step": 4427 }, { "epoch": 1.14, "learning_rate": 7.295326803439442e-05, "loss": 0.0387, "step": 4428 }, { "epoch": 1.14, "learning_rate": 7.294063014677352e-05, "loss": 0.0436, "step": 4429 }, { "epoch": 1.14, "learning_rate": 7.292799040249878e-05, "loss": 0.0633, "step": 4430 }, { "epoch": 1.14, "learning_rate": 7.291534880259316e-05, "loss": 0.0608, "step": 4431 }, { "epoch": 1.14, "learning_rate": 7.290270534807979e-05, "loss": 0.0326, "step": 4432 }, { "epoch": 1.14, "learning_rate": 7.289006003998194e-05, "loss": 0.0464, "step": 4433 }, { "epoch": 1.14, "learning_rate": 7.287741287932306e-05, "loss": 0.0463, "step": 4434 }, { "epoch": 1.14, "learning_rate": 7.286476386712667e-05, "loss": 0.0412, "step": 4435 }, { "epoch": 1.14, "learning_rate": 7.285211300441652e-05, "loss": 0.0427, "step": 4436 }, { "epoch": 1.15, "learning_rate": 7.283946029221649e-05, "loss": 0.0251, "step": 4437 }, { "epoch": 1.15, "learning_rate": 7.282680573155061e-05, "loss": 0.0552, "step": 4438 }, { "epoch": 1.15, "learning_rate": 7.281414932344301e-05, "loss": 0.0487, "step": 4439 }, { "epoch": 1.15, "learning_rate": 7.280149106891804e-05, "loss": 0.0324, "step": 4440 }, { "epoch": 1.15, "learning_rate": 7.278883096900017e-05, "loss": 0.0363, "step": 4441 }, { "epoch": 1.15, "learning_rate": 7.277616902471403e-05, "loss": 0.0368, "step": 4442 }, { "epoch": 1.15, "learning_rate": 7.276350523708437e-05, "loss": 0.0511, "step": 4443 }, { "epoch": 1.15, "learning_rate": 7.27508396071361e-05, "loss": 0.051, "step": 4444 }, { "epoch": 1.15, "learning_rate": 7.273817213589432e-05, "loss": 0.0578, "step": 4445 }, { "epoch": 1.15, "learning_rate": 7.27255028243842e-05, "loss": 0.0456, "step": 4446 }, { "epoch": 1.15, "learning_rate": 7.271283167363117e-05, "loss": 0.0411, "step": 4447 }, { "epoch": 1.15, "learning_rate": 7.270015868466069e-05, "loss": 0.0409, "step": 4448 }, { "epoch": 1.15, "learning_rate": 7.268748385849842e-05, "loss": 0.0521, "step": 4449 }, { "epoch": 1.15, "learning_rate": 7.267480719617022e-05, "loss": 0.0439, "step": 4450 }, { "epoch": 1.15, "learning_rate": 7.266212869870201e-05, "loss": 0.0472, "step": 4451 }, { "epoch": 1.15, "learning_rate": 7.264944836711992e-05, "loss": 0.0465, "step": 4452 }, { "epoch": 1.15, "learning_rate": 7.263676620245019e-05, "loss": 0.0333, "step": 4453 }, { "epoch": 1.15, "learning_rate": 7.262408220571923e-05, "loss": 0.0394, "step": 4454 }, { "epoch": 1.15, "learning_rate": 7.26113963779536e-05, "loss": 0.0325, "step": 4455 }, { "epoch": 1.15, "learning_rate": 7.259870872018e-05, "loss": 0.0399, "step": 4456 }, { "epoch": 1.15, "learning_rate": 7.258601923342529e-05, "loss": 0.0427, "step": 4457 }, { "epoch": 1.15, "learning_rate": 7.257332791871644e-05, "loss": 0.0505, "step": 4458 }, { "epoch": 1.15, "learning_rate": 7.256063477708061e-05, "loss": 0.0374, "step": 4459 }, { "epoch": 1.15, "learning_rate": 7.254793980954512e-05, "loss": 0.0455, "step": 4460 }, { "epoch": 1.15, "learning_rate": 7.253524301713736e-05, "loss": 0.0341, "step": 4461 }, { "epoch": 1.15, "learning_rate": 7.252254440088498e-05, "loss": 0.0375, "step": 4462 }, { "epoch": 1.15, "learning_rate": 7.250984396181566e-05, "loss": 0.0341, "step": 4463 }, { "epoch": 1.15, "learning_rate": 7.249714170095732e-05, "loss": 0.0321, "step": 4464 }, { "epoch": 1.15, "learning_rate": 7.248443761933799e-05, "loss": 0.0396, "step": 4465 }, { "epoch": 1.15, "learning_rate": 7.247173171798584e-05, "loss": 0.0389, "step": 4466 }, { "epoch": 1.15, "learning_rate": 7.245902399792919e-05, "loss": 0.0486, "step": 4467 }, { "epoch": 1.15, "learning_rate": 7.244631446019652e-05, "loss": 0.0483, "step": 4468 }, { "epoch": 1.15, "learning_rate": 7.243360310581646e-05, "loss": 0.059, "step": 4469 }, { "epoch": 1.15, "learning_rate": 7.242088993581777e-05, "loss": 0.0401, "step": 4470 }, { "epoch": 1.15, "learning_rate": 7.240817495122935e-05, "loss": 0.0329, "step": 4471 }, { "epoch": 1.15, "learning_rate": 7.239545815308028e-05, "loss": 0.033, "step": 4472 }, { "epoch": 1.15, "learning_rate": 7.238273954239978e-05, "loss": 0.0435, "step": 4473 }, { "epoch": 1.15, "learning_rate": 7.237001912021721e-05, "loss": 0.0386, "step": 4474 }, { "epoch": 1.15, "learning_rate": 7.235729688756202e-05, "loss": 0.0353, "step": 4475 }, { "epoch": 1.16, "learning_rate": 7.234457284546389e-05, "loss": 0.0566, "step": 4476 }, { "epoch": 1.16, "learning_rate": 7.233184699495263e-05, "loss": 0.0494, "step": 4477 }, { "epoch": 1.16, "learning_rate": 7.231911933705816e-05, "loss": 0.0359, "step": 4478 }, { "epoch": 1.16, "learning_rate": 7.230638987281059e-05, "loss": 0.0333, "step": 4479 }, { "epoch": 1.16, "learning_rate": 7.229365860324012e-05, "loss": 0.0445, "step": 4480 }, { "epoch": 1.16, "learning_rate": 7.228092552937716e-05, "loss": 0.049, "step": 4481 }, { "epoch": 1.16, "learning_rate": 7.226819065225221e-05, "loss": 0.0557, "step": 4482 }, { "epoch": 1.16, "learning_rate": 7.225545397289597e-05, "loss": 0.0264, "step": 4483 }, { "epoch": 1.16, "learning_rate": 7.224271549233924e-05, "loss": 0.0522, "step": 4484 }, { "epoch": 1.16, "learning_rate": 7.222997521161299e-05, "loss": 0.0378, "step": 4485 }, { "epoch": 1.16, "learning_rate": 7.221723313174831e-05, "loss": 0.0495, "step": 4486 }, { "epoch": 1.16, "learning_rate": 7.220448925377648e-05, "loss": 0.0354, "step": 4487 }, { "epoch": 1.16, "learning_rate": 7.219174357872889e-05, "loss": 0.0499, "step": 4488 }, { "epoch": 1.16, "learning_rate": 7.217899610763709e-05, "loss": 0.0427, "step": 4489 }, { "epoch": 1.16, "learning_rate": 7.216624684153276e-05, "loss": 0.0662, "step": 4490 }, { "epoch": 1.16, "learning_rate": 7.215349578144774e-05, "loss": 0.0451, "step": 4491 }, { "epoch": 1.16, "learning_rate": 7.214074292841403e-05, "loss": 0.0426, "step": 4492 }, { "epoch": 1.16, "learning_rate": 7.212798828346373e-05, "loss": 0.0442, "step": 4493 }, { "epoch": 1.16, "learning_rate": 7.211523184762912e-05, "loss": 0.0557, "step": 4494 }, { "epoch": 1.16, "learning_rate": 7.210247362194262e-05, "loss": 0.0399, "step": 4495 }, { "epoch": 1.16, "learning_rate": 7.20897136074368e-05, "loss": 0.0556, "step": 4496 }, { "epoch": 1.16, "learning_rate": 7.207695180514435e-05, "loss": 0.0391, "step": 4497 }, { "epoch": 1.16, "learning_rate": 7.206418821609812e-05, "loss": 0.0346, "step": 4498 }, { "epoch": 1.16, "learning_rate": 7.205142284133113e-05, "loss": 0.026, "step": 4499 }, { "epoch": 1.16, "learning_rate": 7.203865568187649e-05, "loss": 0.0408, "step": 4500 }, { "epoch": 1.16, "learning_rate": 7.202588673876751e-05, "loss": 0.051, "step": 4501 }, { "epoch": 1.16, "learning_rate": 7.20131160130376e-05, "loss": 0.064, "step": 4502 }, { "epoch": 1.16, "learning_rate": 7.200034350572033e-05, "loss": 0.0462, "step": 4503 }, { "epoch": 1.16, "learning_rate": 7.198756921784944e-05, "loss": 0.0542, "step": 4504 }, { "epoch": 1.16, "learning_rate": 7.197479315045876e-05, "loss": 0.0383, "step": 4505 }, { "epoch": 1.16, "learning_rate": 7.196201530458234e-05, "loss": 0.0448, "step": 4506 }, { "epoch": 1.16, "learning_rate": 7.19492356812543e-05, "loss": 0.0603, "step": 4507 }, { "epoch": 1.16, "learning_rate": 7.193645428150892e-05, "loss": 0.0436, "step": 4508 }, { "epoch": 1.16, "learning_rate": 7.192367110638064e-05, "loss": 0.0373, "step": 4509 }, { "epoch": 1.16, "learning_rate": 7.191088615690409e-05, "loss": 0.0378, "step": 4510 }, { "epoch": 1.16, "learning_rate": 7.189809943411393e-05, "loss": 0.041, "step": 4511 }, { "epoch": 1.16, "learning_rate": 7.188531093904505e-05, "loss": 0.0472, "step": 4512 }, { "epoch": 1.16, "learning_rate": 7.187252067273249e-05, "loss": 0.0572, "step": 4513 }, { "epoch": 1.16, "learning_rate": 7.185972863621136e-05, "loss": 0.0453, "step": 4514 }, { "epoch": 1.17, "learning_rate": 7.1846934830517e-05, "loss": 0.0347, "step": 4515 }, { "epoch": 1.17, "learning_rate": 7.183413925668481e-05, "loss": 0.0348, "step": 4516 }, { "epoch": 1.17, "learning_rate": 7.182134191575041e-05, "loss": 0.0433, "step": 4517 }, { "epoch": 1.17, "learning_rate": 7.18085428087495e-05, "loss": 0.0465, "step": 4518 }, { "epoch": 1.17, "learning_rate": 7.179574193671797e-05, "loss": 0.0451, "step": 4519 }, { "epoch": 1.17, "learning_rate": 7.178293930069182e-05, "loss": 0.0452, "step": 4520 }, { "epoch": 1.17, "learning_rate": 7.177013490170721e-05, "loss": 0.0452, "step": 4521 }, { "epoch": 1.17, "learning_rate": 7.175732874080042e-05, "loss": 0.0369, "step": 4522 }, { "epoch": 1.17, "learning_rate": 7.174452081900793e-05, "loss": 0.0412, "step": 4523 }, { "epoch": 1.17, "learning_rate": 7.17317111373663e-05, "loss": 0.0363, "step": 4524 }, { "epoch": 1.17, "learning_rate": 7.171889969691226e-05, "loss": 0.0504, "step": 4525 }, { "epoch": 1.17, "learning_rate": 7.170608649868267e-05, "loss": 0.0407, "step": 4526 }, { "epoch": 1.17, "learning_rate": 7.169327154371454e-05, "loss": 0.0336, "step": 4527 }, { "epoch": 1.17, "learning_rate": 7.168045483304502e-05, "loss": 0.0431, "step": 4528 }, { "epoch": 1.17, "learning_rate": 7.166763636771146e-05, "loss": 0.0281, "step": 4529 }, { "epoch": 1.17, "learning_rate": 7.16548161487512e-05, "loss": 0.0351, "step": 4530 }, { "epoch": 1.17, "learning_rate": 7.16419941772019e-05, "loss": 0.0446, "step": 4531 }, { "epoch": 1.17, "learning_rate": 7.162917045410123e-05, "loss": 0.0325, "step": 4532 }, { "epoch": 1.17, "learning_rate": 7.161634498048709e-05, "loss": 0.0422, "step": 4533 }, { "epoch": 1.17, "learning_rate": 7.160351775739746e-05, "loss": 0.0431, "step": 4534 }, { "epoch": 1.17, "learning_rate": 7.15906887858705e-05, "loss": 0.0339, "step": 4535 }, { "epoch": 1.17, "learning_rate": 7.157785806694447e-05, "loss": 0.036, "step": 4536 }, { "epoch": 1.17, "learning_rate": 7.156502560165783e-05, "loss": 0.0417, "step": 4537 }, { "epoch": 1.17, "learning_rate": 7.155219139104914e-05, "loss": 0.0344, "step": 4538 }, { "epoch": 1.17, "learning_rate": 7.153935543615709e-05, "loss": 0.0394, "step": 4539 }, { "epoch": 1.17, "learning_rate": 7.152651773802057e-05, "loss": 0.0334, "step": 4540 }, { "epoch": 1.17, "learning_rate": 7.151367829767854e-05, "loss": 0.0484, "step": 4541 }, { "epoch": 1.17, "learning_rate": 7.150083711617017e-05, "loss": 0.0368, "step": 4542 }, { "epoch": 1.17, "learning_rate": 7.14879941945347e-05, "loss": 0.0411, "step": 4543 }, { "epoch": 1.17, "learning_rate": 7.147514953381156e-05, "loss": 0.0401, "step": 4544 }, { "epoch": 1.17, "learning_rate": 7.14623031350403e-05, "loss": 0.0279, "step": 4545 }, { "epoch": 1.17, "learning_rate": 7.144945499926063e-05, "loss": 0.0331, "step": 4546 }, { "epoch": 1.17, "learning_rate": 7.143660512751237e-05, "loss": 0.0594, "step": 4547 }, { "epoch": 1.17, "learning_rate": 7.142375352083553e-05, "loss": 0.0473, "step": 4548 }, { "epoch": 1.17, "learning_rate": 7.14109001802702e-05, "loss": 0.0433, "step": 4549 }, { "epoch": 1.17, "learning_rate": 7.139804510685664e-05, "loss": 0.0432, "step": 4550 }, { "epoch": 1.17, "learning_rate": 7.138518830163528e-05, "loss": 0.0484, "step": 4551 }, { "epoch": 1.17, "learning_rate": 7.137232976564663e-05, "loss": 0.0514, "step": 4552 }, { "epoch": 1.17, "learning_rate": 7.135946949993137e-05, "loss": 0.0465, "step": 4553 }, { "epoch": 1.18, "learning_rate": 7.134660750553033e-05, "loss": 0.0378, "step": 4554 }, { "epoch": 1.18, "learning_rate": 7.133374378348447e-05, "loss": 0.049, "step": 4555 }, { "epoch": 1.18, "learning_rate": 7.132087833483488e-05, "loss": 0.0393, "step": 4556 }, { "epoch": 1.18, "learning_rate": 7.130801116062282e-05, "loss": 0.0355, "step": 4557 }, { "epoch": 1.18, "learning_rate": 7.129514226188963e-05, "loss": 0.0447, "step": 4558 }, { "epoch": 1.18, "learning_rate": 7.128227163967687e-05, "loss": 0.0349, "step": 4559 }, { "epoch": 1.18, "learning_rate": 7.126939929502619e-05, "loss": 0.0309, "step": 4560 }, { "epoch": 1.18, "learning_rate": 7.125652522897936e-05, "loss": 0.0311, "step": 4561 }, { "epoch": 1.18, "learning_rate": 7.124364944257835e-05, "loss": 0.0558, "step": 4562 }, { "epoch": 1.18, "learning_rate": 7.123077193686521e-05, "loss": 0.0485, "step": 4563 }, { "epoch": 1.18, "learning_rate": 7.121789271288218e-05, "loss": 0.0444, "step": 4564 }, { "epoch": 1.18, "learning_rate": 7.12050117716716e-05, "loss": 0.0395, "step": 4565 }, { "epoch": 1.18, "learning_rate": 7.119212911427596e-05, "loss": 0.0457, "step": 4566 }, { "epoch": 1.18, "learning_rate": 7.117924474173788e-05, "loss": 0.0408, "step": 4567 }, { "epoch": 1.18, "learning_rate": 7.116635865510017e-05, "loss": 0.0485, "step": 4568 }, { "epoch": 1.18, "learning_rate": 7.115347085540572e-05, "loss": 0.0436, "step": 4569 }, { "epoch": 1.18, "learning_rate": 7.114058134369755e-05, "loss": 0.0326, "step": 4570 }, { "epoch": 1.18, "learning_rate": 7.112769012101887e-05, "loss": 0.04, "step": 4571 }, { "epoch": 1.18, "learning_rate": 7.111479718841302e-05, "loss": 0.0439, "step": 4572 }, { "epoch": 1.18, "learning_rate": 7.110190254692346e-05, "loss": 0.0596, "step": 4573 }, { "epoch": 1.18, "learning_rate": 7.108900619759377e-05, "loss": 0.0393, "step": 4574 }, { "epoch": 1.18, "learning_rate": 7.107610814146769e-05, "loss": 0.0384, "step": 4575 }, { "epoch": 1.18, "learning_rate": 7.106320837958912e-05, "loss": 0.0163, "step": 4576 }, { "epoch": 1.18, "learning_rate": 7.105030691300207e-05, "loss": 0.0628, "step": 4577 }, { "epoch": 1.18, "learning_rate": 7.10374037427507e-05, "loss": 0.0645, "step": 4578 }, { "epoch": 1.18, "learning_rate": 7.102449886987927e-05, "loss": 0.0482, "step": 4579 }, { "epoch": 1.18, "learning_rate": 7.101159229543225e-05, "loss": 0.0595, "step": 4580 }, { "epoch": 1.18, "learning_rate": 7.099868402045418e-05, "loss": 0.0314, "step": 4581 }, { "epoch": 1.18, "learning_rate": 7.098577404598979e-05, "loss": 0.0369, "step": 4582 }, { "epoch": 1.18, "learning_rate": 7.097286237308391e-05, "loss": 0.047, "step": 4583 }, { "epoch": 1.18, "learning_rate": 7.09599490027815e-05, "loss": 0.0402, "step": 4584 }, { "epoch": 1.18, "learning_rate": 7.094703393612771e-05, "loss": 0.0595, "step": 4585 }, { "epoch": 1.18, "learning_rate": 7.093411717416778e-05, "loss": 0.0516, "step": 4586 }, { "epoch": 1.18, "learning_rate": 7.092119871794711e-05, "loss": 0.0494, "step": 4587 }, { "epoch": 1.18, "learning_rate": 7.090827856851121e-05, "loss": 0.0407, "step": 4588 }, { "epoch": 1.18, "learning_rate": 7.089535672690576e-05, "loss": 0.0282, "step": 4589 }, { "epoch": 1.18, "learning_rate": 7.088243319417658e-05, "loss": 0.0446, "step": 4590 }, { "epoch": 1.18, "learning_rate": 7.086950797136959e-05, "loss": 0.04, "step": 4591 }, { "epoch": 1.19, "learning_rate": 7.085658105953085e-05, "loss": 0.0568, "step": 4592 }, { "epoch": 1.19, "learning_rate": 7.084365245970661e-05, "loss": 0.0522, "step": 4593 }, { "epoch": 1.19, "learning_rate": 7.08307221729432e-05, "loss": 0.0483, "step": 4594 }, { "epoch": 1.19, "learning_rate": 7.081779020028712e-05, "loss": 0.0552, "step": 4595 }, { "epoch": 1.19, "learning_rate": 7.080485654278496e-05, "loss": 0.064, "step": 4596 }, { "epoch": 1.19, "learning_rate": 7.079192120148354e-05, "loss": 0.0307, "step": 4597 }, { "epoch": 1.19, "learning_rate": 7.07789841774297e-05, "loss": 0.0406, "step": 4598 }, { "epoch": 1.19, "learning_rate": 7.076604547167049e-05, "loss": 0.0426, "step": 4599 }, { "epoch": 1.19, "learning_rate": 7.075310508525309e-05, "loss": 0.0532, "step": 4600 }, { "epoch": 1.19, "learning_rate": 7.074016301922476e-05, "loss": 0.0463, "step": 4601 }, { "epoch": 1.19, "learning_rate": 7.072721927463301e-05, "loss": 0.0381, "step": 4602 }, { "epoch": 1.19, "learning_rate": 7.071427385252539e-05, "loss": 0.0336, "step": 4603 }, { "epoch": 1.19, "learning_rate": 7.070132675394958e-05, "loss": 0.0474, "step": 4604 }, { "epoch": 1.19, "learning_rate": 7.068837797995345e-05, "loss": 0.0512, "step": 4605 }, { "epoch": 1.19, "learning_rate": 7.067542753158499e-05, "loss": 0.0509, "step": 4606 }, { "epoch": 1.19, "learning_rate": 7.066247540989229e-05, "loss": 0.0404, "step": 4607 }, { "epoch": 1.19, "learning_rate": 7.064952161592365e-05, "loss": 0.057, "step": 4608 }, { "epoch": 1.19, "learning_rate": 7.063656615072742e-05, "loss": 0.0283, "step": 4609 }, { "epoch": 1.19, "learning_rate": 7.062360901535214e-05, "loss": 0.0664, "step": 4610 }, { "epoch": 1.19, "learning_rate": 7.061065021084647e-05, "loss": 0.0438, "step": 4611 }, { "epoch": 1.19, "learning_rate": 7.059768973825923e-05, "loss": 0.0474, "step": 4612 }, { "epoch": 1.19, "learning_rate": 7.058472759863928e-05, "loss": 0.0365, "step": 4613 }, { "epoch": 1.19, "learning_rate": 7.057176379303575e-05, "loss": 0.0303, "step": 4614 }, { "epoch": 1.19, "learning_rate": 7.055879832249781e-05, "loss": 0.0338, "step": 4615 }, { "epoch": 1.19, "learning_rate": 7.054583118807481e-05, "loss": 0.0392, "step": 4616 }, { "epoch": 1.19, "learning_rate": 7.053286239081621e-05, "loss": 0.0396, "step": 4617 }, { "epoch": 1.19, "learning_rate": 7.051989193177163e-05, "loss": 0.0317, "step": 4618 }, { "epoch": 1.19, "learning_rate": 7.050691981199075e-05, "loss": 0.0437, "step": 4619 }, { "epoch": 1.19, "learning_rate": 7.049394603252352e-05, "loss": 0.0425, "step": 4620 }, { "epoch": 1.19, "learning_rate": 7.04809705944199e-05, "loss": 0.0452, "step": 4621 }, { "epoch": 1.19, "learning_rate": 7.046799349873005e-05, "loss": 0.0299, "step": 4622 }, { "epoch": 1.19, "learning_rate": 7.045501474650423e-05, "loss": 0.04, "step": 4623 }, { "epoch": 1.19, "learning_rate": 7.044203433879286e-05, "loss": 0.041, "step": 4624 }, { "epoch": 1.19, "learning_rate": 7.042905227664648e-05, "loss": 0.031, "step": 4625 }, { "epoch": 1.19, "learning_rate": 7.041606856111578e-05, "loss": 0.0384, "step": 4626 }, { "epoch": 1.19, "learning_rate": 7.040308319325153e-05, "loss": 0.0409, "step": 4627 }, { "epoch": 1.19, "learning_rate": 7.039009617410473e-05, "loss": 0.0441, "step": 4628 }, { "epoch": 1.19, "learning_rate": 7.037710750472643e-05, "loss": 0.0422, "step": 4629 }, { "epoch": 1.19, "learning_rate": 7.036411718616783e-05, "loss": 0.051, "step": 4630 }, { "epoch": 1.2, "learning_rate": 7.035112521948028e-05, "loss": 0.0389, "step": 4631 }, { "epoch": 1.2, "learning_rate": 7.033813160571529e-05, "loss": 0.0341, "step": 4632 }, { "epoch": 1.2, "learning_rate": 7.032513634592443e-05, "loss": 0.0283, "step": 4633 }, { "epoch": 1.2, "learning_rate": 7.031213944115948e-05, "loss": 0.0531, "step": 4634 }, { "epoch": 1.2, "learning_rate": 7.02991408924723e-05, "loss": 0.0533, "step": 4635 }, { "epoch": 1.2, "learning_rate": 7.02861407009149e-05, "loss": 0.036, "step": 4636 }, { "epoch": 1.2, "learning_rate": 7.027313886753944e-05, "loss": 0.0457, "step": 4637 }, { "epoch": 1.2, "learning_rate": 7.026013539339819e-05, "loss": 0.0392, "step": 4638 }, { "epoch": 1.2, "learning_rate": 7.024713027954353e-05, "loss": 0.0432, "step": 4639 }, { "epoch": 1.2, "learning_rate": 7.023412352702805e-05, "loss": 0.0349, "step": 4640 }, { "epoch": 1.2, "learning_rate": 7.022111513690441e-05, "loss": 0.0446, "step": 4641 }, { "epoch": 1.2, "learning_rate": 7.02081051102254e-05, "loss": 0.0347, "step": 4642 }, { "epoch": 1.2, "learning_rate": 7.019509344804398e-05, "loss": 0.0424, "step": 4643 }, { "epoch": 1.2, "learning_rate": 7.01820801514132e-05, "loss": 0.048, "step": 4644 }, { "epoch": 1.2, "learning_rate": 7.016906522138629e-05, "loss": 0.0397, "step": 4645 }, { "epoch": 1.2, "learning_rate": 7.015604865901658e-05, "loss": 0.0452, "step": 4646 }, { "epoch": 1.2, "learning_rate": 7.014303046535755e-05, "loss": 0.0341, "step": 4647 }, { "epoch": 1.2, "learning_rate": 7.013001064146276e-05, "loss": 0.0487, "step": 4648 }, { "epoch": 1.2, "learning_rate": 7.011698918838599e-05, "loss": 0.0449, "step": 4649 }, { "epoch": 1.2, "learning_rate": 7.010396610718109e-05, "loss": 0.0345, "step": 4650 }, { "epoch": 1.2, "learning_rate": 7.009094139890206e-05, "loss": 0.0385, "step": 4651 }, { "epoch": 1.2, "learning_rate": 7.0077915064603e-05, "loss": 0.0468, "step": 4652 }, { "epoch": 1.2, "learning_rate": 7.00648871053382e-05, "loss": 0.0498, "step": 4653 }, { "epoch": 1.2, "learning_rate": 7.005185752216205e-05, "loss": 0.0502, "step": 4654 }, { "epoch": 1.2, "learning_rate": 7.003882631612907e-05, "loss": 0.047, "step": 4655 }, { "epoch": 1.2, "learning_rate": 7.002579348829392e-05, "loss": 0.0344, "step": 4656 }, { "epoch": 1.2, "learning_rate": 7.001275903971137e-05, "loss": 0.0502, "step": 4657 }, { "epoch": 1.2, "learning_rate": 6.999972297143633e-05, "loss": 0.0433, "step": 4658 }, { "epoch": 1.2, "learning_rate": 6.998668528452387e-05, "loss": 0.0489, "step": 4659 }, { "epoch": 1.2, "learning_rate": 6.99736459800292e-05, "loss": 0.0495, "step": 4660 }, { "epoch": 1.2, "learning_rate": 6.996060505900755e-05, "loss": 0.0455, "step": 4661 }, { "epoch": 1.2, "learning_rate": 6.994756252251441e-05, "loss": 0.0406, "step": 4662 }, { "epoch": 1.2, "learning_rate": 6.993451837160534e-05, "loss": 0.0385, "step": 4663 }, { "epoch": 1.2, "learning_rate": 6.992147260733606e-05, "loss": 0.0411, "step": 4664 }, { "epoch": 1.2, "learning_rate": 6.990842523076238e-05, "loss": 0.0428, "step": 4665 }, { "epoch": 1.2, "learning_rate": 6.989537624294027e-05, "loss": 0.0308, "step": 4666 }, { "epoch": 1.2, "learning_rate": 6.988232564492584e-05, "loss": 0.0511, "step": 4667 }, { "epoch": 1.2, "learning_rate": 6.986927343777529e-05, "loss": 0.0583, "step": 4668 }, { "epoch": 1.2, "learning_rate": 6.985621962254498e-05, "loss": 0.0467, "step": 4669 }, { "epoch": 1.21, "learning_rate": 6.984316420029141e-05, "loss": 0.0332, "step": 4670 }, { "epoch": 1.21, "learning_rate": 6.983010717207117e-05, "loss": 0.039, "step": 4671 }, { "epoch": 1.21, "learning_rate": 6.981704853894103e-05, "loss": 0.0469, "step": 4672 }, { "epoch": 1.21, "learning_rate": 6.980398830195785e-05, "loss": 0.0496, "step": 4673 }, { "epoch": 1.21, "learning_rate": 6.979092646217863e-05, "loss": 0.0447, "step": 4674 }, { "epoch": 1.21, "learning_rate": 6.97778630206605e-05, "loss": 0.0451, "step": 4675 }, { "epoch": 1.21, "learning_rate": 6.976479797846075e-05, "loss": 0.0378, "step": 4676 }, { "epoch": 1.21, "learning_rate": 6.975173133663675e-05, "loss": 0.0292, "step": 4677 }, { "epoch": 1.21, "learning_rate": 6.973866309624603e-05, "loss": 0.0629, "step": 4678 }, { "epoch": 1.21, "learning_rate": 6.972559325834623e-05, "loss": 0.0379, "step": 4679 }, { "epoch": 1.21, "learning_rate": 6.971252182399515e-05, "loss": 0.0603, "step": 4680 }, { "epoch": 1.21, "learning_rate": 6.969944879425069e-05, "loss": 0.0345, "step": 4681 }, { "epoch": 1.21, "learning_rate": 6.96863741701709e-05, "loss": 0.0352, "step": 4682 }, { "epoch": 1.21, "learning_rate": 6.967329795281392e-05, "loss": 0.045, "step": 4683 }, { "epoch": 1.21, "learning_rate": 6.966022014323808e-05, "loss": 0.048, "step": 4684 }, { "epoch": 1.21, "learning_rate": 6.964714074250178e-05, "loss": 0.0579, "step": 4685 }, { "epoch": 1.21, "learning_rate": 6.963405975166361e-05, "loss": 0.0464, "step": 4686 }, { "epoch": 1.21, "learning_rate": 6.962097717178221e-05, "loss": 0.0461, "step": 4687 }, { "epoch": 1.21, "learning_rate": 6.960789300391642e-05, "loss": 0.0407, "step": 4688 }, { "epoch": 1.21, "learning_rate": 6.959480724912517e-05, "loss": 0.0341, "step": 4689 }, { "epoch": 1.21, "learning_rate": 6.958171990846755e-05, "loss": 0.0361, "step": 4690 }, { "epoch": 1.21, "learning_rate": 6.956863098300274e-05, "loss": 0.0458, "step": 4691 }, { "epoch": 1.21, "learning_rate": 6.955554047379005e-05, "loss": 0.0472, "step": 4692 }, { "epoch": 1.21, "learning_rate": 6.954244838188897e-05, "loss": 0.0396, "step": 4693 }, { "epoch": 1.21, "learning_rate": 6.952935470835906e-05, "loss": 0.0332, "step": 4694 }, { "epoch": 1.21, "learning_rate": 6.951625945426003e-05, "loss": 0.0364, "step": 4695 }, { "epoch": 1.21, "learning_rate": 6.950316262065171e-05, "loss": 0.0287, "step": 4696 }, { "epoch": 1.21, "learning_rate": 6.94900642085941e-05, "loss": 0.0354, "step": 4697 }, { "epoch": 1.21, "learning_rate": 6.947696421914726e-05, "loss": 0.0444, "step": 4698 }, { "epoch": 1.21, "learning_rate": 6.946386265337143e-05, "loss": 0.0407, "step": 4699 }, { "epoch": 1.21, "learning_rate": 6.945075951232694e-05, "loss": 0.0427, "step": 4700 }, { "epoch": 1.21, "learning_rate": 6.943765479707429e-05, "loss": 0.0355, "step": 4701 }, { "epoch": 1.21, "learning_rate": 6.942454850867406e-05, "loss": 0.0402, "step": 4702 }, { "epoch": 1.21, "learning_rate": 6.941144064818701e-05, "loss": 0.0371, "step": 4703 }, { "epoch": 1.21, "learning_rate": 6.939833121667398e-05, "loss": 0.0455, "step": 4704 }, { "epoch": 1.21, "learning_rate": 6.938522021519595e-05, "loss": 0.0364, "step": 4705 }, { "epoch": 1.21, "learning_rate": 6.937210764481403e-05, "loss": 0.0438, "step": 4706 }, { "epoch": 1.21, "learning_rate": 6.935899350658949e-05, "loss": 0.0448, "step": 4707 }, { "epoch": 1.21, "learning_rate": 6.934587780158367e-05, "loss": 0.0398, "step": 4708 }, { "epoch": 1.22, "learning_rate": 6.933276053085806e-05, "loss": 0.0458, "step": 4709 }, { "epoch": 1.22, "learning_rate": 6.93196416954743e-05, "loss": 0.0408, "step": 4710 }, { "epoch": 1.22, "learning_rate": 6.930652129649411e-05, "loss": 0.0371, "step": 4711 }, { "epoch": 1.22, "learning_rate": 6.92933993349794e-05, "loss": 0.0314, "step": 4712 }, { "epoch": 1.22, "learning_rate": 6.928027581199213e-05, "loss": 0.0448, "step": 4713 }, { "epoch": 1.22, "learning_rate": 6.926715072859445e-05, "loss": 0.0564, "step": 4714 }, { "epoch": 1.22, "learning_rate": 6.925402408584862e-05, "loss": 0.0497, "step": 4715 }, { "epoch": 1.22, "learning_rate": 6.9240895884817e-05, "loss": 0.0392, "step": 4716 }, { "epoch": 1.22, "learning_rate": 6.922776612656209e-05, "loss": 0.0377, "step": 4717 }, { "epoch": 1.22, "learning_rate": 6.921463481214653e-05, "loss": 0.0428, "step": 4718 }, { "epoch": 1.22, "learning_rate": 6.920150194263309e-05, "loss": 0.0524, "step": 4719 }, { "epoch": 1.22, "learning_rate": 6.918836751908461e-05, "loss": 0.036, "step": 4720 }, { "epoch": 1.22, "learning_rate": 6.917523154256416e-05, "loss": 0.0424, "step": 4721 }, { "epoch": 1.22, "learning_rate": 6.916209401413484e-05, "loss": 0.0335, "step": 4722 }, { "epoch": 1.22, "learning_rate": 6.91489549348599e-05, "loss": 0.0366, "step": 4723 }, { "epoch": 1.22, "learning_rate": 6.913581430580274e-05, "loss": 0.0293, "step": 4724 }, { "epoch": 1.22, "learning_rate": 6.912267212802687e-05, "loss": 0.0568, "step": 4725 }, { "epoch": 1.22, "learning_rate": 6.910952840259591e-05, "loss": 0.044, "step": 4726 }, { "epoch": 1.22, "learning_rate": 6.909638313057364e-05, "loss": 0.0314, "step": 4727 }, { "epoch": 1.22, "learning_rate": 6.908323631302393e-05, "loss": 0.0391, "step": 4728 }, { "epoch": 1.22, "learning_rate": 6.907008795101081e-05, "loss": 0.0301, "step": 4729 }, { "epoch": 1.22, "learning_rate": 6.905693804559842e-05, "loss": 0.0264, "step": 4730 }, { "epoch": 1.22, "learning_rate": 6.904378659785099e-05, "loss": 0.0535, "step": 4731 }, { "epoch": 1.22, "learning_rate": 6.903063360883293e-05, "loss": 0.0359, "step": 4732 }, { "epoch": 1.22, "learning_rate": 6.901747907960874e-05, "loss": 0.043, "step": 4733 }, { "epoch": 1.22, "learning_rate": 6.900432301124308e-05, "loss": 0.0359, "step": 4734 }, { "epoch": 1.22, "learning_rate": 6.899116540480067e-05, "loss": 0.0415, "step": 4735 }, { "epoch": 1.22, "learning_rate": 6.897800626134641e-05, "loss": 0.047, "step": 4736 }, { "epoch": 1.22, "learning_rate": 6.896484558194534e-05, "loss": 0.0475, "step": 4737 }, { "epoch": 1.22, "learning_rate": 6.895168336766256e-05, "loss": 0.0306, "step": 4738 }, { "epoch": 1.22, "learning_rate": 6.893851961956332e-05, "loss": 0.0376, "step": 4739 }, { "epoch": 1.22, "learning_rate": 6.892535433871302e-05, "loss": 0.0453, "step": 4740 }, { "epoch": 1.22, "learning_rate": 6.891218752617716e-05, "loss": 0.0454, "step": 4741 }, { "epoch": 1.22, "learning_rate": 6.889901918302136e-05, "loss": 0.0369, "step": 4742 }, { "epoch": 1.22, "learning_rate": 6.888584931031141e-05, "loss": 0.0383, "step": 4743 }, { "epoch": 1.22, "learning_rate": 6.887267790911315e-05, "loss": 0.046, "step": 4744 }, { "epoch": 1.22, "learning_rate": 6.885950498049259e-05, "loss": 0.0401, "step": 4745 }, { "epoch": 1.22, "learning_rate": 6.884633052551585e-05, "loss": 0.0441, "step": 4746 }, { "epoch": 1.23, "learning_rate": 6.883315454524921e-05, "loss": 0.0278, "step": 4747 }, { "epoch": 1.23, "learning_rate": 6.8819977040759e-05, "loss": 0.0367, "step": 4748 }, { "epoch": 1.23, "learning_rate": 6.880679801311173e-05, "loss": 0.0423, "step": 4749 }, { "epoch": 1.23, "learning_rate": 6.879361746337401e-05, "loss": 0.0417, "step": 4750 }, { "epoch": 1.23, "learning_rate": 6.878043539261262e-05, "loss": 0.0434, "step": 4751 }, { "epoch": 1.23, "learning_rate": 6.876725180189439e-05, "loss": 0.0395, "step": 4752 }, { "epoch": 1.23, "learning_rate": 6.87540666922863e-05, "loss": 0.0371, "step": 4753 }, { "epoch": 1.23, "learning_rate": 6.874088006485546e-05, "loss": 0.0402, "step": 4754 }, { "epoch": 1.23, "learning_rate": 6.872769192066912e-05, "loss": 0.0357, "step": 4755 }, { "epoch": 1.23, "learning_rate": 6.871450226079466e-05, "loss": 0.0371, "step": 4756 }, { "epoch": 1.23, "learning_rate": 6.870131108629951e-05, "loss": 0.0426, "step": 4757 }, { "epoch": 1.23, "learning_rate": 6.86881183982513e-05, "loss": 0.0427, "step": 4758 }, { "epoch": 1.23, "learning_rate": 6.867492419771773e-05, "loss": 0.0371, "step": 4759 }, { "epoch": 1.23, "learning_rate": 6.866172848576669e-05, "loss": 0.0329, "step": 4760 }, { "epoch": 1.23, "learning_rate": 6.86485312634661e-05, "loss": 0.0411, "step": 4761 }, { "epoch": 1.23, "learning_rate": 6.863533253188408e-05, "loss": 0.0486, "step": 4762 }, { "epoch": 1.23, "learning_rate": 6.862213229208883e-05, "loss": 0.0342, "step": 4763 }, { "epoch": 1.23, "learning_rate": 6.86089305451487e-05, "loss": 0.0416, "step": 4764 }, { "epoch": 1.23, "learning_rate": 6.859572729213212e-05, "loss": 0.0502, "step": 4765 }, { "epoch": 1.23, "learning_rate": 6.858252253410769e-05, "loss": 0.0485, "step": 4766 }, { "epoch": 1.23, "learning_rate": 6.856931627214411e-05, "loss": 0.0417, "step": 4767 }, { "epoch": 1.23, "learning_rate": 6.855610850731018e-05, "loss": 0.0448, "step": 4768 }, { "epoch": 1.23, "learning_rate": 6.854289924067488e-05, "loss": 0.045, "step": 4769 }, { "epoch": 1.23, "learning_rate": 6.852968847330725e-05, "loss": 0.0375, "step": 4770 }, { "epoch": 1.23, "learning_rate": 6.851647620627648e-05, "loss": 0.0519, "step": 4771 }, { "epoch": 1.23, "learning_rate": 6.85032624406519e-05, "loss": 0.0406, "step": 4772 }, { "epoch": 1.23, "learning_rate": 6.849004717750292e-05, "loss": 0.0593, "step": 4773 }, { "epoch": 1.23, "learning_rate": 6.847683041789908e-05, "loss": 0.0362, "step": 4774 }, { "epoch": 1.23, "learning_rate": 6.846361216291006e-05, "loss": 0.0464, "step": 4775 }, { "epoch": 1.23, "learning_rate": 6.845039241360566e-05, "loss": 0.0498, "step": 4776 }, { "epoch": 1.23, "learning_rate": 6.843717117105579e-05, "loss": 0.0236, "step": 4777 }, { "epoch": 1.23, "learning_rate": 6.84239484363305e-05, "loss": 0.0504, "step": 4778 }, { "epoch": 1.23, "learning_rate": 6.841072421049992e-05, "loss": 0.0282, "step": 4779 }, { "epoch": 1.23, "learning_rate": 6.839749849463434e-05, "loss": 0.0392, "step": 4780 }, { "epoch": 1.23, "learning_rate": 6.838427128980415e-05, "loss": 0.026, "step": 4781 }, { "epoch": 1.23, "learning_rate": 6.837104259707988e-05, "loss": 0.0536, "step": 4782 }, { "epoch": 1.23, "learning_rate": 6.835781241753216e-05, "loss": 0.0314, "step": 4783 }, { "epoch": 1.23, "learning_rate": 6.834458075223174e-05, "loss": 0.0484, "step": 4784 }, { "epoch": 1.23, "learning_rate": 6.833134760224951e-05, "loss": 0.0523, "step": 4785 }, { "epoch": 1.24, "learning_rate": 6.831811296865646e-05, "loss": 0.0354, "step": 4786 }, { "epoch": 1.24, "learning_rate": 6.830487685252371e-05, "loss": 0.0399, "step": 4787 }, { "epoch": 1.24, "learning_rate": 6.829163925492252e-05, "loss": 0.0469, "step": 4788 }, { "epoch": 1.24, "learning_rate": 6.82784001769242e-05, "loss": 0.0428, "step": 4789 }, { "epoch": 1.24, "learning_rate": 6.826515961960029e-05, "loss": 0.0348, "step": 4790 }, { "epoch": 1.24, "learning_rate": 6.825191758402234e-05, "loss": 0.0486, "step": 4791 }, { "epoch": 1.24, "learning_rate": 6.823867407126207e-05, "loss": 0.0273, "step": 4792 }, { "epoch": 1.24, "learning_rate": 6.822542908239134e-05, "loss": 0.0506, "step": 4793 }, { "epoch": 1.24, "learning_rate": 6.821218261848211e-05, "loss": 0.0354, "step": 4794 }, { "epoch": 1.24, "learning_rate": 6.819893468060643e-05, "loss": 0.0706, "step": 4795 }, { "epoch": 1.24, "learning_rate": 6.81856852698365e-05, "loss": 0.0465, "step": 4796 }, { "epoch": 1.24, "learning_rate": 6.817243438724466e-05, "loss": 0.0531, "step": 4797 }, { "epoch": 1.24, "learning_rate": 6.815918203390333e-05, "loss": 0.0453, "step": 4798 }, { "epoch": 1.24, "learning_rate": 6.814592821088504e-05, "loss": 0.042, "step": 4799 }, { "epoch": 1.24, "learning_rate": 6.813267291926248e-05, "loss": 0.0501, "step": 4800 }, { "epoch": 1.24, "learning_rate": 6.811941616010847e-05, "loss": 0.0599, "step": 4801 }, { "epoch": 1.24, "learning_rate": 6.810615793449585e-05, "loss": 0.0274, "step": 4802 }, { "epoch": 1.24, "learning_rate": 6.809289824349771e-05, "loss": 0.0429, "step": 4803 }, { "epoch": 1.24, "learning_rate": 6.807963708818717e-05, "loss": 0.0413, "step": 4804 }, { "epoch": 1.24, "learning_rate": 6.80663744696375e-05, "loss": 0.0511, "step": 4805 }, { "epoch": 1.24, "learning_rate": 6.805311038892208e-05, "loss": 0.0283, "step": 4806 }, { "epoch": 1.24, "learning_rate": 6.803984484711442e-05, "loss": 0.0602, "step": 4807 }, { "epoch": 1.24, "learning_rate": 6.802657784528814e-05, "loss": 0.0346, "step": 4808 }, { "epoch": 1.24, "learning_rate": 6.801330938451696e-05, "loss": 0.0413, "step": 4809 }, { "epoch": 1.24, "learning_rate": 6.800003946587475e-05, "loss": 0.0401, "step": 4810 }, { "epoch": 1.24, "learning_rate": 6.798676809043549e-05, "loss": 0.049, "step": 4811 }, { "epoch": 1.24, "learning_rate": 6.797349525927327e-05, "loss": 0.0369, "step": 4812 }, { "epoch": 1.24, "learning_rate": 6.796022097346227e-05, "loss": 0.0567, "step": 4813 }, { "epoch": 1.24, "learning_rate": 6.794694523407686e-05, "loss": 0.059, "step": 4814 }, { "epoch": 1.24, "learning_rate": 6.793366804219147e-05, "loss": 0.0431, "step": 4815 }, { "epoch": 1.24, "learning_rate": 6.792038939888065e-05, "loss": 0.0442, "step": 4816 }, { "epoch": 1.24, "learning_rate": 6.79071093052191e-05, "loss": 0.0465, "step": 4817 }, { "epoch": 1.24, "learning_rate": 6.789382776228162e-05, "loss": 0.026, "step": 4818 }, { "epoch": 1.24, "learning_rate": 6.78805447711431e-05, "loss": 0.0392, "step": 4819 }, { "epoch": 1.24, "learning_rate": 6.786726033287858e-05, "loss": 0.0374, "step": 4820 }, { "epoch": 1.24, "learning_rate": 6.785397444856322e-05, "loss": 0.04, "step": 4821 }, { "epoch": 1.24, "learning_rate": 6.784068711927229e-05, "loss": 0.0477, "step": 4822 }, { "epoch": 1.24, "learning_rate": 6.782739834608116e-05, "loss": 0.0413, "step": 4823 }, { "epoch": 1.24, "learning_rate": 6.781410813006534e-05, "loss": 0.0372, "step": 4824 }, { "epoch": 1.25, "learning_rate": 6.780081647230045e-05, "loss": 0.0489, "step": 4825 }, { "epoch": 1.25, "learning_rate": 6.77875233738622e-05, "loss": 0.051, "step": 4826 }, { "epoch": 1.25, "learning_rate": 6.777422883582645e-05, "loss": 0.0431, "step": 4827 }, { "epoch": 1.25, "learning_rate": 6.776093285926919e-05, "loss": 0.0474, "step": 4828 }, { "epoch": 1.25, "learning_rate": 6.774763544526647e-05, "loss": 0.0469, "step": 4829 }, { "epoch": 1.25, "learning_rate": 6.773433659489452e-05, "loss": 0.0352, "step": 4830 }, { "epoch": 1.25, "learning_rate": 6.772103630922964e-05, "loss": 0.0384, "step": 4831 }, { "epoch": 1.25, "learning_rate": 6.770773458934826e-05, "loss": 0.0642, "step": 4832 }, { "epoch": 1.25, "learning_rate": 6.769443143632692e-05, "loss": 0.0366, "step": 4833 }, { "epoch": 1.25, "learning_rate": 6.768112685124231e-05, "loss": 0.049, "step": 4834 }, { "epoch": 1.25, "learning_rate": 6.76678208351712e-05, "loss": 0.0469, "step": 4835 }, { "epoch": 1.25, "learning_rate": 6.765451338919047e-05, "loss": 0.0424, "step": 4836 }, { "epoch": 1.25, "learning_rate": 6.764120451437714e-05, "loss": 0.0274, "step": 4837 }, { "epoch": 1.25, "learning_rate": 6.762789421180835e-05, "loss": 0.0349, "step": 4838 }, { "epoch": 1.25, "learning_rate": 6.761458248256132e-05, "loss": 0.0341, "step": 4839 }, { "epoch": 1.25, "learning_rate": 6.760126932771343e-05, "loss": 0.0535, "step": 4840 }, { "epoch": 1.25, "learning_rate": 6.758795474834214e-05, "loss": 0.0422, "step": 4841 }, { "epoch": 1.25, "learning_rate": 6.757463874552505e-05, "loss": 0.063, "step": 4842 }, { "epoch": 1.25, "learning_rate": 6.756132132033984e-05, "loss": 0.0314, "step": 4843 }, { "epoch": 1.25, "learning_rate": 6.754800247386436e-05, "loss": 0.0494, "step": 4844 }, { "epoch": 1.25, "learning_rate": 6.753468220717651e-05, "loss": 0.0424, "step": 4845 }, { "epoch": 1.25, "learning_rate": 6.752136052135438e-05, "loss": 0.037, "step": 4846 }, { "epoch": 1.25, "learning_rate": 6.75080374174761e-05, "loss": 0.0454, "step": 4847 }, { "epoch": 1.25, "learning_rate": 6.749471289661997e-05, "loss": 0.0324, "step": 4848 }, { "epoch": 1.25, "learning_rate": 6.748138695986437e-05, "loss": 0.033, "step": 4849 }, { "epoch": 1.25, "learning_rate": 6.746805960828781e-05, "loss": 0.0383, "step": 4850 }, { "epoch": 1.25, "learning_rate": 6.745473084296892e-05, "loss": 0.0533, "step": 4851 }, { "epoch": 1.25, "learning_rate": 6.744140066498643e-05, "loss": 0.0346, "step": 4852 }, { "epoch": 1.25, "learning_rate": 6.742806907541919e-05, "loss": 0.0467, "step": 4853 }, { "epoch": 1.25, "learning_rate": 6.741473607534616e-05, "loss": 0.0443, "step": 4854 }, { "epoch": 1.25, "learning_rate": 6.740140166584644e-05, "loss": 0.0482, "step": 4855 }, { "epoch": 1.25, "learning_rate": 6.738806584799921e-05, "loss": 0.0356, "step": 4856 }, { "epoch": 1.25, "learning_rate": 6.737472862288377e-05, "loss": 0.0376, "step": 4857 }, { "epoch": 1.25, "learning_rate": 6.736138999157957e-05, "loss": 0.05, "step": 4858 }, { "epoch": 1.25, "learning_rate": 6.734804995516611e-05, "loss": 0.0371, "step": 4859 }, { "epoch": 1.25, "learning_rate": 6.733470851472306e-05, "loss": 0.0386, "step": 4860 }, { "epoch": 1.25, "learning_rate": 6.732136567133019e-05, "loss": 0.0397, "step": 4861 }, { "epoch": 1.25, "learning_rate": 6.730802142606734e-05, "loss": 0.0376, "step": 4862 }, { "epoch": 1.25, "learning_rate": 6.729467578001453e-05, "loss": 0.0444, "step": 4863 }, { "epoch": 1.26, "learning_rate": 6.728132873425186e-05, "loss": 0.0435, "step": 4864 }, { "epoch": 1.26, "learning_rate": 6.726798028985955e-05, "loss": 0.0449, "step": 4865 }, { "epoch": 1.26, "learning_rate": 6.725463044791791e-05, "loss": 0.0346, "step": 4866 }, { "epoch": 1.26, "learning_rate": 6.72412792095074e-05, "loss": 0.0467, "step": 4867 }, { "epoch": 1.26, "learning_rate": 6.722792657570857e-05, "loss": 0.033, "step": 4868 }, { "epoch": 1.26, "learning_rate": 6.72145725476021e-05, "loss": 0.0393, "step": 4869 }, { "epoch": 1.26, "learning_rate": 6.720121712626877e-05, "loss": 0.049, "step": 4870 }, { "epoch": 1.26, "learning_rate": 6.718786031278944e-05, "loss": 0.0535, "step": 4871 }, { "epoch": 1.26, "learning_rate": 6.717450210824514e-05, "loss": 0.0407, "step": 4872 }, { "epoch": 1.26, "learning_rate": 6.716114251371702e-05, "loss": 0.0363, "step": 4873 }, { "epoch": 1.26, "learning_rate": 6.714778153028627e-05, "loss": 0.0506, "step": 4874 }, { "epoch": 1.26, "learning_rate": 6.713441915903423e-05, "loss": 0.0542, "step": 4875 }, { "epoch": 1.26, "learning_rate": 6.712105540104239e-05, "loss": 0.0406, "step": 4876 }, { "epoch": 1.26, "learning_rate": 6.710769025739231e-05, "loss": 0.034, "step": 4877 }, { "epoch": 1.26, "learning_rate": 6.709432372916566e-05, "loss": 0.041, "step": 4878 }, { "epoch": 1.26, "learning_rate": 6.708095581744424e-05, "loss": 0.0707, "step": 4879 }, { "epoch": 1.26, "learning_rate": 6.706758652330994e-05, "loss": 0.0428, "step": 4880 }, { "epoch": 1.26, "learning_rate": 6.705421584784479e-05, "loss": 0.0608, "step": 4881 }, { "epoch": 1.26, "learning_rate": 6.704084379213094e-05, "loss": 0.0451, "step": 4882 }, { "epoch": 1.26, "learning_rate": 6.70274703572506e-05, "loss": 0.0576, "step": 4883 }, { "epoch": 1.26, "learning_rate": 6.701409554428613e-05, "loss": 0.0418, "step": 4884 }, { "epoch": 1.26, "learning_rate": 6.700071935431998e-05, "loss": 0.0425, "step": 4885 }, { "epoch": 1.26, "learning_rate": 6.698734178843477e-05, "loss": 0.0451, "step": 4886 }, { "epoch": 1.26, "learning_rate": 6.697396284771314e-05, "loss": 0.053, "step": 4887 }, { "epoch": 1.26, "learning_rate": 6.69605825332379e-05, "loss": 0.0414, "step": 4888 }, { "epoch": 1.26, "learning_rate": 6.694720084609195e-05, "loss": 0.0631, "step": 4889 }, { "epoch": 1.26, "learning_rate": 6.693381778735834e-05, "loss": 0.0422, "step": 4890 }, { "epoch": 1.26, "learning_rate": 6.69204333581202e-05, "loss": 0.0487, "step": 4891 }, { "epoch": 1.26, "learning_rate": 6.690704755946074e-05, "loss": 0.0439, "step": 4892 }, { "epoch": 1.26, "learning_rate": 6.689366039246331e-05, "loss": 0.0407, "step": 4893 }, { "epoch": 1.26, "learning_rate": 6.688027185821141e-05, "loss": 0.037, "step": 4894 }, { "epoch": 1.26, "learning_rate": 6.686688195778861e-05, "loss": 0.0331, "step": 4895 }, { "epoch": 1.26, "learning_rate": 6.685349069227855e-05, "loss": 0.0373, "step": 4896 }, { "epoch": 1.26, "learning_rate": 6.684009806276508e-05, "loss": 0.0324, "step": 4897 }, { "epoch": 1.26, "learning_rate": 6.682670407033207e-05, "loss": 0.0537, "step": 4898 }, { "epoch": 1.26, "learning_rate": 6.681330871606356e-05, "loss": 0.0329, "step": 4899 }, { "epoch": 1.26, "learning_rate": 6.679991200104365e-05, "loss": 0.0391, "step": 4900 }, { "epoch": 1.26, "learning_rate": 6.67865139263566e-05, "loss": 0.0271, "step": 4901 }, { "epoch": 1.27, "learning_rate": 6.677311449308674e-05, "loss": 0.0437, "step": 4902 }, { "epoch": 1.27, "learning_rate": 6.675971370231853e-05, "loss": 0.0318, "step": 4903 }, { "epoch": 1.27, "learning_rate": 6.674631155513655e-05, "loss": 0.0717, "step": 4904 }, { "epoch": 1.27, "learning_rate": 6.673290805262545e-05, "loss": 0.0577, "step": 4905 }, { "epoch": 1.27, "learning_rate": 6.671950319587003e-05, "loss": 0.0513, "step": 4906 }, { "epoch": 1.27, "learning_rate": 6.67060969859552e-05, "loss": 0.0415, "step": 4907 }, { "epoch": 1.27, "learning_rate": 6.669268942396595e-05, "loss": 0.0441, "step": 4908 }, { "epoch": 1.27, "learning_rate": 6.667928051098738e-05, "loss": 0.0411, "step": 4909 }, { "epoch": 1.27, "learning_rate": 6.666587024810474e-05, "loss": 0.0468, "step": 4910 }, { "epoch": 1.27, "learning_rate": 6.665245863640334e-05, "loss": 0.0444, "step": 4911 }, { "epoch": 1.27, "learning_rate": 6.663904567696863e-05, "loss": 0.0331, "step": 4912 }, { "epoch": 1.27, "learning_rate": 6.662563137088618e-05, "loss": 0.0361, "step": 4913 }, { "epoch": 1.27, "learning_rate": 6.661221571924162e-05, "loss": 0.0471, "step": 4914 }, { "epoch": 1.27, "learning_rate": 6.659879872312073e-05, "loss": 0.0478, "step": 4915 }, { "epoch": 1.27, "learning_rate": 6.658538038360939e-05, "loss": 0.0379, "step": 4916 }, { "epoch": 1.27, "learning_rate": 6.657196070179359e-05, "loss": 0.0371, "step": 4917 }, { "epoch": 1.27, "learning_rate": 6.655853967875943e-05, "loss": 0.0536, "step": 4918 }, { "epoch": 1.27, "learning_rate": 6.654511731559308e-05, "loss": 0.0266, "step": 4919 }, { "epoch": 1.27, "learning_rate": 6.653169361338089e-05, "loss": 0.0595, "step": 4920 }, { "epoch": 1.27, "learning_rate": 6.651826857320926e-05, "loss": 0.0404, "step": 4921 }, { "epoch": 1.27, "learning_rate": 6.650484219616474e-05, "loss": 0.0484, "step": 4922 }, { "epoch": 1.27, "learning_rate": 6.649141448333395e-05, "loss": 0.0352, "step": 4923 }, { "epoch": 1.27, "learning_rate": 6.647798543580363e-05, "loss": 0.0408, "step": 4924 }, { "epoch": 1.27, "learning_rate": 6.646455505466063e-05, "loss": 0.0447, "step": 4925 }, { "epoch": 1.27, "learning_rate": 6.645112334099194e-05, "loss": 0.0254, "step": 4926 }, { "epoch": 1.27, "learning_rate": 6.643769029588461e-05, "loss": 0.0326, "step": 4927 }, { "epoch": 1.27, "learning_rate": 6.64242559204258e-05, "loss": 0.0442, "step": 4928 }, { "epoch": 1.27, "learning_rate": 6.641082021570282e-05, "loss": 0.0301, "step": 4929 }, { "epoch": 1.27, "learning_rate": 6.639738318280304e-05, "loss": 0.0496, "step": 4930 }, { "epoch": 1.27, "learning_rate": 6.6383944822814e-05, "loss": 0.0477, "step": 4931 }, { "epoch": 1.27, "learning_rate": 6.637050513682326e-05, "loss": 0.0318, "step": 4932 }, { "epoch": 1.27, "learning_rate": 6.635706412591854e-05, "loss": 0.0491, "step": 4933 }, { "epoch": 1.27, "learning_rate": 6.63436217911877e-05, "loss": 0.0345, "step": 4934 }, { "epoch": 1.27, "learning_rate": 6.633017813371863e-05, "loss": 0.0337, "step": 4935 }, { "epoch": 1.27, "learning_rate": 6.631673315459937e-05, "loss": 0.0416, "step": 4936 }, { "epoch": 1.27, "learning_rate": 6.630328685491808e-05, "loss": 0.0318, "step": 4937 }, { "epoch": 1.27, "learning_rate": 6.6289839235763e-05, "loss": 0.0367, "step": 4938 }, { "epoch": 1.27, "learning_rate": 6.627639029822248e-05, "loss": 0.0364, "step": 4939 }, { "epoch": 1.27, "learning_rate": 6.6262940043385e-05, "loss": 0.0441, "step": 4940 }, { "epoch": 1.28, "learning_rate": 6.62494884723391e-05, "loss": 0.0354, "step": 4941 }, { "epoch": 1.28, "learning_rate": 6.623603558617347e-05, "loss": 0.0431, "step": 4942 }, { "epoch": 1.28, "learning_rate": 6.622258138597691e-05, "loss": 0.0734, "step": 4943 }, { "epoch": 1.28, "learning_rate": 6.620912587283828e-05, "loss": 0.0343, "step": 4944 }, { "epoch": 1.28, "learning_rate": 6.619566904784662e-05, "loss": 0.039, "step": 4945 }, { "epoch": 1.28, "learning_rate": 6.618221091209097e-05, "loss": 0.0494, "step": 4946 }, { "epoch": 1.28, "learning_rate": 6.616875146666057e-05, "loss": 0.0432, "step": 4947 }, { "epoch": 1.28, "learning_rate": 6.615529071264475e-05, "loss": 0.0312, "step": 4948 }, { "epoch": 1.28, "learning_rate": 6.61418286511329e-05, "loss": 0.0513, "step": 4949 }, { "epoch": 1.28, "learning_rate": 6.612836528321454e-05, "loss": 0.0411, "step": 4950 }, { "epoch": 1.28, "learning_rate": 6.611490060997933e-05, "loss": 0.0433, "step": 4951 }, { "epoch": 1.28, "learning_rate": 6.610143463251698e-05, "loss": 0.0315, "step": 4952 }, { "epoch": 1.28, "learning_rate": 6.608796735191735e-05, "loss": 0.0498, "step": 4953 }, { "epoch": 1.28, "learning_rate": 6.60744987692704e-05, "loss": 0.0354, "step": 4954 }, { "epoch": 1.28, "learning_rate": 6.606102888566615e-05, "loss": 0.037, "step": 4955 }, { "epoch": 1.28, "learning_rate": 6.604755770219476e-05, "loss": 0.0498, "step": 4956 }, { "epoch": 1.28, "learning_rate": 6.603408521994651e-05, "loss": 0.0413, "step": 4957 }, { "epoch": 1.28, "learning_rate": 6.602061144001178e-05, "loss": 0.0498, "step": 4958 }, { "epoch": 1.28, "learning_rate": 6.6007136363481e-05, "loss": 0.0483, "step": 4959 }, { "epoch": 1.28, "learning_rate": 6.599365999144479e-05, "loss": 0.032, "step": 4960 }, { "epoch": 1.28, "learning_rate": 6.598018232499381e-05, "loss": 0.0462, "step": 4961 }, { "epoch": 1.28, "learning_rate": 6.596670336521886e-05, "loss": 0.0503, "step": 4962 }, { "epoch": 1.28, "learning_rate": 6.595322311321083e-05, "loss": 0.0398, "step": 4963 }, { "epoch": 1.28, "learning_rate": 6.593974157006071e-05, "loss": 0.0393, "step": 4964 }, { "epoch": 1.28, "learning_rate": 6.592625873685962e-05, "loss": 0.0351, "step": 4965 }, { "epoch": 1.28, "learning_rate": 6.591277461469873e-05, "loss": 0.0368, "step": 4966 }, { "epoch": 1.28, "learning_rate": 6.58992892046694e-05, "loss": 0.0366, "step": 4967 }, { "epoch": 1.28, "learning_rate": 6.5885802507863e-05, "loss": 0.043, "step": 4968 }, { "epoch": 1.28, "learning_rate": 6.587231452537108e-05, "loss": 0.0377, "step": 4969 }, { "epoch": 1.28, "learning_rate": 6.585882525828523e-05, "loss": 0.0427, "step": 4970 }, { "epoch": 1.28, "learning_rate": 6.58453347076972e-05, "loss": 0.0397, "step": 4971 }, { "epoch": 1.28, "learning_rate": 6.583184287469883e-05, "loss": 0.0433, "step": 4972 }, { "epoch": 1.28, "learning_rate": 6.581834976038203e-05, "loss": 0.0415, "step": 4973 }, { "epoch": 1.28, "learning_rate": 6.580485536583886e-05, "loss": 0.0446, "step": 4974 }, { "epoch": 1.28, "learning_rate": 6.579135969216144e-05, "loss": 0.0417, "step": 4975 }, { "epoch": 1.28, "learning_rate": 6.577786274044204e-05, "loss": 0.0389, "step": 4976 }, { "epoch": 1.28, "learning_rate": 6.576436451177298e-05, "loss": 0.0336, "step": 4977 }, { "epoch": 1.28, "learning_rate": 6.575086500724674e-05, "loss": 0.0368, "step": 4978 }, { "epoch": 1.28, "learning_rate": 6.573736422795584e-05, "loss": 0.0535, "step": 4979 }, { "epoch": 1.29, "learning_rate": 6.572386217499297e-05, "loss": 0.0535, "step": 4980 }, { "epoch": 1.29, "learning_rate": 6.57103588494509e-05, "loss": 0.0567, "step": 4981 }, { "epoch": 1.29, "learning_rate": 6.569685425242246e-05, "loss": 0.0514, "step": 4982 }, { "epoch": 1.29, "learning_rate": 6.568334838500065e-05, "loss": 0.0507, "step": 4983 }, { "epoch": 1.29, "learning_rate": 6.56698412482785e-05, "loss": 0.0411, "step": 4984 }, { "epoch": 1.29, "learning_rate": 6.565633284334922e-05, "loss": 0.0365, "step": 4985 }, { "epoch": 1.29, "learning_rate": 6.56428231713061e-05, "loss": 0.0451, "step": 4986 }, { "epoch": 1.29, "learning_rate": 6.562931223324244e-05, "loss": 0.0436, "step": 4987 }, { "epoch": 1.29, "learning_rate": 6.56158000302518e-05, "loss": 0.0337, "step": 4988 }, { "epoch": 1.29, "learning_rate": 6.560228656342774e-05, "loss": 0.047, "step": 4989 }, { "epoch": 1.29, "learning_rate": 6.558877183386393e-05, "loss": 0.0422, "step": 4990 }, { "epoch": 1.29, "learning_rate": 6.557525584265419e-05, "loss": 0.0346, "step": 4991 }, { "epoch": 1.29, "learning_rate": 6.556173859089236e-05, "loss": 0.0361, "step": 4992 }, { "epoch": 1.29, "learning_rate": 6.554822007967247e-05, "loss": 0.0345, "step": 4993 }, { "epoch": 1.29, "learning_rate": 6.553470031008859e-05, "loss": 0.0351, "step": 4994 }, { "epoch": 1.29, "learning_rate": 6.552117928323494e-05, "loss": 0.0334, "step": 4995 }, { "epoch": 1.29, "learning_rate": 6.550765700020581e-05, "loss": 0.0303, "step": 4996 }, { "epoch": 1.29, "learning_rate": 6.549413346209559e-05, "loss": 0.05, "step": 4997 }, { "epoch": 1.29, "learning_rate": 6.548060866999878e-05, "loss": 0.0395, "step": 4998 }, { "epoch": 1.29, "learning_rate": 6.546708262501e-05, "loss": 0.0417, "step": 4999 }, { "epoch": 1.29, "learning_rate": 6.545355532822393e-05, "loss": 0.0401, "step": 5000 }, { "epoch": 1.29, "learning_rate": 6.544002678073538e-05, "loss": 0.0496, "step": 5001 }, { "epoch": 1.29, "learning_rate": 6.542649698363925e-05, "loss": 0.0523, "step": 5002 }, { "epoch": 1.29, "learning_rate": 6.541296593803057e-05, "loss": 0.0259, "step": 5003 }, { "epoch": 1.29, "learning_rate": 6.539943364500444e-05, "loss": 0.0327, "step": 5004 }, { "epoch": 1.29, "learning_rate": 6.538590010565606e-05, "loss": 0.047, "step": 5005 }, { "epoch": 1.29, "learning_rate": 6.537236532108074e-05, "loss": 0.0436, "step": 5006 }, { "epoch": 1.29, "learning_rate": 6.53588292923739e-05, "loss": 0.0473, "step": 5007 }, { "epoch": 1.29, "learning_rate": 6.534529202063106e-05, "loss": 0.0482, "step": 5008 }, { "epoch": 1.29, "learning_rate": 6.533175350694778e-05, "loss": 0.0472, "step": 5009 }, { "epoch": 1.29, "learning_rate": 6.531821375241984e-05, "loss": 0.0372, "step": 5010 }, { "epoch": 1.29, "learning_rate": 6.530467275814301e-05, "loss": 0.0491, "step": 5011 }, { "epoch": 1.29, "learning_rate": 6.529113052521323e-05, "loss": 0.0364, "step": 5012 }, { "epoch": 1.29, "learning_rate": 6.527758705472649e-05, "loss": 0.0445, "step": 5013 }, { "epoch": 1.29, "learning_rate": 6.52640423477789e-05, "loss": 0.0368, "step": 5014 }, { "epoch": 1.29, "learning_rate": 6.525049640546671e-05, "loss": 0.0328, "step": 5015 }, { "epoch": 1.29, "learning_rate": 6.52369492288862e-05, "loss": 0.0273, "step": 5016 }, { "epoch": 1.29, "learning_rate": 6.52234008191338e-05, "loss": 0.0528, "step": 5017 }, { "epoch": 1.29, "learning_rate": 6.520985117730601e-05, "loss": 0.0497, "step": 5018 }, { "epoch": 1.3, "learning_rate": 6.519630030449945e-05, "loss": 0.0445, "step": 5019 }, { "epoch": 1.3, "learning_rate": 6.518274820181084e-05, "loss": 0.0507, "step": 5020 }, { "epoch": 1.3, "learning_rate": 6.516919487033699e-05, "loss": 0.0429, "step": 5021 }, { "epoch": 1.3, "learning_rate": 6.515564031117479e-05, "loss": 0.0366, "step": 5022 }, { "epoch": 1.3, "learning_rate": 6.514208452542127e-05, "loss": 0.0473, "step": 5023 }, { "epoch": 1.3, "learning_rate": 6.512852751417355e-05, "loss": 0.0353, "step": 5024 }, { "epoch": 1.3, "learning_rate": 6.511496927852883e-05, "loss": 0.0464, "step": 5025 }, { "epoch": 1.3, "learning_rate": 6.510140981958441e-05, "loss": 0.0343, "step": 5026 }, { "epoch": 1.3, "learning_rate": 6.50878491384377e-05, "loss": 0.0383, "step": 5027 }, { "epoch": 1.3, "learning_rate": 6.507428723618622e-05, "loss": 0.0298, "step": 5028 }, { "epoch": 1.3, "learning_rate": 6.506072411392756e-05, "loss": 0.0403, "step": 5029 }, { "epoch": 1.3, "learning_rate": 6.504715977275944e-05, "loss": 0.0495, "step": 5030 }, { "epoch": 1.3, "learning_rate": 6.503359421377963e-05, "loss": 0.0376, "step": 5031 }, { "epoch": 1.3, "learning_rate": 6.502002743808606e-05, "loss": 0.0447, "step": 5032 }, { "epoch": 1.3, "learning_rate": 6.500645944677673e-05, "loss": 0.0464, "step": 5033 }, { "epoch": 1.3, "learning_rate": 6.499289024094974e-05, "loss": 0.0388, "step": 5034 }, { "epoch": 1.3, "learning_rate": 6.497931982170328e-05, "loss": 0.0466, "step": 5035 }, { "epoch": 1.3, "learning_rate": 6.496574819013563e-05, "loss": 0.0367, "step": 5036 }, { "epoch": 1.3, "learning_rate": 6.495217534734519e-05, "loss": 0.068, "step": 5037 }, { "epoch": 1.3, "learning_rate": 6.493860129443047e-05, "loss": 0.0456, "step": 5038 }, { "epoch": 1.3, "learning_rate": 6.492502603249004e-05, "loss": 0.0317, "step": 5039 }, { "epoch": 1.3, "learning_rate": 6.491144956262259e-05, "loss": 0.0391, "step": 5040 }, { "epoch": 1.3, "learning_rate": 6.489787188592693e-05, "loss": 0.0365, "step": 5041 }, { "epoch": 1.3, "learning_rate": 6.488429300350188e-05, "loss": 0.0467, "step": 5042 }, { "epoch": 1.3, "learning_rate": 6.487071291644648e-05, "loss": 0.0423, "step": 5043 }, { "epoch": 1.3, "learning_rate": 6.485713162585978e-05, "loss": 0.0586, "step": 5044 }, { "epoch": 1.3, "learning_rate": 6.484354913284097e-05, "loss": 0.028, "step": 5045 }, { "epoch": 1.3, "learning_rate": 6.482996543848928e-05, "loss": 0.0414, "step": 5046 }, { "epoch": 1.3, "learning_rate": 6.481638054390414e-05, "loss": 0.0489, "step": 5047 }, { "epoch": 1.3, "learning_rate": 6.480279445018498e-05, "loss": 0.049, "step": 5048 }, { "epoch": 1.3, "learning_rate": 6.478920715843137e-05, "loss": 0.0321, "step": 5049 }, { "epoch": 1.3, "learning_rate": 6.477561866974296e-05, "loss": 0.0365, "step": 5050 }, { "epoch": 1.3, "learning_rate": 6.476202898521951e-05, "loss": 0.033, "step": 5051 }, { "epoch": 1.3, "learning_rate": 6.47484381059609e-05, "loss": 0.0383, "step": 5052 }, { "epoch": 1.3, "learning_rate": 6.473484603306706e-05, "loss": 0.0422, "step": 5053 }, { "epoch": 1.3, "learning_rate": 6.472125276763803e-05, "loss": 0.0401, "step": 5054 }, { "epoch": 1.3, "learning_rate": 6.470765831077395e-05, "loss": 0.0444, "step": 5055 }, { "epoch": 1.3, "learning_rate": 6.46940626635751e-05, "loss": 0.0397, "step": 5056 }, { "epoch": 1.31, "learning_rate": 6.468046582714178e-05, "loss": 0.0404, "step": 5057 }, { "epoch": 1.31, "learning_rate": 6.466686780257441e-05, "loss": 0.0255, "step": 5058 }, { "epoch": 1.31, "learning_rate": 6.465326859097356e-05, "loss": 0.0316, "step": 5059 }, { "epoch": 1.31, "learning_rate": 6.463966819343983e-05, "loss": 0.0348, "step": 5060 }, { "epoch": 1.31, "learning_rate": 6.462606661107396e-05, "loss": 0.0398, "step": 5061 }, { "epoch": 1.31, "learning_rate": 6.461246384497672e-05, "loss": 0.0533, "step": 5062 }, { "epoch": 1.31, "learning_rate": 6.459885989624908e-05, "loss": 0.0406, "step": 5063 }, { "epoch": 1.31, "learning_rate": 6.458525476599201e-05, "loss": 0.0444, "step": 5064 }, { "epoch": 1.31, "learning_rate": 6.457164845530664e-05, "loss": 0.0603, "step": 5065 }, { "epoch": 1.31, "learning_rate": 6.455804096529415e-05, "loss": 0.0383, "step": 5066 }, { "epoch": 1.31, "learning_rate": 6.454443229705583e-05, "loss": 0.0363, "step": 5067 }, { "epoch": 1.31, "learning_rate": 6.45308224516931e-05, "loss": 0.043, "step": 5068 }, { "epoch": 1.31, "learning_rate": 6.451721143030742e-05, "loss": 0.0265, "step": 5069 }, { "epoch": 1.31, "learning_rate": 6.450359923400038e-05, "loss": 0.0421, "step": 5070 }, { "epoch": 1.31, "learning_rate": 6.448998586387365e-05, "loss": 0.0423, "step": 5071 }, { "epoch": 1.31, "learning_rate": 6.447637132102899e-05, "loss": 0.0382, "step": 5072 }, { "epoch": 1.31, "learning_rate": 6.446275560656832e-05, "loss": 0.0373, "step": 5073 }, { "epoch": 1.31, "learning_rate": 6.444913872159353e-05, "loss": 0.0428, "step": 5074 }, { "epoch": 1.31, "learning_rate": 6.443552066720671e-05, "loss": 0.0436, "step": 5075 }, { "epoch": 1.31, "learning_rate": 6.442190144451e-05, "loss": 0.0343, "step": 5076 }, { "epoch": 1.31, "learning_rate": 6.440828105460565e-05, "loss": 0.063, "step": 5077 }, { "epoch": 1.31, "learning_rate": 6.439465949859603e-05, "loss": 0.0373, "step": 5078 }, { "epoch": 1.31, "learning_rate": 6.438103677758351e-05, "loss": 0.0433, "step": 5079 }, { "epoch": 1.31, "learning_rate": 6.436741289267066e-05, "loss": 0.0508, "step": 5080 }, { "epoch": 1.31, "learning_rate": 6.435378784496009e-05, "loss": 0.0589, "step": 5081 }, { "epoch": 1.31, "learning_rate": 6.434016163555452e-05, "loss": 0.0568, "step": 5082 }, { "epoch": 1.31, "learning_rate": 6.432653426555674e-05, "loss": 0.0574, "step": 5083 }, { "epoch": 1.31, "learning_rate": 6.43129057360697e-05, "loss": 0.0329, "step": 5084 }, { "epoch": 1.31, "learning_rate": 6.429927604819636e-05, "loss": 0.0377, "step": 5085 }, { "epoch": 1.31, "learning_rate": 6.428564520303981e-05, "loss": 0.0405, "step": 5086 }, { "epoch": 1.31, "learning_rate": 6.427201320170326e-05, "loss": 0.0429, "step": 5087 }, { "epoch": 1.31, "learning_rate": 6.425838004528996e-05, "loss": 0.0473, "step": 5088 }, { "epoch": 1.31, "learning_rate": 6.424474573490332e-05, "loss": 0.0444, "step": 5089 }, { "epoch": 1.31, "learning_rate": 6.423111027164674e-05, "loss": 0.0529, "step": 5090 }, { "epoch": 1.31, "learning_rate": 6.421747365662386e-05, "loss": 0.0411, "step": 5091 }, { "epoch": 1.31, "learning_rate": 6.420383589093828e-05, "loss": 0.0463, "step": 5092 }, { "epoch": 1.31, "learning_rate": 6.419019697569375e-05, "loss": 0.0606, "step": 5093 }, { "epoch": 1.31, "learning_rate": 6.417655691199412e-05, "loss": 0.0517, "step": 5094 }, { "epoch": 1.31, "learning_rate": 6.416291570094333e-05, "loss": 0.0373, "step": 5095 }, { "epoch": 1.32, "learning_rate": 6.414927334364538e-05, "loss": 0.0318, "step": 5096 }, { "epoch": 1.32, "learning_rate": 6.41356298412044e-05, "loss": 0.0602, "step": 5097 }, { "epoch": 1.32, "learning_rate": 6.41219851947246e-05, "loss": 0.0406, "step": 5098 }, { "epoch": 1.32, "learning_rate": 6.410833940531027e-05, "loss": 0.0446, "step": 5099 }, { "epoch": 1.32, "learning_rate": 6.409469247406583e-05, "loss": 0.0365, "step": 5100 }, { "epoch": 1.32, "learning_rate": 6.408104440209574e-05, "loss": 0.038, "step": 5101 }, { "epoch": 1.32, "learning_rate": 6.40673951905046e-05, "loss": 0.0428, "step": 5102 }, { "epoch": 1.32, "learning_rate": 6.405374484039706e-05, "loss": 0.0378, "step": 5103 }, { "epoch": 1.32, "learning_rate": 6.404009335287791e-05, "loss": 0.0372, "step": 5104 }, { "epoch": 1.32, "learning_rate": 6.402644072905197e-05, "loss": 0.0377, "step": 5105 }, { "epoch": 1.32, "learning_rate": 6.401278697002423e-05, "loss": 0.047, "step": 5106 }, { "epoch": 1.32, "learning_rate": 6.399913207689969e-05, "loss": 0.036, "step": 5107 }, { "epoch": 1.32, "learning_rate": 6.398547605078353e-05, "loss": 0.0388, "step": 5108 }, { "epoch": 1.32, "learning_rate": 6.397181889278092e-05, "loss": 0.0435, "step": 5109 }, { "epoch": 1.32, "learning_rate": 6.39581606039972e-05, "loss": 0.0527, "step": 5110 }, { "epoch": 1.32, "learning_rate": 6.394450118553777e-05, "loss": 0.0376, "step": 5111 }, { "epoch": 1.32, "learning_rate": 6.393084063850814e-05, "loss": 0.0482, "step": 5112 }, { "epoch": 1.32, "learning_rate": 6.39171789640139e-05, "loss": 0.0504, "step": 5113 }, { "epoch": 1.32, "learning_rate": 6.39035161631607e-05, "loss": 0.0492, "step": 5114 }, { "epoch": 1.32, "learning_rate": 6.388985223705435e-05, "loss": 0.0468, "step": 5115 }, { "epoch": 1.32, "learning_rate": 6.387618718680068e-05, "loss": 0.0588, "step": 5116 }, { "epoch": 1.32, "learning_rate": 6.386252101350569e-05, "loss": 0.041, "step": 5117 }, { "epoch": 1.32, "learning_rate": 6.384885371827536e-05, "loss": 0.0291, "step": 5118 }, { "epoch": 1.32, "learning_rate": 6.383518530221586e-05, "loss": 0.0478, "step": 5119 }, { "epoch": 1.32, "learning_rate": 6.382151576643341e-05, "loss": 0.0468, "step": 5120 }, { "epoch": 1.32, "learning_rate": 6.380784511203434e-05, "loss": 0.0382, "step": 5121 }, { "epoch": 1.32, "learning_rate": 6.379417334012506e-05, "loss": 0.0515, "step": 5122 }, { "epoch": 1.32, "learning_rate": 6.378050045181203e-05, "loss": 0.0317, "step": 5123 }, { "epoch": 1.32, "learning_rate": 6.376682644820187e-05, "loss": 0.0329, "step": 5124 }, { "epoch": 1.32, "learning_rate": 6.375315133040125e-05, "loss": 0.0256, "step": 5125 }, { "epoch": 1.32, "learning_rate": 6.373947509951695e-05, "loss": 0.0451, "step": 5126 }, { "epoch": 1.32, "learning_rate": 6.37257977566558e-05, "loss": 0.0365, "step": 5127 }, { "epoch": 1.32, "learning_rate": 6.371211930292476e-05, "loss": 0.0363, "step": 5128 }, { "epoch": 1.32, "learning_rate": 6.369843973943088e-05, "loss": 0.0274, "step": 5129 }, { "epoch": 1.32, "learning_rate": 6.36847590672813e-05, "loss": 0.0421, "step": 5130 }, { "epoch": 1.32, "learning_rate": 6.367107728758319e-05, "loss": 0.0412, "step": 5131 }, { "epoch": 1.32, "learning_rate": 6.365739440144391e-05, "loss": 0.0337, "step": 5132 }, { "epoch": 1.32, "learning_rate": 6.364371040997081e-05, "loss": 0.0409, "step": 5133 }, { "epoch": 1.32, "learning_rate": 6.363002531427143e-05, "loss": 0.0493, "step": 5134 }, { "epoch": 1.33, "learning_rate": 6.361633911545328e-05, "loss": 0.0412, "step": 5135 }, { "epoch": 1.33, "learning_rate": 6.360265181462408e-05, "loss": 0.0351, "step": 5136 }, { "epoch": 1.33, "learning_rate": 6.358896341289155e-05, "loss": 0.0449, "step": 5137 }, { "epoch": 1.33, "learning_rate": 6.357527391136356e-05, "loss": 0.0385, "step": 5138 }, { "epoch": 1.33, "learning_rate": 6.356158331114804e-05, "loss": 0.0364, "step": 5139 }, { "epoch": 1.33, "learning_rate": 6.354789161335298e-05, "loss": 0.041, "step": 5140 }, { "epoch": 1.33, "learning_rate": 6.353419881908653e-05, "loss": 0.0345, "step": 5141 }, { "epoch": 1.33, "learning_rate": 6.352050492945685e-05, "loss": 0.0415, "step": 5142 }, { "epoch": 1.33, "learning_rate": 6.350680994557227e-05, "loss": 0.0469, "step": 5143 }, { "epoch": 1.33, "learning_rate": 6.349311386854113e-05, "loss": 0.0326, "step": 5144 }, { "epoch": 1.33, "learning_rate": 6.34794166994719e-05, "loss": 0.0438, "step": 5145 }, { "epoch": 1.33, "learning_rate": 6.346571843947315e-05, "loss": 0.0527, "step": 5146 }, { "epoch": 1.33, "learning_rate": 6.345201908965351e-05, "loss": 0.0433, "step": 5147 }, { "epoch": 1.33, "learning_rate": 6.343831865112172e-05, "loss": 0.033, "step": 5148 }, { "epoch": 1.33, "learning_rate": 6.342461712498658e-05, "loss": 0.0406, "step": 5149 }, { "epoch": 1.33, "learning_rate": 6.3410914512357e-05, "loss": 0.034, "step": 5150 }, { "epoch": 1.33, "learning_rate": 6.339721081434198e-05, "loss": 0.0429, "step": 5151 }, { "epoch": 1.33, "learning_rate": 6.338350603205061e-05, "loss": 0.0412, "step": 5152 }, { "epoch": 1.33, "learning_rate": 6.336980016659204e-05, "loss": 0.0415, "step": 5153 }, { "epoch": 1.33, "learning_rate": 6.335609321907555e-05, "loss": 0.0315, "step": 5154 }, { "epoch": 1.33, "learning_rate": 6.334238519061044e-05, "loss": 0.0395, "step": 5155 }, { "epoch": 1.33, "learning_rate": 6.334238519061044e-05, "loss": 0.0363, "step": 5156 }, { "epoch": 1.33, "learning_rate": 6.332867608230619e-05, "loss": 0.0453, "step": 5157 }, { "epoch": 1.33, "learning_rate": 6.33149658952723e-05, "loss": 0.0662, "step": 5158 }, { "epoch": 1.33, "learning_rate": 6.330125463061838e-05, "loss": 0.052, "step": 5159 }, { "epoch": 1.33, "learning_rate": 6.328754228945411e-05, "loss": 0.0404, "step": 5160 }, { "epoch": 1.33, "learning_rate": 6.327382887288929e-05, "loss": 0.0404, "step": 5161 }, { "epoch": 1.33, "learning_rate": 6.326011438203378e-05, "loss": 0.044, "step": 5162 }, { "epoch": 1.33, "learning_rate": 6.324639881799752e-05, "loss": 0.0524, "step": 5163 }, { "epoch": 1.33, "learning_rate": 6.323268218189057e-05, "loss": 0.0394, "step": 5164 }, { "epoch": 1.33, "learning_rate": 6.321896447482305e-05, "loss": 0.0369, "step": 5165 }, { "epoch": 1.33, "learning_rate": 6.32052456979052e-05, "loss": 0.036, "step": 5166 }, { "epoch": 1.33, "learning_rate": 6.319152585224728e-05, "loss": 0.0399, "step": 5167 }, { "epoch": 1.33, "learning_rate": 6.31778049389597e-05, "loss": 0.0375, "step": 5168 }, { "epoch": 1.33, "learning_rate": 6.316408295915294e-05, "loss": 0.0315, "step": 5169 }, { "epoch": 1.33, "learning_rate": 6.315035991393756e-05, "loss": 0.0473, "step": 5170 }, { "epoch": 1.33, "learning_rate": 6.313663580442419e-05, "loss": 0.0341, "step": 5171 }, { "epoch": 1.33, "learning_rate": 6.312291063172357e-05, "loss": 0.0411, "step": 5172 }, { "epoch": 1.33, "learning_rate": 6.310918439694653e-05, "loss": 0.0391, "step": 5173 }, { "epoch": 1.34, "learning_rate": 6.309545710120397e-05, "loss": 0.0485, "step": 5174 }, { "epoch": 1.34, "learning_rate": 6.30817287456069e-05, "loss": 0.0349, "step": 5175 }, { "epoch": 1.34, "learning_rate": 6.306799933126635e-05, "loss": 0.0559, "step": 5176 }, { "epoch": 1.34, "learning_rate": 6.305426885929351e-05, "loss": 0.0383, "step": 5177 }, { "epoch": 1.34, "learning_rate": 6.304053733079963e-05, "loss": 0.0687, "step": 5178 }, { "epoch": 1.34, "learning_rate": 6.302680474689606e-05, "loss": 0.0515, "step": 5179 }, { "epoch": 1.34, "learning_rate": 6.301307110869418e-05, "loss": 0.0351, "step": 5180 }, { "epoch": 1.34, "learning_rate": 6.299933641730552e-05, "loss": 0.0624, "step": 5181 }, { "epoch": 1.34, "learning_rate": 6.298560067384167e-05, "loss": 0.0367, "step": 5182 }, { "epoch": 1.34, "learning_rate": 6.297186387941431e-05, "loss": 0.0433, "step": 5183 }, { "epoch": 1.34, "learning_rate": 6.295812603513517e-05, "loss": 0.0329, "step": 5184 }, { "epoch": 1.34, "learning_rate": 6.294438714211611e-05, "loss": 0.0304, "step": 5185 }, { "epoch": 1.34, "learning_rate": 6.293064720146909e-05, "loss": 0.05, "step": 5186 }, { "epoch": 1.34, "learning_rate": 6.291690621430608e-05, "loss": 0.0527, "step": 5187 }, { "epoch": 1.34, "learning_rate": 6.29031641817392e-05, "loss": 0.0406, "step": 5188 }, { "epoch": 1.34, "learning_rate": 6.288942110488063e-05, "loss": 0.0527, "step": 5189 }, { "epoch": 1.34, "learning_rate": 6.287567698484264e-05, "loss": 0.0481, "step": 5190 }, { "epoch": 1.34, "learning_rate": 6.286193182273758e-05, "loss": 0.0425, "step": 5191 }, { "epoch": 1.34, "learning_rate": 6.28481856196779e-05, "loss": 0.0394, "step": 5192 }, { "epoch": 1.34, "learning_rate": 6.283443837677611e-05, "loss": 0.043, "step": 5193 }, { "epoch": 1.34, "learning_rate": 6.282069009514479e-05, "loss": 0.0471, "step": 5194 }, { "epoch": 1.34, "learning_rate": 6.280694077589669e-05, "loss": 0.0396, "step": 5195 }, { "epoch": 1.34, "learning_rate": 6.279319042014454e-05, "loss": 0.0416, "step": 5196 }, { "epoch": 1.34, "learning_rate": 6.277943902900121e-05, "loss": 0.046, "step": 5197 }, { "epoch": 1.34, "learning_rate": 6.276568660357963e-05, "loss": 0.0294, "step": 5198 }, { "epoch": 1.34, "learning_rate": 6.275193314499283e-05, "loss": 0.0464, "step": 5199 }, { "epoch": 1.34, "learning_rate": 6.273817865435393e-05, "loss": 0.0436, "step": 5200 }, { "epoch": 1.34, "learning_rate": 6.272442313277612e-05, "loss": 0.035, "step": 5201 }, { "epoch": 1.34, "learning_rate": 6.271066658137266e-05, "loss": 0.0354, "step": 5202 }, { "epoch": 1.34, "learning_rate": 6.269690900125693e-05, "loss": 0.0327, "step": 5203 }, { "epoch": 1.34, "learning_rate": 6.268315039354233e-05, "loss": 0.0482, "step": 5204 }, { "epoch": 1.34, "learning_rate": 6.266939075934245e-05, "loss": 0.0374, "step": 5205 }, { "epoch": 1.34, "learning_rate": 6.265563009977085e-05, "loss": 0.0435, "step": 5206 }, { "epoch": 1.34, "learning_rate": 6.264186841594123e-05, "loss": 0.0587, "step": 5207 }, { "epoch": 1.34, "learning_rate": 6.262810570896737e-05, "loss": 0.0437, "step": 5208 }, { "epoch": 1.34, "learning_rate": 6.261434197996314e-05, "loss": 0.0395, "step": 5209 }, { "epoch": 1.34, "learning_rate": 6.260057723004247e-05, "loss": 0.0426, "step": 5210 }, { "epoch": 1.34, "learning_rate": 6.258681146031936e-05, "loss": 0.0316, "step": 5211 }, { "epoch": 1.35, "learning_rate": 6.257304467190795e-05, "loss": 0.0323, "step": 5212 }, { "epoch": 1.35, "learning_rate": 6.25592768659224e-05, "loss": 0.0406, "step": 5213 }, { "epoch": 1.35, "learning_rate": 6.2545508043477e-05, "loss": 0.0472, "step": 5214 }, { "epoch": 1.35, "learning_rate": 6.253173820568607e-05, "loss": 0.0395, "step": 5215 }, { "epoch": 1.35, "learning_rate": 6.251796735366409e-05, "loss": 0.0417, "step": 5216 }, { "epoch": 1.35, "learning_rate": 6.250419548852555e-05, "loss": 0.0533, "step": 5217 }, { "epoch": 1.35, "learning_rate": 6.249042261138506e-05, "loss": 0.058, "step": 5218 }, { "epoch": 1.35, "learning_rate": 6.247664872335728e-05, "loss": 0.0622, "step": 5219 }, { "epoch": 1.35, "learning_rate": 6.246287382555698e-05, "loss": 0.038, "step": 5220 }, { "epoch": 1.35, "learning_rate": 6.244909791909902e-05, "loss": 0.0347, "step": 5221 }, { "epoch": 1.35, "learning_rate": 6.24353210050983e-05, "loss": 0.0443, "step": 5222 }, { "epoch": 1.35, "learning_rate": 6.242154308466985e-05, "loss": 0.0415, "step": 5223 }, { "epoch": 1.35, "learning_rate": 6.240776415892873e-05, "loss": 0.0457, "step": 5224 }, { "epoch": 1.35, "learning_rate": 6.239398422899015e-05, "loss": 0.0356, "step": 5225 }, { "epoch": 1.35, "learning_rate": 6.238020329596933e-05, "loss": 0.0637, "step": 5226 }, { "epoch": 1.35, "learning_rate": 6.236642136098162e-05, "loss": 0.0428, "step": 5227 }, { "epoch": 1.35, "learning_rate": 6.235263842514241e-05, "loss": 0.0434, "step": 5228 }, { "epoch": 1.35, "learning_rate": 6.233885448956722e-05, "loss": 0.0443, "step": 5229 }, { "epoch": 1.35, "learning_rate": 6.23250695553716e-05, "loss": 0.0463, "step": 5230 }, { "epoch": 1.35, "learning_rate": 6.231128362367124e-05, "loss": 0.0395, "step": 5231 }, { "epoch": 1.35, "learning_rate": 6.229749669558183e-05, "loss": 0.0367, "step": 5232 }, { "epoch": 1.35, "learning_rate": 6.228370877221923e-05, "loss": 0.0426, "step": 5233 }, { "epoch": 1.35, "learning_rate": 6.226991985469932e-05, "loss": 0.054, "step": 5234 }, { "epoch": 1.35, "learning_rate": 6.225612994413809e-05, "loss": 0.0282, "step": 5235 }, { "epoch": 1.35, "learning_rate": 6.224233904165157e-05, "loss": 0.0438, "step": 5236 }, { "epoch": 1.35, "learning_rate": 6.222854714835592e-05, "loss": 0.0263, "step": 5237 }, { "epoch": 1.35, "learning_rate": 6.221475426536735e-05, "loss": 0.0355, "step": 5238 }, { "epoch": 1.35, "learning_rate": 6.220096039380219e-05, "loss": 0.045, "step": 5239 }, { "epoch": 1.35, "learning_rate": 6.21871655347768e-05, "loss": 0.0416, "step": 5240 }, { "epoch": 1.35, "learning_rate": 6.21733696894076e-05, "loss": 0.0459, "step": 5241 }, { "epoch": 1.35, "learning_rate": 6.215957285881118e-05, "loss": 0.0367, "step": 5242 }, { "epoch": 1.35, "learning_rate": 6.214577504410415e-05, "loss": 0.0381, "step": 5243 }, { "epoch": 1.35, "learning_rate": 6.21319762464032e-05, "loss": 0.042, "step": 5244 }, { "epoch": 1.35, "learning_rate": 6.21181764668251e-05, "loss": 0.0443, "step": 5245 }, { "epoch": 1.35, "learning_rate": 6.210437570648671e-05, "loss": 0.0487, "step": 5246 }, { "epoch": 1.35, "learning_rate": 6.209057396650498e-05, "loss": 0.0431, "step": 5247 }, { "epoch": 1.35, "learning_rate": 6.207677124799692e-05, "loss": 0.0457, "step": 5248 }, { "epoch": 1.35, "learning_rate": 6.206296755207964e-05, "loss": 0.0471, "step": 5249 }, { "epoch": 1.35, "learning_rate": 6.204916287987028e-05, "loss": 0.0346, "step": 5250 }, { "epoch": 1.36, "learning_rate": 6.203535723248612e-05, "loss": 0.0432, "step": 5251 }, { "epoch": 1.36, "learning_rate": 6.202155061104448e-05, "loss": 0.0556, "step": 5252 }, { "epoch": 1.36, "learning_rate": 6.20077430166628e-05, "loss": 0.037, "step": 5253 }, { "epoch": 1.36, "learning_rate": 6.19939344504585e-05, "loss": 0.0333, "step": 5254 }, { "epoch": 1.36, "learning_rate": 6.198012491354922e-05, "loss": 0.0519, "step": 5255 }, { "epoch": 1.36, "learning_rate": 6.196631440705258e-05, "loss": 0.0316, "step": 5256 }, { "epoch": 1.36, "learning_rate": 6.195250293208632e-05, "loss": 0.0441, "step": 5257 }, { "epoch": 1.36, "learning_rate": 6.193869048976822e-05, "loss": 0.0526, "step": 5258 }, { "epoch": 1.36, "learning_rate": 6.192487708121615e-05, "loss": 0.0309, "step": 5259 }, { "epoch": 1.36, "learning_rate": 6.191106270754812e-05, "loss": 0.0331, "step": 5260 }, { "epoch": 1.36, "learning_rate": 6.189724736988214e-05, "loss": 0.0338, "step": 5261 }, { "epoch": 1.36, "learning_rate": 6.188343106933634e-05, "loss": 0.0354, "step": 5262 }, { "epoch": 1.36, "learning_rate": 6.186961380702889e-05, "loss": 0.042, "step": 5263 }, { "epoch": 1.36, "learning_rate": 6.185579558407807e-05, "loss": 0.0302, "step": 5264 }, { "epoch": 1.36, "learning_rate": 6.184197640160224e-05, "loss": 0.0428, "step": 5265 }, { "epoch": 1.36, "learning_rate": 6.182815626071984e-05, "loss": 0.0542, "step": 5266 }, { "epoch": 1.36, "learning_rate": 6.181433516254935e-05, "loss": 0.036, "step": 5267 }, { "epoch": 1.36, "learning_rate": 6.180051310820936e-05, "loss": 0.0378, "step": 5268 }, { "epoch": 1.36, "learning_rate": 6.178669009881852e-05, "loss": 0.0489, "step": 5269 }, { "epoch": 1.36, "learning_rate": 6.17728661354956e-05, "loss": 0.0523, "step": 5270 }, { "epoch": 1.36, "learning_rate": 6.175904121935939e-05, "loss": 0.0332, "step": 5271 }, { "epoch": 1.36, "learning_rate": 6.174521535152879e-05, "loss": 0.0402, "step": 5272 }, { "epoch": 1.36, "learning_rate": 6.173138853312274e-05, "loss": 0.0503, "step": 5273 }, { "epoch": 1.36, "learning_rate": 6.171756076526034e-05, "loss": 0.0547, "step": 5274 }, { "epoch": 1.36, "learning_rate": 6.170373204906068e-05, "loss": 0.037, "step": 5275 }, { "epoch": 1.36, "learning_rate": 6.168990238564295e-05, "loss": 0.0666, "step": 5276 }, { "epoch": 1.36, "learning_rate": 6.167607177612645e-05, "loss": 0.0412, "step": 5277 }, { "epoch": 1.36, "learning_rate": 6.166224022163052e-05, "loss": 0.0817, "step": 5278 }, { "epoch": 1.36, "learning_rate": 6.164840772327459e-05, "loss": 0.0349, "step": 5279 }, { "epoch": 1.36, "learning_rate": 6.163457428217815e-05, "loss": 0.0394, "step": 5280 }, { "epoch": 1.36, "learning_rate": 6.162073989946083e-05, "loss": 0.0438, "step": 5281 }, { "epoch": 1.36, "learning_rate": 6.160690457624223e-05, "loss": 0.0514, "step": 5282 }, { "epoch": 1.36, "learning_rate": 6.159306831364213e-05, "loss": 0.0473, "step": 5283 }, { "epoch": 1.36, "learning_rate": 6.157923111278031e-05, "loss": 0.0549, "step": 5284 }, { "epoch": 1.36, "learning_rate": 6.156539297477666e-05, "loss": 0.0394, "step": 5285 }, { "epoch": 1.36, "learning_rate": 6.155155390075116e-05, "loss": 0.0345, "step": 5286 }, { "epoch": 1.36, "learning_rate": 6.153771389182382e-05, "loss": 0.0449, "step": 5287 }, { "epoch": 1.36, "learning_rate": 6.15238729491148e-05, "loss": 0.0425, "step": 5288 }, { "epoch": 1.36, "learning_rate": 6.151003107374423e-05, "loss": 0.0527, "step": 5289 }, { "epoch": 1.37, "learning_rate": 6.149618826683242e-05, "loss": 0.0442, "step": 5290 }, { "epoch": 1.37, "learning_rate": 6.148234452949967e-05, "loss": 0.0534, "step": 5291 }, { "epoch": 1.37, "learning_rate": 6.146849986286645e-05, "loss": 0.0444, "step": 5292 }, { "epoch": 1.37, "learning_rate": 6.145465426805319e-05, "loss": 0.0581, "step": 5293 }, { "epoch": 1.37, "learning_rate": 6.14408077461805e-05, "loss": 0.0341, "step": 5294 }, { "epoch": 1.37, "learning_rate": 6.1426960298369e-05, "loss": 0.0369, "step": 5295 }, { "epoch": 1.37, "learning_rate": 6.14131119257394e-05, "loss": 0.0596, "step": 5296 }, { "epoch": 1.37, "learning_rate": 6.13992626294125e-05, "loss": 0.0331, "step": 5297 }, { "epoch": 1.37, "learning_rate": 6.138541241050918e-05, "loss": 0.0463, "step": 5298 }, { "epoch": 1.37, "learning_rate": 6.137156127015036e-05, "loss": 0.0497, "step": 5299 }, { "epoch": 1.37, "learning_rate": 6.135770920945705e-05, "loss": 0.0414, "step": 5300 }, { "epoch": 1.37, "learning_rate": 6.134385622955035e-05, "loss": 0.0446, "step": 5301 }, { "epoch": 1.37, "learning_rate": 6.133000233155141e-05, "loss": 0.0446, "step": 5302 }, { "epoch": 1.37, "learning_rate": 6.13161475165815e-05, "loss": 0.0754, "step": 5303 }, { "epoch": 1.37, "learning_rate": 6.130229178576188e-05, "loss": 0.046, "step": 5304 }, { "epoch": 1.37, "learning_rate": 6.128843514021401e-05, "loss": 0.0522, "step": 5305 }, { "epoch": 1.37, "learning_rate": 6.127457758105927e-05, "loss": 0.0526, "step": 5306 }, { "epoch": 1.37, "learning_rate": 6.126071910941923e-05, "loss": 0.0448, "step": 5307 }, { "epoch": 1.37, "learning_rate": 6.12468597264155e-05, "loss": 0.0434, "step": 5308 }, { "epoch": 1.37, "learning_rate": 6.123299943316975e-05, "loss": 0.0449, "step": 5309 }, { "epoch": 1.37, "learning_rate": 6.121913823080377e-05, "loss": 0.0399, "step": 5310 }, { "epoch": 1.37, "learning_rate": 6.120527612043933e-05, "loss": 0.0423, "step": 5311 }, { "epoch": 1.37, "learning_rate": 6.119141310319838e-05, "loss": 0.0256, "step": 5312 }, { "epoch": 1.37, "learning_rate": 6.117754918020286e-05, "loss": 0.037, "step": 5313 }, { "epoch": 1.37, "learning_rate": 6.116368435257486e-05, "loss": 0.0338, "step": 5314 }, { "epoch": 1.37, "learning_rate": 6.114981862143647e-05, "loss": 0.0265, "step": 5315 }, { "epoch": 1.37, "learning_rate": 6.11359519879099e-05, "loss": 0.0348, "step": 5316 }, { "epoch": 1.37, "learning_rate": 6.112208445311741e-05, "loss": 0.0279, "step": 5317 }, { "epoch": 1.37, "learning_rate": 6.110821601818134e-05, "loss": 0.0435, "step": 5318 }, { "epoch": 1.37, "learning_rate": 6.109434668422412e-05, "loss": 0.0333, "step": 5319 }, { "epoch": 1.37, "learning_rate": 6.108047645236819e-05, "loss": 0.0391, "step": 5320 }, { "epoch": 1.37, "learning_rate": 6.106660532373616e-05, "loss": 0.0303, "step": 5321 }, { "epoch": 1.37, "learning_rate": 6.105273329945067e-05, "loss": 0.0439, "step": 5322 }, { "epoch": 1.37, "learning_rate": 6.103886038063438e-05, "loss": 0.0331, "step": 5323 }, { "epoch": 1.37, "learning_rate": 6.102498656841009e-05, "loss": 0.0433, "step": 5324 }, { "epoch": 1.37, "learning_rate": 6.101111186390064e-05, "loss": 0.0327, "step": 5325 }, { "epoch": 1.37, "learning_rate": 6.0997236268228966e-05, "loss": 0.0463, "step": 5326 }, { "epoch": 1.37, "learning_rate": 6.098335978251806e-05, "loss": 0.0454, "step": 5327 }, { "epoch": 1.37, "learning_rate": 6.0969482407890956e-05, "loss": 0.0414, "step": 5328 }, { "epoch": 1.38, "learning_rate": 6.0955604145470835e-05, "loss": 0.0472, "step": 5329 }, { "epoch": 1.38, "learning_rate": 6.094172499638088e-05, "loss": 0.0393, "step": 5330 }, { "epoch": 1.38, "learning_rate": 6.0927844961744387e-05, "loss": 0.0406, "step": 5331 }, { "epoch": 1.38, "learning_rate": 6.091396404268469e-05, "loss": 0.0453, "step": 5332 }, { "epoch": 1.38, "learning_rate": 6.090008224032523e-05, "loss": 0.0488, "step": 5333 }, { "epoch": 1.38, "learning_rate": 6.088619955578949e-05, "loss": 0.0362, "step": 5334 }, { "epoch": 1.38, "learning_rate": 6.087231599020104e-05, "loss": 0.0404, "step": 5335 }, { "epoch": 1.38, "learning_rate": 6.0858431544683545e-05, "loss": 0.0522, "step": 5336 }, { "epoch": 1.38, "learning_rate": 6.0844546220360686e-05, "loss": 0.0391, "step": 5337 }, { "epoch": 1.38, "learning_rate": 6.083066001835623e-05, "loss": 0.0391, "step": 5338 }, { "epoch": 1.38, "learning_rate": 6.081677293979407e-05, "loss": 0.0352, "step": 5339 }, { "epoch": 1.38, "learning_rate": 6.0802884985798116e-05, "loss": 0.0535, "step": 5340 }, { "epoch": 1.38, "learning_rate": 6.078899615749233e-05, "loss": 0.0529, "step": 5341 }, { "epoch": 1.38, "learning_rate": 6.077510645600081e-05, "loss": 0.0472, "step": 5342 }, { "epoch": 1.38, "learning_rate": 6.076121588244768e-05, "loss": 0.0332, "step": 5343 }, { "epoch": 1.38, "learning_rate": 6.074732443795715e-05, "loss": 0.0451, "step": 5344 }, { "epoch": 1.38, "learning_rate": 6.0733432123653486e-05, "loss": 0.0332, "step": 5345 }, { "epoch": 1.38, "learning_rate": 6.0719538940661046e-05, "loss": 0.0489, "step": 5346 }, { "epoch": 1.38, "learning_rate": 6.070564489010424e-05, "loss": 0.0285, "step": 5347 }, { "epoch": 1.38, "learning_rate": 6.069174997310756e-05, "loss": 0.0441, "step": 5348 }, { "epoch": 1.38, "learning_rate": 6.067785419079557e-05, "loss": 0.0346, "step": 5349 }, { "epoch": 1.38, "learning_rate": 6.0663957544292885e-05, "loss": 0.0351, "step": 5350 }, { "epoch": 1.38, "learning_rate": 6.06500600347242e-05, "loss": 0.0381, "step": 5351 }, { "epoch": 1.38, "learning_rate": 6.0636161663214276e-05, "loss": 0.0504, "step": 5352 }, { "epoch": 1.38, "learning_rate": 6.062226243088799e-05, "loss": 0.0351, "step": 5353 }, { "epoch": 1.38, "learning_rate": 6.06083623388702e-05, "loss": 0.0417, "step": 5354 }, { "epoch": 1.38, "learning_rate": 6.05944613882859e-05, "loss": 0.0369, "step": 5355 }, { "epoch": 1.38, "learning_rate": 6.058055958026013e-05, "loss": 0.0386, "step": 5356 }, { "epoch": 1.38, "learning_rate": 6.056665691591803e-05, "loss": 0.033, "step": 5357 }, { "epoch": 1.38, "learning_rate": 6.055275339638477e-05, "loss": 0.039, "step": 5358 }, { "epoch": 1.38, "learning_rate": 6.0538849022785584e-05, "loss": 0.0472, "step": 5359 }, { "epoch": 1.38, "learning_rate": 6.0524943796245805e-05, "loss": 0.0551, "step": 5360 }, { "epoch": 1.38, "learning_rate": 6.051103771789084e-05, "loss": 0.0516, "step": 5361 }, { "epoch": 1.38, "learning_rate": 6.0497130788846144e-05, "loss": 0.0464, "step": 5362 }, { "epoch": 1.38, "learning_rate": 6.0483223010237234e-05, "loss": 0.0479, "step": 5363 }, { "epoch": 1.38, "learning_rate": 6.046931438318972e-05, "loss": 0.0488, "step": 5364 }, { "epoch": 1.38, "learning_rate": 6.045540490882926e-05, "loss": 0.0374, "step": 5365 }, { "epoch": 1.38, "learning_rate": 6.044149458828161e-05, "loss": 0.0494, "step": 5366 }, { "epoch": 1.39, "learning_rate": 6.042758342267254e-05, "loss": 0.0462, "step": 5367 }, { "epoch": 1.39, "learning_rate": 6.041367141312795e-05, "loss": 0.0418, "step": 5368 }, { "epoch": 1.39, "learning_rate": 6.0399758560773754e-05, "loss": 0.044, "step": 5369 }, { "epoch": 1.39, "learning_rate": 6.0385844866736e-05, "loss": 0.0432, "step": 5370 }, { "epoch": 1.39, "learning_rate": 6.0371930332140734e-05, "loss": 0.0479, "step": 5371 }, { "epoch": 1.39, "learning_rate": 6.035801495811411e-05, "loss": 0.0401, "step": 5372 }, { "epoch": 1.39, "learning_rate": 6.034409874578234e-05, "loss": 0.0501, "step": 5373 }, { "epoch": 1.39, "learning_rate": 6.033018169627169e-05, "loss": 0.0454, "step": 5374 }, { "epoch": 1.39, "learning_rate": 6.031626381070855e-05, "loss": 0.0394, "step": 5375 }, { "epoch": 1.39, "learning_rate": 6.0302345090219305e-05, "loss": 0.0513, "step": 5376 }, { "epoch": 1.39, "learning_rate": 6.028842553593043e-05, "loss": 0.0349, "step": 5377 }, { "epoch": 1.39, "learning_rate": 6.0274505148968485e-05, "loss": 0.0442, "step": 5378 }, { "epoch": 1.39, "learning_rate": 6.02605839304601e-05, "loss": 0.0534, "step": 5379 }, { "epoch": 1.39, "learning_rate": 6.024666188153196e-05, "loss": 0.0346, "step": 5380 }, { "epoch": 1.39, "learning_rate": 6.023273900331081e-05, "loss": 0.0553, "step": 5381 }, { "epoch": 1.39, "learning_rate": 6.021881529692347e-05, "loss": 0.048, "step": 5382 }, { "epoch": 1.39, "learning_rate": 6.0204890763496826e-05, "loss": 0.0448, "step": 5383 }, { "epoch": 1.39, "learning_rate": 6.019096540415785e-05, "loss": 0.0448, "step": 5384 }, { "epoch": 1.39, "learning_rate": 6.0177039220033535e-05, "loss": 0.0506, "step": 5385 }, { "epoch": 1.39, "learning_rate": 6.0163112212250986e-05, "loss": 0.0424, "step": 5386 }, { "epoch": 1.39, "learning_rate": 6.0149184381937354e-05, "loss": 0.0411, "step": 5387 }, { "epoch": 1.39, "learning_rate": 6.013525573021985e-05, "loss": 0.0509, "step": 5388 }, { "epoch": 1.39, "learning_rate": 6.0121326258225794e-05, "loss": 0.0479, "step": 5389 }, { "epoch": 1.39, "learning_rate": 6.01073959670825e-05, "loss": 0.0541, "step": 5390 }, { "epoch": 1.39, "learning_rate": 6.00934648579174e-05, "loss": 0.0472, "step": 5391 }, { "epoch": 1.39, "learning_rate": 6.0079532931858e-05, "loss": 0.028, "step": 5392 }, { "epoch": 1.39, "learning_rate": 6.006560019003183e-05, "loss": 0.0523, "step": 5393 }, { "epoch": 1.39, "learning_rate": 6.0051666633566516e-05, "loss": 0.0358, "step": 5394 }, { "epoch": 1.39, "learning_rate": 6.003773226358975e-05, "loss": 0.0302, "step": 5395 }, { "epoch": 1.39, "learning_rate": 6.002379708122926e-05, "loss": 0.0448, "step": 5396 }, { "epoch": 1.39, "learning_rate": 6.000986108761288e-05, "loss": 0.0351, "step": 5397 }, { "epoch": 1.39, "learning_rate": 5.999592428386851e-05, "loss": 0.0371, "step": 5398 }, { "epoch": 1.39, "learning_rate": 5.998198667112407e-05, "loss": 0.0321, "step": 5399 }, { "epoch": 1.39, "learning_rate": 5.996804825050757e-05, "loss": 0.0424, "step": 5400 }, { "epoch": 1.39, "learning_rate": 5.995410902314709e-05, "loss": 0.0385, "step": 5401 }, { "epoch": 1.39, "learning_rate": 5.99401689901708e-05, "loss": 0.0334, "step": 5402 }, { "epoch": 1.39, "learning_rate": 5.9926228152706866e-05, "loss": 0.0387, "step": 5403 }, { "epoch": 1.39, "learning_rate": 5.99122865118836e-05, "loss": 0.0472, "step": 5404 }, { "epoch": 1.39, "learning_rate": 5.989834406882932e-05, "loss": 0.0336, "step": 5405 }, { "epoch": 1.4, "learning_rate": 5.988440082467243e-05, "loss": 0.053, "step": 5406 }, { "epoch": 1.4, "learning_rate": 5.987045678054142e-05, "loss": 0.0483, "step": 5407 }, { "epoch": 1.4, "learning_rate": 5.9856511937564786e-05, "loss": 0.0331, "step": 5408 }, { "epoch": 1.4, "learning_rate": 5.984256629687115e-05, "loss": 0.0472, "step": 5409 }, { "epoch": 1.4, "learning_rate": 5.982861985958918e-05, "loss": 0.0483, "step": 5410 }, { "epoch": 1.4, "learning_rate": 5.981467262684759e-05, "loss": 0.0324, "step": 5411 }, { "epoch": 1.4, "learning_rate": 5.9800724599775184e-05, "loss": 0.0407, "step": 5412 }, { "epoch": 1.4, "learning_rate": 5.978677577950079e-05, "loss": 0.045, "step": 5413 }, { "epoch": 1.4, "learning_rate": 5.977282616715336e-05, "loss": 0.0403, "step": 5414 }, { "epoch": 1.4, "learning_rate": 5.9758875763861853e-05, "loss": 0.0488, "step": 5415 }, { "epoch": 1.4, "learning_rate": 5.974492457075535e-05, "loss": 0.0507, "step": 5416 }, { "epoch": 1.4, "learning_rate": 5.973097258896292e-05, "loss": 0.0362, "step": 5417 }, { "epoch": 1.4, "learning_rate": 5.971701981961376e-05, "loss": 0.0355, "step": 5418 }, { "epoch": 1.4, "learning_rate": 5.970306626383712e-05, "loss": 0.0308, "step": 5419 }, { "epoch": 1.4, "learning_rate": 5.9689111922762274e-05, "loss": 0.0601, "step": 5420 }, { "epoch": 1.4, "learning_rate": 5.967515679751864e-05, "loss": 0.0393, "step": 5421 }, { "epoch": 1.4, "learning_rate": 5.966120088923559e-05, "loss": 0.0401, "step": 5422 }, { "epoch": 1.4, "learning_rate": 5.964724419904264e-05, "loss": 0.0421, "step": 5423 }, { "epoch": 1.4, "learning_rate": 5.963328672806935e-05, "loss": 0.0464, "step": 5424 }, { "epoch": 1.4, "learning_rate": 5.961932847744536e-05, "loss": 0.0429, "step": 5425 }, { "epoch": 1.4, "learning_rate": 5.960536944830031e-05, "loss": 0.0381, "step": 5426 }, { "epoch": 1.4, "learning_rate": 5.959140964176396e-05, "loss": 0.0508, "step": 5427 }, { "epoch": 1.4, "learning_rate": 5.957744905896614e-05, "loss": 0.0362, "step": 5428 }, { "epoch": 1.4, "learning_rate": 5.95634877010367e-05, "loss": 0.0447, "step": 5429 }, { "epoch": 1.4, "learning_rate": 5.95495255691056e-05, "loss": 0.046, "step": 5430 }, { "epoch": 1.4, "learning_rate": 5.953556266430281e-05, "loss": 0.0453, "step": 5431 }, { "epoch": 1.4, "learning_rate": 5.9521598987758395e-05, "loss": 0.039, "step": 5432 }, { "epoch": 1.4, "learning_rate": 5.950763454060249e-05, "loss": 0.0253, "step": 5433 }, { "epoch": 1.4, "learning_rate": 5.949366932396527e-05, "loss": 0.0493, "step": 5434 }, { "epoch": 1.4, "learning_rate": 5.9479703338976975e-05, "loss": 0.0502, "step": 5435 }, { "epoch": 1.4, "learning_rate": 5.9465736586767926e-05, "loss": 0.0455, "step": 5436 }, { "epoch": 1.4, "learning_rate": 5.94517690684685e-05, "loss": 0.0383, "step": 5437 }, { "epoch": 1.4, "learning_rate": 5.9437800785209105e-05, "loss": 0.0382, "step": 5438 }, { "epoch": 1.4, "learning_rate": 5.942383173812026e-05, "loss": 0.043, "step": 5439 }, { "epoch": 1.4, "learning_rate": 5.940986192833251e-05, "loss": 0.033, "step": 5440 }, { "epoch": 1.4, "learning_rate": 5.939589135697647e-05, "loss": 0.034, "step": 5441 }, { "epoch": 1.4, "learning_rate": 5.9381920025182834e-05, "loss": 0.0552, "step": 5442 }, { "epoch": 1.4, "learning_rate": 5.936794793408233e-05, "loss": 0.0332, "step": 5443 }, { "epoch": 1.4, "learning_rate": 5.935397508480578e-05, "loss": 0.048, "step": 5444 }, { "epoch": 1.41, "learning_rate": 5.934000147848402e-05, "loss": 0.0461, "step": 5445 }, { "epoch": 1.41, "learning_rate": 5.9326027116247996e-05, "loss": 0.0446, "step": 5446 }, { "epoch": 1.41, "learning_rate": 5.931205199922869e-05, "loss": 0.0369, "step": 5447 }, { "epoch": 1.41, "learning_rate": 5.929807612855717e-05, "loss": 0.0426, "step": 5448 }, { "epoch": 1.41, "learning_rate": 5.9284099505364496e-05, "loss": 0.0346, "step": 5449 }, { "epoch": 1.41, "learning_rate": 5.927012213078188e-05, "loss": 0.0401, "step": 5450 }, { "epoch": 1.41, "learning_rate": 5.925614400594054e-05, "loss": 0.0304, "step": 5451 }, { "epoch": 1.41, "learning_rate": 5.924216513197176e-05, "loss": 0.0402, "step": 5452 }, { "epoch": 1.41, "learning_rate": 5.9228185510006915e-05, "loss": 0.0571, "step": 5453 }, { "epoch": 1.41, "learning_rate": 5.9214205141177394e-05, "loss": 0.0412, "step": 5454 }, { "epoch": 1.41, "learning_rate": 5.920022402661468e-05, "loss": 0.055, "step": 5455 }, { "epoch": 1.41, "learning_rate": 5.91862421674503e-05, "loss": 0.053, "step": 5456 }, { "epoch": 1.41, "learning_rate": 5.917225956481587e-05, "loss": 0.0453, "step": 5457 }, { "epoch": 1.41, "learning_rate": 5.915827621984301e-05, "loss": 0.0403, "step": 5458 }, { "epoch": 1.41, "learning_rate": 5.9144292133663446e-05, "loss": 0.0478, "step": 5459 }, { "epoch": 1.41, "learning_rate": 5.913030730740896e-05, "loss": 0.0438, "step": 5460 }, { "epoch": 1.41, "learning_rate": 5.9116321742211386e-05, "loss": 0.0357, "step": 5461 }, { "epoch": 1.41, "learning_rate": 5.910233543920263e-05, "loss": 0.0387, "step": 5462 }, { "epoch": 1.41, "learning_rate": 5.90883483995146e-05, "loss": 0.0401, "step": 5463 }, { "epoch": 1.41, "learning_rate": 5.907436062427936e-05, "loss": 0.0494, "step": 5464 }, { "epoch": 1.41, "learning_rate": 5.9060372114628946e-05, "loss": 0.0302, "step": 5465 }, { "epoch": 1.41, "learning_rate": 5.9046382871695526e-05, "loss": 0.0404, "step": 5466 }, { "epoch": 1.41, "learning_rate": 5.903239289661124e-05, "loss": 0.0378, "step": 5467 }, { "epoch": 1.41, "learning_rate": 5.901840219050839e-05, "loss": 0.0496, "step": 5468 }, { "epoch": 1.41, "learning_rate": 5.900441075451926e-05, "loss": 0.0435, "step": 5469 }, { "epoch": 1.41, "learning_rate": 5.899041858977622e-05, "loss": 0.0515, "step": 5470 }, { "epoch": 1.41, "learning_rate": 5.897642569741171e-05, "loss": 0.0393, "step": 5471 }, { "epoch": 1.41, "learning_rate": 5.896243207855818e-05, "loss": 0.0602, "step": 5472 }, { "epoch": 1.41, "learning_rate": 5.8948437734348225e-05, "loss": 0.0414, "step": 5473 }, { "epoch": 1.41, "learning_rate": 5.8934442665914424e-05, "loss": 0.0382, "step": 5474 }, { "epoch": 1.41, "learning_rate": 5.892044687438945e-05, "loss": 0.0494, "step": 5475 }, { "epoch": 1.41, "learning_rate": 5.8906450360906006e-05, "loss": 0.0337, "step": 5476 }, { "epoch": 1.41, "learning_rate": 5.889245312659688e-05, "loss": 0.046, "step": 5477 }, { "epoch": 1.41, "learning_rate": 5.887845517259493e-05, "loss": 0.0368, "step": 5478 }, { "epoch": 1.41, "learning_rate": 5.886445650003303e-05, "loss": 0.0482, "step": 5479 }, { "epoch": 1.41, "learning_rate": 5.885045711004412e-05, "loss": 0.0376, "step": 5480 }, { "epoch": 1.41, "learning_rate": 5.883645700376125e-05, "loss": 0.0287, "step": 5481 }, { "epoch": 1.41, "learning_rate": 5.8822456182317466e-05, "loss": 0.0456, "step": 5482 }, { "epoch": 1.41, "learning_rate": 5.88084546468459e-05, "loss": 0.0404, "step": 5483 }, { "epoch": 1.42, "learning_rate": 5.879445239847976e-05, "loss": 0.0484, "step": 5484 }, { "epoch": 1.42, "learning_rate": 5.878044943835225e-05, "loss": 0.0441, "step": 5485 }, { "epoch": 1.42, "learning_rate": 5.87664457675967e-05, "loss": 0.046, "step": 5486 }, { "epoch": 1.42, "learning_rate": 5.8752441387346455e-05, "loss": 0.0345, "step": 5487 }, { "epoch": 1.42, "learning_rate": 5.873843629873495e-05, "loss": 0.046, "step": 5488 }, { "epoch": 1.42, "learning_rate": 5.872443050289564e-05, "loss": 0.0369, "step": 5489 }, { "epoch": 1.42, "learning_rate": 5.871042400096206e-05, "loss": 0.0422, "step": 5490 }, { "epoch": 1.42, "learning_rate": 5.869641679406781e-05, "loss": 0.0327, "step": 5491 }, { "epoch": 1.42, "learning_rate": 5.868240888334653e-05, "loss": 0.0427, "step": 5492 }, { "epoch": 1.42, "learning_rate": 5.86684002699319e-05, "loss": 0.0445, "step": 5493 }, { "epoch": 1.42, "learning_rate": 5.86543909549577e-05, "loss": 0.0362, "step": 5494 }, { "epoch": 1.42, "learning_rate": 5.864038093955775e-05, "loss": 0.0445, "step": 5495 }, { "epoch": 1.42, "learning_rate": 5.86263702248659e-05, "loss": 0.0789, "step": 5496 }, { "epoch": 1.42, "learning_rate": 5.8612358812016124e-05, "loss": 0.0362, "step": 5497 }, { "epoch": 1.42, "learning_rate": 5.859834670214236e-05, "loss": 0.0469, "step": 5498 }, { "epoch": 1.42, "learning_rate": 5.8584333896378665e-05, "loss": 0.0346, "step": 5499 }, { "epoch": 1.42, "learning_rate": 5.857032039585913e-05, "loss": 0.0275, "step": 5500 }, { "epoch": 1.42, "learning_rate": 5.8556306201717945e-05, "loss": 0.0497, "step": 5501 }, { "epoch": 1.42, "learning_rate": 5.854229131508928e-05, "loss": 0.0438, "step": 5502 }, { "epoch": 1.42, "learning_rate": 5.852827573710743e-05, "loss": 0.0273, "step": 5503 }, { "epoch": 1.42, "learning_rate": 5.851425946890668e-05, "loss": 0.0429, "step": 5504 }, { "epoch": 1.42, "learning_rate": 5.850024251162146e-05, "loss": 0.0535, "step": 5505 }, { "epoch": 1.42, "learning_rate": 5.8486224866386174e-05, "loss": 0.0306, "step": 5506 }, { "epoch": 1.42, "learning_rate": 5.847220653433531e-05, "loss": 0.0464, "step": 5507 }, { "epoch": 1.42, "learning_rate": 5.84581875166034e-05, "loss": 0.0372, "step": 5508 }, { "epoch": 1.42, "learning_rate": 5.844416781432509e-05, "loss": 0.0314, "step": 5509 }, { "epoch": 1.42, "learning_rate": 5.843014742863501e-05, "loss": 0.0304, "step": 5510 }, { "epoch": 1.42, "learning_rate": 5.841612636066786e-05, "loss": 0.0651, "step": 5511 }, { "epoch": 1.42, "learning_rate": 5.840210461155842e-05, "loss": 0.0375, "step": 5512 }, { "epoch": 1.42, "learning_rate": 5.838808218244151e-05, "loss": 0.0445, "step": 5513 }, { "epoch": 1.42, "learning_rate": 5.8374059074452016e-05, "loss": 0.0423, "step": 5514 }, { "epoch": 1.42, "learning_rate": 5.836003528872487e-05, "loss": 0.0388, "step": 5515 }, { "epoch": 1.42, "learning_rate": 5.834601082639504e-05, "loss": 0.0446, "step": 5516 }, { "epoch": 1.42, "learning_rate": 5.833198568859757e-05, "loss": 0.0331, "step": 5517 }, { "epoch": 1.42, "learning_rate": 5.831795987646759e-05, "loss": 0.0409, "step": 5518 }, { "epoch": 1.42, "learning_rate": 5.830393339114021e-05, "loss": 0.0447, "step": 5519 }, { "epoch": 1.42, "learning_rate": 5.828990623375064e-05, "loss": 0.0448, "step": 5520 }, { "epoch": 1.42, "learning_rate": 5.827587840543417e-05, "loss": 0.0366, "step": 5521 }, { "epoch": 1.43, "learning_rate": 5.826184990732609e-05, "loss": 0.0434, "step": 5522 }, { "epoch": 1.43, "learning_rate": 5.824782074056179e-05, "loss": 0.0323, "step": 5523 }, { "epoch": 1.43, "learning_rate": 5.8233790906276665e-05, "loss": 0.044, "step": 5524 }, { "epoch": 1.43, "learning_rate": 5.821976040560619e-05, "loss": 0.057, "step": 5525 }, { "epoch": 1.43, "learning_rate": 5.8205729239685926e-05, "loss": 0.0434, "step": 5526 }, { "epoch": 1.43, "learning_rate": 5.8191697409651445e-05, "loss": 0.0426, "step": 5527 }, { "epoch": 1.43, "learning_rate": 5.8177664916638366e-05, "loss": 0.0521, "step": 5528 }, { "epoch": 1.43, "learning_rate": 5.81636317617824e-05, "loss": 0.0384, "step": 5529 }, { "epoch": 1.43, "learning_rate": 5.814959794621928e-05, "loss": 0.0349, "step": 5530 }, { "epoch": 1.43, "learning_rate": 5.813556347108483e-05, "loss": 0.0496, "step": 5531 }, { "epoch": 1.43, "learning_rate": 5.8121528337514866e-05, "loss": 0.0392, "step": 5532 }, { "epoch": 1.43, "learning_rate": 5.810749254664531e-05, "loss": 0.0349, "step": 5533 }, { "epoch": 1.43, "learning_rate": 5.8093456099612133e-05, "loss": 0.0453, "step": 5534 }, { "epoch": 1.43, "learning_rate": 5.807941899755131e-05, "loss": 0.0524, "step": 5535 }, { "epoch": 1.43, "learning_rate": 5.806538124159897e-05, "loss": 0.0431, "step": 5536 }, { "epoch": 1.43, "learning_rate": 5.805134283289116e-05, "loss": 0.034, "step": 5537 }, { "epoch": 1.43, "learning_rate": 5.80373037725641e-05, "loss": 0.0498, "step": 5538 }, { "epoch": 1.43, "learning_rate": 5.802326406175398e-05, "loss": 0.0314, "step": 5539 }, { "epoch": 1.43, "learning_rate": 5.8009223701597116e-05, "loss": 0.0363, "step": 5540 }, { "epoch": 1.43, "learning_rate": 5.799518269322979e-05, "loss": 0.0466, "step": 5541 }, { "epoch": 1.43, "learning_rate": 5.79811410377884e-05, "loss": 0.0419, "step": 5542 }, { "epoch": 1.43, "learning_rate": 5.79670987364094e-05, "loss": 0.0274, "step": 5543 }, { "epoch": 1.43, "learning_rate": 5.795305579022925e-05, "loss": 0.0411, "step": 5544 }, { "epoch": 1.43, "learning_rate": 5.793901220038449e-05, "loss": 0.0582, "step": 5545 }, { "epoch": 1.43, "learning_rate": 5.7924967968011724e-05, "loss": 0.0387, "step": 5546 }, { "epoch": 1.43, "learning_rate": 5.7910923094247585e-05, "loss": 0.0388, "step": 5547 }, { "epoch": 1.43, "learning_rate": 5.789687758022876e-05, "loss": 0.0462, "step": 5548 }, { "epoch": 1.43, "learning_rate": 5.788283142709202e-05, "loss": 0.0442, "step": 5549 }, { "epoch": 1.43, "learning_rate": 5.786878463597413e-05, "loss": 0.0424, "step": 5550 }, { "epoch": 1.43, "learning_rate": 5.7854737208011954e-05, "loss": 0.0386, "step": 5551 }, { "epoch": 1.43, "learning_rate": 5.784068914434238e-05, "loss": 0.0369, "step": 5552 }, { "epoch": 1.43, "learning_rate": 5.78266404461024e-05, "loss": 0.0429, "step": 5553 }, { "epoch": 1.43, "learning_rate": 5.781259111442897e-05, "loss": 0.0461, "step": 5554 }, { "epoch": 1.43, "learning_rate": 5.7798541150459165e-05, "loss": 0.0335, "step": 5555 }, { "epoch": 1.43, "learning_rate": 5.778449055533008e-05, "loss": 0.0408, "step": 5556 }, { "epoch": 1.43, "learning_rate": 5.7770439330178896e-05, "loss": 0.0435, "step": 5557 }, { "epoch": 1.43, "learning_rate": 5.7756387476142806e-05, "loss": 0.0395, "step": 5558 }, { "epoch": 1.43, "learning_rate": 5.7742334994359045e-05, "loss": 0.0324, "step": 5559 }, { "epoch": 1.43, "learning_rate": 5.772828188596496e-05, "loss": 0.0445, "step": 5560 }, { "epoch": 1.44, "learning_rate": 5.771422815209789e-05, "loss": 0.0296, "step": 5561 }, { "epoch": 1.44, "learning_rate": 5.770017379389528e-05, "loss": 0.0366, "step": 5562 }, { "epoch": 1.44, "learning_rate": 5.768611881249453e-05, "loss": 0.0396, "step": 5563 }, { "epoch": 1.44, "learning_rate": 5.767206320903319e-05, "loss": 0.0439, "step": 5564 }, { "epoch": 1.44, "learning_rate": 5.765800698464882e-05, "loss": 0.0445, "step": 5565 }, { "epoch": 1.44, "learning_rate": 5.764395014047904e-05, "loss": 0.0372, "step": 5566 }, { "epoch": 1.44, "learning_rate": 5.762989267766149e-05, "loss": 0.0522, "step": 5567 }, { "epoch": 1.44, "learning_rate": 5.761583459733389e-05, "loss": 0.0461, "step": 5568 }, { "epoch": 1.44, "learning_rate": 5.7601775900634005e-05, "loss": 0.036, "step": 5569 }, { "epoch": 1.44, "learning_rate": 5.7587716588699657e-05, "loss": 0.0471, "step": 5570 }, { "epoch": 1.44, "learning_rate": 5.7573656662668705e-05, "loss": 0.0525, "step": 5571 }, { "epoch": 1.44, "learning_rate": 5.755959612367905e-05, "loss": 0.0416, "step": 5572 }, { "epoch": 1.44, "learning_rate": 5.7545534972868656e-05, "loss": 0.0483, "step": 5573 }, { "epoch": 1.44, "learning_rate": 5.753147321137554e-05, "loss": 0.0397, "step": 5574 }, { "epoch": 1.44, "learning_rate": 5.751741084033777e-05, "loss": 0.0436, "step": 5575 }, { "epoch": 1.44, "learning_rate": 5.750334786089343e-05, "loss": 0.0298, "step": 5576 }, { "epoch": 1.44, "learning_rate": 5.74892842741807e-05, "loss": 0.0511, "step": 5577 }, { "epoch": 1.44, "learning_rate": 5.7475220081337785e-05, "loss": 0.0446, "step": 5578 }, { "epoch": 1.44, "learning_rate": 5.746115528350295e-05, "loss": 0.0423, "step": 5579 }, { "epoch": 1.44, "learning_rate": 5.7447089881814476e-05, "loss": 0.0503, "step": 5580 }, { "epoch": 1.44, "learning_rate": 5.743302387741074e-05, "loss": 0.0261, "step": 5581 }, { "epoch": 1.44, "learning_rate": 5.741895727143013e-05, "loss": 0.0474, "step": 5582 }, { "epoch": 1.44, "learning_rate": 5.740489006501111e-05, "loss": 0.0448, "step": 5583 }, { "epoch": 1.44, "learning_rate": 5.73908222592922e-05, "loss": 0.051, "step": 5584 }, { "epoch": 1.44, "learning_rate": 5.73767538554119e-05, "loss": 0.0302, "step": 5585 }, { "epoch": 1.44, "learning_rate": 5.7362684854508855e-05, "loss": 0.0536, "step": 5586 }, { "epoch": 1.44, "learning_rate": 5.734861525772168e-05, "loss": 0.0577, "step": 5587 }, { "epoch": 1.44, "learning_rate": 5.7334545066189094e-05, "loss": 0.0686, "step": 5588 }, { "epoch": 1.44, "learning_rate": 5.732047428104982e-05, "loss": 0.044, "step": 5589 }, { "epoch": 1.44, "learning_rate": 5.7306402903442654e-05, "loss": 0.0295, "step": 5590 }, { "epoch": 1.44, "learning_rate": 5.729233093450644e-05, "loss": 0.0407, "step": 5591 }, { "epoch": 1.44, "learning_rate": 5.727825837538008e-05, "loss": 0.0628, "step": 5592 }, { "epoch": 1.44, "learning_rate": 5.726418522720247e-05, "loss": 0.0297, "step": 5593 }, { "epoch": 1.44, "learning_rate": 5.725011149111261e-05, "loss": 0.0392, "step": 5594 }, { "epoch": 1.44, "learning_rate": 5.723603716824954e-05, "loss": 0.0365, "step": 5595 }, { "epoch": 1.44, "learning_rate": 5.722196225975233e-05, "loss": 0.0491, "step": 5596 }, { "epoch": 1.44, "learning_rate": 5.720788676676011e-05, "loss": 0.0504, "step": 5597 }, { "epoch": 1.44, "learning_rate": 5.719381069041203e-05, "loss": 0.0483, "step": 5598 }, { "epoch": 1.44, "learning_rate": 5.717973403184733e-05, "loss": 0.0317, "step": 5599 }, { "epoch": 1.45, "learning_rate": 5.716565679220527e-05, "loss": 0.0451, "step": 5600 }, { "epoch": 1.45, "learning_rate": 5.7151578972625175e-05, "loss": 0.0575, "step": 5601 }, { "epoch": 1.45, "learning_rate": 5.713750057424637e-05, "loss": 0.0485, "step": 5602 }, { "epoch": 1.45, "learning_rate": 5.7123421598208305e-05, "loss": 0.0378, "step": 5603 }, { "epoch": 1.45, "learning_rate": 5.710934204565041e-05, "loss": 0.0384, "step": 5604 }, { "epoch": 1.45, "learning_rate": 5.7095261917712184e-05, "loss": 0.0457, "step": 5605 }, { "epoch": 1.45, "learning_rate": 5.7081181215533184e-05, "loss": 0.0492, "step": 5606 }, { "epoch": 1.45, "learning_rate": 5.706709994025299e-05, "loss": 0.0463, "step": 5607 }, { "epoch": 1.45, "learning_rate": 5.705301809301126e-05, "loss": 0.0376, "step": 5608 }, { "epoch": 1.45, "learning_rate": 5.7038935674947656e-05, "loss": 0.0365, "step": 5609 }, { "epoch": 1.45, "learning_rate": 5.702485268720195e-05, "loss": 0.0444, "step": 5610 }, { "epoch": 1.45, "learning_rate": 5.701076913091388e-05, "loss": 0.0388, "step": 5611 }, { "epoch": 1.45, "learning_rate": 5.699668500722328e-05, "loss": 0.0538, "step": 5612 }, { "epoch": 1.45, "learning_rate": 5.6982600317270026e-05, "loss": 0.0399, "step": 5613 }, { "epoch": 1.45, "learning_rate": 5.696851506219405e-05, "loss": 0.0372, "step": 5614 }, { "epoch": 1.45, "learning_rate": 5.695442924313529e-05, "loss": 0.0433, "step": 5615 }, { "epoch": 1.45, "learning_rate": 5.6940342861233755e-05, "loss": 0.0407, "step": 5616 }, { "epoch": 1.45, "learning_rate": 5.69262559176295e-05, "loss": 0.0404, "step": 5617 }, { "epoch": 1.45, "learning_rate": 5.691216841346265e-05, "loss": 0.0383, "step": 5618 }, { "epoch": 1.45, "learning_rate": 5.68980803498733e-05, "loss": 0.0327, "step": 5619 }, { "epoch": 1.45, "learning_rate": 5.688399172800167e-05, "loss": 0.0396, "step": 5620 }, { "epoch": 1.45, "learning_rate": 5.686990254898801e-05, "loss": 0.0376, "step": 5621 }, { "epoch": 1.45, "learning_rate": 5.685581281397255e-05, "loss": 0.0274, "step": 5622 }, { "epoch": 1.45, "learning_rate": 5.6841722524095675e-05, "loss": 0.0449, "step": 5623 }, { "epoch": 1.45, "learning_rate": 5.682763168049771e-05, "loss": 0.0498, "step": 5624 }, { "epoch": 1.45, "learning_rate": 5.681354028431907e-05, "loss": 0.0442, "step": 5625 }, { "epoch": 1.45, "learning_rate": 5.679944833670023e-05, "loss": 0.0502, "step": 5626 }, { "epoch": 1.45, "learning_rate": 5.6785355838781705e-05, "loss": 0.0482, "step": 5627 }, { "epoch": 1.45, "learning_rate": 5.6771262791704016e-05, "loss": 0.0288, "step": 5628 }, { "epoch": 1.45, "learning_rate": 5.675716919660777e-05, "loss": 0.0374, "step": 5629 }, { "epoch": 1.45, "learning_rate": 5.67430750546336e-05, "loss": 0.0503, "step": 5630 }, { "epoch": 1.45, "learning_rate": 5.672898036692218e-05, "loss": 0.0703, "step": 5631 }, { "epoch": 1.45, "learning_rate": 5.671488513461426e-05, "loss": 0.0489, "step": 5632 }, { "epoch": 1.45, "learning_rate": 5.6700789358850584e-05, "loss": 0.0411, "step": 5633 }, { "epoch": 1.45, "learning_rate": 5.6686693040771966e-05, "loss": 0.0506, "step": 5634 }, { "epoch": 1.45, "learning_rate": 5.6672596181519275e-05, "loss": 0.0403, "step": 5635 }, { "epoch": 1.45, "learning_rate": 5.6658498782233423e-05, "loss": 0.0405, "step": 5636 }, { "epoch": 1.45, "learning_rate": 5.664440084405533e-05, "loss": 0.0321, "step": 5637 }, { "epoch": 1.45, "learning_rate": 5.6630302368125985e-05, "loss": 0.0508, "step": 5638 }, { "epoch": 1.46, "learning_rate": 5.661620335558644e-05, "loss": 0.0627, "step": 5639 }, { "epoch": 1.46, "learning_rate": 5.6602103807577766e-05, "loss": 0.0406, "step": 5640 }, { "epoch": 1.46, "learning_rate": 5.658800372524106e-05, "loss": 0.0367, "step": 5641 }, { "epoch": 1.46, "learning_rate": 5.65739031097175e-05, "loss": 0.0398, "step": 5642 }, { "epoch": 1.46, "learning_rate": 5.6559801962148285e-05, "loss": 0.0592, "step": 5643 }, { "epoch": 1.46, "learning_rate": 5.654570028367467e-05, "loss": 0.0344, "step": 5644 }, { "epoch": 1.46, "learning_rate": 5.653159807543796e-05, "loss": 0.043, "step": 5645 }, { "epoch": 1.46, "learning_rate": 5.651749533857946e-05, "loss": 0.0368, "step": 5646 }, { "epoch": 1.46, "learning_rate": 5.650339207424056e-05, "loss": 0.0467, "step": 5647 }, { "epoch": 1.46, "learning_rate": 5.648928828356268e-05, "loss": 0.0414, "step": 5648 }, { "epoch": 1.46, "learning_rate": 5.647518396768729e-05, "loss": 0.0459, "step": 5649 }, { "epoch": 1.46, "learning_rate": 5.646107912775587e-05, "loss": 0.0307, "step": 5650 }, { "epoch": 1.46, "learning_rate": 5.6446973764909996e-05, "loss": 0.0376, "step": 5651 }, { "epoch": 1.46, "learning_rate": 5.643286788029123e-05, "loss": 0.0318, "step": 5652 }, { "epoch": 1.46, "learning_rate": 5.6418761475041226e-05, "loss": 0.037, "step": 5653 }, { "epoch": 1.46, "learning_rate": 5.640465455030164e-05, "loss": 0.0443, "step": 5654 }, { "epoch": 1.46, "learning_rate": 5.6390547107214195e-05, "loss": 0.0463, "step": 5655 }, { "epoch": 1.46, "learning_rate": 5.6376439146920654e-05, "loss": 0.0434, "step": 5656 }, { "epoch": 1.46, "learning_rate": 5.636233067056281e-05, "loss": 0.0456, "step": 5657 }, { "epoch": 1.46, "learning_rate": 5.634822167928253e-05, "loss": 0.042, "step": 5658 }, { "epoch": 1.46, "learning_rate": 5.6334112174221646e-05, "loss": 0.0483, "step": 5659 }, { "epoch": 1.46, "learning_rate": 5.632000215652211e-05, "loss": 0.0407, "step": 5660 }, { "epoch": 1.46, "learning_rate": 5.630589162732589e-05, "loss": 0.0377, "step": 5661 }, { "epoch": 1.46, "learning_rate": 5.6291780587775014e-05, "loss": 0.051, "step": 5662 }, { "epoch": 1.46, "learning_rate": 5.6277669039011485e-05, "loss": 0.0508, "step": 5663 }, { "epoch": 1.46, "learning_rate": 5.6263556982177414e-05, "loss": 0.045, "step": 5664 }, { "epoch": 1.46, "learning_rate": 5.6249444418414934e-05, "loss": 0.0497, "step": 5665 }, { "epoch": 1.46, "learning_rate": 5.6235331348866225e-05, "loss": 0.0365, "step": 5666 }, { "epoch": 1.46, "learning_rate": 5.6221217774673494e-05, "loss": 0.0454, "step": 5667 }, { "epoch": 1.46, "learning_rate": 5.620710369697898e-05, "loss": 0.0404, "step": 5668 }, { "epoch": 1.46, "learning_rate": 5.6192989116924996e-05, "loss": 0.0529, "step": 5669 }, { "epoch": 1.46, "learning_rate": 5.6178874035653874e-05, "loss": 0.0417, "step": 5670 }, { "epoch": 1.46, "learning_rate": 5.6164758454307985e-05, "loss": 0.0457, "step": 5671 }, { "epoch": 1.46, "learning_rate": 5.615064237402974e-05, "loss": 0.0488, "step": 5672 }, { "epoch": 1.46, "learning_rate": 5.6136525795961606e-05, "loss": 0.0439, "step": 5673 }, { "epoch": 1.46, "learning_rate": 5.612240872124608e-05, "loss": 0.0418, "step": 5674 }, { "epoch": 1.46, "learning_rate": 5.6108291151025705e-05, "loss": 0.0353, "step": 5675 }, { "epoch": 1.46, "learning_rate": 5.609417308644303e-05, "loss": 0.0271, "step": 5676 }, { "epoch": 1.47, "learning_rate": 5.608005452864069e-05, "loss": 0.056, "step": 5677 }, { "epoch": 1.47, "learning_rate": 5.6065935478761355e-05, "loss": 0.0504, "step": 5678 }, { "epoch": 1.47, "learning_rate": 5.6051815937947716e-05, "loss": 0.063, "step": 5679 }, { "epoch": 1.47, "learning_rate": 5.603769590734249e-05, "loss": 0.058, "step": 5680 }, { "epoch": 1.47, "learning_rate": 5.602357538808847e-05, "loss": 0.0523, "step": 5681 }, { "epoch": 1.47, "learning_rate": 5.6009454381328474e-05, "loss": 0.0582, "step": 5682 }, { "epoch": 1.47, "learning_rate": 5.5995332888205345e-05, "loss": 0.0434, "step": 5683 }, { "epoch": 1.47, "learning_rate": 5.5981210909861995e-05, "loss": 0.0681, "step": 5684 }, { "epoch": 1.47, "learning_rate": 5.596708844744134e-05, "loss": 0.0427, "step": 5685 }, { "epoch": 1.47, "learning_rate": 5.5952965502086364e-05, "loss": 0.0412, "step": 5686 }, { "epoch": 1.47, "learning_rate": 5.593884207494007e-05, "loss": 0.0379, "step": 5687 }, { "epoch": 1.47, "learning_rate": 5.5924718167145526e-05, "loss": 0.0497, "step": 5688 }, { "epoch": 1.47, "learning_rate": 5.591059377984581e-05, "loss": 0.0629, "step": 5689 }, { "epoch": 1.47, "learning_rate": 5.589646891418404e-05, "loss": 0.0455, "step": 5690 }, { "epoch": 1.47, "learning_rate": 5.588234357130341e-05, "loss": 0.0388, "step": 5691 }, { "epoch": 1.47, "learning_rate": 5.586821775234712e-05, "loss": 0.0439, "step": 5692 }, { "epoch": 1.47, "learning_rate": 5.585409145845839e-05, "loss": 0.0568, "step": 5693 }, { "epoch": 1.47, "learning_rate": 5.5839964690780524e-05, "loss": 0.0421, "step": 5694 }, { "epoch": 1.47, "learning_rate": 5.582583745045684e-05, "loss": 0.038, "step": 5695 }, { "epoch": 1.47, "learning_rate": 5.581170973863069e-05, "loss": 0.0482, "step": 5696 }, { "epoch": 1.47, "learning_rate": 5.57975815564455e-05, "loss": 0.0358, "step": 5697 }, { "epoch": 1.47, "learning_rate": 5.5783452905044676e-05, "loss": 0.0358, "step": 5698 }, { "epoch": 1.47, "learning_rate": 5.5769323785571706e-05, "loss": 0.0434, "step": 5699 }, { "epoch": 1.47, "learning_rate": 5.575519419917009e-05, "loss": 0.0395, "step": 5700 }, { "epoch": 1.47, "learning_rate": 5.5741064146983393e-05, "loss": 0.04, "step": 5701 }, { "epoch": 1.47, "learning_rate": 5.5726933630155196e-05, "loss": 0.0492, "step": 5702 }, { "epoch": 1.47, "learning_rate": 5.571280264982911e-05, "loss": 0.0264, "step": 5703 }, { "epoch": 1.47, "learning_rate": 5.569867120714882e-05, "loss": 0.0524, "step": 5704 }, { "epoch": 1.47, "learning_rate": 5.568453930325801e-05, "loss": 0.0373, "step": 5705 }, { "epoch": 1.47, "learning_rate": 5.567040693930045e-05, "loss": 0.0459, "step": 5706 }, { "epoch": 1.47, "learning_rate": 5.565627411641986e-05, "loss": 0.0345, "step": 5707 }, { "epoch": 1.47, "learning_rate": 5.564214083576008e-05, "loss": 0.05, "step": 5708 }, { "epoch": 1.47, "learning_rate": 5.5628007098464974e-05, "loss": 0.037, "step": 5709 }, { "epoch": 1.47, "learning_rate": 5.561387290567841e-05, "loss": 0.0303, "step": 5710 }, { "epoch": 1.47, "learning_rate": 5.559973825854431e-05, "loss": 0.0432, "step": 5711 }, { "epoch": 1.47, "learning_rate": 5.558560315820663e-05, "loss": 0.0346, "step": 5712 }, { "epoch": 1.47, "learning_rate": 5.557146760580938e-05, "loss": 0.0327, "step": 5713 }, { "epoch": 1.47, "learning_rate": 5.5557331602496584e-05, "loss": 0.0374, "step": 5714 }, { "epoch": 1.47, "learning_rate": 5.554319514941231e-05, "loss": 0.0433, "step": 5715 }, { "epoch": 1.48, "learning_rate": 5.552905824770066e-05, "loss": 0.0386, "step": 5716 }, { "epoch": 1.48, "learning_rate": 5.5514920898505785e-05, "loss": 0.0358, "step": 5717 }, { "epoch": 1.48, "learning_rate": 5.550078310297185e-05, "loss": 0.0404, "step": 5718 }, { "epoch": 1.48, "learning_rate": 5.5486644862243096e-05, "loss": 0.035, "step": 5719 }, { "epoch": 1.48, "learning_rate": 5.547250617746375e-05, "loss": 0.0389, "step": 5720 }, { "epoch": 1.48, "learning_rate": 5.5458367049778094e-05, "loss": 0.0557, "step": 5721 }, { "epoch": 1.48, "learning_rate": 5.544422748033047e-05, "loss": 0.0475, "step": 5722 }, { "epoch": 1.48, "learning_rate": 5.5430087470265226e-05, "loss": 0.0407, "step": 5723 }, { "epoch": 1.48, "learning_rate": 5.541594702072674e-05, "loss": 0.0502, "step": 5724 }, { "epoch": 1.48, "learning_rate": 5.5401806132859454e-05, "loss": 0.053, "step": 5725 }, { "epoch": 1.48, "learning_rate": 5.538766480780785e-05, "loss": 0.023, "step": 5726 }, { "epoch": 1.48, "learning_rate": 5.537352304671641e-05, "loss": 0.0264, "step": 5727 }, { "epoch": 1.48, "learning_rate": 5.535938085072966e-05, "loss": 0.0258, "step": 5728 }, { "epoch": 1.48, "learning_rate": 5.534523822099218e-05, "loss": 0.0508, "step": 5729 }, { "epoch": 1.48, "learning_rate": 5.533109515864858e-05, "loss": 0.0396, "step": 5730 }, { "epoch": 1.48, "learning_rate": 5.53169516648435e-05, "loss": 0.0375, "step": 5731 }, { "epoch": 1.48, "learning_rate": 5.5302807740721606e-05, "loss": 0.0525, "step": 5732 }, { "epoch": 1.48, "learning_rate": 5.528866338742762e-05, "loss": 0.0431, "step": 5733 }, { "epoch": 1.48, "learning_rate": 5.527451860610626e-05, "loss": 0.0326, "step": 5734 }, { "epoch": 1.48, "learning_rate": 5.526037339790234e-05, "loss": 0.0251, "step": 5735 }, { "epoch": 1.48, "learning_rate": 5.524622776396068e-05, "loss": 0.0424, "step": 5736 }, { "epoch": 1.48, "learning_rate": 5.5232081705426085e-05, "loss": 0.0424, "step": 5737 }, { "epoch": 1.48, "learning_rate": 5.5217935223443464e-05, "loss": 0.0425, "step": 5738 }, { "epoch": 1.48, "learning_rate": 5.520378831915772e-05, "loss": 0.0421, "step": 5739 }, { "epoch": 1.48, "learning_rate": 5.518964099371384e-05, "loss": 0.0399, "step": 5740 }, { "epoch": 1.48, "learning_rate": 5.517549324825677e-05, "loss": 0.042, "step": 5741 }, { "epoch": 1.48, "learning_rate": 5.5161345083931546e-05, "loss": 0.0269, "step": 5742 }, { "epoch": 1.48, "learning_rate": 5.5147196501883225e-05, "loss": 0.0401, "step": 5743 }, { "epoch": 1.48, "learning_rate": 5.513304750325687e-05, "loss": 0.0402, "step": 5744 }, { "epoch": 1.48, "learning_rate": 5.511889808919766e-05, "loss": 0.0348, "step": 5745 }, { "epoch": 1.48, "learning_rate": 5.5104748260850666e-05, "loss": 0.0397, "step": 5746 }, { "epoch": 1.48, "learning_rate": 5.509059801936114e-05, "loss": 0.0412, "step": 5747 }, { "epoch": 1.48, "learning_rate": 5.5076447365874284e-05, "loss": 0.0363, "step": 5748 }, { "epoch": 1.48, "learning_rate": 5.5062296301535366e-05, "loss": 0.0296, "step": 5749 }, { "epoch": 1.48, "learning_rate": 5.504814482748963e-05, "loss": 0.0394, "step": 5750 }, { "epoch": 1.48, "learning_rate": 5.503399294488244e-05, "loss": 0.0397, "step": 5751 }, { "epoch": 1.48, "learning_rate": 5.501984065485914e-05, "loss": 0.0356, "step": 5752 }, { "epoch": 1.48, "learning_rate": 5.500568795856512e-05, "loss": 0.0404, "step": 5753 }, { "epoch": 1.48, "learning_rate": 5.4991534857145785e-05, "loss": 0.0498, "step": 5754 }, { "epoch": 1.49, "learning_rate": 5.49773813517466e-05, "loss": 0.0403, "step": 5755 }, { "epoch": 1.49, "learning_rate": 5.496322744351304e-05, "loss": 0.0439, "step": 5756 }, { "epoch": 1.49, "learning_rate": 5.4949073133590647e-05, "loss": 0.0428, "step": 5757 }, { "epoch": 1.49, "learning_rate": 5.4934918423124945e-05, "loss": 0.0516, "step": 5758 }, { "epoch": 1.49, "learning_rate": 5.492076331326154e-05, "loss": 0.0627, "step": 5759 }, { "epoch": 1.49, "learning_rate": 5.4906607805146016e-05, "loss": 0.0409, "step": 5760 }, { "epoch": 1.49, "learning_rate": 5.4892451899924045e-05, "loss": 0.03, "step": 5761 }, { "epoch": 1.49, "learning_rate": 5.4878295598741325e-05, "loss": 0.0438, "step": 5762 }, { "epoch": 1.49, "learning_rate": 5.486413890274352e-05, "loss": 0.0413, "step": 5763 }, { "epoch": 1.49, "learning_rate": 5.48499818130764e-05, "loss": 0.0356, "step": 5764 }, { "epoch": 1.49, "learning_rate": 5.483582433088574e-05, "loss": 0.0482, "step": 5765 }, { "epoch": 1.49, "learning_rate": 5.4821666457317355e-05, "loss": 0.0346, "step": 5766 }, { "epoch": 1.49, "learning_rate": 5.480750819351708e-05, "loss": 0.0511, "step": 5767 }, { "epoch": 1.49, "learning_rate": 5.4793349540630756e-05, "loss": 0.0432, "step": 5768 }, { "epoch": 1.49, "learning_rate": 5.4779190499804325e-05, "loss": 0.0421, "step": 5769 }, { "epoch": 1.49, "learning_rate": 5.47650310721837e-05, "loss": 0.053, "step": 5770 }, { "epoch": 1.49, "learning_rate": 5.475087125891487e-05, "loss": 0.0385, "step": 5771 }, { "epoch": 1.49, "learning_rate": 5.4736711061143796e-05, "loss": 0.0402, "step": 5772 }, { "epoch": 1.49, "learning_rate": 5.472255048001652e-05, "loss": 0.0491, "step": 5773 }, { "epoch": 1.49, "learning_rate": 5.4708389516679116e-05, "loss": 0.0429, "step": 5774 }, { "epoch": 1.49, "learning_rate": 5.469422817227765e-05, "loss": 0.0524, "step": 5775 }, { "epoch": 1.49, "learning_rate": 5.468006644795826e-05, "loss": 0.0626, "step": 5776 }, { "epoch": 1.49, "learning_rate": 5.466590434486707e-05, "loss": 0.0513, "step": 5777 }, { "epoch": 1.49, "learning_rate": 5.4651741864150286e-05, "loss": 0.0479, "step": 5778 }, { "epoch": 1.49, "learning_rate": 5.463757900695411e-05, "loss": 0.0438, "step": 5779 }, { "epoch": 1.49, "learning_rate": 5.46234157744248e-05, "loss": 0.0422, "step": 5780 }, { "epoch": 1.49, "learning_rate": 5.4609252167708615e-05, "loss": 0.0447, "step": 5781 }, { "epoch": 1.49, "learning_rate": 5.459508818795185e-05, "loss": 0.0236, "step": 5782 }, { "epoch": 1.49, "learning_rate": 5.4580923836300844e-05, "loss": 0.0383, "step": 5783 }, { "epoch": 1.49, "learning_rate": 5.4566759113901986e-05, "loss": 0.0395, "step": 5784 }, { "epoch": 1.49, "learning_rate": 5.455259402190163e-05, "loss": 0.0679, "step": 5785 }, { "epoch": 1.49, "learning_rate": 5.453842856144622e-05, "loss": 0.0599, "step": 5786 }, { "epoch": 1.49, "learning_rate": 5.45242627336822e-05, "loss": 0.0354, "step": 5787 }, { "epoch": 1.49, "learning_rate": 5.451009653975607e-05, "loss": 0.0461, "step": 5788 }, { "epoch": 1.49, "learning_rate": 5.449592998081432e-05, "loss": 0.0439, "step": 5789 }, { "epoch": 1.49, "learning_rate": 5.448176305800351e-05, "loss": 0.0398, "step": 5790 }, { "epoch": 1.49, "learning_rate": 5.44675957724702e-05, "loss": 0.0446, "step": 5791 }, { "epoch": 1.49, "learning_rate": 5.4453428125361e-05, "loss": 0.0658, "step": 5792 }, { "epoch": 1.49, "learning_rate": 5.443926011782253e-05, "loss": 0.0386, "step": 5793 }, { "epoch": 1.5, "learning_rate": 5.4425091751001465e-05, "loss": 0.0452, "step": 5794 }, { "epoch": 1.5, "learning_rate": 5.441092302604447e-05, "loss": 0.0433, "step": 5795 }, { "epoch": 1.5, "learning_rate": 5.439675394409828e-05, "loss": 0.0247, "step": 5796 }, { "epoch": 1.5, "learning_rate": 5.4382584506309654e-05, "loss": 0.0483, "step": 5797 }, { "epoch": 1.5, "learning_rate": 5.4368414713825325e-05, "loss": 0.0245, "step": 5798 }, { "epoch": 1.5, "learning_rate": 5.435424456779212e-05, "loss": 0.0493, "step": 5799 }, { "epoch": 1.5, "learning_rate": 5.434007406935688e-05, "loss": 0.0429, "step": 5800 }, { "epoch": 1.5, "learning_rate": 5.432590321966647e-05, "loss": 0.0366, "step": 5801 }, { "epoch": 1.5, "learning_rate": 5.4311732019867766e-05, "loss": 0.0293, "step": 5802 }, { "epoch": 1.5, "learning_rate": 5.429756047110768e-05, "loss": 0.0464, "step": 5803 }, { "epoch": 1.5, "learning_rate": 5.4283388574533154e-05, "loss": 0.028, "step": 5804 }, { "epoch": 1.5, "learning_rate": 5.426921633129118e-05, "loss": 0.0409, "step": 5805 }, { "epoch": 1.5, "learning_rate": 5.4255043742528766e-05, "loss": 0.0425, "step": 5806 }, { "epoch": 1.5, "learning_rate": 5.424087080939292e-05, "loss": 0.0364, "step": 5807 }, { "epoch": 1.5, "learning_rate": 5.422669753303071e-05, "loss": 0.0311, "step": 5808 }, { "epoch": 1.5, "learning_rate": 5.421252391458922e-05, "loss": 0.032, "step": 5809 }, { "epoch": 1.5, "learning_rate": 5.419834995521557e-05, "loss": 0.0391, "step": 5810 }, { "epoch": 1.5, "learning_rate": 5.418417565605689e-05, "loss": 0.0439, "step": 5811 }, { "epoch": 1.5, "learning_rate": 5.417000101826035e-05, "loss": 0.024, "step": 5812 }, { "epoch": 1.5, "learning_rate": 5.4155826042973144e-05, "loss": 0.0368, "step": 5813 }, { "epoch": 1.5, "eval_loss": 0.5711690187454224, "eval_runtime": 665.9675, "eval_samples_per_second": 2.91, "eval_steps_per_second": 0.365, "step": 5813 }, { "epoch": 1.5, "learning_rate": 5.414165073134252e-05, "loss": 0.0367, "step": 5814 }, { "epoch": 1.5, "learning_rate": 5.41274750845157e-05, "loss": 0.0372, "step": 5815 }, { "epoch": 1.5, "learning_rate": 5.4113299103639956e-05, "loss": 0.0463, "step": 5816 }, { "epoch": 1.5, "learning_rate": 5.409912278986262e-05, "loss": 0.0455, "step": 5817 }, { "epoch": 1.5, "learning_rate": 5.4084946144331006e-05, "loss": 0.048, "step": 5818 }, { "epoch": 1.5, "learning_rate": 5.407076916819249e-05, "loss": 0.0404, "step": 5819 }, { "epoch": 1.5, "learning_rate": 5.405659186259443e-05, "loss": 0.0471, "step": 5820 }, { "epoch": 1.5, "learning_rate": 5.404241422868426e-05, "loss": 0.0488, "step": 5821 }, { "epoch": 1.5, "learning_rate": 5.40282362676094e-05, "loss": 0.0292, "step": 5822 }, { "epoch": 1.5, "learning_rate": 5.401405798051734e-05, "loss": 0.03, "step": 5823 }, { "epoch": 1.5, "learning_rate": 5.399987936855555e-05, "loss": 0.0396, "step": 5824 }, { "epoch": 1.5, "learning_rate": 5.3985700432871546e-05, "loss": 0.0391, "step": 5825 }, { "epoch": 1.5, "learning_rate": 5.397152117461288e-05, "loss": 0.0412, "step": 5826 }, { "epoch": 1.5, "learning_rate": 5.3957341594927134e-05, "loss": 0.056, "step": 5827 }, { "epoch": 1.5, "learning_rate": 5.394316169496187e-05, "loss": 0.0616, "step": 5828 }, { "epoch": 1.5, "learning_rate": 5.392898147586475e-05, "loss": 0.0647, "step": 5829 }, { "epoch": 1.5, "learning_rate": 5.391480093878338e-05, "loss": 0.0408, "step": 5830 }, { "epoch": 1.5, "learning_rate": 5.390062008486547e-05, "loss": 0.0535, "step": 5831 }, { "epoch": 1.51, "learning_rate": 5.388643891525872e-05, "loss": 0.0427, "step": 5832 }, { "epoch": 1.51, "learning_rate": 5.387225743111082e-05, "loss": 0.0404, "step": 5833 }, { "epoch": 1.51, "learning_rate": 5.3858075633569535e-05, "loss": 0.0342, "step": 5834 }, { "epoch": 1.51, "learning_rate": 5.384389352378265e-05, "loss": 0.0488, "step": 5835 }, { "epoch": 1.51, "learning_rate": 5.382971110289796e-05, "loss": 0.0307, "step": 5836 }, { "epoch": 1.51, "learning_rate": 5.38155283720633e-05, "loss": 0.0397, "step": 5837 }, { "epoch": 1.51, "learning_rate": 5.38013453324265e-05, "loss": 0.0428, "step": 5838 }, { "epoch": 1.51, "learning_rate": 5.378716198513545e-05, "loss": 0.0342, "step": 5839 }, { "epoch": 1.51, "learning_rate": 5.377297833133804e-05, "loss": 0.0399, "step": 5840 }, { "epoch": 1.51, "learning_rate": 5.375879437218222e-05, "loss": 0.0398, "step": 5841 }, { "epoch": 1.51, "learning_rate": 5.3744610108815905e-05, "loss": 0.038, "step": 5842 }, { "epoch": 1.51, "learning_rate": 5.37304255423871e-05, "loss": 0.0451, "step": 5843 }, { "epoch": 1.51, "learning_rate": 5.371624067404378e-05, "loss": 0.0324, "step": 5844 }, { "epoch": 1.51, "learning_rate": 5.3702055504934004e-05, "loss": 0.0413, "step": 5845 }, { "epoch": 1.51, "learning_rate": 5.368787003620579e-05, "loss": 0.0431, "step": 5846 }, { "epoch": 1.51, "learning_rate": 5.3673684269007205e-05, "loss": 0.027, "step": 5847 }, { "epoch": 1.51, "learning_rate": 5.365949820448637e-05, "loss": 0.0475, "step": 5848 }, { "epoch": 1.51, "learning_rate": 5.364531184379139e-05, "loss": 0.0384, "step": 5849 }, { "epoch": 1.51, "learning_rate": 5.363112518807043e-05, "loss": 0.0372, "step": 5850 }, { "epoch": 1.51, "learning_rate": 5.3616938238471625e-05, "loss": 0.0417, "step": 5851 }, { "epoch": 1.51, "learning_rate": 5.360275099614319e-05, "loss": 0.043, "step": 5852 }, { "epoch": 1.51, "learning_rate": 5.358856346223334e-05, "loss": 0.0276, "step": 5853 }, { "epoch": 1.51, "learning_rate": 5.3574375637890306e-05, "loss": 0.0536, "step": 5854 }, { "epoch": 1.51, "learning_rate": 5.356018752426236e-05, "loss": 0.0402, "step": 5855 }, { "epoch": 1.51, "learning_rate": 5.354599912249778e-05, "loss": 0.0414, "step": 5856 }, { "epoch": 1.51, "learning_rate": 5.353181043374488e-05, "loss": 0.0387, "step": 5857 }, { "epoch": 1.51, "learning_rate": 5.351762145915199e-05, "loss": 0.0464, "step": 5858 }, { "epoch": 1.51, "learning_rate": 5.350343219986749e-05, "loss": 0.0432, "step": 5859 }, { "epoch": 1.51, "learning_rate": 5.3489242657039714e-05, "loss": 0.0485, "step": 5860 }, { "epoch": 1.51, "learning_rate": 5.34750528318171e-05, "loss": 0.043, "step": 5861 }, { "epoch": 1.51, "learning_rate": 5.346086272534805e-05, "loss": 0.041, "step": 5862 }, { "epoch": 1.51, "learning_rate": 5.3446672338781033e-05, "loss": 0.0336, "step": 5863 }, { "epoch": 1.51, "learning_rate": 5.343248167326452e-05, "loss": 0.037, "step": 5864 }, { "epoch": 1.51, "learning_rate": 5.3418290729946976e-05, "loss": 0.0525, "step": 5865 }, { "epoch": 1.51, "learning_rate": 5.3404099509976945e-05, "loss": 0.0472, "step": 5866 }, { "epoch": 1.51, "learning_rate": 5.338990801450294e-05, "loss": 0.0427, "step": 5867 }, { "epoch": 1.51, "learning_rate": 5.337571624467356e-05, "loss": 0.0365, "step": 5868 }, { "epoch": 1.51, "learning_rate": 5.3361524201637345e-05, "loss": 0.0266, "step": 5869 }, { "epoch": 1.51, "learning_rate": 5.3347331886542916e-05, "loss": 0.0496, "step": 5870 }, { "epoch": 1.52, "learning_rate": 5.3333139300538904e-05, "loss": 0.0372, "step": 5871 }, { "epoch": 1.52, "learning_rate": 5.331894644477395e-05, "loss": 0.0534, "step": 5872 }, { "epoch": 1.52, "learning_rate": 5.330475332039675e-05, "loss": 0.0385, "step": 5873 }, { "epoch": 1.52, "learning_rate": 5.329055992855596e-05, "loss": 0.0473, "step": 5874 }, { "epoch": 1.52, "learning_rate": 5.3276366270400304e-05, "loss": 0.0397, "step": 5875 }, { "epoch": 1.52, "learning_rate": 5.326217234707852e-05, "loss": 0.049, "step": 5876 }, { "epoch": 1.52, "learning_rate": 5.324797815973938e-05, "loss": 0.0551, "step": 5877 }, { "epoch": 1.52, "learning_rate": 5.3233783709531635e-05, "loss": 0.0448, "step": 5878 }, { "epoch": 1.52, "learning_rate": 5.321958899760411e-05, "loss": 0.0383, "step": 5879 }, { "epoch": 1.52, "learning_rate": 5.32053940251056e-05, "loss": 0.048, "step": 5880 }, { "epoch": 1.52, "learning_rate": 5.3191198793184956e-05, "loss": 0.0384, "step": 5881 }, { "epoch": 1.52, "learning_rate": 5.317700330299107e-05, "loss": 0.0583, "step": 5882 }, { "epoch": 1.52, "learning_rate": 5.316280755567278e-05, "loss": 0.0485, "step": 5883 }, { "epoch": 1.52, "learning_rate": 5.3148611552379016e-05, "loss": 0.0442, "step": 5884 }, { "epoch": 1.52, "learning_rate": 5.313441529425869e-05, "loss": 0.0405, "step": 5885 }, { "epoch": 1.52, "learning_rate": 5.312021878246076e-05, "loss": 0.0472, "step": 5886 }, { "epoch": 1.52, "learning_rate": 5.310602201813419e-05, "loss": 0.0425, "step": 5887 }, { "epoch": 1.52, "learning_rate": 5.309182500242796e-05, "loss": 0.0513, "step": 5888 }, { "epoch": 1.52, "learning_rate": 5.3077627736491084e-05, "loss": 0.035, "step": 5889 }, { "epoch": 1.52, "learning_rate": 5.306343022147258e-05, "loss": 0.044, "step": 5890 }, { "epoch": 1.52, "learning_rate": 5.3049232458521514e-05, "loss": 0.0457, "step": 5891 }, { "epoch": 1.52, "learning_rate": 5.3035034448786925e-05, "loss": 0.0245, "step": 5892 }, { "epoch": 1.52, "learning_rate": 5.3020836193417924e-05, "loss": 0.044, "step": 5893 }, { "epoch": 1.52, "learning_rate": 5.300663769356361e-05, "loss": 0.0314, "step": 5894 }, { "epoch": 1.52, "learning_rate": 5.29924389503731e-05, "loss": 0.0617, "step": 5895 }, { "epoch": 1.52, "learning_rate": 5.297823996499557e-05, "loss": 0.0294, "step": 5896 }, { "epoch": 1.52, "learning_rate": 5.2964040738580156e-05, "loss": 0.0403, "step": 5897 }, { "epoch": 1.52, "learning_rate": 5.2949841272276055e-05, "loss": 0.035, "step": 5898 }, { "epoch": 1.52, "learning_rate": 5.293564156723247e-05, "loss": 0.0319, "step": 5899 }, { "epoch": 1.52, "learning_rate": 5.292144162459864e-05, "loss": 0.0373, "step": 5900 }, { "epoch": 1.52, "learning_rate": 5.290724144552379e-05, "loss": 0.0383, "step": 5901 }, { "epoch": 1.52, "learning_rate": 5.28930410311572e-05, "loss": 0.0373, "step": 5902 }, { "epoch": 1.52, "learning_rate": 5.287884038264813e-05, "loss": 0.0371, "step": 5903 }, { "epoch": 1.52, "learning_rate": 5.286463950114589e-05, "loss": 0.0503, "step": 5904 }, { "epoch": 1.52, "learning_rate": 5.2850438387799814e-05, "loss": 0.047, "step": 5905 }, { "epoch": 1.52, "learning_rate": 5.283623704375923e-05, "loss": 0.0366, "step": 5906 }, { "epoch": 1.52, "learning_rate": 5.2822035470173494e-05, "loss": 0.0299, "step": 5907 }, { "epoch": 1.52, "learning_rate": 5.280783366819197e-05, "loss": 0.0454, "step": 5908 }, { "epoch": 1.52, "learning_rate": 5.279363163896408e-05, "loss": 0.0361, "step": 5909 }, { "epoch": 1.53, "learning_rate": 5.277942938363921e-05, "loss": 0.0265, "step": 5910 }, { "epoch": 1.53, "learning_rate": 5.276522690336682e-05, "loss": 0.0258, "step": 5911 }, { "epoch": 1.53, "learning_rate": 5.275102419929632e-05, "loss": 0.0485, "step": 5912 }, { "epoch": 1.53, "learning_rate": 5.2736821272577205e-05, "loss": 0.0495, "step": 5913 }, { "epoch": 1.53, "learning_rate": 5.272261812435897e-05, "loss": 0.0336, "step": 5914 }, { "epoch": 1.53, "learning_rate": 5.270841475579109e-05, "loss": 0.0367, "step": 5915 }, { "epoch": 1.53, "learning_rate": 5.26942111680231e-05, "loss": 0.0361, "step": 5916 }, { "epoch": 1.53, "learning_rate": 5.268000736220454e-05, "loss": 0.0499, "step": 5917 }, { "epoch": 1.53, "learning_rate": 5.2665803339484965e-05, "loss": 0.0251, "step": 5918 }, { "epoch": 1.53, "learning_rate": 5.265159910101396e-05, "loss": 0.054, "step": 5919 }, { "epoch": 1.53, "learning_rate": 5.263739464794109e-05, "loss": 0.0345, "step": 5920 }, { "epoch": 1.53, "learning_rate": 5.2623189981415976e-05, "loss": 0.0257, "step": 5921 }, { "epoch": 1.53, "learning_rate": 5.260898510258826e-05, "loss": 0.0426, "step": 5922 }, { "epoch": 1.53, "learning_rate": 5.259478001260757e-05, "loss": 0.0357, "step": 5923 }, { "epoch": 1.53, "learning_rate": 5.258057471262356e-05, "loss": 0.0477, "step": 5924 }, { "epoch": 1.53, "learning_rate": 5.256636920378592e-05, "loss": 0.0458, "step": 5925 }, { "epoch": 1.53, "learning_rate": 5.2552163487244346e-05, "loss": 0.0514, "step": 5926 }, { "epoch": 1.53, "learning_rate": 5.2537957564148534e-05, "loss": 0.0373, "step": 5927 }, { "epoch": 1.53, "learning_rate": 5.252375143564824e-05, "loss": 0.0387, "step": 5928 }, { "epoch": 1.53, "learning_rate": 5.250954510289318e-05, "loss": 0.0556, "step": 5929 }, { "epoch": 1.53, "learning_rate": 5.249533856703312e-05, "loss": 0.0482, "step": 5930 }, { "epoch": 1.53, "learning_rate": 5.2481131829217845e-05, "loss": 0.0337, "step": 5931 }, { "epoch": 1.53, "learning_rate": 5.2466924890597156e-05, "loss": 0.0465, "step": 5932 }, { "epoch": 1.53, "learning_rate": 5.245271775232085e-05, "loss": 0.047, "step": 5933 }, { "epoch": 1.53, "learning_rate": 5.243851041553874e-05, "loss": 0.0321, "step": 5934 }, { "epoch": 1.53, "learning_rate": 5.24243028814007e-05, "loss": 0.045, "step": 5935 }, { "epoch": 1.53, "learning_rate": 5.2410095151056584e-05, "loss": 0.0458, "step": 5936 }, { "epoch": 1.53, "learning_rate": 5.239588722565624e-05, "loss": 0.0402, "step": 5937 }, { "epoch": 1.53, "learning_rate": 5.238167910634957e-05, "loss": 0.0478, "step": 5938 }, { "epoch": 1.53, "learning_rate": 5.2367470794286485e-05, "loss": 0.0435, "step": 5939 }, { "epoch": 1.53, "learning_rate": 5.235326229061691e-05, "loss": 0.0345, "step": 5940 }, { "epoch": 1.53, "learning_rate": 5.2339053596490786e-05, "loss": 0.0337, "step": 5941 }, { "epoch": 1.53, "learning_rate": 5.232484471305804e-05, "loss": 0.0406, "step": 5942 }, { "epoch": 1.53, "learning_rate": 5.231063564146865e-05, "loss": 0.0463, "step": 5943 }, { "epoch": 1.53, "learning_rate": 5.229642638287261e-05, "loss": 0.0361, "step": 5944 }, { "epoch": 1.53, "learning_rate": 5.2282216938419924e-05, "loss": 0.0504, "step": 5945 }, { "epoch": 1.53, "learning_rate": 5.226800730926058e-05, "loss": 0.0281, "step": 5946 }, { "epoch": 1.53, "learning_rate": 5.225379749654461e-05, "loss": 0.0344, "step": 5947 }, { "epoch": 1.53, "learning_rate": 5.223958750142207e-05, "loss": 0.0332, "step": 5948 }, { "epoch": 1.54, "learning_rate": 5.2225377325043025e-05, "loss": 0.0449, "step": 5949 }, { "epoch": 1.54, "learning_rate": 5.221116696855752e-05, "loss": 0.0297, "step": 5950 }, { "epoch": 1.54, "learning_rate": 5.219695643311566e-05, "loss": 0.0375, "step": 5951 }, { "epoch": 1.54, "learning_rate": 5.218274571986753e-05, "loss": 0.0348, "step": 5952 }, { "epoch": 1.54, "learning_rate": 5.216853482996327e-05, "loss": 0.0438, "step": 5953 }, { "epoch": 1.54, "learning_rate": 5.2154323764553005e-05, "loss": 0.0576, "step": 5954 }, { "epoch": 1.54, "learning_rate": 5.214011252478686e-05, "loss": 0.0466, "step": 5955 }, { "epoch": 1.54, "learning_rate": 5.2125901111815e-05, "loss": 0.0474, "step": 5956 }, { "epoch": 1.54, "learning_rate": 5.211168952678761e-05, "loss": 0.0365, "step": 5957 }, { "epoch": 1.54, "learning_rate": 5.209747777085486e-05, "loss": 0.0371, "step": 5958 }, { "epoch": 1.54, "learning_rate": 5.2083265845166975e-05, "loss": 0.0491, "step": 5959 }, { "epoch": 1.54, "learning_rate": 5.206905375087413e-05, "loss": 0.0524, "step": 5960 }, { "epoch": 1.54, "learning_rate": 5.205484148912658e-05, "loss": 0.0433, "step": 5961 }, { "epoch": 1.54, "learning_rate": 5.204062906107456e-05, "loss": 0.0426, "step": 5962 }, { "epoch": 1.54, "learning_rate": 5.2026416467868336e-05, "loss": 0.038, "step": 5963 }, { "epoch": 1.54, "learning_rate": 5.201220371065815e-05, "loss": 0.0661, "step": 5964 }, { "epoch": 1.54, "learning_rate": 5.199799079059429e-05, "loss": 0.0551, "step": 5965 }, { "epoch": 1.54, "learning_rate": 5.1983777708827066e-05, "loss": 0.0495, "step": 5966 }, { "epoch": 1.54, "learning_rate": 5.196956446650678e-05, "loss": 0.0341, "step": 5967 }, { "epoch": 1.54, "learning_rate": 5.195535106478374e-05, "loss": 0.0305, "step": 5968 }, { "epoch": 1.54, "learning_rate": 5.194113750480829e-05, "loss": 0.0503, "step": 5969 }, { "epoch": 1.54, "learning_rate": 5.192692378773078e-05, "loss": 0.0409, "step": 5970 }, { "epoch": 1.54, "learning_rate": 5.191270991470155e-05, "loss": 0.0601, "step": 5971 }, { "epoch": 1.54, "learning_rate": 5.189849588687099e-05, "loss": 0.0435, "step": 5972 }, { "epoch": 1.54, "learning_rate": 5.188428170538947e-05, "loss": 0.0477, "step": 5973 }, { "epoch": 1.54, "learning_rate": 5.187006737140739e-05, "loss": 0.0553, "step": 5974 }, { "epoch": 1.54, "learning_rate": 5.1855852886075184e-05, "loss": 0.0375, "step": 5975 }, { "epoch": 1.54, "learning_rate": 5.184163825054323e-05, "loss": 0.0457, "step": 5976 }, { "epoch": 1.54, "learning_rate": 5.1827423465961987e-05, "loss": 0.0466, "step": 5977 }, { "epoch": 1.54, "learning_rate": 5.181320853348189e-05, "loss": 0.0371, "step": 5978 }, { "epoch": 1.54, "learning_rate": 5.1798993454253406e-05, "loss": 0.0473, "step": 5979 }, { "epoch": 1.54, "learning_rate": 5.178477822942701e-05, "loss": 0.0537, "step": 5980 }, { "epoch": 1.54, "learning_rate": 5.177056286015316e-05, "loss": 0.041, "step": 5981 }, { "epoch": 1.54, "learning_rate": 5.1756347347582355e-05, "loss": 0.0326, "step": 5982 }, { "epoch": 1.54, "learning_rate": 5.17421316928651e-05, "loss": 0.0334, "step": 5983 }, { "epoch": 1.54, "learning_rate": 5.172791589715194e-05, "loss": 0.0507, "step": 5984 }, { "epoch": 1.54, "learning_rate": 5.171369996159336e-05, "loss": 0.0482, "step": 5985 }, { "epoch": 1.54, "learning_rate": 5.169948388733991e-05, "loss": 0.0475, "step": 5986 }, { "epoch": 1.55, "learning_rate": 5.1685267675542136e-05, "loss": 0.0435, "step": 5987 }, { "epoch": 1.55, "learning_rate": 5.1671051327350614e-05, "loss": 0.0364, "step": 5988 }, { "epoch": 1.55, "learning_rate": 5.1656834843915916e-05, "loss": 0.0307, "step": 5989 }, { "epoch": 1.55, "learning_rate": 5.1642618226388605e-05, "loss": 0.0449, "step": 5990 }, { "epoch": 1.55, "learning_rate": 5.162840147591928e-05, "loss": 0.031, "step": 5991 }, { "epoch": 1.55, "learning_rate": 5.1614184593658566e-05, "loss": 0.049, "step": 5992 }, { "epoch": 1.55, "learning_rate": 5.159996758075706e-05, "loss": 0.0391, "step": 5993 }, { "epoch": 1.55, "learning_rate": 5.158575043836539e-05, "loss": 0.0302, "step": 5994 }, { "epoch": 1.55, "learning_rate": 5.157153316763418e-05, "loss": 0.0408, "step": 5995 }, { "epoch": 1.55, "learning_rate": 5.1557315769714095e-05, "loss": 0.0472, "step": 5996 }, { "epoch": 1.55, "learning_rate": 5.1543098245755794e-05, "loss": 0.0635, "step": 5997 }, { "epoch": 1.55, "learning_rate": 5.1528880596909924e-05, "loss": 0.0376, "step": 5998 }, { "epoch": 1.55, "learning_rate": 5.151466282432718e-05, "loss": 0.0572, "step": 5999 }, { "epoch": 1.55, "learning_rate": 5.150044492915823e-05, "loss": 0.0481, "step": 6000 }, { "epoch": 1.55, "learning_rate": 5.148622691255378e-05, "loss": 0.0506, "step": 6001 }, { "epoch": 1.55, "learning_rate": 5.147200877566456e-05, "loss": 0.0532, "step": 6002 }, { "epoch": 1.55, "learning_rate": 5.145779051964125e-05, "loss": 0.0481, "step": 6003 }, { "epoch": 1.55, "learning_rate": 5.144357214563459e-05, "loss": 0.0274, "step": 6004 }, { "epoch": 1.55, "learning_rate": 5.142935365479532e-05, "loss": 0.0267, "step": 6005 }, { "epoch": 1.55, "learning_rate": 5.141513504827419e-05, "loss": 0.0363, "step": 6006 }, { "epoch": 1.55, "learning_rate": 5.140091632722193e-05, "loss": 0.0355, "step": 6007 }, { "epoch": 1.55, "learning_rate": 5.138669749278933e-05, "loss": 0.0355, "step": 6008 }, { "epoch": 1.55, "learning_rate": 5.1372478546127145e-05, "loss": 0.0228, "step": 6009 }, { "epoch": 1.55, "learning_rate": 5.1358259488386174e-05, "loss": 0.0264, "step": 6010 }, { "epoch": 1.55, "learning_rate": 5.134404032071719e-05, "loss": 0.05, "step": 6011 }, { "epoch": 1.55, "learning_rate": 5.1329821044271e-05, "loss": 0.0614, "step": 6012 }, { "epoch": 1.55, "learning_rate": 5.131560166019841e-05, "loss": 0.0446, "step": 6013 }, { "epoch": 1.55, "learning_rate": 5.1301382169650244e-05, "loss": 0.032, "step": 6014 }, { "epoch": 1.55, "learning_rate": 5.1287162573777335e-05, "loss": 0.0443, "step": 6015 }, { "epoch": 1.55, "learning_rate": 5.12729428737305e-05, "loss": 0.0449, "step": 6016 }, { "epoch": 1.55, "learning_rate": 5.125872307066059e-05, "loss": 0.0675, "step": 6017 }, { "epoch": 1.55, "learning_rate": 5.124450316571845e-05, "loss": 0.0511, "step": 6018 }, { "epoch": 1.55, "learning_rate": 5.1230283160054956e-05, "loss": 0.0298, "step": 6019 }, { "epoch": 1.55, "learning_rate": 5.121606305482096e-05, "loss": 0.0364, "step": 6020 }, { "epoch": 1.55, "learning_rate": 5.120184285116734e-05, "loss": 0.0495, "step": 6021 }, { "epoch": 1.55, "learning_rate": 5.1187622550244974e-05, "loss": 0.0565, "step": 6022 }, { "epoch": 1.55, "learning_rate": 5.1173402153204786e-05, "loss": 0.0324, "step": 6023 }, { "epoch": 1.55, "learning_rate": 5.115918166119763e-05, "loss": 0.0343, "step": 6024 }, { "epoch": 1.55, "learning_rate": 5.114496107537443e-05, "loss": 0.0302, "step": 6025 }, { "epoch": 1.56, "learning_rate": 5.1130740396886115e-05, "loss": 0.0427, "step": 6026 }, { "epoch": 1.56, "learning_rate": 5.1116519626883585e-05, "loss": 0.0369, "step": 6027 }, { "epoch": 1.56, "learning_rate": 5.11022987665178e-05, "loss": 0.037, "step": 6028 }, { "epoch": 1.56, "learning_rate": 5.108807781693966e-05, "loss": 0.0245, "step": 6029 }, { "epoch": 1.56, "learning_rate": 5.107385677930012e-05, "loss": 0.0312, "step": 6030 }, { "epoch": 1.56, "learning_rate": 5.105963565475015e-05, "loss": 0.0325, "step": 6031 }, { "epoch": 1.56, "learning_rate": 5.104541444444071e-05, "loss": 0.0463, "step": 6032 }, { "epoch": 1.56, "learning_rate": 5.103119314952273e-05, "loss": 0.0443, "step": 6033 }, { "epoch": 1.56, "learning_rate": 5.10169717711472e-05, "loss": 0.0248, "step": 6034 }, { "epoch": 1.56, "learning_rate": 5.100275031046511e-05, "loss": 0.0385, "step": 6035 }, { "epoch": 1.56, "learning_rate": 5.098852876862744e-05, "loss": 0.0361, "step": 6036 }, { "epoch": 1.56, "learning_rate": 5.097430714678517e-05, "loss": 0.0409, "step": 6037 }, { "epoch": 1.56, "learning_rate": 5.096008544608931e-05, "loss": 0.0437, "step": 6038 }, { "epoch": 1.56, "learning_rate": 5.094586366769086e-05, "loss": 0.0455, "step": 6039 }, { "epoch": 1.56, "learning_rate": 5.0931641812740836e-05, "loss": 0.0321, "step": 6040 }, { "epoch": 1.56, "learning_rate": 5.091741988239026e-05, "loss": 0.0277, "step": 6041 }, { "epoch": 1.56, "learning_rate": 5.0903197877790146e-05, "loss": 0.0436, "step": 6042 }, { "epoch": 1.56, "learning_rate": 5.088897580009151e-05, "loss": 0.0459, "step": 6043 }, { "epoch": 1.56, "learning_rate": 5.0874753650445404e-05, "loss": 0.0421, "step": 6044 }, { "epoch": 1.56, "learning_rate": 5.0860531430002886e-05, "loss": 0.0377, "step": 6045 }, { "epoch": 1.56, "learning_rate": 5.084630913991497e-05, "loss": 0.0572, "step": 6046 }, { "epoch": 1.56, "learning_rate": 5.0832086781332734e-05, "loss": 0.043, "step": 6047 }, { "epoch": 1.56, "learning_rate": 5.0817864355407207e-05, "loss": 0.0287, "step": 6048 }, { "epoch": 1.56, "learning_rate": 5.0803641863289496e-05, "loss": 0.0358, "step": 6049 }, { "epoch": 1.56, "learning_rate": 5.078941930613063e-05, "loss": 0.0422, "step": 6050 }, { "epoch": 1.56, "learning_rate": 5.077519668508168e-05, "loss": 0.0363, "step": 6051 }, { "epoch": 1.56, "learning_rate": 5.076097400129376e-05, "loss": 0.0346, "step": 6052 }, { "epoch": 1.56, "learning_rate": 5.074675125591793e-05, "loss": 0.0431, "step": 6053 }, { "epoch": 1.56, "learning_rate": 5.073252845010531e-05, "loss": 0.0371, "step": 6054 }, { "epoch": 1.56, "learning_rate": 5.071830558500694e-05, "loss": 0.0392, "step": 6055 }, { "epoch": 1.56, "learning_rate": 5.0704082661773965e-05, "loss": 0.0443, "step": 6056 }, { "epoch": 1.56, "learning_rate": 5.068985968155747e-05, "loss": 0.0371, "step": 6057 }, { "epoch": 1.56, "learning_rate": 5.0675636645508574e-05, "loss": 0.0264, "step": 6058 }, { "epoch": 1.56, "learning_rate": 5.066141355477837e-05, "loss": 0.0433, "step": 6059 }, { "epoch": 1.56, "learning_rate": 5.0647190410518e-05, "loss": 0.0403, "step": 6060 }, { "epoch": 1.56, "learning_rate": 5.0632967213878554e-05, "loss": 0.0412, "step": 6061 }, { "epoch": 1.56, "learning_rate": 5.061874396601121e-05, "loss": 0.0423, "step": 6062 }, { "epoch": 1.56, "learning_rate": 5.0604520668067036e-05, "loss": 0.0344, "step": 6063 }, { "epoch": 1.56, "learning_rate": 5.05902973211972e-05, "loss": 0.0469, "step": 6064 }, { "epoch": 1.57, "learning_rate": 5.057607392655284e-05, "loss": 0.0428, "step": 6065 }, { "epoch": 1.57, "learning_rate": 5.056185048528509e-05, "loss": 0.0473, "step": 6066 }, { "epoch": 1.57, "learning_rate": 5.0547626998545107e-05, "loss": 0.0398, "step": 6067 }, { "epoch": 1.57, "learning_rate": 5.053340346748403e-05, "loss": 0.0416, "step": 6068 }, { "epoch": 1.57, "learning_rate": 5.051917989325301e-05, "loss": 0.0326, "step": 6069 }, { "epoch": 1.57, "learning_rate": 5.0504956277003204e-05, "loss": 0.0461, "step": 6070 }, { "epoch": 1.57, "learning_rate": 5.04907326198858e-05, "loss": 0.0413, "step": 6071 }, { "epoch": 1.57, "learning_rate": 5.04765089230519e-05, "loss": 0.0386, "step": 6072 }, { "epoch": 1.57, "learning_rate": 5.046228518765273e-05, "loss": 0.0368, "step": 6073 }, { "epoch": 1.57, "learning_rate": 5.044806141483942e-05, "loss": 0.0445, "step": 6074 }, { "epoch": 1.57, "learning_rate": 5.043383760576317e-05, "loss": 0.0522, "step": 6075 }, { "epoch": 1.57, "learning_rate": 5.041961376157516e-05, "loss": 0.0339, "step": 6076 }, { "epoch": 1.57, "learning_rate": 5.040538988342654e-05, "loss": 0.0673, "step": 6077 }, { "epoch": 1.57, "learning_rate": 5.039116597246849e-05, "loss": 0.048, "step": 6078 }, { "epoch": 1.57, "learning_rate": 5.0376942029852216e-05, "loss": 0.054, "step": 6079 }, { "epoch": 1.57, "learning_rate": 5.036271805672891e-05, "loss": 0.0617, "step": 6080 }, { "epoch": 1.57, "learning_rate": 5.034849405424973e-05, "loss": 0.0517, "step": 6081 }, { "epoch": 1.57, "learning_rate": 5.0334270023565874e-05, "loss": 0.0313, "step": 6082 }, { "epoch": 1.57, "learning_rate": 5.032004596582857e-05, "loss": 0.0452, "step": 6083 }, { "epoch": 1.57, "learning_rate": 5.0305821882188975e-05, "loss": 0.0434, "step": 6084 }, { "epoch": 1.57, "learning_rate": 5.029159777379829e-05, "loss": 0.0264, "step": 6085 }, { "epoch": 1.57, "learning_rate": 5.027737364180775e-05, "loss": 0.033, "step": 6086 }, { "epoch": 1.57, "learning_rate": 5.026314948736851e-05, "loss": 0.0455, "step": 6087 }, { "epoch": 1.57, "learning_rate": 5.02489253116318e-05, "loss": 0.0487, "step": 6088 }, { "epoch": 1.57, "learning_rate": 5.0234701115748826e-05, "loss": 0.0328, "step": 6089 }, { "epoch": 1.57, "learning_rate": 5.022047690087078e-05, "loss": 0.0515, "step": 6090 }, { "epoch": 1.57, "learning_rate": 5.020625266814888e-05, "loss": 0.0417, "step": 6091 }, { "epoch": 1.57, "learning_rate": 5.0192028418734335e-05, "loss": 0.037, "step": 6092 }, { "epoch": 1.57, "learning_rate": 5.017780415377836e-05, "loss": 0.0443, "step": 6093 }, { "epoch": 1.57, "learning_rate": 5.0163579874432164e-05, "loss": 0.0298, "step": 6094 }, { "epoch": 1.57, "learning_rate": 5.014935558184694e-05, "loss": 0.0441, "step": 6095 }, { "epoch": 1.57, "learning_rate": 5.013513127717394e-05, "loss": 0.0354, "step": 6096 }, { "epoch": 1.57, "learning_rate": 5.012090696156437e-05, "loss": 0.0428, "step": 6097 }, { "epoch": 1.57, "learning_rate": 5.0106682636169425e-05, "loss": 0.0363, "step": 6098 }, { "epoch": 1.57, "learning_rate": 5.009245830214033e-05, "loss": 0.0351, "step": 6099 }, { "epoch": 1.57, "learning_rate": 5.007823396062832e-05, "loss": 0.0338, "step": 6100 }, { "epoch": 1.57, "learning_rate": 5.006400961278459e-05, "loss": 0.0365, "step": 6101 }, { "epoch": 1.57, "learning_rate": 5.004978525976038e-05, "loss": 0.0431, "step": 6102 }, { "epoch": 1.57, "learning_rate": 5.003556090270689e-05, "loss": 0.0495, "step": 6103 }, { "epoch": 1.58, "learning_rate": 5.002133654277535e-05, "loss": 0.041, "step": 6104 }, { "epoch": 1.58, "learning_rate": 5.0007112181116986e-05, "loss": 0.0431, "step": 6105 }, { "epoch": 1.58, "learning_rate": 4.999288781888301e-05, "loss": 0.0381, "step": 6106 }, { "epoch": 1.58, "learning_rate": 4.997866345722465e-05, "loss": 0.0279, "step": 6107 }, { "epoch": 1.58, "learning_rate": 4.996443909729313e-05, "loss": 0.0332, "step": 6108 }, { "epoch": 1.58, "learning_rate": 4.995021474023963e-05, "loss": 0.0285, "step": 6109 }, { "epoch": 1.58, "learning_rate": 4.9935990387215423e-05, "loss": 0.0538, "step": 6110 }, { "epoch": 1.58, "learning_rate": 4.9921766039371684e-05, "loss": 0.0419, "step": 6111 }, { "epoch": 1.58, "learning_rate": 4.990754169785968e-05, "loss": 0.0402, "step": 6112 }, { "epoch": 1.58, "learning_rate": 4.98933173638306e-05, "loss": 0.0452, "step": 6113 }, { "epoch": 1.58, "learning_rate": 4.987909303843564e-05, "loss": 0.0332, "step": 6114 }, { "epoch": 1.58, "learning_rate": 4.986486872282607e-05, "loss": 0.033, "step": 6115 }, { "epoch": 1.58, "learning_rate": 4.9850644418153055e-05, "loss": 0.0283, "step": 6116 }, { "epoch": 1.58, "learning_rate": 4.983642012556785e-05, "loss": 0.0539, "step": 6117 }, { "epoch": 1.58, "learning_rate": 4.982219584622166e-05, "loss": 0.0393, "step": 6118 }, { "epoch": 1.58, "learning_rate": 4.980797158126567e-05, "loss": 0.0383, "step": 6119 }, { "epoch": 1.58, "learning_rate": 4.979374733185113e-05, "loss": 0.0536, "step": 6120 }, { "epoch": 1.58, "learning_rate": 4.9779523099129245e-05, "loss": 0.0298, "step": 6121 }, { "epoch": 1.58, "learning_rate": 4.9765298884251186e-05, "loss": 0.0223, "step": 6122 }, { "epoch": 1.58, "learning_rate": 4.9751074688368214e-05, "loss": 0.0568, "step": 6123 }, { "epoch": 1.58, "learning_rate": 4.9736850512631493e-05, "loss": 0.0508, "step": 6124 }, { "epoch": 1.58, "learning_rate": 4.972262635819227e-05, "loss": 0.0355, "step": 6125 }, { "epoch": 1.58, "learning_rate": 4.970840222620172e-05, "loss": 0.0315, "step": 6126 }, { "epoch": 1.58, "learning_rate": 4.969417811781104e-05, "loss": 0.057, "step": 6127 }, { "epoch": 1.58, "learning_rate": 4.967995403417145e-05, "loss": 0.028, "step": 6128 }, { "epoch": 1.58, "learning_rate": 4.9665729976434124e-05, "loss": 0.038, "step": 6129 }, { "epoch": 1.58, "learning_rate": 4.9651505945750284e-05, "loss": 0.0299, "step": 6130 }, { "epoch": 1.58, "learning_rate": 4.963728194327112e-05, "loss": 0.0299, "step": 6131 }, { "epoch": 1.58, "learning_rate": 4.962305797014778e-05, "loss": 0.0346, "step": 6132 }, { "epoch": 1.58, "learning_rate": 4.960883402753152e-05, "loss": 0.0488, "step": 6133 }, { "epoch": 1.58, "learning_rate": 4.9594610116573494e-05, "loss": 0.0333, "step": 6134 }, { "epoch": 1.58, "learning_rate": 4.958038623842485e-05, "loss": 0.0266, "step": 6135 }, { "epoch": 1.58, "learning_rate": 4.956616239423684e-05, "loss": 0.0337, "step": 6136 }, { "epoch": 1.58, "learning_rate": 4.9551938585160575e-05, "loss": 0.0402, "step": 6137 }, { "epoch": 1.58, "learning_rate": 4.9537714812347284e-05, "loss": 0.0449, "step": 6138 }, { "epoch": 1.58, "learning_rate": 4.9523491076948115e-05, "loss": 0.0519, "step": 6139 }, { "epoch": 1.58, "learning_rate": 4.950926738011422e-05, "loss": 0.0301, "step": 6140 }, { "epoch": 1.58, "learning_rate": 4.94950437229968e-05, "loss": 0.0476, "step": 6141 }, { "epoch": 1.59, "learning_rate": 4.948082010674699e-05, "loss": 0.05, "step": 6142 }, { "epoch": 1.59, "learning_rate": 4.946659653251599e-05, "loss": 0.0484, "step": 6143 }, { "epoch": 1.59, "learning_rate": 4.945237300145491e-05, "loss": 0.0416, "step": 6144 }, { "epoch": 1.59, "learning_rate": 4.943814951471491e-05, "loss": 0.033, "step": 6145 }, { "epoch": 1.59, "learning_rate": 4.942392607344717e-05, "loss": 0.0447, "step": 6146 }, { "epoch": 1.59, "learning_rate": 4.940970267880282e-05, "loss": 0.0376, "step": 6147 }, { "epoch": 1.59, "learning_rate": 4.9395479331932975e-05, "loss": 0.0395, "step": 6148 }, { "epoch": 1.59, "learning_rate": 4.9381256033988824e-05, "loss": 0.0374, "step": 6149 }, { "epoch": 1.59, "learning_rate": 4.9367032786121444e-05, "loss": 0.0349, "step": 6150 }, { "epoch": 1.59, "learning_rate": 4.935280958948202e-05, "loss": 0.0488, "step": 6151 }, { "epoch": 1.59, "learning_rate": 4.933858644522165e-05, "loss": 0.045, "step": 6152 }, { "epoch": 1.59, "learning_rate": 4.932436335449144e-05, "loss": 0.0381, "step": 6153 }, { "epoch": 1.59, "learning_rate": 4.931014031844254e-05, "loss": 0.038, "step": 6154 }, { "epoch": 1.59, "learning_rate": 4.929591733822603e-05, "loss": 0.0338, "step": 6155 }, { "epoch": 1.59, "learning_rate": 4.928169441499306e-05, "loss": 0.0333, "step": 6156 }, { "epoch": 1.59, "learning_rate": 4.9267471549894714e-05, "loss": 0.0467, "step": 6157 }, { "epoch": 1.59, "learning_rate": 4.9253248744082065e-05, "loss": 0.0356, "step": 6158 }, { "epoch": 1.59, "learning_rate": 4.9239025998706254e-05, "loss": 0.0509, "step": 6159 }, { "epoch": 1.59, "learning_rate": 4.922480331491833e-05, "loss": 0.0364, "step": 6160 }, { "epoch": 1.59, "learning_rate": 4.921058069386939e-05, "loss": 0.0436, "step": 6161 }, { "epoch": 1.59, "learning_rate": 4.919635813671053e-05, "loss": 0.0514, "step": 6162 }, { "epoch": 1.59, "learning_rate": 4.918213564459279e-05, "loss": 0.037, "step": 6163 }, { "epoch": 1.59, "learning_rate": 4.9167913218667284e-05, "loss": 0.0319, "step": 6164 }, { "epoch": 1.59, "learning_rate": 4.915369086008504e-05, "loss": 0.0389, "step": 6165 }, { "epoch": 1.59, "learning_rate": 4.913946856999712e-05, "loss": 0.0624, "step": 6166 }, { "epoch": 1.59, "learning_rate": 4.91252463495546e-05, "loss": 0.0407, "step": 6167 }, { "epoch": 1.59, "learning_rate": 4.911102419990849e-05, "loss": 0.0396, "step": 6168 }, { "epoch": 1.59, "learning_rate": 4.909680212220987e-05, "loss": 0.0418, "step": 6169 }, { "epoch": 1.59, "learning_rate": 4.908258011760976e-05, "loss": 0.0505, "step": 6170 }, { "epoch": 1.59, "learning_rate": 4.906835818725916e-05, "loss": 0.0471, "step": 6171 }, { "epoch": 1.59, "learning_rate": 4.9054136332309145e-05, "loss": 0.0366, "step": 6172 }, { "epoch": 1.59, "learning_rate": 4.903991455391071e-05, "loss": 0.0502, "step": 6173 }, { "epoch": 1.59, "learning_rate": 4.9025692853214836e-05, "loss": 0.049, "step": 6174 }, { "epoch": 1.59, "learning_rate": 4.901147123137258e-05, "loss": 0.0394, "step": 6175 }, { "epoch": 1.59, "learning_rate": 4.899724968953489e-05, "loss": 0.0107, "step": 6176 }, { "epoch": 1.59, "learning_rate": 4.898302822885281e-05, "loss": 0.0706, "step": 6177 }, { "epoch": 1.59, "learning_rate": 4.8968806850477296e-05, "loss": 0.0508, "step": 6178 }, { "epoch": 1.59, "learning_rate": 4.895458555555931e-05, "loss": 0.0476, "step": 6179 }, { "epoch": 1.59, "learning_rate": 4.894036434524986e-05, "loss": 0.037, "step": 6180 }, { "epoch": 1.6, "learning_rate": 4.892614322069988e-05, "loss": 0.0528, "step": 6181 }, { "epoch": 1.6, "learning_rate": 4.8911922183060356e-05, "loss": 0.0415, "step": 6182 }, { "epoch": 1.6, "learning_rate": 4.889770123348223e-05, "loss": 0.0596, "step": 6183 }, { "epoch": 1.6, "learning_rate": 4.888348037311642e-05, "loss": 0.0474, "step": 6184 }, { "epoch": 1.6, "learning_rate": 4.8869259603113904e-05, "loss": 0.0432, "step": 6185 }, { "epoch": 1.6, "learning_rate": 4.8855038924625566e-05, "loss": 0.0484, "step": 6186 }, { "epoch": 1.6, "learning_rate": 4.884081833880239e-05, "loss": 0.0506, "step": 6187 }, { "epoch": 1.6, "learning_rate": 4.882659784679524e-05, "loss": 0.0445, "step": 6188 }, { "epoch": 1.6, "learning_rate": 4.881237744975502e-05, "loss": 0.0297, "step": 6189 }, { "epoch": 1.6, "learning_rate": 4.879815714883267e-05, "loss": 0.0467, "step": 6190 }, { "epoch": 1.6, "learning_rate": 4.878393694517906e-05, "loss": 0.059, "step": 6191 }, { "epoch": 1.6, "learning_rate": 4.876971683994505e-05, "loss": 0.0504, "step": 6192 }, { "epoch": 1.6, "learning_rate": 4.875549683428156e-05, "loss": 0.0377, "step": 6193 }, { "epoch": 1.6, "learning_rate": 4.874127692933941e-05, "loss": 0.0524, "step": 6194 }, { "epoch": 1.6, "learning_rate": 4.872705712626951e-05, "loss": 0.0471, "step": 6195 }, { "epoch": 1.6, "learning_rate": 4.8712837426222676e-05, "loss": 0.0335, "step": 6196 }, { "epoch": 1.6, "learning_rate": 4.8698617830349754e-05, "loss": 0.0305, "step": 6197 }, { "epoch": 1.6, "learning_rate": 4.8684398339801604e-05, "loss": 0.0446, "step": 6198 }, { "epoch": 1.6, "learning_rate": 4.8670178955729e-05, "loss": 0.0483, "step": 6199 }, { "epoch": 1.6, "learning_rate": 4.8655959679282826e-05, "loss": 0.0453, "step": 6200 }, { "epoch": 1.6, "learning_rate": 4.864174051161385e-05, "loss": 0.0492, "step": 6201 }, { "epoch": 1.6, "learning_rate": 4.862752145387286e-05, "loss": 0.0441, "step": 6202 }, { "epoch": 1.6, "learning_rate": 4.8613302507210684e-05, "loss": 0.0287, "step": 6203 }, { "epoch": 1.6, "learning_rate": 4.859908367277809e-05, "loss": 0.0392, "step": 6204 }, { "epoch": 1.6, "learning_rate": 4.8584864951725824e-05, "loss": 0.0328, "step": 6205 }, { "epoch": 1.6, "learning_rate": 4.85706463452047e-05, "loss": 0.0354, "step": 6206 }, { "epoch": 1.6, "learning_rate": 4.855642785436541e-05, "loss": 0.046, "step": 6207 }, { "epoch": 1.6, "learning_rate": 4.854220948035876e-05, "loss": 0.0441, "step": 6208 }, { "epoch": 1.6, "learning_rate": 4.852799122433546e-05, "loss": 0.0373, "step": 6209 }, { "epoch": 1.6, "learning_rate": 4.8513773087446213e-05, "loss": 0.0464, "step": 6210 }, { "epoch": 1.6, "learning_rate": 4.8499555070841786e-05, "loss": 0.03, "step": 6211 }, { "epoch": 1.6, "learning_rate": 4.848533717567283e-05, "loss": 0.0336, "step": 6212 }, { "epoch": 1.6, "learning_rate": 4.847111940309008e-05, "loss": 0.027, "step": 6213 }, { "epoch": 1.6, "learning_rate": 4.8456901754244225e-05, "loss": 0.0563, "step": 6214 }, { "epoch": 1.6, "learning_rate": 4.84426842302859e-05, "loss": 0.0435, "step": 6215 }, { "epoch": 1.6, "learning_rate": 4.8428466832365826e-05, "loss": 0.0328, "step": 6216 }, { "epoch": 1.6, "learning_rate": 4.841424956163463e-05, "loss": 0.0303, "step": 6217 }, { "epoch": 1.6, "learning_rate": 4.840003241924295e-05, "loss": 0.0398, "step": 6218 }, { "epoch": 1.6, "learning_rate": 4.838581540634145e-05, "loss": 0.0385, "step": 6219 }, { "epoch": 1.61, "learning_rate": 4.8371598524080715e-05, "loss": 0.0315, "step": 6220 }, { "epoch": 1.61, "learning_rate": 4.8357381773611407e-05, "loss": 0.0531, "step": 6221 }, { "epoch": 1.61, "learning_rate": 4.834316515608411e-05, "loss": 0.04, "step": 6222 }, { "epoch": 1.61, "learning_rate": 4.832894867264939e-05, "loss": 0.0459, "step": 6223 }, { "epoch": 1.61, "learning_rate": 4.8314732324457876e-05, "loss": 0.0726, "step": 6224 }, { "epoch": 1.61, "learning_rate": 4.83005161126601e-05, "loss": 0.0513, "step": 6225 }, { "epoch": 1.61, "learning_rate": 4.828630003840666e-05, "loss": 0.031, "step": 6226 }, { "epoch": 1.61, "learning_rate": 4.827208410284809e-05, "loss": 0.0523, "step": 6227 }, { "epoch": 1.61, "learning_rate": 4.8257868307134895e-05, "loss": 0.0491, "step": 6228 }, { "epoch": 1.61, "learning_rate": 4.824365265241766e-05, "loss": 0.0389, "step": 6229 }, { "epoch": 1.61, "learning_rate": 4.8229437139846865e-05, "loss": 0.0411, "step": 6230 }, { "epoch": 1.61, "learning_rate": 4.8215221770573e-05, "loss": 0.0306, "step": 6231 }, { "epoch": 1.61, "learning_rate": 4.82010065457466e-05, "loss": 0.0419, "step": 6232 }, { "epoch": 1.61, "learning_rate": 4.81867914665181e-05, "loss": 0.0585, "step": 6233 }, { "epoch": 1.61, "learning_rate": 4.817257653403802e-05, "loss": 0.0398, "step": 6234 }, { "epoch": 1.61, "learning_rate": 4.815836174945679e-05, "loss": 0.0411, "step": 6235 }, { "epoch": 1.61, "learning_rate": 4.814414711392483e-05, "loss": 0.0411, "step": 6236 }, { "epoch": 1.61, "learning_rate": 4.812993262859261e-05, "loss": 0.0385, "step": 6237 }, { "epoch": 1.61, "learning_rate": 4.811571829461053e-05, "loss": 0.0344, "step": 6238 }, { "epoch": 1.61, "learning_rate": 4.810150411312902e-05, "loss": 0.032, "step": 6239 }, { "epoch": 1.61, "learning_rate": 4.808729008529847e-05, "loss": 0.045, "step": 6240 }, { "epoch": 1.61, "learning_rate": 4.807307621226923e-05, "loss": 0.0413, "step": 6241 }, { "epoch": 1.61, "learning_rate": 4.805886249519172e-05, "loss": 0.0418, "step": 6242 }, { "epoch": 1.61, "learning_rate": 4.804464893521628e-05, "loss": 0.0266, "step": 6243 }, { "epoch": 1.61, "learning_rate": 4.803043553349323e-05, "loss": 0.0407, "step": 6244 }, { "epoch": 1.61, "learning_rate": 4.8016222291172945e-05, "loss": 0.0354, "step": 6245 }, { "epoch": 1.61, "learning_rate": 4.8002009209405705e-05, "loss": 0.0386, "step": 6246 }, { "epoch": 1.61, "learning_rate": 4.798779628934187e-05, "loss": 0.0474, "step": 6247 }, { "epoch": 1.61, "learning_rate": 4.7973583532131696e-05, "loss": 0.0546, "step": 6248 }, { "epoch": 1.61, "learning_rate": 4.795937093892544e-05, "loss": 0.0288, "step": 6249 }, { "epoch": 1.61, "learning_rate": 4.7945158510873436e-05, "loss": 0.0422, "step": 6250 }, { "epoch": 1.61, "learning_rate": 4.7930946249125866e-05, "loss": 0.0395, "step": 6251 }, { "epoch": 1.61, "learning_rate": 4.791673415483304e-05, "loss": 0.0495, "step": 6252 }, { "epoch": 1.61, "learning_rate": 4.790252222914515e-05, "loss": 0.0405, "step": 6253 }, { "epoch": 1.61, "learning_rate": 4.788831047321239e-05, "loss": 0.0404, "step": 6254 }, { "epoch": 1.61, "learning_rate": 4.787409888818501e-05, "loss": 0.0431, "step": 6255 }, { "epoch": 1.61, "learning_rate": 4.7859887475213154e-05, "loss": 0.0399, "step": 6256 }, { "epoch": 1.61, "learning_rate": 4.784567623544701e-05, "loss": 0.0485, "step": 6257 }, { "epoch": 1.61, "learning_rate": 4.7831465170036737e-05, "loss": 0.0345, "step": 6258 }, { "epoch": 1.62, "learning_rate": 4.781725428013246e-05, "loss": 0.0448, "step": 6259 }, { "epoch": 1.62, "learning_rate": 4.7803043566884356e-05, "loss": 0.0573, "step": 6260 }, { "epoch": 1.62, "learning_rate": 4.778883303144249e-05, "loss": 0.0341, "step": 6261 }, { "epoch": 1.62, "learning_rate": 4.7774622674956986e-05, "loss": 0.051, "step": 6262 }, { "epoch": 1.62, "learning_rate": 4.776041249857794e-05, "loss": 0.0584, "step": 6263 }, { "epoch": 1.62, "learning_rate": 4.774620250345539e-05, "loss": 0.044, "step": 6264 }, { "epoch": 1.62, "learning_rate": 4.773199269073944e-05, "loss": 0.0464, "step": 6265 }, { "epoch": 1.62, "learning_rate": 4.77177830615801e-05, "loss": 0.04, "step": 6266 }, { "epoch": 1.62, "learning_rate": 4.770357361712739e-05, "loss": 0.0321, "step": 6267 }, { "epoch": 1.62, "learning_rate": 4.768936435853136e-05, "loss": 0.0392, "step": 6268 }, { "epoch": 1.62, "learning_rate": 4.7675155286941976e-05, "loss": 0.041, "step": 6269 }, { "epoch": 1.62, "learning_rate": 4.7660946403509226e-05, "loss": 0.0452, "step": 6270 }, { "epoch": 1.62, "learning_rate": 4.764673770938311e-05, "loss": 0.0412, "step": 6271 }, { "epoch": 1.62, "learning_rate": 4.763252920571351e-05, "loss": 0.049, "step": 6272 }, { "epoch": 1.62, "learning_rate": 4.761832089365044e-05, "loss": 0.0372, "step": 6273 }, { "epoch": 1.62, "learning_rate": 4.760411277434377e-05, "loss": 0.039, "step": 6274 }, { "epoch": 1.62, "learning_rate": 4.758990484894343e-05, "loss": 0.0315, "step": 6275 }, { "epoch": 1.62, "learning_rate": 4.757569711859931e-05, "loss": 0.0518, "step": 6276 }, { "epoch": 1.62, "learning_rate": 4.7561489584461257e-05, "loss": 0.0397, "step": 6277 }, { "epoch": 1.62, "learning_rate": 4.7547282247679165e-05, "loss": 0.0315, "step": 6278 }, { "epoch": 1.62, "learning_rate": 4.7533075109402856e-05, "loss": 0.0312, "step": 6279 }, { "epoch": 1.62, "learning_rate": 4.751886817078215e-05, "loss": 0.0415, "step": 6280 }, { "epoch": 1.62, "learning_rate": 4.750466143296689e-05, "loss": 0.0391, "step": 6281 }, { "epoch": 1.62, "learning_rate": 4.7490454897106834e-05, "loss": 0.0414, "step": 6282 }, { "epoch": 1.62, "learning_rate": 4.747624856435177e-05, "loss": 0.0411, "step": 6283 }, { "epoch": 1.62, "learning_rate": 4.746204243585147e-05, "loss": 0.0364, "step": 6284 }, { "epoch": 1.62, "learning_rate": 4.744783651275566e-05, "loss": 0.0428, "step": 6285 }, { "epoch": 1.62, "learning_rate": 4.743363079621408e-05, "loss": 0.05, "step": 6286 }, { "epoch": 1.62, "learning_rate": 4.741942528737644e-05, "loss": 0.0447, "step": 6287 }, { "epoch": 1.62, "learning_rate": 4.740521998739244e-05, "loss": 0.0357, "step": 6288 }, { "epoch": 1.62, "learning_rate": 4.7391014897411755e-05, "loss": 0.0403, "step": 6289 }, { "epoch": 1.62, "learning_rate": 4.7376810018584016e-05, "loss": 0.0447, "step": 6290 }, { "epoch": 1.62, "learning_rate": 4.736260535205892e-05, "loss": 0.0394, "step": 6291 }, { "epoch": 1.62, "learning_rate": 4.734840089898605e-05, "loss": 0.0331, "step": 6292 }, { "epoch": 1.62, "learning_rate": 4.733419666051503e-05, "loss": 0.0346, "step": 6293 }, { "epoch": 1.62, "learning_rate": 4.7319992637795466e-05, "loss": 0.0371, "step": 6294 }, { "epoch": 1.62, "learning_rate": 4.730578883197691e-05, "loss": 0.0454, "step": 6295 }, { "epoch": 1.62, "learning_rate": 4.7291585244208915e-05, "loss": 0.0318, "step": 6296 }, { "epoch": 1.63, "learning_rate": 4.7277381875641035e-05, "loss": 0.0394, "step": 6297 }, { "epoch": 1.63, "learning_rate": 4.726317872742279e-05, "loss": 0.0355, "step": 6298 }, { "epoch": 1.63, "learning_rate": 4.724897580070369e-05, "loss": 0.0359, "step": 6299 }, { "epoch": 1.63, "learning_rate": 4.72347730966332e-05, "loss": 0.0396, "step": 6300 }, { "epoch": 1.63, "learning_rate": 4.7220570616360795e-05, "loss": 0.0425, "step": 6301 }, { "epoch": 1.63, "learning_rate": 4.720636836103593e-05, "loss": 0.0545, "step": 6302 }, { "epoch": 1.63, "learning_rate": 4.719216633180803e-05, "loss": 0.0462, "step": 6303 }, { "epoch": 1.63, "learning_rate": 4.7177964529826525e-05, "loss": 0.0262, "step": 6304 }, { "epoch": 1.63, "learning_rate": 4.716376295624078e-05, "loss": 0.0397, "step": 6305 }, { "epoch": 1.63, "learning_rate": 4.714956161220019e-05, "loss": 0.0416, "step": 6306 }, { "epoch": 1.63, "learning_rate": 4.713536049885412e-05, "loss": 0.0312, "step": 6307 }, { "epoch": 1.63, "learning_rate": 4.7121159617351886e-05, "loss": 0.0437, "step": 6308 }, { "epoch": 1.63, "learning_rate": 4.710695896884281e-05, "loss": 0.0297, "step": 6309 }, { "epoch": 1.63, "learning_rate": 4.709275855447621e-05, "loss": 0.0362, "step": 6310 }, { "epoch": 1.63, "learning_rate": 4.707855837540136e-05, "loss": 0.0498, "step": 6311 }, { "epoch": 1.63, "learning_rate": 4.706435843276754e-05, "loss": 0.0411, "step": 6312 }, { "epoch": 1.63, "learning_rate": 4.7050158727723956e-05, "loss": 0.0298, "step": 6313 }, { "epoch": 1.63, "learning_rate": 4.7035959261419856e-05, "loss": 0.0311, "step": 6314 }, { "epoch": 1.63, "learning_rate": 4.702176003500444e-05, "loss": 0.034, "step": 6315 }, { "epoch": 1.63, "learning_rate": 4.7007561049626895e-05, "loss": 0.0455, "step": 6316 }, { "epoch": 1.63, "learning_rate": 4.699336230643641e-05, "loss": 0.0447, "step": 6317 }, { "epoch": 1.63, "learning_rate": 4.697916380658209e-05, "loss": 0.0242, "step": 6318 }, { "epoch": 1.63, "learning_rate": 4.696496555121308e-05, "loss": 0.0549, "step": 6319 }, { "epoch": 1.63, "learning_rate": 4.69507675414785e-05, "loss": 0.0373, "step": 6320 }, { "epoch": 1.63, "learning_rate": 4.6936569778527425e-05, "loss": 0.044, "step": 6321 }, { "epoch": 1.63, "learning_rate": 4.692237226350893e-05, "loss": 0.0355, "step": 6322 }, { "epoch": 1.63, "learning_rate": 4.690817499757205e-05, "loss": 0.0369, "step": 6323 }, { "epoch": 1.63, "learning_rate": 4.689397798186582e-05, "loss": 0.0347, "step": 6324 }, { "epoch": 1.63, "learning_rate": 4.6879781217539256e-05, "loss": 0.0455, "step": 6325 }, { "epoch": 1.63, "learning_rate": 4.6865584705741316e-05, "loss": 0.0317, "step": 6326 }, { "epoch": 1.63, "learning_rate": 4.6851388447620996e-05, "loss": 0.0441, "step": 6327 }, { "epoch": 1.63, "learning_rate": 4.6837192444327224e-05, "loss": 0.0453, "step": 6328 }, { "epoch": 1.63, "learning_rate": 4.682299669700894e-05, "loss": 0.048, "step": 6329 }, { "epoch": 1.63, "learning_rate": 4.680880120681505e-05, "loss": 0.0553, "step": 6330 }, { "epoch": 1.63, "learning_rate": 4.6794605974894415e-05, "loss": 0.0484, "step": 6331 }, { "epoch": 1.63, "learning_rate": 4.6780411002395904e-05, "loss": 0.0411, "step": 6332 }, { "epoch": 1.63, "learning_rate": 4.676621629046837e-05, "loss": 0.0253, "step": 6333 }, { "epoch": 1.63, "learning_rate": 4.675202184026063e-05, "loss": 0.0329, "step": 6334 }, { "epoch": 1.63, "learning_rate": 4.673782765292149e-05, "loss": 0.0256, "step": 6335 }, { "epoch": 1.64, "learning_rate": 4.672363372959971e-05, "loss": 0.0372, "step": 6336 }, { "epoch": 1.64, "learning_rate": 4.670944007144406e-05, "loss": 0.0299, "step": 6337 }, { "epoch": 1.64, "learning_rate": 4.6695246679603264e-05, "loss": 0.0301, "step": 6338 }, { "epoch": 1.64, "learning_rate": 4.668105355522606e-05, "loss": 0.0412, "step": 6339 }, { "epoch": 1.64, "learning_rate": 4.666686069946111e-05, "loss": 0.0406, "step": 6340 }, { "epoch": 1.64, "learning_rate": 4.6652668113457096e-05, "loss": 0.0377, "step": 6341 }, { "epoch": 1.64, "learning_rate": 4.663847579836267e-05, "loss": 0.0314, "step": 6342 }, { "epoch": 1.64, "learning_rate": 4.662428375532645e-05, "loss": 0.0339, "step": 6343 }, { "epoch": 1.64, "learning_rate": 4.661009198549707e-05, "loss": 0.0446, "step": 6344 }, { "epoch": 1.64, "learning_rate": 4.659590049002307e-05, "loss": 0.0403, "step": 6345 }, { "epoch": 1.64, "learning_rate": 4.658170927005303e-05, "loss": 0.0512, "step": 6346 }, { "epoch": 1.64, "learning_rate": 4.656751832673549e-05, "loss": 0.042, "step": 6347 }, { "epoch": 1.64, "learning_rate": 4.655332766121898e-05, "loss": 0.0355, "step": 6348 }, { "epoch": 1.64, "learning_rate": 4.6539137274651955e-05, "loss": 0.0372, "step": 6349 }, { "epoch": 1.64, "learning_rate": 4.652494716818291e-05, "loss": 0.0303, "step": 6350 }, { "epoch": 1.64, "learning_rate": 4.651075734296029e-05, "loss": 0.0357, "step": 6351 }, { "epoch": 1.64, "learning_rate": 4.649656780013254e-05, "loss": 0.0362, "step": 6352 }, { "epoch": 1.64, "learning_rate": 4.648237854084801e-05, "loss": 0.0488, "step": 6353 }, { "epoch": 1.64, "learning_rate": 4.6468189566255125e-05, "loss": 0.0257, "step": 6354 }, { "epoch": 1.64, "learning_rate": 4.645400087750223e-05, "loss": 0.0499, "step": 6355 }, { "epoch": 1.64, "learning_rate": 4.643981247573765e-05, "loss": 0.0409, "step": 6356 }, { "epoch": 1.64, "learning_rate": 4.642562436210971e-05, "loss": 0.0409, "step": 6357 }, { "epoch": 1.64, "learning_rate": 4.6411436537766676e-05, "loss": 0.0273, "step": 6358 }, { "epoch": 1.64, "learning_rate": 4.6397249003856816e-05, "loss": 0.0458, "step": 6359 }, { "epoch": 1.64, "learning_rate": 4.638306176152838e-05, "loss": 0.0468, "step": 6360 }, { "epoch": 1.64, "learning_rate": 4.636887481192958e-05, "loss": 0.0443, "step": 6361 }, { "epoch": 1.64, "learning_rate": 4.6354688156208616e-05, "loss": 0.0345, "step": 6362 }, { "epoch": 1.64, "learning_rate": 4.6340501795513636e-05, "loss": 0.043, "step": 6363 }, { "epoch": 1.64, "learning_rate": 4.63263157309928e-05, "loss": 0.0392, "step": 6364 }, { "epoch": 1.64, "learning_rate": 4.631212996379423e-05, "loss": 0.0553, "step": 6365 }, { "epoch": 1.64, "learning_rate": 4.6297944495066014e-05, "loss": 0.0287, "step": 6366 }, { "epoch": 1.64, "learning_rate": 4.628375932595622e-05, "loss": 0.0406, "step": 6367 }, { "epoch": 1.64, "learning_rate": 4.6269574457612915e-05, "loss": 0.043, "step": 6368 }, { "epoch": 1.64, "learning_rate": 4.62553898911841e-05, "loss": 0.0363, "step": 6369 }, { "epoch": 1.64, "learning_rate": 4.6241205627817806e-05, "loss": 0.0461, "step": 6370 }, { "epoch": 1.64, "learning_rate": 4.622702166866197e-05, "loss": 0.0427, "step": 6371 }, { "epoch": 1.64, "learning_rate": 4.621283801486457e-05, "loss": 0.0579, "step": 6372 }, { "epoch": 1.64, "learning_rate": 4.6198654667573516e-05, "loss": 0.0468, "step": 6373 }, { "epoch": 1.64, "learning_rate": 4.6184471627936714e-05, "loss": 0.0416, "step": 6374 }, { "epoch": 1.65, "learning_rate": 4.6170288897102055e-05, "loss": 0.0366, "step": 6375 }, { "epoch": 1.65, "learning_rate": 4.6156106476217366e-05, "loss": 0.037, "step": 6376 }, { "epoch": 1.65, "learning_rate": 4.6141924366430477e-05, "loss": 0.0576, "step": 6377 }, { "epoch": 1.65, "learning_rate": 4.6127742568889205e-05, "loss": 0.0531, "step": 6378 }, { "epoch": 1.65, "learning_rate": 4.6113561084741294e-05, "loss": 0.0409, "step": 6379 }, { "epoch": 1.65, "learning_rate": 4.6099379915134535e-05, "loss": 0.0411, "step": 6380 }, { "epoch": 1.65, "learning_rate": 4.6085199061216624e-05, "loss": 0.0598, "step": 6381 }, { "epoch": 1.65, "learning_rate": 4.607101852413527e-05, "loss": 0.0242, "step": 6382 }, { "epoch": 1.65, "learning_rate": 4.6056838305038147e-05, "loss": 0.0545, "step": 6383 }, { "epoch": 1.65, "learning_rate": 4.604265840507288e-05, "loss": 0.0561, "step": 6384 }, { "epoch": 1.65, "learning_rate": 4.602847882538714e-05, "loss": 0.0513, "step": 6385 }, { "epoch": 1.65, "learning_rate": 4.6014299567128465e-05, "loss": 0.0727, "step": 6386 }, { "epoch": 1.65, "learning_rate": 4.600012063144446e-05, "loss": 0.0498, "step": 6387 }, { "epoch": 1.65, "learning_rate": 4.5985942019482684e-05, "loss": 0.0345, "step": 6388 }, { "epoch": 1.65, "learning_rate": 4.5971763732390614e-05, "loss": 0.0447, "step": 6389 }, { "epoch": 1.65, "learning_rate": 4.595758577131575e-05, "loss": 0.0471, "step": 6390 }, { "epoch": 1.65, "learning_rate": 4.594340813740558e-05, "loss": 0.0432, "step": 6391 }, { "epoch": 1.65, "learning_rate": 4.592923083180752e-05, "loss": 0.0221, "step": 6392 }, { "epoch": 1.65, "learning_rate": 4.5915053855669e-05, "loss": 0.0416, "step": 6393 }, { "epoch": 1.65, "learning_rate": 4.590087721013739e-05, "loss": 0.0544, "step": 6394 }, { "epoch": 1.65, "learning_rate": 4.588670089636005e-05, "loss": 0.0378, "step": 6395 }, { "epoch": 1.65, "learning_rate": 4.5872524915484327e-05, "loss": 0.0327, "step": 6396 }, { "epoch": 1.65, "learning_rate": 4.5858349268657494e-05, "loss": 0.036, "step": 6397 }, { "epoch": 1.65, "learning_rate": 4.584417395702687e-05, "loss": 0.0412, "step": 6398 }, { "epoch": 1.65, "learning_rate": 4.582999898173966e-05, "loss": 0.0477, "step": 6399 }, { "epoch": 1.65, "learning_rate": 4.5815824343943124e-05, "loss": 0.0354, "step": 6400 }, { "epoch": 1.65, "learning_rate": 4.580165004478445e-05, "loss": 0.0316, "step": 6401 }, { "epoch": 1.65, "learning_rate": 4.578747608541078e-05, "loss": 0.0363, "step": 6402 }, { "epoch": 1.65, "learning_rate": 4.57733024669693e-05, "loss": 0.0441, "step": 6403 }, { "epoch": 1.65, "learning_rate": 4.57591291906071e-05, "loss": 0.023, "step": 6404 }, { "epoch": 1.65, "learning_rate": 4.574495625747124e-05, "loss": 0.0392, "step": 6405 }, { "epoch": 1.65, "learning_rate": 4.573078366870883e-05, "loss": 0.0527, "step": 6406 }, { "epoch": 1.65, "learning_rate": 4.571661142546686e-05, "loss": 0.0369, "step": 6407 }, { "epoch": 1.65, "learning_rate": 4.570243952889234e-05, "loss": 0.0396, "step": 6408 }, { "epoch": 1.65, "learning_rate": 4.568826798013226e-05, "loss": 0.0389, "step": 6409 }, { "epoch": 1.65, "learning_rate": 4.5674096780333534e-05, "loss": 0.0361, "step": 6410 }, { "epoch": 1.65, "learning_rate": 4.5659925930643124e-05, "loss": 0.036, "step": 6411 }, { "epoch": 1.65, "learning_rate": 4.5645755432207885e-05, "loss": 0.0449, "step": 6412 }, { "epoch": 1.65, "learning_rate": 4.563158528617468e-05, "loss": 0.0427, "step": 6413 }, { "epoch": 1.66, "learning_rate": 4.561741549369038e-05, "loss": 0.034, "step": 6414 }, { "epoch": 1.66, "learning_rate": 4.560324605590172e-05, "loss": 0.0392, "step": 6415 }, { "epoch": 1.66, "learning_rate": 4.558907697395554e-05, "loss": 0.0352, "step": 6416 }, { "epoch": 1.66, "learning_rate": 4.557490824899856e-05, "loss": 0.0382, "step": 6417 }, { "epoch": 1.66, "learning_rate": 4.5560739882177475e-05, "loss": 0.0327, "step": 6418 }, { "epoch": 1.66, "learning_rate": 4.5546571874639014e-05, "loss": 0.0379, "step": 6419 }, { "epoch": 1.66, "learning_rate": 4.55324042275298e-05, "loss": 0.0378, "step": 6420 }, { "epoch": 1.66, "learning_rate": 4.55182369419965e-05, "loss": 0.0319, "step": 6421 }, { "epoch": 1.66, "learning_rate": 4.55040700191857e-05, "loss": 0.0396, "step": 6422 }, { "epoch": 1.66, "learning_rate": 4.5489903460243945e-05, "loss": 0.0419, "step": 6423 }, { "epoch": 1.66, "learning_rate": 4.5475737266317814e-05, "loss": 0.0288, "step": 6424 }, { "epoch": 1.66, "learning_rate": 4.546157143855379e-05, "loss": 0.0526, "step": 6425 }, { "epoch": 1.66, "learning_rate": 4.544740597809838e-05, "loss": 0.0472, "step": 6426 }, { "epoch": 1.66, "learning_rate": 4.543324088609804e-05, "loss": 0.0472, "step": 6427 }, { "epoch": 1.66, "learning_rate": 4.5419076163699154e-05, "loss": 0.0388, "step": 6428 }, { "epoch": 1.66, "learning_rate": 4.540491181204816e-05, "loss": 0.0524, "step": 6429 }, { "epoch": 1.66, "learning_rate": 4.539074783229141e-05, "loss": 0.0256, "step": 6430 }, { "epoch": 1.66, "learning_rate": 4.5376584225575206e-05, "loss": 0.045, "step": 6431 }, { "epoch": 1.66, "learning_rate": 4.53624209930459e-05, "loss": 0.0277, "step": 6432 }, { "epoch": 1.66, "learning_rate": 4.534825813584971e-05, "loss": 0.0501, "step": 6433 }, { "epoch": 1.66, "learning_rate": 4.533409565513294e-05, "loss": 0.0449, "step": 6434 }, { "epoch": 1.66, "learning_rate": 4.531993355204177e-05, "loss": 0.048, "step": 6435 }, { "epoch": 1.66, "learning_rate": 4.530577182772236e-05, "loss": 0.0301, "step": 6436 }, { "epoch": 1.66, "learning_rate": 4.52916104833209e-05, "loss": 0.0388, "step": 6437 }, { "epoch": 1.66, "learning_rate": 4.527744951998347e-05, "loss": 0.0418, "step": 6438 }, { "epoch": 1.66, "learning_rate": 4.5263288938856216e-05, "loss": 0.0403, "step": 6439 }, { "epoch": 1.66, "learning_rate": 4.524912874108515e-05, "loss": 0.0234, "step": 6440 }, { "epoch": 1.66, "learning_rate": 4.52349689278163e-05, "loss": 0.0297, "step": 6441 }, { "epoch": 1.66, "learning_rate": 4.522080950019568e-05, "loss": 0.0368, "step": 6442 }, { "epoch": 1.66, "learning_rate": 4.520665045936926e-05, "loss": 0.0331, "step": 6443 }, { "epoch": 1.66, "learning_rate": 4.519249180648294e-05, "loss": 0.0447, "step": 6444 }, { "epoch": 1.66, "learning_rate": 4.5178333542682663e-05, "loss": 0.0347, "step": 6445 }, { "epoch": 1.66, "learning_rate": 4.5164175669114255e-05, "loss": 0.035, "step": 6446 }, { "epoch": 1.66, "learning_rate": 4.515001818692361e-05, "loss": 0.0512, "step": 6447 }, { "epoch": 1.66, "learning_rate": 4.51358610972565e-05, "loss": 0.0408, "step": 6448 }, { "epoch": 1.66, "learning_rate": 4.5121704401258694e-05, "loss": 0.0459, "step": 6449 }, { "epoch": 1.66, "learning_rate": 4.510754810007596e-05, "loss": 0.0336, "step": 6450 }, { "epoch": 1.66, "learning_rate": 4.5093392194853976e-05, "loss": 0.0556, "step": 6451 }, { "epoch": 1.67, "learning_rate": 4.507923668673847e-05, "loss": 0.0252, "step": 6452 }, { "epoch": 1.67, "learning_rate": 4.5065081576875066e-05, "loss": 0.0314, "step": 6453 }, { "epoch": 1.67, "learning_rate": 4.505092686640936e-05, "loss": 0.0324, "step": 6454 }, { "epoch": 1.67, "learning_rate": 4.5036772556486964e-05, "loss": 0.0351, "step": 6455 }, { "epoch": 1.67, "learning_rate": 4.5022618648253424e-05, "loss": 0.0388, "step": 6456 }, { "epoch": 1.67, "learning_rate": 4.500846514285422e-05, "loss": 0.0415, "step": 6457 }, { "epoch": 1.67, "learning_rate": 4.49943120414349e-05, "loss": 0.0253, "step": 6458 }, { "epoch": 1.67, "learning_rate": 4.498015934514086e-05, "loss": 0.0596, "step": 6459 }, { "epoch": 1.67, "learning_rate": 4.496600705511757e-05, "loss": 0.0456, "step": 6460 }, { "epoch": 1.67, "learning_rate": 4.4951855172510385e-05, "loss": 0.0471, "step": 6461 }, { "epoch": 1.67, "learning_rate": 4.493770369846465e-05, "loss": 0.0485, "step": 6462 }, { "epoch": 1.67, "learning_rate": 4.492355263412573e-05, "loss": 0.0469, "step": 6463 }, { "epoch": 1.67, "learning_rate": 4.490940198063885e-05, "loss": 0.0419, "step": 6464 }, { "epoch": 1.67, "learning_rate": 4.489525173914934e-05, "loss": 0.0338, "step": 6465 }, { "epoch": 1.67, "learning_rate": 4.488110191080237e-05, "loss": 0.0479, "step": 6466 }, { "epoch": 1.67, "learning_rate": 4.486695249674312e-05, "loss": 0.0364, "step": 6467 }, { "epoch": 1.67, "learning_rate": 4.485280349811679e-05, "loss": 0.0409, "step": 6468 }, { "epoch": 1.67, "learning_rate": 4.483865491606847e-05, "loss": 0.0312, "step": 6469 }, { "epoch": 1.67, "learning_rate": 4.482450675174324e-05, "loss": 0.0444, "step": 6470 }, { "epoch": 1.67, "learning_rate": 4.481035900628618e-05, "loss": 0.0482, "step": 6471 }, { "epoch": 1.67, "learning_rate": 4.479621168084228e-05, "loss": 0.0535, "step": 6472 }, { "epoch": 1.67, "learning_rate": 4.4782064776556554e-05, "loss": 0.0484, "step": 6473 }, { "epoch": 1.67, "learning_rate": 4.476791829457394e-05, "loss": 0.0366, "step": 6474 }, { "epoch": 1.67, "learning_rate": 4.475377223603934e-05, "loss": 0.0392, "step": 6475 }, { "epoch": 1.67, "learning_rate": 4.473962660209766e-05, "loss": 0.0552, "step": 6476 }, { "epoch": 1.67, "learning_rate": 4.472548139389373e-05, "loss": 0.0267, "step": 6477 }, { "epoch": 1.67, "learning_rate": 4.4711336612572396e-05, "loss": 0.0358, "step": 6478 }, { "epoch": 1.67, "learning_rate": 4.469719225927841e-05, "loss": 0.0484, "step": 6479 }, { "epoch": 1.67, "learning_rate": 4.468304833515651e-05, "loss": 0.0466, "step": 6480 }, { "epoch": 1.67, "learning_rate": 4.4668904841351436e-05, "loss": 0.0433, "step": 6481 }, { "epoch": 1.67, "learning_rate": 4.465476177900782e-05, "loss": 0.0405, "step": 6482 }, { "epoch": 1.67, "learning_rate": 4.464061914927035e-05, "loss": 0.0697, "step": 6483 }, { "epoch": 1.67, "learning_rate": 4.462647695328361e-05, "loss": 0.042, "step": 6484 }, { "epoch": 1.67, "learning_rate": 4.461233519219215e-05, "loss": 0.029, "step": 6485 }, { "epoch": 1.67, "learning_rate": 4.459819386714055e-05, "loss": 0.0411, "step": 6486 }, { "epoch": 1.67, "learning_rate": 4.458405297927328e-05, "loss": 0.0496, "step": 6487 }, { "epoch": 1.67, "learning_rate": 4.4569912529734786e-05, "loss": 0.0505, "step": 6488 }, { "epoch": 1.67, "learning_rate": 4.455577251966955e-05, "loss": 0.0416, "step": 6489 }, { "epoch": 1.67, "learning_rate": 4.4541632950221904e-05, "loss": 0.038, "step": 6490 }, { "epoch": 1.68, "learning_rate": 4.452749382253626e-05, "loss": 0.0511, "step": 6491 }, { "epoch": 1.68, "learning_rate": 4.451335513775692e-05, "loss": 0.041, "step": 6492 }, { "epoch": 1.68, "learning_rate": 4.4499216897028145e-05, "loss": 0.0508, "step": 6493 }, { "epoch": 1.68, "learning_rate": 4.448507910149423e-05, "loss": 0.0483, "step": 6494 }, { "epoch": 1.68, "learning_rate": 4.447094175229934e-05, "loss": 0.0575, "step": 6495 }, { "epoch": 1.68, "learning_rate": 4.44568048505877e-05, "loss": 0.0449, "step": 6496 }, { "epoch": 1.68, "learning_rate": 4.444266839750344e-05, "loss": 0.0378, "step": 6497 }, { "epoch": 1.68, "learning_rate": 4.4428532394190625e-05, "loss": 0.0464, "step": 6498 }, { "epoch": 1.68, "learning_rate": 4.441439684179338e-05, "loss": 0.0448, "step": 6499 }, { "epoch": 1.68, "learning_rate": 4.4400261741455714e-05, "loss": 0.0353, "step": 6500 }, { "epoch": 1.68, "learning_rate": 4.43861270943216e-05, "loss": 0.0289, "step": 6501 }, { "epoch": 1.68, "learning_rate": 4.4371992901535045e-05, "loss": 0.0226, "step": 6502 }, { "epoch": 1.68, "learning_rate": 4.435785916423991e-05, "loss": 0.0383, "step": 6503 }, { "epoch": 1.68, "learning_rate": 4.434372588358015e-05, "loss": 0.0487, "step": 6504 }, { "epoch": 1.68, "learning_rate": 4.4329593060699585e-05, "loss": 0.047, "step": 6505 }, { "epoch": 1.68, "learning_rate": 4.4315460696741984e-05, "loss": 0.0483, "step": 6506 }, { "epoch": 1.68, "learning_rate": 4.430132879285119e-05, "loss": 0.0408, "step": 6507 }, { "epoch": 1.68, "learning_rate": 4.4287197350170885e-05, "loss": 0.0353, "step": 6508 }, { "epoch": 1.68, "learning_rate": 4.4273066369844815e-05, "loss": 0.0403, "step": 6509 }, { "epoch": 1.68, "learning_rate": 4.4258935853016625e-05, "loss": 0.0242, "step": 6510 }, { "epoch": 1.68, "learning_rate": 4.4244805800829916e-05, "loss": 0.0291, "step": 6511 }, { "epoch": 1.68, "learning_rate": 4.423067621442831e-05, "loss": 0.0282, "step": 6512 }, { "epoch": 1.68, "learning_rate": 4.421654709495535e-05, "loss": 0.0399, "step": 6513 }, { "epoch": 1.68, "learning_rate": 4.4202418443554506e-05, "loss": 0.0408, "step": 6514 }, { "epoch": 1.68, "learning_rate": 4.418829026136931e-05, "loss": 0.0519, "step": 6515 }, { "epoch": 1.68, "learning_rate": 4.417416254954316e-05, "loss": 0.0379, "step": 6516 }, { "epoch": 1.68, "learning_rate": 4.416003530921949e-05, "loss": 0.0462, "step": 6517 }, { "epoch": 1.68, "learning_rate": 4.414590854154163e-05, "loss": 0.0423, "step": 6518 }, { "epoch": 1.68, "learning_rate": 4.41317822476529e-05, "loss": 0.0291, "step": 6519 }, { "epoch": 1.68, "learning_rate": 4.41176564286966e-05, "loss": 0.0445, "step": 6520 }, { "epoch": 1.68, "learning_rate": 4.410353108581596e-05, "loss": 0.0319, "step": 6521 }, { "epoch": 1.68, "learning_rate": 4.4089406220154205e-05, "loss": 0.0513, "step": 6522 }, { "epoch": 1.68, "learning_rate": 4.407528183285449e-05, "loss": 0.038, "step": 6523 }, { "epoch": 1.68, "learning_rate": 4.406115792505993e-05, "loss": 0.0417, "step": 6524 }, { "epoch": 1.68, "learning_rate": 4.4047034497913654e-05, "loss": 0.0518, "step": 6525 }, { "epoch": 1.68, "learning_rate": 4.4032911552558684e-05, "loss": 0.0381, "step": 6526 }, { "epoch": 1.68, "learning_rate": 4.401878909013801e-05, "loss": 0.0264, "step": 6527 }, { "epoch": 1.68, "learning_rate": 4.4004667111794666e-05, "loss": 0.0338, "step": 6528 }, { "epoch": 1.68, "learning_rate": 4.399054561867153e-05, "loss": 0.0399, "step": 6529 }, { "epoch": 1.69, "learning_rate": 4.3976424611911536e-05, "loss": 0.0418, "step": 6530 }, { "epoch": 1.69, "learning_rate": 4.396230409265753e-05, "loss": 0.0418, "step": 6531 }, { "epoch": 1.69, "learning_rate": 4.3948184062052295e-05, "loss": 0.041, "step": 6532 }, { "epoch": 1.69, "learning_rate": 4.3934064521238656e-05, "loss": 0.0483, "step": 6533 }, { "epoch": 1.69, "learning_rate": 4.39199454713593e-05, "loss": 0.0303, "step": 6534 }, { "epoch": 1.69, "learning_rate": 4.390582691355698e-05, "loss": 0.0275, "step": 6535 }, { "epoch": 1.69, "learning_rate": 4.389170884897432e-05, "loss": 0.0419, "step": 6536 }, { "epoch": 1.69, "learning_rate": 4.387759127875392e-05, "loss": 0.046, "step": 6537 }, { "epoch": 1.69, "learning_rate": 4.38634742040384e-05, "loss": 0.045, "step": 6538 }, { "epoch": 1.69, "learning_rate": 4.384935762597028e-05, "loss": 0.0369, "step": 6539 }, { "epoch": 1.69, "learning_rate": 4.383524154569203e-05, "loss": 0.0437, "step": 6540 }, { "epoch": 1.69, "learning_rate": 4.3821125964346144e-05, "loss": 0.0469, "step": 6541 }, { "epoch": 1.69, "learning_rate": 4.380701088307501e-05, "loss": 0.0411, "step": 6542 }, { "epoch": 1.69, "learning_rate": 4.379289630302103e-05, "loss": 0.0388, "step": 6543 }, { "epoch": 1.69, "learning_rate": 4.377878222532653e-05, "loss": 0.0359, "step": 6544 }, { "epoch": 1.69, "learning_rate": 4.376466865113378e-05, "loss": 0.0437, "step": 6545 }, { "epoch": 1.69, "learning_rate": 4.375055558158507e-05, "loss": 0.0551, "step": 6546 }, { "epoch": 1.69, "learning_rate": 4.3736443017822584e-05, "loss": 0.0339, "step": 6547 }, { "epoch": 1.69, "learning_rate": 4.372233096098853e-05, "loss": 0.0358, "step": 6548 }, { "epoch": 1.69, "learning_rate": 4.370821941222502e-05, "loss": 0.0558, "step": 6549 }, { "epoch": 1.69, "learning_rate": 4.36941083726741e-05, "loss": 0.0624, "step": 6550 }, { "epoch": 1.69, "learning_rate": 4.3679997843477896e-05, "loss": 0.0507, "step": 6551 }, { "epoch": 1.69, "learning_rate": 4.366588782577837e-05, "loss": 0.0523, "step": 6552 }, { "epoch": 1.69, "learning_rate": 4.3651778320717486e-05, "loss": 0.0343, "step": 6553 }, { "epoch": 1.69, "learning_rate": 4.3637669329437194e-05, "loss": 0.0367, "step": 6554 }, { "epoch": 1.69, "learning_rate": 4.362356085307934e-05, "loss": 0.0489, "step": 6555 }, { "epoch": 1.69, "learning_rate": 4.360945289278581e-05, "loss": 0.0449, "step": 6556 }, { "epoch": 1.69, "learning_rate": 4.359534544969838e-05, "loss": 0.0453, "step": 6557 }, { "epoch": 1.69, "learning_rate": 4.3581238524958785e-05, "loss": 0.0422, "step": 6558 }, { "epoch": 1.69, "learning_rate": 4.356713211970879e-05, "loss": 0.0397, "step": 6559 }, { "epoch": 1.69, "learning_rate": 4.355302623509001e-05, "loss": 0.0436, "step": 6560 }, { "epoch": 1.69, "learning_rate": 4.3538920872244136e-05, "loss": 0.0438, "step": 6561 }, { "epoch": 1.69, "learning_rate": 4.352481603231273e-05, "loss": 0.0385, "step": 6562 }, { "epoch": 1.69, "learning_rate": 4.351071171643732e-05, "loss": 0.0473, "step": 6563 }, { "epoch": 1.69, "learning_rate": 4.349660792575945e-05, "loss": 0.0322, "step": 6564 }, { "epoch": 1.69, "learning_rate": 4.348250466142056e-05, "loss": 0.0449, "step": 6565 }, { "epoch": 1.69, "learning_rate": 4.3468401924562044e-05, "loss": 0.0539, "step": 6566 }, { "epoch": 1.69, "learning_rate": 4.3454299716325334e-05, "loss": 0.0358, "step": 6567 }, { "epoch": 1.69, "learning_rate": 4.3440198037851706e-05, "loss": 0.0397, "step": 6568 }, { "epoch": 1.7, "learning_rate": 4.3426096890282514e-05, "loss": 0.0381, "step": 6569 }, { "epoch": 1.7, "learning_rate": 4.341199627475897e-05, "loss": 0.0406, "step": 6570 }, { "epoch": 1.7, "learning_rate": 4.339789619242225e-05, "loss": 0.0506, "step": 6571 }, { "epoch": 1.7, "learning_rate": 4.338379664441357e-05, "loss": 0.0423, "step": 6572 }, { "epoch": 1.7, "learning_rate": 4.336969763187401e-05, "loss": 0.0496, "step": 6573 }, { "epoch": 1.7, "learning_rate": 4.335559915594468e-05, "loss": 0.0439, "step": 6574 }, { "epoch": 1.7, "learning_rate": 4.3341501217766595e-05, "loss": 0.0629, "step": 6575 }, { "epoch": 1.7, "learning_rate": 4.3327403818480716e-05, "loss": 0.0229, "step": 6576 }, { "epoch": 1.7, "learning_rate": 4.331330695922804e-05, "loss": 0.0242, "step": 6577 }, { "epoch": 1.7, "learning_rate": 4.3299210641149434e-05, "loss": 0.0398, "step": 6578 }, { "epoch": 1.7, "learning_rate": 4.328511486538575e-05, "loss": 0.0365, "step": 6579 }, { "epoch": 1.7, "learning_rate": 4.3271019633077824e-05, "loss": 0.0389, "step": 6580 }, { "epoch": 1.7, "learning_rate": 4.32569249453664e-05, "loss": 0.0256, "step": 6581 }, { "epoch": 1.7, "learning_rate": 4.3242830803392245e-05, "loss": 0.0401, "step": 6582 }, { "epoch": 1.7, "learning_rate": 4.3228737208296e-05, "loss": 0.0529, "step": 6583 }, { "epoch": 1.7, "learning_rate": 4.32146441612183e-05, "loss": 0.0606, "step": 6584 }, { "epoch": 1.7, "learning_rate": 4.320055166329978e-05, "loss": 0.0368, "step": 6585 }, { "epoch": 1.7, "learning_rate": 4.3186459715680924e-05, "loss": 0.0439, "step": 6586 }, { "epoch": 1.7, "learning_rate": 4.3172368319502304e-05, "loss": 0.0705, "step": 6587 }, { "epoch": 1.7, "learning_rate": 4.3158277475904344e-05, "loss": 0.0327, "step": 6588 }, { "epoch": 1.7, "learning_rate": 4.314418718602744e-05, "loss": 0.0364, "step": 6589 }, { "epoch": 1.7, "learning_rate": 4.313009745101201e-05, "loss": 0.0194, "step": 6590 }, { "epoch": 1.7, "learning_rate": 4.311600827199834e-05, "loss": 0.0347, "step": 6591 }, { "epoch": 1.7, "learning_rate": 4.31019196501267e-05, "loss": 0.0338, "step": 6592 }, { "epoch": 1.7, "learning_rate": 4.308783158653737e-05, "loss": 0.032, "step": 6593 }, { "epoch": 1.7, "learning_rate": 4.30737440823705e-05, "loss": 0.0347, "step": 6594 }, { "epoch": 1.7, "learning_rate": 4.3059657138766256e-05, "loss": 0.0276, "step": 6595 }, { "epoch": 1.7, "learning_rate": 4.3045570756864736e-05, "loss": 0.0382, "step": 6596 }, { "epoch": 1.7, "learning_rate": 4.303148493780596e-05, "loss": 0.0651, "step": 6597 }, { "epoch": 1.7, "learning_rate": 4.301739968272998e-05, "loss": 0.033, "step": 6598 }, { "epoch": 1.7, "learning_rate": 4.3003314992776716e-05, "loss": 0.0416, "step": 6599 }, { "epoch": 1.7, "learning_rate": 4.2989230869086135e-05, "loss": 0.0226, "step": 6600 }, { "epoch": 1.7, "learning_rate": 4.297514731279807e-05, "loss": 0.042, "step": 6601 }, { "epoch": 1.7, "learning_rate": 4.2961064325052335e-05, "loss": 0.0515, "step": 6602 }, { "epoch": 1.7, "learning_rate": 4.2946981906988753e-05, "loss": 0.0425, "step": 6603 }, { "epoch": 1.7, "learning_rate": 4.2932900059747024e-05, "loss": 0.0437, "step": 6604 }, { "epoch": 1.7, "learning_rate": 4.291881878446683e-05, "loss": 0.045, "step": 6605 }, { "epoch": 1.7, "learning_rate": 4.2904738082287835e-05, "loss": 0.0569, "step": 6606 }, { "epoch": 1.71, "learning_rate": 4.28906579543496e-05, "loss": 0.0299, "step": 6607 }, { "epoch": 1.71, "learning_rate": 4.2876578401791714e-05, "loss": 0.0339, "step": 6608 }, { "epoch": 1.71, "learning_rate": 4.2862499425753646e-05, "loss": 0.0448, "step": 6609 }, { "epoch": 1.71, "learning_rate": 4.284842102737484e-05, "loss": 0.0325, "step": 6610 }, { "epoch": 1.71, "learning_rate": 4.283434320779474e-05, "loss": 0.0461, "step": 6611 }, { "epoch": 1.71, "learning_rate": 4.2820265968152673e-05, "loss": 0.028, "step": 6612 }, { "epoch": 1.71, "learning_rate": 4.280618930958798e-05, "loss": 0.0302, "step": 6613 }, { "epoch": 1.71, "learning_rate": 4.2792113233239916e-05, "loss": 0.036, "step": 6614 }, { "epoch": 1.71, "learning_rate": 4.277803774024767e-05, "loss": 0.037, "step": 6615 }, { "epoch": 1.71, "learning_rate": 4.276396283175047e-05, "loss": 0.0348, "step": 6616 }, { "epoch": 1.71, "learning_rate": 4.274988850888741e-05, "loss": 0.0376, "step": 6617 }, { "epoch": 1.71, "learning_rate": 4.2735814772797547e-05, "loss": 0.0432, "step": 6618 }, { "epoch": 1.71, "learning_rate": 4.2721741624619944e-05, "loss": 0.033, "step": 6619 }, { "epoch": 1.71, "learning_rate": 4.2707669065493556e-05, "loss": 0.04, "step": 6620 }, { "epoch": 1.71, "learning_rate": 4.269359709655735e-05, "loss": 0.042, "step": 6621 }, { "epoch": 1.71, "learning_rate": 4.26795257189502e-05, "loss": 0.0472, "step": 6622 }, { "epoch": 1.71, "learning_rate": 4.266545493381092e-05, "loss": 0.0342, "step": 6623 }, { "epoch": 1.71, "learning_rate": 4.2651384742278335e-05, "loss": 0.0335, "step": 6624 }, { "epoch": 1.71, "learning_rate": 4.263731514549115e-05, "loss": 0.0341, "step": 6625 }, { "epoch": 1.71, "learning_rate": 4.2623246144588105e-05, "loss": 0.0328, "step": 6626 }, { "epoch": 1.71, "learning_rate": 4.260917774070783e-05, "loss": 0.0369, "step": 6627 }, { "epoch": 1.71, "learning_rate": 4.259510993498889e-05, "loss": 0.0311, "step": 6628 }, { "epoch": 1.71, "learning_rate": 4.258104272856988e-05, "loss": 0.0259, "step": 6629 }, { "epoch": 1.71, "learning_rate": 4.2566976122589264e-05, "loss": 0.0328, "step": 6630 }, { "epoch": 1.71, "learning_rate": 4.2552910118185536e-05, "loss": 0.0553, "step": 6631 }, { "epoch": 1.71, "learning_rate": 4.253884471649707e-05, "loss": 0.0273, "step": 6632 }, { "epoch": 1.71, "learning_rate": 4.252477991866222e-05, "loss": 0.026, "step": 6633 }, { "epoch": 1.71, "learning_rate": 4.251071572581931e-05, "loss": 0.0276, "step": 6634 }, { "epoch": 1.71, "learning_rate": 4.2496652139106586e-05, "loss": 0.0305, "step": 6635 }, { "epoch": 1.71, "learning_rate": 4.2482589159662236e-05, "loss": 0.0346, "step": 6636 }, { "epoch": 1.71, "learning_rate": 4.246852678862447e-05, "loss": 0.0272, "step": 6637 }, { "epoch": 1.71, "learning_rate": 4.245446502713134e-05, "loss": 0.0503, "step": 6638 }, { "epoch": 1.71, "learning_rate": 4.2440403876320964e-05, "loss": 0.0419, "step": 6639 }, { "epoch": 1.71, "learning_rate": 4.242634333733131e-05, "loss": 0.0313, "step": 6640 }, { "epoch": 1.71, "learning_rate": 4.241228341130034e-05, "loss": 0.0414, "step": 6641 }, { "epoch": 1.71, "learning_rate": 4.2398224099366e-05, "loss": 0.0678, "step": 6642 }, { "epoch": 1.71, "learning_rate": 4.238416540266611e-05, "loss": 0.0377, "step": 6643 }, { "epoch": 1.71, "learning_rate": 4.2370107322338526e-05, "loss": 0.0498, "step": 6644 }, { "epoch": 1.71, "learning_rate": 4.2356049859520984e-05, "loss": 0.031, "step": 6645 }, { "epoch": 1.72, "learning_rate": 4.234199301535118e-05, "loss": 0.0588, "step": 6646 }, { "epoch": 1.72, "learning_rate": 4.2327936790966824e-05, "loss": 0.0364, "step": 6647 }, { "epoch": 1.72, "learning_rate": 4.231388118750549e-05, "loss": 0.0485, "step": 6648 }, { "epoch": 1.72, "learning_rate": 4.229982620610474e-05, "loss": 0.0374, "step": 6649 }, { "epoch": 1.72, "learning_rate": 4.228577184790212e-05, "loss": 0.0342, "step": 6650 }, { "epoch": 1.72, "learning_rate": 4.2271718114035036e-05, "loss": 0.0389, "step": 6651 }, { "epoch": 1.72, "learning_rate": 4.225766500564096e-05, "loss": 0.04, "step": 6652 }, { "epoch": 1.72, "learning_rate": 4.2243612523857226e-05, "loss": 0.0336, "step": 6653 }, { "epoch": 1.72, "learning_rate": 4.222956066982111e-05, "loss": 0.039, "step": 6654 }, { "epoch": 1.72, "learning_rate": 4.221550944466993e-05, "loss": 0.0344, "step": 6655 }, { "epoch": 1.72, "learning_rate": 4.220145884954084e-05, "loss": 0.0302, "step": 6656 }, { "epoch": 1.72, "learning_rate": 4.218740888557104e-05, "loss": 0.0316, "step": 6657 }, { "epoch": 1.72, "learning_rate": 4.2173359553897626e-05, "loss": 0.0373, "step": 6658 }, { "epoch": 1.72, "learning_rate": 4.2159310855657615e-05, "loss": 0.0495, "step": 6659 }, { "epoch": 1.72, "learning_rate": 4.214526279198806e-05, "loss": 0.0448, "step": 6660 }, { "epoch": 1.72, "learning_rate": 4.2131215364025896e-05, "loss": 0.0377, "step": 6661 }, { "epoch": 1.72, "learning_rate": 4.2117168572907996e-05, "loss": 0.044, "step": 6662 }, { "epoch": 1.72, "learning_rate": 4.210312241977125e-05, "loss": 0.0334, "step": 6663 }, { "epoch": 1.72, "learning_rate": 4.208907690575242e-05, "loss": 0.0318, "step": 6664 }, { "epoch": 1.72, "learning_rate": 4.207503203198829e-05, "loss": 0.0315, "step": 6665 }, { "epoch": 1.72, "learning_rate": 4.206098779961553e-05, "loss": 0.0492, "step": 6666 }, { "epoch": 1.72, "learning_rate": 4.204694420977076e-05, "loss": 0.0399, "step": 6667 }, { "epoch": 1.72, "learning_rate": 4.203290126359062e-05, "loss": 0.0355, "step": 6668 }, { "epoch": 1.72, "learning_rate": 4.2018858962211597e-05, "loss": 0.0407, "step": 6669 }, { "epoch": 1.72, "learning_rate": 4.200481730677022e-05, "loss": 0.0532, "step": 6670 }, { "epoch": 1.72, "learning_rate": 4.199077629840291e-05, "loss": 0.0676, "step": 6671 }, { "epoch": 1.72, "learning_rate": 4.197673593824601e-05, "loss": 0.0459, "step": 6672 }, { "epoch": 1.72, "learning_rate": 4.196269622743591e-05, "loss": 0.0404, "step": 6673 }, { "epoch": 1.72, "learning_rate": 4.194865716710885e-05, "loss": 0.0431, "step": 6674 }, { "epoch": 1.72, "learning_rate": 4.193461875840104e-05, "loss": 0.061, "step": 6675 }, { "epoch": 1.72, "learning_rate": 4.192058100244869e-05, "loss": 0.0215, "step": 6676 }, { "epoch": 1.72, "learning_rate": 4.190654390038787e-05, "loss": 0.0264, "step": 6677 }, { "epoch": 1.72, "learning_rate": 4.18925074533547e-05, "loss": 0.0548, "step": 6678 }, { "epoch": 1.72, "learning_rate": 4.187847166248515e-05, "loss": 0.0526, "step": 6679 }, { "epoch": 1.72, "learning_rate": 4.1864436528915186e-05, "loss": 0.0307, "step": 6680 }, { "epoch": 1.72, "learning_rate": 4.185040205378073e-05, "loss": 0.0451, "step": 6681 }, { "epoch": 1.72, "learning_rate": 4.18363682382176e-05, "loss": 0.0331, "step": 6682 }, { "epoch": 1.72, "learning_rate": 4.182233508336164e-05, "loss": 0.0491, "step": 6683 }, { "epoch": 1.72, "learning_rate": 4.1808302590348574e-05, "loss": 0.0499, "step": 6684 }, { "epoch": 1.73, "learning_rate": 4.179427076031407e-05, "loss": 0.0586, "step": 6685 }, { "epoch": 1.73, "learning_rate": 4.1780239594393814e-05, "loss": 0.0446, "step": 6686 }, { "epoch": 1.73, "learning_rate": 4.176620909372336e-05, "loss": 0.0379, "step": 6687 }, { "epoch": 1.73, "learning_rate": 4.1752179259438215e-05, "loss": 0.0389, "step": 6688 }, { "epoch": 1.73, "learning_rate": 4.173815009267392e-05, "loss": 0.0381, "step": 6689 }, { "epoch": 1.73, "learning_rate": 4.1724121594565826e-05, "loss": 0.0367, "step": 6690 }, { "epoch": 1.73, "learning_rate": 4.171009376624936e-05, "loss": 0.0292, "step": 6691 }, { "epoch": 1.73, "learning_rate": 4.169606660885982e-05, "loss": 0.0271, "step": 6692 }, { "epoch": 1.73, "learning_rate": 4.168204012353243e-05, "loss": 0.0328, "step": 6693 }, { "epoch": 1.73, "learning_rate": 4.166801431140244e-05, "loss": 0.0491, "step": 6694 }, { "epoch": 1.73, "learning_rate": 4.165398917360497e-05, "loss": 0.0427, "step": 6695 }, { "epoch": 1.73, "learning_rate": 4.163996471127515e-05, "loss": 0.0243, "step": 6696 }, { "epoch": 1.73, "learning_rate": 4.1625940925548e-05, "loss": 0.0429, "step": 6697 }, { "epoch": 1.73, "learning_rate": 4.1611917817558485e-05, "loss": 0.0482, "step": 6698 }, { "epoch": 1.73, "learning_rate": 4.1597895388441595e-05, "loss": 0.0409, "step": 6699 }, { "epoch": 1.73, "learning_rate": 4.158387363933217e-05, "loss": 0.0369, "step": 6700 }, { "epoch": 1.73, "learning_rate": 4.156985257136501e-05, "loss": 0.0416, "step": 6701 }, { "epoch": 1.73, "learning_rate": 4.155583218567493e-05, "loss": 0.04, "step": 6702 }, { "epoch": 1.73, "learning_rate": 4.154181248339659e-05, "loss": 0.0372, "step": 6703 }, { "epoch": 1.73, "learning_rate": 4.152779346566471e-05, "loss": 0.0234, "step": 6704 }, { "epoch": 1.73, "learning_rate": 4.151377513361385e-05, "loss": 0.0414, "step": 6705 }, { "epoch": 1.73, "learning_rate": 4.1499757488378544e-05, "loss": 0.03, "step": 6706 }, { "epoch": 1.73, "learning_rate": 4.148574053109332e-05, "loss": 0.0271, "step": 6707 }, { "epoch": 1.73, "learning_rate": 4.1471724262892576e-05, "loss": 0.0368, "step": 6708 }, { "epoch": 1.73, "learning_rate": 4.145770868491072e-05, "loss": 0.0327, "step": 6709 }, { "epoch": 1.73, "learning_rate": 4.1443693798282066e-05, "loss": 0.0364, "step": 6710 }, { "epoch": 1.73, "learning_rate": 4.142967960414086e-05, "loss": 0.0436, "step": 6711 }, { "epoch": 1.73, "learning_rate": 4.1415666103621346e-05, "loss": 0.0379, "step": 6712 }, { "epoch": 1.73, "learning_rate": 4.140165329785766e-05, "loss": 0.0455, "step": 6713 }, { "epoch": 1.73, "learning_rate": 4.138764118798389e-05, "loss": 0.0339, "step": 6714 }, { "epoch": 1.73, "learning_rate": 4.1373629775134106e-05, "loss": 0.0391, "step": 6715 }, { "epoch": 1.73, "learning_rate": 4.135961906044225e-05, "loss": 0.0281, "step": 6716 }, { "epoch": 1.73, "learning_rate": 4.134560904504231e-05, "loss": 0.0503, "step": 6717 }, { "epoch": 1.73, "learning_rate": 4.133159973006812e-05, "loss": 0.0409, "step": 6718 }, { "epoch": 1.73, "learning_rate": 4.131759111665349e-05, "loss": 0.0406, "step": 6719 }, { "epoch": 1.73, "learning_rate": 4.1303583205932204e-05, "loss": 0.036, "step": 6720 }, { "epoch": 1.73, "learning_rate": 4.128957599903793e-05, "loss": 0.0348, "step": 6721 }, { "epoch": 1.73, "learning_rate": 4.127556949710437e-05, "loss": 0.0348, "step": 6722 }, { "epoch": 1.73, "learning_rate": 4.1261563701265066e-05, "loss": 0.0424, "step": 6723 }, { "epoch": 1.74, "learning_rate": 4.124755861265354e-05, "loss": 0.0422, "step": 6724 }, { "epoch": 1.74, "learning_rate": 4.1233554232403315e-05, "loss": 0.042, "step": 6725 }, { "epoch": 1.74, "learning_rate": 4.121955056164776e-05, "loss": 0.029, "step": 6726 }, { "epoch": 1.74, "learning_rate": 4.120554760152026e-05, "loss": 0.0292, "step": 6727 }, { "epoch": 1.74, "learning_rate": 4.119154535315411e-05, "loss": 0.0382, "step": 6728 }, { "epoch": 1.74, "learning_rate": 4.117754381768253e-05, "loss": 0.0393, "step": 6729 }, { "epoch": 1.74, "learning_rate": 4.1163542996238765e-05, "loss": 0.041, "step": 6730 }, { "epoch": 1.74, "learning_rate": 4.1149542889955886e-05, "loss": 0.0535, "step": 6731 }, { "epoch": 1.74, "learning_rate": 4.113554349996699e-05, "loss": 0.038, "step": 6732 }, { "epoch": 1.74, "learning_rate": 4.112154482740509e-05, "loss": 0.0349, "step": 6733 }, { "epoch": 1.74, "learning_rate": 4.1107546873403114e-05, "loss": 0.0434, "step": 6734 }, { "epoch": 1.74, "learning_rate": 4.1093549639094005e-05, "loss": 0.0521, "step": 6735 }, { "epoch": 1.74, "learning_rate": 4.107955312561056e-05, "loss": 0.0461, "step": 6736 }, { "epoch": 1.74, "learning_rate": 4.1065557334085574e-05, "loss": 0.0463, "step": 6737 }, { "epoch": 1.74, "learning_rate": 4.105156226565178e-05, "loss": 0.0392, "step": 6738 }, { "epoch": 1.74, "learning_rate": 4.103756792144182e-05, "loss": 0.0419, "step": 6739 }, { "epoch": 1.74, "learning_rate": 4.102357430258831e-05, "loss": 0.0324, "step": 6740 }, { "epoch": 1.74, "learning_rate": 4.10095814102238e-05, "loss": 0.0364, "step": 6741 }, { "epoch": 1.74, "learning_rate": 4.099558924548075e-05, "loss": 0.0441, "step": 6742 }, { "epoch": 1.74, "learning_rate": 4.0981597809491626e-05, "loss": 0.0308, "step": 6743 }, { "epoch": 1.74, "learning_rate": 4.096760710338876e-05, "loss": 0.0418, "step": 6744 }, { "epoch": 1.74, "learning_rate": 4.095361712830449e-05, "loss": 0.0457, "step": 6745 }, { "epoch": 1.74, "learning_rate": 4.0939627885371065e-05, "loss": 0.037, "step": 6746 }, { "epoch": 1.74, "learning_rate": 4.092563937572064e-05, "loss": 0.0359, "step": 6747 }, { "epoch": 1.74, "learning_rate": 4.09116516004854e-05, "loss": 0.0289, "step": 6748 }, { "epoch": 1.74, "learning_rate": 4.089766456079738e-05, "loss": 0.0366, "step": 6749 }, { "epoch": 1.74, "learning_rate": 4.088367825778861e-05, "loss": 0.04, "step": 6750 }, { "epoch": 1.74, "learning_rate": 4.086969269259104e-05, "loss": 0.0535, "step": 6751 }, { "epoch": 1.74, "learning_rate": 4.085570786633656e-05, "loss": 0.0399, "step": 6752 }, { "epoch": 1.74, "learning_rate": 4.0841723780157e-05, "loss": 0.0302, "step": 6753 }, { "epoch": 1.74, "learning_rate": 4.082774043518414e-05, "loss": 0.036, "step": 6754 }, { "epoch": 1.74, "learning_rate": 4.08137578325497e-05, "loss": 0.0448, "step": 6755 }, { "epoch": 1.74, "learning_rate": 4.0799775973385327e-05, "loss": 0.038, "step": 6756 }, { "epoch": 1.74, "learning_rate": 4.078579485882261e-05, "loss": 0.0465, "step": 6757 }, { "epoch": 1.74, "learning_rate": 4.077181448999309e-05, "loss": 0.0405, "step": 6758 }, { "epoch": 1.74, "learning_rate": 4.075783486802824e-05, "loss": 0.0429, "step": 6759 }, { "epoch": 1.74, "learning_rate": 4.074385599405946e-05, "loss": 0.0251, "step": 6760 }, { "epoch": 1.74, "learning_rate": 4.072987786921813e-05, "loss": 0.0424, "step": 6761 }, { "epoch": 1.75, "learning_rate": 4.071590049463551e-05, "loss": 0.0422, "step": 6762 }, { "epoch": 1.75, "learning_rate": 4.070192387144285e-05, "loss": 0.0554, "step": 6763 }, { "epoch": 1.75, "learning_rate": 4.068794800077132e-05, "loss": 0.0455, "step": 6764 }, { "epoch": 1.75, "learning_rate": 4.067397288375201e-05, "loss": 0.0546, "step": 6765 }, { "epoch": 1.75, "learning_rate": 4.0659998521515985e-05, "loss": 0.0371, "step": 6766 }, { "epoch": 1.75, "learning_rate": 4.064602491519423e-05, "loss": 0.0342, "step": 6767 }, { "epoch": 1.75, "learning_rate": 4.063205206591767e-05, "loss": 0.0417, "step": 6768 }, { "epoch": 1.75, "learning_rate": 4.061807997481718e-05, "loss": 0.0371, "step": 6769 }, { "epoch": 1.75, "learning_rate": 4.060410864302354e-05, "loss": 0.0416, "step": 6770 }, { "epoch": 1.75, "learning_rate": 4.0590138071667505e-05, "loss": 0.0498, "step": 6771 }, { "epoch": 1.75, "learning_rate": 4.057616826187975e-05, "loss": 0.0347, "step": 6772 }, { "epoch": 1.75, "learning_rate": 4.05621992147909e-05, "loss": 0.042, "step": 6773 }, { "epoch": 1.75, "learning_rate": 4.054823093153152e-05, "loss": 0.0357, "step": 6774 }, { "epoch": 1.75, "learning_rate": 4.053426341323208e-05, "loss": 0.0385, "step": 6775 }, { "epoch": 1.75, "learning_rate": 4.052029666102303e-05, "loss": 0.0342, "step": 6776 }, { "epoch": 1.75, "learning_rate": 4.050633067603474e-05, "loss": 0.0286, "step": 6777 }, { "epoch": 1.75, "learning_rate": 4.049236545939751e-05, "loss": 0.0321, "step": 6778 }, { "epoch": 1.75, "learning_rate": 4.047840101224161e-05, "loss": 0.0255, "step": 6779 }, { "epoch": 1.75, "learning_rate": 4.0464437335697195e-05, "loss": 0.0636, "step": 6780 }, { "epoch": 1.75, "learning_rate": 4.0450474430894406e-05, "loss": 0.0272, "step": 6781 }, { "epoch": 1.75, "learning_rate": 4.0436512298963305e-05, "loss": 0.0627, "step": 6782 }, { "epoch": 1.75, "learning_rate": 4.042255094103386e-05, "loss": 0.0465, "step": 6783 }, { "epoch": 1.75, "learning_rate": 4.040859035823604e-05, "loss": 0.0461, "step": 6784 }, { "epoch": 1.75, "learning_rate": 4.03946305516997e-05, "loss": 0.0433, "step": 6785 }, { "epoch": 1.75, "learning_rate": 4.038067152255466e-05, "loss": 0.0332, "step": 6786 }, { "epoch": 1.75, "learning_rate": 4.0366713271930654e-05, "loss": 0.0495, "step": 6787 }, { "epoch": 1.75, "learning_rate": 4.035275580095737e-05, "loss": 0.0398, "step": 6788 }, { "epoch": 1.75, "learning_rate": 4.033879911076442e-05, "loss": 0.0482, "step": 6789 }, { "epoch": 1.75, "learning_rate": 4.032484320248138e-05, "loss": 0.0483, "step": 6790 }, { "epoch": 1.75, "learning_rate": 4.031088807723772e-05, "loss": 0.0367, "step": 6791 }, { "epoch": 1.75, "learning_rate": 4.0296933736162895e-05, "loss": 0.0505, "step": 6792 }, { "epoch": 1.75, "learning_rate": 4.028298018038625e-05, "loss": 0.0419, "step": 6793 }, { "epoch": 1.75, "learning_rate": 4.0269027411037094e-05, "loss": 0.0391, "step": 6794 }, { "epoch": 1.75, "learning_rate": 4.0255075429244665e-05, "loss": 0.0612, "step": 6795 }, { "epoch": 1.75, "learning_rate": 4.024112423613816e-05, "loss": 0.0428, "step": 6796 }, { "epoch": 1.75, "learning_rate": 4.0227173832846656e-05, "loss": 0.0371, "step": 6797 }, { "epoch": 1.75, "learning_rate": 4.021322422049922e-05, "loss": 0.0299, "step": 6798 }, { "epoch": 1.75, "learning_rate": 4.0199275400224834e-05, "loss": 0.0591, "step": 6799 }, { "epoch": 1.75, "learning_rate": 4.018532737315243e-05, "loss": 0.0308, "step": 6800 }, { "epoch": 1.76, "learning_rate": 4.017138014041083e-05, "loss": 0.0315, "step": 6801 }, { "epoch": 1.76, "learning_rate": 4.015743370312885e-05, "loss": 0.0404, "step": 6802 }, { "epoch": 1.76, "learning_rate": 4.0143488062435225e-05, "loss": 0.0385, "step": 6803 }, { "epoch": 1.76, "learning_rate": 4.01295432194586e-05, "loss": 0.0297, "step": 6804 }, { "epoch": 1.76, "learning_rate": 4.0115599175327583e-05, "loss": 0.0294, "step": 6805 }, { "epoch": 1.76, "learning_rate": 4.0101655931170695e-05, "loss": 0.0499, "step": 6806 }, { "epoch": 1.76, "learning_rate": 4.008771348811641e-05, "loss": 0.0364, "step": 6807 }, { "epoch": 1.76, "learning_rate": 4.007377184729314e-05, "loss": 0.031, "step": 6808 }, { "epoch": 1.76, "learning_rate": 4.005983100982923e-05, "loss": 0.0455, "step": 6809 }, { "epoch": 1.76, "learning_rate": 4.0045890976852925e-05, "loss": 0.0357, "step": 6810 }, { "epoch": 1.76, "learning_rate": 4.0031951749492446e-05, "loss": 0.0433, "step": 6811 }, { "epoch": 1.76, "learning_rate": 4.001801332887595e-05, "loss": 0.0368, "step": 6812 }, { "epoch": 1.76, "learning_rate": 4.00040757161315e-05, "loss": 0.0421, "step": 6813 }, { "epoch": 1.76, "learning_rate": 3.999013891238712e-05, "loss": 0.0462, "step": 6814 }, { "epoch": 1.76, "learning_rate": 3.997620291877075e-05, "loss": 0.0495, "step": 6815 }, { "epoch": 1.76, "learning_rate": 3.9962267736410266e-05, "loss": 0.0372, "step": 6816 }, { "epoch": 1.76, "learning_rate": 3.994833336643349e-05, "loss": 0.0544, "step": 6817 }, { "epoch": 1.76, "learning_rate": 3.9934399809968174e-05, "loss": 0.0493, "step": 6818 }, { "epoch": 1.76, "learning_rate": 3.992046706814201e-05, "loss": 0.0519, "step": 6819 }, { "epoch": 1.76, "learning_rate": 3.9906535142082605e-05, "loss": 0.0331, "step": 6820 }, { "epoch": 1.76, "learning_rate": 3.989260403291751e-05, "loss": 0.0543, "step": 6821 }, { "epoch": 1.76, "learning_rate": 3.987867374177423e-05, "loss": 0.045, "step": 6822 }, { "epoch": 1.76, "learning_rate": 3.9864744269780155e-05, "loss": 0.0379, "step": 6823 }, { "epoch": 1.76, "learning_rate": 3.985081561806266e-05, "loss": 0.0279, "step": 6824 }, { "epoch": 1.76, "learning_rate": 3.983688778774902e-05, "loss": 0.0444, "step": 6825 }, { "epoch": 1.76, "learning_rate": 3.982296077996647e-05, "loss": 0.0351, "step": 6826 }, { "epoch": 1.76, "learning_rate": 3.980903459584217e-05, "loss": 0.0456, "step": 6827 }, { "epoch": 1.76, "learning_rate": 3.979510923650318e-05, "loss": 0.0252, "step": 6828 }, { "epoch": 1.76, "learning_rate": 3.9781184703076534e-05, "loss": 0.0401, "step": 6829 }, { "epoch": 1.76, "learning_rate": 3.97672609966892e-05, "loss": 0.047, "step": 6830 }, { "epoch": 1.76, "learning_rate": 3.975333811846804e-05, "loss": 0.048, "step": 6831 }, { "epoch": 1.76, "learning_rate": 3.9739416069539906e-05, "loss": 0.0527, "step": 6832 }, { "epoch": 1.76, "learning_rate": 3.972549485103152e-05, "loss": 0.0347, "step": 6833 }, { "epoch": 1.76, "learning_rate": 3.971157446406958e-05, "loss": 0.0328, "step": 6834 }, { "epoch": 1.76, "learning_rate": 3.9697654909780726e-05, "loss": 0.0298, "step": 6835 }, { "epoch": 1.76, "learning_rate": 3.968373618929145e-05, "loss": 0.0326, "step": 6836 }, { "epoch": 1.76, "learning_rate": 3.9669818303728315e-05, "loss": 0.0414, "step": 6837 }, { "epoch": 1.76, "learning_rate": 3.965590125421768e-05, "loss": 0.0416, "step": 6838 }, { "epoch": 1.76, "learning_rate": 3.96419850418859e-05, "loss": 0.0351, "step": 6839 }, { "epoch": 1.77, "learning_rate": 3.9628069667859285e-05, "loss": 0.0392, "step": 6840 }, { "epoch": 1.77, "learning_rate": 3.961415513326401e-05, "loss": 0.0363, "step": 6841 }, { "epoch": 1.77, "learning_rate": 3.960024143922625e-05, "loss": 0.0313, "step": 6842 }, { "epoch": 1.77, "learning_rate": 3.9586328586872064e-05, "loss": 0.0448, "step": 6843 }, { "epoch": 1.77, "learning_rate": 3.957241657732746e-05, "loss": 0.0446, "step": 6844 }, { "epoch": 1.77, "learning_rate": 3.9558505411718414e-05, "loss": 0.0357, "step": 6845 }, { "epoch": 1.77, "learning_rate": 3.954459509117075e-05, "loss": 0.0365, "step": 6846 }, { "epoch": 1.77, "learning_rate": 3.953068561681029e-05, "loss": 0.0294, "step": 6847 }, { "epoch": 1.77, "learning_rate": 3.9516776989762785e-05, "loss": 0.0378, "step": 6848 }, { "epoch": 1.77, "learning_rate": 3.950286921115386e-05, "loss": 0.0382, "step": 6849 }, { "epoch": 1.77, "learning_rate": 3.948896228210917e-05, "loss": 0.0436, "step": 6850 }, { "epoch": 1.77, "learning_rate": 3.94750562037542e-05, "loss": 0.0412, "step": 6851 }, { "epoch": 1.77, "learning_rate": 3.9461150977214434e-05, "loss": 0.046, "step": 6852 }, { "epoch": 1.77, "learning_rate": 3.9447246603615265e-05, "loss": 0.0483, "step": 6853 }, { "epoch": 1.77, "learning_rate": 3.943334308408198e-05, "loss": 0.0396, "step": 6854 }, { "epoch": 1.77, "learning_rate": 3.941944041973988e-05, "loss": 0.0468, "step": 6855 }, { "epoch": 1.77, "learning_rate": 3.9405538611714114e-05, "loss": 0.0447, "step": 6856 }, { "epoch": 1.77, "learning_rate": 3.9391637661129814e-05, "loss": 0.0386, "step": 6857 }, { "epoch": 1.77, "learning_rate": 3.937773756911204e-05, "loss": 0.0299, "step": 6858 }, { "epoch": 1.77, "learning_rate": 3.9363838336785715e-05, "loss": 0.0361, "step": 6859 }, { "epoch": 1.77, "learning_rate": 3.934993996527582e-05, "loss": 0.0358, "step": 6860 }, { "epoch": 1.77, "learning_rate": 3.933604245570714e-05, "loss": 0.0468, "step": 6861 }, { "epoch": 1.77, "learning_rate": 3.932214580920443e-05, "loss": 0.06, "step": 6862 }, { "epoch": 1.77, "learning_rate": 3.930825002689244e-05, "loss": 0.0337, "step": 6863 }, { "epoch": 1.77, "learning_rate": 3.929435510989577e-05, "loss": 0.0563, "step": 6864 }, { "epoch": 1.77, "learning_rate": 3.928046105933896e-05, "loss": 0.0402, "step": 6865 }, { "epoch": 1.77, "learning_rate": 3.9266567876346526e-05, "loss": 0.0488, "step": 6866 }, { "epoch": 1.77, "learning_rate": 3.9252675562042853e-05, "loss": 0.0401, "step": 6867 }, { "epoch": 1.77, "learning_rate": 3.9238784117552334e-05, "loss": 0.0384, "step": 6868 }, { "epoch": 1.77, "learning_rate": 3.9224893543999204e-05, "loss": 0.0371, "step": 6869 }, { "epoch": 1.77, "learning_rate": 3.921100384250767e-05, "loss": 0.0365, "step": 6870 }, { "epoch": 1.77, "learning_rate": 3.9197115014201916e-05, "loss": 0.0386, "step": 6871 }, { "epoch": 1.77, "learning_rate": 3.918322706020593e-05, "loss": 0.0342, "step": 6872 }, { "epoch": 1.77, "learning_rate": 3.9169339981643773e-05, "loss": 0.0449, "step": 6873 }, { "epoch": 1.77, "learning_rate": 3.915545377963934e-05, "loss": 0.0403, "step": 6874 }, { "epoch": 1.77, "learning_rate": 3.9141568455316466e-05, "loss": 0.0409, "step": 6875 }, { "epoch": 1.77, "learning_rate": 3.912768400979896e-05, "loss": 0.0453, "step": 6876 }, { "epoch": 1.77, "learning_rate": 3.911380044421052e-05, "loss": 0.0468, "step": 6877 }, { "epoch": 1.77, "learning_rate": 3.909991775967479e-05, "loss": 0.0388, "step": 6878 }, { "epoch": 1.78, "learning_rate": 3.908603595731533e-05, "loss": 0.0347, "step": 6879 }, { "epoch": 1.78, "learning_rate": 3.9072155038255625e-05, "loss": 0.046, "step": 6880 }, { "epoch": 1.78, "learning_rate": 3.905827500361914e-05, "loss": 0.0594, "step": 6881 }, { "epoch": 1.78, "learning_rate": 3.9044395854529183e-05, "loss": 0.037, "step": 6882 }, { "epoch": 1.78, "learning_rate": 3.903051759210905e-05, "loss": 0.0377, "step": 6883 }, { "epoch": 1.78, "learning_rate": 3.901664021748197e-05, "loss": 0.0529, "step": 6884 }, { "epoch": 1.78, "learning_rate": 3.900276373177104e-05, "loss": 0.0392, "step": 6885 }, { "epoch": 1.78, "learning_rate": 3.8988888136099366e-05, "loss": 0.0365, "step": 6886 }, { "epoch": 1.78, "learning_rate": 3.897501343158993e-05, "loss": 0.0461, "step": 6887 }, { "epoch": 1.78, "learning_rate": 3.8961139619365625e-05, "loss": 0.0419, "step": 6888 }, { "epoch": 1.78, "learning_rate": 3.894726670054935e-05, "loss": 0.0403, "step": 6889 }, { "epoch": 1.78, "learning_rate": 3.8933394676263827e-05, "loss": 0.0369, "step": 6890 }, { "epoch": 1.78, "learning_rate": 3.8919523547631814e-05, "loss": 0.0272, "step": 6891 }, { "epoch": 1.78, "learning_rate": 3.8905653315775916e-05, "loss": 0.0349, "step": 6892 }, { "epoch": 1.78, "learning_rate": 3.889178398181867e-05, "loss": 0.0286, "step": 6893 }, { "epoch": 1.78, "learning_rate": 3.887791554688261e-05, "loss": 0.0568, "step": 6894 }, { "epoch": 1.78, "learning_rate": 3.8864048012090105e-05, "loss": 0.0435, "step": 6895 }, { "epoch": 1.78, "learning_rate": 3.885018137856354e-05, "loss": 0.0282, "step": 6896 }, { "epoch": 1.78, "learning_rate": 3.883631564742516e-05, "loss": 0.0245, "step": 6897 }, { "epoch": 1.78, "learning_rate": 3.882245081979714e-05, "loss": 0.0279, "step": 6898 }, { "epoch": 1.78, "learning_rate": 3.880858689680164e-05, "loss": 0.0313, "step": 6899 }, { "epoch": 1.78, "learning_rate": 3.879472387956069e-05, "loss": 0.0573, "step": 6900 }, { "epoch": 1.78, "learning_rate": 3.878086176919625e-05, "loss": 0.0374, "step": 6901 }, { "epoch": 1.78, "learning_rate": 3.876700056683026e-05, "loss": 0.0532, "step": 6902 }, { "epoch": 1.78, "learning_rate": 3.87531402735845e-05, "loss": 0.037, "step": 6903 }, { "epoch": 1.78, "learning_rate": 3.8739280890580784e-05, "loss": 0.0424, "step": 6904 }, { "epoch": 1.78, "learning_rate": 3.872542241894076e-05, "loss": 0.0345, "step": 6905 }, { "epoch": 1.78, "learning_rate": 3.871156485978601e-05, "loss": 0.0561, "step": 6906 }, { "epoch": 1.78, "learning_rate": 3.869770821423812e-05, "loss": 0.0559, "step": 6907 }, { "epoch": 1.78, "learning_rate": 3.86838524834185e-05, "loss": 0.0374, "step": 6908 }, { "epoch": 1.78, "learning_rate": 3.8669997668448586e-05, "loss": 0.0446, "step": 6909 }, { "epoch": 1.78, "learning_rate": 3.865614377044967e-05, "loss": 0.0283, "step": 6910 }, { "epoch": 1.78, "learning_rate": 3.864229079054296e-05, "loss": 0.0274, "step": 6911 }, { "epoch": 1.78, "learning_rate": 3.862843872984966e-05, "loss": 0.0378, "step": 6912 }, { "epoch": 1.78, "learning_rate": 3.8614587589490844e-05, "loss": 0.0413, "step": 6913 }, { "epoch": 1.78, "learning_rate": 3.86007373705875e-05, "loss": 0.0452, "step": 6914 }, { "epoch": 1.78, "learning_rate": 3.8586888074260614e-05, "loss": 0.0282, "step": 6915 }, { "epoch": 1.78, "learning_rate": 3.8573039701631005e-05, "loss": 0.0397, "step": 6916 }, { "epoch": 1.79, "learning_rate": 3.8559192253819514e-05, "loss": 0.0491, "step": 6917 }, { "epoch": 1.79, "learning_rate": 3.8545345731946826e-05, "loss": 0.0426, "step": 6918 }, { "epoch": 1.79, "learning_rate": 3.8531500137133566e-05, "loss": 0.0496, "step": 6919 }, { "epoch": 1.79, "learning_rate": 3.8517655470500337e-05, "loss": 0.0503, "step": 6920 }, { "epoch": 1.79, "learning_rate": 3.850381173316758e-05, "loss": 0.0326, "step": 6921 }, { "epoch": 1.79, "learning_rate": 3.8489968926255774e-05, "loss": 0.054, "step": 6922 }, { "epoch": 1.79, "learning_rate": 3.847612705088522e-05, "loss": 0.0411, "step": 6923 }, { "epoch": 1.79, "learning_rate": 3.846228610817617e-05, "loss": 0.0398, "step": 6924 }, { "epoch": 1.79, "learning_rate": 3.844844609924885e-05, "loss": 0.0366, "step": 6925 }, { "epoch": 1.79, "learning_rate": 3.843460702522336e-05, "loss": 0.0384, "step": 6926 }, { "epoch": 1.79, "learning_rate": 3.8420768887219696e-05, "loss": 0.0459, "step": 6927 }, { "epoch": 1.79, "learning_rate": 3.840693168635789e-05, "loss": 0.0588, "step": 6928 }, { "epoch": 1.79, "learning_rate": 3.8393095423757766e-05, "loss": 0.0275, "step": 6929 }, { "epoch": 1.79, "learning_rate": 3.837926010053918e-05, "loss": 0.0388, "step": 6930 }, { "epoch": 1.79, "learning_rate": 3.836542571782186e-05, "loss": 0.0294, "step": 6931 }, { "epoch": 1.79, "learning_rate": 3.8351592276725425e-05, "loss": 0.0385, "step": 6932 }, { "epoch": 1.79, "learning_rate": 3.83377597783695e-05, "loss": 0.0348, "step": 6933 }, { "epoch": 1.79, "learning_rate": 3.832392822387355e-05, "loss": 0.0356, "step": 6934 }, { "epoch": 1.79, "learning_rate": 3.831009761435706e-05, "loss": 0.0281, "step": 6935 }, { "epoch": 1.79, "learning_rate": 3.8296267950939344e-05, "loss": 0.0389, "step": 6936 }, { "epoch": 1.79, "learning_rate": 3.828243923473966e-05, "loss": 0.0464, "step": 6937 }, { "epoch": 1.79, "learning_rate": 3.826861146687726e-05, "loss": 0.0247, "step": 6938 }, { "epoch": 1.79, "learning_rate": 3.825478464847122e-05, "loss": 0.0396, "step": 6939 }, { "epoch": 1.79, "learning_rate": 3.824095878064062e-05, "loss": 0.0382, "step": 6940 }, { "epoch": 1.79, "learning_rate": 3.8227133864504414e-05, "loss": 0.0462, "step": 6941 }, { "epoch": 1.79, "learning_rate": 3.821330990118147e-05, "loss": 0.0444, "step": 6942 }, { "epoch": 1.79, "learning_rate": 3.819948689179066e-05, "loss": 0.0387, "step": 6943 }, { "epoch": 1.79, "learning_rate": 3.818566483745067e-05, "loss": 0.0448, "step": 6944 }, { "epoch": 1.79, "learning_rate": 3.817184373928017e-05, "loss": 0.0296, "step": 6945 }, { "epoch": 1.79, "learning_rate": 3.815802359839776e-05, "loss": 0.0309, "step": 6946 }, { "epoch": 1.79, "learning_rate": 3.8144204415921926e-05, "loss": 0.0416, "step": 6947 }, { "epoch": 1.79, "learning_rate": 3.8130386192971126e-05, "loss": 0.0363, "step": 6948 }, { "epoch": 1.79, "learning_rate": 3.811656893066368e-05, "loss": 0.0328, "step": 6949 }, { "epoch": 1.79, "learning_rate": 3.810275263011786e-05, "loss": 0.0412, "step": 6950 }, { "epoch": 1.79, "learning_rate": 3.808893729245189e-05, "loss": 0.0393, "step": 6951 }, { "epoch": 1.79, "learning_rate": 3.807512291878384e-05, "loss": 0.0337, "step": 6952 }, { "epoch": 1.79, "learning_rate": 3.80613095102318e-05, "loss": 0.0342, "step": 6953 }, { "epoch": 1.79, "learning_rate": 3.804749706791371e-05, "loss": 0.0475, "step": 6954 }, { "epoch": 1.79, "learning_rate": 3.8033685592947424e-05, "loss": 0.0371, "step": 6955 }, { "epoch": 1.8, "learning_rate": 3.801987508645079e-05, "loss": 0.0443, "step": 6956 }, { "epoch": 1.8, "learning_rate": 3.800606554954151e-05, "loss": 0.0488, "step": 6957 }, { "epoch": 1.8, "learning_rate": 3.799225698333722e-05, "loss": 0.0419, "step": 6958 }, { "epoch": 1.8, "learning_rate": 3.797844938895553e-05, "loss": 0.0397, "step": 6959 }, { "epoch": 1.8, "learning_rate": 3.796464276751388e-05, "loss": 0.0412, "step": 6960 }, { "epoch": 1.8, "learning_rate": 3.7950837120129727e-05, "loss": 0.0489, "step": 6961 }, { "epoch": 1.8, "learning_rate": 3.793703244792038e-05, "loss": 0.0325, "step": 6962 }, { "epoch": 1.8, "learning_rate": 3.7923228752003074e-05, "loss": 0.0435, "step": 6963 }, { "epoch": 1.8, "learning_rate": 3.790942603349503e-05, "loss": 0.0416, "step": 6964 }, { "epoch": 1.8, "learning_rate": 3.789562429351329e-05, "loss": 0.0414, "step": 6965 }, { "epoch": 1.8, "learning_rate": 3.7881823533174914e-05, "loss": 0.039, "step": 6966 }, { "epoch": 1.8, "learning_rate": 3.7868023753596824e-05, "loss": 0.0446, "step": 6967 }, { "epoch": 1.8, "learning_rate": 3.785422495589586e-05, "loss": 0.0406, "step": 6968 }, { "epoch": 1.8, "learning_rate": 3.784042714118883e-05, "loss": 0.0441, "step": 6969 }, { "epoch": 1.8, "learning_rate": 3.7826630310592416e-05, "loss": 0.0383, "step": 6970 }, { "epoch": 1.8, "learning_rate": 3.781283446522322e-05, "loss": 0.0411, "step": 6971 }, { "epoch": 1.8, "learning_rate": 3.779903960619782e-05, "loss": 0.0534, "step": 6972 }, { "epoch": 1.8, "learning_rate": 3.778524573463263e-05, "loss": 0.0608, "step": 6973 }, { "epoch": 1.8, "learning_rate": 3.7771452851644084e-05, "loss": 0.0409, "step": 6974 }, { "epoch": 1.8, "learning_rate": 3.775766095834845e-05, "loss": 0.0306, "step": 6975 }, { "epoch": 1.8, "learning_rate": 3.7743870055861925e-05, "loss": 0.0319, "step": 6976 }, { "epoch": 1.8, "learning_rate": 3.773008014530069e-05, "loss": 0.0366, "step": 6977 }, { "epoch": 1.8, "learning_rate": 3.7716291227780765e-05, "loss": 0.0597, "step": 6978 }, { "epoch": 1.8, "learning_rate": 3.770250330441817e-05, "loss": 0.0431, "step": 6979 }, { "epoch": 1.8, "learning_rate": 3.7688716376328784e-05, "loss": 0.0456, "step": 6980 }, { "epoch": 1.8, "learning_rate": 3.76749304446284e-05, "loss": 0.0339, "step": 6981 }, { "epoch": 1.8, "learning_rate": 3.7661145510432795e-05, "loss": 0.0366, "step": 6982 }, { "epoch": 1.8, "learning_rate": 3.764736157485761e-05, "loss": 0.0445, "step": 6983 }, { "epoch": 1.8, "learning_rate": 3.763357863901839e-05, "loss": 0.0504, "step": 6984 }, { "epoch": 1.8, "learning_rate": 3.761979670403068e-05, "loss": 0.0589, "step": 6985 }, { "epoch": 1.8, "learning_rate": 3.760601577100985e-05, "loss": 0.0346, "step": 6986 }, { "epoch": 1.8, "learning_rate": 3.759223584107126e-05, "loss": 0.0264, "step": 6987 }, { "epoch": 1.8, "learning_rate": 3.7578456915330175e-05, "loss": 0.0458, "step": 6988 }, { "epoch": 1.8, "learning_rate": 3.75646789949017e-05, "loss": 0.04, "step": 6989 }, { "epoch": 1.8, "learning_rate": 3.7550902080900996e-05, "loss": 0.0516, "step": 6990 }, { "epoch": 1.8, "learning_rate": 3.753712617444302e-05, "loss": 0.0527, "step": 6991 }, { "epoch": 1.8, "learning_rate": 3.7523351276642736e-05, "loss": 0.0429, "step": 6992 }, { "epoch": 1.8, "learning_rate": 3.7509577388614963e-05, "loss": 0.0445, "step": 6993 }, { "epoch": 1.8, "learning_rate": 3.7495804511474454e-05, "loss": 0.0404, "step": 6994 }, { "epoch": 1.81, "learning_rate": 3.7482032646335916e-05, "loss": 0.0413, "step": 6995 }, { "epoch": 1.81, "learning_rate": 3.7468261794313944e-05, "loss": 0.0536, "step": 6996 }, { "epoch": 1.81, "learning_rate": 3.745449195652302e-05, "loss": 0.0368, "step": 6997 }, { "epoch": 1.81, "learning_rate": 3.7440723134077616e-05, "loss": 0.0372, "step": 6998 }, { "epoch": 1.81, "learning_rate": 3.742695532809206e-05, "loss": 0.048, "step": 6999 }, { "epoch": 1.81, "learning_rate": 3.741318853968065e-05, "loss": 0.0376, "step": 7000 }, { "epoch": 1.81, "learning_rate": 3.739942276995756e-05, "loss": 0.0533, "step": 7001 }, { "epoch": 1.81, "learning_rate": 3.7385658020036865e-05, "loss": 0.0319, "step": 7002 }, { "epoch": 1.81, "learning_rate": 3.737189429103264e-05, "loss": 0.0463, "step": 7003 }, { "epoch": 1.81, "learning_rate": 3.7358131584058764e-05, "loss": 0.0233, "step": 7004 }, { "epoch": 1.81, "learning_rate": 3.734436990022916e-05, "loss": 0.0428, "step": 7005 }, { "epoch": 1.81, "learning_rate": 3.7330609240657574e-05, "loss": 0.0399, "step": 7006 }, { "epoch": 1.81, "learning_rate": 3.731684960645766e-05, "loss": 0.0279, "step": 7007 }, { "epoch": 1.81, "learning_rate": 3.73030909987431e-05, "loss": 0.0344, "step": 7008 }, { "epoch": 1.81, "learning_rate": 3.728933341862736e-05, "loss": 0.0399, "step": 7009 }, { "epoch": 1.81, "learning_rate": 3.7275576867223896e-05, "loss": 0.0419, "step": 7010 }, { "epoch": 1.81, "learning_rate": 3.726182134564608e-05, "loss": 0.0307, "step": 7011 }, { "epoch": 1.81, "learning_rate": 3.7248066855007166e-05, "loss": 0.0328, "step": 7012 }, { "epoch": 1.81, "learning_rate": 3.7234313396420383e-05, "loss": 0.0363, "step": 7013 }, { "epoch": 1.81, "learning_rate": 3.722056097099881e-05, "loss": 0.0341, "step": 7014 }, { "epoch": 1.81, "learning_rate": 3.720680957985547e-05, "loss": 0.0392, "step": 7015 }, { "epoch": 1.81, "learning_rate": 3.7193059224103324e-05, "loss": 0.0353, "step": 7016 }, { "epoch": 1.81, "learning_rate": 3.71793099048552e-05, "loss": 0.0341, "step": 7017 }, { "epoch": 1.81, "learning_rate": 3.716556162322391e-05, "loss": 0.0309, "step": 7018 }, { "epoch": 1.81, "learning_rate": 3.715181438032212e-05, "loss": 0.0462, "step": 7019 }, { "epoch": 1.81, "learning_rate": 3.713806817726242e-05, "loss": 0.0324, "step": 7020 }, { "epoch": 1.81, "learning_rate": 3.712432301515737e-05, "loss": 0.0375, "step": 7021 }, { "epoch": 1.81, "learning_rate": 3.71105788951194e-05, "loss": 0.0385, "step": 7022 }, { "epoch": 1.81, "learning_rate": 3.709683581826081e-05, "loss": 0.0495, "step": 7023 }, { "epoch": 1.81, "learning_rate": 3.708309378569394e-05, "loss": 0.0357, "step": 7024 }, { "epoch": 1.81, "learning_rate": 3.706935279853092e-05, "loss": 0.0358, "step": 7025 }, { "epoch": 1.81, "learning_rate": 3.705561285788389e-05, "loss": 0.0384, "step": 7026 }, { "epoch": 1.81, "learning_rate": 3.7041873964864854e-05, "loss": 0.049, "step": 7027 }, { "epoch": 1.81, "learning_rate": 3.702813612058571e-05, "loss": 0.0325, "step": 7028 }, { "epoch": 1.81, "learning_rate": 3.7014399326158344e-05, "loss": 0.0372, "step": 7029 }, { "epoch": 1.81, "learning_rate": 3.7000663582694475e-05, "loss": 0.0452, "step": 7030 }, { "epoch": 1.81, "learning_rate": 3.698692889130583e-05, "loss": 0.0428, "step": 7031 }, { "epoch": 1.81, "learning_rate": 3.697319525310397e-05, "loss": 0.0518, "step": 7032 }, { "epoch": 1.81, "learning_rate": 3.695946266920036e-05, "loss": 0.0361, "step": 7033 }, { "epoch": 1.82, "learning_rate": 3.69457311407065e-05, "loss": 0.042, "step": 7034 }, { "epoch": 1.82, "learning_rate": 3.6932000668733675e-05, "loss": 0.0347, "step": 7035 }, { "epoch": 1.82, "learning_rate": 3.691827125439312e-05, "loss": 0.0347, "step": 7036 }, { "epoch": 1.82, "learning_rate": 3.690454289879603e-05, "loss": 0.0404, "step": 7037 }, { "epoch": 1.82, "learning_rate": 3.6890815603053466e-05, "loss": 0.0419, "step": 7038 }, { "epoch": 1.82, "learning_rate": 3.687708936827644e-05, "loss": 0.0456, "step": 7039 }, { "epoch": 1.82, "learning_rate": 3.686336419557583e-05, "loss": 0.0394, "step": 7040 }, { "epoch": 1.82, "learning_rate": 3.6849640086062454e-05, "loss": 0.0374, "step": 7041 }, { "epoch": 1.82, "learning_rate": 3.683591704084707e-05, "loss": 0.0364, "step": 7042 }, { "epoch": 1.82, "learning_rate": 3.68221950610403e-05, "loss": 0.0357, "step": 7043 }, { "epoch": 1.82, "learning_rate": 3.680847414775273e-05, "loss": 0.0467, "step": 7044 }, { "epoch": 1.82, "learning_rate": 3.6794754302094825e-05, "loss": 0.0572, "step": 7045 }, { "epoch": 1.82, "learning_rate": 3.678103552517694e-05, "loss": 0.0371, "step": 7046 }, { "epoch": 1.82, "learning_rate": 3.6767317818109445e-05, "loss": 0.0367, "step": 7047 }, { "epoch": 1.82, "learning_rate": 3.675360118200251e-05, "loss": 0.0407, "step": 7048 }, { "epoch": 1.82, "learning_rate": 3.673988561796624e-05, "loss": 0.0454, "step": 7049 }, { "epoch": 1.82, "learning_rate": 3.672617112711073e-05, "loss": 0.0459, "step": 7050 }, { "epoch": 1.82, "learning_rate": 3.671245771054589e-05, "loss": 0.0307, "step": 7051 }, { "epoch": 1.82, "learning_rate": 3.669874536938164e-05, "loss": 0.0401, "step": 7052 }, { "epoch": 1.82, "learning_rate": 3.6685034104727714e-05, "loss": 0.0475, "step": 7053 }, { "epoch": 1.82, "learning_rate": 3.667132391769381e-05, "loss": 0.0406, "step": 7054 }, { "epoch": 1.82, "learning_rate": 3.665761480938957e-05, "loss": 0.0389, "step": 7055 }, { "epoch": 1.82, "learning_rate": 3.6643906780924465e-05, "loss": 0.0335, "step": 7056 }, { "epoch": 1.82, "learning_rate": 3.6630199833407966e-05, "loss": 0.0324, "step": 7057 }, { "epoch": 1.82, "learning_rate": 3.6616493967949406e-05, "loss": 0.0432, "step": 7058 }, { "epoch": 1.82, "learning_rate": 3.660278918565801e-05, "loss": 0.0346, "step": 7059 }, { "epoch": 1.82, "learning_rate": 3.658908548764301e-05, "loss": 0.0392, "step": 7060 }, { "epoch": 1.82, "learning_rate": 3.657538287501344e-05, "loss": 0.0343, "step": 7061 }, { "epoch": 1.82, "learning_rate": 3.6561681348878296e-05, "loss": 0.0442, "step": 7062 }, { "epoch": 1.82, "learning_rate": 3.65479809103465e-05, "loss": 0.0295, "step": 7063 }, { "epoch": 1.82, "learning_rate": 3.653428156052685e-05, "loss": 0.0356, "step": 7064 }, { "epoch": 1.82, "learning_rate": 3.6520583300528114e-05, "loss": 0.0367, "step": 7065 }, { "epoch": 1.82, "learning_rate": 3.650688613145889e-05, "loss": 0.0341, "step": 7066 }, { "epoch": 1.82, "learning_rate": 3.649319005442775e-05, "loss": 0.0407, "step": 7067 }, { "epoch": 1.82, "learning_rate": 3.647949507054316e-05, "loss": 0.0475, "step": 7068 }, { "epoch": 1.82, "learning_rate": 3.646580118091348e-05, "loss": 0.0322, "step": 7069 }, { "epoch": 1.82, "learning_rate": 3.6452108386647025e-05, "loss": 0.0328, "step": 7070 }, { "epoch": 1.82, "learning_rate": 3.643841668885198e-05, "loss": 0.046, "step": 7071 }, { "epoch": 1.83, "learning_rate": 3.642472608863644e-05, "loss": 0.0436, "step": 7072 }, { "epoch": 1.83, "learning_rate": 3.641103658710845e-05, "loss": 0.0409, "step": 7073 }, { "epoch": 1.83, "learning_rate": 3.639734818537595e-05, "loss": 0.0539, "step": 7074 }, { "epoch": 1.83, "learning_rate": 3.638366088454672e-05, "loss": 0.048, "step": 7075 }, { "epoch": 1.83, "learning_rate": 3.63699746857286e-05, "loss": 0.0469, "step": 7076 }, { "epoch": 1.83, "learning_rate": 3.6356289590029185e-05, "loss": 0.0439, "step": 7077 }, { "epoch": 1.83, "learning_rate": 3.6342605598556104e-05, "loss": 0.0377, "step": 7078 }, { "epoch": 1.83, "learning_rate": 3.6328922712416824e-05, "loss": 0.0432, "step": 7079 }, { "epoch": 1.83, "learning_rate": 3.631524093271871e-05, "loss": 0.045, "step": 7080 }, { "epoch": 1.83, "learning_rate": 3.630156026056912e-05, "loss": 0.0438, "step": 7081 }, { "epoch": 1.83, "learning_rate": 3.628788069707524e-05, "loss": 0.0245, "step": 7082 }, { "epoch": 1.83, "learning_rate": 3.6274202243344215e-05, "loss": 0.0376, "step": 7083 }, { "epoch": 1.83, "learning_rate": 3.626052490048307e-05, "loss": 0.0462, "step": 7084 }, { "epoch": 1.83, "learning_rate": 3.6246848669598753e-05, "loss": 0.0558, "step": 7085 }, { "epoch": 1.83, "learning_rate": 3.623317355179814e-05, "loss": 0.0423, "step": 7086 }, { "epoch": 1.83, "learning_rate": 3.621949954818797e-05, "loss": 0.0325, "step": 7087 }, { "epoch": 1.83, "learning_rate": 3.6205826659874955e-05, "loss": 0.0249, "step": 7088 }, { "epoch": 1.83, "learning_rate": 3.619215488796567e-05, "loss": 0.0459, "step": 7089 }, { "epoch": 1.83, "learning_rate": 3.617848423356659e-05, "loss": 0.0344, "step": 7090 }, { "epoch": 1.83, "learning_rate": 3.616481469778416e-05, "loss": 0.036, "step": 7091 }, { "epoch": 1.83, "learning_rate": 3.615114628172467e-05, "loss": 0.0388, "step": 7092 }, { "epoch": 1.83, "learning_rate": 3.613747898649433e-05, "loss": 0.0487, "step": 7093 }, { "epoch": 1.83, "learning_rate": 3.612381281319933e-05, "loss": 0.0262, "step": 7094 }, { "epoch": 1.83, "learning_rate": 3.611014776294564e-05, "loss": 0.0422, "step": 7095 }, { "epoch": 1.83, "learning_rate": 3.60964838368393e-05, "loss": 0.0434, "step": 7096 }, { "epoch": 1.83, "learning_rate": 3.608282103598612e-05, "loss": 0.036, "step": 7097 }, { "epoch": 1.83, "learning_rate": 3.6069159361491854e-05, "loss": 0.0357, "step": 7098 }, { "epoch": 1.83, "learning_rate": 3.605549881446223e-05, "loss": 0.0213, "step": 7099 }, { "epoch": 1.83, "learning_rate": 3.6041839396002795e-05, "loss": 0.0419, "step": 7100 }, { "epoch": 1.83, "learning_rate": 3.602818110721909e-05, "loss": 0.0313, "step": 7101 }, { "epoch": 1.83, "learning_rate": 3.6014523949216497e-05, "loss": 0.0498, "step": 7102 }, { "epoch": 1.83, "learning_rate": 3.60008679231003e-05, "loss": 0.0289, "step": 7103 }, { "epoch": 1.83, "learning_rate": 3.598721302997578e-05, "loss": 0.0419, "step": 7104 }, { "epoch": 1.83, "learning_rate": 3.5973559270948044e-05, "loss": 0.029, "step": 7105 }, { "epoch": 1.83, "learning_rate": 3.59599066471221e-05, "loss": 0.0315, "step": 7106 }, { "epoch": 1.83, "learning_rate": 3.594625515960295e-05, "loss": 0.0468, "step": 7107 }, { "epoch": 1.83, "learning_rate": 3.593260480949541e-05, "loss": 0.0352, "step": 7108 }, { "epoch": 1.83, "learning_rate": 3.591895559790427e-05, "loss": 0.0448, "step": 7109 }, { "epoch": 1.83, "learning_rate": 3.590530752593419e-05, "loss": 0.0312, "step": 7110 }, { "epoch": 1.84, "learning_rate": 3.589166059468973e-05, "loss": 0.0386, "step": 7111 }, { "epoch": 1.84, "learning_rate": 3.587801480527542e-05, "loss": 0.0383, "step": 7112 }, { "epoch": 1.84, "learning_rate": 3.5864370158795604e-05, "loss": 0.0358, "step": 7113 }, { "epoch": 1.84, "learning_rate": 3.5850726656354626e-05, "loss": 0.0467, "step": 7114 }, { "epoch": 1.84, "learning_rate": 3.583708429905669e-05, "loss": 0.0485, "step": 7115 }, { "epoch": 1.84, "learning_rate": 3.5823443088005874e-05, "loss": 0.0301, "step": 7116 }, { "epoch": 1.84, "learning_rate": 3.580980302430625e-05, "loss": 0.0238, "step": 7117 }, { "epoch": 1.84, "learning_rate": 3.579616410906174e-05, "loss": 0.033, "step": 7118 }, { "epoch": 1.84, "learning_rate": 3.578252634337614e-05, "loss": 0.0444, "step": 7119 }, { "epoch": 1.84, "learning_rate": 3.576888972835326e-05, "loss": 0.0297, "step": 7120 }, { "epoch": 1.84, "learning_rate": 3.5755254265096695e-05, "loss": 0.0371, "step": 7121 }, { "epoch": 1.84, "learning_rate": 3.574161995471005e-05, "loss": 0.0489, "step": 7122 }, { "epoch": 1.84, "learning_rate": 3.572798679829676e-05, "loss": 0.0248, "step": 7123 }, { "epoch": 1.84, "learning_rate": 3.5714354796960194e-05, "loss": 0.042, "step": 7124 }, { "epoch": 1.84, "learning_rate": 3.5700723951803664e-05, "loss": 0.0333, "step": 7125 }, { "epoch": 1.84, "learning_rate": 3.56870942639303e-05, "loss": 0.0295, "step": 7126 }, { "epoch": 1.84, "learning_rate": 3.5673465734443255e-05, "loss": 0.0377, "step": 7127 }, { "epoch": 1.84, "learning_rate": 3.5659838364445505e-05, "loss": 0.0619, "step": 7128 }, { "epoch": 1.84, "learning_rate": 3.564621215503992e-05, "loss": 0.0463, "step": 7129 }, { "epoch": 1.84, "learning_rate": 3.563258710732936e-05, "loss": 0.0313, "step": 7130 }, { "epoch": 1.84, "learning_rate": 3.561896322241651e-05, "loss": 0.0352, "step": 7131 }, { "epoch": 1.84, "learning_rate": 3.560534050140399e-05, "loss": 0.0265, "step": 7132 }, { "epoch": 1.84, "learning_rate": 3.559171894539436e-05, "loss": 0.0322, "step": 7133 }, { "epoch": 1.84, "learning_rate": 3.5578098555490006e-05, "loss": 0.0223, "step": 7134 }, { "epoch": 1.84, "learning_rate": 3.556447933279331e-05, "loss": 0.0469, "step": 7135 }, { "epoch": 1.84, "learning_rate": 3.55508612784065e-05, "loss": 0.0392, "step": 7136 }, { "epoch": 1.84, "learning_rate": 3.55372443934317e-05, "loss": 0.0333, "step": 7137 }, { "epoch": 1.84, "learning_rate": 3.552362867897101e-05, "loss": 0.0353, "step": 7138 }, { "epoch": 1.84, "learning_rate": 3.5510014136126355e-05, "loss": 0.0346, "step": 7139 }, { "epoch": 1.84, "learning_rate": 3.549640076599964e-05, "loss": 0.0555, "step": 7140 }, { "epoch": 1.84, "learning_rate": 3.548278856969261e-05, "loss": 0.0346, "step": 7141 }, { "epoch": 1.84, "learning_rate": 3.546917754830691e-05, "loss": 0.0396, "step": 7142 }, { "epoch": 1.84, "learning_rate": 3.545556770294418e-05, "loss": 0.04, "step": 7143 }, { "epoch": 1.84, "learning_rate": 3.544195903470587e-05, "loss": 0.0337, "step": 7144 }, { "epoch": 1.84, "learning_rate": 3.5428351544693373e-05, "loss": 0.0254, "step": 7145 }, { "epoch": 1.84, "learning_rate": 3.5414745234008005e-05, "loss": 0.0265, "step": 7146 }, { "epoch": 1.84, "learning_rate": 3.540114010375093e-05, "loss": 0.0344, "step": 7147 }, { "epoch": 1.84, "learning_rate": 3.538753615502329e-05, "loss": 0.0559, "step": 7148 }, { "epoch": 1.84, "learning_rate": 3.5373933388926074e-05, "loss": 0.0242, "step": 7149 }, { "epoch": 1.85, "learning_rate": 3.5360331806560174e-05, "loss": 0.0304, "step": 7150 }, { "epoch": 1.85, "learning_rate": 3.5346731409026453e-05, "loss": 0.0374, "step": 7151 }, { "epoch": 1.85, "learning_rate": 3.533313219742559e-05, "loss": 0.0325, "step": 7152 }, { "epoch": 1.85, "learning_rate": 3.5319534172858234e-05, "loss": 0.0342, "step": 7153 }, { "epoch": 1.85, "learning_rate": 3.5305937336424924e-05, "loss": 0.0394, "step": 7154 }, { "epoch": 1.85, "learning_rate": 3.529234168922605e-05, "loss": 0.0239, "step": 7155 }, { "epoch": 1.85, "learning_rate": 3.527874723236199e-05, "loss": 0.0472, "step": 7156 }, { "epoch": 1.85, "learning_rate": 3.526515396693296e-05, "loss": 0.0301, "step": 7157 }, { "epoch": 1.85, "learning_rate": 3.525156189403911e-05, "loss": 0.031, "step": 7158 }, { "epoch": 1.85, "learning_rate": 3.52379710147805e-05, "loss": 0.0453, "step": 7159 }, { "epoch": 1.85, "learning_rate": 3.522438133025705e-05, "loss": 0.0269, "step": 7160 }, { "epoch": 1.85, "learning_rate": 3.521079284156865e-05, "loss": 0.0452, "step": 7161 }, { "epoch": 1.85, "learning_rate": 3.5197205549815045e-05, "loss": 0.0352, "step": 7162 }, { "epoch": 1.85, "learning_rate": 3.5183619456095866e-05, "loss": 0.0415, "step": 7163 }, { "epoch": 1.85, "learning_rate": 3.517003456151072e-05, "loss": 0.0476, "step": 7164 }, { "epoch": 1.85, "learning_rate": 3.5156450867159044e-05, "loss": 0.054, "step": 7165 }, { "epoch": 1.85, "learning_rate": 3.514286837414023e-05, "loss": 0.0529, "step": 7166 }, { "epoch": 1.85, "learning_rate": 3.512928708355354e-05, "loss": 0.0381, "step": 7167 }, { "epoch": 1.85, "learning_rate": 3.511570699649812e-05, "loss": 0.0377, "step": 7168 }, { "epoch": 1.85, "learning_rate": 3.51021281140731e-05, "loss": 0.0463, "step": 7169 }, { "epoch": 1.85, "learning_rate": 3.508855043737741e-05, "loss": 0.0464, "step": 7170 }, { "epoch": 1.85, "learning_rate": 3.507497396750996e-05, "loss": 0.0303, "step": 7171 }, { "epoch": 1.85, "learning_rate": 3.5061398705569545e-05, "loss": 0.0445, "step": 7172 }, { "epoch": 1.85, "learning_rate": 3.50478246526548e-05, "loss": 0.0394, "step": 7173 }, { "epoch": 1.85, "learning_rate": 3.503425180986438e-05, "loss": 0.0436, "step": 7174 }, { "epoch": 1.85, "learning_rate": 3.502068017829674e-05, "loss": 0.0405, "step": 7175 }, { "epoch": 1.85, "learning_rate": 3.500710975905026e-05, "loss": 0.0253, "step": 7176 }, { "epoch": 1.85, "learning_rate": 3.499354055322327e-05, "loss": 0.0467, "step": 7177 }, { "epoch": 1.85, "learning_rate": 3.497997256191393e-05, "loss": 0.0437, "step": 7178 }, { "epoch": 1.85, "learning_rate": 3.496640578622038e-05, "loss": 0.0452, "step": 7179 }, { "epoch": 1.85, "learning_rate": 3.495284022724059e-05, "loss": 0.038, "step": 7180 }, { "epoch": 1.85, "learning_rate": 3.493927588607244e-05, "loss": 0.029, "step": 7181 }, { "epoch": 1.85, "learning_rate": 3.492571276381379e-05, "loss": 0.0556, "step": 7182 }, { "epoch": 1.85, "learning_rate": 3.491215086156231e-05, "loss": 0.0317, "step": 7183 }, { "epoch": 1.85, "learning_rate": 3.4898590180415596e-05, "loss": 0.0391, "step": 7184 }, { "epoch": 1.85, "learning_rate": 3.4885030721471186e-05, "loss": 0.0516, "step": 7185 }, { "epoch": 1.85, "learning_rate": 3.487147248582644e-05, "loss": 0.0496, "step": 7186 }, { "epoch": 1.85, "learning_rate": 3.485791547457873e-05, "loss": 0.0423, "step": 7187 }, { "epoch": 1.85, "learning_rate": 3.484435968882521e-05, "loss": 0.0301, "step": 7188 }, { "epoch": 1.86, "learning_rate": 3.483080512966302e-05, "loss": 0.0615, "step": 7189 }, { "epoch": 1.86, "learning_rate": 3.4817251798189174e-05, "loss": 0.0317, "step": 7190 }, { "epoch": 1.86, "learning_rate": 3.4803699695500546e-05, "loss": 0.052, "step": 7191 }, { "epoch": 1.86, "learning_rate": 3.4790148822694e-05, "loss": 0.0242, "step": 7192 }, { "epoch": 1.86, "learning_rate": 3.477659918086621e-05, "loss": 0.0337, "step": 7193 }, { "epoch": 1.86, "learning_rate": 3.47630507711138e-05, "loss": 0.0502, "step": 7194 }, { "epoch": 1.86, "learning_rate": 3.47495035945333e-05, "loss": 0.0359, "step": 7195 }, { "epoch": 1.86, "learning_rate": 3.47359576522211e-05, "loss": 0.0521, "step": 7196 }, { "epoch": 1.86, "learning_rate": 3.472241294527352e-05, "loss": 0.0337, "step": 7197 }, { "epoch": 1.86, "learning_rate": 3.4708869474786786e-05, "loss": 0.0339, "step": 7198 }, { "epoch": 1.86, "learning_rate": 3.469532724185699e-05, "loss": 0.0542, "step": 7199 }, { "epoch": 1.86, "learning_rate": 3.4681786247580174e-05, "loss": 0.0354, "step": 7200 }, { "epoch": 1.86, "learning_rate": 3.466824649305222e-05, "loss": 0.0418, "step": 7201 }, { "epoch": 1.86, "learning_rate": 3.465470797936896e-05, "loss": 0.0338, "step": 7202 }, { "epoch": 1.86, "learning_rate": 3.464117070762611e-05, "loss": 0.0334, "step": 7203 }, { "epoch": 1.86, "learning_rate": 3.462763467891925e-05, "loss": 0.0363, "step": 7204 }, { "epoch": 1.86, "learning_rate": 3.461409989434395e-05, "loss": 0.0341, "step": 7205 }, { "epoch": 1.86, "learning_rate": 3.460056635499557e-05, "loss": 0.0425, "step": 7206 }, { "epoch": 1.86, "learning_rate": 3.4587034061969424e-05, "loss": 0.0356, "step": 7207 }, { "epoch": 1.86, "learning_rate": 3.457350301636075e-05, "loss": 0.0397, "step": 7208 }, { "epoch": 1.86, "learning_rate": 3.4559973219264643e-05, "loss": 0.0447, "step": 7209 }, { "epoch": 1.86, "learning_rate": 3.454644467177609e-05, "loss": 0.0478, "step": 7210 }, { "epoch": 1.86, "learning_rate": 3.453291737499001e-05, "loss": 0.0729, "step": 7211 }, { "epoch": 1.86, "learning_rate": 3.451939133000122e-05, "loss": 0.0474, "step": 7212 }, { "epoch": 1.86, "learning_rate": 3.450586653790442e-05, "loss": 0.0274, "step": 7213 }, { "epoch": 1.86, "learning_rate": 3.4492342999794204e-05, "loss": 0.0384, "step": 7214 }, { "epoch": 1.86, "learning_rate": 3.447882071676506e-05, "loss": 0.0504, "step": 7215 }, { "epoch": 1.86, "learning_rate": 3.446529968991142e-05, "loss": 0.0319, "step": 7216 }, { "epoch": 1.86, "learning_rate": 3.445177992032754e-05, "loss": 0.0479, "step": 7217 }, { "epoch": 1.86, "learning_rate": 3.4438261409107656e-05, "loss": 0.0383, "step": 7218 }, { "epoch": 1.86, "learning_rate": 3.442474415734583e-05, "loss": 0.0428, "step": 7219 }, { "epoch": 1.86, "learning_rate": 3.441122816613607e-05, "loss": 0.0361, "step": 7220 }, { "epoch": 1.86, "learning_rate": 3.4397713436572273e-05, "loss": 0.045, "step": 7221 }, { "epoch": 1.86, "learning_rate": 3.43841999697482e-05, "loss": 0.0324, "step": 7222 }, { "epoch": 1.86, "learning_rate": 3.437068776675756e-05, "loss": 0.039, "step": 7223 }, { "epoch": 1.86, "learning_rate": 3.435717682869392e-05, "loss": 0.0354, "step": 7224 }, { "epoch": 1.86, "learning_rate": 3.434366715665077e-05, "loss": 0.0412, "step": 7225 }, { "epoch": 1.86, "learning_rate": 3.433015875172151e-05, "loss": 0.0431, "step": 7226 }, { "epoch": 1.87, "learning_rate": 3.4316651614999366e-05, "loss": 0.0514, "step": 7227 }, { "epoch": 1.87, "learning_rate": 3.430314574757755e-05, "loss": 0.0401, "step": 7228 }, { "epoch": 1.87, "learning_rate": 3.4289641150549105e-05, "loss": 0.0397, "step": 7229 }, { "epoch": 1.87, "learning_rate": 3.4276137825007024e-05, "loss": 0.041, "step": 7230 }, { "epoch": 1.87, "learning_rate": 3.426263577204417e-05, "loss": 0.0324, "step": 7231 }, { "epoch": 1.87, "learning_rate": 3.424913499275328e-05, "loss": 0.0328, "step": 7232 }, { "epoch": 1.87, "learning_rate": 3.423563548822703e-05, "loss": 0.0554, "step": 7233 }, { "epoch": 1.87, "learning_rate": 3.422213725955797e-05, "loss": 0.0361, "step": 7234 }, { "epoch": 1.87, "learning_rate": 3.4208640307838566e-05, "loss": 0.046, "step": 7235 }, { "epoch": 1.87, "learning_rate": 3.419514463416116e-05, "loss": 0.0469, "step": 7236 }, { "epoch": 1.87, "learning_rate": 3.418165023961798e-05, "loss": 0.0589, "step": 7237 }, { "epoch": 1.87, "learning_rate": 3.416815712530118e-05, "loss": 0.0395, "step": 7238 }, { "epoch": 1.87, "learning_rate": 3.4154665292302814e-05, "loss": 0.033, "step": 7239 }, { "epoch": 1.87, "learning_rate": 3.414117474171479e-05, "loss": 0.034, "step": 7240 }, { "epoch": 1.87, "learning_rate": 3.412768547462894e-05, "loss": 0.0321, "step": 7241 }, { "epoch": 1.87, "learning_rate": 3.411419749213701e-05, "loss": 0.0321, "step": 7242 }, { "epoch": 1.87, "learning_rate": 3.4100710795330616e-05, "loss": 0.0344, "step": 7243 }, { "epoch": 1.87, "learning_rate": 3.408722538530128e-05, "loss": 0.0403, "step": 7244 }, { "epoch": 1.87, "learning_rate": 3.4073741263140405e-05, "loss": 0.0425, "step": 7245 }, { "epoch": 1.87, "learning_rate": 3.4060258429939296e-05, "loss": 0.0349, "step": 7246 }, { "epoch": 1.87, "learning_rate": 3.404677688678918e-05, "loss": 0.0359, "step": 7247 }, { "epoch": 1.87, "learning_rate": 3.403329663478114e-05, "loss": 0.0464, "step": 7248 }, { "epoch": 1.87, "learning_rate": 3.40198176750062e-05, "loss": 0.0463, "step": 7249 }, { "epoch": 1.87, "learning_rate": 3.4006340008555225e-05, "loss": 0.036, "step": 7250 }, { "epoch": 1.87, "learning_rate": 3.399286363651901e-05, "loss": 0.035, "step": 7251 }, { "epoch": 1.87, "learning_rate": 3.3979388559988236e-05, "loss": 0.0389, "step": 7252 }, { "epoch": 1.87, "learning_rate": 3.3965914780053496e-05, "loss": 0.0444, "step": 7253 }, { "epoch": 1.87, "learning_rate": 3.3952442297805256e-05, "loss": 0.0558, "step": 7254 }, { "epoch": 1.87, "learning_rate": 3.3938971114333865e-05, "loss": 0.0356, "step": 7255 }, { "epoch": 1.87, "learning_rate": 3.3925501230729615e-05, "loss": 0.0506, "step": 7256 }, { "epoch": 1.87, "learning_rate": 3.391203264808265e-05, "loss": 0.0481, "step": 7257 }, { "epoch": 1.87, "learning_rate": 3.389856536748303e-05, "loss": 0.0413, "step": 7258 }, { "epoch": 1.87, "learning_rate": 3.388509939002068e-05, "loss": 0.0395, "step": 7259 }, { "epoch": 1.87, "learning_rate": 3.3871634716785466e-05, "loss": 0.0522, "step": 7260 }, { "epoch": 1.87, "learning_rate": 3.3858171348867116e-05, "loss": 0.0407, "step": 7261 }, { "epoch": 1.87, "learning_rate": 3.384470928735527e-05, "loss": 0.0425, "step": 7262 }, { "epoch": 1.87, "learning_rate": 3.383124853333943e-05, "loss": 0.0366, "step": 7263 }, { "epoch": 1.87, "learning_rate": 3.381778908790904e-05, "loss": 0.036, "step": 7264 }, { "epoch": 1.87, "learning_rate": 3.3804330952153395e-05, "loss": 0.0479, "step": 7265 }, { "epoch": 1.88, "learning_rate": 3.379087412716172e-05, "loss": 0.0411, "step": 7266 }, { "epoch": 1.88, "learning_rate": 3.37774186140231e-05, "loss": 0.0483, "step": 7267 }, { "epoch": 1.88, "learning_rate": 3.376396441382653e-05, "loss": 0.0387, "step": 7268 }, { "epoch": 1.88, "learning_rate": 3.375051152766091e-05, "loss": 0.0354, "step": 7269 }, { "epoch": 1.88, "learning_rate": 3.3737059956615016e-05, "loss": 0.0262, "step": 7270 }, { "epoch": 1.88, "learning_rate": 3.372360970177754e-05, "loss": 0.0381, "step": 7271 }, { "epoch": 1.88, "learning_rate": 3.371016076423701e-05, "loss": 0.0474, "step": 7272 }, { "epoch": 1.88, "learning_rate": 3.3696713145081925e-05, "loss": 0.0374, "step": 7273 }, { "epoch": 1.88, "learning_rate": 3.368326684540063e-05, "loss": 0.0401, "step": 7274 }, { "epoch": 1.88, "learning_rate": 3.366982186628138e-05, "loss": 0.0568, "step": 7275 }, { "epoch": 1.88, "learning_rate": 3.3656378208812316e-05, "loss": 0.0242, "step": 7276 }, { "epoch": 1.88, "learning_rate": 3.364293587408146e-05, "loss": 0.0205, "step": 7277 }, { "epoch": 1.88, "learning_rate": 3.3629494863176756e-05, "loss": 0.0557, "step": 7278 }, { "epoch": 1.88, "learning_rate": 3.3616055177186026e-05, "loss": 0.055, "step": 7279 }, { "epoch": 1.88, "learning_rate": 3.3602616817196963e-05, "loss": 0.0187, "step": 7280 }, { "epoch": 1.88, "learning_rate": 3.35891797842972e-05, "loss": 0.04, "step": 7281 }, { "epoch": 1.88, "learning_rate": 3.357574407957421e-05, "loss": 0.0359, "step": 7282 }, { "epoch": 1.88, "learning_rate": 3.356230970411541e-05, "loss": 0.0288, "step": 7283 }, { "epoch": 1.88, "learning_rate": 3.354887665900808e-05, "loss": 0.0363, "step": 7284 }, { "epoch": 1.88, "learning_rate": 3.353544494533938e-05, "loss": 0.0284, "step": 7285 }, { "epoch": 1.88, "learning_rate": 3.352201456419639e-05, "loss": 0.0424, "step": 7286 }, { "epoch": 1.88, "learning_rate": 3.350858551666607e-05, "loss": 0.0435, "step": 7287 }, { "epoch": 1.88, "learning_rate": 3.349515780383526e-05, "loss": 0.0308, "step": 7288 }, { "epoch": 1.88, "learning_rate": 3.3481731426790744e-05, "loss": 0.0468, "step": 7289 }, { "epoch": 1.88, "learning_rate": 3.346830638661912e-05, "loss": 0.0371, "step": 7290 }, { "epoch": 1.88, "learning_rate": 3.345488268440693e-05, "loss": 0.033, "step": 7291 }, { "epoch": 1.88, "learning_rate": 3.3441460321240605e-05, "loss": 0.0206, "step": 7292 }, { "epoch": 1.88, "learning_rate": 3.342803929820641e-05, "loss": 0.0598, "step": 7293 }, { "epoch": 1.88, "learning_rate": 3.341461961639062e-05, "loss": 0.057, "step": 7294 }, { "epoch": 1.88, "learning_rate": 3.340120127687928e-05, "loss": 0.0251, "step": 7295 }, { "epoch": 1.88, "learning_rate": 3.3387784280758397e-05, "loss": 0.041, "step": 7296 }, { "epoch": 1.88, "learning_rate": 3.337436862911384e-05, "loss": 0.0283, "step": 7297 }, { "epoch": 1.88, "learning_rate": 3.336095432303138e-05, "loss": 0.0339, "step": 7298 }, { "epoch": 1.88, "learning_rate": 3.3347541363596676e-05, "loss": 0.0359, "step": 7299 }, { "epoch": 1.88, "learning_rate": 3.333412975189528e-05, "loss": 0.0474, "step": 7300 }, { "epoch": 1.88, "learning_rate": 3.332071948901263e-05, "loss": 0.0208, "step": 7301 }, { "epoch": 1.88, "learning_rate": 3.3307310576034066e-05, "loss": 0.0433, "step": 7302 }, { "epoch": 1.88, "learning_rate": 3.3293903014044814e-05, "loss": 0.0377, "step": 7303 }, { "epoch": 1.88, "learning_rate": 3.3280496804129966e-05, "loss": 0.0307, "step": 7304 }, { "epoch": 1.89, "learning_rate": 3.3267091947374565e-05, "loss": 0.0435, "step": 7305 }, { "epoch": 1.89, "learning_rate": 3.325368844486346e-05, "loss": 0.0327, "step": 7306 }, { "epoch": 1.89, "learning_rate": 3.3240286297681485e-05, "loss": 0.0397, "step": 7307 }, { "epoch": 1.89, "learning_rate": 3.3226885506913275e-05, "loss": 0.021, "step": 7308 }, { "epoch": 1.89, "learning_rate": 3.3213486073643414e-05, "loss": 0.031, "step": 7309 }, { "epoch": 1.89, "learning_rate": 3.320008799895636e-05, "loss": 0.0334, "step": 7310 }, { "epoch": 1.89, "learning_rate": 3.318669128393645e-05, "loss": 0.0411, "step": 7311 }, { "epoch": 1.89, "learning_rate": 3.317329592966794e-05, "loss": 0.0343, "step": 7312 }, { "epoch": 1.89, "learning_rate": 3.315990193723493e-05, "loss": 0.0368, "step": 7313 }, { "epoch": 1.89, "learning_rate": 3.314650930772145e-05, "loss": 0.0222, "step": 7314 }, { "epoch": 1.89, "learning_rate": 3.313311804221142e-05, "loss": 0.0442, "step": 7315 }, { "epoch": 1.89, "learning_rate": 3.311972814178859e-05, "loss": 0.0473, "step": 7316 }, { "epoch": 1.89, "learning_rate": 3.31063396075367e-05, "loss": 0.0295, "step": 7317 }, { "epoch": 1.89, "learning_rate": 3.309295244053929e-05, "loss": 0.0379, "step": 7318 }, { "epoch": 1.89, "learning_rate": 3.307956664187982e-05, "loss": 0.0345, "step": 7319 }, { "epoch": 1.89, "learning_rate": 3.3066182212641675e-05, "loss": 0.0437, "step": 7320 }, { "epoch": 1.89, "learning_rate": 3.3052799153908055e-05, "loss": 0.0304, "step": 7321 }, { "epoch": 1.89, "learning_rate": 3.303941746676212e-05, "loss": 0.0249, "step": 7322 }, { "epoch": 1.89, "learning_rate": 3.3026037152286895e-05, "loss": 0.0437, "step": 7323 }, { "epoch": 1.89, "learning_rate": 3.301265821156525e-05, "loss": 0.024, "step": 7324 }, { "epoch": 1.89, "learning_rate": 3.299928064568003e-05, "loss": 0.0249, "step": 7325 }, { "epoch": 1.89, "learning_rate": 3.2985904455713886e-05, "loss": 0.0344, "step": 7326 }, { "epoch": 1.89, "learning_rate": 3.297252964274942e-05, "loss": 0.0575, "step": 7327 }, { "epoch": 1.89, "learning_rate": 3.295915620786908e-05, "loss": 0.034, "step": 7328 }, { "epoch": 1.89, "learning_rate": 3.29457841521552e-05, "loss": 0.0498, "step": 7329 }, { "epoch": 1.89, "learning_rate": 3.293241347669007e-05, "loss": 0.0265, "step": 7330 }, { "epoch": 1.89, "learning_rate": 3.291904418255579e-05, "loss": 0.031, "step": 7331 }, { "epoch": 1.89, "learning_rate": 3.290567627083435e-05, "loss": 0.0463, "step": 7332 }, { "epoch": 1.89, "learning_rate": 3.2892309742607705e-05, "loss": 0.039, "step": 7333 }, { "epoch": 1.89, "learning_rate": 3.2878944598957605e-05, "loss": 0.0232, "step": 7334 }, { "epoch": 1.89, "learning_rate": 3.286558084096577e-05, "loss": 0.0282, "step": 7335 }, { "epoch": 1.89, "learning_rate": 3.285221846971376e-05, "loss": 0.0383, "step": 7336 }, { "epoch": 1.89, "learning_rate": 3.2838857486282995e-05, "loss": 0.0343, "step": 7337 }, { "epoch": 1.89, "learning_rate": 3.282549789175486e-05, "loss": 0.0324, "step": 7338 }, { "epoch": 1.89, "learning_rate": 3.281213968721057e-05, "loss": 0.0311, "step": 7339 }, { "epoch": 1.89, "learning_rate": 3.279878287373125e-05, "loss": 0.0495, "step": 7340 }, { "epoch": 1.89, "learning_rate": 3.278542745239791e-05, "loss": 0.0405, "step": 7341 }, { "epoch": 1.89, "learning_rate": 3.2772073424291424e-05, "loss": 0.0452, "step": 7342 }, { "epoch": 1.89, "learning_rate": 3.27587207904926e-05, "loss": 0.0405, "step": 7343 }, { "epoch": 1.9, "learning_rate": 3.2745369552082104e-05, "loss": 0.0515, "step": 7344 }, { "epoch": 1.9, "learning_rate": 3.273201971014046e-05, "loss": 0.0339, "step": 7345 }, { "epoch": 1.9, "learning_rate": 3.2718671265748154e-05, "loss": 0.0264, "step": 7346 }, { "epoch": 1.9, "learning_rate": 3.270532421998547e-05, "loss": 0.0373, "step": 7347 }, { "epoch": 1.9, "learning_rate": 3.269197857393268e-05, "loss": 0.0372, "step": 7348 }, { "epoch": 1.9, "learning_rate": 3.2678634328669846e-05, "loss": 0.0293, "step": 7349 }, { "epoch": 1.9, "learning_rate": 3.266529148527695e-05, "loss": 0.0307, "step": 7350 }, { "epoch": 1.9, "learning_rate": 3.26519500448339e-05, "loss": 0.041, "step": 7351 }, { "epoch": 1.9, "learning_rate": 3.263861000842043e-05, "loss": 0.0343, "step": 7352 }, { "epoch": 1.9, "learning_rate": 3.262527137711623e-05, "loss": 0.0413, "step": 7353 }, { "epoch": 1.9, "learning_rate": 3.2611934152000806e-05, "loss": 0.0294, "step": 7354 }, { "epoch": 1.9, "learning_rate": 3.259859833415356e-05, "loss": 0.0348, "step": 7355 }, { "epoch": 1.9, "learning_rate": 3.258526392465385e-05, "loss": 0.0479, "step": 7356 }, { "epoch": 1.9, "learning_rate": 3.2571930924580834e-05, "loss": 0.0415, "step": 7357 }, { "epoch": 1.9, "learning_rate": 3.2558599335013587e-05, "loss": 0.0387, "step": 7358 }, { "epoch": 1.9, "learning_rate": 3.25452691570311e-05, "loss": 0.0348, "step": 7359 }, { "epoch": 1.9, "learning_rate": 3.253194039171219e-05, "loss": 0.0398, "step": 7360 }, { "epoch": 1.9, "learning_rate": 3.251861304013564e-05, "loss": 0.0327, "step": 7361 }, { "epoch": 1.9, "learning_rate": 3.250528710338005e-05, "loss": 0.0417, "step": 7362 }, { "epoch": 1.9, "learning_rate": 3.249196258252391e-05, "loss": 0.039, "step": 7363 }, { "epoch": 1.9, "learning_rate": 3.247863947864563e-05, "loss": 0.0533, "step": 7364 }, { "epoch": 1.9, "learning_rate": 3.246531779282348e-05, "loss": 0.04, "step": 7365 }, { "epoch": 1.9, "learning_rate": 3.245199752613565e-05, "loss": 0.0399, "step": 7366 }, { "epoch": 1.9, "learning_rate": 3.243867867966017e-05, "loss": 0.0339, "step": 7367 }, { "epoch": 1.9, "learning_rate": 3.2425361254474965e-05, "loss": 0.0465, "step": 7368 }, { "epoch": 1.9, "learning_rate": 3.241204525165788e-05, "loss": 0.0433, "step": 7369 }, { "epoch": 1.9, "learning_rate": 3.2398730672286595e-05, "loss": 0.0389, "step": 7370 }, { "epoch": 1.9, "learning_rate": 3.238541751743868e-05, "loss": 0.0415, "step": 7371 }, { "epoch": 1.9, "learning_rate": 3.2372105788191664e-05, "loss": 0.0413, "step": 7372 }, { "epoch": 1.9, "learning_rate": 3.235879548562286e-05, "loss": 0.0513, "step": 7373 }, { "epoch": 1.9, "learning_rate": 3.234548661080954e-05, "loss": 0.0428, "step": 7374 }, { "epoch": 1.9, "learning_rate": 3.233217916482882e-05, "loss": 0.0451, "step": 7375 }, { "epoch": 1.9, "learning_rate": 3.231887314875769e-05, "loss": 0.0278, "step": 7376 }, { "epoch": 1.9, "learning_rate": 3.230556856367308e-05, "loss": 0.0342, "step": 7377 }, { "epoch": 1.9, "learning_rate": 3.2292265410651744e-05, "loss": 0.0363, "step": 7378 }, { "epoch": 1.9, "learning_rate": 3.227896369077037e-05, "loss": 0.0369, "step": 7379 }, { "epoch": 1.9, "learning_rate": 3.22656634051055e-05, "loss": 0.0336, "step": 7380 }, { "epoch": 1.9, "learning_rate": 3.225236455473353e-05, "loss": 0.0228, "step": 7381 }, { "epoch": 1.91, "learning_rate": 3.223906714073083e-05, "loss": 0.0389, "step": 7382 }, { "epoch": 1.91, "learning_rate": 3.2225771164173545e-05, "loss": 0.0405, "step": 7383 }, { "epoch": 1.91, "learning_rate": 3.221247662613782e-05, "loss": 0.0439, "step": 7384 }, { "epoch": 1.91, "learning_rate": 3.2199183527699576e-05, "loss": 0.0265, "step": 7385 }, { "epoch": 1.91, "learning_rate": 3.218589186993467e-05, "loss": 0.0468, "step": 7386 }, { "epoch": 1.91, "learning_rate": 3.217260165391885e-05, "loss": 0.0355, "step": 7387 }, { "epoch": 1.91, "learning_rate": 3.215931288072773e-05, "loss": 0.0461, "step": 7388 }, { "epoch": 1.91, "learning_rate": 3.2146025551436774e-05, "loss": 0.03, "step": 7389 }, { "epoch": 1.91, "learning_rate": 3.213273966712143e-05, "loss": 0.0369, "step": 7390 }, { "epoch": 1.91, "learning_rate": 3.211945522885691e-05, "loss": 0.0448, "step": 7391 }, { "epoch": 1.91, "learning_rate": 3.21061722377184e-05, "loss": 0.0414, "step": 7392 }, { "epoch": 1.91, "learning_rate": 3.209289069478091e-05, "loss": 0.051, "step": 7393 }, { "epoch": 1.91, "learning_rate": 3.2079610601119345e-05, "loss": 0.0379, "step": 7394 }, { "epoch": 1.91, "learning_rate": 3.2066331957808546e-05, "loss": 0.0605, "step": 7395 }, { "epoch": 1.91, "learning_rate": 3.205305476592313e-05, "loss": 0.04, "step": 7396 }, { "epoch": 1.91, "learning_rate": 3.203977902653773e-05, "loss": 0.0416, "step": 7397 }, { "epoch": 1.91, "learning_rate": 3.202650474072676e-05, "loss": 0.0305, "step": 7398 }, { "epoch": 1.91, "learning_rate": 3.201323190956451e-05, "loss": 0.032, "step": 7399 }, { "epoch": 1.91, "learning_rate": 3.1999960534125264e-05, "loss": 0.0407, "step": 7400 }, { "epoch": 1.91, "learning_rate": 3.198669061548306e-05, "loss": 0.0235, "step": 7401 }, { "epoch": 1.91, "learning_rate": 3.197342215471188e-05, "loss": 0.0453, "step": 7402 }, { "epoch": 1.91, "learning_rate": 3.196015515288559e-05, "loss": 0.0336, "step": 7403 }, { "epoch": 1.91, "learning_rate": 3.1946889611077914e-05, "loss": 0.0322, "step": 7404 }, { "epoch": 1.91, "learning_rate": 3.193362553036251e-05, "loss": 0.0317, "step": 7405 }, { "epoch": 1.91, "learning_rate": 3.192036291181284e-05, "loss": 0.0297, "step": 7406 }, { "epoch": 1.91, "learning_rate": 3.190710175650229e-05, "loss": 0.0366, "step": 7407 }, { "epoch": 1.91, "learning_rate": 3.189384206550415e-05, "loss": 0.0454, "step": 7408 }, { "epoch": 1.91, "learning_rate": 3.188058383989154e-05, "loss": 0.0403, "step": 7409 }, { "epoch": 1.91, "learning_rate": 3.186732708073752e-05, "loss": 0.0269, "step": 7410 }, { "epoch": 1.91, "learning_rate": 3.1854071789114973e-05, "loss": 0.0335, "step": 7411 }, { "epoch": 1.91, "learning_rate": 3.1840817966096684e-05, "loss": 0.0288, "step": 7412 }, { "epoch": 1.91, "learning_rate": 3.182756561275535e-05, "loss": 0.037, "step": 7413 }, { "epoch": 1.91, "learning_rate": 3.1814314730163505e-05, "loss": 0.0313, "step": 7414 }, { "epoch": 1.91, "learning_rate": 3.1801065319393573e-05, "loss": 0.0264, "step": 7415 }, { "epoch": 1.91, "learning_rate": 3.178781738151791e-05, "loss": 0.0452, "step": 7416 }, { "epoch": 1.91, "learning_rate": 3.177457091760865e-05, "loss": 0.0421, "step": 7417 }, { "epoch": 1.91, "learning_rate": 3.176132592873794e-05, "loss": 0.0237, "step": 7418 }, { "epoch": 1.91, "learning_rate": 3.174808241597769e-05, "loss": 0.0388, "step": 7419 }, { "epoch": 1.91, "learning_rate": 3.173484038039972e-05, "loss": 0.0295, "step": 7420 }, { "epoch": 1.92, "learning_rate": 3.172159982307581e-05, "loss": 0.0341, "step": 7421 }, { "epoch": 1.92, "learning_rate": 3.170836074507749e-05, "loss": 0.0245, "step": 7422 }, { "epoch": 1.92, "learning_rate": 3.1695123147476294e-05, "loss": 0.0314, "step": 7423 }, { "epoch": 1.92, "learning_rate": 3.168188703134356e-05, "loss": 0.0495, "step": 7424 }, { "epoch": 1.92, "learning_rate": 3.1668652397750496e-05, "loss": 0.0446, "step": 7425 }, { "epoch": 1.92, "learning_rate": 3.165541924776828e-05, "loss": 0.0374, "step": 7426 }, { "epoch": 1.92, "learning_rate": 3.164218758246786e-05, "loss": 0.0418, "step": 7427 }, { "epoch": 1.92, "learning_rate": 3.162895740292013e-05, "loss": 0.0274, "step": 7428 }, { "epoch": 1.92, "learning_rate": 3.1615728710195865e-05, "loss": 0.0387, "step": 7429 }, { "epoch": 1.92, "learning_rate": 3.1602501505365664e-05, "loss": 0.0628, "step": 7430 }, { "epoch": 1.92, "learning_rate": 3.1589275789500095e-05, "loss": 0.0355, "step": 7431 }, { "epoch": 1.92, "learning_rate": 3.157605156366953e-05, "loss": 0.0332, "step": 7432 }, { "epoch": 1.92, "learning_rate": 3.156282882894421e-05, "loss": 0.0335, "step": 7433 }, { "epoch": 1.92, "learning_rate": 3.1549607586394356e-05, "loss": 0.0288, "step": 7434 }, { "epoch": 1.92, "learning_rate": 3.153638783708995e-05, "loss": 0.0506, "step": 7435 }, { "epoch": 1.92, "learning_rate": 3.152316958210094e-05, "loss": 0.0512, "step": 7436 }, { "epoch": 1.92, "learning_rate": 3.150995282249711e-05, "loss": 0.0311, "step": 7437 }, { "epoch": 1.92, "learning_rate": 3.149673755934811e-05, "loss": 0.04, "step": 7438 }, { "epoch": 1.92, "learning_rate": 3.148352379372353e-05, "loss": 0.0408, "step": 7439 }, { "epoch": 1.92, "learning_rate": 3.147031152669277e-05, "loss": 0.0354, "step": 7440 }, { "epoch": 1.92, "learning_rate": 3.145710075932513e-05, "loss": 0.0246, "step": 7441 }, { "epoch": 1.92, "learning_rate": 3.1443891492689834e-05, "loss": 0.0328, "step": 7442 }, { "epoch": 1.92, "learning_rate": 3.1430683727855903e-05, "loss": 0.037, "step": 7443 }, { "epoch": 1.92, "learning_rate": 3.1417477465892325e-05, "loss": 0.0391, "step": 7444 }, { "epoch": 1.92, "learning_rate": 3.1404272707867904e-05, "loss": 0.0383, "step": 7445 }, { "epoch": 1.92, "learning_rate": 3.139106945485132e-05, "loss": 0.04, "step": 7446 }, { "epoch": 1.92, "learning_rate": 3.137786770791119e-05, "loss": 0.0338, "step": 7447 }, { "epoch": 1.92, "learning_rate": 3.136466746811592e-05, "loss": 0.0442, "step": 7448 }, { "epoch": 1.92, "learning_rate": 3.1351468736533907e-05, "loss": 0.0331, "step": 7449 }, { "epoch": 1.92, "learning_rate": 3.133827151423333e-05, "loss": 0.0413, "step": 7450 }, { "epoch": 1.92, "learning_rate": 3.1325075802282266e-05, "loss": 0.0475, "step": 7451 }, { "epoch": 1.92, "learning_rate": 3.1311881601748715e-05, "loss": 0.0415, "step": 7452 }, { "epoch": 1.92, "learning_rate": 3.129868891370051e-05, "loss": 0.029, "step": 7453 }, { "epoch": 1.92, "learning_rate": 3.128549773920535e-05, "loss": 0.0462, "step": 7454 }, { "epoch": 1.92, "learning_rate": 3.127230807933088e-05, "loss": 0.0494, "step": 7455 }, { "epoch": 1.92, "learning_rate": 3.1259119935144546e-05, "loss": 0.0532, "step": 7456 }, { "epoch": 1.92, "learning_rate": 3.124593330771373e-05, "loss": 0.0419, "step": 7457 }, { "epoch": 1.92, "learning_rate": 3.1232748198105646e-05, "loss": 0.0519, "step": 7458 }, { "epoch": 1.92, "learning_rate": 3.121956460738739e-05, "loss": 0.0421, "step": 7459 }, { "epoch": 1.93, "learning_rate": 3.120638253662599e-05, "loss": 0.0374, "step": 7460 }, { "epoch": 1.93, "learning_rate": 3.119320198688827e-05, "loss": 0.0285, "step": 7461 }, { "epoch": 1.93, "learning_rate": 3.118002295924101e-05, "loss": 0.0368, "step": 7462 }, { "epoch": 1.93, "learning_rate": 3.116684545475081e-05, "loss": 0.031, "step": 7463 }, { "epoch": 1.93, "learning_rate": 3.115366947448414e-05, "loss": 0.0403, "step": 7464 }, { "epoch": 1.93, "learning_rate": 3.1140495019507424e-05, "loss": 0.0394, "step": 7465 }, { "epoch": 1.93, "learning_rate": 3.112732209088687e-05, "loss": 0.0314, "step": 7466 }, { "epoch": 1.93, "learning_rate": 3.11141506896886e-05, "loss": 0.0331, "step": 7467 }, { "epoch": 1.93, "learning_rate": 3.1100980816978644e-05, "loss": 0.0357, "step": 7468 }, { "epoch": 1.93, "learning_rate": 3.108781247382285e-05, "loss": 0.0344, "step": 7469 }, { "epoch": 1.93, "learning_rate": 3.1074645661286996e-05, "loss": 0.0349, "step": 7470 }, { "epoch": 1.93, "learning_rate": 3.106148038043671e-05, "loss": 0.0432, "step": 7471 }, { "epoch": 1.93, "learning_rate": 3.104831663233746e-05, "loss": 0.0343, "step": 7472 }, { "epoch": 1.93, "learning_rate": 3.103515441805468e-05, "loss": 0.0516, "step": 7473 }, { "epoch": 1.93, "learning_rate": 3.102199373865358e-05, "loss": 0.0492, "step": 7474 }, { "epoch": 1.93, "learning_rate": 3.1008834595199346e-05, "loss": 0.0494, "step": 7475 }, { "epoch": 1.93, "learning_rate": 3.099567698875694e-05, "loss": 0.0265, "step": 7476 }, { "epoch": 1.93, "learning_rate": 3.098252092039126e-05, "loss": 0.0623, "step": 7477 }, { "epoch": 1.93, "learning_rate": 3.096936639116708e-05, "loss": 0.0363, "step": 7478 }, { "epoch": 1.93, "learning_rate": 3.0956213402149026e-05, "loss": 0.051, "step": 7479 }, { "epoch": 1.93, "learning_rate": 3.0943061954401595e-05, "loss": 0.0407, "step": 7480 }, { "epoch": 1.93, "learning_rate": 3.09299120489892e-05, "loss": 0.0462, "step": 7481 }, { "epoch": 1.93, "learning_rate": 3.091676368697607e-05, "loss": 0.0328, "step": 7482 }, { "epoch": 1.93, "learning_rate": 3.0903616869426374e-05, "loss": 0.0358, "step": 7483 }, { "epoch": 1.93, "learning_rate": 3.089047159740411e-05, "loss": 0.0649, "step": 7484 }, { "epoch": 1.93, "learning_rate": 3.087732787197315e-05, "loss": 0.0488, "step": 7485 }, { "epoch": 1.93, "learning_rate": 3.086418569419728e-05, "loss": 0.0394, "step": 7486 }, { "epoch": 1.93, "learning_rate": 3.08510450651401e-05, "loss": 0.0439, "step": 7487 }, { "epoch": 1.93, "learning_rate": 3.0837905985865176e-05, "loss": 0.0395, "step": 7488 }, { "epoch": 1.93, "learning_rate": 3.0824768457435856e-05, "loss": 0.0406, "step": 7489 }, { "epoch": 1.93, "learning_rate": 3.081163248091538e-05, "loss": 0.0467, "step": 7490 }, { "epoch": 1.93, "learning_rate": 3.0798498057366934e-05, "loss": 0.0367, "step": 7491 }, { "epoch": 1.93, "learning_rate": 3.0785365187853486e-05, "loss": 0.0556, "step": 7492 }, { "epoch": 1.93, "learning_rate": 3.0772233873437926e-05, "loss": 0.0467, "step": 7493 }, { "epoch": 1.93, "learning_rate": 3.075910411518302e-05, "loss": 0.0433, "step": 7494 }, { "epoch": 1.93, "learning_rate": 3.074597591415139e-05, "loss": 0.0491, "step": 7495 }, { "epoch": 1.93, "learning_rate": 3.073284927140555e-05, "loss": 0.0297, "step": 7496 }, { "epoch": 1.93, "learning_rate": 3.071972418800788e-05, "loss": 0.0379, "step": 7497 }, { "epoch": 1.93, "learning_rate": 3.070660066502061e-05, "loss": 0.0388, "step": 7498 }, { "epoch": 1.94, "learning_rate": 3.06934787035059e-05, "loss": 0.041, "step": 7499 }, { "epoch": 1.94, "learning_rate": 3.06803583045257e-05, "loss": 0.0279, "step": 7500 }, { "epoch": 1.94, "learning_rate": 3.066723946914195e-05, "loss": 0.0405, "step": 7501 }, { "epoch": 1.94, "learning_rate": 3.065412219841635e-05, "loss": 0.0406, "step": 7502 }, { "epoch": 1.94, "learning_rate": 3.064100649341052e-05, "loss": 0.0391, "step": 7503 }, { "epoch": 1.94, "learning_rate": 3.062789235518597e-05, "loss": 0.0476, "step": 7504 }, { "epoch": 1.94, "learning_rate": 3.061477978480407e-05, "loss": 0.0375, "step": 7505 }, { "epoch": 1.94, "learning_rate": 3.060166878332603e-05, "loss": 0.0347, "step": 7506 }, { "epoch": 1.94, "learning_rate": 3.058855935181301e-05, "loss": 0.0451, "step": 7507 }, { "epoch": 1.94, "learning_rate": 3.057545149132593e-05, "loss": 0.0384, "step": 7508 }, { "epoch": 1.94, "learning_rate": 3.056234520292572e-05, "loss": 0.0387, "step": 7509 }, { "epoch": 1.94, "learning_rate": 3.054924048767308e-05, "loss": 0.0248, "step": 7510 }, { "epoch": 1.94, "learning_rate": 3.053613734662858e-05, "loss": 0.0342, "step": 7511 }, { "epoch": 1.94, "learning_rate": 3.0523035780852753e-05, "loss": 0.0344, "step": 7512 }, { "epoch": 1.94, "learning_rate": 3.0509935791405908e-05, "loss": 0.0268, "step": 7513 }, { "epoch": 1.94, "learning_rate": 3.0496837379348293e-05, "loss": 0.0522, "step": 7514 }, { "epoch": 1.94, "learning_rate": 3.048374054573999e-05, "loss": 0.0513, "step": 7515 }, { "epoch": 1.94, "learning_rate": 3.0470645291640954e-05, "loss": 0.0283, "step": 7516 }, { "epoch": 1.94, "learning_rate": 3.0457551618111045e-05, "loss": 0.0349, "step": 7517 }, { "epoch": 1.94, "learning_rate": 3.0444459526209966e-05, "loss": 0.0351, "step": 7518 }, { "epoch": 1.94, "learning_rate": 3.0431369016997275e-05, "loss": 0.0354, "step": 7519 }, { "epoch": 1.94, "learning_rate": 3.0418280091532462e-05, "loss": 0.0396, "step": 7520 }, { "epoch": 1.94, "learning_rate": 3.0405192750874822e-05, "loss": 0.0265, "step": 7521 }, { "epoch": 1.94, "learning_rate": 3.039210699608359e-05, "loss": 0.0399, "step": 7522 }, { "epoch": 1.94, "learning_rate": 3.0379022828217806e-05, "loss": 0.0311, "step": 7523 }, { "epoch": 1.94, "learning_rate": 3.0365940248336405e-05, "loss": 0.0393, "step": 7524 }, { "epoch": 1.94, "learning_rate": 3.035285925749823e-05, "loss": 0.0384, "step": 7525 }, { "epoch": 1.94, "learning_rate": 3.0339779856761923e-05, "loss": 0.0299, "step": 7526 }, { "epoch": 1.94, "learning_rate": 3.032670204718609e-05, "loss": 0.0467, "step": 7527 }, { "epoch": 1.94, "learning_rate": 3.0313625829829128e-05, "loss": 0.0425, "step": 7528 }, { "epoch": 1.94, "learning_rate": 3.0300551205749316e-05, "loss": 0.0269, "step": 7529 }, { "epoch": 1.94, "learning_rate": 3.028747817600486e-05, "loss": 0.0284, "step": 7530 }, { "epoch": 1.94, "learning_rate": 3.0274406741653767e-05, "loss": 0.0468, "step": 7531 }, { "epoch": 1.94, "learning_rate": 3.026133690375398e-05, "loss": 0.0524, "step": 7532 }, { "epoch": 1.94, "learning_rate": 3.0248268663363267e-05, "loss": 0.0535, "step": 7533 }, { "epoch": 1.94, "learning_rate": 3.0235202021539255e-05, "loss": 0.0378, "step": 7534 }, { "epoch": 1.94, "learning_rate": 3.0222136979339505e-05, "loss": 0.0497, "step": 7535 }, { "epoch": 1.94, "learning_rate": 3.0209073537821393e-05, "loss": 0.0537, "step": 7536 }, { "epoch": 1.95, "learning_rate": 3.019601169804216e-05, "loss": 0.038, "step": 7537 }, { "epoch": 1.95, "learning_rate": 3.0182951461058985e-05, "loss": 0.0273, "step": 7538 }, { "epoch": 1.95, "learning_rate": 3.0169892827928826e-05, "loss": 0.0405, "step": 7539 }, { "epoch": 1.95, "learning_rate": 3.0156835799708605e-05, "loss": 0.0501, "step": 7540 }, { "epoch": 1.95, "learning_rate": 3.014378037745503e-05, "loss": 0.0515, "step": 7541 }, { "epoch": 1.95, "learning_rate": 3.0130726562224716e-05, "loss": 0.05, "step": 7542 }, { "epoch": 1.95, "learning_rate": 3.0117674355074175e-05, "loss": 0.0373, "step": 7543 }, { "epoch": 1.95, "learning_rate": 3.0104623757059725e-05, "loss": 0.048, "step": 7544 }, { "epoch": 1.95, "learning_rate": 3.009157476923763e-05, "loss": 0.0271, "step": 7545 }, { "epoch": 1.95, "learning_rate": 3.0078527392663956e-05, "loss": 0.0385, "step": 7546 }, { "epoch": 1.95, "learning_rate": 3.0065481628394658e-05, "loss": 0.0416, "step": 7547 }, { "epoch": 1.95, "learning_rate": 3.0052437477485606e-05, "loss": 0.0341, "step": 7548 }, { "epoch": 1.95, "learning_rate": 3.003939494099248e-05, "loss": 0.0318, "step": 7549 }, { "epoch": 1.95, "learning_rate": 3.0026354019970826e-05, "loss": 0.0407, "step": 7550 }, { "epoch": 1.95, "learning_rate": 3.0013314715476128e-05, "loss": 0.0436, "step": 7551 }, { "epoch": 1.95, "learning_rate": 3.000027702856366e-05, "loss": 0.0489, "step": 7552 }, { "epoch": 1.95, "learning_rate": 2.9987240960288644e-05, "loss": 0.0471, "step": 7553 }, { "epoch": 1.95, "learning_rate": 2.9974206511706103e-05, "loss": 0.0406, "step": 7554 }, { "epoch": 1.95, "learning_rate": 2.9961173683870925e-05, "loss": 0.0379, "step": 7555 }, { "epoch": 1.95, "learning_rate": 2.9948142477837954e-05, "loss": 0.0395, "step": 7556 }, { "epoch": 1.95, "learning_rate": 2.9935112894661786e-05, "loss": 0.0453, "step": 7557 }, { "epoch": 1.95, "learning_rate": 2.9922084935397005e-05, "loss": 0.0392, "step": 7558 }, { "epoch": 1.95, "learning_rate": 2.990905860109796e-05, "loss": 0.0387, "step": 7559 }, { "epoch": 1.95, "learning_rate": 2.989603389281891e-05, "loss": 0.0414, "step": 7560 }, { "epoch": 1.95, "learning_rate": 2.9883010811614016e-05, "loss": 0.036, "step": 7561 }, { "epoch": 1.95, "learning_rate": 2.9869989358537253e-05, "loss": 0.0351, "step": 7562 }, { "epoch": 1.95, "learning_rate": 2.9856969534642464e-05, "loss": 0.0548, "step": 7563 }, { "epoch": 1.95, "learning_rate": 2.9843951340983433e-05, "loss": 0.0326, "step": 7564 }, { "epoch": 1.95, "learning_rate": 2.9830934778613707e-05, "loss": 0.035, "step": 7565 }, { "epoch": 1.95, "learning_rate": 2.9817919848586805e-05, "loss": 0.0418, "step": 7566 }, { "epoch": 1.95, "learning_rate": 2.9804906551956047e-05, "loss": 0.0384, "step": 7567 }, { "epoch": 1.95, "learning_rate": 2.9791894889774608e-05, "loss": 0.0414, "step": 7568 }, { "epoch": 1.95, "learning_rate": 2.9778884863095613e-05, "loss": 0.0358, "step": 7569 }, { "epoch": 1.95, "learning_rate": 2.976587647297195e-05, "loss": 0.0518, "step": 7570 }, { "epoch": 1.95, "learning_rate": 2.9752869720456473e-05, "loss": 0.0498, "step": 7571 }, { "epoch": 1.95, "learning_rate": 2.9739864606601837e-05, "loss": 0.0376, "step": 7572 }, { "epoch": 1.95, "learning_rate": 2.972686113246056e-05, "loss": 0.0336, "step": 7573 }, { "epoch": 1.95, "learning_rate": 2.97138592990851e-05, "loss": 0.0402, "step": 7574 }, { "epoch": 1.95, "learning_rate": 2.9700859107527712e-05, "loss": 0.0467, "step": 7575 }, { "epoch": 1.96, "learning_rate": 2.968786055884052e-05, "loss": 0.0294, "step": 7576 }, { "epoch": 1.96, "learning_rate": 2.9674863654075574e-05, "loss": 0.0396, "step": 7577 }, { "epoch": 1.96, "learning_rate": 2.9661868394284713e-05, "loss": 0.0332, "step": 7578 }, { "epoch": 1.96, "learning_rate": 2.964887478051972e-05, "loss": 0.0427, "step": 7579 }, { "epoch": 1.96, "learning_rate": 2.9635882813832193e-05, "loss": 0.0456, "step": 7580 }, { "epoch": 1.96, "learning_rate": 2.962289249527359e-05, "loss": 0.03, "step": 7581 }, { "epoch": 1.96, "learning_rate": 2.960990382589529e-05, "loss": 0.0388, "step": 7582 }, { "epoch": 1.96, "learning_rate": 2.9596916806748465e-05, "loss": 0.026, "step": 7583 }, { "epoch": 1.96, "learning_rate": 2.9583931438884238e-05, "loss": 0.0344, "step": 7584 }, { "epoch": 1.96, "learning_rate": 2.9570947723353537e-05, "loss": 0.0449, "step": 7585 }, { "epoch": 1.96, "learning_rate": 2.9557965661207144e-05, "loss": 0.0437, "step": 7586 }, { "epoch": 1.96, "learning_rate": 2.9544985253495783e-05, "loss": 0.0271, "step": 7587 }, { "epoch": 1.96, "learning_rate": 2.9532006501269972e-05, "loss": 0.0279, "step": 7588 }, { "epoch": 1.96, "learning_rate": 2.95190294055801e-05, "loss": 0.029, "step": 7589 }, { "epoch": 1.96, "learning_rate": 2.9506053967476492e-05, "loss": 0.0286, "step": 7590 }, { "epoch": 1.96, "learning_rate": 2.949308018800924e-05, "loss": 0.0385, "step": 7591 }, { "epoch": 1.96, "learning_rate": 2.9480108068228397e-05, "loss": 0.0405, "step": 7592 }, { "epoch": 1.96, "learning_rate": 2.9467137609183804e-05, "loss": 0.022, "step": 7593 }, { "epoch": 1.96, "learning_rate": 2.9454168811925186e-05, "loss": 0.0247, "step": 7594 }, { "epoch": 1.96, "learning_rate": 2.9441201677502194e-05, "loss": 0.0574, "step": 7595 }, { "epoch": 1.96, "learning_rate": 2.9428236206964243e-05, "loss": 0.0308, "step": 7596 }, { "epoch": 1.96, "learning_rate": 2.941527240136072e-05, "loss": 0.0449, "step": 7597 }, { "epoch": 1.96, "learning_rate": 2.94023102617408e-05, "loss": 0.0416, "step": 7598 }, { "epoch": 1.96, "learning_rate": 2.9389349789153526e-05, "loss": 0.0316, "step": 7599 }, { "epoch": 1.96, "learning_rate": 2.937639098464786e-05, "loss": 0.0604, "step": 7600 }, { "epoch": 1.96, "learning_rate": 2.936343384927259e-05, "loss": 0.0368, "step": 7601 }, { "epoch": 1.96, "learning_rate": 2.9350478384076363e-05, "loss": 0.0349, "step": 7602 }, { "epoch": 1.96, "learning_rate": 2.933752459010772e-05, "loss": 0.0435, "step": 7603 }, { "epoch": 1.96, "learning_rate": 2.9324572468415024e-05, "loss": 0.0367, "step": 7604 }, { "epoch": 1.96, "learning_rate": 2.9311622020046565e-05, "loss": 0.031, "step": 7605 }, { "epoch": 1.96, "learning_rate": 2.9298673246050444e-05, "loss": 0.0469, "step": 7606 }, { "epoch": 1.96, "learning_rate": 2.9285726147474624e-05, "loss": 0.0455, "step": 7607 }, { "epoch": 1.96, "learning_rate": 2.9272780725366995e-05, "loss": 0.0525, "step": 7608 }, { "epoch": 1.96, "learning_rate": 2.9259836980775224e-05, "loss": 0.0383, "step": 7609 }, { "epoch": 1.96, "learning_rate": 2.9246894914746926e-05, "loss": 0.0444, "step": 7610 }, { "epoch": 1.96, "learning_rate": 2.9233954528329526e-05, "loss": 0.0426, "step": 7611 }, { "epoch": 1.96, "learning_rate": 2.922101582257031e-05, "loss": 0.0515, "step": 7612 }, { "epoch": 1.96, "learning_rate": 2.920807879851648e-05, "loss": 0.0269, "step": 7613 }, { "epoch": 1.96, "learning_rate": 2.9195143457215045e-05, "loss": 0.051, "step": 7614 }, { "epoch": 1.97, "learning_rate": 2.9182209799712877e-05, "loss": 0.0386, "step": 7615 }, { "epoch": 1.97, "learning_rate": 2.9169277827056803e-05, "loss": 0.0315, "step": 7616 }, { "epoch": 1.97, "learning_rate": 2.915634754029338e-05, "loss": 0.0251, "step": 7617 }, { "epoch": 1.97, "learning_rate": 2.9143418940469147e-05, "loss": 0.0464, "step": 7618 }, { "epoch": 1.97, "learning_rate": 2.913049202863043e-05, "loss": 0.0503, "step": 7619 }, { "epoch": 1.97, "learning_rate": 2.9117566805823416e-05, "loss": 0.0346, "step": 7620 }, { "epoch": 1.97, "learning_rate": 2.9104643273094234e-05, "loss": 0.0378, "step": 7621 }, { "epoch": 1.97, "learning_rate": 2.9091721431488784e-05, "loss": 0.0413, "step": 7622 }, { "epoch": 1.97, "learning_rate": 2.907880128205289e-05, "loss": 0.0334, "step": 7623 }, { "epoch": 1.97, "learning_rate": 2.9065882825832224e-05, "loss": 0.0384, "step": 7624 }, { "epoch": 1.97, "learning_rate": 2.9052966063872283e-05, "loss": 0.0336, "step": 7625 }, { "epoch": 1.97, "learning_rate": 2.90400509972185e-05, "loss": 0.0428, "step": 7626 }, { "epoch": 1.97, "learning_rate": 2.9027137626916108e-05, "loss": 0.0251, "step": 7627 }, { "epoch": 1.97, "learning_rate": 2.901422595401021e-05, "loss": 0.0399, "step": 7628 }, { "epoch": 1.97, "learning_rate": 2.9001315979545818e-05, "loss": 0.0353, "step": 7629 }, { "epoch": 1.97, "learning_rate": 2.8988407704567743e-05, "loss": 0.0524, "step": 7630 }, { "epoch": 1.97, "learning_rate": 2.8975501130120724e-05, "loss": 0.0422, "step": 7631 }, { "epoch": 1.97, "learning_rate": 2.8962596257249312e-05, "loss": 0.0297, "step": 7632 }, { "epoch": 1.97, "learning_rate": 2.894969308699792e-05, "loss": 0.0318, "step": 7633 }, { "epoch": 1.97, "learning_rate": 2.8936791620410874e-05, "loss": 0.0237, "step": 7634 }, { "epoch": 1.97, "learning_rate": 2.89238918585323e-05, "loss": 0.0395, "step": 7635 }, { "epoch": 1.97, "learning_rate": 2.891099380240624e-05, "loss": 0.0446, "step": 7636 }, { "epoch": 1.97, "learning_rate": 2.8898097453076557e-05, "loss": 0.0337, "step": 7637 }, { "epoch": 1.97, "learning_rate": 2.888520281158697e-05, "loss": 0.0366, "step": 7638 }, { "epoch": 1.97, "learning_rate": 2.887230987898113e-05, "loss": 0.0364, "step": 7639 }, { "epoch": 1.97, "learning_rate": 2.8859418656302462e-05, "loss": 0.0211, "step": 7640 }, { "epoch": 1.97, "learning_rate": 2.8846529144594292e-05, "loss": 0.0449, "step": 7641 }, { "epoch": 1.97, "learning_rate": 2.8833641344899836e-05, "loss": 0.0382, "step": 7642 }, { "epoch": 1.97, "learning_rate": 2.8820755258262105e-05, "loss": 0.0495, "step": 7643 }, { "epoch": 1.97, "learning_rate": 2.8807870885724047e-05, "loss": 0.0348, "step": 7644 }, { "epoch": 1.97, "learning_rate": 2.8794988228328412e-05, "loss": 0.0342, "step": 7645 }, { "epoch": 1.97, "learning_rate": 2.878210728711781e-05, "loss": 0.0362, "step": 7646 }, { "epoch": 1.97, "learning_rate": 2.8769228063134785e-05, "loss": 0.0358, "step": 7647 }, { "epoch": 1.97, "learning_rate": 2.875635055742164e-05, "loss": 0.0397, "step": 7648 }, { "epoch": 1.97, "learning_rate": 2.874347477102063e-05, "loss": 0.0432, "step": 7649 }, { "epoch": 1.97, "learning_rate": 2.8730600704973822e-05, "loss": 0.0369, "step": 7650 }, { "epoch": 1.97, "learning_rate": 2.8717728360323114e-05, "loss": 0.039, "step": 7651 }, { "epoch": 1.97, "learning_rate": 2.8704857738110368e-05, "loss": 0.0314, "step": 7652 }, { "epoch": 1.97, "learning_rate": 2.8691988839377206e-05, "loss": 0.0345, "step": 7653 }, { "epoch": 1.98, "learning_rate": 2.867912166516512e-05, "loss": 0.0445, "step": 7654 }, { "epoch": 1.98, "learning_rate": 2.866625621651554e-05, "loss": 0.0539, "step": 7655 }, { "epoch": 1.98, "learning_rate": 2.8653392494469666e-05, "loss": 0.0544, "step": 7656 }, { "epoch": 1.98, "learning_rate": 2.8640530500068635e-05, "loss": 0.0421, "step": 7657 }, { "epoch": 1.98, "learning_rate": 2.8627670234353388e-05, "loss": 0.0479, "step": 7658 }, { "epoch": 1.98, "learning_rate": 2.861481169836472e-05, "loss": 0.0473, "step": 7659 }, { "epoch": 1.98, "learning_rate": 2.8601954893143357e-05, "loss": 0.0337, "step": 7660 }, { "epoch": 1.98, "learning_rate": 2.8589099819729793e-05, "loss": 0.0409, "step": 7661 }, { "epoch": 1.98, "learning_rate": 2.857624647916447e-05, "loss": 0.0354, "step": 7662 }, { "epoch": 1.98, "learning_rate": 2.8563394872487624e-05, "loss": 0.0403, "step": 7663 }, { "epoch": 1.98, "learning_rate": 2.8550545000739372e-05, "loss": 0.0346, "step": 7664 }, { "epoch": 1.98, "learning_rate": 2.8537696864959707e-05, "loss": 0.0604, "step": 7665 }, { "epoch": 1.98, "learning_rate": 2.8524850466188457e-05, "loss": 0.0381, "step": 7666 }, { "epoch": 1.98, "learning_rate": 2.85120058054653e-05, "loss": 0.0562, "step": 7667 }, { "epoch": 1.98, "learning_rate": 2.849916288382984e-05, "loss": 0.0453, "step": 7668 }, { "epoch": 1.98, "learning_rate": 2.8486321702321444e-05, "loss": 0.0459, "step": 7669 }, { "epoch": 1.98, "learning_rate": 2.847348226197943e-05, "loss": 0.0361, "step": 7670 }, { "epoch": 1.98, "learning_rate": 2.8460644563842908e-05, "loss": 0.0391, "step": 7671 }, { "epoch": 1.98, "learning_rate": 2.844780860895086e-05, "loss": 0.0391, "step": 7672 }, { "epoch": 1.98, "learning_rate": 2.8434974398342174e-05, "loss": 0.0543, "step": 7673 }, { "epoch": 1.98, "learning_rate": 2.842214193305552e-05, "loss": 0.0402, "step": 7674 }, { "epoch": 1.98, "learning_rate": 2.8409311214129508e-05, "loss": 0.0389, "step": 7675 }, { "epoch": 1.98, "learning_rate": 2.8396482242602544e-05, "loss": 0.0204, "step": 7676 }, { "epoch": 1.98, "learning_rate": 2.83836550195129e-05, "loss": 0.0445, "step": 7677 }, { "epoch": 1.98, "learning_rate": 2.8370829545898764e-05, "loss": 0.0323, "step": 7678 }, { "epoch": 1.98, "learning_rate": 2.8358005822798093e-05, "loss": 0.0583, "step": 7679 }, { "epoch": 1.98, "learning_rate": 2.834518385124879e-05, "loss": 0.0347, "step": 7680 }, { "epoch": 1.98, "learning_rate": 2.8332363632288562e-05, "loss": 0.034, "step": 7681 }, { "epoch": 1.98, "learning_rate": 2.8319545166954963e-05, "loss": 0.0324, "step": 7682 }, { "epoch": 1.98, "learning_rate": 2.8306728456285466e-05, "loss": 0.0184, "step": 7683 }, { "epoch": 1.98, "learning_rate": 2.8293913501317347e-05, "loss": 0.0434, "step": 7684 }, { "epoch": 1.98, "learning_rate": 2.8281100303087748e-05, "loss": 0.0334, "step": 7685 }, { "epoch": 1.98, "learning_rate": 2.8268288862633707e-05, "loss": 0.0564, "step": 7686 }, { "epoch": 1.98, "learning_rate": 2.8255479180992062e-05, "loss": 0.0315, "step": 7687 }, { "epoch": 1.98, "learning_rate": 2.824267125919957e-05, "loss": 0.0316, "step": 7688 }, { "epoch": 1.98, "learning_rate": 2.8229865098292807e-05, "loss": 0.0336, "step": 7689 }, { "epoch": 1.98, "learning_rate": 2.8217060699308184e-05, "loss": 0.0265, "step": 7690 }, { "epoch": 1.98, "learning_rate": 2.820425806328204e-05, "loss": 0.0368, "step": 7691 }, { "epoch": 1.99, "learning_rate": 2.8191457191250494e-05, "loss": 0.0356, "step": 7692 }, { "epoch": 1.99, "learning_rate": 2.8178658084249594e-05, "loss": 0.0285, "step": 7693 }, { "epoch": 1.99, "learning_rate": 2.816586074331519e-05, "loss": 0.0391, "step": 7694 }, { "epoch": 1.99, "learning_rate": 2.8153065169483e-05, "loss": 0.0428, "step": 7695 }, { "epoch": 1.99, "learning_rate": 2.814027136378864e-05, "loss": 0.0293, "step": 7696 }, { "epoch": 1.99, "learning_rate": 2.8127479327267524e-05, "loss": 0.0365, "step": 7697 }, { "epoch": 1.99, "learning_rate": 2.811468906095494e-05, "loss": 0.0217, "step": 7698 }, { "epoch": 1.99, "learning_rate": 2.8101900565886086e-05, "loss": 0.0222, "step": 7699 }, { "epoch": 1.99, "learning_rate": 2.8089113843095915e-05, "loss": 0.0532, "step": 7700 }, { "epoch": 1.99, "learning_rate": 2.8076328893619354e-05, "loss": 0.0458, "step": 7701 }, { "epoch": 1.99, "learning_rate": 2.8063545718491103e-05, "loss": 0.0338, "step": 7702 }, { "epoch": 1.99, "learning_rate": 2.8050764318745715e-05, "loss": 0.0304, "step": 7703 }, { "epoch": 1.99, "learning_rate": 2.803798469541767e-05, "loss": 0.0462, "step": 7704 }, { "epoch": 1.99, "learning_rate": 2.802520684954122e-05, "loss": 0.0408, "step": 7705 }, { "epoch": 1.99, "learning_rate": 2.801243078215056e-05, "loss": 0.0263, "step": 7706 }, { "epoch": 1.99, "learning_rate": 2.7999656494279675e-05, "loss": 0.0421, "step": 7707 }, { "epoch": 1.99, "learning_rate": 2.79868839869624e-05, "loss": 0.0286, "step": 7708 }, { "epoch": 1.99, "learning_rate": 2.79741132612325e-05, "loss": 0.035, "step": 7709 }, { "epoch": 1.99, "learning_rate": 2.7961344318123518e-05, "loss": 0.0289, "step": 7710 }, { "epoch": 1.99, "learning_rate": 2.794857715866887e-05, "loss": 0.0308, "step": 7711 }, { "epoch": 1.99, "learning_rate": 2.793581178390188e-05, "loss": 0.0481, "step": 7712 }, { "epoch": 1.99, "learning_rate": 2.792304819485565e-05, "loss": 0.0522, "step": 7713 }, { "epoch": 1.99, "learning_rate": 2.7910286392563207e-05, "loss": 0.0569, "step": 7714 }, { "epoch": 1.99, "learning_rate": 2.7897526378057387e-05, "loss": 0.0391, "step": 7715 }, { "epoch": 1.99, "learning_rate": 2.7884768152370877e-05, "loss": 0.028, "step": 7716 }, { "epoch": 1.99, "learning_rate": 2.7872011716536284e-05, "loss": 0.03, "step": 7717 }, { "epoch": 1.99, "learning_rate": 2.785925707158597e-05, "loss": 0.0327, "step": 7718 }, { "epoch": 1.99, "learning_rate": 2.784650421855226e-05, "loss": 0.0512, "step": 7719 }, { "epoch": 1.99, "learning_rate": 2.7833753158467253e-05, "loss": 0.0605, "step": 7720 }, { "epoch": 1.99, "learning_rate": 2.7821003892362913e-05, "loss": 0.0353, "step": 7721 }, { "epoch": 1.99, "learning_rate": 2.7808256421271117e-05, "loss": 0.0373, "step": 7722 }, { "epoch": 1.99, "learning_rate": 2.779551074622353e-05, "loss": 0.036, "step": 7723 }, { "epoch": 1.99, "learning_rate": 2.7782766868251693e-05, "loss": 0.0233, "step": 7724 }, { "epoch": 1.99, "learning_rate": 2.7770024788387028e-05, "loss": 0.0354, "step": 7725 }, { "epoch": 1.99, "learning_rate": 2.7757284507660763e-05, "loss": 0.0442, "step": 7726 }, { "epoch": 1.99, "learning_rate": 2.7744546027104035e-05, "loss": 0.0512, "step": 7727 }, { "epoch": 1.99, "learning_rate": 2.7731809347747793e-05, "loss": 0.0318, "step": 7728 }, { "epoch": 1.99, "learning_rate": 2.771907447062284e-05, "loss": 0.0295, "step": 7729 }, { "epoch": 1.99, "learning_rate": 2.7706341396759883e-05, "loss": 0.0382, "step": 7730 }, { "epoch": 2.0, "learning_rate": 2.7693610127189413e-05, "loss": 0.0339, "step": 7731 }, { "epoch": 2.0, "learning_rate": 2.768088066294184e-05, "loss": 0.0443, "step": 7732 }, { "epoch": 2.0, "learning_rate": 2.766815300504738e-05, "loss": 0.046, "step": 7733 }, { "epoch": 2.0, "learning_rate": 2.7655427154536106e-05, "loss": 0.0346, "step": 7734 }, { "epoch": 2.0, "learning_rate": 2.7642703112437996e-05, "loss": 0.0412, "step": 7735 }, { "epoch": 2.0, "learning_rate": 2.7629980879782826e-05, "loss": 0.0262, "step": 7736 }, { "epoch": 2.0, "learning_rate": 2.7617260457600213e-05, "loss": 0.0442, "step": 7737 }, { "epoch": 2.0, "learning_rate": 2.7604541846919718e-05, "loss": 0.0291, "step": 7738 }, { "epoch": 2.0, "learning_rate": 2.7591825048770647e-05, "loss": 0.0405, "step": 7739 }, { "epoch": 2.0, "learning_rate": 2.7579110064182247e-05, "loss": 0.034, "step": 7740 }, { "epoch": 2.0, "learning_rate": 2.756639689418356e-05, "loss": 0.0484, "step": 7741 }, { "epoch": 2.0, "learning_rate": 2.7553685539803488e-05, "loss": 0.0385, "step": 7742 }, { "epoch": 2.0, "learning_rate": 2.754097600207083e-05, "loss": 0.0436, "step": 7743 }, { "epoch": 2.0, "learning_rate": 2.752826828201417e-05, "loss": 0.035, "step": 7744 }, { "epoch": 2.0, "learning_rate": 2.7515562380662018e-05, "loss": 0.0386, "step": 7745 }, { "epoch": 2.0, "learning_rate": 2.7502858299042687e-05, "loss": 0.0411, "step": 7746 }, { "epoch": 2.0, "learning_rate": 2.7490156038184333e-05, "loss": 0.0414, "step": 7747 }, { "epoch": 2.0, "learning_rate": 2.7477455599115032e-05, "loss": 0.0461, "step": 7748 }, { "epoch": 2.0, "learning_rate": 2.746475698286265e-05, "loss": 0.0415, "step": 7749 }, { "epoch": 2.0, "learning_rate": 2.7452060190454892e-05, "loss": 0.0454, "step": 7750 }, { "epoch": 2.0, "learning_rate": 2.74393652229194e-05, "loss": 0.0204, "step": 7751 }, { "epoch": 2.0, "learning_rate": 2.7426672081283565e-05, "loss": 0.0216, "step": 7752 }, { "epoch": 2.0, "learning_rate": 2.741398076657473e-05, "loss": 0.0289, "step": 7753 }, { "epoch": 2.0, "learning_rate": 2.7401291279820018e-05, "loss": 0.0266, "step": 7754 }, { "epoch": 2.0, "learning_rate": 2.7388603622046406e-05, "loss": 0.0203, "step": 7755 }, { "epoch": 2.0, "learning_rate": 2.7375917794280782e-05, "loss": 0.0213, "step": 7756 }, { "epoch": 2.0, "learning_rate": 2.7363233797549814e-05, "loss": 0.0237, "step": 7757 }, { "epoch": 2.0, "learning_rate": 2.7350551632880095e-05, "loss": 0.0261, "step": 7758 }, { "epoch": 2.0, "learning_rate": 2.7337871301298e-05, "loss": 0.0177, "step": 7759 }, { "epoch": 2.0, "learning_rate": 2.732519280382978e-05, "loss": 0.0195, "step": 7760 }, { "epoch": 2.0, "learning_rate": 2.7312516141501577e-05, "loss": 0.0199, "step": 7761 }, { "epoch": 2.0, "learning_rate": 2.7299841315339336e-05, "loss": 0.0267, "step": 7762 }, { "epoch": 2.0, "learning_rate": 2.728716832636884e-05, "loss": 0.0331, "step": 7763 }, { "epoch": 2.0, "learning_rate": 2.7274497175615798e-05, "loss": 0.019, "step": 7764 }, { "epoch": 2.0, "learning_rate": 2.7261827864105687e-05, "loss": 0.0306, "step": 7765 }, { "epoch": 2.0, "learning_rate": 2.7249160392863904e-05, "loss": 0.0308, "step": 7766 }, { "epoch": 2.0, "learning_rate": 2.7236494762915653e-05, "loss": 0.019, "step": 7767 }, { "epoch": 2.0, "learning_rate": 2.722383097528598e-05, "loss": 0.0236, "step": 7768 }, { "epoch": 2.0, "learning_rate": 2.7211169030999834e-05, "loss": 0.0318, "step": 7769 }, { "epoch": 2.01, "learning_rate": 2.7198508931081956e-05, "loss": 0.0311, "step": 7770 }, { "epoch": 2.01, "learning_rate": 2.7185850676557002e-05, "loss": 0.0268, "step": 7771 }, { "epoch": 2.01, "learning_rate": 2.717319426844942e-05, "loss": 0.0222, "step": 7772 }, { "epoch": 2.01, "learning_rate": 2.7160539707783504e-05, "loss": 0.025, "step": 7773 }, { "epoch": 2.01, "learning_rate": 2.714788699558348e-05, "loss": 0.0247, "step": 7774 }, { "epoch": 2.01, "learning_rate": 2.7135236132873353e-05, "loss": 0.0272, "step": 7775 }, { "epoch": 2.01, "learning_rate": 2.7122587120676958e-05, "loss": 0.0331, "step": 7776 }, { "epoch": 2.01, "learning_rate": 2.710993996001806e-05, "loss": 0.0352, "step": 7777 }, { "epoch": 2.01, "learning_rate": 2.7097294651920208e-05, "loss": 0.0265, "step": 7778 }, { "epoch": 2.01, "learning_rate": 2.7084651197406847e-05, "loss": 0.0217, "step": 7779 }, { "epoch": 2.01, "learning_rate": 2.7072009597501234e-05, "loss": 0.029, "step": 7780 }, { "epoch": 2.01, "learning_rate": 2.7059369853226478e-05, "loss": 0.0224, "step": 7781 }, { "epoch": 2.01, "learning_rate": 2.7046731965605587e-05, "loss": 0.0223, "step": 7782 }, { "epoch": 2.01, "learning_rate": 2.703409593566134e-05, "loss": 0.0335, "step": 7783 }, { "epoch": 2.01, "learning_rate": 2.7021461764416456e-05, "loss": 0.0278, "step": 7784 }, { "epoch": 2.01, "learning_rate": 2.700882945289343e-05, "loss": 0.0293, "step": 7785 }, { "epoch": 2.01, "learning_rate": 2.699619900211462e-05, "loss": 0.0216, "step": 7786 }, { "epoch": 2.01, "learning_rate": 2.6983570413102273e-05, "loss": 0.0229, "step": 7787 }, { "epoch": 2.01, "learning_rate": 2.697094368687845e-05, "loss": 0.028, "step": 7788 }, { "epoch": 2.01, "learning_rate": 2.6958318824465047e-05, "loss": 0.0256, "step": 7789 }, { "epoch": 2.01, "learning_rate": 2.6945695826883865e-05, "loss": 0.0184, "step": 7790 }, { "epoch": 2.01, "learning_rate": 2.693307469515649e-05, "loss": 0.0245, "step": 7791 }, { "epoch": 2.01, "learning_rate": 2.6920455430304424e-05, "loss": 0.0235, "step": 7792 }, { "epoch": 2.01, "learning_rate": 2.690783803334897e-05, "loss": 0.025, "step": 7793 }, { "epoch": 2.01, "learning_rate": 2.6895222505311257e-05, "loss": 0.024, "step": 7794 }, { "epoch": 2.01, "learning_rate": 2.6882608847212353e-05, "loss": 0.0388, "step": 7795 }, { "epoch": 2.01, "learning_rate": 2.6869997060073064e-05, "loss": 0.0312, "step": 7796 }, { "epoch": 2.01, "learning_rate": 2.6857387144914146e-05, "loss": 0.0283, "step": 7797 }, { "epoch": 2.01, "learning_rate": 2.6844779102756145e-05, "loss": 0.031, "step": 7798 }, { "epoch": 2.01, "learning_rate": 2.6832172934619438e-05, "loss": 0.0218, "step": 7799 }, { "epoch": 2.01, "learning_rate": 2.681956864152432e-05, "loss": 0.0212, "step": 7800 }, { "epoch": 2.01, "learning_rate": 2.6806966224490883e-05, "loss": 0.0253, "step": 7801 }, { "epoch": 2.01, "learning_rate": 2.679436568453904e-05, "loss": 0.0259, "step": 7802 }, { "epoch": 2.01, "learning_rate": 2.6781767022688653e-05, "loss": 0.033, "step": 7803 }, { "epoch": 2.01, "learning_rate": 2.6769170239959314e-05, "loss": 0.0174, "step": 7804 }, { "epoch": 2.01, "learning_rate": 2.675657533737056e-05, "loss": 0.025, "step": 7805 }, { "epoch": 2.01, "learning_rate": 2.6743982315941725e-05, "loss": 0.0262, "step": 7806 }, { "epoch": 2.01, "learning_rate": 2.6731391176691966e-05, "loss": 0.0239, "step": 7807 }, { "epoch": 2.01, "learning_rate": 2.671880192064037e-05, "loss": 0.0246, "step": 7808 }, { "epoch": 2.02, "learning_rate": 2.6706214548805786e-05, "loss": 0.0321, "step": 7809 }, { "epoch": 2.02, "learning_rate": 2.669362906220697e-05, "loss": 0.0318, "step": 7810 }, { "epoch": 2.02, "learning_rate": 2.668104546186251e-05, "loss": 0.0243, "step": 7811 }, { "epoch": 2.02, "learning_rate": 2.666846374879079e-05, "loss": 0.0269, "step": 7812 }, { "epoch": 2.02, "learning_rate": 2.665588392401014e-05, "loss": 0.0269, "step": 7813 }, { "epoch": 2.02, "learning_rate": 2.6643305988538658e-05, "loss": 0.0251, "step": 7814 }, { "epoch": 2.02, "learning_rate": 2.6630729943394294e-05, "loss": 0.0279, "step": 7815 }, { "epoch": 2.02, "learning_rate": 2.66181557895949e-05, "loss": 0.0206, "step": 7816 }, { "epoch": 2.02, "learning_rate": 2.660558352815812e-05, "loss": 0.0239, "step": 7817 }, { "epoch": 2.02, "learning_rate": 2.6593013160101475e-05, "loss": 0.0262, "step": 7818 }, { "epoch": 2.02, "learning_rate": 2.6580444686442325e-05, "loss": 0.0315, "step": 7819 }, { "epoch": 2.02, "learning_rate": 2.656787810819784e-05, "loss": 0.0263, "step": 7820 }, { "epoch": 2.02, "learning_rate": 2.6555313426385132e-05, "loss": 0.0269, "step": 7821 }, { "epoch": 2.02, "learning_rate": 2.654275064202103e-05, "loss": 0.0229, "step": 7822 }, { "epoch": 2.02, "learning_rate": 2.6530189756122336e-05, "loss": 0.0322, "step": 7823 }, { "epoch": 2.02, "learning_rate": 2.651763076970562e-05, "loss": 0.0266, "step": 7824 }, { "epoch": 2.02, "learning_rate": 2.650507368378729e-05, "loss": 0.0228, "step": 7825 }, { "epoch": 2.02, "learning_rate": 2.6492518499383673e-05, "loss": 0.0264, "step": 7826 }, { "epoch": 2.02, "learning_rate": 2.647996521751086e-05, "loss": 0.029, "step": 7827 }, { "epoch": 2.02, "learning_rate": 2.6467413839184864e-05, "loss": 0.0269, "step": 7828 }, { "epoch": 2.02, "learning_rate": 2.6454864365421477e-05, "loss": 0.0294, "step": 7829 }, { "epoch": 2.02, "learning_rate": 2.6442316797236354e-05, "loss": 0.0281, "step": 7830 }, { "epoch": 2.02, "learning_rate": 2.6429771135645055e-05, "loss": 0.0282, "step": 7831 }, { "epoch": 2.02, "learning_rate": 2.64172273816629e-05, "loss": 0.0261, "step": 7832 }, { "epoch": 2.02, "learning_rate": 2.6404685536305084e-05, "loss": 0.0277, "step": 7833 }, { "epoch": 2.02, "learning_rate": 2.63921456005867e-05, "loss": 0.0255, "step": 7834 }, { "epoch": 2.02, "learning_rate": 2.637960757552259e-05, "loss": 0.0265, "step": 7835 }, { "epoch": 2.02, "learning_rate": 2.6367071462127547e-05, "loss": 0.0245, "step": 7836 }, { "epoch": 2.02, "learning_rate": 2.6354537261416124e-05, "loss": 0.0369, "step": 7837 }, { "epoch": 2.02, "learning_rate": 2.6342004974402735e-05, "loss": 0.0251, "step": 7838 }, { "epoch": 2.02, "learning_rate": 2.63294746021017e-05, "loss": 0.0273, "step": 7839 }, { "epoch": 2.02, "learning_rate": 2.6316946145527095e-05, "loss": 0.0324, "step": 7840 }, { "epoch": 2.02, "learning_rate": 2.6304419605692925e-05, "loss": 0.0252, "step": 7841 }, { "epoch": 2.02, "learning_rate": 2.6291894983612987e-05, "loss": 0.0268, "step": 7842 }, { "epoch": 2.02, "learning_rate": 2.6279372280300913e-05, "loss": 0.0288, "step": 7843 }, { "epoch": 2.02, "learning_rate": 2.6266851496770238e-05, "loss": 0.0371, "step": 7844 }, { "epoch": 2.02, "learning_rate": 2.625433263403429e-05, "loss": 0.0329, "step": 7845 }, { "epoch": 2.02, "learning_rate": 2.6241815693106242e-05, "loss": 0.0289, "step": 7846 }, { "epoch": 2.03, "learning_rate": 2.6229300674999156e-05, "loss": 0.0283, "step": 7847 }, { "epoch": 2.03, "learning_rate": 2.6216787580725887e-05, "loss": 0.0292, "step": 7848 }, { "epoch": 2.03, "learning_rate": 2.6204276411299182e-05, "loss": 0.0303, "step": 7849 }, { "epoch": 2.03, "learning_rate": 2.6191767167731597e-05, "loss": 0.0337, "step": 7850 }, { "epoch": 2.03, "learning_rate": 2.6179259851035513e-05, "loss": 0.0222, "step": 7851 }, { "epoch": 2.03, "learning_rate": 2.6166754462223232e-05, "loss": 0.019, "step": 7852 }, { "epoch": 2.03, "learning_rate": 2.6154251002306817e-05, "loss": 0.0241, "step": 7853 }, { "epoch": 2.03, "learning_rate": 2.6141749472298238e-05, "loss": 0.032, "step": 7854 }, { "epoch": 2.03, "learning_rate": 2.6129249873209265e-05, "loss": 0.0202, "step": 7855 }, { "epoch": 2.03, "learning_rate": 2.6116752206051514e-05, "loss": 0.0222, "step": 7856 }, { "epoch": 2.03, "learning_rate": 2.6104256471836498e-05, "loss": 0.0244, "step": 7857 }, { "epoch": 2.03, "learning_rate": 2.6091762671575503e-05, "loss": 0.0229, "step": 7858 }, { "epoch": 2.03, "learning_rate": 2.6079270806279682e-05, "loss": 0.0223, "step": 7859 }, { "epoch": 2.03, "learning_rate": 2.6066780876960073e-05, "loss": 0.0337, "step": 7860 }, { "epoch": 2.03, "learning_rate": 2.6054292884627485e-05, "loss": 0.0216, "step": 7861 }, { "epoch": 2.03, "learning_rate": 2.6041806830292647e-05, "loss": 0.0294, "step": 7862 }, { "epoch": 2.03, "learning_rate": 2.6029322714966083e-05, "loss": 0.0252, "step": 7863 }, { "epoch": 2.03, "learning_rate": 2.601684053965814e-05, "loss": 0.0199, "step": 7864 }, { "epoch": 2.03, "learning_rate": 2.6004360305379073e-05, "loss": 0.0284, "step": 7865 }, { "epoch": 2.03, "learning_rate": 2.5991882013138906e-05, "loss": 0.0313, "step": 7866 }, { "epoch": 2.03, "learning_rate": 2.5979405663947597e-05, "loss": 0.0282, "step": 7867 }, { "epoch": 2.03, "learning_rate": 2.5966931258814863e-05, "loss": 0.022, "step": 7868 }, { "epoch": 2.03, "learning_rate": 2.5954458798750282e-05, "loss": 0.0223, "step": 7869 }, { "epoch": 2.03, "learning_rate": 2.5941988284763326e-05, "loss": 0.0256, "step": 7870 }, { "epoch": 2.03, "learning_rate": 2.5929519717863243e-05, "loss": 0.0269, "step": 7871 }, { "epoch": 2.03, "learning_rate": 2.5917053099059142e-05, "loss": 0.0316, "step": 7872 }, { "epoch": 2.03, "learning_rate": 2.590458842936001e-05, "loss": 0.0256, "step": 7873 }, { "epoch": 2.03, "learning_rate": 2.5892125709774627e-05, "loss": 0.0215, "step": 7874 }, { "epoch": 2.03, "learning_rate": 2.5879664941311672e-05, "loss": 0.0198, "step": 7875 }, { "epoch": 2.03, "learning_rate": 2.5867206124979608e-05, "loss": 0.0223, "step": 7876 }, { "epoch": 2.03, "learning_rate": 2.585474926178675e-05, "loss": 0.0211, "step": 7877 }, { "epoch": 2.03, "learning_rate": 2.5842294352741303e-05, "loss": 0.0287, "step": 7878 }, { "epoch": 2.03, "learning_rate": 2.582984139885124e-05, "loss": 0.0254, "step": 7879 }, { "epoch": 2.03, "learning_rate": 2.5817390401124464e-05, "loss": 0.0305, "step": 7880 }, { "epoch": 2.03, "learning_rate": 2.5804941360568647e-05, "loss": 0.0238, "step": 7881 }, { "epoch": 2.03, "learning_rate": 2.5792494278191303e-05, "loss": 0.0283, "step": 7882 }, { "epoch": 2.03, "learning_rate": 2.578004915499985e-05, "loss": 0.0272, "step": 7883 }, { "epoch": 2.03, "learning_rate": 2.576760599200151e-05, "loss": 0.0365, "step": 7884 }, { "epoch": 2.03, "learning_rate": 2.5755164790203296e-05, "loss": 0.0266, "step": 7885 }, { "epoch": 2.04, "learning_rate": 2.5742725550612167e-05, "loss": 0.0314, "step": 7886 }, { "epoch": 2.04, "learning_rate": 2.5730288274234832e-05, "loss": 0.0243, "step": 7887 }, { "epoch": 2.04, "learning_rate": 2.571785296207791e-05, "loss": 0.0263, "step": 7888 }, { "epoch": 2.04, "learning_rate": 2.5705419615147806e-05, "loss": 0.0316, "step": 7889 }, { "epoch": 2.04, "learning_rate": 2.5692988234450778e-05, "loss": 0.0274, "step": 7890 }, { "epoch": 2.04, "learning_rate": 2.568055882099296e-05, "loss": 0.0356, "step": 7891 }, { "epoch": 2.04, "learning_rate": 2.5668131375780276e-05, "loss": 0.0249, "step": 7892 }, { "epoch": 2.04, "learning_rate": 2.5655705899818542e-05, "loss": 0.0328, "step": 7893 }, { "epoch": 2.04, "learning_rate": 2.5643282394113388e-05, "loss": 0.0197, "step": 7894 }, { "epoch": 2.04, "learning_rate": 2.5630860859670248e-05, "loss": 0.0245, "step": 7895 }, { "epoch": 2.04, "learning_rate": 2.561844129749448e-05, "loss": 0.0268, "step": 7896 }, { "epoch": 2.04, "learning_rate": 2.560602370859122e-05, "loss": 0.0252, "step": 7897 }, { "epoch": 2.04, "learning_rate": 2.559360809396543e-05, "loss": 0.0299, "step": 7898 }, { "epoch": 2.04, "learning_rate": 2.5581194454622e-05, "loss": 0.0275, "step": 7899 }, { "epoch": 2.04, "learning_rate": 2.556878279156555e-05, "loss": 0.0227, "step": 7900 }, { "epoch": 2.04, "learning_rate": 2.5556373105800637e-05, "loss": 0.0244, "step": 7901 }, { "epoch": 2.04, "learning_rate": 2.5543965398331588e-05, "loss": 0.0178, "step": 7902 }, { "epoch": 2.04, "learning_rate": 2.5531559670162587e-05, "loss": 0.0232, "step": 7903 }, { "epoch": 2.04, "learning_rate": 2.55191559222977e-05, "loss": 0.0228, "step": 7904 }, { "epoch": 2.04, "learning_rate": 2.550675415574075e-05, "loss": 0.0215, "step": 7905 }, { "epoch": 2.04, "learning_rate": 2.549435437149551e-05, "loss": 0.0302, "step": 7906 }, { "epoch": 2.04, "learning_rate": 2.54819565705655e-05, "loss": 0.0311, "step": 7907 }, { "epoch": 2.04, "learning_rate": 2.546956075395409e-05, "loss": 0.0244, "step": 7908 }, { "epoch": 2.04, "learning_rate": 2.545716692266456e-05, "loss": 0.0198, "step": 7909 }, { "epoch": 2.04, "learning_rate": 2.5444775077699944e-05, "loss": 0.0279, "step": 7910 }, { "epoch": 2.04, "learning_rate": 2.5432385220063147e-05, "loss": 0.0194, "step": 7911 }, { "epoch": 2.04, "learning_rate": 2.5419997350756942e-05, "loss": 0.027, "step": 7912 }, { "epoch": 2.04, "learning_rate": 2.5407611470783887e-05, "loss": 0.0274, "step": 7913 }, { "epoch": 2.04, "learning_rate": 2.5395227581146453e-05, "loss": 0.0224, "step": 7914 }, { "epoch": 2.04, "learning_rate": 2.5382845682846866e-05, "loss": 0.0199, "step": 7915 }, { "epoch": 2.04, "learning_rate": 2.537046577688723e-05, "loss": 0.023, "step": 7916 }, { "epoch": 2.04, "learning_rate": 2.5358087864269514e-05, "loss": 0.0258, "step": 7917 }, { "epoch": 2.04, "learning_rate": 2.534571194599547e-05, "loss": 0.0301, "step": 7918 }, { "epoch": 2.04, "learning_rate": 2.533333802306675e-05, "loss": 0.0255, "step": 7919 }, { "epoch": 2.04, "learning_rate": 2.53209660964848e-05, "loss": 0.0337, "step": 7920 }, { "epoch": 2.04, "learning_rate": 2.5308596167250886e-05, "loss": 0.0276, "step": 7921 }, { "epoch": 2.04, "learning_rate": 2.5296228236366194e-05, "loss": 0.0242, "step": 7922 }, { "epoch": 2.04, "learning_rate": 2.5283862304831674e-05, "loss": 0.0311, "step": 7923 }, { "epoch": 2.04, "learning_rate": 2.5271498373648122e-05, "loss": 0.0224, "step": 7924 }, { "epoch": 2.05, "learning_rate": 2.5259136443816223e-05, "loss": 0.022, "step": 7925 }, { "epoch": 2.05, "learning_rate": 2.5246776516336422e-05, "loss": 0.0231, "step": 7926 }, { "epoch": 2.05, "learning_rate": 2.5234418592209085e-05, "loss": 0.0281, "step": 7927 }, { "epoch": 2.05, "learning_rate": 2.5222062672434364e-05, "loss": 0.0353, "step": 7928 }, { "epoch": 2.05, "learning_rate": 2.520970875801224e-05, "loss": 0.0246, "step": 7929 }, { "epoch": 2.05, "learning_rate": 2.5197356849942587e-05, "loss": 0.0335, "step": 7930 }, { "epoch": 2.05, "learning_rate": 2.5185006949225043e-05, "loss": 0.0342, "step": 7931 }, { "epoch": 2.05, "learning_rate": 2.5172659056859162e-05, "loss": 0.027, "step": 7932 }, { "epoch": 2.05, "learning_rate": 2.5160313173844276e-05, "loss": 0.0273, "step": 7933 }, { "epoch": 2.05, "learning_rate": 2.514796930117955e-05, "loss": 0.0291, "step": 7934 }, { "epoch": 2.05, "learning_rate": 2.513562743986406e-05, "loss": 0.0347, "step": 7935 }, { "epoch": 2.05, "learning_rate": 2.5123287590896643e-05, "loss": 0.0226, "step": 7936 }, { "epoch": 2.05, "learning_rate": 2.511094975527598e-05, "loss": 0.0308, "step": 7937 }, { "epoch": 2.05, "learning_rate": 2.509861393400066e-05, "loss": 0.0435, "step": 7938 }, { "epoch": 2.05, "learning_rate": 2.5086280128069e-05, "loss": 0.0247, "step": 7939 }, { "epoch": 2.05, "learning_rate": 2.5073948338479264e-05, "loss": 0.0283, "step": 7940 }, { "epoch": 2.05, "learning_rate": 2.5061618566229477e-05, "loss": 0.0317, "step": 7941 }, { "epoch": 2.05, "learning_rate": 2.5049290812317505e-05, "loss": 0.0265, "step": 7942 }, { "epoch": 2.05, "learning_rate": 2.503696507774111e-05, "loss": 0.0314, "step": 7943 }, { "epoch": 2.05, "learning_rate": 2.502464136349781e-05, "loss": 0.0288, "step": 7944 }, { "epoch": 2.05, "learning_rate": 2.5012319670585037e-05, "loss": 0.0293, "step": 7945 }, { "epoch": 2.05, "learning_rate": 2.500000000000001e-05, "loss": 0.0274, "step": 7946 }, { "epoch": 2.05, "learning_rate": 2.498768235273977e-05, "loss": 0.0346, "step": 7947 }, { "epoch": 2.05, "learning_rate": 2.4975366729801268e-05, "loss": 0.0321, "step": 7948 }, { "epoch": 2.05, "learning_rate": 2.496305313218122e-05, "loss": 0.0315, "step": 7949 }, { "epoch": 2.05, "learning_rate": 2.495074156087618e-05, "loss": 0.0376, "step": 7950 }, { "epoch": 2.05, "learning_rate": 2.4938432016882596e-05, "loss": 0.0169, "step": 7951 }, { "epoch": 2.05, "learning_rate": 2.492612450119669e-05, "loss": 0.0349, "step": 7952 }, { "epoch": 2.05, "learning_rate": 2.4913819014814576e-05, "loss": 0.0251, "step": 7953 }, { "epoch": 2.05, "learning_rate": 2.4901515558732158e-05, "loss": 0.0231, "step": 7954 }, { "epoch": 2.05, "learning_rate": 2.4889214133945173e-05, "loss": 0.0266, "step": 7955 }, { "epoch": 2.05, "learning_rate": 2.4876914741449247e-05, "loss": 0.0221, "step": 7956 }, { "epoch": 2.05, "learning_rate": 2.4864617382239768e-05, "loss": 0.0321, "step": 7957 }, { "epoch": 2.05, "learning_rate": 2.4852322057312038e-05, "loss": 0.022, "step": 7958 }, { "epoch": 2.05, "learning_rate": 2.4840028767661138e-05, "loss": 0.0259, "step": 7959 }, { "epoch": 2.05, "learning_rate": 2.482773751428198e-05, "loss": 0.0261, "step": 7960 }, { "epoch": 2.05, "learning_rate": 2.4815448298169364e-05, "loss": 0.0317, "step": 7961 }, { "epoch": 2.05, "learning_rate": 2.480316112031788e-05, "loss": 0.023, "step": 7962 }, { "epoch": 2.05, "learning_rate": 2.4790875981721952e-05, "loss": 0.0228, "step": 7963 }, { "epoch": 2.06, "learning_rate": 2.477859288337588e-05, "loss": 0.0303, "step": 7964 }, { "epoch": 2.06, "learning_rate": 2.4766311826273742e-05, "loss": 0.0259, "step": 7965 }, { "epoch": 2.06, "learning_rate": 2.475403281140951e-05, "loss": 0.0261, "step": 7966 }, { "epoch": 2.06, "learning_rate": 2.4741755839776952e-05, "loss": 0.0206, "step": 7967 }, { "epoch": 2.06, "learning_rate": 2.4729480912369663e-05, "loss": 0.0262, "step": 7968 }, { "epoch": 2.06, "learning_rate": 2.4717208030181115e-05, "loss": 0.0344, "step": 7969 }, { "epoch": 2.06, "learning_rate": 2.470493719420456e-05, "loss": 0.0169, "step": 7970 }, { "epoch": 2.06, "learning_rate": 2.469266840543315e-05, "loss": 0.0242, "step": 7971 }, { "epoch": 2.06, "learning_rate": 2.4680401664859814e-05, "loss": 0.0219, "step": 7972 }, { "epoch": 2.06, "learning_rate": 2.4668136973477318e-05, "loss": 0.0268, "step": 7973 }, { "epoch": 2.06, "learning_rate": 2.4655874332278317e-05, "loss": 0.0362, "step": 7974 }, { "epoch": 2.06, "learning_rate": 2.4643613742255227e-05, "loss": 0.0237, "step": 7975 }, { "epoch": 2.06, "learning_rate": 2.4631355204400376e-05, "loss": 0.0252, "step": 7976 }, { "epoch": 2.06, "learning_rate": 2.4619098719705858e-05, "loss": 0.0313, "step": 7977 }, { "epoch": 2.06, "learning_rate": 2.460684428916361e-05, "loss": 0.0237, "step": 7978 }, { "epoch": 2.06, "learning_rate": 2.4594591913765464e-05, "loss": 0.029, "step": 7979 }, { "epoch": 2.06, "learning_rate": 2.4582341594503012e-05, "loss": 0.0305, "step": 7980 }, { "epoch": 2.06, "learning_rate": 2.45700933323677e-05, "loss": 0.0223, "step": 7981 }, { "epoch": 2.06, "learning_rate": 2.455784712835084e-05, "loss": 0.023, "step": 7982 }, { "epoch": 2.06, "learning_rate": 2.4545602983443537e-05, "loss": 0.0244, "step": 7983 }, { "epoch": 2.06, "learning_rate": 2.4533360898636764e-05, "loss": 0.0204, "step": 7984 }, { "epoch": 2.06, "learning_rate": 2.4521120874921307e-05, "loss": 0.0206, "step": 7985 }, { "epoch": 2.06, "learning_rate": 2.4508882913287755e-05, "loss": 0.0249, "step": 7986 }, { "epoch": 2.06, "learning_rate": 2.4496647014726614e-05, "loss": 0.0248, "step": 7987 }, { "epoch": 2.06, "learning_rate": 2.448441318022812e-05, "loss": 0.0222, "step": 7988 }, { "epoch": 2.06, "learning_rate": 2.4472181410782442e-05, "loss": 0.0229, "step": 7989 }, { "epoch": 2.06, "learning_rate": 2.4459951707379514e-05, "loss": 0.0312, "step": 7990 }, { "epoch": 2.06, "learning_rate": 2.44477240710091e-05, "loss": 0.0327, "step": 7991 }, { "epoch": 2.06, "learning_rate": 2.443549850266085e-05, "loss": 0.0239, "step": 7992 }, { "epoch": 2.06, "learning_rate": 2.4423275003324213e-05, "loss": 0.0355, "step": 7993 }, { "epoch": 2.06, "learning_rate": 2.4411053573988447e-05, "loss": 0.025, "step": 7994 }, { "epoch": 2.06, "learning_rate": 2.4398834215642703e-05, "loss": 0.028, "step": 7995 }, { "epoch": 2.06, "learning_rate": 2.4386616929275898e-05, "loss": 0.0208, "step": 7996 }, { "epoch": 2.06, "learning_rate": 2.4374401715876842e-05, "loss": 0.0226, "step": 7997 }, { "epoch": 2.06, "learning_rate": 2.4362188576434146e-05, "loss": 0.03, "step": 7998 }, { "epoch": 2.06, "learning_rate": 2.4349977511936223e-05, "loss": 0.031, "step": 7999 }, { "epoch": 2.06, "learning_rate": 2.4337768523371397e-05, "loss": 0.0217, "step": 8000 }, { "epoch": 2.06, "learning_rate": 2.4325561611727733e-05, "loss": 0.0218, "step": 8001 }, { "epoch": 2.07, "learning_rate": 2.4313356777993212e-05, "loss": 0.02, "step": 8002 }, { "epoch": 2.07, "learning_rate": 2.43011540231556e-05, "loss": 0.0204, "step": 8003 }, { "epoch": 2.07, "learning_rate": 2.428895334820247e-05, "loss": 0.0377, "step": 8004 }, { "epoch": 2.07, "learning_rate": 2.42767547541213e-05, "loss": 0.0252, "step": 8005 }, { "epoch": 2.07, "learning_rate": 2.426455824189934e-05, "loss": 0.0288, "step": 8006 }, { "epoch": 2.07, "learning_rate": 2.425236381252368e-05, "loss": 0.028, "step": 8007 }, { "epoch": 2.07, "learning_rate": 2.4240171466981278e-05, "loss": 0.0314, "step": 8008 }, { "epoch": 2.07, "learning_rate": 2.4227981206258866e-05, "loss": 0.0239, "step": 8009 }, { "epoch": 2.07, "learning_rate": 2.421579303134307e-05, "loss": 0.029, "step": 8010 }, { "epoch": 2.07, "learning_rate": 2.4203606943220304e-05, "loss": 0.0278, "step": 8011 }, { "epoch": 2.07, "learning_rate": 2.4191422942876803e-05, "loss": 0.028, "step": 8012 }, { "epoch": 2.07, "learning_rate": 2.4179241031298687e-05, "loss": 0.0245, "step": 8013 }, { "epoch": 2.07, "learning_rate": 2.4167061209471842e-05, "loss": 0.0259, "step": 8014 }, { "epoch": 2.07, "learning_rate": 2.4154883478382055e-05, "loss": 0.029, "step": 8015 }, { "epoch": 2.07, "learning_rate": 2.414270783901489e-05, "loss": 0.0221, "step": 8016 }, { "epoch": 2.07, "learning_rate": 2.4130534292355727e-05, "loss": 0.0292, "step": 8017 }, { "epoch": 2.07, "learning_rate": 2.4118362839389863e-05, "loss": 0.0274, "step": 8018 }, { "epoch": 2.07, "learning_rate": 2.4106193481102334e-05, "loss": 0.0292, "step": 8019 }, { "epoch": 2.07, "learning_rate": 2.409402621847804e-05, "loss": 0.0283, "step": 8020 }, { "epoch": 2.07, "learning_rate": 2.4081861052501742e-05, "loss": 0.0291, "step": 8021 }, { "epoch": 2.07, "learning_rate": 2.4069697984157962e-05, "loss": 0.0305, "step": 8022 }, { "epoch": 2.07, "learning_rate": 2.4057537014431143e-05, "loss": 0.0263, "step": 8023 }, { "epoch": 2.07, "learning_rate": 2.4045378144305475e-05, "loss": 0.0284, "step": 8024 }, { "epoch": 2.07, "learning_rate": 2.4033221374765008e-05, "loss": 0.034, "step": 8025 }, { "epoch": 2.07, "learning_rate": 2.4021066706793656e-05, "loss": 0.0341, "step": 8026 }, { "epoch": 2.07, "learning_rate": 2.4008914141375093e-05, "loss": 0.0278, "step": 8027 }, { "epoch": 2.07, "learning_rate": 2.3996763679492906e-05, "loss": 0.0232, "step": 8028 }, { "epoch": 2.07, "learning_rate": 2.3984615322130443e-05, "loss": 0.0269, "step": 8029 }, { "epoch": 2.07, "learning_rate": 2.3972469070270893e-05, "loss": 0.0314, "step": 8030 }, { "epoch": 2.07, "learning_rate": 2.3960324924897326e-05, "loss": 0.0247, "step": 8031 }, { "epoch": 2.07, "learning_rate": 2.3948182886992583e-05, "loss": 0.0303, "step": 8032 }, { "epoch": 2.07, "learning_rate": 2.3936042957539333e-05, "loss": 0.0241, "step": 8033 }, { "epoch": 2.07, "learning_rate": 2.3923905137520143e-05, "loss": 0.0297, "step": 8034 }, { "epoch": 2.07, "learning_rate": 2.391176942791732e-05, "loss": 0.0278, "step": 8035 }, { "epoch": 2.07, "learning_rate": 2.3899635829713078e-05, "loss": 0.0273, "step": 8036 }, { "epoch": 2.07, "learning_rate": 2.3887504343889416e-05, "loss": 0.0232, "step": 8037 }, { "epoch": 2.07, "learning_rate": 2.3875374971428142e-05, "loss": 0.0312, "step": 8038 }, { "epoch": 2.07, "learning_rate": 2.3863247713310972e-05, "loss": 0.0257, "step": 8039 }, { "epoch": 2.07, "learning_rate": 2.3851122570519353e-05, "loss": 0.0308, "step": 8040 }, { "epoch": 2.08, "learning_rate": 2.3838999544034653e-05, "loss": 0.0299, "step": 8041 }, { "epoch": 2.08, "learning_rate": 2.3826878634838e-05, "loss": 0.0325, "step": 8042 }, { "epoch": 2.08, "learning_rate": 2.3814759843910363e-05, "loss": 0.0224, "step": 8043 }, { "epoch": 2.08, "learning_rate": 2.3802643172232585e-05, "loss": 0.0272, "step": 8044 }, { "epoch": 2.08, "learning_rate": 2.3790528620785292e-05, "loss": 0.031, "step": 8045 }, { "epoch": 2.08, "learning_rate": 2.3778416190548924e-05, "loss": 0.028, "step": 8046 }, { "epoch": 2.08, "learning_rate": 2.376630588250382e-05, "loss": 0.0321, "step": 8047 }, { "epoch": 2.08, "learning_rate": 2.375419769763006e-05, "loss": 0.0299, "step": 8048 }, { "epoch": 2.08, "learning_rate": 2.3742091636907637e-05, "loss": 0.0262, "step": 8049 }, { "epoch": 2.08, "learning_rate": 2.3729987701316313e-05, "loss": 0.0348, "step": 8050 }, { "epoch": 2.08, "learning_rate": 2.371788589183568e-05, "loss": 0.0139, "step": 8051 }, { "epoch": 2.08, "learning_rate": 2.37057862094452e-05, "loss": 0.0241, "step": 8052 }, { "epoch": 2.08, "learning_rate": 2.3693688655124108e-05, "loss": 0.0218, "step": 8053 }, { "epoch": 2.08, "learning_rate": 2.368159322985153e-05, "loss": 0.0301, "step": 8054 }, { "epoch": 2.08, "learning_rate": 2.366949993460637e-05, "loss": 0.026, "step": 8055 }, { "epoch": 2.08, "learning_rate": 2.3657408770367345e-05, "loss": 0.0272, "step": 8056 }, { "epoch": 2.08, "learning_rate": 2.364531973811308e-05, "loss": 0.0242, "step": 8057 }, { "epoch": 2.08, "learning_rate": 2.3633232838821956e-05, "loss": 0.0288, "step": 8058 }, { "epoch": 2.08, "learning_rate": 2.362114807347217e-05, "loss": 0.0302, "step": 8059 }, { "epoch": 2.08, "learning_rate": 2.3609065443041828e-05, "loss": 0.0242, "step": 8060 }, { "epoch": 2.08, "learning_rate": 2.3596984948508776e-05, "loss": 0.0241, "step": 8061 }, { "epoch": 2.08, "learning_rate": 2.3584906590850758e-05, "loss": 0.0233, "step": 8062 }, { "epoch": 2.08, "learning_rate": 2.357283037104529e-05, "loss": 0.0277, "step": 8063 }, { "epoch": 2.08, "learning_rate": 2.3560756290069725e-05, "loss": 0.036, "step": 8064 }, { "epoch": 2.08, "learning_rate": 2.354868434890129e-05, "loss": 0.0214, "step": 8065 }, { "epoch": 2.08, "learning_rate": 2.353661454851696e-05, "loss": 0.0245, "step": 8066 }, { "epoch": 2.08, "learning_rate": 2.352454688989362e-05, "loss": 0.0245, "step": 8067 }, { "epoch": 2.08, "learning_rate": 2.3512481374007933e-05, "loss": 0.0244, "step": 8068 }, { "epoch": 2.08, "learning_rate": 2.3500418001836366e-05, "loss": 0.0183, "step": 8069 }, { "epoch": 2.08, "learning_rate": 2.3488356774355284e-05, "loss": 0.0273, "step": 8070 }, { "epoch": 2.08, "learning_rate": 2.347629769254083e-05, "loss": 0.0199, "step": 8071 }, { "epoch": 2.08, "learning_rate": 2.3464240757368944e-05, "loss": 0.0277, "step": 8072 }, { "epoch": 2.08, "learning_rate": 2.3452185969815483e-05, "loss": 0.026, "step": 8073 }, { "epoch": 2.08, "learning_rate": 2.344013333085603e-05, "loss": 0.0264, "step": 8074 }, { "epoch": 2.08, "learning_rate": 2.342808284146609e-05, "loss": 0.0286, "step": 8075 }, { "epoch": 2.08, "learning_rate": 2.3416034502620908e-05, "loss": 0.0254, "step": 8076 }, { "epoch": 2.08, "learning_rate": 2.3403988315295592e-05, "loss": 0.0266, "step": 8077 }, { "epoch": 2.08, "learning_rate": 2.3391944280465107e-05, "loss": 0.0286, "step": 8078 }, { "epoch": 2.08, "learning_rate": 2.337990239910417e-05, "loss": 0.0184, "step": 8079 }, { "epoch": 2.09, "learning_rate": 2.3367862672187406e-05, "loss": 0.0252, "step": 8080 }, { "epoch": 2.09, "learning_rate": 2.3355825100689215e-05, "loss": 0.0258, "step": 8081 }, { "epoch": 2.09, "learning_rate": 2.334378968558381e-05, "loss": 0.026, "step": 8082 }, { "epoch": 2.09, "learning_rate": 2.3331756427845285e-05, "loss": 0.0266, "step": 8083 }, { "epoch": 2.09, "learning_rate": 2.3319725328447516e-05, "loss": 0.0254, "step": 8084 }, { "epoch": 2.09, "learning_rate": 2.3307696388364196e-05, "loss": 0.0273, "step": 8085 }, { "epoch": 2.09, "learning_rate": 2.3295669608568903e-05, "loss": 0.0271, "step": 8086 }, { "epoch": 2.09, "learning_rate": 2.3283644990034954e-05, "loss": 0.023, "step": 8087 }, { "epoch": 2.09, "learning_rate": 2.327162253373558e-05, "loss": 0.0246, "step": 8088 }, { "epoch": 2.09, "learning_rate": 2.325960224064378e-05, "loss": 0.0315, "step": 8089 }, { "epoch": 2.09, "learning_rate": 2.3247584111732367e-05, "loss": 0.034, "step": 8090 }, { "epoch": 2.09, "learning_rate": 2.3235568147974048e-05, "loss": 0.0247, "step": 8091 }, { "epoch": 2.09, "learning_rate": 2.3223554350341263e-05, "loss": 0.0319, "step": 8092 }, { "epoch": 2.09, "learning_rate": 2.3211542719806377e-05, "loss": 0.0234, "step": 8093 }, { "epoch": 2.09, "learning_rate": 2.3199533257341495e-05, "loss": 0.0251, "step": 8094 }, { "epoch": 2.09, "learning_rate": 2.3187525963918567e-05, "loss": 0.0214, "step": 8095 }, { "epoch": 2.09, "learning_rate": 2.317552084050941e-05, "loss": 0.0306, "step": 8096 }, { "epoch": 2.09, "learning_rate": 2.316351788808562e-05, "loss": 0.0299, "step": 8097 }, { "epoch": 2.09, "learning_rate": 2.315151710761862e-05, "loss": 0.0282, "step": 8098 }, { "epoch": 2.09, "learning_rate": 2.3139518500079697e-05, "loss": 0.0197, "step": 8099 }, { "epoch": 2.09, "learning_rate": 2.31275220664399e-05, "loss": 0.0297, "step": 8100 }, { "epoch": 2.09, "learning_rate": 2.3115527807670177e-05, "loss": 0.0251, "step": 8101 }, { "epoch": 2.09, "learning_rate": 2.3103535724741233e-05, "loss": 0.02, "step": 8102 }, { "epoch": 2.09, "learning_rate": 2.3091545818623612e-05, "loss": 0.0336, "step": 8103 }, { "epoch": 2.09, "learning_rate": 2.3079558090287722e-05, "loss": 0.0252, "step": 8104 }, { "epoch": 2.09, "learning_rate": 2.3067572540703735e-05, "loss": 0.0307, "step": 8105 }, { "epoch": 2.09, "learning_rate": 2.3055589170841717e-05, "loss": 0.0292, "step": 8106 }, { "epoch": 2.09, "learning_rate": 2.3043607981671494e-05, "loss": 0.0329, "step": 8107 }, { "epoch": 2.09, "learning_rate": 2.303162897416272e-05, "loss": 0.0209, "step": 8108 }, { "epoch": 2.09, "learning_rate": 2.301965214928493e-05, "loss": 0.0247, "step": 8109 }, { "epoch": 2.09, "learning_rate": 2.3007677508007435e-05, "loss": 0.0306, "step": 8110 }, { "epoch": 2.09, "learning_rate": 2.299570505129935e-05, "loss": 0.0248, "step": 8111 }, { "epoch": 2.09, "learning_rate": 2.298373478012967e-05, "loss": 0.0269, "step": 8112 }, { "epoch": 2.09, "learning_rate": 2.2971766695467172e-05, "loss": 0.0244, "step": 8113 }, { "epoch": 2.09, "learning_rate": 2.295980079828049e-05, "loss": 0.0316, "step": 8114 }, { "epoch": 2.09, "learning_rate": 2.294783708953804e-05, "loss": 0.0231, "step": 8115 }, { "epoch": 2.09, "learning_rate": 2.2935875570208065e-05, "loss": 0.0226, "step": 8116 }, { "epoch": 2.09, "learning_rate": 2.2923916241258693e-05, "loss": 0.031, "step": 8117 }, { "epoch": 2.09, "learning_rate": 2.2911959103657776e-05, "loss": 0.0269, "step": 8118 }, { "epoch": 2.1, "learning_rate": 2.290000415837309e-05, "loss": 0.021, "step": 8119 }, { "epoch": 2.1, "learning_rate": 2.288805140637216e-05, "loss": 0.0261, "step": 8120 }, { "epoch": 2.1, "learning_rate": 2.2876100848622333e-05, "loss": 0.0293, "step": 8121 }, { "epoch": 2.1, "learning_rate": 2.2864152486090856e-05, "loss": 0.0274, "step": 8122 }, { "epoch": 2.1, "learning_rate": 2.2852206319744696e-05, "loss": 0.0287, "step": 8123 }, { "epoch": 2.1, "learning_rate": 2.284026235055074e-05, "loss": 0.0238, "step": 8124 }, { "epoch": 2.1, "learning_rate": 2.282832057947562e-05, "loss": 0.0333, "step": 8125 }, { "epoch": 2.1, "learning_rate": 2.2816381007485805e-05, "loss": 0.0216, "step": 8126 }, { "epoch": 2.1, "learning_rate": 2.280444363554764e-05, "loss": 0.0283, "step": 8127 }, { "epoch": 2.1, "learning_rate": 2.2792508464627225e-05, "loss": 0.026, "step": 8128 }, { "epoch": 2.1, "learning_rate": 2.2780575495690503e-05, "loss": 0.0265, "step": 8129 }, { "epoch": 2.1, "learning_rate": 2.2768644729703276e-05, "loss": 0.025, "step": 8130 }, { "epoch": 2.1, "learning_rate": 2.27567161676311e-05, "loss": 0.0285, "step": 8131 }, { "epoch": 2.1, "learning_rate": 2.2744789810439422e-05, "loss": 0.0247, "step": 8132 }, { "epoch": 2.1, "learning_rate": 2.273286565909346e-05, "loss": 0.0313, "step": 8133 }, { "epoch": 2.1, "learning_rate": 2.2720943714558257e-05, "loss": 0.031, "step": 8134 }, { "epoch": 2.1, "learning_rate": 2.2709023977798732e-05, "loss": 0.0278, "step": 8135 }, { "epoch": 2.1, "learning_rate": 2.2697106449779536e-05, "loss": 0.0272, "step": 8136 }, { "epoch": 2.1, "learning_rate": 2.268519113146524e-05, "loss": 0.0227, "step": 8137 }, { "epoch": 2.1, "learning_rate": 2.267327802382016e-05, "loss": 0.034, "step": 8138 }, { "epoch": 2.1, "learning_rate": 2.266136712780845e-05, "loss": 0.0318, "step": 8139 }, { "epoch": 2.1, "learning_rate": 2.264945844439411e-05, "loss": 0.0355, "step": 8140 }, { "epoch": 2.1, "learning_rate": 2.2637551974540955e-05, "loss": 0.0318, "step": 8141 }, { "epoch": 2.1, "learning_rate": 2.2625647719212573e-05, "loss": 0.0336, "step": 8142 }, { "epoch": 2.1, "learning_rate": 2.2613745679372455e-05, "loss": 0.0297, "step": 8143 }, { "epoch": 2.1, "learning_rate": 2.2601845855983826e-05, "loss": 0.0283, "step": 8144 }, { "epoch": 2.1, "learning_rate": 2.2589948250009825e-05, "loss": 0.0261, "step": 8145 }, { "epoch": 2.1, "learning_rate": 2.257805286241333e-05, "loss": 0.0407, "step": 8146 }, { "epoch": 2.1, "learning_rate": 2.256615969415705e-05, "loss": 0.0329, "step": 8147 }, { "epoch": 2.1, "learning_rate": 2.255426874620359e-05, "loss": 0.0284, "step": 8148 }, { "epoch": 2.1, "learning_rate": 2.2542380019515258e-05, "loss": 0.0366, "step": 8149 }, { "epoch": 2.1, "learning_rate": 2.2530493515054296e-05, "loss": 0.035, "step": 8150 }, { "epoch": 2.1, "learning_rate": 2.2518609233782695e-05, "loss": 0.0281, "step": 8151 }, { "epoch": 2.1, "learning_rate": 2.2506727176662267e-05, "loss": 0.0238, "step": 8152 }, { "epoch": 2.1, "learning_rate": 2.2494847344654696e-05, "loss": 0.0222, "step": 8153 }, { "epoch": 2.1, "learning_rate": 2.248296973872143e-05, "loss": 0.03, "step": 8154 }, { "epoch": 2.1, "learning_rate": 2.2471094359823753e-05, "loss": 0.0236, "step": 8155 }, { "epoch": 2.1, "learning_rate": 2.2459221208922805e-05, "loss": 0.0243, "step": 8156 }, { "epoch": 2.11, "learning_rate": 2.2447350286979473e-05, "loss": 0.0303, "step": 8157 }, { "epoch": 2.11, "learning_rate": 2.2435481594954553e-05, "loss": 0.0258, "step": 8158 }, { "epoch": 2.11, "learning_rate": 2.2423615133808594e-05, "loss": 0.0262, "step": 8159 }, { "epoch": 2.11, "learning_rate": 2.241175090450196e-05, "loss": 0.0214, "step": 8160 }, { "epoch": 2.11, "learning_rate": 2.2399888907994897e-05, "loss": 0.0259, "step": 8161 }, { "epoch": 2.11, "learning_rate": 2.2388029145247396e-05, "loss": 0.0251, "step": 8162 }, { "epoch": 2.11, "learning_rate": 2.2376171617219344e-05, "loss": 0.0289, "step": 8163 }, { "epoch": 2.11, "learning_rate": 2.2364316324870382e-05, "loss": 0.0289, "step": 8164 }, { "epoch": 2.11, "learning_rate": 2.2352463269159978e-05, "loss": 0.0226, "step": 8165 }, { "epoch": 2.11, "learning_rate": 2.234061245104747e-05, "loss": 0.0304, "step": 8166 }, { "epoch": 2.11, "learning_rate": 2.2328763871491963e-05, "loss": 0.0244, "step": 8167 }, { "epoch": 2.11, "learning_rate": 2.2316917531452387e-05, "loss": 0.0293, "step": 8168 }, { "epoch": 2.11, "learning_rate": 2.230507343188753e-05, "loss": 0.0245, "step": 8169 }, { "epoch": 2.11, "learning_rate": 2.2293231573755934e-05, "loss": 0.0276, "step": 8170 }, { "epoch": 2.11, "learning_rate": 2.2281391958016034e-05, "loss": 0.0256, "step": 8171 }, { "epoch": 2.11, "learning_rate": 2.226955458562603e-05, "loss": 0.0252, "step": 8172 }, { "epoch": 2.11, "learning_rate": 2.225771945754393e-05, "loss": 0.033, "step": 8173 }, { "epoch": 2.11, "learning_rate": 2.224588657472763e-05, "loss": 0.0278, "step": 8174 }, { "epoch": 2.11, "learning_rate": 2.2234055938134768e-05, "loss": 0.0254, "step": 8175 }, { "epoch": 2.11, "learning_rate": 2.2222227548722857e-05, "loss": 0.027, "step": 8176 }, { "epoch": 2.11, "learning_rate": 2.2210401407449193e-05, "loss": 0.0185, "step": 8177 }, { "epoch": 2.11, "learning_rate": 2.2198577515270886e-05, "loss": 0.0228, "step": 8178 }, { "epoch": 2.11, "learning_rate": 2.218675587314491e-05, "loss": 0.0232, "step": 8179 }, { "epoch": 2.11, "learning_rate": 2.2174936482028007e-05, "loss": 0.0267, "step": 8180 }, { "epoch": 2.11, "learning_rate": 2.216311934287674e-05, "loss": 0.024, "step": 8181 }, { "epoch": 2.11, "learning_rate": 2.2151304456647544e-05, "loss": 0.0211, "step": 8182 }, { "epoch": 2.11, "learning_rate": 2.2139491824296593e-05, "loss": 0.0248, "step": 8183 }, { "epoch": 2.11, "learning_rate": 2.212768144677996e-05, "loss": 0.0235, "step": 8184 }, { "epoch": 2.11, "learning_rate": 2.2115873325053466e-05, "loss": 0.0314, "step": 8185 }, { "epoch": 2.11, "learning_rate": 2.2104067460072763e-05, "loss": 0.0223, "step": 8186 }, { "epoch": 2.11, "learning_rate": 2.2092263852793382e-05, "loss": 0.0255, "step": 8187 }, { "epoch": 2.11, "learning_rate": 2.2080462504170574e-05, "loss": 0.0313, "step": 8188 }, { "epoch": 2.11, "learning_rate": 2.20686634151595e-05, "loss": 0.0184, "step": 8189 }, { "epoch": 2.11, "learning_rate": 2.2056866586715087e-05, "loss": 0.0287, "step": 8190 }, { "epoch": 2.11, "learning_rate": 2.204507201979205e-05, "loss": 0.0279, "step": 8191 }, { "epoch": 2.11, "learning_rate": 2.2033279715345006e-05, "loss": 0.0239, "step": 8192 }, { "epoch": 2.11, "learning_rate": 2.2021489674328322e-05, "loss": 0.0235, "step": 8193 }, { "epoch": 2.11, "learning_rate": 2.2009701897696183e-05, "loss": 0.0228, "step": 8194 }, { "epoch": 2.11, "learning_rate": 2.1997916386402646e-05, "loss": 0.0248, "step": 8195 }, { "epoch": 2.12, "learning_rate": 2.198613314140151e-05, "loss": 0.0219, "step": 8196 }, { "epoch": 2.12, "learning_rate": 2.1974352163646467e-05, "loss": 0.0259, "step": 8197 }, { "epoch": 2.12, "learning_rate": 2.1962573454090966e-05, "loss": 0.0207, "step": 8198 }, { "epoch": 2.12, "learning_rate": 2.1950797013688274e-05, "loss": 0.0286, "step": 8199 }, { "epoch": 2.12, "learning_rate": 2.1939022843391534e-05, "loss": 0.027, "step": 8200 }, { "epoch": 2.12, "learning_rate": 2.1927250944153626e-05, "loss": 0.0208, "step": 8201 }, { "epoch": 2.12, "learning_rate": 2.1915481316927323e-05, "loss": 0.0243, "step": 8202 }, { "epoch": 2.12, "learning_rate": 2.1903713962665156e-05, "loss": 0.0285, "step": 8203 }, { "epoch": 2.12, "learning_rate": 2.1891948882319473e-05, "loss": 0.025, "step": 8204 }, { "epoch": 2.12, "learning_rate": 2.1880186076842496e-05, "loss": 0.0219, "step": 8205 }, { "epoch": 2.12, "learning_rate": 2.1868425547186205e-05, "loss": 0.025, "step": 8206 }, { "epoch": 2.12, "learning_rate": 2.1856667294302397e-05, "loss": 0.0256, "step": 8207 }, { "epoch": 2.12, "learning_rate": 2.184491131914274e-05, "loss": 0.0275, "step": 8208 }, { "epoch": 2.12, "learning_rate": 2.183315762265864e-05, "loss": 0.0275, "step": 8209 }, { "epoch": 2.12, "learning_rate": 2.1821406205801404e-05, "loss": 0.0291, "step": 8210 }, { "epoch": 2.12, "learning_rate": 2.1809657069522077e-05, "loss": 0.0239, "step": 8211 }, { "epoch": 2.12, "learning_rate": 2.1797910214771544e-05, "loss": 0.0276, "step": 8212 }, { "epoch": 2.12, "learning_rate": 2.178616564250055e-05, "loss": 0.0276, "step": 8213 }, { "epoch": 2.12, "learning_rate": 2.177442335365958e-05, "loss": 0.0337, "step": 8214 }, { "epoch": 2.12, "learning_rate": 2.1762683349199003e-05, "loss": 0.022, "step": 8215 }, { "epoch": 2.12, "learning_rate": 2.1750945630068963e-05, "loss": 0.0277, "step": 8216 }, { "epoch": 2.12, "learning_rate": 2.1739210197219405e-05, "loss": 0.0249, "step": 8217 }, { "epoch": 2.12, "learning_rate": 2.1727477051600148e-05, "loss": 0.0263, "step": 8218 }, { "epoch": 2.12, "learning_rate": 2.171574619416078e-05, "loss": 0.0302, "step": 8219 }, { "epoch": 2.12, "learning_rate": 2.1704017625850694e-05, "loss": 0.0249, "step": 8220 }, { "epoch": 2.12, "learning_rate": 2.1692291347619148e-05, "loss": 0.0259, "step": 8221 }, { "epoch": 2.12, "learning_rate": 2.1680567360415155e-05, "loss": 0.0253, "step": 8222 }, { "epoch": 2.12, "learning_rate": 2.1668845665187602e-05, "loss": 0.0278, "step": 8223 }, { "epoch": 2.12, "learning_rate": 2.1657126262885153e-05, "loss": 0.0272, "step": 8224 }, { "epoch": 2.12, "learning_rate": 2.1645409154456264e-05, "loss": 0.0242, "step": 8225 }, { "epoch": 2.12, "learning_rate": 2.163369434084928e-05, "loss": 0.0272, "step": 8226 }, { "epoch": 2.12, "learning_rate": 2.1621981823012284e-05, "loss": 0.0329, "step": 8227 }, { "epoch": 2.12, "learning_rate": 2.161027160189323e-05, "loss": 0.027, "step": 8228 }, { "epoch": 2.12, "learning_rate": 2.1598563678439847e-05, "loss": 0.0293, "step": 8229 }, { "epoch": 2.12, "learning_rate": 2.158685805359968e-05, "loss": 0.0293, "step": 8230 }, { "epoch": 2.12, "learning_rate": 2.1575154728320128e-05, "loss": 0.0277, "step": 8231 }, { "epoch": 2.12, "learning_rate": 2.1563453703548364e-05, "loss": 0.026, "step": 8232 }, { "epoch": 2.12, "learning_rate": 2.1551754980231365e-05, "loss": 0.0227, "step": 8233 }, { "epoch": 2.12, "learning_rate": 2.154005855931598e-05, "loss": 0.0248, "step": 8234 }, { "epoch": 2.13, "learning_rate": 2.1528364441748804e-05, "loss": 0.03, "step": 8235 }, { "epoch": 2.13, "learning_rate": 2.151667262847631e-05, "loss": 0.0267, "step": 8236 }, { "epoch": 2.13, "learning_rate": 2.1504983120444733e-05, "loss": 0.0307, "step": 8237 }, { "epoch": 2.13, "learning_rate": 2.149329591860012e-05, "loss": 0.0282, "step": 8238 }, { "epoch": 2.13, "learning_rate": 2.1481611023888394e-05, "loss": 0.0286, "step": 8239 }, { "epoch": 2.13, "learning_rate": 2.1469928437255198e-05, "loss": 0.0269, "step": 8240 }, { "epoch": 2.13, "learning_rate": 2.1458248159646095e-05, "loss": 0.0264, "step": 8241 }, { "epoch": 2.13, "learning_rate": 2.144657019200637e-05, "loss": 0.0246, "step": 8242 }, { "epoch": 2.13, "learning_rate": 2.1434894535281147e-05, "loss": 0.0263, "step": 8243 }, { "epoch": 2.13, "learning_rate": 2.1423221190415405e-05, "loss": 0.0214, "step": 8244 }, { "epoch": 2.13, "learning_rate": 2.1411550158353887e-05, "loss": 0.0376, "step": 8245 }, { "epoch": 2.13, "learning_rate": 2.139988144004114e-05, "loss": 0.0293, "step": 8246 }, { "epoch": 2.13, "learning_rate": 2.1388215036421594e-05, "loss": 0.0295, "step": 8247 }, { "epoch": 2.13, "learning_rate": 2.13765509484394e-05, "loss": 0.0396, "step": 8248 }, { "epoch": 2.13, "learning_rate": 2.136488917703861e-05, "loss": 0.0306, "step": 8249 }, { "epoch": 2.13, "learning_rate": 2.1353229723163025e-05, "loss": 0.0326, "step": 8250 }, { "epoch": 2.13, "learning_rate": 2.1341572587756263e-05, "loss": 0.0285, "step": 8251 }, { "epoch": 2.13, "learning_rate": 2.1329917771761805e-05, "loss": 0.0264, "step": 8252 }, { "epoch": 2.13, "learning_rate": 2.1318265276122878e-05, "loss": 0.0218, "step": 8253 }, { "epoch": 2.13, "learning_rate": 2.130661510178259e-05, "loss": 0.0199, "step": 8254 }, { "epoch": 2.13, "learning_rate": 2.1294967249683796e-05, "loss": 0.0204, "step": 8255 }, { "epoch": 2.13, "learning_rate": 2.1283321720769182e-05, "loss": 0.0228, "step": 8256 }, { "epoch": 2.13, "learning_rate": 2.1271678515981295e-05, "loss": 0.0201, "step": 8257 }, { "epoch": 2.13, "learning_rate": 2.126003763626243e-05, "loss": 0.0219, "step": 8258 }, { "epoch": 2.13, "learning_rate": 2.1248399082554703e-05, "loss": 0.0272, "step": 8259 }, { "epoch": 2.13, "learning_rate": 2.1236762855800096e-05, "loss": 0.0268, "step": 8260 }, { "epoch": 2.13, "learning_rate": 2.1225128956940315e-05, "loss": 0.0224, "step": 8261 }, { "epoch": 2.13, "learning_rate": 2.1213497386916974e-05, "loss": 0.0247, "step": 8262 }, { "epoch": 2.13, "learning_rate": 2.1201868146671433e-05, "loss": 0.022, "step": 8263 }, { "epoch": 2.13, "learning_rate": 2.119024123714486e-05, "loss": 0.0308, "step": 8264 }, { "epoch": 2.13, "learning_rate": 2.1178616659278287e-05, "loss": 0.0202, "step": 8265 }, { "epoch": 2.13, "learning_rate": 2.11669944140125e-05, "loss": 0.0245, "step": 8266 }, { "epoch": 2.13, "learning_rate": 2.115537450228815e-05, "loss": 0.0255, "step": 8267 }, { "epoch": 2.13, "learning_rate": 2.1143756925045656e-05, "loss": 0.0275, "step": 8268 }, { "epoch": 2.13, "learning_rate": 2.113214168322524e-05, "loss": 0.0284, "step": 8269 }, { "epoch": 2.13, "learning_rate": 2.1120528777767006e-05, "loss": 0.0274, "step": 8270 }, { "epoch": 2.13, "learning_rate": 2.1108918209610778e-05, "loss": 0.0257, "step": 8271 }, { "epoch": 2.13, "learning_rate": 2.1097309979696267e-05, "loss": 0.0229, "step": 8272 }, { "epoch": 2.13, "learning_rate": 2.108570408896295e-05, "loss": 0.0327, "step": 8273 }, { "epoch": 2.14, "learning_rate": 2.1074100538350105e-05, "loss": 0.0198, "step": 8274 }, { "epoch": 2.14, "learning_rate": 2.106249932879688e-05, "loss": 0.0256, "step": 8275 }, { "epoch": 2.14, "learning_rate": 2.1050900461242175e-05, "loss": 0.0253, "step": 8276 }, { "epoch": 2.14, "learning_rate": 2.1039303936624705e-05, "loss": 0.0272, "step": 8277 }, { "epoch": 2.14, "learning_rate": 2.1027709755883047e-05, "loss": 0.0314, "step": 8278 }, { "epoch": 2.14, "learning_rate": 2.1016117919955515e-05, "loss": 0.0278, "step": 8279 }, { "epoch": 2.14, "learning_rate": 2.100452842978031e-05, "loss": 0.0229, "step": 8280 }, { "epoch": 2.14, "learning_rate": 2.0992941286295387e-05, "loss": 0.022, "step": 8281 }, { "epoch": 2.14, "learning_rate": 2.0981356490438502e-05, "loss": 0.0259, "step": 8282 }, { "epoch": 2.14, "learning_rate": 2.0969774043147295e-05, "loss": 0.0204, "step": 8283 }, { "epoch": 2.14, "learning_rate": 2.0958193945359127e-05, "loss": 0.0259, "step": 8284 }, { "epoch": 2.14, "learning_rate": 2.0946616198011242e-05, "loss": 0.0224, "step": 8285 }, { "epoch": 2.14, "learning_rate": 2.0935040802040652e-05, "loss": 0.0279, "step": 8286 }, { "epoch": 2.14, "learning_rate": 2.0923467758384156e-05, "loss": 0.0249, "step": 8287 }, { "epoch": 2.14, "learning_rate": 2.0911897067978447e-05, "loss": 0.0242, "step": 8288 }, { "epoch": 2.14, "learning_rate": 2.090032873175995e-05, "loss": 0.0229, "step": 8289 }, { "epoch": 2.14, "learning_rate": 2.088876275066491e-05, "loss": 0.0322, "step": 8290 }, { "epoch": 2.14, "learning_rate": 2.0877199125629428e-05, "loss": 0.0276, "step": 8291 }, { "epoch": 2.14, "learning_rate": 2.086563785758935e-05, "loss": 0.0279, "step": 8292 }, { "epoch": 2.14, "learning_rate": 2.08540789474804e-05, "loss": 0.0241, "step": 8293 }, { "epoch": 2.14, "learning_rate": 2.0842522396238058e-05, "loss": 0.0375, "step": 8294 }, { "epoch": 2.14, "learning_rate": 2.0830968204797608e-05, "loss": 0.0227, "step": 8295 }, { "epoch": 2.14, "learning_rate": 2.08194163740942e-05, "loss": 0.0242, "step": 8296 }, { "epoch": 2.14, "learning_rate": 2.0807866905062735e-05, "loss": 0.0283, "step": 8297 }, { "epoch": 2.14, "learning_rate": 2.079631979863797e-05, "loss": 0.0231, "step": 8298 }, { "epoch": 2.14, "learning_rate": 2.078477505575443e-05, "loss": 0.03, "step": 8299 }, { "epoch": 2.14, "learning_rate": 2.0773232677346455e-05, "loss": 0.0242, "step": 8300 }, { "epoch": 2.14, "learning_rate": 2.076169266434823e-05, "loss": 0.0277, "step": 8301 }, { "epoch": 2.14, "learning_rate": 2.0750155017693712e-05, "loss": 0.027, "step": 8302 }, { "epoch": 2.14, "learning_rate": 2.0738619738316662e-05, "loss": 0.0244, "step": 8303 }, { "epoch": 2.14, "learning_rate": 2.0727086827150687e-05, "loss": 0.0273, "step": 8304 }, { "epoch": 2.14, "learning_rate": 2.071555628512916e-05, "loss": 0.0266, "step": 8305 }, { "epoch": 2.14, "learning_rate": 2.070402811318531e-05, "loss": 0.0227, "step": 8306 }, { "epoch": 2.14, "learning_rate": 2.0692502312252127e-05, "loss": 0.0299, "step": 8307 }, { "epoch": 2.14, "learning_rate": 2.0680978883262414e-05, "loss": 0.0226, "step": 8308 }, { "epoch": 2.14, "learning_rate": 2.0669457827148837e-05, "loss": 0.0199, "step": 8309 }, { "epoch": 2.14, "learning_rate": 2.065793914484378e-05, "loss": 0.0258, "step": 8310 }, { "epoch": 2.14, "learning_rate": 2.0646422837279532e-05, "loss": 0.0256, "step": 8311 }, { "epoch": 2.15, "learning_rate": 2.0634908905388123e-05, "loss": 0.0281, "step": 8312 }, { "epoch": 2.15, "learning_rate": 2.0623397350101385e-05, "loss": 0.0239, "step": 8313 }, { "epoch": 2.15, "learning_rate": 2.061188817235103e-05, "loss": 0.0228, "step": 8314 }, { "epoch": 2.15, "learning_rate": 2.06003813730685e-05, "loss": 0.0313, "step": 8315 }, { "epoch": 2.15, "learning_rate": 2.0588876953185064e-05, "loss": 0.023, "step": 8316 }, { "epoch": 2.15, "learning_rate": 2.0577374913631847e-05, "loss": 0.0291, "step": 8317 }, { "epoch": 2.15, "learning_rate": 2.0565875255339706e-05, "loss": 0.026, "step": 8318 }, { "epoch": 2.15, "learning_rate": 2.055437797923937e-05, "loss": 0.0277, "step": 8319 }, { "epoch": 2.15, "learning_rate": 2.054288308626134e-05, "loss": 0.0212, "step": 8320 }, { "epoch": 2.15, "learning_rate": 2.0531390577335906e-05, "loss": 0.0235, "step": 8321 }, { "epoch": 2.15, "learning_rate": 2.0519900453393243e-05, "loss": 0.0273, "step": 8322 }, { "epoch": 2.15, "learning_rate": 2.0508412715363222e-05, "loss": 0.0312, "step": 8323 }, { "epoch": 2.15, "learning_rate": 2.049692736417564e-05, "loss": 0.027, "step": 8324 }, { "epoch": 2.15, "learning_rate": 2.048544440076e-05, "loss": 0.0259, "step": 8325 }, { "epoch": 2.15, "learning_rate": 2.047396382604565e-05, "loss": 0.0218, "step": 8326 }, { "epoch": 2.15, "learning_rate": 2.0462485640961783e-05, "loss": 0.0287, "step": 8327 }, { "epoch": 2.15, "learning_rate": 2.0451009846437313e-05, "loss": 0.0266, "step": 8328 }, { "epoch": 2.15, "learning_rate": 2.0439536443401065e-05, "loss": 0.0239, "step": 8329 }, { "epoch": 2.15, "learning_rate": 2.0428065432781584e-05, "loss": 0.0254, "step": 8330 }, { "epoch": 2.15, "learning_rate": 2.0416596815507244e-05, "loss": 0.0272, "step": 8331 }, { "epoch": 2.15, "learning_rate": 2.0405130592506266e-05, "loss": 0.0267, "step": 8332 }, { "epoch": 2.15, "learning_rate": 2.0393666764706608e-05, "loss": 0.0257, "step": 8333 }, { "epoch": 2.15, "learning_rate": 2.038220533303611e-05, "loss": 0.0312, "step": 8334 }, { "epoch": 2.15, "learning_rate": 2.0370746298422366e-05, "loss": 0.031, "step": 8335 }, { "epoch": 2.15, "learning_rate": 2.0359289661792764e-05, "loss": 0.0322, "step": 8336 }, { "epoch": 2.15, "learning_rate": 2.0347835424074564e-05, "loss": 0.0236, "step": 8337 }, { "epoch": 2.15, "learning_rate": 2.033638358619478e-05, "loss": 0.0294, "step": 8338 }, { "epoch": 2.15, "learning_rate": 2.032493414908021e-05, "loss": 0.0296, "step": 8339 }, { "epoch": 2.15, "learning_rate": 2.0313487113657543e-05, "loss": 0.0246, "step": 8340 }, { "epoch": 2.15, "learning_rate": 2.030204248085318e-05, "loss": 0.0289, "step": 8341 }, { "epoch": 2.15, "learning_rate": 2.02906002515934e-05, "loss": 0.0272, "step": 8342 }, { "epoch": 2.15, "learning_rate": 2.0279160426804243e-05, "loss": 0.0256, "step": 8343 }, { "epoch": 2.15, "learning_rate": 2.026772300741156e-05, "loss": 0.0257, "step": 8344 }, { "epoch": 2.15, "learning_rate": 2.0256287994341032e-05, "loss": 0.0275, "step": 8345 }, { "epoch": 2.15, "learning_rate": 2.024485538851811e-05, "loss": 0.0291, "step": 8346 }, { "epoch": 2.15, "learning_rate": 2.02334251908681e-05, "loss": 0.0269, "step": 8347 }, { "epoch": 2.15, "learning_rate": 2.0221997402316058e-05, "loss": 0.0297, "step": 8348 }, { "epoch": 2.15, "learning_rate": 2.0210572023786856e-05, "loss": 0.0349, "step": 8349 }, { "epoch": 2.15, "learning_rate": 2.019914905620522e-05, "loss": 0.0351, "step": 8350 }, { "epoch": 2.16, "learning_rate": 2.0187728500495607e-05, "loss": 0.0284, "step": 8351 }, { "epoch": 2.16, "learning_rate": 2.017631035758235e-05, "loss": 0.0184, "step": 8352 }, { "epoch": 2.16, "learning_rate": 2.0164894628389546e-05, "loss": 0.0238, "step": 8353 }, { "epoch": 2.16, "learning_rate": 2.0153481313841083e-05, "loss": 0.027, "step": 8354 }, { "epoch": 2.16, "learning_rate": 2.0142070414860704e-05, "loss": 0.0268, "step": 8355 }, { "epoch": 2.16, "learning_rate": 2.0130661932371907e-05, "loss": 0.0376, "step": 8356 }, { "epoch": 2.16, "learning_rate": 2.011925586729801e-05, "loss": 0.0212, "step": 8357 }, { "epoch": 2.16, "learning_rate": 2.0107852220562168e-05, "loss": 0.0309, "step": 8358 }, { "epoch": 2.16, "learning_rate": 2.0096450993087275e-05, "loss": 0.0321, "step": 8359 }, { "epoch": 2.16, "learning_rate": 2.0085052185796098e-05, "loss": 0.0235, "step": 8360 }, { "epoch": 2.16, "learning_rate": 2.0073655799611175e-05, "loss": 0.0252, "step": 8361 }, { "epoch": 2.16, "learning_rate": 2.0062261835454816e-05, "loss": 0.028, "step": 8362 }, { "epoch": 2.16, "learning_rate": 2.0050870294249206e-05, "loss": 0.024, "step": 8363 }, { "epoch": 2.16, "learning_rate": 2.0039481176916273e-05, "loss": 0.02, "step": 8364 }, { "epoch": 2.16, "learning_rate": 2.0028094484377796e-05, "loss": 0.0264, "step": 8365 }, { "epoch": 2.16, "learning_rate": 2.001671021755532e-05, "loss": 0.0262, "step": 8366 }, { "epoch": 2.16, "learning_rate": 2.0005328377370186e-05, "loss": 0.026, "step": 8367 }, { "epoch": 2.16, "learning_rate": 1.9993948964743605e-05, "loss": 0.0256, "step": 8368 }, { "epoch": 2.16, "learning_rate": 1.9982571980596505e-05, "loss": 0.0268, "step": 8369 }, { "epoch": 2.16, "learning_rate": 1.99711974258497e-05, "loss": 0.0311, "step": 8370 }, { "epoch": 2.16, "learning_rate": 1.9959825301423735e-05, "loss": 0.0277, "step": 8371 }, { "epoch": 2.16, "learning_rate": 1.9948455608238985e-05, "loss": 0.0253, "step": 8372 }, { "epoch": 2.16, "learning_rate": 1.993708834721567e-05, "loss": 0.0213, "step": 8373 }, { "epoch": 2.16, "learning_rate": 1.9925723519273736e-05, "loss": 0.0217, "step": 8374 }, { "epoch": 2.16, "learning_rate": 1.9914361125333016e-05, "loss": 0.0204, "step": 8375 }, { "epoch": 2.16, "learning_rate": 1.9903001166313068e-05, "loss": 0.0279, "step": 8376 }, { "epoch": 2.16, "learning_rate": 1.9891643643133284e-05, "loss": 0.0258, "step": 8377 }, { "epoch": 2.16, "learning_rate": 1.988028855671289e-05, "loss": 0.027, "step": 8378 }, { "epoch": 2.16, "learning_rate": 1.9868935907970877e-05, "loss": 0.0231, "step": 8379 }, { "epoch": 2.16, "learning_rate": 1.9857585697826027e-05, "loss": 0.0277, "step": 8380 }, { "epoch": 2.16, "learning_rate": 1.9846237927196977e-05, "loss": 0.024, "step": 8381 }, { "epoch": 2.16, "learning_rate": 1.983489259700211e-05, "loss": 0.0337, "step": 8382 }, { "epoch": 2.16, "learning_rate": 1.9823549708159668e-05, "loss": 0.0235, "step": 8383 }, { "epoch": 2.16, "learning_rate": 1.9812209261587645e-05, "loss": 0.0271, "step": 8384 }, { "epoch": 2.16, "learning_rate": 1.9800871258203845e-05, "loss": 0.0305, "step": 8385 }, { "epoch": 2.16, "learning_rate": 1.9789535698925916e-05, "loss": 0.029, "step": 8386 }, { "epoch": 2.16, "learning_rate": 1.9778202584671253e-05, "loss": 0.025, "step": 8387 }, { "epoch": 2.16, "learning_rate": 1.9766871916357098e-05, "loss": 0.0303, "step": 8388 }, { "epoch": 2.16, "learning_rate": 1.9755543694900475e-05, "loss": 0.0267, "step": 8389 }, { "epoch": 2.17, "learning_rate": 1.974421792121819e-05, "loss": 0.0268, "step": 8390 }, { "epoch": 2.17, "learning_rate": 1.9732894596226897e-05, "loss": 0.0227, "step": 8391 }, { "epoch": 2.17, "learning_rate": 1.9721573720842996e-05, "loss": 0.0314, "step": 8392 }, { "epoch": 2.17, "learning_rate": 1.971025529598276e-05, "loss": 0.0279, "step": 8393 }, { "epoch": 2.17, "learning_rate": 1.96989393225622e-05, "loss": 0.0306, "step": 8394 }, { "epoch": 2.17, "learning_rate": 1.9687625801497133e-05, "loss": 0.0218, "step": 8395 }, { "epoch": 2.17, "learning_rate": 1.9676314733703237e-05, "loss": 0.0226, "step": 8396 }, { "epoch": 2.17, "learning_rate": 1.966500612009593e-05, "loss": 0.028, "step": 8397 }, { "epoch": 2.17, "learning_rate": 1.965369996159043e-05, "loss": 0.0261, "step": 8398 }, { "epoch": 2.17, "learning_rate": 1.9642396259101825e-05, "loss": 0.0273, "step": 8399 }, { "epoch": 2.17, "learning_rate": 1.963109501354491e-05, "loss": 0.0245, "step": 8400 }, { "epoch": 2.17, "learning_rate": 1.961979622583437e-05, "loss": 0.0268, "step": 8401 }, { "epoch": 2.17, "learning_rate": 1.960849989688463e-05, "loss": 0.0259, "step": 8402 }, { "epoch": 2.17, "learning_rate": 1.9597206027609922e-05, "loss": 0.0252, "step": 8403 }, { "epoch": 2.17, "learning_rate": 1.9585914618924318e-05, "loss": 0.0202, "step": 8404 }, { "epoch": 2.17, "learning_rate": 1.9574625671741643e-05, "loss": 0.0277, "step": 8405 }, { "epoch": 2.17, "learning_rate": 1.956333918697557e-05, "loss": 0.0248, "step": 8406 }, { "epoch": 2.17, "learning_rate": 1.9552055165539534e-05, "loss": 0.0256, "step": 8407 }, { "epoch": 2.17, "learning_rate": 1.9540773608346775e-05, "loss": 0.0252, "step": 8408 }, { "epoch": 2.17, "learning_rate": 1.9529494516310366e-05, "loss": 0.0268, "step": 8409 }, { "epoch": 2.17, "learning_rate": 1.9518217890343126e-05, "loss": 0.0264, "step": 8410 }, { "epoch": 2.17, "learning_rate": 1.9506943731357746e-05, "loss": 0.031, "step": 8411 }, { "epoch": 2.17, "learning_rate": 1.9495672040266656e-05, "loss": 0.027, "step": 8412 }, { "epoch": 2.17, "learning_rate": 1.948440281798209e-05, "loss": 0.0312, "step": 8413 }, { "epoch": 2.17, "learning_rate": 1.9473136065416138e-05, "loss": 0.0227, "step": 8414 }, { "epoch": 2.17, "learning_rate": 1.9461871783480612e-05, "loss": 0.0242, "step": 8415 }, { "epoch": 2.17, "learning_rate": 1.945060997308721e-05, "loss": 0.0367, "step": 8416 }, { "epoch": 2.17, "learning_rate": 1.9439350635147346e-05, "loss": 0.0258, "step": 8417 }, { "epoch": 2.17, "learning_rate": 1.9428093770572275e-05, "loss": 0.0242, "step": 8418 }, { "epoch": 2.17, "learning_rate": 1.9416839380273074e-05, "loss": 0.0262, "step": 8419 }, { "epoch": 2.17, "learning_rate": 1.9405587465160578e-05, "loss": 0.0282, "step": 8420 }, { "epoch": 2.17, "learning_rate": 1.939433802614542e-05, "loss": 0.0226, "step": 8421 }, { "epoch": 2.17, "learning_rate": 1.9383091064138093e-05, "loss": 0.0241, "step": 8422 }, { "epoch": 2.17, "learning_rate": 1.9371846580048798e-05, "loss": 0.0283, "step": 8423 }, { "epoch": 2.17, "learning_rate": 1.9360604574787634e-05, "loss": 0.0313, "step": 8424 }, { "epoch": 2.17, "learning_rate": 1.9349365049264425e-05, "loss": 0.0262, "step": 8425 }, { "epoch": 2.17, "learning_rate": 1.93381280043888e-05, "loss": 0.0287, "step": 8426 }, { "epoch": 2.17, "learning_rate": 1.932689344107025e-05, "loss": 0.0269, "step": 8427 }, { "epoch": 2.17, "learning_rate": 1.9315661360217978e-05, "loss": 0.0284, "step": 8428 }, { "epoch": 2.18, "learning_rate": 1.9304431762741073e-05, "loss": 0.0265, "step": 8429 }, { "epoch": 2.18, "learning_rate": 1.929320464954836e-05, "loss": 0.0228, "step": 8430 }, { "epoch": 2.18, "learning_rate": 1.928198002154846e-05, "loss": 0.029, "step": 8431 }, { "epoch": 2.18, "learning_rate": 1.927075787964985e-05, "loss": 0.0299, "step": 8432 }, { "epoch": 2.18, "learning_rate": 1.925953822476075e-05, "loss": 0.0286, "step": 8433 }, { "epoch": 2.18, "learning_rate": 1.924832105778922e-05, "loss": 0.0239, "step": 8434 }, { "epoch": 2.18, "learning_rate": 1.923710637964309e-05, "loss": 0.029, "step": 8435 }, { "epoch": 2.18, "learning_rate": 1.9225894191229975e-05, "loss": 0.0208, "step": 8436 }, { "epoch": 2.18, "learning_rate": 1.9214684493457353e-05, "loss": 0.0321, "step": 8437 }, { "epoch": 2.18, "learning_rate": 1.9203477287232434e-05, "loss": 0.029, "step": 8438 }, { "epoch": 2.18, "learning_rate": 1.9192272573462233e-05, "loss": 0.0269, "step": 8439 }, { "epoch": 2.18, "learning_rate": 1.9181070353053625e-05, "loss": 0.0271, "step": 8440 }, { "epoch": 2.18, "learning_rate": 1.9169870626913193e-05, "loss": 0.0271, "step": 8441 }, { "epoch": 2.18, "learning_rate": 1.9158673395947407e-05, "loss": 0.0314, "step": 8442 }, { "epoch": 2.18, "learning_rate": 1.9147478661062475e-05, "loss": 0.0285, "step": 8443 }, { "epoch": 2.18, "learning_rate": 1.9136286423164396e-05, "loss": 0.0235, "step": 8444 }, { "epoch": 2.18, "learning_rate": 1.9125096683159028e-05, "loss": 0.0248, "step": 8445 }, { "epoch": 2.18, "learning_rate": 1.911390944195196e-05, "loss": 0.0321, "step": 8446 }, { "epoch": 2.18, "learning_rate": 1.9102724700448642e-05, "loss": 0.0271, "step": 8447 }, { "epoch": 2.18, "learning_rate": 1.909154245955427e-05, "loss": 0.0328, "step": 8448 }, { "epoch": 2.18, "learning_rate": 1.908036272017384e-05, "loss": 0.0337, "step": 8449 }, { "epoch": 2.18, "learning_rate": 1.9069185483212194e-05, "loss": 0.0272, "step": 8450 }, { "epoch": 2.18, "learning_rate": 1.9058010749573906e-05, "loss": 0.0226, "step": 8451 }, { "epoch": 2.18, "learning_rate": 1.9046838520163423e-05, "loss": 0.0288, "step": 8452 }, { "epoch": 2.18, "learning_rate": 1.9035668795884914e-05, "loss": 0.0246, "step": 8453 }, { "epoch": 2.18, "learning_rate": 1.9024501577642372e-05, "loss": 0.0224, "step": 8454 }, { "epoch": 2.18, "learning_rate": 1.9013336866339625e-05, "loss": 0.0262, "step": 8455 }, { "epoch": 2.18, "learning_rate": 1.9002174662880234e-05, "loss": 0.0209, "step": 8456 }, { "epoch": 2.18, "learning_rate": 1.899101496816762e-05, "loss": 0.0239, "step": 8457 }, { "epoch": 2.18, "learning_rate": 1.897985778310496e-05, "loss": 0.0243, "step": 8458 }, { "epoch": 2.18, "learning_rate": 1.896870310859521e-05, "loss": 0.0222, "step": 8459 }, { "epoch": 2.18, "learning_rate": 1.8957550945541198e-05, "loss": 0.0264, "step": 8460 }, { "epoch": 2.18, "learning_rate": 1.8946401294845472e-05, "loss": 0.0296, "step": 8461 }, { "epoch": 2.18, "learning_rate": 1.8935254157410403e-05, "loss": 0.0271, "step": 8462 }, { "epoch": 2.18, "learning_rate": 1.892410953413819e-05, "loss": 0.0222, "step": 8463 }, { "epoch": 2.18, "learning_rate": 1.891296742593076e-05, "loss": 0.0189, "step": 8464 }, { "epoch": 2.18, "learning_rate": 1.890182783368992e-05, "loss": 0.0239, "step": 8465 }, { "epoch": 2.18, "learning_rate": 1.8890690758317215e-05, "loss": 0.0309, "step": 8466 }, { "epoch": 2.19, "learning_rate": 1.887955620071398e-05, "loss": 0.0205, "step": 8467 }, { "epoch": 2.19, "learning_rate": 1.8868424161781402e-05, "loss": 0.0287, "step": 8468 }, { "epoch": 2.19, "learning_rate": 1.88572946424204e-05, "loss": 0.0271, "step": 8469 }, { "epoch": 2.19, "learning_rate": 1.8846167643531742e-05, "loss": 0.0204, "step": 8470 }, { "epoch": 2.19, "learning_rate": 1.883504316601597e-05, "loss": 0.0208, "step": 8471 }, { "epoch": 2.19, "learning_rate": 1.8823921210773387e-05, "loss": 0.0217, "step": 8472 }, { "epoch": 2.19, "learning_rate": 1.8812801778704175e-05, "loss": 0.0294, "step": 8473 }, { "epoch": 2.19, "learning_rate": 1.880168487070822e-05, "loss": 0.0168, "step": 8474 }, { "epoch": 2.19, "learning_rate": 1.879057048768528e-05, "loss": 0.0289, "step": 8475 }, { "epoch": 2.19, "learning_rate": 1.8779458630534867e-05, "loss": 0.0301, "step": 8476 }, { "epoch": 2.19, "learning_rate": 1.8768349300156267e-05, "loss": 0.0326, "step": 8477 }, { "epoch": 2.19, "learning_rate": 1.8757242497448642e-05, "loss": 0.0223, "step": 8478 }, { "epoch": 2.19, "learning_rate": 1.8746138223310862e-05, "loss": 0.0272, "step": 8479 }, { "epoch": 2.19, "learning_rate": 1.8735036478641626e-05, "loss": 0.0274, "step": 8480 }, { "epoch": 2.19, "learning_rate": 1.8723937264339464e-05, "loss": 0.026, "step": 8481 }, { "epoch": 2.19, "learning_rate": 1.8712840581302626e-05, "loss": 0.024, "step": 8482 }, { "epoch": 2.19, "learning_rate": 1.8701746430429245e-05, "loss": 0.0217, "step": 8483 }, { "epoch": 2.19, "learning_rate": 1.869065481261718e-05, "loss": 0.0248, "step": 8484 }, { "epoch": 2.19, "learning_rate": 1.86795657287641e-05, "loss": 0.0233, "step": 8485 }, { "epoch": 2.19, "learning_rate": 1.8668479179767495e-05, "loss": 0.024, "step": 8486 }, { "epoch": 2.19, "learning_rate": 1.8657395166524616e-05, "loss": 0.0297, "step": 8487 }, { "epoch": 2.19, "learning_rate": 1.8646313689932547e-05, "loss": 0.0233, "step": 8488 }, { "epoch": 2.19, "learning_rate": 1.8635234750888135e-05, "loss": 0.024, "step": 8489 }, { "epoch": 2.19, "learning_rate": 1.8624158350288018e-05, "loss": 0.0291, "step": 8490 }, { "epoch": 2.19, "learning_rate": 1.861308448902867e-05, "loss": 0.0265, "step": 8491 }, { "epoch": 2.19, "learning_rate": 1.8602013168006293e-05, "loss": 0.0231, "step": 8492 }, { "epoch": 2.19, "learning_rate": 1.859094438811697e-05, "loss": 0.0236, "step": 8493 }, { "epoch": 2.19, "learning_rate": 1.8579878150256504e-05, "loss": 0.0303, "step": 8494 }, { "epoch": 2.19, "learning_rate": 1.85688144553205e-05, "loss": 0.0242, "step": 8495 }, { "epoch": 2.19, "learning_rate": 1.855775330420441e-05, "loss": 0.0279, "step": 8496 }, { "epoch": 2.19, "learning_rate": 1.8546694697803425e-05, "loss": 0.0241, "step": 8497 }, { "epoch": 2.19, "learning_rate": 1.853563863701257e-05, "loss": 0.0278, "step": 8498 }, { "epoch": 2.19, "learning_rate": 1.8524585122726635e-05, "loss": 0.025, "step": 8499 }, { "epoch": 2.19, "learning_rate": 1.851353415584019e-05, "loss": 0.0264, "step": 8500 }, { "epoch": 2.19, "learning_rate": 1.8502485737247665e-05, "loss": 0.0237, "step": 8501 }, { "epoch": 2.19, "learning_rate": 1.849143986784323e-05, "loss": 0.0256, "step": 8502 }, { "epoch": 2.19, "learning_rate": 1.848039654852083e-05, "loss": 0.0328, "step": 8503 }, { "epoch": 2.19, "learning_rate": 1.846935578017427e-05, "loss": 0.0266, "step": 8504 }, { "epoch": 2.19, "learning_rate": 1.845831756369708e-05, "loss": 0.0245, "step": 8505 }, { "epoch": 2.2, "learning_rate": 1.844728189998266e-05, "loss": 0.0337, "step": 8506 }, { "epoch": 2.2, "learning_rate": 1.8436248789924125e-05, "loss": 0.0206, "step": 8507 }, { "epoch": 2.2, "learning_rate": 1.8425218234414417e-05, "loss": 0.0272, "step": 8508 }, { "epoch": 2.2, "learning_rate": 1.8414190234346295e-05, "loss": 0.0289, "step": 8509 }, { "epoch": 2.2, "learning_rate": 1.8403164790612254e-05, "loss": 0.0277, "step": 8510 }, { "epoch": 2.2, "learning_rate": 1.839214190410466e-05, "loss": 0.023, "step": 8511 }, { "epoch": 2.2, "learning_rate": 1.83811215757156e-05, "loss": 0.0249, "step": 8512 }, { "epoch": 2.2, "learning_rate": 1.837010380633697e-05, "loss": 0.0306, "step": 8513 }, { "epoch": 2.2, "learning_rate": 1.8359088596860508e-05, "loss": 0.0244, "step": 8514 }, { "epoch": 2.2, "learning_rate": 1.8348075948177683e-05, "loss": 0.0285, "step": 8515 }, { "epoch": 2.2, "learning_rate": 1.8337065861179768e-05, "loss": 0.024, "step": 8516 }, { "epoch": 2.2, "learning_rate": 1.8326058336757883e-05, "loss": 0.0328, "step": 8517 }, { "epoch": 2.2, "learning_rate": 1.831505337580286e-05, "loss": 0.0329, "step": 8518 }, { "epoch": 2.2, "learning_rate": 1.8304050979205394e-05, "loss": 0.0257, "step": 8519 }, { "epoch": 2.2, "learning_rate": 1.8293051147855932e-05, "loss": 0.0265, "step": 8520 }, { "epoch": 2.2, "learning_rate": 1.82820538826447e-05, "loss": 0.0244, "step": 8521 }, { "epoch": 2.2, "learning_rate": 1.827105918446178e-05, "loss": 0.0245, "step": 8522 }, { "epoch": 2.2, "learning_rate": 1.8260067054196967e-05, "loss": 0.0226, "step": 8523 }, { "epoch": 2.2, "learning_rate": 1.8249077492739924e-05, "loss": 0.0277, "step": 8524 }, { "epoch": 2.2, "learning_rate": 1.823809050098005e-05, "loss": 0.0246, "step": 8525 }, { "epoch": 2.2, "learning_rate": 1.8227106079806533e-05, "loss": 0.0238, "step": 8526 }, { "epoch": 2.2, "learning_rate": 1.8216124230108422e-05, "loss": 0.0316, "step": 8527 }, { "epoch": 2.2, "learning_rate": 1.8205144952774484e-05, "loss": 0.0267, "step": 8528 }, { "epoch": 2.2, "learning_rate": 1.819416824869329e-05, "loss": 0.0303, "step": 8529 }, { "epoch": 2.2, "learning_rate": 1.818319411875325e-05, "loss": 0.0318, "step": 8530 }, { "epoch": 2.2, "learning_rate": 1.8172222563842506e-05, "loss": 0.0264, "step": 8531 }, { "epoch": 2.2, "learning_rate": 1.8161253584849047e-05, "loss": 0.0276, "step": 8532 }, { "epoch": 2.2, "learning_rate": 1.8150287182660608e-05, "loss": 0.023, "step": 8533 }, { "epoch": 2.2, "learning_rate": 1.813932335816471e-05, "loss": 0.0302, "step": 8534 }, { "epoch": 2.2, "learning_rate": 1.8128362112248735e-05, "loss": 0.0285, "step": 8535 }, { "epoch": 2.2, "learning_rate": 1.8117403445799775e-05, "loss": 0.0311, "step": 8536 }, { "epoch": 2.2, "learning_rate": 1.810644735970477e-05, "loss": 0.0221, "step": 8537 }, { "epoch": 2.2, "learning_rate": 1.809549385485042e-05, "loss": 0.026, "step": 8538 }, { "epoch": 2.2, "learning_rate": 1.8084542932123205e-05, "loss": 0.0275, "step": 8539 }, { "epoch": 2.2, "learning_rate": 1.807359459240945e-05, "loss": 0.021, "step": 8540 }, { "epoch": 2.2, "learning_rate": 1.8062648836595218e-05, "loss": 0.0388, "step": 8541 }, { "epoch": 2.2, "learning_rate": 1.8051705665566376e-05, "loss": 0.0345, "step": 8542 }, { "epoch": 2.2, "learning_rate": 1.8040765080208605e-05, "loss": 0.029, "step": 8543 }, { "epoch": 2.2, "learning_rate": 1.8029827081407336e-05, "loss": 0.0273, "step": 8544 }, { "epoch": 2.21, "learning_rate": 1.801889167004785e-05, "loss": 0.0376, "step": 8545 }, { "epoch": 2.21, "learning_rate": 1.8007958847015156e-05, "loss": 0.0291, "step": 8546 }, { "epoch": 2.21, "learning_rate": 1.7997028613194066e-05, "loss": 0.0335, "step": 8547 }, { "epoch": 2.21, "learning_rate": 1.798610096946924e-05, "loss": 0.0308, "step": 8548 }, { "epoch": 2.21, "learning_rate": 1.7975175916725035e-05, "loss": 0.0354, "step": 8549 }, { "epoch": 2.21, "learning_rate": 1.7964253455845697e-05, "loss": 0.0274, "step": 8550 }, { "epoch": 2.21, "learning_rate": 1.7953333587715186e-05, "loss": 0.0223, "step": 8551 }, { "epoch": 2.21, "learning_rate": 1.794241631321727e-05, "loss": 0.0229, "step": 8552 }, { "epoch": 2.21, "learning_rate": 1.7931501633235542e-05, "loss": 0.0279, "step": 8553 }, { "epoch": 2.21, "learning_rate": 1.7920589548653356e-05, "loss": 0.0294, "step": 8554 }, { "epoch": 2.21, "learning_rate": 1.790968006035383e-05, "loss": 0.0226, "step": 8555 }, { "epoch": 2.21, "learning_rate": 1.789877316921994e-05, "loss": 0.0214, "step": 8556 }, { "epoch": 2.21, "learning_rate": 1.7887868876134385e-05, "loss": 0.0269, "step": 8557 }, { "epoch": 2.21, "learning_rate": 1.7876967181979704e-05, "loss": 0.0186, "step": 8558 }, { "epoch": 2.21, "learning_rate": 1.7866068087638193e-05, "loss": 0.0232, "step": 8559 }, { "epoch": 2.21, "learning_rate": 1.7855171593991936e-05, "loss": 0.028, "step": 8560 }, { "epoch": 2.21, "learning_rate": 1.7844277701922846e-05, "loss": 0.024, "step": 8561 }, { "epoch": 2.21, "learning_rate": 1.783338641231257e-05, "loss": 0.0233, "step": 8562 }, { "epoch": 2.21, "learning_rate": 1.7822497726042605e-05, "loss": 0.0276, "step": 8563 }, { "epoch": 2.21, "learning_rate": 1.781161164399418e-05, "loss": 0.0234, "step": 8564 }, { "epoch": 2.21, "learning_rate": 1.780072816704833e-05, "loss": 0.0267, "step": 8565 }, { "epoch": 2.21, "learning_rate": 1.7789847296085922e-05, "loss": 0.0307, "step": 8566 }, { "epoch": 2.21, "learning_rate": 1.777896903198755e-05, "loss": 0.0277, "step": 8567 }, { "epoch": 2.21, "learning_rate": 1.7768093375633628e-05, "loss": 0.022, "step": 8568 }, { "epoch": 2.21, "learning_rate": 1.775722032790437e-05, "loss": 0.0267, "step": 8569 }, { "epoch": 2.21, "learning_rate": 1.7746349889679732e-05, "loss": 0.0282, "step": 8570 }, { "epoch": 2.21, "learning_rate": 1.773548206183953e-05, "loss": 0.0219, "step": 8571 }, { "epoch": 2.21, "learning_rate": 1.772461684526332e-05, "loss": 0.0275, "step": 8572 }, { "epoch": 2.21, "learning_rate": 1.7713754240830427e-05, "loss": 0.0188, "step": 8573 }, { "epoch": 2.21, "learning_rate": 1.7702894249420032e-05, "loss": 0.0263, "step": 8574 }, { "epoch": 2.21, "learning_rate": 1.7692036871911038e-05, "loss": 0.0229, "step": 8575 }, { "epoch": 2.21, "learning_rate": 1.768118210918219e-05, "loss": 0.0244, "step": 8576 }, { "epoch": 2.21, "learning_rate": 1.767032996211199e-05, "loss": 0.0229, "step": 8577 }, { "epoch": 2.21, "learning_rate": 1.7659480431578707e-05, "loss": 0.0245, "step": 8578 }, { "epoch": 2.21, "learning_rate": 1.7648633518460467e-05, "loss": 0.0316, "step": 8579 }, { "epoch": 2.21, "learning_rate": 1.7637789223635108e-05, "loss": 0.0265, "step": 8580 }, { "epoch": 2.21, "learning_rate": 1.7626947547980326e-05, "loss": 0.0202, "step": 8581 }, { "epoch": 2.21, "learning_rate": 1.7616108492373545e-05, "loss": 0.0225, "step": 8582 }, { "epoch": 2.21, "learning_rate": 1.7605272057692003e-05, "loss": 0.0242, "step": 8583 }, { "epoch": 2.22, "learning_rate": 1.7594438244812738e-05, "loss": 0.0267, "step": 8584 }, { "epoch": 2.22, "learning_rate": 1.7583607054612556e-05, "loss": 0.0304, "step": 8585 }, { "epoch": 2.22, "learning_rate": 1.7572778487968046e-05, "loss": 0.0287, "step": 8586 }, { "epoch": 2.22, "learning_rate": 1.756195254575562e-05, "loss": 0.0222, "step": 8587 }, { "epoch": 2.22, "learning_rate": 1.7551129228851427e-05, "loss": 0.018, "step": 8588 }, { "epoch": 2.22, "learning_rate": 1.754030853813146e-05, "loss": 0.0301, "step": 8589 }, { "epoch": 2.22, "learning_rate": 1.7529490474471456e-05, "loss": 0.0278, "step": 8590 }, { "epoch": 2.22, "learning_rate": 1.7518675038746933e-05, "loss": 0.0195, "step": 8591 }, { "epoch": 2.22, "learning_rate": 1.750786223183326e-05, "loss": 0.023, "step": 8592 }, { "epoch": 2.22, "learning_rate": 1.74970520546055e-05, "loss": 0.0308, "step": 8593 }, { "epoch": 2.22, "learning_rate": 1.7486244507938603e-05, "loss": 0.0235, "step": 8594 }, { "epoch": 2.22, "learning_rate": 1.7475439592707228e-05, "loss": 0.0222, "step": 8595 }, { "epoch": 2.22, "learning_rate": 1.7464637309785838e-05, "loss": 0.0244, "step": 8596 }, { "epoch": 2.22, "learning_rate": 1.7453837660048723e-05, "loss": 0.0245, "step": 8597 }, { "epoch": 2.22, "learning_rate": 1.744304064436991e-05, "loss": 0.0241, "step": 8598 }, { "epoch": 2.22, "learning_rate": 1.7432246263623238e-05, "loss": 0.0255, "step": 8599 }, { "epoch": 2.22, "learning_rate": 1.742145451868234e-05, "loss": 0.0336, "step": 8600 }, { "epoch": 2.22, "learning_rate": 1.7410665410420596e-05, "loss": 0.0242, "step": 8601 }, { "epoch": 2.22, "learning_rate": 1.7399878939711246e-05, "loss": 0.0356, "step": 8602 }, { "epoch": 2.22, "learning_rate": 1.738909510742724e-05, "loss": 0.0238, "step": 8603 }, { "epoch": 2.22, "learning_rate": 1.7378313914441328e-05, "loss": 0.0278, "step": 8604 }, { "epoch": 2.22, "learning_rate": 1.7367535361626112e-05, "loss": 0.0287, "step": 8605 }, { "epoch": 2.22, "learning_rate": 1.7356759449853893e-05, "loss": 0.0238, "step": 8606 }, { "epoch": 2.22, "learning_rate": 1.7345986179996827e-05, "loss": 0.0262, "step": 8607 }, { "epoch": 2.22, "learning_rate": 1.7335215552926815e-05, "loss": 0.0276, "step": 8608 }, { "epoch": 2.22, "learning_rate": 1.7324447569515533e-05, "loss": 0.0256, "step": 8609 }, { "epoch": 2.22, "learning_rate": 1.731368223063451e-05, "loss": 0.0262, "step": 8610 }, { "epoch": 2.22, "learning_rate": 1.730291953715499e-05, "loss": 0.0299, "step": 8611 }, { "epoch": 2.22, "learning_rate": 1.7292159489948012e-05, "loss": 0.0198, "step": 8612 }, { "epoch": 2.22, "learning_rate": 1.7281402089884462e-05, "loss": 0.0209, "step": 8613 }, { "epoch": 2.22, "learning_rate": 1.7270647337834933e-05, "loss": 0.0294, "step": 8614 }, { "epoch": 2.22, "learning_rate": 1.7259895234669872e-05, "loss": 0.028, "step": 8615 }, { "epoch": 2.22, "learning_rate": 1.7249145781259457e-05, "loss": 0.029, "step": 8616 }, { "epoch": 2.22, "learning_rate": 1.7238398978473662e-05, "loss": 0.026, "step": 8617 }, { "epoch": 2.22, "learning_rate": 1.7227654827182288e-05, "loss": 0.0311, "step": 8618 }, { "epoch": 2.22, "learning_rate": 1.7216913328254862e-05, "loss": 0.0297, "step": 8619 }, { "epoch": 2.22, "learning_rate": 1.7206174482560755e-05, "loss": 0.0286, "step": 8620 }, { "epoch": 2.22, "learning_rate": 1.719543829096908e-05, "loss": 0.0291, "step": 8621 }, { "epoch": 2.23, "learning_rate": 1.7184704754348728e-05, "loss": 0.0318, "step": 8622 }, { "epoch": 2.23, "learning_rate": 1.7173973873568438e-05, "loss": 0.0258, "step": 8623 }, { "epoch": 2.23, "learning_rate": 1.7163245649496666e-05, "loss": 0.0256, "step": 8624 }, { "epoch": 2.23, "learning_rate": 1.7152520083001674e-05, "loss": 0.0271, "step": 8625 }, { "epoch": 2.23, "learning_rate": 1.7141797174951536e-05, "loss": 0.0301, "step": 8626 }, { "epoch": 2.23, "learning_rate": 1.713107692621406e-05, "loss": 0.0332, "step": 8627 }, { "epoch": 2.23, "learning_rate": 1.71203593376569e-05, "loss": 0.0287, "step": 8628 }, { "epoch": 2.23, "learning_rate": 1.7109644410147448e-05, "loss": 0.0344, "step": 8629 }, { "epoch": 2.23, "learning_rate": 1.7098932144552882e-05, "loss": 0.0304, "step": 8630 }, { "epoch": 2.23, "learning_rate": 1.70882225417402e-05, "loss": 0.0313, "step": 8631 }, { "epoch": 2.23, "learning_rate": 1.707751560257614e-05, "loss": 0.0311, "step": 8632 }, { "epoch": 2.23, "learning_rate": 1.7066811327927275e-05, "loss": 0.0341, "step": 8633 }, { "epoch": 2.23, "learning_rate": 1.705610971865992e-05, "loss": 0.0264, "step": 8634 }, { "epoch": 2.23, "learning_rate": 1.704541077564017e-05, "loss": 0.03, "step": 8635 }, { "epoch": 2.23, "learning_rate": 1.703471449973395e-05, "loss": 0.0254, "step": 8636 }, { "epoch": 2.23, "learning_rate": 1.7024020891806936e-05, "loss": 0.0308, "step": 8637 }, { "epoch": 2.23, "learning_rate": 1.7013329952724564e-05, "loss": 0.0274, "step": 8638 }, { "epoch": 2.23, "learning_rate": 1.7002641683352132e-05, "loss": 0.0236, "step": 8639 }, { "epoch": 2.23, "learning_rate": 1.6991956084554633e-05, "loss": 0.0336, "step": 8640 }, { "epoch": 2.23, "learning_rate": 1.6981273157196913e-05, "loss": 0.0326, "step": 8641 }, { "epoch": 2.23, "learning_rate": 1.697059290214356e-05, "loss": 0.0312, "step": 8642 }, { "epoch": 2.23, "learning_rate": 1.695991532025895e-05, "loss": 0.0277, "step": 8643 }, { "epoch": 2.23, "learning_rate": 1.6949240412407274e-05, "loss": 0.0225, "step": 8644 }, { "epoch": 2.23, "learning_rate": 1.6938568179452458e-05, "loss": 0.0198, "step": 8645 }, { "epoch": 2.23, "learning_rate": 1.6927898622258266e-05, "loss": 0.0269, "step": 8646 }, { "epoch": 2.23, "learning_rate": 1.6917231741688204e-05, "loss": 0.0333, "step": 8647 }, { "epoch": 2.23, "learning_rate": 1.6906567538605556e-05, "loss": 0.0396, "step": 8648 }, { "epoch": 2.23, "learning_rate": 1.6895906013873443e-05, "loss": 0.0328, "step": 8649 }, { "epoch": 2.23, "learning_rate": 1.6885247168354718e-05, "loss": 0.0295, "step": 8650 }, { "epoch": 2.23, "learning_rate": 1.687459100291201e-05, "loss": 0.0321, "step": 8651 }, { "epoch": 2.23, "learning_rate": 1.6863937518407797e-05, "loss": 0.017, "step": 8652 }, { "epoch": 2.23, "learning_rate": 1.6853286715704258e-05, "loss": 0.0301, "step": 8653 }, { "epoch": 2.23, "learning_rate": 1.684263859566343e-05, "loss": 0.0282, "step": 8654 }, { "epoch": 2.23, "learning_rate": 1.683199315914708e-05, "loss": 0.0252, "step": 8655 }, { "epoch": 2.23, "learning_rate": 1.682135040701676e-05, "loss": 0.0246, "step": 8656 }, { "epoch": 2.23, "learning_rate": 1.681071034013385e-05, "loss": 0.0219, "step": 8657 }, { "epoch": 2.23, "learning_rate": 1.6800072959359452e-05, "loss": 0.0277, "step": 8658 }, { "epoch": 2.23, "learning_rate": 1.6789438265554515e-05, "loss": 0.0228, "step": 8659 }, { "epoch": 2.23, "learning_rate": 1.677880625957971e-05, "loss": 0.0341, "step": 8660 }, { "epoch": 2.24, "learning_rate": 1.6768176942295515e-05, "loss": 0.0206, "step": 8661 }, { "epoch": 2.24, "learning_rate": 1.6757550314562216e-05, "loss": 0.0312, "step": 8662 }, { "epoch": 2.24, "learning_rate": 1.6746926377239847e-05, "loss": 0.0307, "step": 8663 }, { "epoch": 2.24, "learning_rate": 1.6736305131188214e-05, "loss": 0.028, "step": 8664 }, { "epoch": 2.24, "learning_rate": 1.6725686577266963e-05, "loss": 0.0251, "step": 8665 }, { "epoch": 2.24, "learning_rate": 1.6715070716335445e-05, "loss": 0.0284, "step": 8666 }, { "epoch": 2.24, "learning_rate": 1.6704457549252878e-05, "loss": 0.0296, "step": 8667 }, { "epoch": 2.24, "learning_rate": 1.6693847076878196e-05, "loss": 0.0202, "step": 8668 }, { "epoch": 2.24, "learning_rate": 1.6683239300070115e-05, "loss": 0.0275, "step": 8669 }, { "epoch": 2.24, "learning_rate": 1.667263421968719e-05, "loss": 0.0342, "step": 8670 }, { "epoch": 2.24, "learning_rate": 1.6662031836587688e-05, "loss": 0.024, "step": 8671 }, { "epoch": 2.24, "learning_rate": 1.6651432151629725e-05, "loss": 0.0248, "step": 8672 }, { "epoch": 2.24, "learning_rate": 1.6640835165671155e-05, "loss": 0.0245, "step": 8673 }, { "epoch": 2.24, "learning_rate": 1.66302408795696e-05, "loss": 0.0283, "step": 8674 }, { "epoch": 2.24, "learning_rate": 1.661964929418252e-05, "loss": 0.0252, "step": 8675 }, { "epoch": 2.24, "learning_rate": 1.660906041036711e-05, "loss": 0.0297, "step": 8676 }, { "epoch": 2.24, "learning_rate": 1.6598474228980344e-05, "loss": 0.0171, "step": 8677 }, { "epoch": 2.24, "learning_rate": 1.6587890750879028e-05, "loss": 0.0255, "step": 8678 }, { "epoch": 2.24, "learning_rate": 1.6577309976919676e-05, "loss": 0.0258, "step": 8679 }, { "epoch": 2.24, "learning_rate": 1.6566731907958655e-05, "loss": 0.0267, "step": 8680 }, { "epoch": 2.24, "learning_rate": 1.655615654485207e-05, "loss": 0.0258, "step": 8681 }, { "epoch": 2.24, "learning_rate": 1.6545583888455796e-05, "loss": 0.0303, "step": 8682 }, { "epoch": 2.24, "learning_rate": 1.6535013939625543e-05, "loss": 0.0192, "step": 8683 }, { "epoch": 2.24, "learning_rate": 1.6524446699216733e-05, "loss": 0.0241, "step": 8684 }, { "epoch": 2.24, "learning_rate": 1.6513882168084644e-05, "loss": 0.0271, "step": 8685 }, { "epoch": 2.24, "learning_rate": 1.6503320347084272e-05, "loss": 0.0293, "step": 8686 }, { "epoch": 2.24, "learning_rate": 1.6492761237070403e-05, "loss": 0.0264, "step": 8687 }, { "epoch": 2.24, "learning_rate": 1.6482204838897647e-05, "loss": 0.034, "step": 8688 }, { "epoch": 2.24, "learning_rate": 1.647165115342036e-05, "loss": 0.0323, "step": 8689 }, { "epoch": 2.24, "learning_rate": 1.646110018149265e-05, "loss": 0.0353, "step": 8690 }, { "epoch": 2.24, "learning_rate": 1.6450551923968483e-05, "loss": 0.0242, "step": 8691 }, { "epoch": 2.24, "learning_rate": 1.6440006381701528e-05, "loss": 0.0254, "step": 8692 }, { "epoch": 2.24, "learning_rate": 1.6429463555545293e-05, "loss": 0.0222, "step": 8693 }, { "epoch": 2.24, "learning_rate": 1.641892344635303e-05, "loss": 0.0291, "step": 8694 }, { "epoch": 2.24, "learning_rate": 1.6408386054977763e-05, "loss": 0.0269, "step": 8695 }, { "epoch": 2.24, "learning_rate": 1.639785138227235e-05, "loss": 0.0294, "step": 8696 }, { "epoch": 2.24, "learning_rate": 1.6387319429089354e-05, "loss": 0.0264, "step": 8697 }, { "epoch": 2.24, "learning_rate": 1.63767901962812e-05, "loss": 0.0361, "step": 8698 }, { "epoch": 2.24, "learning_rate": 1.636626368470003e-05, "loss": 0.028, "step": 8699 }, { "epoch": 2.25, "learning_rate": 1.6355739895197765e-05, "loss": 0.0213, "step": 8700 }, { "epoch": 2.25, "learning_rate": 1.6345218828626162e-05, "loss": 0.0259, "step": 8701 }, { "epoch": 2.25, "learning_rate": 1.6334700485836714e-05, "loss": 0.0308, "step": 8702 }, { "epoch": 2.25, "learning_rate": 1.6324184867680675e-05, "loss": 0.025, "step": 8703 }, { "epoch": 2.25, "learning_rate": 1.6313671975009143e-05, "loss": 0.0267, "step": 8704 }, { "epoch": 2.25, "learning_rate": 1.630316180867292e-05, "loss": 0.026, "step": 8705 }, { "epoch": 2.25, "learning_rate": 1.6292654369522665e-05, "loss": 0.0271, "step": 8706 }, { "epoch": 2.25, "learning_rate": 1.6282149658408758e-05, "loss": 0.0232, "step": 8707 }, { "epoch": 2.25, "learning_rate": 1.6271647676181357e-05, "loss": 0.0242, "step": 8708 }, { "epoch": 2.25, "learning_rate": 1.6261148423690458e-05, "loss": 0.0286, "step": 8709 }, { "epoch": 2.25, "learning_rate": 1.6250651901785756e-05, "loss": 0.0274, "step": 8710 }, { "epoch": 2.25, "learning_rate": 1.6240158111316805e-05, "loss": 0.0225, "step": 8711 }, { "epoch": 2.25, "learning_rate": 1.6229667053132884e-05, "loss": 0.0203, "step": 8712 }, { "epoch": 2.25, "learning_rate": 1.621917872808304e-05, "loss": 0.0208, "step": 8713 }, { "epoch": 2.25, "learning_rate": 1.620869313701617e-05, "loss": 0.0238, "step": 8714 }, { "epoch": 2.25, "learning_rate": 1.6198210280780872e-05, "loss": 0.0282, "step": 8715 }, { "epoch": 2.25, "learning_rate": 1.6187730160225555e-05, "loss": 0.0291, "step": 8716 }, { "epoch": 2.25, "learning_rate": 1.6177252776198432e-05, "loss": 0.0251, "step": 8717 }, { "epoch": 2.25, "learning_rate": 1.6166778129547434e-05, "loss": 0.0287, "step": 8718 }, { "epoch": 2.25, "learning_rate": 1.615630622112034e-05, "loss": 0.0238, "step": 8719 }, { "epoch": 2.25, "learning_rate": 1.6145837051764655e-05, "loss": 0.0296, "step": 8720 }, { "epoch": 2.25, "learning_rate": 1.6135370622327665e-05, "loss": 0.028, "step": 8721 }, { "epoch": 2.25, "learning_rate": 1.6124906933656488e-05, "loss": 0.0266, "step": 8722 }, { "epoch": 2.25, "learning_rate": 1.6114445986597942e-05, "loss": 0.0325, "step": 8723 }, { "epoch": 2.25, "learning_rate": 1.6103987781998698e-05, "loss": 0.0283, "step": 8724 }, { "epoch": 2.25, "learning_rate": 1.609353232070515e-05, "loss": 0.0296, "step": 8725 }, { "epoch": 2.25, "learning_rate": 1.608307960356348e-05, "loss": 0.0263, "step": 8726 }, { "epoch": 2.25, "learning_rate": 1.6072629631419682e-05, "loss": 0.0231, "step": 8727 }, { "epoch": 2.25, "learning_rate": 1.6062182405119475e-05, "loss": 0.0247, "step": 8728 }, { "epoch": 2.25, "learning_rate": 1.605173792550842e-05, "loss": 0.027, "step": 8729 }, { "epoch": 2.25, "learning_rate": 1.6041296193431806e-05, "loss": 0.0225, "step": 8730 }, { "epoch": 2.25, "learning_rate": 1.6030857209734684e-05, "loss": 0.0228, "step": 8731 }, { "epoch": 2.25, "learning_rate": 1.6020420975261956e-05, "loss": 0.0226, "step": 8732 }, { "epoch": 2.25, "learning_rate": 1.6009987490858234e-05, "loss": 0.0278, "step": 8733 }, { "epoch": 2.25, "learning_rate": 1.5999556757367928e-05, "loss": 0.0268, "step": 8734 }, { "epoch": 2.25, "learning_rate": 1.5989128775635248e-05, "loss": 0.0288, "step": 8735 }, { "epoch": 2.25, "learning_rate": 1.597870354650413e-05, "loss": 0.0296, "step": 8736 }, { "epoch": 2.25, "learning_rate": 1.5968281070818364e-05, "loss": 0.0251, "step": 8737 }, { "epoch": 2.25, "learning_rate": 1.5957861349421438e-05, "loss": 0.0363, "step": 8738 }, { "epoch": 2.26, "learning_rate": 1.594744438315665e-05, "loss": 0.0287, "step": 8739 }, { "epoch": 2.26, "learning_rate": 1.59370301728671e-05, "loss": 0.0273, "step": 8740 }, { "epoch": 2.26, "learning_rate": 1.592661871939561e-05, "loss": 0.0305, "step": 8741 }, { "epoch": 2.26, "learning_rate": 1.591621002358485e-05, "loss": 0.03, "step": 8742 }, { "epoch": 2.26, "learning_rate": 1.5905804086277197e-05, "loss": 0.0287, "step": 8743 }, { "epoch": 2.26, "learning_rate": 1.589540090831483e-05, "loss": 0.0305, "step": 8744 }, { "epoch": 2.26, "learning_rate": 1.5885000490539736e-05, "loss": 0.0334, "step": 8745 }, { "epoch": 2.26, "learning_rate": 1.587460283379364e-05, "loss": 0.0264, "step": 8746 }, { "epoch": 2.26, "learning_rate": 1.5864207938918037e-05, "loss": 0.0303, "step": 8747 }, { "epoch": 2.26, "learning_rate": 1.5853815806754245e-05, "loss": 0.0351, "step": 8748 }, { "epoch": 2.26, "learning_rate": 1.5843426438143305e-05, "loss": 0.0304, "step": 8749 }, { "epoch": 2.26, "learning_rate": 1.5833039833926087e-05, "loss": 0.0393, "step": 8750 }, { "epoch": 2.26, "learning_rate": 1.5822655994943196e-05, "loss": 0.022, "step": 8751 }, { "epoch": 2.26, "learning_rate": 1.5812274922035014e-05, "loss": 0.0233, "step": 8752 }, { "epoch": 2.26, "learning_rate": 1.5801896616041744e-05, "loss": 0.0252, "step": 8753 }, { "epoch": 2.26, "learning_rate": 1.5791521077803293e-05, "loss": 0.0289, "step": 8754 }, { "epoch": 2.26, "learning_rate": 1.5781148308159428e-05, "loss": 0.0316, "step": 8755 }, { "epoch": 2.26, "learning_rate": 1.5770778307949617e-05, "loss": 0.0258, "step": 8756 }, { "epoch": 2.26, "learning_rate": 1.5760411078013136e-05, "loss": 0.0273, "step": 8757 }, { "epoch": 2.26, "learning_rate": 1.5750046619189056e-05, "loss": 0.0348, "step": 8758 }, { "epoch": 2.26, "learning_rate": 1.57396849323162e-05, "loss": 0.0188, "step": 8759 }, { "epoch": 2.26, "learning_rate": 1.5729326018233143e-05, "loss": 0.02, "step": 8760 }, { "epoch": 2.26, "learning_rate": 1.5718969877778293e-05, "loss": 0.0276, "step": 8761 }, { "epoch": 2.26, "learning_rate": 1.570861651178978e-05, "loss": 0.029, "step": 8762 }, { "epoch": 2.26, "learning_rate": 1.5698265921105564e-05, "loss": 0.0244, "step": 8763 }, { "epoch": 2.26, "learning_rate": 1.5687918106563326e-05, "loss": 0.0284, "step": 8764 }, { "epoch": 2.26, "learning_rate": 1.567757306900054e-05, "loss": 0.0195, "step": 8765 }, { "epoch": 2.26, "learning_rate": 1.5667230809254485e-05, "loss": 0.0246, "step": 8766 }, { "epoch": 2.26, "learning_rate": 1.5656891328162164e-05, "loss": 0.032, "step": 8767 }, { "epoch": 2.26, "learning_rate": 1.5646554626560413e-05, "loss": 0.0341, "step": 8768 }, { "epoch": 2.26, "learning_rate": 1.5636220705285788e-05, "loss": 0.025, "step": 8769 }, { "epoch": 2.26, "learning_rate": 1.562588956517464e-05, "loss": 0.0228, "step": 8770 }, { "epoch": 2.26, "learning_rate": 1.5615561207063123e-05, "loss": 0.0223, "step": 8771 }, { "epoch": 2.26, "learning_rate": 1.560523563178713e-05, "loss": 0.0268, "step": 8772 }, { "epoch": 2.26, "learning_rate": 1.5594912840182323e-05, "loss": 0.0199, "step": 8773 }, { "epoch": 2.26, "learning_rate": 1.558459283308419e-05, "loss": 0.0228, "step": 8774 }, { "epoch": 2.26, "learning_rate": 1.5574275611327926e-05, "loss": 0.0266, "step": 8775 }, { "epoch": 2.26, "learning_rate": 1.556396117574857e-05, "loss": 0.0324, "step": 8776 }, { "epoch": 2.27, "learning_rate": 1.555364952718088e-05, "loss": 0.0287, "step": 8777 }, { "epoch": 2.27, "learning_rate": 1.55433406664594e-05, "loss": 0.0264, "step": 8778 }, { "epoch": 2.27, "learning_rate": 1.5533034594418478e-05, "loss": 0.0259, "step": 8779 }, { "epoch": 2.27, "learning_rate": 1.5522731311892193e-05, "loss": 0.0293, "step": 8780 }, { "epoch": 2.27, "learning_rate": 1.551243081971444e-05, "loss": 0.0249, "step": 8781 }, { "epoch": 2.27, "learning_rate": 1.5502133118718866e-05, "loss": 0.0235, "step": 8782 }, { "epoch": 2.27, "learning_rate": 1.5491838209738873e-05, "loss": 0.0315, "step": 8783 }, { "epoch": 2.27, "learning_rate": 1.548154609360769e-05, "loss": 0.0317, "step": 8784 }, { "epoch": 2.27, "learning_rate": 1.5471256771158275e-05, "loss": 0.0238, "step": 8785 }, { "epoch": 2.27, "learning_rate": 1.546097024322336e-05, "loss": 0.0314, "step": 8786 }, { "epoch": 2.27, "learning_rate": 1.5450686510635492e-05, "loss": 0.0239, "step": 8787 }, { "epoch": 2.27, "learning_rate": 1.5440405574226934e-05, "loss": 0.0266, "step": 8788 }, { "epoch": 2.27, "learning_rate": 1.543012743482978e-05, "loss": 0.0237, "step": 8789 }, { "epoch": 2.27, "learning_rate": 1.5419852093275868e-05, "loss": 0.027, "step": 8790 }, { "epoch": 2.27, "learning_rate": 1.540957955039678e-05, "loss": 0.023, "step": 8791 }, { "epoch": 2.27, "learning_rate": 1.5399309807023944e-05, "loss": 0.0247, "step": 8792 }, { "epoch": 2.27, "learning_rate": 1.538904286398849e-05, "loss": 0.0246, "step": 8793 }, { "epoch": 2.27, "learning_rate": 1.5378778722121377e-05, "loss": 0.0281, "step": 8794 }, { "epoch": 2.27, "learning_rate": 1.5368517382253304e-05, "loss": 0.0243, "step": 8795 }, { "epoch": 2.27, "learning_rate": 1.535825884521474e-05, "loss": 0.0277, "step": 8796 }, { "epoch": 2.27, "learning_rate": 1.5348003111835963e-05, "loss": 0.0212, "step": 8797 }, { "epoch": 2.27, "learning_rate": 1.533775018294698e-05, "loss": 0.025, "step": 8798 }, { "epoch": 2.27, "learning_rate": 1.5327500059377587e-05, "loss": 0.0299, "step": 8799 }, { "epoch": 2.27, "learning_rate": 1.5317252741957384e-05, "loss": 0.0227, "step": 8800 }, { "epoch": 2.27, "learning_rate": 1.530700823151568e-05, "loss": 0.0235, "step": 8801 }, { "epoch": 2.27, "learning_rate": 1.5296766528881635e-05, "loss": 0.0292, "step": 8802 }, { "epoch": 2.27, "learning_rate": 1.5286527634884117e-05, "loss": 0.0234, "step": 8803 }, { "epoch": 2.27, "learning_rate": 1.527629155035178e-05, "loss": 0.0261, "step": 8804 }, { "epoch": 2.27, "learning_rate": 1.5266058276113094e-05, "loss": 0.0219, "step": 8805 }, { "epoch": 2.27, "learning_rate": 1.5255827812996227e-05, "loss": 0.0248, "step": 8806 }, { "epoch": 2.27, "learning_rate": 1.5245600161829199e-05, "loss": 0.0351, "step": 8807 }, { "epoch": 2.27, "learning_rate": 1.523537532343975e-05, "loss": 0.0303, "step": 8808 }, { "epoch": 2.27, "learning_rate": 1.5225153298655392e-05, "loss": 0.0205, "step": 8809 }, { "epoch": 2.27, "learning_rate": 1.5214934088303451e-05, "loss": 0.026, "step": 8810 }, { "epoch": 2.27, "learning_rate": 1.5204717693210985e-05, "loss": 0.0203, "step": 8811 }, { "epoch": 2.27, "learning_rate": 1.5194504114204821e-05, "loss": 0.023, "step": 8812 }, { "epoch": 2.27, "learning_rate": 1.5184293352111607e-05, "loss": 0.0219, "step": 8813 }, { "epoch": 2.27, "learning_rate": 1.5174085407757693e-05, "loss": 0.0283, "step": 8814 }, { "epoch": 2.27, "learning_rate": 1.5163880281969283e-05, "loss": 0.0259, "step": 8815 }, { "epoch": 2.28, "learning_rate": 1.5153677975572278e-05, "loss": 0.0294, "step": 8816 }, { "epoch": 2.28, "learning_rate": 1.5143478489392377e-05, "loss": 0.0266, "step": 8817 }, { "epoch": 2.28, "learning_rate": 1.513328182425508e-05, "loss": 0.0291, "step": 8818 }, { "epoch": 2.28, "learning_rate": 1.5123087980985606e-05, "loss": 0.0302, "step": 8819 }, { "epoch": 2.28, "learning_rate": 1.5112896960409001e-05, "loss": 0.0266, "step": 8820 }, { "epoch": 2.28, "learning_rate": 1.5102708763350043e-05, "loss": 0.0302, "step": 8821 }, { "epoch": 2.28, "learning_rate": 1.5092523390633267e-05, "loss": 0.0287, "step": 8822 }, { "epoch": 2.28, "learning_rate": 1.5082340843083054e-05, "loss": 0.0325, "step": 8823 }, { "epoch": 2.28, "learning_rate": 1.5072161121523481e-05, "loss": 0.0217, "step": 8824 }, { "epoch": 2.28, "learning_rate": 1.5061984226778408e-05, "loss": 0.0232, "step": 8825 }, { "epoch": 2.28, "learning_rate": 1.505181015967152e-05, "loss": 0.0288, "step": 8826 }, { "epoch": 2.28, "learning_rate": 1.5041638921026197e-05, "loss": 0.0265, "step": 8827 }, { "epoch": 2.28, "learning_rate": 1.5031470511665657e-05, "loss": 0.0324, "step": 8828 }, { "epoch": 2.28, "learning_rate": 1.5021304932412849e-05, "loss": 0.0274, "step": 8829 }, { "epoch": 2.28, "learning_rate": 1.5011142184090488e-05, "loss": 0.0257, "step": 8830 }, { "epoch": 2.28, "learning_rate": 1.5000982267521108e-05, "loss": 0.0298, "step": 8831 }, { "epoch": 2.28, "learning_rate": 1.4990825183526946e-05, "loss": 0.025, "step": 8832 }, { "epoch": 2.28, "learning_rate": 1.4980670932930079e-05, "loss": 0.0303, "step": 8833 }, { "epoch": 2.28, "learning_rate": 1.4970519516552306e-05, "loss": 0.0227, "step": 8834 }, { "epoch": 2.28, "learning_rate": 1.496037093521519e-05, "loss": 0.0295, "step": 8835 }, { "epoch": 2.28, "learning_rate": 1.4950225189740125e-05, "loss": 0.028, "step": 8836 }, { "epoch": 2.28, "learning_rate": 1.494008228094822e-05, "loss": 0.0252, "step": 8837 }, { "epoch": 2.28, "learning_rate": 1.4929942209660347e-05, "loss": 0.0291, "step": 8838 }, { "epoch": 2.28, "learning_rate": 1.4919804976697217e-05, "loss": 0.0302, "step": 8839 }, { "epoch": 2.28, "learning_rate": 1.4909670582879226e-05, "loss": 0.0296, "step": 8840 }, { "epoch": 2.28, "learning_rate": 1.489953902902661e-05, "loss": 0.037, "step": 8841 }, { "epoch": 2.28, "learning_rate": 1.488941031595934e-05, "loss": 0.0303, "step": 8842 }, { "epoch": 2.28, "learning_rate": 1.4879284444497139e-05, "loss": 0.0263, "step": 8843 }, { "epoch": 2.28, "learning_rate": 1.4869161415459554e-05, "loss": 0.0331, "step": 8844 }, { "epoch": 2.28, "learning_rate": 1.4859041229665848e-05, "loss": 0.0331, "step": 8845 }, { "epoch": 2.28, "learning_rate": 1.4848923887935112e-05, "loss": 0.032, "step": 8846 }, { "epoch": 2.28, "learning_rate": 1.4838809391086144e-05, "loss": 0.0263, "step": 8847 }, { "epoch": 2.28, "learning_rate": 1.4828697739937536e-05, "loss": 0.0307, "step": 8848 }, { "epoch": 2.28, "learning_rate": 1.4818588935307681e-05, "loss": 0.0314, "step": 8849 }, { "epoch": 2.28, "learning_rate": 1.4808482978014698e-05, "loss": 0.0283, "step": 8850 }, { "epoch": 2.28, "learning_rate": 1.4798379868876483e-05, "loss": 0.0327, "step": 8851 }, { "epoch": 2.28, "learning_rate": 1.4788279608710732e-05, "loss": 0.0402, "step": 8852 }, { "epoch": 2.28, "learning_rate": 1.4778182198334866e-05, "loss": 0.0203, "step": 8853 }, { "epoch": 2.28, "learning_rate": 1.4768087638566131e-05, "loss": 0.0245, "step": 8854 }, { "epoch": 2.29, "learning_rate": 1.4757995930221486e-05, "loss": 0.0287, "step": 8855 }, { "epoch": 2.29, "learning_rate": 1.4747907074117672e-05, "loss": 0.0301, "step": 8856 }, { "epoch": 2.29, "learning_rate": 1.4737821071071245e-05, "loss": 0.0257, "step": 8857 }, { "epoch": 2.29, "learning_rate": 1.472773792189846e-05, "loss": 0.0217, "step": 8858 }, { "epoch": 2.29, "learning_rate": 1.471765762741541e-05, "loss": 0.0191, "step": 8859 }, { "epoch": 2.29, "learning_rate": 1.4707580188437909e-05, "loss": 0.0263, "step": 8860 }, { "epoch": 2.29, "learning_rate": 1.4697505605781531e-05, "loss": 0.0193, "step": 8861 }, { "epoch": 2.29, "learning_rate": 1.468743388026168e-05, "loss": 0.025, "step": 8862 }, { "epoch": 2.29, "learning_rate": 1.4677365012693472e-05, "loss": 0.0208, "step": 8863 }, { "epoch": 2.29, "learning_rate": 1.4667299003891805e-05, "loss": 0.0295, "step": 8864 }, { "epoch": 2.29, "learning_rate": 1.4657235854671363e-05, "loss": 0.0239, "step": 8865 }, { "epoch": 2.29, "learning_rate": 1.4647175565846577e-05, "loss": 0.0281, "step": 8866 }, { "epoch": 2.29, "learning_rate": 1.463711813823167e-05, "loss": 0.0235, "step": 8867 }, { "epoch": 2.29, "learning_rate": 1.4627063572640615e-05, "loss": 0.037, "step": 8868 }, { "epoch": 2.29, "learning_rate": 1.4617011869887137e-05, "loss": 0.0258, "step": 8869 }, { "epoch": 2.29, "learning_rate": 1.4606963030784787e-05, "loss": 0.0183, "step": 8870 }, { "epoch": 2.29, "learning_rate": 1.4596917056146808e-05, "loss": 0.027, "step": 8871 }, { "epoch": 2.29, "learning_rate": 1.458687394678629e-05, "loss": 0.0311, "step": 8872 }, { "epoch": 2.29, "learning_rate": 1.4576833703516034e-05, "loss": 0.0279, "step": 8873 }, { "epoch": 2.29, "learning_rate": 1.4566796327148607e-05, "loss": 0.0295, "step": 8874 }, { "epoch": 2.29, "learning_rate": 1.45567618184964e-05, "loss": 0.0223, "step": 8875 }, { "epoch": 2.29, "learning_rate": 1.4546730178371504e-05, "loss": 0.0248, "step": 8876 }, { "epoch": 2.29, "learning_rate": 1.4536701407585839e-05, "loss": 0.0235, "step": 8877 }, { "epoch": 2.29, "learning_rate": 1.4526675506951055e-05, "loss": 0.0262, "step": 8878 }, { "epoch": 2.29, "learning_rate": 1.4516652477278552e-05, "loss": 0.0229, "step": 8879 }, { "epoch": 2.29, "learning_rate": 1.450663231937956e-05, "loss": 0.0345, "step": 8880 }, { "epoch": 2.29, "learning_rate": 1.449661503406502e-05, "loss": 0.0224, "step": 8881 }, { "epoch": 2.29, "learning_rate": 1.4486600622145652e-05, "loss": 0.0298, "step": 8882 }, { "epoch": 2.29, "learning_rate": 1.4476589084431984e-05, "loss": 0.0305, "step": 8883 }, { "epoch": 2.29, "learning_rate": 1.4466580421734243e-05, "loss": 0.0247, "step": 8884 }, { "epoch": 2.29, "learning_rate": 1.445657463486249e-05, "loss": 0.0249, "step": 8885 }, { "epoch": 2.29, "learning_rate": 1.4446571724626517e-05, "loss": 0.0234, "step": 8886 }, { "epoch": 2.29, "learning_rate": 1.4436571691835871e-05, "loss": 0.0275, "step": 8887 }, { "epoch": 2.29, "learning_rate": 1.4426574537299908e-05, "loss": 0.0297, "step": 8888 }, { "epoch": 2.29, "learning_rate": 1.44165802618277e-05, "loss": 0.0206, "step": 8889 }, { "epoch": 2.29, "learning_rate": 1.440658886622815e-05, "loss": 0.0286, "step": 8890 }, { "epoch": 2.29, "learning_rate": 1.439660035130987e-05, "loss": 0.0262, "step": 8891 }, { "epoch": 2.29, "learning_rate": 1.4386614717881247e-05, "loss": 0.0312, "step": 8892 }, { "epoch": 2.29, "learning_rate": 1.437663196675048e-05, "loss": 0.0334, "step": 8893 }, { "epoch": 2.3, "learning_rate": 1.4366652098725491e-05, "loss": 0.0285, "step": 8894 }, { "epoch": 2.3, "learning_rate": 1.4356675114613955e-05, "loss": 0.0288, "step": 8895 }, { "epoch": 2.3, "learning_rate": 1.434670101522338e-05, "loss": 0.0215, "step": 8896 }, { "epoch": 2.3, "learning_rate": 1.433672980136096e-05, "loss": 0.027, "step": 8897 }, { "epoch": 2.3, "learning_rate": 1.4326761473833734e-05, "loss": 0.0232, "step": 8898 }, { "epoch": 2.3, "learning_rate": 1.4316796033448449e-05, "loss": 0.0242, "step": 8899 }, { "epoch": 2.3, "learning_rate": 1.4306833481011627e-05, "loss": 0.022, "step": 8900 }, { "epoch": 2.3, "learning_rate": 1.4296873817329592e-05, "loss": 0.0298, "step": 8901 }, { "epoch": 2.3, "learning_rate": 1.428691704320838e-05, "loss": 0.0309, "step": 8902 }, { "epoch": 2.3, "learning_rate": 1.4276963159453855e-05, "loss": 0.0214, "step": 8903 }, { "epoch": 2.3, "learning_rate": 1.4267012166871596e-05, "loss": 0.0307, "step": 8904 }, { "epoch": 2.3, "learning_rate": 1.4257064066266956e-05, "loss": 0.023, "step": 8905 }, { "epoch": 2.3, "learning_rate": 1.4247118858445097e-05, "loss": 0.0248, "step": 8906 }, { "epoch": 2.3, "learning_rate": 1.423717654421089e-05, "loss": 0.0243, "step": 8907 }, { "epoch": 2.3, "learning_rate": 1.4227237124368987e-05, "loss": 0.0235, "step": 8908 }, { "epoch": 2.3, "learning_rate": 1.4217300599723848e-05, "loss": 0.0281, "step": 8909 }, { "epoch": 2.3, "learning_rate": 1.4207366971079627e-05, "loss": 0.0283, "step": 8910 }, { "epoch": 2.3, "learning_rate": 1.4197436239240325e-05, "loss": 0.031, "step": 8911 }, { "epoch": 2.3, "learning_rate": 1.4187508405009637e-05, "loss": 0.0238, "step": 8912 }, { "epoch": 2.3, "learning_rate": 1.4177583469191041e-05, "loss": 0.0261, "step": 8913 }, { "epoch": 2.3, "learning_rate": 1.416766143258783e-05, "loss": 0.0254, "step": 8914 }, { "epoch": 2.3, "learning_rate": 1.4157742296002985e-05, "loss": 0.0248, "step": 8915 }, { "epoch": 2.3, "learning_rate": 1.4147826060239328e-05, "loss": 0.0217, "step": 8916 }, { "epoch": 2.3, "learning_rate": 1.4137912726099389e-05, "loss": 0.0262, "step": 8917 }, { "epoch": 2.3, "learning_rate": 1.4128002294385472e-05, "loss": 0.0202, "step": 8918 }, { "epoch": 2.3, "learning_rate": 1.4118094765899686e-05, "loss": 0.0268, "step": 8919 }, { "epoch": 2.3, "learning_rate": 1.4108190141443866e-05, "loss": 0.025, "step": 8920 }, { "epoch": 2.3, "learning_rate": 1.4098288421819605e-05, "loss": 0.0285, "step": 8921 }, { "epoch": 2.3, "learning_rate": 1.4088389607828306e-05, "loss": 0.0238, "step": 8922 }, { "epoch": 2.3, "learning_rate": 1.4078493700271084e-05, "loss": 0.0189, "step": 8923 }, { "epoch": 2.3, "learning_rate": 1.4068600699948875e-05, "loss": 0.0276, "step": 8924 }, { "epoch": 2.3, "learning_rate": 1.4058710607662328e-05, "loss": 0.0268, "step": 8925 }, { "epoch": 2.3, "learning_rate": 1.4048823424211865e-05, "loss": 0.025, "step": 8926 }, { "epoch": 2.3, "learning_rate": 1.4038939150397724e-05, "loss": 0.0258, "step": 8927 }, { "epoch": 2.3, "learning_rate": 1.4029057787019828e-05, "loss": 0.023, "step": 8928 }, { "epoch": 2.3, "learning_rate": 1.4019179334877935e-05, "loss": 0.024, "step": 8929 }, { "epoch": 2.3, "learning_rate": 1.4009303794771533e-05, "loss": 0.0281, "step": 8930 }, { "epoch": 2.3, "learning_rate": 1.3999431167499854e-05, "loss": 0.0268, "step": 8931 }, { "epoch": 2.31, "learning_rate": 1.3989561453861955e-05, "loss": 0.0305, "step": 8932 }, { "epoch": 2.31, "learning_rate": 1.3979694654656606e-05, "loss": 0.0312, "step": 8933 }, { "epoch": 2.31, "learning_rate": 1.3969830770682341e-05, "loss": 0.0299, "step": 8934 }, { "epoch": 2.31, "learning_rate": 1.3959969802737499e-05, "loss": 0.0272, "step": 8935 }, { "epoch": 2.31, "learning_rate": 1.3950111751620131e-05, "loss": 0.0305, "step": 8936 }, { "epoch": 2.31, "learning_rate": 1.3940256618128116e-05, "loss": 0.035, "step": 8937 }, { "epoch": 2.31, "learning_rate": 1.3930404403059033e-05, "loss": 0.0329, "step": 8938 }, { "epoch": 2.31, "learning_rate": 1.3920555107210243e-05, "loss": 0.0358, "step": 8939 }, { "epoch": 2.31, "learning_rate": 1.3910708731378902e-05, "loss": 0.03, "step": 8940 }, { "epoch": 2.31, "learning_rate": 1.3900865276361886e-05, "loss": 0.0368, "step": 8941 }, { "epoch": 2.31, "learning_rate": 1.3891024742955877e-05, "loss": 0.0261, "step": 8942 }, { "epoch": 2.31, "learning_rate": 1.388118713195729e-05, "loss": 0.0302, "step": 8943 }, { "epoch": 2.31, "learning_rate": 1.3871352444162295e-05, "loss": 0.0274, "step": 8944 }, { "epoch": 2.31, "learning_rate": 1.386152068036687e-05, "loss": 0.0303, "step": 8945 }, { "epoch": 2.31, "learning_rate": 1.385169184136672e-05, "loss": 0.0279, "step": 8946 }, { "epoch": 2.31, "learning_rate": 1.3841865927957299e-05, "loss": 0.0296, "step": 8947 }, { "epoch": 2.31, "learning_rate": 1.3832042940933887e-05, "loss": 0.026, "step": 8948 }, { "epoch": 2.31, "learning_rate": 1.382222288109145e-05, "loss": 0.0359, "step": 8949 }, { "epoch": 2.31, "learning_rate": 1.3812405749224795e-05, "loss": 0.0353, "step": 8950 }, { "epoch": 2.31, "learning_rate": 1.3802591546128424e-05, "loss": 0.0277, "step": 8951 }, { "epoch": 2.31, "learning_rate": 1.3792780272596622e-05, "loss": 0.0161, "step": 8952 }, { "epoch": 2.31, "learning_rate": 1.3782971929423472e-05, "loss": 0.0261, "step": 8953 }, { "epoch": 2.31, "learning_rate": 1.3773166517402763e-05, "loss": 0.0229, "step": 8954 }, { "epoch": 2.31, "learning_rate": 1.3763364037328114e-05, "loss": 0.0234, "step": 8955 }, { "epoch": 2.31, "learning_rate": 1.3753564489992842e-05, "loss": 0.0266, "step": 8956 }, { "epoch": 2.31, "learning_rate": 1.3743767876190044e-05, "loss": 0.0252, "step": 8957 }, { "epoch": 2.31, "learning_rate": 1.3733974196712624e-05, "loss": 0.0254, "step": 8958 }, { "epoch": 2.31, "learning_rate": 1.372418345235319e-05, "loss": 0.0268, "step": 8959 }, { "epoch": 2.31, "learning_rate": 1.3714395643904126e-05, "loss": 0.0318, "step": 8960 }, { "epoch": 2.31, "learning_rate": 1.3704610772157616e-05, "loss": 0.0279, "step": 8961 }, { "epoch": 2.31, "learning_rate": 1.3694828837905554e-05, "loss": 0.0245, "step": 8962 }, { "epoch": 2.31, "learning_rate": 1.3685049841939646e-05, "loss": 0.0261, "step": 8963 }, { "epoch": 2.31, "learning_rate": 1.3675273785051324e-05, "loss": 0.0229, "step": 8964 }, { "epoch": 2.31, "learning_rate": 1.3665500668031778e-05, "loss": 0.0241, "step": 8965 }, { "epoch": 2.31, "learning_rate": 1.3655730491672003e-05, "loss": 0.0285, "step": 8966 }, { "epoch": 2.31, "learning_rate": 1.3645963256762707e-05, "loss": 0.0223, "step": 8967 }, { "epoch": 2.31, "learning_rate": 1.3636198964094393e-05, "loss": 0.0239, "step": 8968 }, { "epoch": 2.31, "learning_rate": 1.362643761445732e-05, "loss": 0.0235, "step": 8969 }, { "epoch": 2.31, "learning_rate": 1.3616679208641475e-05, "loss": 0.0307, "step": 8970 }, { "epoch": 2.32, "learning_rate": 1.3606923747436673e-05, "loss": 0.0315, "step": 8971 }, { "epoch": 2.32, "learning_rate": 1.3597171231632433e-05, "loss": 0.0218, "step": 8972 }, { "epoch": 2.32, "learning_rate": 1.3587421662018047e-05, "loss": 0.0246, "step": 8973 }, { "epoch": 2.32, "learning_rate": 1.3577675039382592e-05, "loss": 0.0296, "step": 8974 }, { "epoch": 2.32, "learning_rate": 1.3567931364514875e-05, "loss": 0.0297, "step": 8975 }, { "epoch": 2.32, "learning_rate": 1.3558190638203506e-05, "loss": 0.0188, "step": 8976 }, { "epoch": 2.32, "learning_rate": 1.3548452861236815e-05, "loss": 0.0262, "step": 8977 }, { "epoch": 2.32, "learning_rate": 1.3538718034402898e-05, "loss": 0.0252, "step": 8978 }, { "epoch": 2.32, "learning_rate": 1.3528986158489653e-05, "loss": 0.0235, "step": 8979 }, { "epoch": 2.32, "learning_rate": 1.3519257234284677e-05, "loss": 0.0211, "step": 8980 }, { "epoch": 2.32, "learning_rate": 1.3509531262575392e-05, "loss": 0.0209, "step": 8981 }, { "epoch": 2.32, "learning_rate": 1.3499808244148937e-05, "loss": 0.0339, "step": 8982 }, { "epoch": 2.32, "learning_rate": 1.3490088179792205e-05, "loss": 0.0223, "step": 8983 }, { "epoch": 2.32, "learning_rate": 1.3480371070291908e-05, "loss": 0.0226, "step": 8984 }, { "epoch": 2.32, "learning_rate": 1.347065691643446e-05, "loss": 0.0245, "step": 8985 }, { "epoch": 2.32, "learning_rate": 1.346094571900604e-05, "loss": 0.0254, "step": 8986 }, { "epoch": 2.32, "learning_rate": 1.3451237478792638e-05, "loss": 0.0343, "step": 8987 }, { "epoch": 2.32, "learning_rate": 1.3441532196579937e-05, "loss": 0.0257, "step": 8988 }, { "epoch": 2.32, "learning_rate": 1.3431829873153451e-05, "loss": 0.0217, "step": 8989 }, { "epoch": 2.32, "learning_rate": 1.3422130509298402e-05, "loss": 0.0289, "step": 8990 }, { "epoch": 2.32, "learning_rate": 1.3412434105799765e-05, "loss": 0.0258, "step": 8991 }, { "epoch": 2.32, "learning_rate": 1.3402740663442336e-05, "loss": 0.022, "step": 8992 }, { "epoch": 2.32, "learning_rate": 1.3393050183010608e-05, "loss": 0.0278, "step": 8993 }, { "epoch": 2.32, "learning_rate": 1.3383362665288885e-05, "loss": 0.0271, "step": 8994 }, { "epoch": 2.32, "learning_rate": 1.3373678111061188e-05, "loss": 0.0259, "step": 8995 }, { "epoch": 2.32, "learning_rate": 1.336399652111131e-05, "loss": 0.0263, "step": 8996 }, { "epoch": 2.32, "learning_rate": 1.3354317896222834e-05, "loss": 0.0237, "step": 8997 }, { "epoch": 2.32, "learning_rate": 1.3344642237179072e-05, "loss": 0.0263, "step": 8998 }, { "epoch": 2.32, "learning_rate": 1.3334969544763088e-05, "loss": 0.0262, "step": 8999 }, { "epoch": 2.32, "learning_rate": 1.3325299819757741e-05, "loss": 0.0265, "step": 9000 }, { "epoch": 2.32, "learning_rate": 1.3315633062945616e-05, "loss": 0.0235, "step": 9001 }, { "epoch": 2.32, "learning_rate": 1.3305969275109098e-05, "loss": 0.0218, "step": 9002 }, { "epoch": 2.32, "learning_rate": 1.3296308457030283e-05, "loss": 0.0259, "step": 9003 }, { "epoch": 2.32, "learning_rate": 1.3286650609491047e-05, "loss": 0.0228, "step": 9004 }, { "epoch": 2.32, "learning_rate": 1.3276995733273046e-05, "loss": 0.0255, "step": 9005 }, { "epoch": 2.32, "learning_rate": 1.3267343829157659e-05, "loss": 0.0319, "step": 9006 }, { "epoch": 2.32, "learning_rate": 1.325769489792607e-05, "loss": 0.0243, "step": 9007 }, { "epoch": 2.32, "learning_rate": 1.3248048940359182e-05, "loss": 0.021, "step": 9008 }, { "epoch": 2.32, "learning_rate": 1.3238405957237648e-05, "loss": 0.0253, "step": 9009 }, { "epoch": 2.33, "learning_rate": 1.3228765949341942e-05, "loss": 0.0195, "step": 9010 }, { "epoch": 2.33, "learning_rate": 1.3219128917452244e-05, "loss": 0.0274, "step": 9011 }, { "epoch": 2.33, "learning_rate": 1.3209494862348487e-05, "loss": 0.0258, "step": 9012 }, { "epoch": 2.33, "learning_rate": 1.3199863784810418e-05, "loss": 0.032, "step": 9013 }, { "epoch": 2.33, "learning_rate": 1.319023568561748e-05, "loss": 0.0297, "step": 9014 }, { "epoch": 2.33, "learning_rate": 1.3180610565548928e-05, "loss": 0.0251, "step": 9015 }, { "epoch": 2.33, "learning_rate": 1.3170988425383746e-05, "loss": 0.0336, "step": 9016 }, { "epoch": 2.33, "learning_rate": 1.3161369265900658e-05, "loss": 0.0242, "step": 9017 }, { "epoch": 2.33, "learning_rate": 1.315175308787821e-05, "loss": 0.0267, "step": 9018 }, { "epoch": 2.33, "learning_rate": 1.314213989209463e-05, "loss": 0.0236, "step": 9019 }, { "epoch": 2.33, "learning_rate": 1.3132529679327977e-05, "loss": 0.0215, "step": 9020 }, { "epoch": 2.33, "learning_rate": 1.3122922450356018e-05, "loss": 0.0322, "step": 9021 }, { "epoch": 2.33, "learning_rate": 1.3113318205956283e-05, "loss": 0.0221, "step": 9022 }, { "epoch": 2.33, "learning_rate": 1.3103716946906097e-05, "loss": 0.028, "step": 9023 }, { "epoch": 2.33, "learning_rate": 1.3094118673982498e-05, "loss": 0.0244, "step": 9024 }, { "epoch": 2.33, "learning_rate": 1.3084523387962316e-05, "loss": 0.0326, "step": 9025 }, { "epoch": 2.33, "learning_rate": 1.3074931089622127e-05, "loss": 0.0305, "step": 9026 }, { "epoch": 2.33, "learning_rate": 1.3065341779738244e-05, "loss": 0.0251, "step": 9027 }, { "epoch": 2.33, "learning_rate": 1.3055755459086789e-05, "loss": 0.0269, "step": 9028 }, { "epoch": 2.33, "learning_rate": 1.3046172128443596e-05, "loss": 0.026, "step": 9029 }, { "epoch": 2.33, "learning_rate": 1.3036591788584257e-05, "loss": 0.0298, "step": 9030 }, { "epoch": 2.33, "learning_rate": 1.302701444028417e-05, "loss": 0.0241, "step": 9031 }, { "epoch": 2.33, "learning_rate": 1.3017440084318421e-05, "loss": 0.0284, "step": 9032 }, { "epoch": 2.33, "learning_rate": 1.3007868721461936e-05, "loss": 0.0327, "step": 9033 }, { "epoch": 2.33, "learning_rate": 1.2998300352489323e-05, "loss": 0.0336, "step": 9034 }, { "epoch": 2.33, "learning_rate": 1.2988734978174977e-05, "loss": 0.0266, "step": 9035 }, { "epoch": 2.33, "learning_rate": 1.2979172599293072e-05, "loss": 0.0288, "step": 9036 }, { "epoch": 2.33, "learning_rate": 1.2969613216617493e-05, "loss": 0.0288, "step": 9037 }, { "epoch": 2.33, "learning_rate": 1.2960056830921941e-05, "loss": 0.0335, "step": 9038 }, { "epoch": 2.33, "learning_rate": 1.2950503442979828e-05, "loss": 0.0298, "step": 9039 }, { "epoch": 2.33, "learning_rate": 1.2940953053564326e-05, "loss": 0.0261, "step": 9040 }, { "epoch": 2.33, "learning_rate": 1.2931405663448398e-05, "loss": 0.0256, "step": 9041 }, { "epoch": 2.33, "learning_rate": 1.2921861273404735e-05, "loss": 0.0275, "step": 9042 }, { "epoch": 2.33, "learning_rate": 1.2912319884205776e-05, "loss": 0.0302, "step": 9043 }, { "epoch": 2.33, "learning_rate": 1.2902781496623766e-05, "loss": 0.0355, "step": 9044 }, { "epoch": 2.33, "learning_rate": 1.2893246111430647e-05, "loss": 0.0289, "step": 9045 }, { "epoch": 2.33, "learning_rate": 1.2883713729398167e-05, "loss": 0.0299, "step": 9046 }, { "epoch": 2.33, "learning_rate": 1.2874184351297802e-05, "loss": 0.029, "step": 9047 }, { "epoch": 2.33, "learning_rate": 1.2864657977900779e-05, "loss": 0.0311, "step": 9048 }, { "epoch": 2.34, "learning_rate": 1.2855134609978125e-05, "loss": 0.0307, "step": 9049 }, { "epoch": 2.34, "learning_rate": 1.2845614248300559e-05, "loss": 0.0366, "step": 9050 }, { "epoch": 2.34, "learning_rate": 1.2836096893638633e-05, "loss": 0.0184, "step": 9051 }, { "epoch": 2.34, "learning_rate": 1.2826582546762589e-05, "loss": 0.0245, "step": 9052 }, { "epoch": 2.34, "learning_rate": 1.2817071208442444e-05, "loss": 0.031, "step": 9053 }, { "epoch": 2.34, "learning_rate": 1.2807562879448009e-05, "loss": 0.0296, "step": 9054 }, { "epoch": 2.34, "learning_rate": 1.2798057560548799e-05, "loss": 0.0205, "step": 9055 }, { "epoch": 2.34, "learning_rate": 1.2788555252514101e-05, "loss": 0.0257, "step": 9056 }, { "epoch": 2.34, "learning_rate": 1.2779055956112996e-05, "loss": 0.0312, "step": 9057 }, { "epoch": 2.34, "learning_rate": 1.276955967211425e-05, "loss": 0.0209, "step": 9058 }, { "epoch": 2.34, "learning_rate": 1.2760066401286469e-05, "loss": 0.0209, "step": 9059 }, { "epoch": 2.34, "learning_rate": 1.2750576144397947e-05, "loss": 0.0194, "step": 9060 }, { "epoch": 2.34, "learning_rate": 1.2741088902216747e-05, "loss": 0.0197, "step": 9061 }, { "epoch": 2.34, "learning_rate": 1.2731604675510728e-05, "loss": 0.0263, "step": 9062 }, { "epoch": 2.34, "learning_rate": 1.2722123465047454e-05, "loss": 0.0247, "step": 9063 }, { "epoch": 2.34, "learning_rate": 1.2712645271594292e-05, "loss": 0.025, "step": 9064 }, { "epoch": 2.34, "learning_rate": 1.2703170095918326e-05, "loss": 0.0271, "step": 9065 }, { "epoch": 2.34, "learning_rate": 1.2693697938786392e-05, "loss": 0.0289, "step": 9066 }, { "epoch": 2.34, "learning_rate": 1.2684228800965137e-05, "loss": 0.0216, "step": 9067 }, { "epoch": 2.34, "learning_rate": 1.267476268322091e-05, "loss": 0.021, "step": 9068 }, { "epoch": 2.34, "learning_rate": 1.2665299586319812e-05, "loss": 0.0234, "step": 9069 }, { "epoch": 2.34, "learning_rate": 1.265583951102775e-05, "loss": 0.0272, "step": 9070 }, { "epoch": 2.34, "learning_rate": 1.2646382458110335e-05, "loss": 0.0234, "step": 9071 }, { "epoch": 2.34, "learning_rate": 1.2636928428332978e-05, "loss": 0.0222, "step": 9072 }, { "epoch": 2.34, "learning_rate": 1.2627477422460809e-05, "loss": 0.0234, "step": 9073 }, { "epoch": 2.34, "learning_rate": 1.2618029441258712e-05, "loss": 0.031, "step": 9074 }, { "epoch": 2.34, "learning_rate": 1.2608584485491365e-05, "loss": 0.033, "step": 9075 }, { "epoch": 2.34, "learning_rate": 1.2599142555923149e-05, "loss": 0.0221, "step": 9076 }, { "epoch": 2.34, "learning_rate": 1.2589703653318258e-05, "loss": 0.0248, "step": 9077 }, { "epoch": 2.34, "learning_rate": 1.2580267778440602e-05, "loss": 0.0271, "step": 9078 }, { "epoch": 2.34, "learning_rate": 1.2570834932053826e-05, "loss": 0.027, "step": 9079 }, { "epoch": 2.34, "learning_rate": 1.2561405114921404e-05, "loss": 0.0299, "step": 9080 }, { "epoch": 2.34, "learning_rate": 1.2551978327806497e-05, "loss": 0.022, "step": 9081 }, { "epoch": 2.34, "learning_rate": 1.254255457147202e-05, "loss": 0.0222, "step": 9082 }, { "epoch": 2.34, "learning_rate": 1.2533133846680711e-05, "loss": 0.0239, "step": 9083 }, { "epoch": 2.34, "learning_rate": 1.2523716154194976e-05, "loss": 0.0253, "step": 9084 }, { "epoch": 2.34, "learning_rate": 1.2514301494777053e-05, "loss": 0.025, "step": 9085 }, { "epoch": 2.34, "learning_rate": 1.2504889869188879e-05, "loss": 0.0261, "step": 9086 }, { "epoch": 2.35, "learning_rate": 1.2495481278192156e-05, "loss": 0.0223, "step": 9087 }, { "epoch": 2.35, "learning_rate": 1.248607572254838e-05, "loss": 0.0274, "step": 9088 }, { "epoch": 2.35, "learning_rate": 1.247667320301873e-05, "loss": 0.0245, "step": 9089 }, { "epoch": 2.35, "learning_rate": 1.246727372036422e-05, "loss": 0.0238, "step": 9090 }, { "epoch": 2.35, "learning_rate": 1.245787727534556e-05, "loss": 0.0258, "step": 9091 }, { "epoch": 2.35, "learning_rate": 1.2448483868723216e-05, "loss": 0.0288, "step": 9092 }, { "epoch": 2.35, "learning_rate": 1.2439093501257454e-05, "loss": 0.0329, "step": 9093 }, { "epoch": 2.35, "learning_rate": 1.2429706173708255e-05, "loss": 0.0308, "step": 9094 }, { "epoch": 2.35, "learning_rate": 1.2420321886835345e-05, "loss": 0.0226, "step": 9095 }, { "epoch": 2.35, "learning_rate": 1.241094064139825e-05, "loss": 0.0268, "step": 9096 }, { "epoch": 2.35, "learning_rate": 1.24015624381562e-05, "loss": 0.0202, "step": 9097 }, { "epoch": 2.35, "learning_rate": 1.2392187277868222e-05, "loss": 0.0285, "step": 9098 }, { "epoch": 2.35, "learning_rate": 1.2382815161293065e-05, "loss": 0.0263, "step": 9099 }, { "epoch": 2.35, "learning_rate": 1.2373446089189229e-05, "loss": 0.0262, "step": 9100 }, { "epoch": 2.35, "learning_rate": 1.2364080062315009e-05, "loss": 0.0317, "step": 9101 }, { "epoch": 2.35, "learning_rate": 1.2354717081428396e-05, "loss": 0.028, "step": 9102 }, { "epoch": 2.35, "learning_rate": 1.2345357147287189e-05, "loss": 0.0237, "step": 9103 }, { "epoch": 2.35, "learning_rate": 1.2336000260648911e-05, "loss": 0.026, "step": 9104 }, { "epoch": 2.35, "learning_rate": 1.2326646422270821e-05, "loss": 0.0276, "step": 9105 }, { "epoch": 2.35, "learning_rate": 1.231729563290998e-05, "loss": 0.0274, "step": 9106 }, { "epoch": 2.35, "learning_rate": 1.2307947893323169e-05, "loss": 0.0246, "step": 9107 }, { "epoch": 2.35, "learning_rate": 1.2298603204266906e-05, "loss": 0.0331, "step": 9108 }, { "epoch": 2.35, "learning_rate": 1.2289261566497523e-05, "loss": 0.0266, "step": 9109 }, { "epoch": 2.35, "learning_rate": 1.2279922980771024e-05, "loss": 0.0265, "step": 9110 }, { "epoch": 2.35, "learning_rate": 1.2270587447843252e-05, "loss": 0.0277, "step": 9111 }, { "epoch": 2.35, "learning_rate": 1.2261254968469738e-05, "loss": 0.0267, "step": 9112 }, { "epoch": 2.35, "learning_rate": 1.2251925543405773e-05, "loss": 0.0275, "step": 9113 }, { "epoch": 2.35, "learning_rate": 1.2242599173406444e-05, "loss": 0.0249, "step": 9114 }, { "epoch": 2.35, "learning_rate": 1.2233275859226534e-05, "loss": 0.029, "step": 9115 }, { "epoch": 2.35, "learning_rate": 1.2223955601620635e-05, "loss": 0.0296, "step": 9116 }, { "epoch": 2.35, "learning_rate": 1.2214638401343054e-05, "loss": 0.0241, "step": 9117 }, { "epoch": 2.35, "learning_rate": 1.2205324259147838e-05, "loss": 0.0298, "step": 9118 }, { "epoch": 2.35, "learning_rate": 1.2196013175788839e-05, "loss": 0.038, "step": 9119 }, { "epoch": 2.35, "learning_rate": 1.2186705152019622e-05, "loss": 0.0293, "step": 9120 }, { "epoch": 2.35, "learning_rate": 1.2177400188593491e-05, "loss": 0.0247, "step": 9121 }, { "epoch": 2.35, "learning_rate": 1.2168098286263557e-05, "loss": 0.0259, "step": 9122 }, { "epoch": 2.35, "learning_rate": 1.215879944578262e-05, "loss": 0.0298, "step": 9123 }, { "epoch": 2.35, "learning_rate": 1.21495036679033e-05, "loss": 0.0272, "step": 9124 }, { "epoch": 2.35, "learning_rate": 1.2140210953377907e-05, "loss": 0.0246, "step": 9125 }, { "epoch": 2.36, "learning_rate": 1.213092130295852e-05, "loss": 0.0222, "step": 9126 }, { "epoch": 2.36, "learning_rate": 1.2121634717397007e-05, "loss": 0.0267, "step": 9127 }, { "epoch": 2.36, "learning_rate": 1.211235119744492e-05, "loss": 0.0273, "step": 9128 }, { "epoch": 2.36, "learning_rate": 1.210307074385365e-05, "loss": 0.0216, "step": 9129 }, { "epoch": 2.36, "learning_rate": 1.2093793357374261e-05, "loss": 0.0306, "step": 9130 }, { "epoch": 2.36, "learning_rate": 1.2084519038757591e-05, "loss": 0.0303, "step": 9131 }, { "epoch": 2.36, "learning_rate": 1.2075247788754268e-05, "loss": 0.0369, "step": 9132 }, { "epoch": 2.36, "learning_rate": 1.206597960811463e-05, "loss": 0.0254, "step": 9133 }, { "epoch": 2.36, "learning_rate": 1.2056714497588762e-05, "loss": 0.0294, "step": 9134 }, { "epoch": 2.36, "learning_rate": 1.204745245792654e-05, "loss": 0.0336, "step": 9135 }, { "epoch": 2.36, "learning_rate": 1.2038193489877548e-05, "loss": 0.0342, "step": 9136 }, { "epoch": 2.36, "learning_rate": 1.2028937594191163e-05, "loss": 0.0287, "step": 9137 }, { "epoch": 2.36, "learning_rate": 1.2019684771616485e-05, "loss": 0.028, "step": 9138 }, { "epoch": 2.36, "learning_rate": 1.2010435022902362e-05, "loss": 0.0292, "step": 9139 }, { "epoch": 2.36, "learning_rate": 1.200118834879742e-05, "loss": 0.0279, "step": 9140 }, { "epoch": 2.36, "learning_rate": 1.1991944750049999e-05, "loss": 0.0356, "step": 9141 }, { "epoch": 2.36, "learning_rate": 1.1982704227408237e-05, "loss": 0.0297, "step": 9142 }, { "epoch": 2.36, "learning_rate": 1.1973466781619986e-05, "loss": 0.0336, "step": 9143 }, { "epoch": 2.36, "learning_rate": 1.1964232413432842e-05, "loss": 0.0316, "step": 9144 }, { "epoch": 2.36, "learning_rate": 1.19550011235942e-05, "loss": 0.0321, "step": 9145 }, { "epoch": 2.36, "learning_rate": 1.1945772912851167e-05, "loss": 0.0295, "step": 9146 }, { "epoch": 2.36, "learning_rate": 1.1936547781950592e-05, "loss": 0.0289, "step": 9147 }, { "epoch": 2.36, "learning_rate": 1.1927325731639116e-05, "loss": 0.0348, "step": 9148 }, { "epoch": 2.36, "learning_rate": 1.1918106762663079e-05, "loss": 0.0307, "step": 9149 }, { "epoch": 2.36, "learning_rate": 1.1908890875768636e-05, "loss": 0.0327, "step": 9150 }, { "epoch": 2.36, "learning_rate": 1.1899678071701636e-05, "loss": 0.0257, "step": 9151 }, { "epoch": 2.36, "learning_rate": 1.189046835120769e-05, "loss": 0.0262, "step": 9152 }, { "epoch": 2.36, "learning_rate": 1.1881261715032187e-05, "loss": 0.03, "step": 9153 }, { "epoch": 2.36, "learning_rate": 1.187205816392023e-05, "loss": 0.0151, "step": 9154 }, { "epoch": 2.36, "learning_rate": 1.1862857698616714e-05, "loss": 0.0253, "step": 9155 }, { "epoch": 2.36, "learning_rate": 1.1853660319866245e-05, "loss": 0.0289, "step": 9156 }, { "epoch": 2.36, "learning_rate": 1.1844466028413177e-05, "loss": 0.0252, "step": 9157 }, { "epoch": 2.36, "learning_rate": 1.1835274825001663e-05, "loss": 0.0241, "step": 9158 }, { "epoch": 2.36, "learning_rate": 1.1826086710375567e-05, "loss": 0.0279, "step": 9159 }, { "epoch": 2.36, "learning_rate": 1.1816901685278487e-05, "loss": 0.0266, "step": 9160 }, { "epoch": 2.36, "learning_rate": 1.1807719750453828e-05, "loss": 0.0274, "step": 9161 }, { "epoch": 2.36, "learning_rate": 1.1798540906644678e-05, "loss": 0.0261, "step": 9162 }, { "epoch": 2.36, "learning_rate": 1.178936515459394e-05, "loss": 0.019, "step": 9163 }, { "epoch": 2.36, "learning_rate": 1.1780192495044223e-05, "loss": 0.018, "step": 9164 }, { "epoch": 2.37, "learning_rate": 1.1771022928737884e-05, "loss": 0.0309, "step": 9165 }, { "epoch": 2.37, "learning_rate": 1.1761856456417064e-05, "loss": 0.0214, "step": 9166 }, { "epoch": 2.37, "learning_rate": 1.1752693078823611e-05, "loss": 0.0309, "step": 9167 }, { "epoch": 2.37, "learning_rate": 1.1743532796699174e-05, "loss": 0.0198, "step": 9168 }, { "epoch": 2.37, "learning_rate": 1.1734375610785108e-05, "loss": 0.0241, "step": 9169 }, { "epoch": 2.37, "learning_rate": 1.1725221521822516e-05, "loss": 0.026, "step": 9170 }, { "epoch": 2.37, "learning_rate": 1.1716070530552287e-05, "loss": 0.0306, "step": 9171 }, { "epoch": 2.37, "learning_rate": 1.1706922637715017e-05, "loss": 0.0271, "step": 9172 }, { "epoch": 2.37, "learning_rate": 1.1697777844051105e-05, "loss": 0.0288, "step": 9173 }, { "epoch": 2.37, "learning_rate": 1.1688636150300642e-05, "loss": 0.0299, "step": 9174 }, { "epoch": 2.37, "learning_rate": 1.167949755720349e-05, "loss": 0.0267, "step": 9175 }, { "epoch": 2.37, "learning_rate": 1.167036206549928e-05, "loss": 0.0246, "step": 9176 }, { "epoch": 2.37, "learning_rate": 1.1661229675927366e-05, "loss": 0.0254, "step": 9177 }, { "epoch": 2.37, "learning_rate": 1.1652100389226838e-05, "loss": 0.0219, "step": 9178 }, { "epoch": 2.37, "learning_rate": 1.1642974206136598e-05, "loss": 0.0212, "step": 9179 }, { "epoch": 2.37, "learning_rate": 1.1633851127395224e-05, "loss": 0.0202, "step": 9180 }, { "epoch": 2.37, "learning_rate": 1.1624731153741092e-05, "loss": 0.0246, "step": 9181 }, { "epoch": 2.37, "learning_rate": 1.1615614285912301e-05, "loss": 0.0201, "step": 9182 }, { "epoch": 2.37, "learning_rate": 1.1606500524646696e-05, "loss": 0.0277, "step": 9183 }, { "epoch": 2.37, "learning_rate": 1.1597389870681907e-05, "loss": 0.036, "step": 9184 }, { "epoch": 2.37, "learning_rate": 1.1588282324755256e-05, "loss": 0.0326, "step": 9185 }, { "epoch": 2.37, "learning_rate": 1.157917788760387e-05, "loss": 0.0295, "step": 9186 }, { "epoch": 2.37, "learning_rate": 1.1570076559964593e-05, "loss": 0.0243, "step": 9187 }, { "epoch": 2.37, "learning_rate": 1.1560978342573996e-05, "loss": 0.0324, "step": 9188 }, { "epoch": 2.37, "learning_rate": 1.1551883236168465e-05, "loss": 0.0297, "step": 9189 }, { "epoch": 2.37, "learning_rate": 1.1542791241484075e-05, "loss": 0.0259, "step": 9190 }, { "epoch": 2.37, "learning_rate": 1.1533702359256654e-05, "loss": 0.0359, "step": 9191 }, { "epoch": 2.37, "learning_rate": 1.1524616590221821e-05, "loss": 0.0229, "step": 9192 }, { "epoch": 2.37, "learning_rate": 1.1515533935114886e-05, "loss": 0.0283, "step": 9193 }, { "epoch": 2.37, "learning_rate": 1.1506454394670963e-05, "loss": 0.0327, "step": 9194 }, { "epoch": 2.37, "learning_rate": 1.1497377969624873e-05, "loss": 0.0252, "step": 9195 }, { "epoch": 2.37, "learning_rate": 1.1488304660711192e-05, "loss": 0.0231, "step": 9196 }, { "epoch": 2.37, "learning_rate": 1.1479234468664263e-05, "loss": 0.0295, "step": 9197 }, { "epoch": 2.37, "learning_rate": 1.1470167394218145e-05, "loss": 0.022, "step": 9198 }, { "epoch": 2.37, "learning_rate": 1.146110343810669e-05, "loss": 0.0238, "step": 9199 }, { "epoch": 2.37, "learning_rate": 1.1452042601063456e-05, "loss": 0.0308, "step": 9200 }, { "epoch": 2.37, "learning_rate": 1.144298488382175e-05, "loss": 0.0247, "step": 9201 }, { "epoch": 2.37, "learning_rate": 1.1433930287114675e-05, "loss": 0.0247, "step": 9202 }, { "epoch": 2.37, "learning_rate": 1.1424878811675022e-05, "loss": 0.0266, "step": 9203 }, { "epoch": 2.38, "learning_rate": 1.1415830458235343e-05, "loss": 0.0312, "step": 9204 }, { "epoch": 2.38, "learning_rate": 1.1406785227527977e-05, "loss": 0.0313, "step": 9205 }, { "epoch": 2.38, "learning_rate": 1.1397743120284959e-05, "loss": 0.0292, "step": 9206 }, { "epoch": 2.38, "learning_rate": 1.1388704137238115e-05, "loss": 0.0289, "step": 9207 }, { "epoch": 2.38, "learning_rate": 1.1379668279118982e-05, "loss": 0.0279, "step": 9208 }, { "epoch": 2.38, "learning_rate": 1.1370635546658854e-05, "loss": 0.0225, "step": 9209 }, { "epoch": 2.38, "learning_rate": 1.1361605940588794e-05, "loss": 0.0273, "step": 9210 }, { "epoch": 2.38, "learning_rate": 1.1352579461639573e-05, "loss": 0.0272, "step": 9211 }, { "epoch": 2.38, "learning_rate": 1.134355611054176e-05, "loss": 0.0289, "step": 9212 }, { "epoch": 2.38, "learning_rate": 1.1334535888025627e-05, "loss": 0.0269, "step": 9213 }, { "epoch": 2.38, "learning_rate": 1.1325518794821189e-05, "loss": 0.0328, "step": 9214 }, { "epoch": 2.38, "learning_rate": 1.131650483165826e-05, "loss": 0.0255, "step": 9215 }, { "epoch": 2.38, "learning_rate": 1.130749399926635e-05, "loss": 0.0314, "step": 9216 }, { "epoch": 2.38, "learning_rate": 1.1298486298374722e-05, "loss": 0.0312, "step": 9217 }, { "epoch": 2.38, "learning_rate": 1.128948172971242e-05, "loss": 0.0288, "step": 9218 }, { "epoch": 2.38, "learning_rate": 1.1280480294008184e-05, "loss": 0.0245, "step": 9219 }, { "epoch": 2.38, "learning_rate": 1.1271481991990558e-05, "loss": 0.0304, "step": 9220 }, { "epoch": 2.38, "learning_rate": 1.126248682438778e-05, "loss": 0.031, "step": 9221 }, { "epoch": 2.38, "learning_rate": 1.1253494791927849e-05, "loss": 0.0229, "step": 9222 }, { "epoch": 2.38, "learning_rate": 1.1244505895338543e-05, "loss": 0.0272, "step": 9223 }, { "epoch": 2.38, "learning_rate": 1.1235520135347333e-05, "loss": 0.0334, "step": 9224 }, { "epoch": 2.38, "learning_rate": 1.122653751268149e-05, "loss": 0.0265, "step": 9225 }, { "epoch": 2.38, "learning_rate": 1.1217558028067993e-05, "loss": 0.0365, "step": 9226 }, { "epoch": 2.38, "learning_rate": 1.1208581682233559e-05, "loss": 0.0287, "step": 9227 }, { "epoch": 2.38, "learning_rate": 1.1199608475904705e-05, "loss": 0.0356, "step": 9228 }, { "epoch": 2.38, "learning_rate": 1.1190638409807647e-05, "loss": 0.0244, "step": 9229 }, { "epoch": 2.38, "learning_rate": 1.1181671484668333e-05, "loss": 0.0315, "step": 9230 }, { "epoch": 2.38, "learning_rate": 1.1172707701212525e-05, "loss": 0.0242, "step": 9231 }, { "epoch": 2.38, "learning_rate": 1.1163747060165653e-05, "loss": 0.0279, "step": 9232 }, { "epoch": 2.38, "learning_rate": 1.1154789562252959e-05, "loss": 0.0265, "step": 9233 }, { "epoch": 2.38, "learning_rate": 1.1145835208199384e-05, "loss": 0.0254, "step": 9234 }, { "epoch": 2.38, "learning_rate": 1.113688399872962e-05, "loss": 0.0338, "step": 9235 }, { "epoch": 2.38, "learning_rate": 1.112793593456814e-05, "loss": 0.0288, "step": 9236 }, { "epoch": 2.38, "learning_rate": 1.1118991016439112e-05, "loss": 0.023, "step": 9237 }, { "epoch": 2.38, "learning_rate": 1.11100492450665e-05, "loss": 0.0299, "step": 9238 }, { "epoch": 2.38, "learning_rate": 1.1101110621173977e-05, "loss": 0.0293, "step": 9239 }, { "epoch": 2.38, "learning_rate": 1.1092175145484956e-05, "loss": 0.0243, "step": 9240 }, { "epoch": 2.38, "learning_rate": 1.1092175145484956e-05, "loss": 0.0252, "step": 9241 }, { "epoch": 2.39, "learning_rate": 1.108324281872265e-05, "loss": 0.0338, "step": 9242 }, { "epoch": 2.39, "learning_rate": 1.1074313641609935e-05, "loss": 0.0292, "step": 9243 }, { "epoch": 2.39, "learning_rate": 1.1065387614869516e-05, "loss": 0.0357, "step": 9244 }, { "epoch": 2.39, "learning_rate": 1.1056464739223781e-05, "loss": 0.0286, "step": 9245 }, { "epoch": 2.39, "learning_rate": 1.1047545015394872e-05, "loss": 0.0319, "step": 9246 }, { "epoch": 2.39, "learning_rate": 1.1038628444104725e-05, "loss": 0.0342, "step": 9247 }, { "epoch": 2.39, "learning_rate": 1.1029715026074966e-05, "loss": 0.0323, "step": 9248 }, { "epoch": 2.39, "learning_rate": 1.1020804762026964e-05, "loss": 0.0311, "step": 9249 }, { "epoch": 2.39, "learning_rate": 1.1011897652681897e-05, "loss": 0.0294, "step": 9250 }, { "epoch": 2.39, "learning_rate": 1.10029936987606e-05, "loss": 0.0148, "step": 9251 }, { "epoch": 2.39, "learning_rate": 1.0994092900983732e-05, "loss": 0.0242, "step": 9252 }, { "epoch": 2.39, "learning_rate": 1.0985195260071645e-05, "loss": 0.0231, "step": 9253 }, { "epoch": 2.39, "learning_rate": 1.0976300776744441e-05, "loss": 0.0315, "step": 9254 }, { "epoch": 2.39, "learning_rate": 1.0967409451721999e-05, "loss": 0.0201, "step": 9255 }, { "epoch": 2.39, "learning_rate": 1.0958521285723904e-05, "loss": 0.0266, "step": 9256 }, { "epoch": 2.39, "learning_rate": 1.0949636279469516e-05, "loss": 0.0273, "step": 9257 }, { "epoch": 2.39, "learning_rate": 1.0940754433677919e-05, "loss": 0.0322, "step": 9258 }, { "epoch": 2.39, "learning_rate": 1.0931875749067933e-05, "loss": 0.0234, "step": 9259 }, { "epoch": 2.39, "learning_rate": 1.0923000226358166e-05, "loss": 0.0287, "step": 9260 }, { "epoch": 2.39, "learning_rate": 1.091412786626691e-05, "loss": 0.0262, "step": 9261 }, { "epoch": 2.39, "learning_rate": 1.0905258669512252e-05, "loss": 0.0229, "step": 9262 }, { "epoch": 2.39, "learning_rate": 1.0896392636812003e-05, "loss": 0.0228, "step": 9263 }, { "epoch": 2.39, "learning_rate": 1.0887529768883697e-05, "loss": 0.021, "step": 9264 }, { "epoch": 2.39, "learning_rate": 1.0878670066444657e-05, "loss": 0.0226, "step": 9265 }, { "epoch": 2.39, "learning_rate": 1.0869813530211902e-05, "loss": 0.0205, "step": 9266 }, { "epoch": 2.39, "learning_rate": 1.086096016090225e-05, "loss": 0.035, "step": 9267 }, { "epoch": 2.39, "learning_rate": 1.0852109959232204e-05, "loss": 0.0354, "step": 9268 }, { "epoch": 2.39, "learning_rate": 1.0843262925918035e-05, "loss": 0.0238, "step": 9269 }, { "epoch": 2.39, "learning_rate": 1.0834419061675782e-05, "loss": 0.0212, "step": 9270 }, { "epoch": 2.39, "learning_rate": 1.082557836722119e-05, "loss": 0.0292, "step": 9271 }, { "epoch": 2.39, "learning_rate": 1.0816740843269756e-05, "loss": 0.0216, "step": 9272 }, { "epoch": 2.39, "learning_rate": 1.0807906490536751e-05, "loss": 0.0246, "step": 9273 }, { "epoch": 2.39, "learning_rate": 1.0799075309737133e-05, "loss": 0.0305, "step": 9274 }, { "epoch": 2.39, "learning_rate": 1.0790247301585672e-05, "loss": 0.026, "step": 9275 }, { "epoch": 2.39, "learning_rate": 1.0781422466796825e-05, "loss": 0.022, "step": 9276 }, { "epoch": 2.39, "learning_rate": 1.0772600806084804e-05, "loss": 0.0277, "step": 9277 }, { "epoch": 2.39, "learning_rate": 1.0763782320163596e-05, "loss": 0.0276, "step": 9278 }, { "epoch": 2.39, "learning_rate": 1.075496700974688e-05, "loss": 0.0273, "step": 9279 }, { "epoch": 2.39, "learning_rate": 1.0746154875548137e-05, "loss": 0.0262, "step": 9280 }, { "epoch": 2.4, "learning_rate": 1.0737345918280539e-05, "loss": 0.0275, "step": 9281 }, { "epoch": 2.4, "learning_rate": 1.0728540138657012e-05, "loss": 0.033, "step": 9282 }, { "epoch": 2.4, "learning_rate": 1.0719737537390261e-05, "loss": 0.0279, "step": 9283 }, { "epoch": 2.4, "learning_rate": 1.0710938115192681e-05, "loss": 0.0194, "step": 9284 }, { "epoch": 2.4, "learning_rate": 1.0702141872776456e-05, "loss": 0.0259, "step": 9285 }, { "epoch": 2.4, "learning_rate": 1.0693348810853488e-05, "loss": 0.0232, "step": 9286 }, { "epoch": 2.4, "learning_rate": 1.0684558930135408e-05, "loss": 0.027, "step": 9287 }, { "epoch": 2.4, "learning_rate": 1.0675772231333631e-05, "loss": 0.0267, "step": 9288 }, { "epoch": 2.4, "learning_rate": 1.0666988715159282e-05, "loss": 0.028, "step": 9289 }, { "epoch": 2.4, "learning_rate": 1.0658208382323221e-05, "loss": 0.0236, "step": 9290 }, { "epoch": 2.4, "learning_rate": 1.0649431233536101e-05, "loss": 0.0266, "step": 9291 }, { "epoch": 2.4, "learning_rate": 1.0640657269508242e-05, "loss": 0.0301, "step": 9292 }, { "epoch": 2.4, "learning_rate": 1.0631886490949789e-05, "loss": 0.0235, "step": 9293 }, { "epoch": 2.4, "learning_rate": 1.0623118898570561e-05, "loss": 0.0225, "step": 9294 }, { "epoch": 2.4, "learning_rate": 1.0614354493080136e-05, "loss": 0.0198, "step": 9295 }, { "epoch": 2.4, "learning_rate": 1.060559327518788e-05, "loss": 0.027, "step": 9296 }, { "epoch": 2.4, "learning_rate": 1.0596835245602826e-05, "loss": 0.0255, "step": 9297 }, { "epoch": 2.4, "learning_rate": 1.0588080405033818e-05, "loss": 0.0206, "step": 9298 }, { "epoch": 2.4, "learning_rate": 1.0579328754189399e-05, "loss": 0.0254, "step": 9299 }, { "epoch": 2.4, "learning_rate": 1.0570580293777854e-05, "loss": 0.0272, "step": 9300 }, { "epoch": 2.4, "learning_rate": 1.0561835024507243e-05, "loss": 0.0248, "step": 9301 }, { "epoch": 2.4, "learning_rate": 1.055309294708533e-05, "loss": 0.0246, "step": 9302 }, { "epoch": 2.4, "learning_rate": 1.0544354062219658e-05, "loss": 0.027, "step": 9303 }, { "epoch": 2.4, "learning_rate": 1.0535618370617473e-05, "loss": 0.0228, "step": 9304 }, { "epoch": 2.4, "learning_rate": 1.0526885872985776e-05, "loss": 0.0254, "step": 9305 }, { "epoch": 2.4, "learning_rate": 1.0518156570031335e-05, "loss": 0.0276, "step": 9306 }, { "epoch": 2.4, "learning_rate": 1.0509430462460618e-05, "loss": 0.0304, "step": 9307 }, { "epoch": 2.4, "learning_rate": 1.0500707550979877e-05, "loss": 0.0206, "step": 9308 }, { "epoch": 2.4, "learning_rate": 1.049198783629507e-05, "loss": 0.0373, "step": 9309 }, { "epoch": 2.4, "learning_rate": 1.0483271319111898e-05, "loss": 0.0259, "step": 9310 }, { "epoch": 2.4, "learning_rate": 1.0474558000135842e-05, "loss": 0.0226, "step": 9311 }, { "epoch": 2.4, "learning_rate": 1.0465847880072076e-05, "loss": 0.0254, "step": 9312 }, { "epoch": 2.4, "learning_rate": 1.0457140959625538e-05, "loss": 0.026, "step": 9313 }, { "epoch": 2.4, "learning_rate": 1.0448437239500918e-05, "loss": 0.0265, "step": 9314 }, { "epoch": 2.4, "learning_rate": 1.043973672040261e-05, "loss": 0.025, "step": 9315 }, { "epoch": 2.4, "learning_rate": 1.043103940303481e-05, "loss": 0.0235, "step": 9316 }, { "epoch": 2.4, "learning_rate": 1.0422345288101387e-05, "loss": 0.0285, "step": 9317 }, { "epoch": 2.4, "learning_rate": 1.0413654376305987e-05, "loss": 0.027, "step": 9318 }, { "epoch": 2.4, "learning_rate": 1.0404966668352006e-05, "loss": 0.0244, "step": 9319 }, { "epoch": 2.41, "learning_rate": 1.039628216494254e-05, "loss": 0.0235, "step": 9320 }, { "epoch": 2.41, "learning_rate": 1.0387600866780484e-05, "loss": 0.0294, "step": 9321 }, { "epoch": 2.41, "learning_rate": 1.0378922774568423e-05, "loss": 0.0235, "step": 9322 }, { "epoch": 2.41, "learning_rate": 1.0370247889008695e-05, "loss": 0.0229, "step": 9323 }, { "epoch": 2.41, "learning_rate": 1.0361576210803404e-05, "loss": 0.0271, "step": 9324 }, { "epoch": 2.41, "learning_rate": 1.0352907740654343e-05, "loss": 0.0273, "step": 9325 }, { "epoch": 2.41, "learning_rate": 1.0344242479263122e-05, "loss": 0.0239, "step": 9326 }, { "epoch": 2.41, "learning_rate": 1.0335580427331015e-05, "loss": 0.0277, "step": 9327 }, { "epoch": 2.41, "learning_rate": 1.0326921585559069e-05, "loss": 0.0274, "step": 9328 }, { "epoch": 2.41, "learning_rate": 1.0318265954648082e-05, "loss": 0.0286, "step": 9329 }, { "epoch": 2.41, "learning_rate": 1.0309613535298579e-05, "loss": 0.027, "step": 9330 }, { "epoch": 2.41, "learning_rate": 1.030096432821081e-05, "loss": 0.0369, "step": 9331 }, { "epoch": 2.41, "learning_rate": 1.02923183340848e-05, "loss": 0.0244, "step": 9332 }, { "epoch": 2.41, "learning_rate": 1.0283675553620281e-05, "loss": 0.0283, "step": 9333 }, { "epoch": 2.41, "learning_rate": 1.0275035987516756e-05, "loss": 0.0299, "step": 9334 }, { "epoch": 2.41, "learning_rate": 1.026639963647344e-05, "loss": 0.0231, "step": 9335 }, { "epoch": 2.41, "learning_rate": 1.0257766501189287e-05, "loss": 0.0297, "step": 9336 }, { "epoch": 2.41, "learning_rate": 1.0249136582363029e-05, "loss": 0.0301, "step": 9337 }, { "epoch": 2.41, "learning_rate": 1.0240509880693084e-05, "loss": 0.0229, "step": 9338 }, { "epoch": 2.41, "learning_rate": 1.023188639687766e-05, "loss": 0.0316, "step": 9339 }, { "epoch": 2.41, "learning_rate": 1.0223266131614677e-05, "loss": 0.0293, "step": 9340 }, { "epoch": 2.41, "learning_rate": 1.021464908560178e-05, "loss": 0.0242, "step": 9341 }, { "epoch": 2.41, "learning_rate": 1.0206035259536395e-05, "loss": 0.0321, "step": 9342 }, { "epoch": 2.41, "learning_rate": 1.0197424654115644e-05, "loss": 0.028, "step": 9343 }, { "epoch": 2.41, "learning_rate": 1.0188817270036433e-05, "loss": 0.0293, "step": 9344 }, { "epoch": 2.41, "learning_rate": 1.0180213107995368e-05, "loss": 0.0236, "step": 9345 }, { "epoch": 2.41, "learning_rate": 1.0171612168688804e-05, "loss": 0.0308, "step": 9346 }, { "epoch": 2.41, "learning_rate": 1.0163014452812857e-05, "loss": 0.0284, "step": 9347 }, { "epoch": 2.41, "learning_rate": 1.015441996106335e-05, "loss": 0.0348, "step": 9348 }, { "epoch": 2.41, "learning_rate": 1.0145828694135884e-05, "loss": 0.0255, "step": 9349 }, { "epoch": 2.41, "learning_rate": 1.0137240652725755e-05, "loss": 0.0296, "step": 9350 }, { "epoch": 2.41, "learning_rate": 1.0128655837528011e-05, "loss": 0.0269, "step": 9351 }, { "epoch": 2.41, "learning_rate": 1.0120074249237477e-05, "loss": 0.0236, "step": 9352 }, { "epoch": 2.41, "learning_rate": 1.0111495888548672e-05, "loss": 0.0244, "step": 9353 }, { "epoch": 2.41, "learning_rate": 1.0102920756155849e-05, "loss": 0.0261, "step": 9354 }, { "epoch": 2.41, "learning_rate": 1.009434885275305e-05, "loss": 0.0269, "step": 9355 }, { "epoch": 2.41, "learning_rate": 1.0085780179034e-05, "loss": 0.023, "step": 9356 }, { "epoch": 2.41, "learning_rate": 1.0077214735692214e-05, "loss": 0.0215, "step": 9357 }, { "epoch": 2.41, "learning_rate": 1.0068652523420902e-05, "loss": 0.023, "step": 9358 }, { "epoch": 2.42, "learning_rate": 1.0060093542913013e-05, "loss": 0.0306, "step": 9359 }, { "epoch": 2.42, "learning_rate": 1.0051537794861288e-05, "loss": 0.0219, "step": 9360 }, { "epoch": 2.42, "learning_rate": 1.0042985279958139e-05, "loss": 0.0224, "step": 9361 }, { "epoch": 2.42, "learning_rate": 1.0034435998895764e-05, "loss": 0.0226, "step": 9362 }, { "epoch": 2.42, "learning_rate": 1.0025889952366085e-05, "loss": 0.0237, "step": 9363 }, { "epoch": 2.42, "learning_rate": 1.0017347141060728e-05, "loss": 0.0257, "step": 9364 }, { "epoch": 2.42, "learning_rate": 1.000880756567113e-05, "loss": 0.0293, "step": 9365 }, { "epoch": 2.42, "learning_rate": 1.0000271226888392e-05, "loss": 0.0243, "step": 9366 }, { "epoch": 2.42, "learning_rate": 9.991738125403404e-06, "loss": 0.0275, "step": 9367 }, { "epoch": 2.42, "learning_rate": 9.98320826190678e-06, "loss": 0.02, "step": 9368 }, { "epoch": 2.42, "learning_rate": 9.974681637088834e-06, "loss": 0.0209, "step": 9369 }, { "epoch": 2.42, "learning_rate": 9.96615825163969e-06, "loss": 0.0239, "step": 9370 }, { "epoch": 2.42, "learning_rate": 9.957638106249162e-06, "loss": 0.0316, "step": 9371 }, { "epoch": 2.42, "learning_rate": 9.949121201606788e-06, "loss": 0.0307, "step": 9372 }, { "epoch": 2.42, "learning_rate": 9.940607538401892e-06, "loss": 0.0269, "step": 9373 }, { "epoch": 2.42, "learning_rate": 9.932097117323492e-06, "loss": 0.0217, "step": 9374 }, { "epoch": 2.42, "learning_rate": 9.92358993906038e-06, "loss": 0.0278, "step": 9375 }, { "epoch": 2.42, "learning_rate": 9.915086004301062e-06, "loss": 0.0336, "step": 9376 }, { "epoch": 2.42, "learning_rate": 9.906585313733768e-06, "loss": 0.0354, "step": 9377 }, { "epoch": 2.42, "learning_rate": 9.898087868046512e-06, "loss": 0.0273, "step": 9378 }, { "epoch": 2.42, "learning_rate": 9.889593667926994e-06, "loss": 0.0201, "step": 9379 }, { "epoch": 2.42, "learning_rate": 9.881102714062696e-06, "loss": 0.03, "step": 9380 }, { "epoch": 2.42, "learning_rate": 9.872615007140812e-06, "loss": 0.0286, "step": 9381 }, { "epoch": 2.42, "learning_rate": 9.864130547848255e-06, "loss": 0.0339, "step": 9382 }, { "epoch": 2.42, "learning_rate": 9.855649336871725e-06, "loss": 0.0307, "step": 9383 }, { "epoch": 2.42, "learning_rate": 9.847171374897613e-06, "loss": 0.025, "step": 9384 }, { "epoch": 2.42, "learning_rate": 9.838696662612091e-06, "loss": 0.0225, "step": 9385 }, { "epoch": 2.42, "learning_rate": 9.83022520070102e-06, "loss": 0.022, "step": 9386 }, { "epoch": 2.42, "learning_rate": 9.821756989850016e-06, "loss": 0.0275, "step": 9387 }, { "epoch": 2.42, "learning_rate": 9.813292030744458e-06, "loss": 0.0193, "step": 9388 }, { "epoch": 2.42, "learning_rate": 9.80483032406942e-06, "loss": 0.0274, "step": 9389 }, { "epoch": 2.42, "learning_rate": 9.796371870509757e-06, "loss": 0.0239, "step": 9390 }, { "epoch": 2.42, "learning_rate": 9.787916670750024e-06, "loss": 0.0238, "step": 9391 }, { "epoch": 2.42, "learning_rate": 9.779464725474507e-06, "loss": 0.0251, "step": 9392 }, { "epoch": 2.42, "learning_rate": 9.77101603536728e-06, "loss": 0.0312, "step": 9393 }, { "epoch": 2.42, "learning_rate": 9.762570601112108e-06, "loss": 0.0225, "step": 9394 }, { "epoch": 2.42, "learning_rate": 9.754128423392489e-06, "loss": 0.0281, "step": 9395 }, { "epoch": 2.42, "learning_rate": 9.745689502891703e-06, "loss": 0.0267, "step": 9396 }, { "epoch": 2.43, "learning_rate": 9.73725384029271e-06, "loss": 0.0228, "step": 9397 }, { "epoch": 2.43, "learning_rate": 9.728821436278251e-06, "loss": 0.0251, "step": 9398 }, { "epoch": 2.43, "learning_rate": 9.72039229153079e-06, "loss": 0.0201, "step": 9399 }, { "epoch": 2.43, "learning_rate": 9.711966406732492e-06, "loss": 0.03, "step": 9400 }, { "epoch": 2.43, "learning_rate": 9.703543782565323e-06, "loss": 0.0233, "step": 9401 }, { "epoch": 2.43, "learning_rate": 9.695124419710928e-06, "loss": 0.0289, "step": 9402 }, { "epoch": 2.43, "learning_rate": 9.686708318850735e-06, "loss": 0.0292, "step": 9403 }, { "epoch": 2.43, "learning_rate": 9.678295480665873e-06, "loss": 0.0298, "step": 9404 }, { "epoch": 2.43, "learning_rate": 9.669885905837194e-06, "loss": 0.0339, "step": 9405 }, { "epoch": 2.43, "learning_rate": 9.661479595045353e-06, "loss": 0.0238, "step": 9406 }, { "epoch": 2.43, "learning_rate": 9.653076548970662e-06, "loss": 0.0255, "step": 9407 }, { "epoch": 2.43, "learning_rate": 9.64467676829323e-06, "loss": 0.025, "step": 9408 }, { "epoch": 2.43, "learning_rate": 9.63628025369287e-06, "loss": 0.0243, "step": 9409 }, { "epoch": 2.43, "learning_rate": 9.627887005849112e-06, "loss": 0.0257, "step": 9410 }, { "epoch": 2.43, "learning_rate": 9.619497025441287e-06, "loss": 0.0304, "step": 9411 }, { "epoch": 2.43, "learning_rate": 9.611110313148403e-06, "loss": 0.0291, "step": 9412 }, { "epoch": 2.43, "learning_rate": 9.602726869649203e-06, "loss": 0.0253, "step": 9413 }, { "epoch": 2.43, "learning_rate": 9.59434669562222e-06, "loss": 0.0294, "step": 9414 }, { "epoch": 2.43, "learning_rate": 9.585969791745648e-06, "loss": 0.0268, "step": 9415 }, { "epoch": 2.43, "learning_rate": 9.577596158697495e-06, "loss": 0.0261, "step": 9416 }, { "epoch": 2.43, "learning_rate": 9.56922579715544e-06, "loss": 0.0294, "step": 9417 }, { "epoch": 2.43, "learning_rate": 9.560858707796916e-06, "loss": 0.0251, "step": 9418 }, { "epoch": 2.43, "learning_rate": 9.552494891299113e-06, "loss": 0.0269, "step": 9419 }, { "epoch": 2.43, "learning_rate": 9.544134348338929e-06, "loss": 0.0279, "step": 9420 }, { "epoch": 2.43, "learning_rate": 9.535777079593016e-06, "loss": 0.0229, "step": 9421 }, { "epoch": 2.43, "learning_rate": 9.527423085737752e-06, "loss": 0.0304, "step": 9422 }, { "epoch": 2.43, "learning_rate": 9.519072367449239e-06, "loss": 0.027, "step": 9423 }, { "epoch": 2.43, "learning_rate": 9.510724925403342e-06, "loss": 0.0273, "step": 9424 }, { "epoch": 2.43, "learning_rate": 9.50238076027562e-06, "loss": 0.0277, "step": 9425 }, { "epoch": 2.43, "learning_rate": 9.494039872741428e-06, "loss": 0.0273, "step": 9426 }, { "epoch": 2.43, "learning_rate": 9.485702263475794e-06, "loss": 0.0289, "step": 9427 }, { "epoch": 2.43, "learning_rate": 9.477367933153497e-06, "loss": 0.0314, "step": 9428 }, { "epoch": 2.43, "learning_rate": 9.46903688244909e-06, "loss": 0.0321, "step": 9429 }, { "epoch": 2.43, "learning_rate": 9.460709112036803e-06, "loss": 0.0233, "step": 9430 }, { "epoch": 2.43, "learning_rate": 9.452384622590632e-06, "loss": 0.0342, "step": 9431 }, { "epoch": 2.43, "learning_rate": 9.444063414784322e-06, "loss": 0.0384, "step": 9432 }, { "epoch": 2.43, "learning_rate": 9.435745489291297e-06, "loss": 0.0256, "step": 9433 }, { "epoch": 2.43, "learning_rate": 9.427430846784791e-06, "loss": 0.0232, "step": 9434 }, { "epoch": 2.43, "learning_rate": 9.419119487937717e-06, "loss": 0.0245, "step": 9435 }, { "epoch": 2.44, "learning_rate": 9.41081141342272e-06, "loss": 0.027, "step": 9436 }, { "epoch": 2.44, "learning_rate": 9.402506623912233e-06, "loss": 0.0279, "step": 9437 }, { "epoch": 2.44, "learning_rate": 9.394205120078353e-06, "loss": 0.0291, "step": 9438 }, { "epoch": 2.44, "learning_rate": 9.385906902592973e-06, "loss": 0.0285, "step": 9439 }, { "epoch": 2.44, "learning_rate": 9.377611972127675e-06, "loss": 0.0321, "step": 9440 }, { "epoch": 2.44, "learning_rate": 9.369320329353793e-06, "loss": 0.0251, "step": 9441 }, { "epoch": 2.44, "learning_rate": 9.361031974942409e-06, "loss": 0.0328, "step": 9442 }, { "epoch": 2.44, "learning_rate": 9.352746909564315e-06, "loss": 0.0312, "step": 9443 }, { "epoch": 2.44, "learning_rate": 9.34446513389003e-06, "loss": 0.0297, "step": 9444 }, { "epoch": 2.44, "learning_rate": 9.336186648589857e-06, "loss": 0.027, "step": 9445 }, { "epoch": 2.44, "learning_rate": 9.32791145433376e-06, "loss": 0.0318, "step": 9446 }, { "epoch": 2.44, "learning_rate": 9.319639551791516e-06, "loss": 0.0309, "step": 9447 }, { "epoch": 2.44, "learning_rate": 9.311370941632568e-06, "loss": 0.0347, "step": 9448 }, { "epoch": 2.44, "learning_rate": 9.303105624526121e-06, "loss": 0.029, "step": 9449 }, { "epoch": 2.44, "learning_rate": 9.294843601141123e-06, "loss": 0.0291, "step": 9450 }, { "epoch": 2.44, "learning_rate": 9.286584872146225e-06, "loss": 0.0362, "step": 9451 }, { "epoch": 2.44, "learning_rate": 9.27832943820986e-06, "loss": 0.0223, "step": 9452 }, { "epoch": 2.44, "learning_rate": 9.270077300000145e-06, "loss": 0.0207, "step": 9453 }, { "epoch": 2.44, "learning_rate": 9.261828458184946e-06, "loss": 0.0211, "step": 9454 }, { "epoch": 2.44, "learning_rate": 9.25358291343188e-06, "loss": 0.0187, "step": 9455 }, { "epoch": 2.44, "learning_rate": 9.245340666408286e-06, "loss": 0.0323, "step": 9456 }, { "epoch": 2.44, "learning_rate": 9.237101717781204e-06, "loss": 0.0284, "step": 9457 }, { "epoch": 2.44, "learning_rate": 9.228866068217478e-06, "loss": 0.0196, "step": 9458 }, { "epoch": 2.44, "learning_rate": 9.22063371838361e-06, "loss": 0.0192, "step": 9459 }, { "epoch": 2.44, "learning_rate": 9.21240466894589e-06, "loss": 0.0252, "step": 9460 }, { "epoch": 2.44, "learning_rate": 9.204178920570317e-06, "loss": 0.0279, "step": 9461 }, { "epoch": 2.44, "learning_rate": 9.195956473922607e-06, "loss": 0.0223, "step": 9462 }, { "epoch": 2.44, "learning_rate": 9.187737329668255e-06, "loss": 0.0351, "step": 9463 }, { "epoch": 2.44, "learning_rate": 9.179521488472432e-06, "loss": 0.0258, "step": 9464 }, { "epoch": 2.44, "learning_rate": 9.171308951000102e-06, "loss": 0.0225, "step": 9465 }, { "epoch": 2.44, "learning_rate": 9.163099717915913e-06, "loss": 0.0232, "step": 9466 }, { "epoch": 2.44, "learning_rate": 9.154893789884244e-06, "loss": 0.024, "step": 9467 }, { "epoch": 2.44, "learning_rate": 9.146691167569265e-06, "loss": 0.0329, "step": 9468 }, { "epoch": 2.44, "learning_rate": 9.138491851634805e-06, "loss": 0.0208, "step": 9469 }, { "epoch": 2.44, "learning_rate": 9.130295842744485e-06, "loss": 0.0294, "step": 9470 }, { "epoch": 2.44, "learning_rate": 9.122103141561616e-06, "loss": 0.0224, "step": 9471 }, { "epoch": 2.44, "learning_rate": 9.113913748749253e-06, "loss": 0.0238, "step": 9472 }, { "epoch": 2.44, "learning_rate": 9.105727664970209e-06, "loss": 0.0233, "step": 9473 }, { "epoch": 2.44, "learning_rate": 9.097544890887e-06, "loss": 0.0351, "step": 9474 }, { "epoch": 2.45, "learning_rate": 9.089365427161867e-06, "loss": 0.0268, "step": 9475 }, { "epoch": 2.45, "learning_rate": 9.081189274456819e-06, "loss": 0.0216, "step": 9476 }, { "epoch": 2.45, "learning_rate": 9.073016433433557e-06, "loss": 0.0194, "step": 9477 }, { "epoch": 2.45, "learning_rate": 9.064846904753554e-06, "loss": 0.0276, "step": 9478 }, { "epoch": 2.45, "learning_rate": 9.056680689077984e-06, "loss": 0.0265, "step": 9479 }, { "epoch": 2.45, "learning_rate": 9.048517787067756e-06, "loss": 0.0245, "step": 9480 }, { "epoch": 2.45, "learning_rate": 9.040358199383537e-06, "loss": 0.022, "step": 9481 }, { "epoch": 2.45, "learning_rate": 9.032201926685685e-06, "loss": 0.0184, "step": 9482 }, { "epoch": 2.45, "learning_rate": 9.02404896963433e-06, "loss": 0.0244, "step": 9483 }, { "epoch": 2.45, "learning_rate": 9.01589932888931e-06, "loss": 0.0239, "step": 9484 }, { "epoch": 2.45, "learning_rate": 9.007753005110187e-06, "loss": 0.0264, "step": 9485 }, { "epoch": 2.45, "learning_rate": 8.999609998956288e-06, "loss": 0.0296, "step": 9486 }, { "epoch": 2.45, "learning_rate": 8.991470311086648e-06, "loss": 0.022, "step": 9487 }, { "epoch": 2.45, "learning_rate": 8.983333942160005e-06, "loss": 0.034, "step": 9488 }, { "epoch": 2.45, "learning_rate": 8.975200892834905e-06, "loss": 0.0248, "step": 9489 }, { "epoch": 2.45, "learning_rate": 8.967071163769541e-06, "loss": 0.0218, "step": 9490 }, { "epoch": 2.45, "learning_rate": 8.958944755621912e-06, "loss": 0.0209, "step": 9491 }, { "epoch": 2.45, "learning_rate": 8.950821669049692e-06, "loss": 0.0291, "step": 9492 }, { "epoch": 2.45, "learning_rate": 8.942701904710293e-06, "loss": 0.0355, "step": 9493 }, { "epoch": 2.45, "learning_rate": 8.934585463260903e-06, "loss": 0.0287, "step": 9494 }, { "epoch": 2.45, "learning_rate": 8.926472345358384e-06, "loss": 0.0262, "step": 9495 }, { "epoch": 2.45, "learning_rate": 8.918362551659376e-06, "loss": 0.0287, "step": 9496 }, { "epoch": 2.45, "learning_rate": 8.910256082820223e-06, "loss": 0.0279, "step": 9497 }, { "epoch": 2.45, "learning_rate": 8.902152939496983e-06, "loss": 0.0267, "step": 9498 }, { "epoch": 2.45, "learning_rate": 8.894053122345503e-06, "loss": 0.03, "step": 9499 }, { "epoch": 2.45, "learning_rate": 8.885956632021308e-06, "loss": 0.0285, "step": 9500 }, { "epoch": 2.45, "learning_rate": 8.87786346917966e-06, "loss": 0.0287, "step": 9501 }, { "epoch": 2.45, "learning_rate": 8.869773634475586e-06, "loss": 0.0321, "step": 9502 }, { "epoch": 2.45, "learning_rate": 8.8616871285638e-06, "loss": 0.0277, "step": 9503 }, { "epoch": 2.45, "learning_rate": 8.853603952098783e-06, "loss": 0.0335, "step": 9504 }, { "epoch": 2.45, "learning_rate": 8.845524105734726e-06, "loss": 0.0265, "step": 9505 }, { "epoch": 2.45, "learning_rate": 8.837447590125542e-06, "loss": 0.0254, "step": 9506 }, { "epoch": 2.45, "learning_rate": 8.829374405924912e-06, "loss": 0.0232, "step": 9507 }, { "epoch": 2.45, "learning_rate": 8.821304553786198e-06, "loss": 0.0223, "step": 9508 }, { "epoch": 2.45, "learning_rate": 8.813238034362537e-06, "loss": 0.0264, "step": 9509 }, { "epoch": 2.45, "learning_rate": 8.805174848306774e-06, "loss": 0.024, "step": 9510 }, { "epoch": 2.45, "learning_rate": 8.797114996271466e-06, "loss": 0.0227, "step": 9511 }, { "epoch": 2.45, "learning_rate": 8.789058478908946e-06, "loss": 0.0219, "step": 9512 }, { "epoch": 2.45, "learning_rate": 8.781005296871248e-06, "loss": 0.0209, "step": 9513 }, { "epoch": 2.46, "learning_rate": 8.772955450810117e-06, "loss": 0.0283, "step": 9514 }, { "epoch": 2.46, "learning_rate": 8.764908941377081e-06, "loss": 0.0239, "step": 9515 }, { "epoch": 2.46, "learning_rate": 8.756865769223349e-06, "loss": 0.0239, "step": 9516 }, { "epoch": 2.46, "learning_rate": 8.748825934999894e-06, "loss": 0.0259, "step": 9517 }, { "epoch": 2.46, "learning_rate": 8.740789439357393e-06, "loss": 0.0261, "step": 9518 }, { "epoch": 2.46, "learning_rate": 8.732756282946259e-06, "loss": 0.0261, "step": 9519 }, { "epoch": 2.46, "learning_rate": 8.724726466416655e-06, "loss": 0.0267, "step": 9520 }, { "epoch": 2.46, "learning_rate": 8.716699990418437e-06, "loss": 0.0233, "step": 9521 }, { "epoch": 2.46, "learning_rate": 8.708676855601239e-06, "loss": 0.0253, "step": 9522 }, { "epoch": 2.46, "learning_rate": 8.700657062614382e-06, "loss": 0.025, "step": 9523 }, { "epoch": 2.46, "learning_rate": 8.692640612106923e-06, "loss": 0.0276, "step": 9524 }, { "epoch": 2.46, "learning_rate": 8.684627504727677e-06, "loss": 0.0264, "step": 9525 }, { "epoch": 2.46, "learning_rate": 8.676617741125154e-06, "loss": 0.0275, "step": 9526 }, { "epoch": 2.46, "learning_rate": 8.668611321947606e-06, "loss": 0.022, "step": 9527 }, { "epoch": 2.46, "learning_rate": 8.660608247843033e-06, "loss": 0.027, "step": 9528 }, { "epoch": 2.46, "learning_rate": 8.652608519459127e-06, "loss": 0.0248, "step": 9529 }, { "epoch": 2.46, "learning_rate": 8.644612137443354e-06, "loss": 0.028, "step": 9530 }, { "epoch": 2.46, "learning_rate": 8.636619102442867e-06, "loss": 0.0275, "step": 9531 }, { "epoch": 2.46, "learning_rate": 8.62862941510456e-06, "loss": 0.028, "step": 9532 }, { "epoch": 2.46, "learning_rate": 8.620643076075091e-06, "loss": 0.0282, "step": 9533 }, { "epoch": 2.46, "learning_rate": 8.612660086000785e-06, "loss": 0.0333, "step": 9534 }, { "epoch": 2.46, "learning_rate": 8.604680445527758e-06, "loss": 0.0262, "step": 9535 }, { "epoch": 2.46, "learning_rate": 8.596704155301815e-06, "loss": 0.028, "step": 9536 }, { "epoch": 2.46, "learning_rate": 8.588731215968481e-06, "loss": 0.0267, "step": 9537 }, { "epoch": 2.46, "learning_rate": 8.580761628173067e-06, "loss": 0.0288, "step": 9538 }, { "epoch": 2.46, "learning_rate": 8.572795392560557e-06, "loss": 0.0343, "step": 9539 }, { "epoch": 2.46, "learning_rate": 8.564832509775667e-06, "loss": 0.0295, "step": 9540 }, { "epoch": 2.46, "learning_rate": 8.556872980462882e-06, "loss": 0.0228, "step": 9541 }, { "epoch": 2.46, "learning_rate": 8.548916805266372e-06, "loss": 0.0272, "step": 9542 }, { "epoch": 2.46, "learning_rate": 8.540963984830075e-06, "loss": 0.0314, "step": 9543 }, { "epoch": 2.46, "learning_rate": 8.533014519797622e-06, "loss": 0.0333, "step": 9544 }, { "epoch": 2.46, "learning_rate": 8.525068410812375e-06, "loss": 0.0255, "step": 9545 }, { "epoch": 2.46, "learning_rate": 8.517125658517466e-06, "loss": 0.0256, "step": 9546 }, { "epoch": 2.46, "learning_rate": 8.509186263555696e-06, "loss": 0.0272, "step": 9547 }, { "epoch": 2.46, "learning_rate": 8.501250226569658e-06, "loss": 0.0308, "step": 9548 }, { "epoch": 2.46, "learning_rate": 8.493317548201608e-06, "loss": 0.0323, "step": 9549 }, { "epoch": 2.46, "learning_rate": 8.485388229093566e-06, "loss": 0.0374, "step": 9550 }, { "epoch": 2.46, "learning_rate": 8.477462269887293e-06, "loss": 0.0348, "step": 9551 }, { "epoch": 2.47, "learning_rate": 8.469539671224253e-06, "loss": 0.0199, "step": 9552 }, { "epoch": 2.47, "learning_rate": 8.461620433745632e-06, "loss": 0.0258, "step": 9553 }, { "epoch": 2.47, "learning_rate": 8.453704558092373e-06, "loss": 0.0159, "step": 9554 }, { "epoch": 2.47, "learning_rate": 8.44579204490512e-06, "loss": 0.0182, "step": 9555 }, { "epoch": 2.47, "learning_rate": 8.437882894824272e-06, "loss": 0.0189, "step": 9556 }, { "epoch": 2.47, "learning_rate": 8.429977108489934e-06, "loss": 0.0304, "step": 9557 }, { "epoch": 2.47, "learning_rate": 8.422074686541926e-06, "loss": 0.0339, "step": 9558 }, { "epoch": 2.47, "learning_rate": 8.414175629619847e-06, "loss": 0.0205, "step": 9559 }, { "epoch": 2.47, "learning_rate": 8.40627993836296e-06, "loss": 0.0278, "step": 9560 }, { "epoch": 2.47, "learning_rate": 8.398387613410319e-06, "loss": 0.02, "step": 9561 }, { "epoch": 2.47, "learning_rate": 8.390498655400652e-06, "loss": 0.0266, "step": 9562 }, { "epoch": 2.47, "learning_rate": 8.382613064972429e-06, "loss": 0.0217, "step": 9563 }, { "epoch": 2.47, "learning_rate": 8.374730842763878e-06, "loss": 0.0208, "step": 9564 }, { "epoch": 2.47, "learning_rate": 8.366851989412921e-06, "loss": 0.0229, "step": 9565 }, { "epoch": 2.47, "learning_rate": 8.358976505557203e-06, "loss": 0.0201, "step": 9566 }, { "epoch": 2.47, "learning_rate": 8.351104391834131e-06, "loss": 0.0264, "step": 9567 }, { "epoch": 2.47, "learning_rate": 8.343235648880798e-06, "loss": 0.0225, "step": 9568 }, { "epoch": 2.47, "learning_rate": 8.33537027733407e-06, "loss": 0.0281, "step": 9569 }, { "epoch": 2.47, "learning_rate": 8.327508277830503e-06, "loss": 0.0314, "step": 9570 }, { "epoch": 2.47, "learning_rate": 8.319649651006379e-06, "loss": 0.0379, "step": 9571 }, { "epoch": 2.47, "learning_rate": 8.311794397497747e-06, "loss": 0.0248, "step": 9572 }, { "epoch": 2.47, "learning_rate": 8.303942517940327e-06, "loss": 0.0207, "step": 9573 }, { "epoch": 2.47, "learning_rate": 8.296094012969618e-06, "loss": 0.0255, "step": 9574 }, { "epoch": 2.47, "learning_rate": 8.288248883220823e-06, "loss": 0.0221, "step": 9575 }, { "epoch": 2.47, "learning_rate": 8.280407129328843e-06, "loss": 0.0238, "step": 9576 }, { "epoch": 2.47, "learning_rate": 8.272568751928373e-06, "loss": 0.0303, "step": 9577 }, { "epoch": 2.47, "learning_rate": 8.264733751653776e-06, "loss": 0.0229, "step": 9578 }, { "epoch": 2.47, "learning_rate": 8.256902129139154e-06, "loss": 0.0296, "step": 9579 }, { "epoch": 2.47, "learning_rate": 8.249073885018367e-06, "loss": 0.028, "step": 9580 }, { "epoch": 2.47, "learning_rate": 8.241249019924952e-06, "loss": 0.0226, "step": 9581 }, { "epoch": 2.47, "learning_rate": 8.233427534492222e-06, "loss": 0.03, "step": 9582 }, { "epoch": 2.47, "learning_rate": 8.225609429353187e-06, "loss": 0.0313, "step": 9583 }, { "epoch": 2.47, "learning_rate": 8.21779470514057e-06, "loss": 0.0247, "step": 9584 }, { "epoch": 2.47, "learning_rate": 8.209983362486867e-06, "loss": 0.0288, "step": 9585 }, { "epoch": 2.47, "learning_rate": 8.202175402024253e-06, "loss": 0.0246, "step": 9586 }, { "epoch": 2.47, "learning_rate": 8.194370824384672e-06, "loss": 0.0235, "step": 9587 }, { "epoch": 2.47, "learning_rate": 8.186569630199759e-06, "loss": 0.0262, "step": 9588 }, { "epoch": 2.47, "learning_rate": 8.178771820100872e-06, "loss": 0.0239, "step": 9589 }, { "epoch": 2.47, "learning_rate": 8.170977394719143e-06, "loss": 0.0247, "step": 9590 }, { "epoch": 2.48, "learning_rate": 8.163186354685382e-06, "loss": 0.0246, "step": 9591 }, { "epoch": 2.48, "learning_rate": 8.155398700630129e-06, "loss": 0.0263, "step": 9592 }, { "epoch": 2.48, "learning_rate": 8.14761443318368e-06, "loss": 0.0251, "step": 9593 }, { "epoch": 2.48, "learning_rate": 8.139833552976029e-06, "loss": 0.0221, "step": 9594 }, { "epoch": 2.48, "learning_rate": 8.132056060636916e-06, "loss": 0.0291, "step": 9595 }, { "epoch": 2.48, "learning_rate": 8.124281956795799e-06, "loss": 0.0248, "step": 9596 }, { "epoch": 2.48, "learning_rate": 8.116511242081831e-06, "loss": 0.0299, "step": 9597 }, { "epoch": 2.48, "learning_rate": 8.108743917123956e-06, "loss": 0.0254, "step": 9598 }, { "epoch": 2.48, "learning_rate": 8.100979982550782e-06, "loss": 0.0257, "step": 9599 }, { "epoch": 2.48, "learning_rate": 8.093219438990684e-06, "loss": 0.0226, "step": 9600 }, { "epoch": 2.48, "learning_rate": 8.085462287071738e-06, "loss": 0.0272, "step": 9601 }, { "epoch": 2.48, "learning_rate": 8.07770852742174e-06, "loss": 0.0282, "step": 9602 }, { "epoch": 2.48, "learning_rate": 8.069958160668256e-06, "loss": 0.0355, "step": 9603 }, { "epoch": 2.48, "learning_rate": 8.062211187438524e-06, "loss": 0.0326, "step": 9604 }, { "epoch": 2.48, "learning_rate": 8.054467608359522e-06, "loss": 0.0252, "step": 9605 }, { "epoch": 2.48, "learning_rate": 8.046727424057988e-06, "loss": 0.0231, "step": 9606 }, { "epoch": 2.48, "learning_rate": 8.038990635160332e-06, "loss": 0.025, "step": 9607 }, { "epoch": 2.48, "learning_rate": 8.031257242292739e-06, "loss": 0.0257, "step": 9608 }, { "epoch": 2.48, "learning_rate": 8.023527246081087e-06, "loss": 0.0234, "step": 9609 }, { "epoch": 2.48, "learning_rate": 8.015800647150967e-06, "loss": 0.0274, "step": 9610 }, { "epoch": 2.48, "learning_rate": 8.008077446127748e-06, "loss": 0.0267, "step": 9611 }, { "epoch": 2.48, "learning_rate": 8.000357643636468e-06, "loss": 0.0278, "step": 9612 }, { "epoch": 2.48, "learning_rate": 7.992641240301928e-06, "loss": 0.0214, "step": 9613 }, { "epoch": 2.48, "learning_rate": 7.98492823674864e-06, "loss": 0.0256, "step": 9614 }, { "epoch": 2.48, "learning_rate": 7.977218633600824e-06, "loss": 0.0209, "step": 9615 }, { "epoch": 2.48, "learning_rate": 7.969512431482463e-06, "loss": 0.0279, "step": 9616 }, { "epoch": 2.48, "learning_rate": 7.96180963101722e-06, "loss": 0.0244, "step": 9617 }, { "epoch": 2.48, "learning_rate": 7.954110232828532e-06, "loss": 0.0239, "step": 9618 }, { "epoch": 2.48, "learning_rate": 7.946414237539518e-06, "loss": 0.0323, "step": 9619 }, { "epoch": 2.48, "learning_rate": 7.938721645773028e-06, "loss": 0.0219, "step": 9620 }, { "epoch": 2.48, "learning_rate": 7.931032458151671e-06, "loss": 0.0279, "step": 9621 }, { "epoch": 2.48, "learning_rate": 7.923346675297744e-06, "loss": 0.0359, "step": 9622 }, { "epoch": 2.48, "learning_rate": 7.91566429783327e-06, "loss": 0.0226, "step": 9623 }, { "epoch": 2.48, "learning_rate": 7.907985326380025e-06, "loss": 0.028, "step": 9624 }, { "epoch": 2.48, "learning_rate": 7.900309761559477e-06, "loss": 0.0278, "step": 9625 }, { "epoch": 2.48, "learning_rate": 7.892637603992843e-06, "loss": 0.0244, "step": 9626 }, { "epoch": 2.48, "learning_rate": 7.884968854301056e-06, "loss": 0.0296, "step": 9627 }, { "epoch": 2.48, "learning_rate": 7.877303513104744e-06, "loss": 0.0305, "step": 9628 }, { "epoch": 2.48, "learning_rate": 7.86964158102433e-06, "loss": 0.0252, "step": 9629 }, { "epoch": 2.49, "learning_rate": 7.861983058679873e-06, "loss": 0.0288, "step": 9630 }, { "epoch": 2.49, "learning_rate": 7.854327946691231e-06, "loss": 0.0283, "step": 9631 }, { "epoch": 2.49, "learning_rate": 7.846676245677947e-06, "loss": 0.0292, "step": 9632 }, { "epoch": 2.49, "learning_rate": 7.839027956259282e-06, "loss": 0.0255, "step": 9633 }, { "epoch": 2.49, "learning_rate": 7.831383079054255e-06, "loss": 0.0278, "step": 9634 }, { "epoch": 2.49, "learning_rate": 7.823741614681584e-06, "loss": 0.0273, "step": 9635 }, { "epoch": 2.49, "learning_rate": 7.816103563759696e-06, "loss": 0.0287, "step": 9636 }, { "epoch": 2.49, "learning_rate": 7.808468926906793e-06, "loss": 0.0323, "step": 9637 }, { "epoch": 2.49, "learning_rate": 7.800837704740737e-06, "loss": 0.0294, "step": 9638 }, { "epoch": 2.49, "learning_rate": 7.793209897879178e-06, "loss": 0.0266, "step": 9639 }, { "epoch": 2.49, "learning_rate": 7.785585506939442e-06, "loss": 0.0288, "step": 9640 }, { "epoch": 2.49, "learning_rate": 7.777964532538578e-06, "loss": 0.0303, "step": 9641 }, { "epoch": 2.49, "learning_rate": 7.770346975293402e-06, "loss": 0.0286, "step": 9642 }, { "epoch": 2.49, "learning_rate": 7.762732835820396e-06, "loss": 0.0295, "step": 9643 }, { "epoch": 2.49, "learning_rate": 7.75512211473583e-06, "loss": 0.0315, "step": 9644 }, { "epoch": 2.49, "learning_rate": 7.747514812655637e-06, "loss": 0.0288, "step": 9645 }, { "epoch": 2.49, "learning_rate": 7.739910930195504e-06, "loss": 0.0307, "step": 9646 }, { "epoch": 2.49, "learning_rate": 7.732310467970844e-06, "loss": 0.0325, "step": 9647 }, { "epoch": 2.49, "learning_rate": 7.724713426596775e-06, "loss": 0.0305, "step": 9648 }, { "epoch": 2.49, "learning_rate": 7.717119806688144e-06, "loss": 0.0302, "step": 9649 }, { "epoch": 2.49, "learning_rate": 7.70952960885955e-06, "loss": 0.0379, "step": 9650 }, { "epoch": 2.49, "learning_rate": 7.701942833725256e-06, "loss": 0.0178, "step": 9651 }, { "epoch": 2.49, "learning_rate": 7.694359481899315e-06, "loss": 0.0302, "step": 9652 }, { "epoch": 2.49, "learning_rate": 7.68677955399546e-06, "loss": 0.0254, "step": 9653 }, { "epoch": 2.49, "learning_rate": 7.679203050627138e-06, "loss": 0.0249, "step": 9654 }, { "epoch": 2.49, "learning_rate": 7.671629972407563e-06, "loss": 0.0236, "step": 9655 }, { "epoch": 2.49, "learning_rate": 7.664060319949629e-06, "loss": 0.0318, "step": 9656 }, { "epoch": 2.49, "learning_rate": 7.656494093865984e-06, "loss": 0.0364, "step": 9657 }, { "epoch": 2.49, "learning_rate": 7.648931294768986e-06, "loss": 0.026, "step": 9658 }, { "epoch": 2.49, "learning_rate": 7.641371923270697e-06, "loss": 0.0225, "step": 9659 }, { "epoch": 2.49, "learning_rate": 7.63381597998294e-06, "loss": 0.0246, "step": 9660 }, { "epoch": 2.49, "learning_rate": 7.626263465517236e-06, "loss": 0.0201, "step": 9661 }, { "epoch": 2.49, "learning_rate": 7.618714380484809e-06, "loss": 0.0305, "step": 9662 }, { "epoch": 2.49, "learning_rate": 7.6111687254966705e-06, "loss": 0.0253, "step": 9663 }, { "epoch": 2.49, "learning_rate": 7.603626501163469e-06, "loss": 0.0251, "step": 9664 }, { "epoch": 2.49, "learning_rate": 7.596087708095661e-06, "loss": 0.0286, "step": 9665 }, { "epoch": 2.49, "learning_rate": 7.58855234690336e-06, "loss": 0.032, "step": 9666 }, { "epoch": 2.49, "learning_rate": 7.581020418196416e-06, "loss": 0.0278, "step": 9667 }, { "epoch": 2.49, "learning_rate": 7.5734919225844355e-06, "loss": 0.0243, "step": 9668 }, { "epoch": 2.5, "learning_rate": 7.5659668606766986e-06, "loss": 0.0259, "step": 9669 }, { "epoch": 2.5, "learning_rate": 7.558445233082257e-06, "loss": 0.0263, "step": 9670 }, { "epoch": 2.5, "learning_rate": 7.550927040409844e-06, "loss": 0.033, "step": 9671 }, { "epoch": 2.5, "learning_rate": 7.543412283267914e-06, "loss": 0.0256, "step": 9672 }, { "epoch": 2.5, "learning_rate": 7.535900962264691e-06, "loss": 0.0216, "step": 9673 }, { "epoch": 2.5, "learning_rate": 7.528393078008067e-06, "loss": 0.021, "step": 9674 }, { "epoch": 2.5, "learning_rate": 7.520888631105677e-06, "loss": 0.0196, "step": 9675 }, { "epoch": 2.5, "learning_rate": 7.513387622164891e-06, "loss": 0.0259, "step": 9676 }, { "epoch": 2.5, "learning_rate": 7.505890051792769e-06, "loss": 0.0327, "step": 9677 }, { "epoch": 2.5, "learning_rate": 7.498395920596141e-06, "loss": 0.0214, "step": 9678 }, { "epoch": 2.5, "learning_rate": 7.4909052291815056e-06, "loss": 0.0264, "step": 9679 }, { "epoch": 2.5, "learning_rate": 7.483417978155105e-06, "loss": 0.0228, "step": 9680 }, { "epoch": 2.5, "learning_rate": 7.475934168122922e-06, "loss": 0.0292, "step": 9681 }, { "epoch": 2.5, "learning_rate": 7.468453799690628e-06, "loss": 0.0212, "step": 9682 }, { "epoch": 2.5, "learning_rate": 7.4609768734636486e-06, "loss": 0.0278, "step": 9683 }, { "epoch": 2.5, "learning_rate": 7.4535033900471066e-06, "loss": 0.0286, "step": 9684 }, { "epoch": 2.5, "learning_rate": 7.446033350045833e-06, "loss": 0.0317, "step": 9685 }, { "epoch": 2.5, "learning_rate": 7.438566754064435e-06, "loss": 0.0218, "step": 9686 }, { "epoch": 2.5, "learning_rate": 7.43110360270719e-06, "loss": 0.0278, "step": 9687 }, { "epoch": 2.5, "learning_rate": 7.423643896578098e-06, "loss": 0.023, "step": 9688 }, { "epoch": 2.5, "learning_rate": 7.416187636280924e-06, "loss": 0.0243, "step": 9689 }, { "epoch": 2.5, "learning_rate": 7.4087348224191e-06, "loss": 0.0252, "step": 9690 }, { "epoch": 2.5, "learning_rate": 7.401285455595824e-06, "loss": 0.0204, "step": 9691 }, { "epoch": 2.5, "learning_rate": 7.393839536413994e-06, "loss": 0.0238, "step": 9692 }, { "epoch": 2.5, "learning_rate": 7.386397065476209e-06, "loss": 0.0269, "step": 9693 }, { "epoch": 2.5, "learning_rate": 7.378958043384843e-06, "loss": 0.0278, "step": 9694 }, { "epoch": 2.5, "learning_rate": 7.3715224707419204e-06, "loss": 0.0264, "step": 9695 }, { "epoch": 2.5, "learning_rate": 7.36409034814926e-06, "loss": 0.0221, "step": 9696 }, { "epoch": 2.5, "learning_rate": 7.356661676208354e-06, "loss": 0.0287, "step": 9697 }, { "epoch": 2.5, "learning_rate": 7.349236455520414e-06, "loss": 0.0219, "step": 9698 }, { "epoch": 2.5, "learning_rate": 7.341814686686399e-06, "loss": 0.0263, "step": 9699 }, { "epoch": 2.5, "learning_rate": 7.334396370306978e-06, "loss": 0.0327, "step": 9700 }, { "epoch": 2.5, "learning_rate": 7.326981506982517e-06, "loss": 0.0317, "step": 9701 }, { "epoch": 2.5, "learning_rate": 7.319570097313144e-06, "loss": 0.0249, "step": 9702 }, { "epoch": 2.5, "learning_rate": 7.312162141898676e-06, "loss": 0.0335, "step": 9703 }, { "epoch": 2.5, "learning_rate": 7.304757641338672e-06, "loss": 0.0348, "step": 9704 }, { "epoch": 2.5, "learning_rate": 7.297356596232391e-06, "loss": 0.0279, "step": 9705 }, { "epoch": 2.5, "learning_rate": 7.289959007178815e-06, "loss": 0.027, "step": 9706 }, { "epoch": 2.51, "learning_rate": 7.2825648747766706e-06, "loss": 0.0212, "step": 9707 }, { "epoch": 2.51, "learning_rate": 7.275174199624363e-06, "loss": 0.0202, "step": 9708 }, { "epoch": 2.51, "learning_rate": 7.267786982320074e-06, "loss": 0.0189, "step": 9709 }, { "epoch": 2.51, "learning_rate": 7.260403223461654e-06, "loss": 0.029, "step": 9710 }, { "epoch": 2.51, "learning_rate": 7.253022923646679e-06, "loss": 0.0287, "step": 9711 }, { "epoch": 2.51, "learning_rate": 7.24564608347249e-06, "loss": 0.024, "step": 9712 }, { "epoch": 2.51, "learning_rate": 7.238272703536103e-06, "loss": 0.0279, "step": 9713 }, { "epoch": 2.51, "learning_rate": 7.230902784434251e-06, "loss": 0.0279, "step": 9714 }, { "epoch": 2.51, "learning_rate": 7.22353632676343e-06, "loss": 0.0285, "step": 9715 }, { "epoch": 2.51, "learning_rate": 7.216173331119807e-06, "loss": 0.0282, "step": 9716 }, { "epoch": 2.51, "learning_rate": 7.208813798099312e-06, "loss": 0.0236, "step": 9717 }, { "epoch": 2.51, "learning_rate": 7.201457728297567e-06, "loss": 0.0222, "step": 9718 }, { "epoch": 2.51, "learning_rate": 7.194105122309902e-06, "loss": 0.0261, "step": 9719 }, { "epoch": 2.51, "learning_rate": 7.186755980731413e-06, "loss": 0.0259, "step": 9720 }, { "epoch": 2.51, "learning_rate": 7.179410304156869e-06, "loss": 0.0273, "step": 9721 }, { "epoch": 2.51, "learning_rate": 7.172068093180795e-06, "loss": 0.0289, "step": 9722 }, { "epoch": 2.51, "learning_rate": 7.164729348397409e-06, "loss": 0.0234, "step": 9723 }, { "epoch": 2.51, "learning_rate": 7.157394070400642e-06, "loss": 0.0277, "step": 9724 }, { "epoch": 2.51, "learning_rate": 7.150062259784185e-06, "loss": 0.0238, "step": 9725 }, { "epoch": 2.51, "learning_rate": 7.142733917141414e-06, "loss": 0.026, "step": 9726 }, { "epoch": 2.51, "learning_rate": 7.135409043065422e-06, "loss": 0.0242, "step": 9727 }, { "epoch": 2.51, "learning_rate": 7.128087638149056e-06, "loss": 0.0242, "step": 9728 }, { "epoch": 2.51, "learning_rate": 7.120769702984831e-06, "loss": 0.0251, "step": 9729 }, { "epoch": 2.51, "learning_rate": 7.1134552381650395e-06, "loss": 0.0287, "step": 9730 }, { "epoch": 2.51, "learning_rate": 7.106144244281648e-06, "loss": 0.0277, "step": 9731 }, { "epoch": 2.51, "learning_rate": 7.098836721926349e-06, "loss": 0.0393, "step": 9732 }, { "epoch": 2.51, "learning_rate": 7.091532671690582e-06, "loss": 0.0291, "step": 9733 }, { "epoch": 2.51, "learning_rate": 7.084232094165461e-06, "loss": 0.0306, "step": 9734 }, { "epoch": 2.51, "learning_rate": 7.076934989941869e-06, "loss": 0.026, "step": 9735 }, { "epoch": 2.51, "learning_rate": 7.069641359610368e-06, "loss": 0.0329, "step": 9736 }, { "epoch": 2.51, "learning_rate": 7.062351203761247e-06, "loss": 0.0281, "step": 9737 }, { "epoch": 2.51, "learning_rate": 7.055064522984545e-06, "loss": 0.0273, "step": 9738 }, { "epoch": 2.51, "learning_rate": 7.047781317869973e-06, "loss": 0.0266, "step": 9739 }, { "epoch": 2.51, "learning_rate": 7.040501589006976e-06, "loss": 0.0319, "step": 9740 }, { "epoch": 2.51, "learning_rate": 7.03322533698475e-06, "loss": 0.0276, "step": 9741 }, { "epoch": 2.51, "learning_rate": 7.0259525623921625e-06, "loss": 0.0303, "step": 9742 }, { "epoch": 2.51, "learning_rate": 7.018683265817838e-06, "loss": 0.0297, "step": 9743 }, { "epoch": 2.51, "learning_rate": 7.011417447850094e-06, "loss": 0.03, "step": 9744 }, { "epoch": 2.51, "learning_rate": 7.00415510907696e-06, "loss": 0.0334, "step": 9745 }, { "epoch": 2.52, "learning_rate": 6.996896250086227e-06, "loss": 0.0275, "step": 9746 }, { "epoch": 2.52, "learning_rate": 6.989640871465353e-06, "loss": 0.0394, "step": 9747 }, { "epoch": 2.52, "learning_rate": 6.9823889738015566e-06, "loss": 0.0312, "step": 9748 }, { "epoch": 2.52, "learning_rate": 6.975140557681747e-06, "loss": 0.0298, "step": 9749 }, { "epoch": 2.52, "learning_rate": 6.967895623692544e-06, "loss": 0.0271, "step": 9750 }, { "epoch": 2.52, "learning_rate": 6.960654172420328e-06, "loss": 0.019, "step": 9751 }, { "epoch": 2.52, "learning_rate": 6.953416204451163e-06, "loss": 0.0188, "step": 9752 }, { "epoch": 2.52, "learning_rate": 6.946181720370825e-06, "loss": 0.0246, "step": 9753 }, { "epoch": 2.52, "learning_rate": 6.938950720764848e-06, "loss": 0.0154, "step": 9754 }, { "epoch": 2.52, "learning_rate": 6.931723206218427e-06, "loss": 0.0171, "step": 9755 }, { "epoch": 2.52, "learning_rate": 6.92449917731654e-06, "loss": 0.0209, "step": 9756 }, { "epoch": 2.52, "learning_rate": 6.9172786346438315e-06, "loss": 0.0313, "step": 9757 }, { "epoch": 2.52, "learning_rate": 6.910061578784671e-06, "loss": 0.0332, "step": 9758 }, { "epoch": 2.52, "learning_rate": 6.902848010323182e-06, "loss": 0.039, "step": 9759 }, { "epoch": 2.52, "learning_rate": 6.895637929843151e-06, "loss": 0.0275, "step": 9760 }, { "epoch": 2.52, "learning_rate": 6.888431337928142e-06, "loss": 0.0306, "step": 9761 }, { "epoch": 2.52, "learning_rate": 6.8812282351613914e-06, "loss": 0.0325, "step": 9762 }, { "epoch": 2.52, "learning_rate": 6.874028622125855e-06, "loss": 0.0238, "step": 9763 }, { "epoch": 2.52, "learning_rate": 6.866832499404247e-06, "loss": 0.021, "step": 9764 }, { "epoch": 2.52, "learning_rate": 6.859639867578937e-06, "loss": 0.0277, "step": 9765 }, { "epoch": 2.52, "learning_rate": 6.852450727232085e-06, "loss": 0.0213, "step": 9766 }, { "epoch": 2.52, "learning_rate": 6.845265078945501e-06, "loss": 0.0198, "step": 9767 }, { "epoch": 2.52, "learning_rate": 6.838082923300742e-06, "loss": 0.0197, "step": 9768 }, { "epoch": 2.52, "learning_rate": 6.830904260879107e-06, "loss": 0.0206, "step": 9769 }, { "epoch": 2.52, "learning_rate": 6.82372909226156e-06, "loss": 0.0225, "step": 9770 }, { "epoch": 2.52, "learning_rate": 6.816557418028807e-06, "loss": 0.0288, "step": 9771 }, { "epoch": 2.52, "learning_rate": 6.809389238761299e-06, "loss": 0.0308, "step": 9772 }, { "epoch": 2.52, "learning_rate": 6.802224555039149e-06, "loss": 0.023, "step": 9773 }, { "epoch": 2.52, "learning_rate": 6.7950633674422445e-06, "loss": 0.0272, "step": 9774 }, { "epoch": 2.52, "learning_rate": 6.787905676550143e-06, "loss": 0.0328, "step": 9775 }, { "epoch": 2.52, "learning_rate": 6.780751482942138e-06, "loss": 0.0271, "step": 9776 }, { "epoch": 2.52, "learning_rate": 6.773600787197249e-06, "loss": 0.0233, "step": 9777 }, { "epoch": 2.52, "learning_rate": 6.766453589894189e-06, "loss": 0.0211, "step": 9778 }, { "epoch": 2.52, "learning_rate": 6.759309891611421e-06, "loss": 0.0234, "step": 9779 }, { "epoch": 2.52, "learning_rate": 6.752169692927096e-06, "loss": 0.0298, "step": 9780 }, { "epoch": 2.52, "learning_rate": 6.745032994419087e-06, "loss": 0.0277, "step": 9781 }, { "epoch": 2.52, "learning_rate": 6.737899796664998e-06, "loss": 0.0231, "step": 9782 }, { "epoch": 2.52, "learning_rate": 6.73077010024214e-06, "loss": 0.0363, "step": 9783 }, { "epoch": 2.52, "learning_rate": 6.723643905727517e-06, "loss": 0.0265, "step": 9784 }, { "epoch": 2.53, "learning_rate": 6.7165212136979125e-06, "loss": 0.0261, "step": 9785 }, { "epoch": 2.53, "learning_rate": 6.70940202472975e-06, "loss": 0.024, "step": 9786 }, { "epoch": 2.53, "learning_rate": 6.702286339399239e-06, "loss": 0.0257, "step": 9787 }, { "epoch": 2.53, "learning_rate": 6.695174158282252e-06, "loss": 0.0251, "step": 9788 }, { "epoch": 2.53, "learning_rate": 6.688065481954398e-06, "loss": 0.0264, "step": 9789 }, { "epoch": 2.53, "learning_rate": 6.680960310991025e-06, "loss": 0.0279, "step": 9790 }, { "epoch": 2.53, "learning_rate": 6.673858645967146e-06, "loss": 0.0234, "step": 9791 }, { "epoch": 2.53, "learning_rate": 6.666760487457547e-06, "loss": 0.0288, "step": 9792 }, { "epoch": 2.53, "learning_rate": 6.659665836036694e-06, "loss": 0.0319, "step": 9793 }, { "epoch": 2.53, "learning_rate": 6.652574692278757e-06, "loss": 0.0294, "step": 9794 }, { "epoch": 2.53, "learning_rate": 6.645487056757682e-06, "loss": 0.0264, "step": 9795 }, { "epoch": 2.53, "learning_rate": 6.638402930047066e-06, "loss": 0.0296, "step": 9796 }, { "epoch": 2.53, "learning_rate": 6.631322312720251e-06, "loss": 0.026, "step": 9797 }, { "epoch": 2.53, "learning_rate": 6.6242452053503045e-06, "loss": 0.0264, "step": 9798 }, { "epoch": 2.53, "learning_rate": 6.617171608509981e-06, "loss": 0.0258, "step": 9799 }, { "epoch": 2.53, "learning_rate": 6.610101522771789e-06, "loss": 0.0293, "step": 9800 }, { "epoch": 2.53, "learning_rate": 6.603034948707915e-06, "loss": 0.0224, "step": 9801 }, { "epoch": 2.53, "learning_rate": 6.595971886890279e-06, "loss": 0.0212, "step": 9802 }, { "epoch": 2.53, "learning_rate": 6.588912337890524e-06, "loss": 0.027, "step": 9803 }, { "epoch": 2.53, "learning_rate": 6.581856302279993e-06, "loss": 0.0294, "step": 9804 }, { "epoch": 2.53, "learning_rate": 6.574803780629757e-06, "loss": 0.0267, "step": 9805 }, { "epoch": 2.53, "learning_rate": 6.5677547735106035e-06, "loss": 0.0255, "step": 9806 }, { "epoch": 2.53, "learning_rate": 6.56070928149301e-06, "loss": 0.0309, "step": 9807 }, { "epoch": 2.53, "learning_rate": 6.55366730514721e-06, "loss": 0.0256, "step": 9808 }, { "epoch": 2.53, "learning_rate": 6.546628845043124e-06, "loss": 0.0263, "step": 9809 }, { "epoch": 2.53, "learning_rate": 6.539593901750385e-06, "loss": 0.0272, "step": 9810 }, { "epoch": 2.53, "learning_rate": 6.5325624758383715e-06, "loss": 0.0251, "step": 9811 }, { "epoch": 2.53, "learning_rate": 6.525534567876135e-06, "loss": 0.028, "step": 9812 }, { "epoch": 2.53, "learning_rate": 6.518510178432491e-06, "loss": 0.022, "step": 9813 }, { "epoch": 2.53, "learning_rate": 6.511489308075936e-06, "loss": 0.0266, "step": 9814 }, { "epoch": 2.53, "learning_rate": 6.504471957374669e-06, "loss": 0.0267, "step": 9815 }, { "epoch": 2.53, "learning_rate": 6.497458126896655e-06, "loss": 0.029, "step": 9816 }, { "epoch": 2.53, "learning_rate": 6.490447817209522e-06, "loss": 0.0295, "step": 9817 }, { "epoch": 2.53, "learning_rate": 6.483441028880649e-06, "loss": 0.0237, "step": 9818 }, { "epoch": 2.53, "learning_rate": 6.476437762477117e-06, "loss": 0.0275, "step": 9819 }, { "epoch": 2.53, "learning_rate": 6.469438018565704e-06, "loss": 0.0237, "step": 9820 }, { "epoch": 2.53, "learning_rate": 6.462441797712948e-06, "loss": 0.0246, "step": 9821 }, { "epoch": 2.53, "learning_rate": 6.455449100485051e-06, "loss": 0.0308, "step": 9822 }, { "epoch": 2.53, "learning_rate": 6.448459927447953e-06, "loss": 0.0269, "step": 9823 }, { "epoch": 2.54, "learning_rate": 6.441474279167326e-06, "loss": 0.0246, "step": 9824 }, { "epoch": 2.54, "learning_rate": 6.434492156208522e-06, "loss": 0.0271, "step": 9825 }, { "epoch": 2.54, "learning_rate": 6.427513559136639e-06, "loss": 0.0235, "step": 9826 }, { "epoch": 2.54, "learning_rate": 6.4205384885164695e-06, "loss": 0.0295, "step": 9827 }, { "epoch": 2.54, "learning_rate": 6.413566944912519e-06, "loss": 0.0373, "step": 9828 }, { "epoch": 2.54, "learning_rate": 6.406598928889035e-06, "loss": 0.0299, "step": 9829 }, { "epoch": 2.54, "learning_rate": 6.399634441009933e-06, "loss": 0.0278, "step": 9830 }, { "epoch": 2.54, "learning_rate": 6.392673481838896e-06, "loss": 0.0249, "step": 9831 }, { "epoch": 2.54, "learning_rate": 6.385716051939284e-06, "loss": 0.0273, "step": 9832 }, { "epoch": 2.54, "learning_rate": 6.378762151874174e-06, "loss": 0.024, "step": 9833 }, { "epoch": 2.54, "learning_rate": 6.3718117822063824e-06, "loss": 0.0297, "step": 9834 }, { "epoch": 2.54, "learning_rate": 6.364864943498422e-06, "loss": 0.0269, "step": 9835 }, { "epoch": 2.54, "learning_rate": 6.357921636312497e-06, "loss": 0.0249, "step": 9836 }, { "epoch": 2.54, "learning_rate": 6.350981861210581e-06, "loss": 0.0308, "step": 9837 }, { "epoch": 2.54, "learning_rate": 6.344045618754307e-06, "loss": 0.0342, "step": 9838 }, { "epoch": 2.54, "learning_rate": 6.337112909505066e-06, "loss": 0.0322, "step": 9839 }, { "epoch": 2.54, "learning_rate": 6.330183734023937e-06, "loss": 0.0366, "step": 9840 }, { "epoch": 2.54, "learning_rate": 6.323258092871704e-06, "loss": 0.028, "step": 9841 }, { "epoch": 2.54, "learning_rate": 6.316335986608901e-06, "loss": 0.0266, "step": 9842 }, { "epoch": 2.54, "learning_rate": 6.309417415795732e-06, "loss": 0.0257, "step": 9843 }, { "epoch": 2.54, "learning_rate": 6.302502380992164e-06, "loss": 0.0334, "step": 9844 }, { "epoch": 2.54, "learning_rate": 6.2955908827578345e-06, "loss": 0.0319, "step": 9845 }, { "epoch": 2.54, "learning_rate": 6.288682921652106e-06, "loss": 0.0351, "step": 9846 }, { "epoch": 2.54, "learning_rate": 6.281778498234086e-06, "loss": 0.0349, "step": 9847 }, { "epoch": 2.54, "learning_rate": 6.2748776130625464e-06, "loss": 0.0286, "step": 9848 }, { "epoch": 2.54, "learning_rate": 6.2679802666959995e-06, "loss": 0.036, "step": 9849 }, { "epoch": 2.54, "learning_rate": 6.2610864596926775e-06, "loss": 0.0315, "step": 9850 }, { "epoch": 2.54, "learning_rate": 6.2541961926105066e-06, "loss": 0.0328, "step": 9851 }, { "epoch": 2.54, "learning_rate": 6.24730946600715e-06, "loss": 0.0232, "step": 9852 }, { "epoch": 2.54, "learning_rate": 6.240426280439959e-06, "loss": 0.0242, "step": 9853 }, { "epoch": 2.54, "learning_rate": 6.233546636466009e-06, "loss": 0.0319, "step": 9854 }, { "epoch": 2.54, "learning_rate": 6.2266705346421095e-06, "loss": 0.0252, "step": 9855 }, { "epoch": 2.54, "learning_rate": 6.219797975524733e-06, "loss": 0.0306, "step": 9856 }, { "epoch": 2.54, "learning_rate": 6.2129289596701275e-06, "loss": 0.0289, "step": 9857 }, { "epoch": 2.54, "learning_rate": 6.206063487634212e-06, "loss": 0.0227, "step": 9858 }, { "epoch": 2.54, "learning_rate": 6.199201559972612e-06, "loss": 0.0273, "step": 9859 }, { "epoch": 2.54, "learning_rate": 6.192343177240712e-06, "loss": 0.0231, "step": 9860 }, { "epoch": 2.54, "learning_rate": 6.185488339993562e-06, "loss": 0.0273, "step": 9861 }, { "epoch": 2.55, "learning_rate": 6.178637048785946e-06, "loss": 0.0259, "step": 9862 }, { "epoch": 2.55, "learning_rate": 6.1717893041723705e-06, "loss": 0.0196, "step": 9863 }, { "epoch": 2.55, "learning_rate": 6.164945106707032e-06, "loss": 0.0202, "step": 9864 }, { "epoch": 2.55, "learning_rate": 6.158104456943864e-06, "loss": 0.0239, "step": 9865 }, { "epoch": 2.55, "learning_rate": 6.1512673554364965e-06, "loss": 0.0305, "step": 9866 }, { "epoch": 2.55, "learning_rate": 6.144433802738264e-06, "loss": 0.0186, "step": 9867 }, { "epoch": 2.55, "learning_rate": 6.137603799402242e-06, "loss": 0.025, "step": 9868 }, { "epoch": 2.55, "learning_rate": 6.130777345981192e-06, "loss": 0.0199, "step": 9869 }, { "epoch": 2.55, "learning_rate": 6.123954443027613e-06, "loss": 0.0206, "step": 9870 }, { "epoch": 2.55, "learning_rate": 6.117135091093695e-06, "loss": 0.022, "step": 9871 }, { "epoch": 2.55, "learning_rate": 6.110319290731342e-06, "loss": 0.0269, "step": 9872 }, { "epoch": 2.55, "learning_rate": 6.1035070424921905e-06, "loss": 0.0299, "step": 9873 }, { "epoch": 2.55, "learning_rate": 6.096698346927571e-06, "loss": 0.0235, "step": 9874 }, { "epoch": 2.55, "learning_rate": 6.0898932045885204e-06, "loss": 0.0229, "step": 9875 }, { "epoch": 2.55, "learning_rate": 6.083091616025815e-06, "loss": 0.022, "step": 9876 }, { "epoch": 2.55, "learning_rate": 6.076293581789916e-06, "loss": 0.0278, "step": 9877 }, { "epoch": 2.55, "learning_rate": 6.06949910243102e-06, "loss": 0.0197, "step": 9878 }, { "epoch": 2.55, "learning_rate": 6.062708178499021e-06, "loss": 0.0211, "step": 9879 }, { "epoch": 2.55, "learning_rate": 6.055920810543514e-06, "loss": 0.0242, "step": 9880 }, { "epoch": 2.55, "learning_rate": 6.049136999113847e-06, "loss": 0.0297, "step": 9881 }, { "epoch": 2.55, "learning_rate": 6.042356744759031e-06, "loss": 0.0259, "step": 9882 }, { "epoch": 2.55, "learning_rate": 6.035580048027828e-06, "loss": 0.0222, "step": 9883 }, { "epoch": 2.55, "learning_rate": 6.028806909468693e-06, "loss": 0.0189, "step": 9884 }, { "epoch": 2.55, "learning_rate": 6.022037329629782e-06, "loss": 0.0217, "step": 9885 }, { "epoch": 2.55, "learning_rate": 6.015271309059001e-06, "loss": 0.0235, "step": 9886 }, { "epoch": 2.55, "learning_rate": 6.008508848303929e-06, "loss": 0.0197, "step": 9887 }, { "epoch": 2.55, "learning_rate": 6.001749947911866e-06, "loss": 0.0203, "step": 9888 }, { "epoch": 2.55, "learning_rate": 5.994994608429849e-06, "loss": 0.0218, "step": 9889 }, { "epoch": 2.55, "learning_rate": 5.988242830404589e-06, "loss": 0.0215, "step": 9890 }, { "epoch": 2.55, "learning_rate": 5.981494614382549e-06, "loss": 0.0226, "step": 9891 }, { "epoch": 2.55, "learning_rate": 5.97474996090987e-06, "loss": 0.0219, "step": 9892 }, { "epoch": 2.55, "learning_rate": 5.968008870532405e-06, "loss": 0.0242, "step": 9893 }, { "epoch": 2.55, "learning_rate": 5.961271343795754e-06, "loss": 0.0275, "step": 9894 }, { "epoch": 2.55, "learning_rate": 5.954537381245184e-06, "loss": 0.0226, "step": 9895 }, { "epoch": 2.55, "learning_rate": 5.947806983425713e-06, "loss": 0.0287, "step": 9896 }, { "epoch": 2.55, "learning_rate": 5.941080150882045e-06, "loss": 0.0282, "step": 9897 }, { "epoch": 2.55, "learning_rate": 5.934356884158593e-06, "loss": 0.03, "step": 9898 }, { "epoch": 2.55, "learning_rate": 5.9276371837995115e-06, "loss": 0.0274, "step": 9899 }, { "epoch": 2.55, "learning_rate": 5.920921050348627e-06, "loss": 0.0343, "step": 9900 }, { "epoch": 2.56, "learning_rate": 5.914208484349498e-06, "loss": 0.0318, "step": 9901 }, { "epoch": 2.56, "learning_rate": 5.907499486345408e-06, "loss": 0.0218, "step": 9902 }, { "epoch": 2.56, "learning_rate": 5.900794056879317e-06, "loss": 0.0299, "step": 9903 }, { "epoch": 2.56, "learning_rate": 5.894092196493933e-06, "loss": 0.0222, "step": 9904 }, { "epoch": 2.56, "learning_rate": 5.887393905731653e-06, "loss": 0.0275, "step": 9905 }, { "epoch": 2.56, "learning_rate": 5.880699185134575e-06, "loss": 0.0218, "step": 9906 }, { "epoch": 2.56, "learning_rate": 5.874008035244544e-06, "loss": 0.0315, "step": 9907 }, { "epoch": 2.56, "learning_rate": 5.867320456603071e-06, "loss": 0.0237, "step": 9908 }, { "epoch": 2.56, "learning_rate": 5.860636449751427e-06, "loss": 0.0204, "step": 9909 }, { "epoch": 2.56, "learning_rate": 5.85395601523056e-06, "loss": 0.0263, "step": 9910 }, { "epoch": 2.56, "learning_rate": 5.84727915358112e-06, "loss": 0.0224, "step": 9911 }, { "epoch": 2.56, "learning_rate": 5.8406058653435125e-06, "loss": 0.0251, "step": 9912 }, { "epoch": 2.56, "learning_rate": 5.833936151057806e-06, "loss": 0.0289, "step": 9913 }, { "epoch": 2.56, "learning_rate": 5.827270011263813e-06, "loss": 0.0258, "step": 9914 }, { "epoch": 2.56, "learning_rate": 5.820607446501047e-06, "loss": 0.026, "step": 9915 }, { "epoch": 2.56, "learning_rate": 5.81394845730871e-06, "loss": 0.024, "step": 9916 }, { "epoch": 2.56, "learning_rate": 5.807293044225753e-06, "loss": 0.033, "step": 9917 }, { "epoch": 2.56, "learning_rate": 5.800641207790814e-06, "loss": 0.0282, "step": 9918 }, { "epoch": 2.56, "learning_rate": 5.793992948542232e-06, "loss": 0.0243, "step": 9919 }, { "epoch": 2.56, "learning_rate": 5.787348267018095e-06, "loss": 0.022, "step": 9920 }, { "epoch": 2.56, "learning_rate": 5.780707163756149e-06, "loss": 0.0274, "step": 9921 }, { "epoch": 2.56, "learning_rate": 5.774069639293911e-06, "loss": 0.0222, "step": 9922 }, { "epoch": 2.56, "learning_rate": 5.7674356941685495e-06, "loss": 0.0238, "step": 9923 }, { "epoch": 2.56, "learning_rate": 5.760805328916974e-06, "loss": 0.0304, "step": 9924 }, { "epoch": 2.56, "learning_rate": 5.754178544075811e-06, "loss": 0.0318, "step": 9925 }, { "epoch": 2.56, "learning_rate": 5.747555340181371e-06, "loss": 0.0235, "step": 9926 }, { "epoch": 2.56, "learning_rate": 5.7409357177697076e-06, "loss": 0.0267, "step": 9927 }, { "epoch": 2.56, "learning_rate": 5.734319677376554e-06, "loss": 0.0293, "step": 9928 }, { "epoch": 2.56, "learning_rate": 5.727707219537365e-06, "loss": 0.0327, "step": 9929 }, { "epoch": 2.56, "learning_rate": 5.721098344787312e-06, "loss": 0.0247, "step": 9930 }, { "epoch": 2.56, "learning_rate": 5.714493053661274e-06, "loss": 0.0247, "step": 9931 }, { "epoch": 2.56, "learning_rate": 5.707891346693822e-06, "loss": 0.0267, "step": 9932 }, { "epoch": 2.56, "learning_rate": 5.7012932244192745e-06, "loss": 0.0246, "step": 9933 }, { "epoch": 2.56, "learning_rate": 5.6946986873716136e-06, "loss": 0.0291, "step": 9934 }, { "epoch": 2.56, "learning_rate": 5.6881077360845745e-06, "loss": 0.0348, "step": 9935 }, { "epoch": 2.56, "learning_rate": 5.681520371091581e-06, "loss": 0.0249, "step": 9936 }, { "epoch": 2.56, "learning_rate": 5.674936592925745e-06, "loss": 0.031, "step": 9937 }, { "epoch": 2.56, "learning_rate": 5.66835640211994e-06, "loss": 0.0289, "step": 9938 }, { "epoch": 2.56, "learning_rate": 5.661779799206701e-06, "loss": 0.0301, "step": 9939 }, { "epoch": 2.57, "learning_rate": 5.655206784718309e-06, "loss": 0.0315, "step": 9940 }, { "epoch": 2.57, "learning_rate": 5.648637359186726e-06, "loss": 0.0276, "step": 9941 }, { "epoch": 2.57, "learning_rate": 5.642071523143627e-06, "loss": 0.0321, "step": 9942 }, { "epoch": 2.57, "learning_rate": 5.635509277120432e-06, "loss": 0.0289, "step": 9943 }, { "epoch": 2.57, "learning_rate": 5.628950621648221e-06, "loss": 0.0253, "step": 9944 }, { "epoch": 2.57, "learning_rate": 5.622395557257803e-06, "loss": 0.0263, "step": 9945 }, { "epoch": 2.57, "learning_rate": 5.615844084479721e-06, "loss": 0.0233, "step": 9946 }, { "epoch": 2.57, "learning_rate": 5.609296203844177e-06, "loss": 0.028, "step": 9947 }, { "epoch": 2.57, "learning_rate": 5.602751915881138e-06, "loss": 0.0304, "step": 9948 }, { "epoch": 2.57, "learning_rate": 5.5962112211202345e-06, "loss": 0.0322, "step": 9949 }, { "epoch": 2.57, "learning_rate": 5.589674120090827e-06, "loss": 0.0353, "step": 9950 }, { "epoch": 2.57, "learning_rate": 5.583140613321991e-06, "loss": 0.0185, "step": 9951 }, { "epoch": 2.57, "learning_rate": 5.576610701342494e-06, "loss": 0.0315, "step": 9952 }, { "epoch": 2.57, "learning_rate": 5.570084384680829e-06, "loss": 0.0278, "step": 9953 }, { "epoch": 2.57, "learning_rate": 5.563561663865191e-06, "loss": 0.0213, "step": 9954 }, { "epoch": 2.57, "learning_rate": 5.557042539423463e-06, "loss": 0.0221, "step": 9955 }, { "epoch": 2.57, "learning_rate": 5.550527011883283e-06, "loss": 0.0282, "step": 9956 }, { "epoch": 2.57, "learning_rate": 5.5440150817719635e-06, "loss": 0.0241, "step": 9957 }, { "epoch": 2.57, "learning_rate": 5.537506749616528e-06, "loss": 0.0224, "step": 9958 }, { "epoch": 2.57, "learning_rate": 5.531002015943726e-06, "loss": 0.0196, "step": 9959 }, { "epoch": 2.57, "learning_rate": 5.524500881279987e-06, "loss": 0.0191, "step": 9960 }, { "epoch": 2.57, "learning_rate": 5.518003346151491e-06, "loss": 0.0251, "step": 9961 }, { "epoch": 2.57, "learning_rate": 5.511509411084093e-06, "loss": 0.0228, "step": 9962 }, { "epoch": 2.57, "learning_rate": 5.505019076603352e-06, "loss": 0.0334, "step": 9963 }, { "epoch": 2.57, "learning_rate": 5.498532343234575e-06, "loss": 0.0247, "step": 9964 }, { "epoch": 2.57, "learning_rate": 5.492049211502731e-06, "loss": 0.0238, "step": 9965 }, { "epoch": 2.57, "learning_rate": 5.48556968193254e-06, "loss": 0.0237, "step": 9966 }, { "epoch": 2.57, "learning_rate": 5.479093755048398e-06, "loss": 0.0201, "step": 9967 }, { "epoch": 2.57, "learning_rate": 5.472621431374414e-06, "loss": 0.0249, "step": 9968 }, { "epoch": 2.57, "learning_rate": 5.46615271143443e-06, "loss": 0.0307, "step": 9969 }, { "epoch": 2.57, "learning_rate": 5.459687595751967e-06, "loss": 0.0221, "step": 9970 }, { "epoch": 2.57, "learning_rate": 5.45322608485026e-06, "loss": 0.0226, "step": 9971 }, { "epoch": 2.57, "learning_rate": 5.446768179252276e-06, "loss": 0.0289, "step": 9972 }, { "epoch": 2.57, "learning_rate": 5.440313879480658e-06, "loss": 0.0298, "step": 9973 }, { "epoch": 2.57, "learning_rate": 5.433863186057786e-06, "loss": 0.0247, "step": 9974 }, { "epoch": 2.57, "learning_rate": 5.427416099505722e-06, "loss": 0.0215, "step": 9975 }, { "epoch": 2.57, "learning_rate": 5.420972620346243e-06, "loss": 0.0202, "step": 9976 }, { "epoch": 2.57, "learning_rate": 5.414532749100859e-06, "loss": 0.0242, "step": 9977 }, { "epoch": 2.57, "learning_rate": 5.4080964862907425e-06, "loss": 0.0225, "step": 9978 }, { "epoch": 2.58, "learning_rate": 5.401663832436826e-06, "loss": 0.0236, "step": 9979 }, { "epoch": 2.58, "learning_rate": 5.395234788059717e-06, "loss": 0.0333, "step": 9980 }, { "epoch": 2.58, "learning_rate": 5.388809353679714e-06, "loss": 0.0228, "step": 9981 }, { "epoch": 2.58, "learning_rate": 5.382387529816874e-06, "loss": 0.0233, "step": 9982 }, { "epoch": 2.58, "learning_rate": 5.375969316990925e-06, "loss": 0.0205, "step": 9983 }, { "epoch": 2.58, "learning_rate": 5.369554715721303e-06, "loss": 0.0229, "step": 9984 }, { "epoch": 2.58, "learning_rate": 5.363143726527181e-06, "loss": 0.0239, "step": 9985 }, { "epoch": 2.58, "learning_rate": 5.356736349927394e-06, "loss": 0.028, "step": 9986 }, { "epoch": 2.58, "learning_rate": 5.350332586440532e-06, "loss": 0.0256, "step": 9987 }, { "epoch": 2.58, "learning_rate": 5.34393243658487e-06, "loss": 0.0282, "step": 9988 }, { "epoch": 2.58, "learning_rate": 5.337535900878371e-06, "loss": 0.0209, "step": 9989 }, { "epoch": 2.58, "learning_rate": 5.331142979838749e-06, "loss": 0.0259, "step": 9990 }, { "epoch": 2.58, "learning_rate": 5.324753673983379e-06, "loss": 0.0211, "step": 9991 }, { "epoch": 2.58, "learning_rate": 5.318367983829392e-06, "loss": 0.0265, "step": 9992 }, { "epoch": 2.58, "learning_rate": 5.311985909893591e-06, "loss": 0.0237, "step": 9993 }, { "epoch": 2.58, "learning_rate": 5.305607452692479e-06, "loss": 0.0236, "step": 9994 }, { "epoch": 2.58, "learning_rate": 5.299232612742311e-06, "loss": 0.0244, "step": 9995 }, { "epoch": 2.58, "learning_rate": 5.292861390559012e-06, "loss": 0.0239, "step": 9996 }, { "epoch": 2.58, "learning_rate": 5.286493786658208e-06, "loss": 0.0281, "step": 9997 }, { "epoch": 2.58, "learning_rate": 5.280129801555272e-06, "loss": 0.0196, "step": 9998 }, { "epoch": 2.58, "learning_rate": 5.273769435765241e-06, "loss": 0.0278, "step": 9999 }, { "epoch": 2.58, "learning_rate": 5.267412689802903e-06, "loss": 0.0233, "step": 10000 }, { "epoch": 2.58, "learning_rate": 5.261059564182707e-06, "loss": 0.0268, "step": 10001 }, { "epoch": 2.58, "learning_rate": 5.254710059418827e-06, "loss": 0.0313, "step": 10002 }, { "epoch": 2.58, "learning_rate": 5.248364176025172e-06, "loss": 0.0245, "step": 10003 }, { "epoch": 2.58, "learning_rate": 5.242021914515305e-06, "loss": 0.0217, "step": 10004 }, { "epoch": 2.58, "learning_rate": 5.235683275402542e-06, "loss": 0.0237, "step": 10005 }, { "epoch": 2.58, "learning_rate": 5.2293482591998895e-06, "loss": 0.0296, "step": 10006 }, { "epoch": 2.58, "learning_rate": 5.223016866420044e-06, "loss": 0.0345, "step": 10007 }, { "epoch": 2.58, "learning_rate": 5.216689097575445e-06, "loss": 0.0272, "step": 10008 }, { "epoch": 2.58, "learning_rate": 5.210364953178204e-06, "loss": 0.0244, "step": 10009 }, { "epoch": 2.58, "learning_rate": 5.204044433740141e-06, "loss": 0.0289, "step": 10010 }, { "epoch": 2.58, "learning_rate": 5.197727539772823e-06, "loss": 0.0262, "step": 10011 }, { "epoch": 2.58, "learning_rate": 5.19141427178747e-06, "loss": 0.0255, "step": 10012 }, { "epoch": 2.58, "learning_rate": 5.185104630295057e-06, "loss": 0.0273, "step": 10013 }, { "epoch": 2.58, "learning_rate": 5.178798615806224e-06, "loss": 0.0255, "step": 10014 }, { "epoch": 2.58, "learning_rate": 5.172496228831336e-06, "loss": 0.0209, "step": 10015 }, { "epoch": 2.58, "learning_rate": 5.1661974698804795e-06, "loss": 0.0284, "step": 10016 }, { "epoch": 2.59, "learning_rate": 5.159902339463413e-06, "loss": 0.0307, "step": 10017 }, { "epoch": 2.59, "learning_rate": 5.1536108380896345e-06, "loss": 0.0301, "step": 10018 }, { "epoch": 2.59, "learning_rate": 5.147322966268331e-06, "loss": 0.0317, "step": 10019 }, { "epoch": 2.59, "learning_rate": 5.141038724508385e-06, "loss": 0.0237, "step": 10020 }, { "epoch": 2.59, "learning_rate": 5.134758113318422e-06, "loss": 0.0279, "step": 10021 }, { "epoch": 2.59, "learning_rate": 5.128481133206736e-06, "loss": 0.0213, "step": 10022 }, { "epoch": 2.59, "learning_rate": 5.122207784681337e-06, "loss": 0.0299, "step": 10023 }, { "epoch": 2.59, "learning_rate": 5.1159380682499645e-06, "loss": 0.0237, "step": 10024 }, { "epoch": 2.59, "learning_rate": 5.109671984420028e-06, "loss": 0.0304, "step": 10025 }, { "epoch": 2.59, "learning_rate": 5.103409533698672e-06, "loss": 0.0313, "step": 10026 }, { "epoch": 2.59, "learning_rate": 5.09715071659273e-06, "loss": 0.0256, "step": 10027 }, { "epoch": 2.59, "learning_rate": 5.090895533608736e-06, "loss": 0.0294, "step": 10028 }, { "epoch": 2.59, "learning_rate": 5.084643985252962e-06, "loss": 0.0291, "step": 10029 }, { "epoch": 2.59, "learning_rate": 5.078396072031349e-06, "loss": 0.0273, "step": 10030 }, { "epoch": 2.59, "learning_rate": 5.07215179444957e-06, "loss": 0.0282, "step": 10031 }, { "epoch": 2.59, "learning_rate": 5.065911153012992e-06, "loss": 0.026, "step": 10032 }, { "epoch": 2.59, "learning_rate": 5.059674148226673e-06, "loss": 0.0343, "step": 10033 }, { "epoch": 2.59, "learning_rate": 5.053440780595414e-06, "loss": 0.0292, "step": 10034 }, { "epoch": 2.59, "learning_rate": 5.0472110506236845e-06, "loss": 0.0282, "step": 10035 }, { "epoch": 2.59, "learning_rate": 5.040984958815675e-06, "loss": 0.0297, "step": 10036 }, { "epoch": 2.59, "learning_rate": 5.0347625056753e-06, "loss": 0.0253, "step": 10037 }, { "epoch": 2.59, "learning_rate": 5.028543691706139e-06, "loss": 0.0341, "step": 10038 }, { "epoch": 2.59, "learning_rate": 5.022328517411518e-06, "loss": 0.0285, "step": 10039 }, { "epoch": 2.59, "learning_rate": 5.016116983294438e-06, "loss": 0.0318, "step": 10040 }, { "epoch": 2.59, "learning_rate": 5.009909089857612e-06, "loss": 0.031, "step": 10041 }, { "epoch": 2.59, "learning_rate": 5.003704837603479e-06, "loss": 0.027, "step": 10042 }, { "epoch": 2.59, "learning_rate": 4.99750422703415e-06, "loss": 0.0323, "step": 10043 }, { "epoch": 2.59, "learning_rate": 4.991307258651478e-06, "loss": 0.0354, "step": 10044 }, { "epoch": 2.59, "learning_rate": 4.9851139329569914e-06, "loss": 0.0351, "step": 10045 }, { "epoch": 2.59, "learning_rate": 4.978924250451933e-06, "loss": 0.0265, "step": 10046 }, { "epoch": 2.59, "learning_rate": 4.972738211637262e-06, "loss": 0.0259, "step": 10047 }, { "epoch": 2.59, "learning_rate": 4.966555817013624e-06, "loss": 0.0304, "step": 10048 }, { "epoch": 2.59, "learning_rate": 4.9603770670813735e-06, "loss": 0.0294, "step": 10049 }, { "epoch": 2.59, "learning_rate": 4.954201962340594e-06, "loss": 0.0295, "step": 10050 }, { "epoch": 2.59, "learning_rate": 4.948030503291029e-06, "loss": 0.0265, "step": 10051 }, { "epoch": 2.59, "learning_rate": 4.941862690432181e-06, "loss": 0.0386, "step": 10052 }, { "epoch": 2.59, "learning_rate": 4.935698524263216e-06, "loss": 0.0263, "step": 10053 }, { "epoch": 2.59, "learning_rate": 4.929538005283013e-06, "loss": 0.0265, "step": 10054 }, { "epoch": 2.59, "learning_rate": 4.923381133990173e-06, "loss": 0.0182, "step": 10055 }, { "epoch": 2.6, "learning_rate": 4.917227910882977e-06, "loss": 0.0309, "step": 10056 }, { "epoch": 2.6, "learning_rate": 4.9110783364594405e-06, "loss": 0.0215, "step": 10057 }, { "epoch": 2.6, "learning_rate": 4.904932411217262e-06, "loss": 0.0242, "step": 10058 }, { "epoch": 2.6, "learning_rate": 4.8987901356538306e-06, "loss": 0.0312, "step": 10059 }, { "epoch": 2.6, "learning_rate": 4.892651510266283e-06, "loss": 0.028, "step": 10060 }, { "epoch": 2.6, "learning_rate": 4.886516535551428e-06, "loss": 0.0303, "step": 10061 }, { "epoch": 2.6, "learning_rate": 4.880385212005778e-06, "loss": 0.027, "step": 10062 }, { "epoch": 2.6, "learning_rate": 4.874257540125577e-06, "loss": 0.0229, "step": 10063 }, { "epoch": 2.6, "learning_rate": 4.868133520406737e-06, "loss": 0.0226, "step": 10064 }, { "epoch": 2.6, "learning_rate": 4.8620131533449135e-06, "loss": 0.0239, "step": 10065 }, { "epoch": 2.6, "learning_rate": 4.8558964394354315e-06, "loss": 0.0305, "step": 10066 }, { "epoch": 2.6, "learning_rate": 4.849783379173334e-06, "loss": 0.023, "step": 10067 }, { "epoch": 2.6, "learning_rate": 4.843673973053386e-06, "loss": 0.0276, "step": 10068 }, { "epoch": 2.6, "learning_rate": 4.837568221570016e-06, "loss": 0.0211, "step": 10069 }, { "epoch": 2.6, "learning_rate": 4.831466125217405e-06, "loss": 0.0254, "step": 10070 }, { "epoch": 2.6, "learning_rate": 4.825367684489396e-06, "loss": 0.0254, "step": 10071 }, { "epoch": 2.6, "learning_rate": 4.819272899879556e-06, "loss": 0.0271, "step": 10072 }, { "epoch": 2.6, "learning_rate": 4.81318177188117e-06, "loss": 0.0223, "step": 10073 }, { "epoch": 2.6, "learning_rate": 4.807094300987186e-06, "loss": 0.0206, "step": 10074 }, { "epoch": 2.6, "learning_rate": 4.801010487690305e-06, "loss": 0.0194, "step": 10075 }, { "epoch": 2.6, "learning_rate": 4.7949303324829e-06, "loss": 0.0195, "step": 10076 }, { "epoch": 2.6, "learning_rate": 4.788853835857038e-06, "loss": 0.0228, "step": 10077 }, { "epoch": 2.6, "learning_rate": 4.782780998304542e-06, "loss": 0.0231, "step": 10078 }, { "epoch": 2.6, "learning_rate": 4.776711820316881e-06, "loss": 0.0257, "step": 10079 }, { "epoch": 2.6, "learning_rate": 4.770646302385251e-06, "loss": 0.0235, "step": 10080 }, { "epoch": 2.6, "learning_rate": 4.7645844450005695e-06, "loss": 0.0201, "step": 10081 }, { "epoch": 2.6, "learning_rate": 4.758526248653422e-06, "loss": 0.0278, "step": 10082 }, { "epoch": 2.6, "learning_rate": 4.7524717138341335e-06, "loss": 0.0207, "step": 10083 }, { "epoch": 2.6, "learning_rate": 4.746420841032706e-06, "loss": 0.0269, "step": 10084 }, { "epoch": 2.6, "learning_rate": 4.740373630738848e-06, "loss": 0.0253, "step": 10085 }, { "epoch": 2.6, "learning_rate": 4.734330083441996e-06, "loss": 0.0201, "step": 10086 }, { "epoch": 2.6, "learning_rate": 4.728290199631252e-06, "loss": 0.0222, "step": 10087 }, { "epoch": 2.6, "learning_rate": 4.722253979795465e-06, "loss": 0.0219, "step": 10088 }, { "epoch": 2.6, "learning_rate": 4.716221424423145e-06, "loss": 0.029, "step": 10089 }, { "epoch": 2.6, "learning_rate": 4.710192534002528e-06, "loss": 0.0251, "step": 10090 }, { "epoch": 2.6, "learning_rate": 4.704167309021562e-06, "loss": 0.02, "step": 10091 }, { "epoch": 2.6, "learning_rate": 4.698145749967881e-06, "loss": 0.0226, "step": 10092 }, { "epoch": 2.6, "learning_rate": 4.69212785732881e-06, "loss": 0.022, "step": 10093 }, { "epoch": 2.6, "learning_rate": 4.686113631591421e-06, "loss": 0.0196, "step": 10094 }, { "epoch": 2.61, "learning_rate": 4.680103073242448e-06, "loss": 0.023, "step": 10095 }, { "epoch": 2.61, "learning_rate": 4.67409618276835e-06, "loss": 0.0237, "step": 10096 }, { "epoch": 2.61, "learning_rate": 4.668092960655285e-06, "loss": 0.025, "step": 10097 }, { "epoch": 2.61, "learning_rate": 4.662093407389101e-06, "loss": 0.0197, "step": 10098 }, { "epoch": 2.61, "learning_rate": 4.6560975234553715e-06, "loss": 0.0191, "step": 10099 }, { "epoch": 2.61, "learning_rate": 4.650105309339342e-06, "loss": 0.0234, "step": 10100 }, { "epoch": 2.61, "learning_rate": 4.644116765526007e-06, "loss": 0.0326, "step": 10101 }, { "epoch": 2.61, "learning_rate": 4.63813189250003e-06, "loss": 0.0252, "step": 10102 }, { "epoch": 2.61, "learning_rate": 4.6321506907457625e-06, "loss": 0.0253, "step": 10103 }, { "epoch": 2.61, "learning_rate": 4.62617316074731e-06, "loss": 0.0305, "step": 10104 }, { "epoch": 2.61, "learning_rate": 4.620199302988437e-06, "loss": 0.025, "step": 10105 }, { "epoch": 2.61, "learning_rate": 4.6142291179526235e-06, "loss": 0.0266, "step": 10106 }, { "epoch": 2.61, "learning_rate": 4.608262606123065e-06, "loss": 0.0259, "step": 10107 }, { "epoch": 2.61, "learning_rate": 4.6022997679826375e-06, "loss": 0.0225, "step": 10108 }, { "epoch": 2.61, "learning_rate": 4.596340604013944e-06, "loss": 0.0246, "step": 10109 }, { "epoch": 2.61, "learning_rate": 4.590385114699269e-06, "loss": 0.0201, "step": 10110 }, { "epoch": 2.61, "learning_rate": 4.584433300520602e-06, "loss": 0.0263, "step": 10111 }, { "epoch": 2.61, "learning_rate": 4.578485161959661e-06, "loss": 0.0265, "step": 10112 }, { "epoch": 2.61, "learning_rate": 4.5725406994978235e-06, "loss": 0.0288, "step": 10113 }, { "epoch": 2.61, "learning_rate": 4.566599913616209e-06, "loss": 0.0272, "step": 10114 }, { "epoch": 2.61, "learning_rate": 4.5606628047956235e-06, "loss": 0.0295, "step": 10115 }, { "epoch": 2.61, "learning_rate": 4.554729373516553e-06, "loss": 0.0292, "step": 10116 }, { "epoch": 2.61, "learning_rate": 4.548799620259242e-06, "loss": 0.0248, "step": 10117 }, { "epoch": 2.61, "learning_rate": 4.542873545503579e-06, "loss": 0.0261, "step": 10118 }, { "epoch": 2.61, "learning_rate": 4.53695114972918e-06, "loss": 0.0268, "step": 10119 }, { "epoch": 2.61, "learning_rate": 4.531032433415372e-06, "loss": 0.0274, "step": 10120 }, { "epoch": 2.61, "learning_rate": 4.525117397041162e-06, "loss": 0.0335, "step": 10121 }, { "epoch": 2.61, "learning_rate": 4.519206041085289e-06, "loss": 0.0346, "step": 10122 }, { "epoch": 2.61, "learning_rate": 4.51329836602617e-06, "loss": 0.0298, "step": 10123 }, { "epoch": 2.61, "learning_rate": 4.5073943723419164e-06, "loss": 0.0257, "step": 10124 }, { "epoch": 2.61, "learning_rate": 4.501494060510375e-06, "loss": 0.0306, "step": 10125 }, { "epoch": 2.61, "learning_rate": 4.4955974310090636e-06, "loss": 0.034, "step": 10126 }, { "epoch": 2.61, "learning_rate": 4.489704484315227e-06, "loss": 0.0321, "step": 10127 }, { "epoch": 2.61, "learning_rate": 4.483815220905791e-06, "loss": 0.0235, "step": 10128 }, { "epoch": 2.61, "learning_rate": 4.477929641257378e-06, "loss": 0.0289, "step": 10129 }, { "epoch": 2.61, "learning_rate": 4.472047745846347e-06, "loss": 0.0271, "step": 10130 }, { "epoch": 2.61, "learning_rate": 4.4661695351487346e-06, "loss": 0.0284, "step": 10131 }, { "epoch": 2.61, "learning_rate": 4.460295009640264e-06, "loss": 0.0247, "step": 10132 }, { "epoch": 2.61, "learning_rate": 4.454424169796401e-06, "loss": 0.0255, "step": 10133 }, { "epoch": 2.62, "learning_rate": 4.44855701609227e-06, "loss": 0.0318, "step": 10134 }, { "epoch": 2.62, "learning_rate": 4.442693549002741e-06, "loss": 0.0248, "step": 10135 }, { "epoch": 2.62, "learning_rate": 4.436833769002341e-06, "loss": 0.0287, "step": 10136 }, { "epoch": 2.62, "learning_rate": 4.430977676565323e-06, "loss": 0.0271, "step": 10137 }, { "epoch": 2.62, "learning_rate": 4.425125272165648e-06, "loss": 0.0291, "step": 10138 }, { "epoch": 2.62, "learning_rate": 4.419276556276952e-06, "loss": 0.0293, "step": 10139 }, { "epoch": 2.62, "learning_rate": 4.413431529372608e-06, "loss": 0.0281, "step": 10140 }, { "epoch": 2.62, "learning_rate": 4.40759019192567e-06, "loss": 0.0294, "step": 10141 }, { "epoch": 2.62, "learning_rate": 4.401752544408871e-06, "loss": 0.0278, "step": 10142 }, { "epoch": 2.62, "learning_rate": 4.395918587294701e-06, "loss": 0.0321, "step": 10143 }, { "epoch": 2.62, "learning_rate": 4.390088321055297e-06, "loss": 0.0246, "step": 10144 }, { "epoch": 2.62, "learning_rate": 4.384261746162527e-06, "loss": 0.0257, "step": 10145 }, { "epoch": 2.62, "learning_rate": 4.378438863087958e-06, "loss": 0.0352, "step": 10146 }, { "epoch": 2.62, "learning_rate": 4.372619672302841e-06, "loss": 0.0286, "step": 10147 }, { "epoch": 2.62, "learning_rate": 4.366804174278155e-06, "loss": 0.034, "step": 10148 }, { "epoch": 2.62, "learning_rate": 4.360992369484562e-06, "loss": 0.0333, "step": 10149 }, { "epoch": 2.62, "learning_rate": 4.35518425839242e-06, "loss": 0.035, "step": 10150 }, { "epoch": 2.62, "learning_rate": 4.349379841471807e-06, "loss": 0.0167, "step": 10151 }, { "epoch": 2.62, "learning_rate": 4.343579119192481e-06, "loss": 0.0289, "step": 10152 }, { "epoch": 2.62, "learning_rate": 4.337782092023929e-06, "loss": 0.0323, "step": 10153 }, { "epoch": 2.62, "learning_rate": 4.331988760435307e-06, "loss": 0.0296, "step": 10154 }, { "epoch": 2.62, "learning_rate": 4.326199124895486e-06, "loss": 0.0206, "step": 10155 }, { "epoch": 2.62, "learning_rate": 4.320413185873045e-06, "loss": 0.0183, "step": 10156 }, { "epoch": 2.62, "learning_rate": 4.3146309438362564e-06, "loss": 0.0419, "step": 10157 }, { "epoch": 2.62, "learning_rate": 4.3088523992530995e-06, "loss": 0.0286, "step": 10158 }, { "epoch": 2.62, "learning_rate": 4.303077552591239e-06, "loss": 0.0274, "step": 10159 }, { "epoch": 2.62, "learning_rate": 4.297306404318052e-06, "loss": 0.0219, "step": 10160 }, { "epoch": 2.62, "learning_rate": 4.2915389549006255e-06, "loss": 0.0249, "step": 10161 }, { "epoch": 2.62, "learning_rate": 4.285775204805726e-06, "loss": 0.0288, "step": 10162 }, { "epoch": 2.62, "learning_rate": 4.280015154499828e-06, "loss": 0.0209, "step": 10163 }, { "epoch": 2.62, "learning_rate": 4.274258804449121e-06, "loss": 0.0265, "step": 10164 }, { "epoch": 2.62, "learning_rate": 4.268506155119473e-06, "loss": 0.0212, "step": 10165 }, { "epoch": 2.62, "learning_rate": 4.26275720697647e-06, "loss": 0.0236, "step": 10166 }, { "epoch": 2.62, "learning_rate": 4.257011960485397e-06, "loss": 0.0285, "step": 10167 }, { "epoch": 2.62, "learning_rate": 4.251270416111213e-06, "loss": 0.0291, "step": 10168 }, { "epoch": 2.62, "learning_rate": 4.2455325743186225e-06, "loss": 0.0221, "step": 10169 }, { "epoch": 2.62, "learning_rate": 4.23979843557199e-06, "loss": 0.0169, "step": 10170 }, { "epoch": 2.62, "learning_rate": 4.234068000335406e-06, "loss": 0.0244, "step": 10171 }, { "epoch": 2.63, "learning_rate": 4.2283412690726554e-06, "loss": 0.0191, "step": 10172 }, { "epoch": 2.63, "learning_rate": 4.222618242247195e-06, "loss": 0.0305, "step": 10173 }, { "epoch": 2.63, "learning_rate": 4.216898920322237e-06, "loss": 0.0225, "step": 10174 }, { "epoch": 2.63, "learning_rate": 4.21118330376064e-06, "loss": 0.0194, "step": 10175 }, { "epoch": 2.63, "learning_rate": 4.205471393025007e-06, "loss": 0.0225, "step": 10176 }, { "epoch": 2.63, "learning_rate": 4.19976318857761e-06, "loss": 0.0308, "step": 10177 }, { "epoch": 2.63, "learning_rate": 4.194058690880415e-06, "loss": 0.0231, "step": 10178 }, { "epoch": 2.63, "learning_rate": 4.188357900395135e-06, "loss": 0.025, "step": 10179 }, { "epoch": 2.63, "learning_rate": 4.182660817583134e-06, "loss": 0.0282, "step": 10180 }, { "epoch": 2.63, "learning_rate": 4.176967442905483e-06, "loss": 0.0256, "step": 10181 }, { "epoch": 2.63, "learning_rate": 4.171277776822991e-06, "loss": 0.0292, "step": 10182 }, { "epoch": 2.63, "learning_rate": 4.165591819796117e-06, "loss": 0.0197, "step": 10183 }, { "epoch": 2.63, "learning_rate": 4.159909572285053e-06, "loss": 0.0239, "step": 10184 }, { "epoch": 2.63, "learning_rate": 4.154231034749684e-06, "loss": 0.0275, "step": 10185 }, { "epoch": 2.63, "learning_rate": 4.148556207649579e-06, "loss": 0.02, "step": 10186 }, { "epoch": 2.63, "learning_rate": 4.1428850914440324e-06, "loss": 0.0269, "step": 10187 }, { "epoch": 2.63, "learning_rate": 4.137217686592004e-06, "loss": 0.0218, "step": 10188 }, { "epoch": 2.63, "learning_rate": 4.131553993552201e-06, "loss": 0.0299, "step": 10189 }, { "epoch": 2.63, "learning_rate": 4.125894012782988e-06, "loss": 0.0281, "step": 10190 }, { "epoch": 2.63, "learning_rate": 4.120237744742439e-06, "loss": 0.025, "step": 10191 }, { "epoch": 2.63, "learning_rate": 4.114585189888348e-06, "loss": 0.024, "step": 10192 }, { "epoch": 2.63, "learning_rate": 4.108936348678172e-06, "loss": 0.0233, "step": 10193 }, { "epoch": 2.63, "learning_rate": 4.103291221569111e-06, "loss": 0.0295, "step": 10194 }, { "epoch": 2.63, "learning_rate": 4.0976498090180334e-06, "loss": 0.0252, "step": 10195 }, { "epoch": 2.63, "learning_rate": 4.092012111481508e-06, "loss": 0.024, "step": 10196 }, { "epoch": 2.63, "learning_rate": 4.086378129415819e-06, "loss": 0.0228, "step": 10197 }, { "epoch": 2.63, "learning_rate": 4.080747863276934e-06, "loss": 0.0287, "step": 10198 }, { "epoch": 2.63, "learning_rate": 4.0751213135205414e-06, "loss": 0.0294, "step": 10199 }, { "epoch": 2.63, "learning_rate": 4.069498480602008e-06, "loss": 0.025, "step": 10200 }, { "epoch": 2.63, "learning_rate": 4.063879364976392e-06, "loss": 0.0257, "step": 10201 }, { "epoch": 2.63, "learning_rate": 4.058263967098485e-06, "loss": 0.0237, "step": 10202 }, { "epoch": 2.63, "learning_rate": 4.052652287422754e-06, "loss": 0.0258, "step": 10203 }, { "epoch": 2.63, "learning_rate": 4.047044326403354e-06, "loss": 0.0265, "step": 10204 }, { "epoch": 2.63, "learning_rate": 4.04144008449418e-06, "loss": 0.0202, "step": 10205 }, { "epoch": 2.63, "learning_rate": 4.035839562148769e-06, "loss": 0.0241, "step": 10206 }, { "epoch": 2.63, "learning_rate": 4.030242759820418e-06, "loss": 0.024, "step": 10207 }, { "epoch": 2.63, "learning_rate": 4.024649677962084e-06, "loss": 0.0328, "step": 10208 }, { "epoch": 2.63, "learning_rate": 4.019060317026413e-06, "loss": 0.0294, "step": 10209 }, { "epoch": 2.63, "learning_rate": 4.0134746774657895e-06, "loss": 0.0243, "step": 10210 }, { "epoch": 2.64, "learning_rate": 4.007892759732268e-06, "loss": 0.0279, "step": 10211 }, { "epoch": 2.64, "learning_rate": 4.002314564277615e-06, "loss": 0.0244, "step": 10212 }, { "epoch": 2.64, "learning_rate": 3.996740091553291e-06, "loss": 0.0216, "step": 10213 }, { "epoch": 2.64, "learning_rate": 3.991169342010437e-06, "loss": 0.0269, "step": 10214 }, { "epoch": 2.64, "learning_rate": 3.985602316099935e-06, "loss": 0.0286, "step": 10215 }, { "epoch": 2.64, "learning_rate": 3.980039014272324e-06, "loss": 0.0287, "step": 10216 }, { "epoch": 2.64, "learning_rate": 3.9744794369778735e-06, "loss": 0.0233, "step": 10217 }, { "epoch": 2.64, "learning_rate": 3.968923584666529e-06, "loss": 0.0283, "step": 10218 }, { "epoch": 2.64, "learning_rate": 3.963371457787929e-06, "loss": 0.0268, "step": 10219 }, { "epoch": 2.64, "learning_rate": 3.957823056791448e-06, "loss": 0.0237, "step": 10220 }, { "epoch": 2.64, "learning_rate": 3.952278382126123e-06, "loss": 0.0221, "step": 10221 }, { "epoch": 2.64, "learning_rate": 3.9467374342406885e-06, "loss": 0.0218, "step": 10222 }, { "epoch": 2.64, "learning_rate": 3.941200213583612e-06, "loss": 0.0214, "step": 10223 }, { "epoch": 2.64, "learning_rate": 3.935666720603026e-06, "loss": 0.0262, "step": 10224 }, { "epoch": 2.64, "learning_rate": 3.9301369557467775e-06, "loss": 0.0288, "step": 10225 }, { "epoch": 2.64, "learning_rate": 3.924610919462401e-06, "loss": 0.0264, "step": 10226 }, { "epoch": 2.64, "learning_rate": 3.919088612197136e-06, "loss": 0.0288, "step": 10227 }, { "epoch": 2.64, "learning_rate": 3.913570034397929e-06, "loss": 0.0271, "step": 10228 }, { "epoch": 2.64, "learning_rate": 3.908055186511394e-06, "loss": 0.0233, "step": 10229 }, { "epoch": 2.64, "learning_rate": 3.902544068983894e-06, "loss": 0.0286, "step": 10230 }, { "epoch": 2.64, "learning_rate": 3.897036682261434e-06, "loss": 0.0261, "step": 10231 }, { "epoch": 2.64, "learning_rate": 3.891533026789751e-06, "loss": 0.0256, "step": 10232 }, { "epoch": 2.64, "learning_rate": 3.886033103014286e-06, "loss": 0.0254, "step": 10233 }, { "epoch": 2.64, "learning_rate": 3.880536911380139e-06, "loss": 0.0291, "step": 10234 }, { "epoch": 2.64, "learning_rate": 3.875044452332155e-06, "loss": 0.0274, "step": 10235 }, { "epoch": 2.64, "learning_rate": 3.86955572631485e-06, "loss": 0.0266, "step": 10236 }, { "epoch": 2.64, "learning_rate": 3.864070733772429e-06, "loss": 0.0291, "step": 10237 }, { "epoch": 2.64, "learning_rate": 3.858589475148833e-06, "loss": 0.0246, "step": 10238 }, { "epoch": 2.64, "learning_rate": 3.85311195088765e-06, "loss": 0.0233, "step": 10239 }, { "epoch": 2.64, "learning_rate": 3.8476381614322156e-06, "loss": 0.0261, "step": 10240 }, { "epoch": 2.64, "learning_rate": 3.84216810722553e-06, "loss": 0.0299, "step": 10241 }, { "epoch": 2.64, "learning_rate": 3.836701788710295e-06, "loss": 0.0326, "step": 10242 }, { "epoch": 2.64, "learning_rate": 3.831239206328935e-06, "loss": 0.0346, "step": 10243 }, { "epoch": 2.64, "learning_rate": 3.825780360523534e-06, "loss": 0.0257, "step": 10244 }, { "epoch": 2.64, "learning_rate": 3.820325251735895e-06, "loss": 0.0304, "step": 10245 }, { "epoch": 2.64, "learning_rate": 3.814873880407527e-06, "loss": 0.0278, "step": 10246 }, { "epoch": 2.64, "learning_rate": 3.8094262469796085e-06, "loss": 0.0283, "step": 10247 }, { "epoch": 2.64, "learning_rate": 3.8039823518930594e-06, "loss": 0.0375, "step": 10248 }, { "epoch": 2.64, "learning_rate": 3.7985421955884503e-06, "loss": 0.0306, "step": 10249 }, { "epoch": 2.65, "learning_rate": 3.793105778506062e-06, "loss": 0.0291, "step": 10250 }, { "epoch": 2.65, "learning_rate": 3.787673101085909e-06, "loss": 0.0285, "step": 10251 }, { "epoch": 2.65, "learning_rate": 3.78224416376764e-06, "loss": 0.0249, "step": 10252 }, { "epoch": 2.65, "learning_rate": 3.776818966990664e-06, "loss": 0.0259, "step": 10253 }, { "epoch": 2.65, "learning_rate": 3.7713975111940468e-06, "loss": 0.0223, "step": 10254 }, { "epoch": 2.65, "learning_rate": 3.7659797968165546e-06, "loss": 0.0252, "step": 10255 }, { "epoch": 2.65, "learning_rate": 3.7605658242966802e-06, "loss": 0.0272, "step": 10256 }, { "epoch": 2.65, "learning_rate": 3.755155594072568e-06, "loss": 0.021, "step": 10257 }, { "epoch": 2.65, "learning_rate": 3.7497491065821013e-06, "loss": 0.0284, "step": 10258 }, { "epoch": 2.65, "learning_rate": 3.7443463622628404e-06, "loss": 0.0211, "step": 10259 }, { "epoch": 2.65, "learning_rate": 3.7389473615520364e-06, "loss": 0.0322, "step": 10260 }, { "epoch": 2.65, "learning_rate": 3.7335521048866563e-06, "loss": 0.0255, "step": 10261 }, { "epoch": 2.65, "learning_rate": 3.7281605927033558e-06, "loss": 0.0231, "step": 10262 }, { "epoch": 2.65, "learning_rate": 3.7227728254384752e-06, "loss": 0.0204, "step": 10263 }, { "epoch": 2.65, "learning_rate": 3.717388803528071e-06, "loss": 0.0228, "step": 10264 }, { "epoch": 2.65, "learning_rate": 3.7120085274078844e-06, "loss": 0.0276, "step": 10265 }, { "epoch": 2.65, "learning_rate": 3.7066319975133656e-06, "loss": 0.0286, "step": 10266 }, { "epoch": 2.65, "learning_rate": 3.7012592142796455e-06, "loss": 0.0259, "step": 10267 }, { "epoch": 2.65, "learning_rate": 3.6958901781415534e-06, "loss": 0.0163, "step": 10268 }, { "epoch": 2.65, "learning_rate": 3.6905248895336363e-06, "loss": 0.0255, "step": 10269 }, { "epoch": 2.65, "learning_rate": 3.6851633488901083e-06, "loss": 0.0355, "step": 10270 }, { "epoch": 2.65, "learning_rate": 3.6798055566449107e-06, "loss": 0.0274, "step": 10271 }, { "epoch": 2.65, "learning_rate": 3.6744515132316583e-06, "loss": 0.0234, "step": 10272 }, { "epoch": 2.65, "learning_rate": 3.669101219083654e-06, "loss": 0.0194, "step": 10273 }, { "epoch": 2.65, "learning_rate": 3.663754674633946e-06, "loss": 0.0267, "step": 10274 }, { "epoch": 2.65, "learning_rate": 3.6584118803152156e-06, "loss": 0.0209, "step": 10275 }, { "epoch": 2.65, "learning_rate": 3.65307283655989e-06, "loss": 0.0209, "step": 10276 }, { "epoch": 2.65, "learning_rate": 3.6477375438000673e-06, "loss": 0.0258, "step": 10277 }, { "epoch": 2.65, "learning_rate": 3.642406002467541e-06, "loss": 0.0202, "step": 10278 }, { "epoch": 2.65, "learning_rate": 3.6370782129938276e-06, "loss": 0.0224, "step": 10279 }, { "epoch": 2.65, "learning_rate": 3.6317541758101036e-06, "loss": 0.024, "step": 10280 }, { "epoch": 2.65, "learning_rate": 3.6264338913472695e-06, "loss": 0.0269, "step": 10281 }, { "epoch": 2.65, "learning_rate": 3.6211173600359085e-06, "loss": 0.0257, "step": 10282 }, { "epoch": 2.65, "learning_rate": 3.615804582306298e-06, "loss": 0.0271, "step": 10283 }, { "epoch": 2.65, "learning_rate": 3.610495558588428e-06, "loss": 0.0218, "step": 10284 }, { "epoch": 2.65, "learning_rate": 3.6051902893119716e-06, "loss": 0.02, "step": 10285 }, { "epoch": 2.65, "learning_rate": 3.5998887749062848e-06, "loss": 0.0293, "step": 10286 }, { "epoch": 2.65, "learning_rate": 3.5945910158004525e-06, "loss": 0.0275, "step": 10287 }, { "epoch": 2.65, "learning_rate": 3.589297012423226e-06, "loss": 0.0195, "step": 10288 }, { "epoch": 2.66, "learning_rate": 3.5840067652030797e-06, "loss": 0.0271, "step": 10289 }, { "epoch": 2.66, "learning_rate": 3.578720274568159e-06, "loss": 0.0188, "step": 10290 }, { "epoch": 2.66, "learning_rate": 3.5734375409463117e-06, "loss": 0.0285, "step": 10291 }, { "epoch": 2.66, "learning_rate": 3.568158564765095e-06, "loss": 0.0312, "step": 10292 }, { "epoch": 2.66, "learning_rate": 3.5628833464517398e-06, "loss": 0.0257, "step": 10293 }, { "epoch": 2.66, "learning_rate": 3.557611886433204e-06, "loss": 0.0274, "step": 10294 }, { "epoch": 2.66, "learning_rate": 3.5523441851361084e-06, "loss": 0.03, "step": 10295 }, { "epoch": 2.66, "learning_rate": 3.5470802429867832e-06, "loss": 0.0214, "step": 10296 }, { "epoch": 2.66, "learning_rate": 3.5418200604112604e-06, "loss": 0.028, "step": 10297 }, { "epoch": 2.66, "learning_rate": 3.5365636378352607e-06, "loss": 0.0233, "step": 10298 }, { "epoch": 2.66, "learning_rate": 3.5313109756842045e-06, "loss": 0.021, "step": 10299 }, { "epoch": 2.66, "learning_rate": 3.5260620743832073e-06, "loss": 0.0221, "step": 10300 }, { "epoch": 2.66, "learning_rate": 3.520816934357063e-06, "loss": 0.0246, "step": 10301 }, { "epoch": 2.66, "learning_rate": 3.5155755560302985e-06, "loss": 0.028, "step": 10302 }, { "epoch": 2.66, "learning_rate": 3.510337939827102e-06, "loss": 0.0313, "step": 10303 }, { "epoch": 2.66, "learning_rate": 3.5051040861713623e-06, "loss": 0.0243, "step": 10304 }, { "epoch": 2.66, "learning_rate": 3.499873995486691e-06, "loss": 0.029, "step": 10305 }, { "epoch": 2.66, "learning_rate": 3.494647668196349e-06, "loss": 0.0277, "step": 10306 }, { "epoch": 2.66, "learning_rate": 3.4894251047233484e-06, "loss": 0.0296, "step": 10307 }, { "epoch": 2.66, "learning_rate": 3.484206305490345e-06, "loss": 0.0265, "step": 10308 }, { "epoch": 2.66, "learning_rate": 3.478991270919718e-06, "loss": 0.0235, "step": 10309 }, { "epoch": 2.66, "learning_rate": 3.473780001433541e-06, "loss": 0.0227, "step": 10310 }, { "epoch": 2.66, "learning_rate": 3.4685724974535592e-06, "loss": 0.0242, "step": 10311 }, { "epoch": 2.66, "learning_rate": 3.4633687594012586e-06, "loss": 0.0349, "step": 10312 }, { "epoch": 2.66, "learning_rate": 3.4581687876977795e-06, "loss": 0.0246, "step": 10313 }, { "epoch": 2.66, "learning_rate": 3.4529725827639637e-06, "loss": 0.0204, "step": 10314 }, { "epoch": 2.66, "learning_rate": 3.447780145020374e-06, "loss": 0.0244, "step": 10315 }, { "epoch": 2.66, "learning_rate": 3.4425914748872247e-06, "loss": 0.0335, "step": 10316 }, { "epoch": 2.66, "learning_rate": 3.4374065727844796e-06, "loss": 0.0296, "step": 10317 }, { "epoch": 2.66, "learning_rate": 3.4322254391317475e-06, "loss": 0.0286, "step": 10318 }, { "epoch": 2.66, "learning_rate": 3.4270480743483547e-06, "loss": 0.0293, "step": 10319 }, { "epoch": 2.66, "learning_rate": 3.4218744788533376e-06, "loss": 0.0314, "step": 10320 }, { "epoch": 2.66, "learning_rate": 3.4167046530653834e-06, "loss": 0.0266, "step": 10321 }, { "epoch": 2.66, "learning_rate": 3.41153859740293e-06, "loss": 0.035, "step": 10322 }, { "epoch": 2.66, "learning_rate": 3.4063763122840654e-06, "loss": 0.0246, "step": 10323 }, { "epoch": 2.66, "learning_rate": 3.401217798126588e-06, "loss": 0.0225, "step": 10324 }, { "epoch": 2.66, "learning_rate": 3.396063055347998e-06, "loss": 0.0257, "step": 10325 }, { "epoch": 2.66, "learning_rate": 3.390912084365483e-06, "loss": 0.0226, "step": 10326 }, { "epoch": 2.67, "learning_rate": 3.3857648855959214e-06, "loss": 0.0246, "step": 10327 }, { "epoch": 2.67, "learning_rate": 3.3806214594559014e-06, "loss": 0.0314, "step": 10328 }, { "epoch": 2.67, "learning_rate": 3.37548180636168e-06, "loss": 0.0291, "step": 10329 }, { "epoch": 2.67, "learning_rate": 3.370345926729246e-06, "loss": 0.0271, "step": 10330 }, { "epoch": 2.67, "learning_rate": 3.3652138209742447e-06, "loss": 0.0312, "step": 10331 }, { "epoch": 2.67, "learning_rate": 3.3600854895120327e-06, "loss": 0.0286, "step": 10332 }, { "epoch": 2.67, "learning_rate": 3.354960932757678e-06, "loss": 0.0264, "step": 10333 }, { "epoch": 2.67, "learning_rate": 3.349840151125905e-06, "loss": 0.025, "step": 10334 }, { "epoch": 2.67, "learning_rate": 3.34472314503117e-06, "loss": 0.0268, "step": 10335 }, { "epoch": 2.67, "learning_rate": 3.3396099148876036e-06, "loss": 0.0258, "step": 10336 }, { "epoch": 2.67, "learning_rate": 3.334500461109019e-06, "loss": 0.0323, "step": 10337 }, { "epoch": 2.67, "learning_rate": 3.329394784108969e-06, "loss": 0.0246, "step": 10338 }, { "epoch": 2.67, "learning_rate": 3.3242928843006447e-06, "loss": 0.0247, "step": 10339 }, { "epoch": 2.67, "learning_rate": 3.319194762096978e-06, "loss": 0.0285, "step": 10340 }, { "epoch": 2.67, "learning_rate": 3.3141004179105595e-06, "loss": 0.0292, "step": 10341 }, { "epoch": 2.67, "learning_rate": 3.309009852153694e-06, "loss": 0.0258, "step": 10342 }, { "epoch": 2.67, "learning_rate": 3.30392306523839e-06, "loss": 0.0311, "step": 10343 }, { "epoch": 2.67, "learning_rate": 3.298840057576319e-06, "loss": 0.0269, "step": 10344 }, { "epoch": 2.67, "learning_rate": 3.2937608295788625e-06, "loss": 0.0269, "step": 10345 }, { "epoch": 2.67, "learning_rate": 3.2886853816571194e-06, "loss": 0.0256, "step": 10346 }, { "epoch": 2.67, "learning_rate": 3.283613714221834e-06, "loss": 0.0304, "step": 10347 }, { "epoch": 2.67, "learning_rate": 3.2785458276834878e-06, "loss": 0.0325, "step": 10348 }, { "epoch": 2.67, "learning_rate": 3.273481722452243e-06, "loss": 0.0326, "step": 10349 }, { "epoch": 2.67, "learning_rate": 3.2684213989379376e-06, "loss": 0.035, "step": 10350 }, { "epoch": 2.67, "learning_rate": 3.263364857550133e-06, "loss": 0.0121, "step": 10351 }, { "epoch": 2.67, "learning_rate": 3.2583120986980575e-06, "loss": 0.0203, "step": 10352 }, { "epoch": 2.67, "learning_rate": 3.2532631227906562e-06, "loss": 0.0288, "step": 10353 }, { "epoch": 2.67, "learning_rate": 3.2482179302365633e-06, "loss": 0.0251, "step": 10354 }, { "epoch": 2.67, "learning_rate": 3.2431765214440803e-06, "loss": 0.0214, "step": 10355 }, { "epoch": 2.67, "learning_rate": 3.2381388968212412e-06, "loss": 0.0288, "step": 10356 }, { "epoch": 2.67, "learning_rate": 3.2331050567757536e-06, "loss": 0.0249, "step": 10357 }, { "epoch": 2.67, "learning_rate": 3.2280750017150084e-06, "loss": 0.0294, "step": 10358 }, { "epoch": 2.67, "learning_rate": 3.2230487320461244e-06, "loss": 0.0243, "step": 10359 }, { "epoch": 2.67, "learning_rate": 3.2180262481758706e-06, "loss": 0.0242, "step": 10360 }, { "epoch": 2.67, "learning_rate": 3.213007550510755e-06, "loss": 0.023, "step": 10361 }, { "epoch": 2.67, "learning_rate": 3.207992639456936e-06, "loss": 0.0206, "step": 10362 }, { "epoch": 2.67, "learning_rate": 3.2029815154202893e-06, "loss": 0.0191, "step": 10363 }, { "epoch": 2.67, "learning_rate": 3.19797417880639e-06, "loss": 0.0293, "step": 10364 }, { "epoch": 2.67, "learning_rate": 3.192970630020481e-06, "loss": 0.0278, "step": 10365 }, { "epoch": 2.68, "learning_rate": 3.1879708694675324e-06, "loss": 0.0347, "step": 10366 }, { "epoch": 2.68, "learning_rate": 3.1829748975521767e-06, "loss": 0.0295, "step": 10367 }, { "epoch": 2.68, "learning_rate": 3.177982714678757e-06, "loss": 0.0264, "step": 10368 }, { "epoch": 2.68, "learning_rate": 3.1729943212513104e-06, "loss": 0.0231, "step": 10369 }, { "epoch": 2.68, "learning_rate": 3.168009717673548e-06, "loss": 0.0254, "step": 10370 }, { "epoch": 2.68, "learning_rate": 3.163028904348908e-06, "loss": 0.0285, "step": 10371 }, { "epoch": 2.68, "learning_rate": 3.1580518816804903e-06, "loss": 0.0224, "step": 10372 }, { "epoch": 2.68, "learning_rate": 3.1530786500711006e-06, "loss": 0.0344, "step": 10373 }, { "epoch": 2.68, "learning_rate": 3.1481092099232446e-06, "loss": 0.022, "step": 10374 }, { "epoch": 2.68, "learning_rate": 3.143143561639106e-06, "loss": 0.0246, "step": 10375 }, { "epoch": 2.68, "learning_rate": 3.138181705620569e-06, "loss": 0.0322, "step": 10376 }, { "epoch": 2.68, "learning_rate": 3.133223642269223e-06, "loss": 0.0214, "step": 10377 }, { "epoch": 2.68, "learning_rate": 3.12826937198632e-06, "loss": 0.0275, "step": 10378 }, { "epoch": 2.68, "learning_rate": 3.123318895172844e-06, "loss": 0.0283, "step": 10379 }, { "epoch": 2.68, "learning_rate": 3.1183722122294477e-06, "loss": 0.0272, "step": 10380 }, { "epoch": 2.68, "learning_rate": 3.113429323556466e-06, "loss": 0.0257, "step": 10381 }, { "epoch": 2.68, "learning_rate": 3.1084902295539566e-06, "loss": 0.0253, "step": 10382 }, { "epoch": 2.68, "learning_rate": 3.103554930621644e-06, "loss": 0.0272, "step": 10383 }, { "epoch": 2.68, "learning_rate": 3.0986234271589697e-06, "loss": 0.017, "step": 10384 }, { "epoch": 2.68, "learning_rate": 3.093695719565054e-06, "loss": 0.0262, "step": 10385 }, { "epoch": 2.68, "learning_rate": 3.0887718082386886e-06, "loss": 0.0254, "step": 10386 }, { "epoch": 2.68, "learning_rate": 3.0838516935784102e-06, "loss": 0.0241, "step": 10387 }, { "epoch": 2.68, "learning_rate": 3.0789353759824057e-06, "loss": 0.0204, "step": 10388 }, { "epoch": 2.68, "learning_rate": 3.074022855848563e-06, "loss": 0.0194, "step": 10389 }, { "epoch": 2.68, "learning_rate": 3.0691141335744746e-06, "loss": 0.0222, "step": 10390 }, { "epoch": 2.68, "learning_rate": 3.0642092095574058e-06, "loss": 0.0351, "step": 10391 }, { "epoch": 2.68, "learning_rate": 3.0593080841943455e-06, "loss": 0.0237, "step": 10392 }, { "epoch": 2.68, "learning_rate": 3.0544107578819424e-06, "loss": 0.0276, "step": 10393 }, { "epoch": 2.68, "learning_rate": 3.049517231016552e-06, "loss": 0.02, "step": 10394 }, { "epoch": 2.68, "learning_rate": 3.044627503994235e-06, "loss": 0.0235, "step": 10395 }, { "epoch": 2.68, "learning_rate": 3.039741577210714e-06, "loss": 0.0216, "step": 10396 }, { "epoch": 2.68, "learning_rate": 3.034859451061439e-06, "loss": 0.0248, "step": 10397 }, { "epoch": 2.68, "learning_rate": 3.0299811259415223e-06, "loss": 0.0197, "step": 10398 }, { "epoch": 2.68, "learning_rate": 3.0251066022457807e-06, "loss": 0.024, "step": 10399 }, { "epoch": 2.68, "learning_rate": 3.020235880368738e-06, "loss": 0.0234, "step": 10400 }, { "epoch": 2.68, "learning_rate": 3.0153689607045845e-06, "loss": 0.0252, "step": 10401 }, { "epoch": 2.68, "learning_rate": 3.01050584364721e-06, "loss": 0.0264, "step": 10402 }, { "epoch": 2.68, "learning_rate": 3.005646529590217e-06, "loss": 0.0235, "step": 10403 }, { "epoch": 2.68, "learning_rate": 3.000791018926863e-06, "loss": 0.0272, "step": 10404 }, { "epoch": 2.69, "learning_rate": 2.9959393120501446e-06, "loss": 0.0259, "step": 10405 }, { "epoch": 2.69, "learning_rate": 2.991091409352709e-06, "loss": 0.0245, "step": 10406 }, { "epoch": 2.69, "learning_rate": 2.9862473112269084e-06, "loss": 0.0345, "step": 10407 }, { "epoch": 2.69, "learning_rate": 2.981407018064808e-06, "loss": 0.0328, "step": 10408 }, { "epoch": 2.69, "learning_rate": 2.9765705302581215e-06, "loss": 0.0246, "step": 10409 }, { "epoch": 2.69, "learning_rate": 2.9717378481983026e-06, "loss": 0.0226, "step": 10410 }, { "epoch": 2.69, "learning_rate": 2.9669089722764663e-06, "loss": 0.0259, "step": 10411 }, { "epoch": 2.69, "learning_rate": 2.9620839028834226e-06, "loss": 0.027, "step": 10412 }, { "epoch": 2.69, "learning_rate": 2.9572626404096917e-06, "loss": 0.0257, "step": 10413 }, { "epoch": 2.69, "learning_rate": 2.952445185245467e-06, "loss": 0.0278, "step": 10414 }, { "epoch": 2.69, "learning_rate": 2.9476315377806263e-06, "loss": 0.0311, "step": 10415 }, { "epoch": 2.69, "learning_rate": 2.942821698404774e-06, "loss": 0.0304, "step": 10416 }, { "epoch": 2.69, "learning_rate": 2.938015667507166e-06, "loss": 0.0241, "step": 10417 }, { "epoch": 2.69, "learning_rate": 2.9332134454767844e-06, "loss": 0.025, "step": 10418 }, { "epoch": 2.69, "learning_rate": 2.9284150327022806e-06, "loss": 0.022, "step": 10419 }, { "epoch": 2.69, "learning_rate": 2.9236204295719994e-06, "loss": 0.0353, "step": 10420 }, { "epoch": 2.69, "learning_rate": 2.918829636473991e-06, "loss": 0.0335, "step": 10421 }, { "epoch": 2.69, "learning_rate": 2.914042653795984e-06, "loss": 0.0289, "step": 10422 }, { "epoch": 2.69, "learning_rate": 2.909259481925408e-06, "loss": 0.0364, "step": 10423 }, { "epoch": 2.69, "learning_rate": 2.9044801212493755e-06, "loss": 0.0211, "step": 10424 }, { "epoch": 2.69, "learning_rate": 2.8997045721546877e-06, "loss": 0.0232, "step": 10425 }, { "epoch": 2.69, "learning_rate": 2.8949328350278583e-06, "loss": 0.0282, "step": 10426 }, { "epoch": 2.69, "learning_rate": 2.890164910255072e-06, "loss": 0.0293, "step": 10427 }, { "epoch": 2.69, "learning_rate": 2.885400798222199e-06, "loss": 0.0253, "step": 10428 }, { "epoch": 2.69, "learning_rate": 2.880640499314835e-06, "loss": 0.028, "step": 10429 }, { "epoch": 2.69, "learning_rate": 2.8758840139182284e-06, "loss": 0.0343, "step": 10430 }, { "epoch": 2.69, "learning_rate": 2.871131342417349e-06, "loss": 0.0258, "step": 10431 }, { "epoch": 2.69, "learning_rate": 2.866382485196839e-06, "loss": 0.027, "step": 10432 }, { "epoch": 2.69, "learning_rate": 2.8616374426410243e-06, "loss": 0.0316, "step": 10433 }, { "epoch": 2.69, "learning_rate": 2.8568962151339583e-06, "loss": 0.0272, "step": 10434 }, { "epoch": 2.69, "learning_rate": 2.852158803059346e-06, "loss": 0.0315, "step": 10435 }, { "epoch": 2.69, "learning_rate": 2.8474252068006136e-06, "loss": 0.0319, "step": 10436 }, { "epoch": 2.69, "learning_rate": 2.8426954267408544e-06, "loss": 0.0254, "step": 10437 }, { "epoch": 2.69, "learning_rate": 2.8379694632628628e-06, "loss": 0.0289, "step": 10438 }, { "epoch": 2.69, "learning_rate": 2.8332473167491324e-06, "loss": 0.0256, "step": 10439 }, { "epoch": 2.69, "learning_rate": 2.8285289875818412e-06, "loss": 0.0307, "step": 10440 }, { "epoch": 2.69, "learning_rate": 2.823814476142844e-06, "loss": 0.0241, "step": 10441 }, { "epoch": 2.69, "learning_rate": 2.8191037828137257e-06, "loss": 0.0254, "step": 10442 }, { "epoch": 2.69, "learning_rate": 2.8143969079757083e-06, "loss": 0.0232, "step": 10443 }, { "epoch": 2.7, "learning_rate": 2.8096938520097537e-06, "loss": 0.029, "step": 10444 }, { "epoch": 2.7, "learning_rate": 2.8049946152964856e-06, "loss": 0.0258, "step": 10445 }, { "epoch": 2.7, "learning_rate": 2.8002991982162276e-06, "loss": 0.0326, "step": 10446 }, { "epoch": 2.7, "learning_rate": 2.7956076011489972e-06, "loss": 0.035, "step": 10447 }, { "epoch": 2.7, "learning_rate": 2.7909198244744915e-06, "loss": 0.0286, "step": 10448 }, { "epoch": 2.7, "learning_rate": 2.7862358685721235e-06, "loss": 0.0281, "step": 10449 }, { "epoch": 2.7, "learning_rate": 2.7815557338209617e-06, "loss": 0.0281, "step": 10450 }, { "epoch": 2.7, "learning_rate": 2.7768794205997873e-06, "loss": 0.0217, "step": 10451 }, { "epoch": 2.7, "learning_rate": 2.7722069292870747e-06, "loss": 0.0222, "step": 10452 }, { "epoch": 2.7, "learning_rate": 2.7675382602609823e-06, "loss": 0.0247, "step": 10453 }, { "epoch": 2.7, "learning_rate": 2.7628734138993472e-06, "loss": 0.0302, "step": 10454 }, { "epoch": 2.7, "learning_rate": 2.7582123905797284e-06, "loss": 0.0206, "step": 10455 }, { "epoch": 2.7, "learning_rate": 2.7535551906793412e-06, "loss": 0.0246, "step": 10456 }, { "epoch": 2.7, "learning_rate": 2.748901814575111e-06, "loss": 0.0301, "step": 10457 }, { "epoch": 2.7, "learning_rate": 2.7442522626436595e-06, "loss": 0.0207, "step": 10458 }, { "epoch": 2.7, "learning_rate": 2.739606535261263e-06, "loss": 0.0178, "step": 10459 }, { "epoch": 2.7, "learning_rate": 2.7349646328039436e-06, "loss": 0.0284, "step": 10460 }, { "epoch": 2.7, "learning_rate": 2.7303265556473668e-06, "loss": 0.03, "step": 10461 }, { "epoch": 2.7, "learning_rate": 2.725692304166916e-06, "loss": 0.0293, "step": 10462 }, { "epoch": 2.7, "learning_rate": 2.7210618787376463e-06, "loss": 0.0252, "step": 10463 }, { "epoch": 2.7, "learning_rate": 2.716435279734314e-06, "loss": 0.0161, "step": 10464 }, { "epoch": 2.7, "learning_rate": 2.7118125075313694e-06, "loss": 0.023, "step": 10465 }, { "epoch": 2.7, "learning_rate": 2.707193562502941e-06, "loss": 0.0261, "step": 10466 }, { "epoch": 2.7, "learning_rate": 2.702578445022852e-06, "loss": 0.0258, "step": 10467 }, { "epoch": 2.7, "learning_rate": 2.6979671554646313e-06, "loss": 0.0264, "step": 10468 }, { "epoch": 2.7, "learning_rate": 2.693359694201464e-06, "loss": 0.0246, "step": 10469 }, { "epoch": 2.7, "learning_rate": 2.6887560616062623e-06, "loss": 0.0214, "step": 10470 }, { "epoch": 2.7, "learning_rate": 2.6841562580516068e-06, "loss": 0.0193, "step": 10471 }, { "epoch": 2.7, "learning_rate": 2.6795602839097657e-06, "loss": 0.0283, "step": 10472 }, { "epoch": 2.7, "learning_rate": 2.6749681395527194e-06, "loss": 0.0197, "step": 10473 }, { "epoch": 2.7, "learning_rate": 2.6703798253521093e-06, "loss": 0.0208, "step": 10474 }, { "epoch": 2.7, "learning_rate": 2.6657953416792945e-06, "loss": 0.021, "step": 10475 }, { "epoch": 2.7, "learning_rate": 2.661214688905306e-06, "loss": 0.0264, "step": 10476 }, { "epoch": 2.7, "learning_rate": 2.6566378674008574e-06, "loss": 0.0308, "step": 10477 }, { "epoch": 2.7, "learning_rate": 2.6520648775363865e-06, "loss": 0.0229, "step": 10478 }, { "epoch": 2.7, "learning_rate": 2.6474957196819914e-06, "loss": 0.0289, "step": 10479 }, { "epoch": 2.7, "learning_rate": 2.6429303942074545e-06, "loss": 0.0254, "step": 10480 }, { "epoch": 2.7, "learning_rate": 2.63836890148228e-06, "loss": 0.0203, "step": 10481 }, { "epoch": 2.71, "learning_rate": 2.633811241875622e-06, "loss": 0.028, "step": 10482 }, { "epoch": 2.71, "learning_rate": 2.6292574157563697e-06, "loss": 0.0234, "step": 10483 }, { "epoch": 2.71, "learning_rate": 2.6247074234930667e-06, "loss": 0.0223, "step": 10484 }, { "epoch": 2.71, "learning_rate": 2.6201612654539465e-06, "loss": 0.0266, "step": 10485 }, { "epoch": 2.71, "learning_rate": 2.615618942006964e-06, "loss": 0.0232, "step": 10486 }, { "epoch": 2.71, "learning_rate": 2.6110804535197263e-06, "loss": 0.0291, "step": 10487 }, { "epoch": 2.71, "learning_rate": 2.606545800359561e-06, "loss": 0.0295, "step": 10488 }, { "epoch": 2.71, "learning_rate": 2.602014982893458e-06, "loss": 0.0283, "step": 10489 }, { "epoch": 2.71, "learning_rate": 2.597488001488113e-06, "loss": 0.0213, "step": 10490 }, { "epoch": 2.71, "learning_rate": 2.5929648565099164e-06, "loss": 0.0294, "step": 10491 }, { "epoch": 2.71, "learning_rate": 2.588445548324936e-06, "loss": 0.0247, "step": 10492 }, { "epoch": 2.71, "learning_rate": 2.583930077298924e-06, "loss": 0.0302, "step": 10493 }, { "epoch": 2.71, "learning_rate": 2.5794184437973434e-06, "loss": 0.0213, "step": 10494 }, { "epoch": 2.71, "learning_rate": 2.574910648185319e-06, "loss": 0.0308, "step": 10495 }, { "epoch": 2.71, "learning_rate": 2.5704066908277036e-06, "loss": 0.0269, "step": 10496 }, { "epoch": 2.71, "learning_rate": 2.565906572088994e-06, "loss": 0.0244, "step": 10497 }, { "epoch": 2.71, "learning_rate": 2.561410292333405e-06, "loss": 0.0251, "step": 10498 }, { "epoch": 2.71, "learning_rate": 2.556917851924845e-06, "loss": 0.0233, "step": 10499 }, { "epoch": 2.71, "learning_rate": 2.552429251226879e-06, "loss": 0.0279, "step": 10500 }, { "epoch": 2.71, "learning_rate": 2.547944490602805e-06, "loss": 0.0291, "step": 10501 }, { "epoch": 2.71, "learning_rate": 2.5434635704155773e-06, "loss": 0.024, "step": 10502 }, { "epoch": 2.71, "learning_rate": 2.538986491027845e-06, "loss": 0.0234, "step": 10503 }, { "epoch": 2.71, "learning_rate": 2.534513252801962e-06, "loss": 0.0237, "step": 10504 }, { "epoch": 2.71, "learning_rate": 2.530043856099962e-06, "loss": 0.0248, "step": 10505 }, { "epoch": 2.71, "learning_rate": 2.525578301283549e-06, "loss": 0.0276, "step": 10506 }, { "epoch": 2.71, "learning_rate": 2.5211165887141508e-06, "loss": 0.0255, "step": 10507 }, { "epoch": 2.71, "learning_rate": 2.5166587187528623e-06, "loss": 0.0272, "step": 10508 }, { "epoch": 2.71, "learning_rate": 2.5122046917604726e-06, "loss": 0.0253, "step": 10509 }, { "epoch": 2.71, "learning_rate": 2.507754508097465e-06, "loss": 0.0313, "step": 10510 }, { "epoch": 2.71, "learning_rate": 2.5033081681239855e-06, "loss": 0.0313, "step": 10511 }, { "epoch": 2.71, "learning_rate": 2.4988656721999183e-06, "loss": 0.0338, "step": 10512 }, { "epoch": 2.71, "learning_rate": 2.4944270206847863e-06, "loss": 0.0285, "step": 10513 }, { "epoch": 2.71, "learning_rate": 2.489992213937836e-06, "loss": 0.0262, "step": 10514 }, { "epoch": 2.71, "learning_rate": 2.4855612523179804e-06, "loss": 0.0252, "step": 10515 }, { "epoch": 2.71, "learning_rate": 2.4811341361838324e-06, "loss": 0.0315, "step": 10516 }, { "epoch": 2.71, "learning_rate": 2.4767108658937e-06, "loss": 0.0265, "step": 10517 }, { "epoch": 2.71, "learning_rate": 2.4722914418055578e-06, "loss": 0.0286, "step": 10518 }, { "epoch": 2.71, "learning_rate": 2.467875864277092e-06, "loss": 0.0298, "step": 10519 }, { "epoch": 2.71, "learning_rate": 2.4634641336656718e-06, "loss": 0.0258, "step": 10520 }, { "epoch": 2.72, "learning_rate": 2.45905625032834e-06, "loss": 0.0226, "step": 10521 }, { "epoch": 2.72, "learning_rate": 2.4546522146218552e-06, "loss": 0.0244, "step": 10522 }, { "epoch": 2.72, "learning_rate": 2.4502520269026384e-06, "loss": 0.0274, "step": 10523 }, { "epoch": 2.72, "learning_rate": 2.4458556875268046e-06, "loss": 0.0308, "step": 10524 }, { "epoch": 2.72, "learning_rate": 2.44146319685018e-06, "loss": 0.0328, "step": 10525 }, { "epoch": 2.72, "learning_rate": 2.4370745552282416e-06, "loss": 0.0242, "step": 10526 }, { "epoch": 2.72, "learning_rate": 2.432689763016194e-06, "loss": 0.0294, "step": 10527 }, { "epoch": 2.72, "learning_rate": 2.4283088205689088e-06, "loss": 0.0292, "step": 10528 }, { "epoch": 2.72, "learning_rate": 2.42393172824093e-06, "loss": 0.0272, "step": 10529 }, { "epoch": 2.72, "learning_rate": 2.4195584863865304e-06, "loss": 0.0317, "step": 10530 }, { "epoch": 2.72, "learning_rate": 2.415189095359638e-06, "loss": 0.0258, "step": 10531 }, { "epoch": 2.72, "learning_rate": 2.410823555513886e-06, "loss": 0.0277, "step": 10532 }, { "epoch": 2.72, "learning_rate": 2.4064618672025917e-06, "loss": 0.0243, "step": 10533 }, { "epoch": 2.72, "learning_rate": 2.4021040307787513e-06, "loss": 0.03, "step": 10534 }, { "epoch": 2.72, "learning_rate": 2.3977500465950707e-06, "loss": 0.0276, "step": 10535 }, { "epoch": 2.72, "learning_rate": 2.393399915003919e-06, "loss": 0.0303, "step": 10536 }, { "epoch": 2.72, "learning_rate": 2.3890536363573633e-06, "loss": 0.0299, "step": 10537 }, { "epoch": 2.72, "learning_rate": 2.384711211007179e-06, "loss": 0.0296, "step": 10538 }, { "epoch": 2.72, "learning_rate": 2.380372639304784e-06, "loss": 0.0223, "step": 10539 }, { "epoch": 2.72, "learning_rate": 2.376037921601343e-06, "loss": 0.0279, "step": 10540 }, { "epoch": 2.72, "learning_rate": 2.3717070582476577e-06, "loss": 0.0269, "step": 10541 }, { "epoch": 2.72, "learning_rate": 2.3673800495942367e-06, "loss": 0.0301, "step": 10542 }, { "epoch": 2.72, "learning_rate": 2.363056895991289e-06, "loss": 0.0322, "step": 10543 }, { "epoch": 2.72, "learning_rate": 2.3587375977886905e-06, "loss": 0.0306, "step": 10544 }, { "epoch": 2.72, "learning_rate": 2.3544221553360225e-06, "loss": 0.0298, "step": 10545 }, { "epoch": 2.72, "learning_rate": 2.3501105689825497e-06, "loss": 0.0375, "step": 10546 }, { "epoch": 2.72, "learning_rate": 2.345802839077205e-06, "loss": 0.03, "step": 10547 }, { "epoch": 2.72, "learning_rate": 2.3414989659686414e-06, "loss": 0.0319, "step": 10548 }, { "epoch": 2.72, "learning_rate": 2.3371989500051763e-06, "loss": 0.0341, "step": 10549 }, { "epoch": 2.72, "learning_rate": 2.3329027915348244e-06, "loss": 0.0316, "step": 10550 }, { "epoch": 2.72, "learning_rate": 2.328610490905292e-06, "loss": 0.0202, "step": 10551 }, { "epoch": 2.72, "learning_rate": 2.3243220484639617e-06, "loss": 0.0316, "step": 10552 }, { "epoch": 2.72, "learning_rate": 2.3200374645579116e-06, "loss": 0.033, "step": 10553 }, { "epoch": 2.72, "learning_rate": 2.315756739533909e-06, "loss": 0.021, "step": 10554 }, { "epoch": 2.72, "learning_rate": 2.311479873738398e-06, "loss": 0.0249, "step": 10555 }, { "epoch": 2.72, "learning_rate": 2.3072068675175305e-06, "loss": 0.028, "step": 10556 }, { "epoch": 2.72, "learning_rate": 2.3029377212171186e-06, "loss": 0.0191, "step": 10557 }, { "epoch": 2.72, "learning_rate": 2.2986724351826916e-06, "loss": 0.0317, "step": 10558 }, { "epoch": 2.72, "learning_rate": 2.294411009759445e-06, "loss": 0.0311, "step": 10559 }, { "epoch": 2.73, "learning_rate": 2.290153445292259e-06, "loss": 0.0259, "step": 10560 }, { "epoch": 2.73, "learning_rate": 2.28589974212573e-06, "loss": 0.0204, "step": 10561 }, { "epoch": 2.73, "learning_rate": 2.281649900604116e-06, "loss": 0.0193, "step": 10562 }, { "epoch": 2.73, "learning_rate": 2.2774039210713584e-06, "loss": 0.0292, "step": 10563 }, { "epoch": 2.73, "learning_rate": 2.2731618038711155e-06, "loss": 0.0241, "step": 10564 }, { "epoch": 2.73, "learning_rate": 2.2689235493466963e-06, "loss": 0.0289, "step": 10565 }, { "epoch": 2.73, "learning_rate": 2.2646891578411266e-06, "loss": 0.0233, "step": 10566 }, { "epoch": 2.73, "learning_rate": 2.2604586296971098e-06, "loss": 0.0229, "step": 10567 }, { "epoch": 2.73, "learning_rate": 2.2562319652570274e-06, "loss": 0.0222, "step": 10568 }, { "epoch": 2.73, "learning_rate": 2.2520091648629614e-06, "loss": 0.0218, "step": 10569 }, { "epoch": 2.73, "learning_rate": 2.247790228856672e-06, "loss": 0.0338, "step": 10570 }, { "epoch": 2.73, "learning_rate": 2.2435751575796136e-06, "loss": 0.0217, "step": 10571 }, { "epoch": 2.73, "learning_rate": 2.2393639513729248e-06, "loss": 0.0233, "step": 10572 }, { "epoch": 2.73, "learning_rate": 2.2351566105774268e-06, "loss": 0.0176, "step": 10573 }, { "epoch": 2.73, "learning_rate": 2.2309531355336365e-06, "loss": 0.0173, "step": 10574 }, { "epoch": 2.73, "learning_rate": 2.2267535265817598e-06, "loss": 0.0376, "step": 10575 }, { "epoch": 2.73, "learning_rate": 2.222557784061663e-06, "loss": 0.0267, "step": 10576 }, { "epoch": 2.73, "learning_rate": 2.2183659083129416e-06, "loss": 0.0259, "step": 10577 }, { "epoch": 2.73, "learning_rate": 2.2141778996748408e-06, "loss": 0.0216, "step": 10578 }, { "epoch": 2.73, "learning_rate": 2.2099937584863227e-06, "loss": 0.0279, "step": 10579 }, { "epoch": 2.73, "learning_rate": 2.205813485086017e-06, "loss": 0.0264, "step": 10580 }, { "epoch": 2.73, "learning_rate": 2.201637079812241e-06, "loss": 0.0294, "step": 10581 }, { "epoch": 2.73, "learning_rate": 2.197464543003008e-06, "loss": 0.0253, "step": 10582 }, { "epoch": 2.73, "learning_rate": 2.19329587499601e-06, "loss": 0.0255, "step": 10583 }, { "epoch": 2.73, "learning_rate": 2.1891310761286364e-06, "loss": 0.0266, "step": 10584 }, { "epoch": 2.73, "learning_rate": 2.184970146737958e-06, "loss": 0.023, "step": 10585 }, { "epoch": 2.73, "learning_rate": 2.1808130871607157e-06, "loss": 0.0221, "step": 10586 }, { "epoch": 2.73, "learning_rate": 2.176659897733374e-06, "loss": 0.0243, "step": 10587 }, { "epoch": 2.73, "learning_rate": 2.172510578792053e-06, "loss": 0.0269, "step": 10588 }, { "epoch": 2.73, "learning_rate": 2.1683651306725562e-06, "loss": 0.021, "step": 10589 }, { "epoch": 2.73, "learning_rate": 2.164223553710415e-06, "loss": 0.0271, "step": 10590 }, { "epoch": 2.73, "learning_rate": 2.1600858482407947e-06, "loss": 0.0232, "step": 10591 }, { "epoch": 2.73, "learning_rate": 2.1559520145985833e-06, "loss": 0.0295, "step": 10592 }, { "epoch": 2.73, "learning_rate": 2.151822053118352e-06, "loss": 0.0227, "step": 10593 }, { "epoch": 2.73, "learning_rate": 2.147695964134333e-06, "loss": 0.0207, "step": 10594 }, { "epoch": 2.73, "learning_rate": 2.1435737479804765e-06, "loss": 0.0248, "step": 10595 }, { "epoch": 2.73, "learning_rate": 2.1394554049903925e-06, "loss": 0.0273, "step": 10596 }, { "epoch": 2.73, "learning_rate": 2.1353409354974096e-06, "loss": 0.0304, "step": 10597 }, { "epoch": 2.73, "learning_rate": 2.131230339834517e-06, "loss": 0.0316, "step": 10598 }, { "epoch": 2.74, "learning_rate": 2.1271236183343813e-06, "loss": 0.0301, "step": 10599 }, { "epoch": 2.74, "learning_rate": 2.1230207713293983e-06, "loss": 0.0217, "step": 10600 }, { "epoch": 2.74, "learning_rate": 2.1189217991516076e-06, "loss": 0.0216, "step": 10601 }, { "epoch": 2.74, "learning_rate": 2.1148267021327493e-06, "loss": 0.0219, "step": 10602 }, { "epoch": 2.74, "learning_rate": 2.1107354806042643e-06, "loss": 0.0247, "step": 10603 }, { "epoch": 2.74, "learning_rate": 2.106648134897249e-06, "loss": 0.0238, "step": 10604 }, { "epoch": 2.74, "learning_rate": 2.102564665342527e-06, "loss": 0.0283, "step": 10605 }, { "epoch": 2.74, "learning_rate": 2.0984850722705728e-06, "loss": 0.0222, "step": 10606 }, { "epoch": 2.74, "learning_rate": 2.094409356011551e-06, "loss": 0.0219, "step": 10607 }, { "epoch": 2.74, "learning_rate": 2.0903375168953408e-06, "loss": 0.0359, "step": 10608 }, { "epoch": 2.74, "learning_rate": 2.0862695552514744e-06, "loss": 0.0342, "step": 10609 }, { "epoch": 2.74, "learning_rate": 2.082205471409199e-06, "loss": 0.0332, "step": 10610 }, { "epoch": 2.74, "learning_rate": 2.0781452656974175e-06, "loss": 0.0263, "step": 10611 }, { "epoch": 2.74, "learning_rate": 2.0740889384447347e-06, "loss": 0.0271, "step": 10612 }, { "epoch": 2.74, "learning_rate": 2.0700364899794545e-06, "loss": 0.0232, "step": 10613 }, { "epoch": 2.74, "learning_rate": 2.0659879206295475e-06, "loss": 0.0257, "step": 10614 }, { "epoch": 2.74, "learning_rate": 2.061943230722668e-06, "loss": 0.0272, "step": 10615 }, { "epoch": 2.74, "learning_rate": 2.0579024205861774e-06, "loss": 0.0284, "step": 10616 }, { "epoch": 2.74, "learning_rate": 2.0538654905470968e-06, "loss": 0.0233, "step": 10617 }, { "epoch": 2.74, "learning_rate": 2.0498324409321646e-06, "loss": 0.0328, "step": 10618 }, { "epoch": 2.74, "learning_rate": 2.0458032720677757e-06, "loss": 0.0224, "step": 10619 }, { "epoch": 2.74, "learning_rate": 2.0417779842800243e-06, "loss": 0.0252, "step": 10620 }, { "epoch": 2.74, "learning_rate": 2.0377565778946948e-06, "loss": 0.024, "step": 10621 }, { "epoch": 2.74, "learning_rate": 2.033739053237238e-06, "loss": 0.025, "step": 10622 }, { "epoch": 2.74, "learning_rate": 2.029725410632821e-06, "loss": 0.0318, "step": 10623 }, { "epoch": 2.74, "learning_rate": 2.025715650406268e-06, "loss": 0.022, "step": 10624 }, { "epoch": 2.74, "learning_rate": 2.021709772882102e-06, "loss": 0.0306, "step": 10625 }, { "epoch": 2.74, "learning_rate": 2.017707778384542e-06, "loss": 0.0265, "step": 10626 }, { "epoch": 2.74, "learning_rate": 2.013709667237473e-06, "loss": 0.0299, "step": 10627 }, { "epoch": 2.74, "learning_rate": 2.009715439764465e-06, "loss": 0.0248, "step": 10628 }, { "epoch": 2.74, "learning_rate": 2.0057250962887964e-06, "loss": 0.0283, "step": 10629 }, { "epoch": 2.74, "learning_rate": 2.0017386371334103e-06, "loss": 0.0332, "step": 10630 }, { "epoch": 2.74, "learning_rate": 1.9977560626209533e-06, "loss": 0.0292, "step": 10631 }, { "epoch": 2.74, "learning_rate": 1.9937773730737353e-06, "loss": 0.031, "step": 10632 }, { "epoch": 2.74, "learning_rate": 1.9898025688137644e-06, "loss": 0.0262, "step": 10633 }, { "epoch": 2.74, "learning_rate": 1.9858316501627395e-06, "loss": 0.0297, "step": 10634 }, { "epoch": 2.74, "learning_rate": 1.9818646174420363e-06, "loss": 0.0273, "step": 10635 }, { "epoch": 2.74, "learning_rate": 1.9779014709727205e-06, "loss": 0.0246, "step": 10636 }, { "epoch": 2.75, "learning_rate": 1.9739422110755356e-06, "loss": 0.021, "step": 10637 }, { "epoch": 2.75, "learning_rate": 1.9699868380709196e-06, "loss": 0.0323, "step": 10638 }, { "epoch": 2.75, "learning_rate": 1.9660353522789943e-06, "loss": 0.031, "step": 10639 }, { "epoch": 2.75, "learning_rate": 1.9620877540195703e-06, "loss": 0.0338, "step": 10640 }, { "epoch": 2.75, "learning_rate": 1.95814404361212e-06, "loss": 0.0306, "step": 10641 }, { "epoch": 2.75, "learning_rate": 1.954204221375844e-06, "loss": 0.0362, "step": 10642 }, { "epoch": 2.75, "learning_rate": 1.950268287629581e-06, "loss": 0.0322, "step": 10643 }, { "epoch": 2.75, "learning_rate": 1.9463362426918983e-06, "loss": 0.0377, "step": 10644 }, { "epoch": 2.75, "learning_rate": 1.9424080868810148e-06, "loss": 0.0264, "step": 10645 }, { "epoch": 2.75, "learning_rate": 1.938483820514847e-06, "loss": 0.0309, "step": 10646 }, { "epoch": 2.75, "learning_rate": 1.934563443911008e-06, "loss": 0.034, "step": 10647 }, { "epoch": 2.75, "learning_rate": 1.930646957386778e-06, "loss": 0.0305, "step": 10648 }, { "epoch": 2.75, "learning_rate": 1.9267343612591304e-06, "loss": 0.0309, "step": 10649 }, { "epoch": 2.75, "learning_rate": 1.922825655844729e-06, "loss": 0.0273, "step": 10650 }, { "epoch": 2.75, "learning_rate": 1.9189208414599093e-06, "loss": 0.0216, "step": 10651 }, { "epoch": 2.75, "learning_rate": 1.9150199184207017e-06, "loss": 0.0232, "step": 10652 }, { "epoch": 2.75, "learning_rate": 1.9111228870428267e-06, "loss": 0.0299, "step": 10653 }, { "epoch": 2.75, "learning_rate": 1.90722974764167e-06, "loss": 0.0203, "step": 10654 }, { "epoch": 2.75, "learning_rate": 1.9033405005323246e-06, "loss": 0.0183, "step": 10655 }, { "epoch": 2.75, "learning_rate": 1.899455146029555e-06, "loss": 0.0209, "step": 10656 }, { "epoch": 2.75, "learning_rate": 1.8955736844478212e-06, "loss": 0.0201, "step": 10657 }, { "epoch": 2.75, "learning_rate": 1.8916961161012547e-06, "loss": 0.0242, "step": 10658 }, { "epoch": 2.75, "learning_rate": 1.8878224413036716e-06, "loss": 0.02, "step": 10659 }, { "epoch": 2.75, "learning_rate": 1.8839526603685986e-06, "loss": 0.0267, "step": 10660 }, { "epoch": 2.75, "learning_rate": 1.8800867736092132e-06, "loss": 0.0258, "step": 10661 }, { "epoch": 2.75, "learning_rate": 1.8762247813383983e-06, "loss": 0.0275, "step": 10662 }, { "epoch": 2.75, "learning_rate": 1.8723666838687204e-06, "loss": 0.0247, "step": 10663 }, { "epoch": 2.75, "learning_rate": 1.8685124815124188e-06, "loss": 0.0217, "step": 10664 }, { "epoch": 2.75, "learning_rate": 1.864662174581433e-06, "loss": 0.0204, "step": 10665 }, { "epoch": 2.75, "learning_rate": 1.8608157633873746e-06, "loss": 0.0303, "step": 10666 }, { "epoch": 2.75, "learning_rate": 1.8569732482415502e-06, "loss": 0.0209, "step": 10667 }, { "epoch": 2.75, "learning_rate": 1.8531346294549446e-06, "loss": 0.0214, "step": 10668 }, { "epoch": 2.75, "learning_rate": 1.84929990733822e-06, "loss": 0.0282, "step": 10669 }, { "epoch": 2.75, "learning_rate": 1.8454690822017506e-06, "loss": 0.026, "step": 10670 }, { "epoch": 2.75, "learning_rate": 1.8416421543555606e-06, "loss": 0.0277, "step": 10671 }, { "epoch": 2.75, "learning_rate": 1.8378191241093745e-06, "loss": 0.0218, "step": 10672 }, { "epoch": 2.75, "learning_rate": 1.8339999917726114e-06, "loss": 0.0213, "step": 10673 }, { "epoch": 2.75, "learning_rate": 1.8301847576543575e-06, "loss": 0.0253, "step": 10674 }, { "epoch": 2.75, "learning_rate": 1.8263734220633932e-06, "loss": 0.0224, "step": 10675 }, { "epoch": 2.76, "learning_rate": 1.8225659853081888e-06, "loss": 0.0229, "step": 10676 }, { "epoch": 2.76, "learning_rate": 1.8187624476968757e-06, "loss": 0.0238, "step": 10677 }, { "epoch": 2.76, "learning_rate": 1.8149628095373017e-06, "loss": 0.0295, "step": 10678 }, { "epoch": 2.76, "learning_rate": 1.8111670711369654e-06, "loss": 0.0293, "step": 10679 }, { "epoch": 2.76, "learning_rate": 1.807375232803088e-06, "loss": 0.0232, "step": 10680 }, { "epoch": 2.76, "learning_rate": 1.8035872948425458e-06, "loss": 0.0185, "step": 10681 }, { "epoch": 2.76, "learning_rate": 1.7998032575618939e-06, "loss": 0.0198, "step": 10682 }, { "epoch": 2.76, "learning_rate": 1.7960231212674095e-06, "loss": 0.0235, "step": 10683 }, { "epoch": 2.76, "learning_rate": 1.7922468862650144e-06, "loss": 0.023, "step": 10684 }, { "epoch": 2.76, "learning_rate": 1.7884745528603309e-06, "loss": 0.0259, "step": 10685 }, { "epoch": 2.76, "learning_rate": 1.7847061213586758e-06, "loss": 0.0256, "step": 10686 }, { "epoch": 2.76, "learning_rate": 1.7809415920650274e-06, "loss": 0.0199, "step": 10687 }, { "epoch": 2.76, "learning_rate": 1.7771809652840699e-06, "loss": 0.0231, "step": 10688 }, { "epoch": 2.76, "learning_rate": 1.7734242413201596e-06, "loss": 0.0218, "step": 10689 }, { "epoch": 2.76, "learning_rate": 1.7696714204773313e-06, "loss": 0.0268, "step": 10690 }, { "epoch": 2.76, "learning_rate": 1.7659225030593252e-06, "loss": 0.0305, "step": 10691 }, { "epoch": 2.76, "learning_rate": 1.7621774893695376e-06, "loss": 0.0239, "step": 10692 }, { "epoch": 2.76, "learning_rate": 1.7584363797110815e-06, "loss": 0.0325, "step": 10693 }, { "epoch": 2.76, "learning_rate": 1.7546991743867259e-06, "loss": 0.0254, "step": 10694 }, { "epoch": 2.76, "learning_rate": 1.7509658736989286e-06, "loss": 0.0289, "step": 10695 }, { "epoch": 2.76, "learning_rate": 1.7472364779498484e-06, "loss": 0.0302, "step": 10696 }, { "epoch": 2.76, "learning_rate": 1.7435109874413102e-06, "loss": 0.0247, "step": 10697 }, { "epoch": 2.76, "learning_rate": 1.7397894024748229e-06, "loss": 0.0271, "step": 10698 }, { "epoch": 2.76, "learning_rate": 1.736071723351601e-06, "loss": 0.0229, "step": 10699 }, { "epoch": 2.76, "learning_rate": 1.7323579503725095e-06, "loss": 0.0279, "step": 10700 }, { "epoch": 2.76, "learning_rate": 1.7286480838381358e-06, "loss": 0.0275, "step": 10701 }, { "epoch": 2.76, "learning_rate": 1.7249421240487117e-06, "loss": 0.0196, "step": 10702 }, { "epoch": 2.76, "learning_rate": 1.7212400713041809e-06, "loss": 0.0285, "step": 10703 }, { "epoch": 2.76, "learning_rate": 1.7175419259041592e-06, "loss": 0.0276, "step": 10704 }, { "epoch": 2.76, "learning_rate": 1.713847688147946e-06, "loss": 0.0266, "step": 10705 }, { "epoch": 2.76, "learning_rate": 1.710157358334541e-06, "loss": 0.0265, "step": 10706 }, { "epoch": 2.76, "learning_rate": 1.7064709367625943e-06, "loss": 0.0291, "step": 10707 }, { "epoch": 2.76, "learning_rate": 1.702788423730467e-06, "loss": 0.0253, "step": 10708 }, { "epoch": 2.76, "learning_rate": 1.6991098195361986e-06, "loss": 0.0213, "step": 10709 }, { "epoch": 2.76, "learning_rate": 1.6954351244775123e-06, "loss": 0.0234, "step": 10710 }, { "epoch": 2.76, "learning_rate": 1.6917643388517978e-06, "loss": 0.0251, "step": 10711 }, { "epoch": 2.76, "learning_rate": 1.6880974629561563e-06, "loss": 0.0208, "step": 10712 }, { "epoch": 2.76, "learning_rate": 1.684434497087356e-06, "loss": 0.0213, "step": 10713 }, { "epoch": 2.76, "learning_rate": 1.6807754415418487e-06, "loss": 0.0284, "step": 10714 }, { "epoch": 2.77, "learning_rate": 1.6771202966157806e-06, "loss": 0.0242, "step": 10715 }, { "epoch": 2.77, "learning_rate": 1.6734690626049598e-06, "loss": 0.0219, "step": 10716 }, { "epoch": 2.77, "learning_rate": 1.6698217398049e-06, "loss": 0.0241, "step": 10717 }, { "epoch": 2.77, "learning_rate": 1.6661783285107924e-06, "loss": 0.0291, "step": 10718 }, { "epoch": 2.77, "learning_rate": 1.662538829017507e-06, "loss": 0.0247, "step": 10719 }, { "epoch": 2.77, "learning_rate": 1.658903241619597e-06, "loss": 0.0322, "step": 10720 }, { "epoch": 2.77, "learning_rate": 1.6552715666112995e-06, "loss": 0.0278, "step": 10721 }, { "epoch": 2.77, "learning_rate": 1.6516438042865456e-06, "loss": 0.0236, "step": 10722 }, { "epoch": 2.77, "learning_rate": 1.6480199549389342e-06, "loss": 0.024, "step": 10723 }, { "epoch": 2.77, "learning_rate": 1.6444000188617526e-06, "loss": 0.022, "step": 10724 }, { "epoch": 2.77, "learning_rate": 1.6407839963479776e-06, "loss": 0.025, "step": 10725 }, { "epoch": 2.77, "learning_rate": 1.6371718876902643e-06, "loss": 0.0269, "step": 10726 }, { "epoch": 2.77, "learning_rate": 1.6335636931809506e-06, "loss": 0.0227, "step": 10727 }, { "epoch": 2.77, "learning_rate": 1.6299594131120588e-06, "loss": 0.0244, "step": 10728 }, { "epoch": 2.77, "learning_rate": 1.6263590477752888e-06, "loss": 0.0225, "step": 10729 }, { "epoch": 2.77, "learning_rate": 1.622762597462041e-06, "loss": 0.0309, "step": 10730 }, { "epoch": 2.77, "learning_rate": 1.619170062463371e-06, "loss": 0.0319, "step": 10731 }, { "epoch": 2.77, "learning_rate": 1.6155814430700466e-06, "loss": 0.0307, "step": 10732 }, { "epoch": 2.77, "learning_rate": 1.6119967395725023e-06, "loss": 0.0264, "step": 10733 }, { "epoch": 2.77, "learning_rate": 1.6084159522608554e-06, "loss": 0.0238, "step": 10734 }, { "epoch": 2.77, "learning_rate": 1.6048390814249137e-06, "loss": 0.0246, "step": 10735 }, { "epoch": 2.77, "learning_rate": 1.601266127354162e-06, "loss": 0.0262, "step": 10736 }, { "epoch": 2.77, "learning_rate": 1.597697090337763e-06, "loss": 0.0254, "step": 10737 }, { "epoch": 2.77, "learning_rate": 1.5941319706645863e-06, "loss": 0.0273, "step": 10738 }, { "epoch": 2.77, "learning_rate": 1.5905707686231508e-06, "loss": 0.0285, "step": 10739 }, { "epoch": 2.77, "learning_rate": 1.587013484501687e-06, "loss": 0.0286, "step": 10740 }, { "epoch": 2.77, "learning_rate": 1.583460118588098e-06, "loss": 0.0284, "step": 10741 }, { "epoch": 2.77, "learning_rate": 1.5799106711699484e-06, "loss": 0.0224, "step": 10742 }, { "epoch": 2.77, "learning_rate": 1.5763651425345306e-06, "loss": 0.0288, "step": 10743 }, { "epoch": 2.77, "learning_rate": 1.5728235329687813e-06, "loss": 0.0298, "step": 10744 }, { "epoch": 2.77, "learning_rate": 1.5692858427593382e-06, "loss": 0.0282, "step": 10745 }, { "epoch": 2.77, "learning_rate": 1.5657520721925112e-06, "loss": 0.0281, "step": 10746 }, { "epoch": 2.77, "learning_rate": 1.5622222215543048e-06, "loss": 0.0343, "step": 10747 }, { "epoch": 2.77, "learning_rate": 1.5586962911303958e-06, "loss": 0.0302, "step": 10748 }, { "epoch": 2.77, "learning_rate": 1.5551742812061565e-06, "loss": 0.0339, "step": 10749 }, { "epoch": 2.77, "learning_rate": 1.551656192066625e-06, "loss": 0.0378, "step": 10750 }, { "epoch": 2.77, "learning_rate": 1.548142023996535e-06, "loss": 0.0224, "step": 10751 }, { "epoch": 2.77, "learning_rate": 1.544631777280292e-06, "loss": 0.0268, "step": 10752 }, { "epoch": 2.77, "learning_rate": 1.5411254522020025e-06, "loss": 0.0211, "step": 10753 }, { "epoch": 2.78, "learning_rate": 1.537623049045439e-06, "loss": 0.0224, "step": 10754 }, { "epoch": 2.78, "learning_rate": 1.534124568094053e-06, "loss": 0.0233, "step": 10755 }, { "epoch": 2.78, "learning_rate": 1.5306300096310011e-06, "loss": 0.0224, "step": 10756 }, { "epoch": 2.78, "learning_rate": 1.527139373939096e-06, "loss": 0.0224, "step": 10757 }, { "epoch": 2.78, "learning_rate": 1.5236526613008617e-06, "loss": 0.0277, "step": 10758 }, { "epoch": 2.78, "learning_rate": 1.520169871998467e-06, "loss": 0.0244, "step": 10759 }, { "epoch": 2.78, "learning_rate": 1.5166910063137973e-06, "loss": 0.0234, "step": 10760 }, { "epoch": 2.78, "learning_rate": 1.5132160645284054e-06, "loss": 0.0284, "step": 10761 }, { "epoch": 2.78, "learning_rate": 1.509745046923533e-06, "loss": 0.0254, "step": 10762 }, { "epoch": 2.78, "learning_rate": 1.5062779537800886e-06, "loss": 0.0366, "step": 10763 }, { "epoch": 2.78, "learning_rate": 1.5028147853786866e-06, "loss": 0.0286, "step": 10764 }, { "epoch": 2.78, "learning_rate": 1.4993555419995975e-06, "loss": 0.0242, "step": 10765 }, { "epoch": 2.78, "learning_rate": 1.495900223922808e-06, "loss": 0.0257, "step": 10766 }, { "epoch": 2.78, "learning_rate": 1.492448831427956e-06, "loss": 0.0248, "step": 10767 }, { "epoch": 2.78, "learning_rate": 1.4890013647943625e-06, "loss": 0.0203, "step": 10768 }, { "epoch": 2.78, "learning_rate": 1.4855578243010593e-06, "loss": 0.0213, "step": 10769 }, { "epoch": 2.78, "learning_rate": 1.482118210226735e-06, "loss": 0.0368, "step": 10770 }, { "epoch": 2.78, "learning_rate": 1.478682522849767e-06, "loss": 0.0227, "step": 10771 }, { "epoch": 2.78, "learning_rate": 1.4752507624482214e-06, "loss": 0.0226, "step": 10772 }, { "epoch": 2.78, "learning_rate": 1.4718229292998265e-06, "loss": 0.0234, "step": 10773 }, { "epoch": 2.78, "learning_rate": 1.4683990236820266e-06, "loss": 0.0192, "step": 10774 }, { "epoch": 2.78, "learning_rate": 1.464979045871917e-06, "loss": 0.0226, "step": 10775 }, { "epoch": 2.78, "learning_rate": 1.4615629961462874e-06, "loss": 0.0216, "step": 10776 }, { "epoch": 2.78, "learning_rate": 1.4581508747816108e-06, "loss": 0.0251, "step": 10777 }, { "epoch": 2.78, "learning_rate": 1.4547426820540333e-06, "loss": 0.0386, "step": 10778 }, { "epoch": 2.78, "learning_rate": 1.4513384182394062e-06, "loss": 0.0244, "step": 10779 }, { "epoch": 2.78, "learning_rate": 1.4479380836132372e-06, "loss": 0.0257, "step": 10780 }, { "epoch": 2.78, "learning_rate": 1.4445416784507171e-06, "loss": 0.0244, "step": 10781 }, { "epoch": 2.78, "learning_rate": 1.4411492030267427e-06, "loss": 0.0243, "step": 10782 }, { "epoch": 2.78, "learning_rate": 1.4377606576158609e-06, "loss": 0.0241, "step": 10783 }, { "epoch": 2.78, "learning_rate": 1.4343760424923359e-06, "loss": 0.0319, "step": 10784 }, { "epoch": 2.78, "learning_rate": 1.4309953579300872e-06, "loss": 0.0263, "step": 10785 }, { "epoch": 2.78, "learning_rate": 1.4276186042027128e-06, "loss": 0.0265, "step": 10786 }, { "epoch": 2.78, "learning_rate": 1.4242457815835163e-06, "loss": 0.0232, "step": 10787 }, { "epoch": 2.78, "learning_rate": 1.4208768903454629e-06, "loss": 0.0307, "step": 10788 }, { "epoch": 2.78, "learning_rate": 1.4175119307612116e-06, "loss": 0.0272, "step": 10789 }, { "epoch": 2.78, "learning_rate": 1.4141509031031009e-06, "loss": 0.025, "step": 10790 }, { "epoch": 2.78, "learning_rate": 1.4107938076431347e-06, "loss": 0.0234, "step": 10791 }, { "epoch": 2.79, "learning_rate": 1.4074406446530352e-06, "loss": 0.0236, "step": 10792 }, { "epoch": 2.79, "learning_rate": 1.4040914144041684e-06, "loss": 0.0267, "step": 10793 }, { "epoch": 2.79, "learning_rate": 1.4007461171676006e-06, "loss": 0.0214, "step": 10794 }, { "epoch": 2.79, "learning_rate": 1.3974047532140765e-06, "loss": 0.0267, "step": 10795 }, { "epoch": 2.79, "learning_rate": 1.394067322814019e-06, "loss": 0.0202, "step": 10796 }, { "epoch": 2.79, "learning_rate": 1.3907338262375501e-06, "loss": 0.0291, "step": 10797 }, { "epoch": 2.79, "learning_rate": 1.3874042637544492e-06, "loss": 0.0225, "step": 10798 }, { "epoch": 2.79, "learning_rate": 1.3840786356341783e-06, "loss": 0.0196, "step": 10799 }, { "epoch": 2.79, "learning_rate": 1.380756942145911e-06, "loss": 0.0263, "step": 10800 }, { "epoch": 2.79, "learning_rate": 1.377439183558471e-06, "loss": 0.0215, "step": 10801 }, { "epoch": 2.79, "learning_rate": 1.3741253601403713e-06, "loss": 0.0276, "step": 10802 }, { "epoch": 2.79, "learning_rate": 1.3708154721598142e-06, "loss": 0.0232, "step": 10803 }, { "epoch": 2.79, "learning_rate": 1.36750951988468e-06, "loss": 0.027, "step": 10804 }, { "epoch": 2.79, "learning_rate": 1.3642075035825264e-06, "loss": 0.025, "step": 10805 }, { "epoch": 2.79, "learning_rate": 1.360909423520601e-06, "loss": 0.0228, "step": 10806 }, { "epoch": 2.79, "learning_rate": 1.357615279965818e-06, "loss": 0.0203, "step": 10807 }, { "epoch": 2.79, "learning_rate": 1.3543250731847867e-06, "loss": 0.0264, "step": 10808 }, { "epoch": 2.79, "learning_rate": 1.3510388034437937e-06, "loss": 0.0257, "step": 10809 }, { "epoch": 2.79, "learning_rate": 1.3477564710088098e-06, "loss": 0.03, "step": 10810 }, { "epoch": 2.79, "learning_rate": 1.3444780761454833e-06, "loss": 0.0252, "step": 10811 }, { "epoch": 2.79, "learning_rate": 1.3412036191191357e-06, "loss": 0.0312, "step": 10812 }, { "epoch": 2.79, "learning_rate": 1.337933100194788e-06, "loss": 0.022, "step": 10813 }, { "epoch": 2.79, "learning_rate": 1.3346665196371288e-06, "loss": 0.0288, "step": 10814 }, { "epoch": 2.79, "learning_rate": 1.3314038777105353e-06, "loss": 0.0229, "step": 10815 }, { "epoch": 2.79, "learning_rate": 1.328145174679063e-06, "loss": 0.0264, "step": 10816 }, { "epoch": 2.79, "learning_rate": 1.3248904108064397e-06, "loss": 0.0298, "step": 10817 }, { "epoch": 2.79, "learning_rate": 1.3216395863560992e-06, "loss": 0.0281, "step": 10818 }, { "epoch": 2.79, "learning_rate": 1.3183927015911257e-06, "loss": 0.029, "step": 10819 }, { "epoch": 2.79, "learning_rate": 1.3151497567743033e-06, "loss": 0.0219, "step": 10820 }, { "epoch": 2.79, "learning_rate": 1.3119107521680995e-06, "loss": 0.029, "step": 10821 }, { "epoch": 2.79, "learning_rate": 1.3086756880346495e-06, "loss": 0.0252, "step": 10822 }, { "epoch": 2.79, "learning_rate": 1.3054445646357826e-06, "loss": 0.0237, "step": 10823 }, { "epoch": 2.79, "learning_rate": 1.3022173822329952e-06, "loss": 0.0293, "step": 10824 }, { "epoch": 2.79, "learning_rate": 1.2989941410874784e-06, "loss": 0.0266, "step": 10825 }, { "epoch": 2.79, "learning_rate": 1.2957748414601012e-06, "loss": 0.0224, "step": 10826 }, { "epoch": 2.79, "learning_rate": 1.2925594836114051e-06, "loss": 0.0229, "step": 10827 }, { "epoch": 2.79, "learning_rate": 1.2893480678016213e-06, "loss": 0.0268, "step": 10828 }, { "epoch": 2.79, "learning_rate": 1.2861405942906635e-06, "loss": 0.0244, "step": 10829 }, { "epoch": 2.79, "learning_rate": 1.2829370633381132e-06, "loss": 0.0213, "step": 10830 }, { "epoch": 2.8, "learning_rate": 1.279737475203252e-06, "loss": 0.0284, "step": 10831 }, { "epoch": 2.8, "learning_rate": 1.2765418301450227e-06, "loss": 0.0258, "step": 10832 }, { "epoch": 2.8, "learning_rate": 1.2733501284220573e-06, "loss": 0.0283, "step": 10833 }, { "epoch": 2.8, "learning_rate": 1.2701623702926823e-06, "loss": 0.0276, "step": 10834 }, { "epoch": 2.8, "learning_rate": 1.2669785560148805e-06, "loss": 0.0316, "step": 10835 }, { "epoch": 2.8, "learning_rate": 1.2637986858463401e-06, "loss": 0.0316, "step": 10836 }, { "epoch": 2.8, "learning_rate": 1.2606227600444053e-06, "loss": 0.0255, "step": 10837 }, { "epoch": 2.8, "learning_rate": 1.257450778866115e-06, "loss": 0.0282, "step": 10838 }, { "epoch": 2.8, "learning_rate": 1.2542827425681913e-06, "loss": 0.0291, "step": 10839 }, { "epoch": 2.8, "learning_rate": 1.2511186514070294e-06, "loss": 0.0277, "step": 10840 }, { "epoch": 2.8, "learning_rate": 1.2479585056387133e-06, "loss": 0.0288, "step": 10841 }, { "epoch": 2.8, "learning_rate": 1.2448023055189995e-06, "loss": 0.0328, "step": 10842 }, { "epoch": 2.8, "learning_rate": 1.241650051303328e-06, "loss": 0.0249, "step": 10843 }, { "epoch": 2.8, "learning_rate": 1.2385017432468226e-06, "loss": 0.0313, "step": 10844 }, { "epoch": 2.8, "learning_rate": 1.2353573816042906e-06, "loss": 0.0318, "step": 10845 }, { "epoch": 2.8, "learning_rate": 1.2322169666301952e-06, "loss": 0.0331, "step": 10846 }, { "epoch": 2.8, "learning_rate": 1.229080498578722e-06, "loss": 0.0343, "step": 10847 }, { "epoch": 2.8, "learning_rate": 1.2259479777037008e-06, "loss": 0.0283, "step": 10848 }, { "epoch": 2.8, "learning_rate": 1.2228194042586682e-06, "loss": 0.0394, "step": 10849 }, { "epoch": 2.8, "learning_rate": 1.219694778496816e-06, "loss": 0.0274, "step": 10850 }, { "epoch": 2.8, "learning_rate": 1.2165741006710307e-06, "loss": 0.0254, "step": 10851 }, { "epoch": 2.8, "learning_rate": 1.213457371033888e-06, "loss": 0.0295, "step": 10852 }, { "epoch": 2.8, "learning_rate": 1.2103445898376253e-06, "loss": 0.0266, "step": 10853 }, { "epoch": 2.8, "learning_rate": 1.2072357573341743e-06, "loss": 0.0288, "step": 10854 }, { "epoch": 2.8, "learning_rate": 1.2041308737751445e-06, "loss": 0.0251, "step": 10855 }, { "epoch": 2.8, "learning_rate": 1.201029939411813e-06, "loss": 0.0213, "step": 10856 }, { "epoch": 2.8, "learning_rate": 1.1979329544951567e-06, "loss": 0.0244, "step": 10857 }, { "epoch": 2.8, "learning_rate": 1.1948399192758198e-06, "loss": 0.0307, "step": 10858 }, { "epoch": 2.8, "learning_rate": 1.1917508340041294e-06, "loss": 0.0272, "step": 10859 }, { "epoch": 2.8, "learning_rate": 1.1886656989301026e-06, "loss": 0.0231, "step": 10860 }, { "epoch": 2.8, "learning_rate": 1.1855845143034172e-06, "loss": 0.0221, "step": 10861 }, { "epoch": 2.8, "learning_rate": 1.1825072803734516e-06, "loss": 0.0251, "step": 10862 }, { "epoch": 2.8, "learning_rate": 1.1794339973892509e-06, "loss": 0.0227, "step": 10863 }, { "epoch": 2.8, "learning_rate": 1.1763646655995497e-06, "loss": 0.0294, "step": 10864 }, { "epoch": 2.8, "learning_rate": 1.1732992852527547e-06, "loss": 0.0262, "step": 10865 }, { "epoch": 2.8, "learning_rate": 1.1702378565969506e-06, "loss": 0.0266, "step": 10866 }, { "epoch": 2.8, "learning_rate": 1.167180379879923e-06, "loss": 0.0261, "step": 10867 }, { "epoch": 2.8, "learning_rate": 1.1641268553491126e-06, "loss": 0.0264, "step": 10868 }, { "epoch": 2.8, "learning_rate": 1.1610772832516436e-06, "loss": 0.0267, "step": 10869 }, { "epoch": 2.81, "learning_rate": 1.1580316638343469e-06, "loss": 0.0221, "step": 10870 }, { "epoch": 2.81, "learning_rate": 1.1549899973436974e-06, "loss": 0.0286, "step": 10871 }, { "epoch": 2.81, "learning_rate": 1.1519522840258644e-06, "loss": 0.0211, "step": 10872 }, { "epoch": 2.81, "learning_rate": 1.1489185241267132e-06, "loss": 0.0244, "step": 10873 }, { "epoch": 2.81, "learning_rate": 1.1458887178917577e-06, "loss": 0.0223, "step": 10874 }, { "epoch": 2.81, "learning_rate": 1.1428628655662243e-06, "loss": 0.0237, "step": 10875 }, { "epoch": 2.81, "learning_rate": 1.1398409673950006e-06, "loss": 0.0243, "step": 10876 }, { "epoch": 2.81, "learning_rate": 1.1368230236226518e-06, "loss": 0.0267, "step": 10877 }, { "epoch": 2.81, "learning_rate": 1.1338090344934382e-06, "loss": 0.0269, "step": 10878 }, { "epoch": 2.81, "learning_rate": 1.1307990002512814e-06, "loss": 0.0232, "step": 10879 }, { "epoch": 2.81, "learning_rate": 1.1277929211397976e-06, "loss": 0.0253, "step": 10880 }, { "epoch": 2.81, "learning_rate": 1.124790797402281e-06, "loss": 0.0302, "step": 10881 }, { "epoch": 2.81, "learning_rate": 1.1217926292816927e-06, "loss": 0.0247, "step": 10882 }, { "epoch": 2.81, "learning_rate": 1.1187984170206944e-06, "loss": 0.0206, "step": 10883 }, { "epoch": 2.81, "learning_rate": 1.1158081608616088e-06, "loss": 0.0294, "step": 10884 }, { "epoch": 2.81, "learning_rate": 1.112821861046448e-06, "loss": 0.0252, "step": 10885 }, { "epoch": 2.81, "learning_rate": 1.1098395178169074e-06, "loss": 0.0219, "step": 10886 }, { "epoch": 2.81, "learning_rate": 1.106861131414344e-06, "loss": 0.0235, "step": 10887 }, { "epoch": 2.81, "learning_rate": 1.103886702079826e-06, "loss": 0.0272, "step": 10888 }, { "epoch": 2.81, "learning_rate": 1.1009162300540722e-06, "loss": 0.0199, "step": 10889 }, { "epoch": 2.81, "learning_rate": 1.0979497155774898e-06, "loss": 0.0189, "step": 10890 }, { "epoch": 2.81, "learning_rate": 1.0949871588901705e-06, "loss": 0.0295, "step": 10891 }, { "epoch": 2.81, "learning_rate": 1.0920285602318837e-06, "loss": 0.029, "step": 10892 }, { "epoch": 2.81, "learning_rate": 1.0890739198420763e-06, "loss": 0.0331, "step": 10893 }, { "epoch": 2.81, "learning_rate": 1.0861232379598795e-06, "loss": 0.0281, "step": 10894 }, { "epoch": 2.81, "learning_rate": 1.0831765148240968e-06, "loss": 0.0289, "step": 10895 }, { "epoch": 2.81, "learning_rate": 1.080233750673215e-06, "loss": 0.0216, "step": 10896 }, { "epoch": 2.81, "learning_rate": 1.0772949457454051e-06, "loss": 0.0224, "step": 10897 }, { "epoch": 2.81, "learning_rate": 1.0743601002785097e-06, "loss": 0.0336, "step": 10898 }, { "epoch": 2.81, "learning_rate": 1.0714292145100558e-06, "loss": 0.0251, "step": 10899 }, { "epoch": 2.81, "learning_rate": 1.0685022886772478e-06, "loss": 0.0219, "step": 10900 }, { "epoch": 2.81, "learning_rate": 1.0655793230169742e-06, "loss": 0.0229, "step": 10901 }, { "epoch": 2.81, "learning_rate": 1.0626603177657957e-06, "loss": 0.0289, "step": 10902 }, { "epoch": 2.81, "learning_rate": 1.0597452731599512e-06, "loss": 0.0292, "step": 10903 }, { "epoch": 2.81, "learning_rate": 1.0568341894353794e-06, "loss": 0.0323, "step": 10904 }, { "epoch": 2.81, "learning_rate": 1.0539270668276646e-06, "loss": 0.023, "step": 10905 }, { "epoch": 2.81, "learning_rate": 1.0510239055721072e-06, "loss": 0.0198, "step": 10906 }, { "epoch": 2.81, "learning_rate": 1.0481247059036526e-06, "loss": 0.022, "step": 10907 }, { "epoch": 2.81, "learning_rate": 1.0452294680569464e-06, "loss": 0.0266, "step": 10908 }, { "epoch": 2.82, "learning_rate": 1.0423381922663178e-06, "loss": 0.0247, "step": 10909 }, { "epoch": 2.82, "learning_rate": 1.0394508787657575e-06, "loss": 0.0234, "step": 10910 }, { "epoch": 2.82, "learning_rate": 1.0365675277889453e-06, "loss": 0.0254, "step": 10911 }, { "epoch": 2.82, "learning_rate": 1.0336881395692388e-06, "loss": 0.0233, "step": 10912 }, { "epoch": 2.82, "learning_rate": 1.0308127143396796e-06, "loss": 0.0282, "step": 10913 }, { "epoch": 2.82, "learning_rate": 1.0279412523329812e-06, "loss": 0.0297, "step": 10914 }, { "epoch": 2.82, "learning_rate": 1.0250737537815413e-06, "loss": 0.028, "step": 10915 }, { "epoch": 2.82, "learning_rate": 1.0222102189174298e-06, "loss": 0.0364, "step": 10916 }, { "epoch": 2.82, "learning_rate": 1.0193506479724113e-06, "loss": 0.0268, "step": 10917 }, { "epoch": 2.82, "learning_rate": 1.0164950411779117e-06, "loss": 0.0261, "step": 10918 }, { "epoch": 2.82, "learning_rate": 1.0136433987650463e-06, "loss": 0.0275, "step": 10919 }, { "epoch": 2.82, "learning_rate": 1.010795720964608e-06, "loss": 0.0226, "step": 10920 }, { "epoch": 2.82, "learning_rate": 1.0079520080070627e-06, "loss": 0.0222, "step": 10921 }, { "epoch": 2.82, "learning_rate": 1.0051122601225705e-06, "loss": 0.028, "step": 10922 }, { "epoch": 2.82, "learning_rate": 1.0022764775409532e-06, "loss": 0.0364, "step": 10923 }, { "epoch": 2.82, "learning_rate": 9.994446604917163e-07, "loss": 0.0268, "step": 10924 }, { "epoch": 2.82, "learning_rate": 9.96616809204054e-07, "loss": 0.0294, "step": 10925 }, { "epoch": 2.82, "learning_rate": 9.937929239068277e-07, "loss": 0.0273, "step": 10926 }, { "epoch": 2.82, "learning_rate": 9.909730048285937e-07, "loss": 0.0327, "step": 10927 }, { "epoch": 2.82, "learning_rate": 9.881570521975636e-07, "loss": 0.0271, "step": 10928 }, { "epoch": 2.82, "learning_rate": 9.853450662416385e-07, "loss": 0.0356, "step": 10929 }, { "epoch": 2.82, "learning_rate": 9.825370471884199e-07, "loss": 0.0305, "step": 10930 }, { "epoch": 2.82, "learning_rate": 9.797329952651478e-07, "loss": 0.022, "step": 10931 }, { "epoch": 2.82, "learning_rate": 9.769329106987745e-07, "loss": 0.0261, "step": 10932 }, { "epoch": 2.82, "learning_rate": 9.74136793715924e-07, "loss": 0.0277, "step": 10933 }, { "epoch": 2.82, "learning_rate": 9.713446445428764e-07, "loss": 0.0234, "step": 10934 }, { "epoch": 2.82, "learning_rate": 9.685564634056232e-07, "loss": 0.0214, "step": 10935 }, { "epoch": 2.82, "learning_rate": 9.657722505298228e-07, "loss": 0.0298, "step": 10936 }, { "epoch": 2.82, "learning_rate": 9.629920061407948e-07, "loss": 0.0295, "step": 10937 }, { "epoch": 2.82, "learning_rate": 9.60215730463565e-07, "loss": 0.029, "step": 10938 }, { "epoch": 2.82, "learning_rate": 9.574434237228147e-07, "loss": 0.0237, "step": 10939 }, { "epoch": 2.82, "learning_rate": 9.54675086142931e-07, "loss": 0.0306, "step": 10940 }, { "epoch": 2.82, "learning_rate": 9.51910717947957e-07, "loss": 0.0223, "step": 10941 }, { "epoch": 2.82, "learning_rate": 9.491503193616136e-07, "loss": 0.0264, "step": 10942 }, { "epoch": 2.82, "learning_rate": 9.463938906073166e-07, "loss": 0.0344, "step": 10943 }, { "epoch": 2.82, "learning_rate": 9.436414319081432e-07, "loss": 0.035, "step": 10944 }, { "epoch": 2.82, "learning_rate": 9.408929434868763e-07, "loss": 0.0316, "step": 10945 }, { "epoch": 2.82, "learning_rate": 9.381484255659378e-07, "loss": 0.0276, "step": 10946 }, { "epoch": 2.83, "learning_rate": 9.354078783674614e-07, "loss": 0.0282, "step": 10947 }, { "epoch": 2.83, "learning_rate": 9.326713021132472e-07, "loss": 0.0306, "step": 10948 }, { "epoch": 2.83, "learning_rate": 9.299386970247737e-07, "loss": 0.0355, "step": 10949 }, { "epoch": 2.83, "learning_rate": 9.272100633231917e-07, "loss": 0.0319, "step": 10950 }, { "epoch": 2.83, "learning_rate": 9.244854012293525e-07, "loss": 0.0263, "step": 10951 }, { "epoch": 2.83, "learning_rate": 9.217647109637518e-07, "loss": 0.0313, "step": 10952 }, { "epoch": 2.83, "learning_rate": 9.190479927466023e-07, "loss": 0.0203, "step": 10953 }, { "epoch": 2.83, "learning_rate": 9.16335246797767e-07, "loss": 0.0233, "step": 10954 }, { "epoch": 2.83, "learning_rate": 9.136264733367983e-07, "loss": 0.0347, "step": 10955 }, { "epoch": 2.83, "learning_rate": 9.109216725829206e-07, "loss": 0.0254, "step": 10956 }, { "epoch": 2.83, "learning_rate": 9.082208447550478e-07, "loss": 0.021, "step": 10957 }, { "epoch": 2.83, "learning_rate": 9.055239900717661e-07, "loss": 0.018, "step": 10958 }, { "epoch": 2.83, "learning_rate": 9.028311087513341e-07, "loss": 0.0278, "step": 10959 }, { "epoch": 2.83, "learning_rate": 9.001422010116944e-07, "loss": 0.0163, "step": 10960 }, { "epoch": 2.83, "learning_rate": 8.974572670704784e-07, "loss": 0.026, "step": 10961 }, { "epoch": 2.83, "learning_rate": 8.947763071449733e-07, "loss": 0.0266, "step": 10962 }, { "epoch": 2.83, "learning_rate": 8.920993214521722e-07, "loss": 0.026, "step": 10963 }, { "epoch": 2.83, "learning_rate": 8.894263102087186e-07, "loss": 0.0268, "step": 10964 }, { "epoch": 2.83, "learning_rate": 8.867572736309504e-07, "loss": 0.0177, "step": 10965 }, { "epoch": 2.83, "learning_rate": 8.840922119348838e-07, "loss": 0.0204, "step": 10966 }, { "epoch": 2.83, "learning_rate": 8.814311253362073e-07, "loss": 0.0309, "step": 10967 }, { "epoch": 2.83, "learning_rate": 8.787740140502876e-07, "loss": 0.0237, "step": 10968 }, { "epoch": 2.83, "learning_rate": 8.761208782921804e-07, "loss": 0.0189, "step": 10969 }, { "epoch": 2.83, "learning_rate": 8.734717182766083e-07, "loss": 0.0293, "step": 10970 }, { "epoch": 2.83, "learning_rate": 8.708265342179722e-07, "loss": 0.0239, "step": 10971 }, { "epoch": 2.83, "learning_rate": 8.681853263303674e-07, "loss": 0.0275, "step": 10972 }, { "epoch": 2.83, "learning_rate": 8.65548094827534e-07, "loss": 0.0313, "step": 10973 }, { "epoch": 2.83, "learning_rate": 8.629148399229292e-07, "loss": 0.0338, "step": 10974 }, { "epoch": 2.83, "learning_rate": 8.602855618296602e-07, "loss": 0.0224, "step": 10975 }, { "epoch": 2.83, "learning_rate": 8.576602607605289e-07, "loss": 0.0184, "step": 10976 }, { "epoch": 2.83, "learning_rate": 8.550389369280043e-07, "loss": 0.0351, "step": 10977 }, { "epoch": 2.83, "learning_rate": 8.524215905442334e-07, "loss": 0.0275, "step": 10978 }, { "epoch": 2.83, "learning_rate": 8.498082218210579e-07, "loss": 0.0269, "step": 10979 }, { "epoch": 2.83, "learning_rate": 8.471988309699807e-07, "loss": 0.0235, "step": 10980 }, { "epoch": 2.83, "learning_rate": 8.44593418202183e-07, "loss": 0.0266, "step": 10981 }, { "epoch": 2.83, "learning_rate": 8.41991983728535e-07, "loss": 0.0231, "step": 10982 }, { "epoch": 2.83, "learning_rate": 8.393945277595682e-07, "loss": 0.0338, "step": 10983 }, { "epoch": 2.83, "learning_rate": 8.368010505055201e-07, "loss": 0.0178, "step": 10984 }, { "epoch": 2.83, "learning_rate": 8.342115521762728e-07, "loss": 0.0243, "step": 10985 }, { "epoch": 2.84, "learning_rate": 8.31626032981403e-07, "loss": 0.0288, "step": 10986 }, { "epoch": 2.84, "learning_rate": 8.290444931301766e-07, "loss": 0.0222, "step": 10987 }, { "epoch": 2.84, "learning_rate": 8.2646693283151e-07, "loss": 0.0285, "step": 10988 }, { "epoch": 2.84, "learning_rate": 8.238933522940306e-07, "loss": 0.0219, "step": 10989 }, { "epoch": 2.84, "learning_rate": 8.213237517260108e-07, "loss": 0.0272, "step": 10990 }, { "epoch": 2.84, "learning_rate": 8.187581313354176e-07, "loss": 0.0242, "step": 10991 }, { "epoch": 2.84, "learning_rate": 8.161964913299069e-07, "loss": 0.0189, "step": 10992 }, { "epoch": 2.84, "learning_rate": 8.136388319167853e-07, "loss": 0.0199, "step": 10993 }, { "epoch": 2.84, "learning_rate": 8.110851533030595e-07, "loss": 0.0203, "step": 10994 }, { "epoch": 2.84, "learning_rate": 8.085354556954028e-07, "loss": 0.0279, "step": 10995 }, { "epoch": 2.84, "learning_rate": 8.059897393001669e-07, "loss": 0.0297, "step": 10996 }, { "epoch": 2.84, "learning_rate": 8.034480043233983e-07, "loss": 0.0308, "step": 10997 }, { "epoch": 2.84, "learning_rate": 8.009102509707933e-07, "loss": 0.0287, "step": 10998 }, { "epoch": 2.84, "learning_rate": 7.983764794477433e-07, "loss": 0.0244, "step": 10999 }, { "epoch": 2.84, "learning_rate": 7.958466899593176e-07, "loss": 0.0275, "step": 11000 }, { "epoch": 2.84, "learning_rate": 7.933208827102523e-07, "loss": 0.021, "step": 11001 }, { "epoch": 2.84, "learning_rate": 7.907990579049785e-07, "loss": 0.0218, "step": 11002 }, { "epoch": 2.84, "learning_rate": 7.882812157475883e-07, "loss": 0.0262, "step": 11003 }, { "epoch": 2.84, "learning_rate": 7.857673564418577e-07, "loss": 0.0286, "step": 11004 }, { "epoch": 2.84, "learning_rate": 7.832574801912463e-07, "loss": 0.0322, "step": 11005 }, { "epoch": 2.84, "learning_rate": 7.807515871988802e-07, "loss": 0.0204, "step": 11006 }, { "epoch": 2.84, "learning_rate": 7.782496776675697e-07, "loss": 0.0293, "step": 11007 }, { "epoch": 2.84, "learning_rate": 7.75751751799808e-07, "loss": 0.0313, "step": 11008 }, { "epoch": 2.84, "learning_rate": 7.732578097977506e-07, "loss": 0.0274, "step": 11009 }, { "epoch": 2.84, "learning_rate": 7.707678518632466e-07, "loss": 0.0266, "step": 11010 }, { "epoch": 2.84, "learning_rate": 7.682818781978129e-07, "loss": 0.0252, "step": 11011 }, { "epoch": 2.84, "learning_rate": 7.657998890026441e-07, "loss": 0.0286, "step": 11012 }, { "epoch": 2.84, "learning_rate": 7.633218844786238e-07, "loss": 0.0277, "step": 11013 }, { "epoch": 2.84, "learning_rate": 7.608478648262918e-07, "loss": 0.0218, "step": 11014 }, { "epoch": 2.84, "learning_rate": 7.583778302458877e-07, "loss": 0.0275, "step": 11015 }, { "epoch": 2.84, "learning_rate": 7.559117809373184e-07, "loss": 0.0267, "step": 11016 }, { "epoch": 2.84, "learning_rate": 7.534497171001631e-07, "loss": 0.0281, "step": 11017 }, { "epoch": 2.84, "learning_rate": 7.509916389336847e-07, "loss": 0.0302, "step": 11018 }, { "epoch": 2.84, "learning_rate": 7.485375466368294e-07, "loss": 0.0263, "step": 11019 }, { "epoch": 2.84, "learning_rate": 7.460874404082052e-07, "loss": 0.0282, "step": 11020 }, { "epoch": 2.84, "learning_rate": 7.436413204461146e-07, "loss": 0.0283, "step": 11021 }, { "epoch": 2.84, "learning_rate": 7.411991869485269e-07, "loss": 0.0254, "step": 11022 }, { "epoch": 2.84, "learning_rate": 7.387610401130895e-07, "loss": 0.0327, "step": 11023 }, { "epoch": 2.84, "learning_rate": 7.363268801371281e-07, "loss": 0.0275, "step": 11024 }, { "epoch": 2.85, "learning_rate": 7.338967072176462e-07, "loss": 0.0266, "step": 11025 }, { "epoch": 2.85, "learning_rate": 7.314705215513307e-07, "loss": 0.0233, "step": 11026 }, { "epoch": 2.85, "learning_rate": 7.290483233345358e-07, "loss": 0.0254, "step": 11027 }, { "epoch": 2.85, "learning_rate": 7.266301127632991e-07, "loss": 0.0257, "step": 11028 }, { "epoch": 2.85, "learning_rate": 7.242158900333307e-07, "loss": 0.0344, "step": 11029 }, { "epoch": 2.85, "learning_rate": 7.218056553400187e-07, "loss": 0.0231, "step": 11030 }, { "epoch": 2.85, "learning_rate": 7.193994088784406e-07, "loss": 0.0247, "step": 11031 }, { "epoch": 2.85, "learning_rate": 7.169971508433293e-07, "loss": 0.0293, "step": 11032 }, { "epoch": 2.85, "learning_rate": 7.145988814291127e-07, "loss": 0.0296, "step": 11033 }, { "epoch": 2.85, "learning_rate": 7.122046008298966e-07, "loss": 0.0292, "step": 11034 }, { "epoch": 2.85, "learning_rate": 7.098143092394427e-07, "loss": 0.0303, "step": 11035 }, { "epoch": 2.85, "learning_rate": 7.074280068512129e-07, "loss": 0.0269, "step": 11036 }, { "epoch": 2.85, "learning_rate": 7.050456938583417e-07, "loss": 0.0285, "step": 11037 }, { "epoch": 2.85, "learning_rate": 7.026673704536302e-07, "loss": 0.0257, "step": 11038 }, { "epoch": 2.85, "learning_rate": 7.002930368295635e-07, "loss": 0.0276, "step": 11039 }, { "epoch": 2.85, "learning_rate": 6.979226931783045e-07, "loss": 0.0218, "step": 11040 }, { "epoch": 2.85, "learning_rate": 6.955563396916942e-07, "loss": 0.0342, "step": 11041 }, { "epoch": 2.85, "learning_rate": 6.931939765612461e-07, "loss": 0.0298, "step": 11042 }, { "epoch": 2.85, "learning_rate": 6.908356039781516e-07, "loss": 0.0277, "step": 11043 }, { "epoch": 2.85, "learning_rate": 6.884812221332915e-07, "loss": 0.0316, "step": 11044 }, { "epoch": 2.85, "learning_rate": 6.861308312172021e-07, "loss": 0.0292, "step": 11045 }, { "epoch": 2.85, "learning_rate": 6.837844314201037e-07, "loss": 0.0274, "step": 11046 }, { "epoch": 2.85, "learning_rate": 6.814420229319108e-07, "loss": 0.0266, "step": 11047 }, { "epoch": 2.85, "learning_rate": 6.791036059421941e-07, "loss": 0.0324, "step": 11048 }, { "epoch": 2.85, "learning_rate": 6.767691806402133e-07, "loss": 0.0293, "step": 11049 }, { "epoch": 2.85, "learning_rate": 6.74438747214895e-07, "loss": 0.0277, "step": 11050 }, { "epoch": 2.85, "learning_rate": 6.72112305854844e-07, "loss": 0.0125, "step": 11051 }, { "epoch": 2.85, "learning_rate": 6.697898567483596e-07, "loss": 0.0288, "step": 11052 }, { "epoch": 2.85, "learning_rate": 6.674714000833915e-07, "loss": 0.0232, "step": 11053 }, { "epoch": 2.85, "learning_rate": 6.651569360475896e-07, "loss": 0.0292, "step": 11054 }, { "epoch": 2.85, "learning_rate": 6.62846464828265e-07, "loss": 0.0201, "step": 11055 }, { "epoch": 2.85, "learning_rate": 6.605399866124073e-07, "loss": 0.0228, "step": 11056 }, { "epoch": 2.85, "learning_rate": 6.582375015866948e-07, "loss": 0.0218, "step": 11057 }, { "epoch": 2.85, "learning_rate": 6.55939009937473e-07, "loss": 0.0226, "step": 11058 }, { "epoch": 2.85, "learning_rate": 6.536445118507594e-07, "loss": 0.0255, "step": 11059 }, { "epoch": 2.85, "learning_rate": 6.513540075122615e-07, "loss": 0.022, "step": 11060 }, { "epoch": 2.85, "learning_rate": 6.490674971073473e-07, "loss": 0.0224, "step": 11061 }, { "epoch": 2.85, "learning_rate": 6.467849808210857e-07, "loss": 0.0177, "step": 11062 }, { "epoch": 2.85, "learning_rate": 6.445064588381955e-07, "loss": 0.0371, "step": 11063 }, { "epoch": 2.86, "learning_rate": 6.422319313430902e-07, "loss": 0.022, "step": 11064 }, { "epoch": 2.86, "learning_rate": 6.399613985198504e-07, "loss": 0.0199, "step": 11065 }, { "epoch": 2.86, "learning_rate": 6.376948605522403e-07, "loss": 0.0262, "step": 11066 }, { "epoch": 2.86, "learning_rate": 6.354323176236965e-07, "loss": 0.028, "step": 11067 }, { "epoch": 2.86, "learning_rate": 6.331737699173335e-07, "loss": 0.0279, "step": 11068 }, { "epoch": 2.86, "learning_rate": 6.309192176159384e-07, "loss": 0.0222, "step": 11069 }, { "epoch": 2.86, "learning_rate": 6.28668660901982e-07, "loss": 0.0219, "step": 11070 }, { "epoch": 2.86, "learning_rate": 6.264220999576132e-07, "loss": 0.0236, "step": 11071 }, { "epoch": 2.86, "learning_rate": 6.241795349646474e-07, "loss": 0.0311, "step": 11072 }, { "epoch": 2.86, "learning_rate": 6.219409661045839e-07, "loss": 0.0211, "step": 11073 }, { "epoch": 2.86, "learning_rate": 6.197063935585945e-07, "loss": 0.0307, "step": 11074 }, { "epoch": 2.86, "learning_rate": 6.174758175075291e-07, "loss": 0.0217, "step": 11075 }, { "epoch": 2.86, "learning_rate": 6.152492381319264e-07, "loss": 0.0256, "step": 11076 }, { "epoch": 2.86, "learning_rate": 6.130266556119701e-07, "loss": 0.0185, "step": 11077 }, { "epoch": 2.86, "learning_rate": 6.108080701275609e-07, "loss": 0.0255, "step": 11078 }, { "epoch": 2.86, "learning_rate": 6.085934818582384e-07, "loss": 0.0242, "step": 11079 }, { "epoch": 2.86, "learning_rate": 6.06382890983248e-07, "loss": 0.0247, "step": 11080 }, { "epoch": 2.86, "learning_rate": 6.041762976814913e-07, "loss": 0.0181, "step": 11081 }, { "epoch": 2.86, "learning_rate": 6.019737021315585e-07, "loss": 0.0247, "step": 11082 }, { "epoch": 2.86, "learning_rate": 5.997751045117184e-07, "loss": 0.0232, "step": 11083 }, { "epoch": 2.86, "learning_rate": 5.975805049998951e-07, "loss": 0.0379, "step": 11084 }, { "epoch": 2.86, "learning_rate": 5.953899037737187e-07, "loss": 0.0233, "step": 11085 }, { "epoch": 2.86, "learning_rate": 5.932033010104754e-07, "loss": 0.0238, "step": 11086 }, { "epoch": 2.86, "learning_rate": 5.91020696887129e-07, "loss": 0.0216, "step": 11087 }, { "epoch": 2.86, "learning_rate": 5.888420915803272e-07, "loss": 0.0311, "step": 11088 }, { "epoch": 2.86, "learning_rate": 5.866674852663956e-07, "loss": 0.0238, "step": 11089 }, { "epoch": 2.86, "learning_rate": 5.844968781213266e-07, "loss": 0.0244, "step": 11090 }, { "epoch": 2.86, "learning_rate": 5.823302703207967e-07, "loss": 0.0302, "step": 11091 }, { "epoch": 2.86, "learning_rate": 5.801676620401542e-07, "loss": 0.0232, "step": 11092 }, { "epoch": 2.86, "learning_rate": 5.780090534544258e-07, "loss": 0.0287, "step": 11093 }, { "epoch": 2.86, "learning_rate": 5.758544447383163e-07, "loss": 0.0251, "step": 11094 }, { "epoch": 2.86, "learning_rate": 5.737038360662028e-07, "loss": 0.0216, "step": 11095 }, { "epoch": 2.86, "learning_rate": 5.715572276121351e-07, "loss": 0.0278, "step": 11096 }, { "epoch": 2.86, "learning_rate": 5.694146195498517e-07, "loss": 0.028, "step": 11097 }, { "epoch": 2.86, "learning_rate": 5.672760120527643e-07, "loss": 0.0228, "step": 11098 }, { "epoch": 2.86, "learning_rate": 5.651414052939453e-07, "loss": 0.0303, "step": 11099 }, { "epoch": 2.86, "learning_rate": 5.63010799446162e-07, "loss": 0.0247, "step": 11100 }, { "epoch": 2.86, "learning_rate": 5.608841946818489e-07, "loss": 0.0236, "step": 11101 }, { "epoch": 2.87, "learning_rate": 5.587615911731181e-07, "loss": 0.0359, "step": 11102 }, { "epoch": 2.87, "learning_rate": 5.566429890917601e-07, "loss": 0.0249, "step": 11103 }, { "epoch": 2.87, "learning_rate": 5.545283886092434e-07, "loss": 0.0266, "step": 11104 }, { "epoch": 2.87, "learning_rate": 5.524177898966976e-07, "loss": 0.0357, "step": 11105 }, { "epoch": 2.87, "learning_rate": 5.503111931249472e-07, "loss": 0.025, "step": 11106 }, { "epoch": 2.87, "learning_rate": 5.482085984644836e-07, "loss": 0.0274, "step": 11107 }, { "epoch": 2.87, "learning_rate": 5.461100060854818e-07, "loss": 0.0255, "step": 11108 }, { "epoch": 2.87, "learning_rate": 5.440154161577782e-07, "loss": 0.0211, "step": 11109 }, { "epoch": 2.87, "learning_rate": 5.419248288508982e-07, "loss": 0.0184, "step": 11110 }, { "epoch": 2.87, "learning_rate": 5.3983824433404e-07, "loss": 0.028, "step": 11111 }, { "epoch": 2.87, "learning_rate": 5.377556627760794e-07, "loss": 0.0324, "step": 11112 }, { "epoch": 2.87, "learning_rate": 5.356770843455594e-07, "loss": 0.0218, "step": 11113 }, { "epoch": 2.87, "learning_rate": 5.336025092107122e-07, "loss": 0.0227, "step": 11114 }, { "epoch": 2.87, "learning_rate": 5.315319375394312e-07, "loss": 0.0285, "step": 11115 }, { "epoch": 2.87, "learning_rate": 5.294653694993046e-07, "loss": 0.029, "step": 11116 }, { "epoch": 2.87, "learning_rate": 5.274028052575819e-07, "loss": 0.0236, "step": 11117 }, { "epoch": 2.87, "learning_rate": 5.253442449811851e-07, "loss": 0.0218, "step": 11118 }, { "epoch": 2.87, "learning_rate": 5.232896888367311e-07, "loss": 0.0395, "step": 11119 }, { "epoch": 2.87, "learning_rate": 5.212391369904923e-07, "loss": 0.0282, "step": 11120 }, { "epoch": 2.87, "learning_rate": 5.191925896084304e-07, "loss": 0.0256, "step": 11121 }, { "epoch": 2.87, "learning_rate": 5.171500468561796e-07, "loss": 0.0251, "step": 11122 }, { "epoch": 2.87, "learning_rate": 5.151115088990466e-07, "loss": 0.0227, "step": 11123 }, { "epoch": 2.87, "learning_rate": 5.13076975902016e-07, "loss": 0.0248, "step": 11124 }, { "epoch": 2.87, "learning_rate": 5.110464480297505e-07, "loss": 0.0337, "step": 11125 }, { "epoch": 2.87, "learning_rate": 5.090199254465855e-07, "loss": 0.0263, "step": 11126 }, { "epoch": 2.87, "learning_rate": 5.069974083165341e-07, "loss": 0.0253, "step": 11127 }, { "epoch": 2.87, "learning_rate": 5.04978896803282e-07, "loss": 0.027, "step": 11128 }, { "epoch": 2.87, "learning_rate": 5.029643910701987e-07, "loss": 0.0238, "step": 11129 }, { "epoch": 2.87, "learning_rate": 5.009538912803202e-07, "loss": 0.0294, "step": 11130 }, { "epoch": 2.87, "learning_rate": 4.989473975963666e-07, "loss": 0.0254, "step": 11131 }, { "epoch": 2.87, "learning_rate": 4.969449101807245e-07, "loss": 0.0252, "step": 11132 }, { "epoch": 2.87, "learning_rate": 4.949464291954642e-07, "loss": 0.0279, "step": 11133 }, { "epoch": 2.87, "learning_rate": 4.929519548023287e-07, "loss": 0.0259, "step": 11134 }, { "epoch": 2.87, "learning_rate": 4.909614871627332e-07, "loss": 0.0215, "step": 11135 }, { "epoch": 2.87, "learning_rate": 4.889750264377712e-07, "loss": 0.025, "step": 11136 }, { "epoch": 2.87, "learning_rate": 4.869925727882252e-07, "loss": 0.0236, "step": 11137 }, { "epoch": 2.87, "learning_rate": 4.850141263745223e-07, "loss": 0.031, "step": 11138 }, { "epoch": 2.87, "learning_rate": 4.83039687356801e-07, "loss": 0.0243, "step": 11139 }, { "epoch": 2.87, "learning_rate": 4.810692558948504e-07, "loss": 0.0274, "step": 11140 }, { "epoch": 2.88, "learning_rate": 4.791028321481428e-07, "loss": 0.0304, "step": 11141 }, { "epoch": 2.88, "learning_rate": 4.771404162758286e-07, "loss": 0.0235, "step": 11142 }, { "epoch": 2.88, "learning_rate": 4.751820084367309e-07, "loss": 0.0227, "step": 11143 }, { "epoch": 2.88, "learning_rate": 4.732276087893561e-07, "loss": 0.0314, "step": 11144 }, { "epoch": 2.88, "learning_rate": 4.712772174918723e-07, "loss": 0.0288, "step": 11145 }, { "epoch": 2.88, "learning_rate": 4.6933083470212524e-07, "loss": 0.0288, "step": 11146 }, { "epoch": 2.88, "learning_rate": 4.6738846057765575e-07, "loss": 0.0282, "step": 11147 }, { "epoch": 2.88, "learning_rate": 4.6545009527565465e-07, "loss": 0.0349, "step": 11148 }, { "epoch": 2.88, "learning_rate": 4.635157389530076e-07, "loss": 0.0346, "step": 11149 }, { "epoch": 2.88, "learning_rate": 4.6158539176626714e-07, "loss": 0.0386, "step": 11150 }, { "epoch": 2.88, "learning_rate": 4.5965905387165273e-07, "loss": 0.0227, "step": 11151 }, { "epoch": 2.88, "learning_rate": 4.577367254250786e-07, "loss": 0.0372, "step": 11152 }, { "epoch": 2.88, "learning_rate": 4.558184065821147e-07, "loss": 0.02, "step": 11153 }, { "epoch": 2.88, "learning_rate": 4.539040974980258e-07, "loss": 0.0398, "step": 11154 }, { "epoch": 2.88, "learning_rate": 4.5199379832774333e-07, "loss": 0.0219, "step": 11155 }, { "epoch": 2.88, "learning_rate": 4.500875092258661e-07, "loss": 0.0276, "step": 11156 }, { "epoch": 2.88, "learning_rate": 4.4818523034667603e-07, "loss": 0.0334, "step": 11157 }, { "epoch": 2.88, "learning_rate": 4.46286961844139e-07, "loss": 0.0249, "step": 11158 }, { "epoch": 2.88, "learning_rate": 4.4439270387187646e-07, "loss": 0.0194, "step": 11159 }, { "epoch": 2.88, "learning_rate": 4.425024565832103e-07, "loss": 0.0262, "step": 11160 }, { "epoch": 2.88, "learning_rate": 4.40616220131107e-07, "loss": 0.0236, "step": 11161 }, { "epoch": 2.88, "learning_rate": 4.3873399466823895e-07, "loss": 0.0207, "step": 11162 }, { "epoch": 2.88, "learning_rate": 4.368557803469342e-07, "loss": 0.0266, "step": 11163 }, { "epoch": 2.88, "learning_rate": 4.3498157731919896e-07, "loss": 0.0256, "step": 11164 }, { "epoch": 2.88, "learning_rate": 4.331113857367286e-07, "loss": 0.0269, "step": 11165 }, { "epoch": 2.88, "learning_rate": 4.312452057508687e-07, "loss": 0.026, "step": 11166 }, { "epoch": 2.88, "learning_rate": 4.293830375126706e-07, "loss": 0.0299, "step": 11167 }, { "epoch": 2.88, "learning_rate": 4.2752488117283604e-07, "loss": 0.0215, "step": 11168 }, { "epoch": 2.88, "learning_rate": 4.256707368817503e-07, "loss": 0.0241, "step": 11169 }, { "epoch": 2.88, "learning_rate": 4.2382060478948215e-07, "loss": 0.0235, "step": 11170 }, { "epoch": 2.88, "learning_rate": 4.219744850457563e-07, "loss": 0.0252, "step": 11171 }, { "epoch": 2.88, "learning_rate": 4.201323777999977e-07, "loss": 0.0275, "step": 11172 }, { "epoch": 2.88, "learning_rate": 4.1829428320128705e-07, "loss": 0.0206, "step": 11173 }, { "epoch": 2.88, "learning_rate": 4.164602013983887e-07, "loss": 0.0221, "step": 11174 }, { "epoch": 2.88, "learning_rate": 4.1463013253973395e-07, "loss": 0.0241, "step": 11175 }, { "epoch": 2.88, "learning_rate": 4.1280407677344866e-07, "loss": 0.0263, "step": 11176 }, { "epoch": 2.88, "learning_rate": 4.109820342473092e-07, "loss": 0.0207, "step": 11177 }, { "epoch": 2.88, "learning_rate": 4.0916400510878086e-07, "loss": 0.0315, "step": 11178 }, { "epoch": 2.88, "learning_rate": 4.0734998950500704e-07, "loss": 0.0283, "step": 11179 }, { "epoch": 2.89, "learning_rate": 4.055399875827981e-07, "loss": 0.0255, "step": 11180 }, { "epoch": 2.89, "learning_rate": 4.03733999488648e-07, "loss": 0.0224, "step": 11181 }, { "epoch": 2.89, "learning_rate": 4.01932025368712e-07, "loss": 0.0284, "step": 11182 }, { "epoch": 2.89, "learning_rate": 4.0013406536883457e-07, "loss": 0.0262, "step": 11183 }, { "epoch": 2.89, "learning_rate": 3.983401196345271e-07, "loss": 0.025, "step": 11184 }, { "epoch": 2.89, "learning_rate": 3.965501883109846e-07, "loss": 0.0258, "step": 11185 }, { "epoch": 2.89, "learning_rate": 3.9476427154306327e-07, "loss": 0.0242, "step": 11186 }, { "epoch": 2.89, "learning_rate": 3.9298236947530876e-07, "loss": 0.0267, "step": 11187 }, { "epoch": 2.89, "learning_rate": 3.9120448225193895e-07, "loss": 0.0276, "step": 11188 }, { "epoch": 2.89, "learning_rate": 3.8943061001683325e-07, "loss": 0.0215, "step": 11189 }, { "epoch": 2.89, "learning_rate": 3.876607529135712e-07, "loss": 0.0253, "step": 11190 }, { "epoch": 2.89, "learning_rate": 3.858949110853771e-07, "loss": 0.0225, "step": 11191 }, { "epoch": 2.89, "learning_rate": 3.8413308467517007e-07, "loss": 0.0262, "step": 11192 }, { "epoch": 2.89, "learning_rate": 3.823752738255471e-07, "loss": 0.0229, "step": 11193 }, { "epoch": 2.89, "learning_rate": 3.806214786787665e-07, "loss": 0.0276, "step": 11194 }, { "epoch": 2.89, "learning_rate": 3.788716993767705e-07, "loss": 0.0211, "step": 11195 }, { "epoch": 2.89, "learning_rate": 3.771259360611734e-07, "loss": 0.0242, "step": 11196 }, { "epoch": 2.89, "learning_rate": 3.7538418887326234e-07, "loss": 0.0255, "step": 11197 }, { "epoch": 2.89, "learning_rate": 3.736464579540078e-07, "loss": 0.0243, "step": 11198 }, { "epoch": 2.89, "learning_rate": 3.719127434440472e-07, "loss": 0.025, "step": 11199 }, { "epoch": 2.89, "learning_rate": 3.701830454836908e-07, "loss": 0.0258, "step": 11200 }, { "epoch": 2.89, "learning_rate": 3.6845736421293744e-07, "loss": 0.0251, "step": 11201 }, { "epoch": 2.89, "learning_rate": 3.667356997714366e-07, "loss": 0.0312, "step": 11202 }, { "epoch": 2.89, "learning_rate": 3.6501805229854915e-07, "loss": 0.0252, "step": 11203 }, { "epoch": 2.89, "learning_rate": 3.633044219332693e-07, "loss": 0.0255, "step": 11204 }, { "epoch": 2.89, "learning_rate": 3.615948088142973e-07, "loss": 0.0242, "step": 11205 }, { "epoch": 2.89, "learning_rate": 3.5988921308000023e-07, "loss": 0.024, "step": 11206 }, { "epoch": 2.89, "learning_rate": 3.58187634868401e-07, "loss": 0.0299, "step": 11207 }, { "epoch": 2.89, "learning_rate": 3.564900743172339e-07, "loss": 0.031, "step": 11208 }, { "epoch": 2.89, "learning_rate": 3.5479653156387236e-07, "loss": 0.0266, "step": 11209 }, { "epoch": 2.89, "learning_rate": 3.5310700674538456e-07, "loss": 0.0287, "step": 11210 }, { "epoch": 2.89, "learning_rate": 3.51421499998511e-07, "loss": 0.0309, "step": 11211 }, { "epoch": 2.89, "learning_rate": 3.49740011459665e-07, "loss": 0.0327, "step": 11212 }, { "epoch": 2.89, "learning_rate": 3.480625412649263e-07, "loss": 0.0254, "step": 11213 }, { "epoch": 2.89, "learning_rate": 3.4638908955006986e-07, "loss": 0.0246, "step": 11214 }, { "epoch": 2.89, "learning_rate": 3.447196564505262e-07, "loss": 0.0239, "step": 11215 }, { "epoch": 2.89, "learning_rate": 3.4305424210140935e-07, "loss": 0.0218, "step": 11216 }, { "epoch": 2.89, "learning_rate": 3.4139284663750046e-07, "loss": 0.0231, "step": 11217 }, { "epoch": 2.89, "learning_rate": 3.397354701932698e-07, "loss": 0.0252, "step": 11218 }, { "epoch": 2.9, "learning_rate": 3.380821129028489e-07, "loss": 0.0278, "step": 11219 }, { "epoch": 2.9, "learning_rate": 3.364327749000529e-07, "loss": 0.0279, "step": 11220 }, { "epoch": 2.9, "learning_rate": 3.3478745631836393e-07, "loss": 0.0261, "step": 11221 }, { "epoch": 2.9, "learning_rate": 3.3314615729094224e-07, "loss": 0.0285, "step": 11222 }, { "epoch": 2.9, "learning_rate": 3.315088779506259e-07, "loss": 0.0225, "step": 11223 }, { "epoch": 2.9, "learning_rate": 3.298756184299201e-07, "loss": 0.0293, "step": 11224 }, { "epoch": 2.9, "learning_rate": 3.282463788610135e-07, "loss": 0.0218, "step": 11225 }, { "epoch": 2.9, "learning_rate": 3.2662115937576734e-07, "loss": 0.0298, "step": 11226 }, { "epoch": 2.9, "learning_rate": 3.249999601057152e-07, "loss": 0.0289, "step": 11227 }, { "epoch": 2.9, "learning_rate": 3.2338278118205777e-07, "loss": 0.025, "step": 11228 }, { "epoch": 2.9, "learning_rate": 3.217696227356848e-07, "loss": 0.021, "step": 11229 }, { "epoch": 2.9, "learning_rate": 3.201604848971473e-07, "loss": 0.0331, "step": 11230 }, { "epoch": 2.9, "learning_rate": 3.185553677966913e-07, "loss": 0.0233, "step": 11231 }, { "epoch": 2.9, "learning_rate": 3.1695427156421264e-07, "loss": 0.0285, "step": 11232 }, { "epoch": 2.9, "learning_rate": 3.1535719632929674e-07, "loss": 0.0243, "step": 11233 }, { "epoch": 2.9, "learning_rate": 3.137641422211957e-07, "loss": 0.0266, "step": 11234 }, { "epoch": 2.9, "learning_rate": 3.121751093688452e-07, "loss": 0.0257, "step": 11235 }, { "epoch": 2.9, "learning_rate": 3.1059009790084803e-07, "loss": 0.0234, "step": 11236 }, { "epoch": 2.9, "learning_rate": 3.090091079454849e-07, "loss": 0.0302, "step": 11237 }, { "epoch": 2.9, "learning_rate": 3.074321396307034e-07, "loss": 0.0253, "step": 11238 }, { "epoch": 2.9, "learning_rate": 3.058591930841459e-07, "loss": 0.0346, "step": 11239 }, { "epoch": 2.9, "learning_rate": 3.0429026843310505e-07, "loss": 0.0293, "step": 11240 }, { "epoch": 2.9, "learning_rate": 3.0272536580455703e-07, "loss": 0.0347, "step": 11241 }, { "epoch": 2.9, "learning_rate": 3.0116448532516716e-07, "loss": 0.0292, "step": 11242 }, { "epoch": 2.9, "learning_rate": 2.9960762712124556e-07, "loss": 0.0275, "step": 11243 }, { "epoch": 2.9, "learning_rate": 2.980547913188025e-07, "loss": 0.0307, "step": 11244 }, { "epoch": 2.9, "learning_rate": 2.965059780435209e-07, "loss": 0.0283, "step": 11245 }, { "epoch": 2.9, "learning_rate": 2.9496118742073366e-07, "loss": 0.021, "step": 11246 }, { "epoch": 2.9, "learning_rate": 2.9342041957547416e-07, "loss": 0.0345, "step": 11247 }, { "epoch": 2.9, "learning_rate": 2.918836746324427e-07, "loss": 0.0352, "step": 11248 }, { "epoch": 2.9, "learning_rate": 2.9035095271601756e-07, "loss": 0.0363, "step": 11249 }, { "epoch": 2.9, "learning_rate": 2.888222539502328e-07, "loss": 0.0385, "step": 11250 }, { "epoch": 2.9, "learning_rate": 2.8729757845882275e-07, "loss": 0.0131, "step": 11251 }, { "epoch": 2.9, "learning_rate": 2.8577692636517774e-07, "loss": 0.0225, "step": 11252 }, { "epoch": 2.9, "learning_rate": 2.8426029779236586e-07, "loss": 0.0187, "step": 11253 }, { "epoch": 2.9, "learning_rate": 2.827476928631445e-07, "loss": 0.0312, "step": 11254 }, { "epoch": 2.9, "learning_rate": 2.812391116999213e-07, "loss": 0.0161, "step": 11255 }, { "epoch": 2.9, "learning_rate": 2.7973455442479314e-07, "loss": 0.0237, "step": 11256 }, { "epoch": 2.91, "learning_rate": 2.782340211595291e-07, "loss": 0.03, "step": 11257 }, { "epoch": 2.91, "learning_rate": 2.7673751202557664e-07, "loss": 0.0329, "step": 11258 }, { "epoch": 2.91, "learning_rate": 2.752450271440443e-07, "loss": 0.0237, "step": 11259 }, { "epoch": 2.91, "learning_rate": 2.7375656663573e-07, "loss": 0.0214, "step": 11260 }, { "epoch": 2.91, "learning_rate": 2.722721306210985e-07, "loss": 0.0237, "step": 11261 }, { "epoch": 2.91, "learning_rate": 2.707917192202869e-07, "loss": 0.0244, "step": 11262 }, { "epoch": 2.91, "learning_rate": 2.693153325531106e-07, "loss": 0.0237, "step": 11263 }, { "epoch": 2.91, "learning_rate": 2.6784297073905173e-07, "loss": 0.0319, "step": 11264 }, { "epoch": 2.91, "learning_rate": 2.6637463389728725e-07, "loss": 0.0256, "step": 11265 }, { "epoch": 2.91, "learning_rate": 2.6491032214663867e-07, "loss": 0.0241, "step": 11266 }, { "epoch": 2.91, "learning_rate": 2.6345003560563355e-07, "loss": 0.0273, "step": 11267 }, { "epoch": 2.91, "learning_rate": 2.619937743924439e-07, "loss": 0.0253, "step": 11268 }, { "epoch": 2.91, "learning_rate": 2.605415386249255e-07, "loss": 0.0257, "step": 11269 }, { "epoch": 2.91, "learning_rate": 2.590933284206287e-07, "loss": 0.0284, "step": 11270 }, { "epoch": 2.91, "learning_rate": 2.5764914389674875e-07, "loss": 0.0267, "step": 11271 }, { "epoch": 2.91, "learning_rate": 2.5620898517017544e-07, "loss": 0.0222, "step": 11272 }, { "epoch": 2.91, "learning_rate": 2.5477285235745994e-07, "loss": 0.0299, "step": 11273 }, { "epoch": 2.91, "learning_rate": 2.5334074557483713e-07, "loss": 0.0244, "step": 11274 }, { "epoch": 2.91, "learning_rate": 2.519126649382031e-07, "loss": 0.0292, "step": 11275 }, { "epoch": 2.91, "learning_rate": 2.5048861056314875e-07, "loss": 0.0223, "step": 11276 }, { "epoch": 2.91, "learning_rate": 2.4906858256491527e-07, "loss": 0.0217, "step": 11277 }, { "epoch": 2.91, "learning_rate": 2.4765258105843846e-07, "loss": 0.0202, "step": 11278 }, { "epoch": 2.91, "learning_rate": 2.462406061583156e-07, "loss": 0.0253, "step": 11279 }, { "epoch": 2.91, "learning_rate": 2.448326579788218e-07, "loss": 0.0261, "step": 11280 }, { "epoch": 2.91, "learning_rate": 2.4342873663391053e-07, "loss": 0.0215, "step": 11281 }, { "epoch": 2.91, "learning_rate": 2.4202884223720193e-07, "loss": 0.025, "step": 11282 }, { "epoch": 2.91, "learning_rate": 2.406329749019942e-07, "loss": 0.0322, "step": 11283 }, { "epoch": 2.91, "learning_rate": 2.3924113474125267e-07, "loss": 0.0293, "step": 11284 }, { "epoch": 2.91, "learning_rate": 2.3785332186763708e-07, "loss": 0.0209, "step": 11285 }, { "epoch": 2.91, "learning_rate": 2.364695363934577e-07, "loss": 0.0324, "step": 11286 }, { "epoch": 2.91, "learning_rate": 2.3508977843070822e-07, "loss": 0.0288, "step": 11287 }, { "epoch": 2.91, "learning_rate": 2.3371404809106045e-07, "loss": 0.022, "step": 11288 }, { "epoch": 2.91, "learning_rate": 2.3234234548585864e-07, "loss": 0.0279, "step": 11289 }, { "epoch": 2.91, "learning_rate": 2.3097467072610847e-07, "loss": 0.0194, "step": 11290 }, { "epoch": 2.91, "learning_rate": 2.296110239225102e-07, "loss": 0.021, "step": 11291 }, { "epoch": 2.91, "learning_rate": 2.282514051854201e-07, "loss": 0.0254, "step": 11292 }, { "epoch": 2.91, "learning_rate": 2.268958146248834e-07, "loss": 0.0251, "step": 11293 }, { "epoch": 2.91, "learning_rate": 2.2554425235060683e-07, "loss": 0.0277, "step": 11294 }, { "epoch": 2.91, "learning_rate": 2.2419671847198064e-07, "loss": 0.026, "step": 11295 }, { "epoch": 2.92, "learning_rate": 2.2285321309806206e-07, "loss": 0.0226, "step": 11296 }, { "epoch": 2.92, "learning_rate": 2.2151373633758078e-07, "loss": 0.0294, "step": 11297 }, { "epoch": 2.92, "learning_rate": 2.2017828829895558e-07, "loss": 0.0246, "step": 11298 }, { "epoch": 2.92, "learning_rate": 2.1884686909025565e-07, "loss": 0.0304, "step": 11299 }, { "epoch": 2.92, "learning_rate": 2.1751947881924473e-07, "loss": 0.0335, "step": 11300 }, { "epoch": 2.92, "learning_rate": 2.1619611759335355e-07, "loss": 0.0248, "step": 11301 }, { "epoch": 2.92, "learning_rate": 2.1487678551968538e-07, "loss": 0.0329, "step": 11302 }, { "epoch": 2.92, "learning_rate": 2.1356148270501031e-07, "loss": 0.0202, "step": 11303 }, { "epoch": 2.92, "learning_rate": 2.1225020925578765e-07, "loss": 0.0247, "step": 11304 }, { "epoch": 2.92, "learning_rate": 2.109429652781325e-07, "loss": 0.0279, "step": 11305 }, { "epoch": 2.92, "learning_rate": 2.0963975087786024e-07, "loss": 0.0295, "step": 11306 }, { "epoch": 2.92, "learning_rate": 2.0834056616043097e-07, "loss": 0.0262, "step": 11307 }, { "epoch": 2.92, "learning_rate": 2.0704541123099385e-07, "loss": 0.0234, "step": 11308 }, { "epoch": 2.92, "learning_rate": 2.057542861943762e-07, "loss": 0.0211, "step": 11309 }, { "epoch": 2.92, "learning_rate": 2.0446719115506109e-07, "loss": 0.0271, "step": 11310 }, { "epoch": 2.92, "learning_rate": 2.0318412621723183e-07, "loss": 0.0224, "step": 11311 }, { "epoch": 2.92, "learning_rate": 2.0190509148471648e-07, "loss": 0.0254, "step": 11312 }, { "epoch": 2.92, "learning_rate": 2.0063008706103782e-07, "loss": 0.0329, "step": 11313 }, { "epoch": 2.92, "learning_rate": 1.9935911304938548e-07, "loss": 0.0242, "step": 11314 }, { "epoch": 2.92, "learning_rate": 1.9809216955262722e-07, "loss": 0.0206, "step": 11315 }, { "epoch": 2.92, "learning_rate": 1.9682925667329211e-07, "loss": 0.033, "step": 11316 }, { "epoch": 2.92, "learning_rate": 1.9557037451359285e-07, "loss": 0.0263, "step": 11317 }, { "epoch": 2.92, "learning_rate": 1.9431552317542012e-07, "loss": 0.019, "step": 11318 }, { "epoch": 2.92, "learning_rate": 1.9306470276033163e-07, "loss": 0.0272, "step": 11319 }, { "epoch": 2.92, "learning_rate": 1.9181791336955746e-07, "loss": 0.0283, "step": 11320 }, { "epoch": 2.92, "learning_rate": 1.9057515510400027e-07, "loss": 0.027, "step": 11321 }, { "epoch": 2.92, "learning_rate": 1.8933642806425178e-07, "loss": 0.0287, "step": 11322 }, { "epoch": 2.92, "learning_rate": 1.8810173235055407e-07, "loss": 0.0269, "step": 11323 }, { "epoch": 2.92, "learning_rate": 1.8687106806283827e-07, "loss": 0.0269, "step": 11324 }, { "epoch": 2.92, "learning_rate": 1.8564443530071362e-07, "loss": 0.0267, "step": 11325 }, { "epoch": 2.92, "learning_rate": 1.8442183416344516e-07, "loss": 0.0283, "step": 11326 }, { "epoch": 2.92, "learning_rate": 1.832032647499815e-07, "loss": 0.0241, "step": 11327 }, { "epoch": 2.92, "learning_rate": 1.8198872715895488e-07, "loss": 0.0238, "step": 11328 }, { "epoch": 2.92, "learning_rate": 1.8077822148864776e-07, "loss": 0.0248, "step": 11329 }, { "epoch": 2.92, "learning_rate": 1.7957174783704288e-07, "loss": 0.027, "step": 11330 }, { "epoch": 2.92, "learning_rate": 1.7836930630177883e-07, "loss": 0.0282, "step": 11331 }, { "epoch": 2.92, "learning_rate": 1.771708969801722e-07, "loss": 0.0271, "step": 11332 }, { "epoch": 2.92, "learning_rate": 1.7597651996921204e-07, "loss": 0.0259, "step": 11333 }, { "epoch": 2.92, "learning_rate": 1.747861753655655e-07, "loss": 0.0313, "step": 11334 }, { "epoch": 2.93, "learning_rate": 1.7359986326556667e-07, "loss": 0.0341, "step": 11335 }, { "epoch": 2.93, "learning_rate": 1.724175837652331e-07, "loss": 0.0304, "step": 11336 }, { "epoch": 2.93, "learning_rate": 1.712393369602494e-07, "loss": 0.0309, "step": 11337 }, { "epoch": 2.93, "learning_rate": 1.7006512294597264e-07, "loss": 0.0331, "step": 11338 }, { "epoch": 2.93, "learning_rate": 1.6889494181743794e-07, "loss": 0.0323, "step": 11339 }, { "epoch": 2.93, "learning_rate": 1.6772879366934724e-07, "loss": 0.0289, "step": 11340 }, { "epoch": 2.93, "learning_rate": 1.6656667859608065e-07, "loss": 0.0301, "step": 11341 }, { "epoch": 2.93, "learning_rate": 1.6540859669169628e-07, "loss": 0.0275, "step": 11342 }, { "epoch": 2.93, "learning_rate": 1.6425454804991913e-07, "loss": 0.0272, "step": 11343 }, { "epoch": 2.93, "learning_rate": 1.6310453276414672e-07, "loss": 0.0294, "step": 11344 }, { "epoch": 2.93, "learning_rate": 1.619585509274546e-07, "loss": 0.0277, "step": 11345 }, { "epoch": 2.93, "learning_rate": 1.6081660263259635e-07, "loss": 0.0341, "step": 11346 }, { "epoch": 2.93, "learning_rate": 1.5967868797198138e-07, "loss": 0.0279, "step": 11347 }, { "epoch": 2.93, "learning_rate": 1.5854480703771934e-07, "loss": 0.0296, "step": 11348 }, { "epoch": 2.93, "learning_rate": 1.5741495992156464e-07, "loss": 0.0334, "step": 11349 }, { "epoch": 2.93, "learning_rate": 1.5628914671497186e-07, "loss": 0.0321, "step": 11350 }, { "epoch": 2.93, "learning_rate": 1.5516736750904593e-07, "loss": 0.0181, "step": 11351 }, { "epoch": 2.93, "learning_rate": 1.5404962239458086e-07, "loss": 0.0328, "step": 11352 }, { "epoch": 2.93, "learning_rate": 1.5293591146203766e-07, "loss": 0.0215, "step": 11353 }, { "epoch": 2.93, "learning_rate": 1.518262348015498e-07, "loss": 0.0218, "step": 11354 }, { "epoch": 2.93, "learning_rate": 1.5072059250293425e-07, "loss": 0.0228, "step": 11355 }, { "epoch": 2.93, "learning_rate": 1.4961898465566947e-07, "loss": 0.0301, "step": 11356 }, { "epoch": 2.93, "learning_rate": 1.4852141134890642e-07, "loss": 0.0243, "step": 11357 }, { "epoch": 2.93, "learning_rate": 1.4742787267148505e-07, "loss": 0.0302, "step": 11358 }, { "epoch": 2.93, "learning_rate": 1.4633836871190687e-07, "loss": 0.0187, "step": 11359 }, { "epoch": 2.93, "learning_rate": 1.452528995583402e-07, "loss": 0.0222, "step": 11360 }, { "epoch": 2.93, "learning_rate": 1.4417146529864256e-07, "loss": 0.0239, "step": 11361 }, { "epoch": 2.93, "learning_rate": 1.430940660203328e-07, "loss": 0.0264, "step": 11362 }, { "epoch": 2.93, "learning_rate": 1.42020701810619e-07, "loss": 0.0214, "step": 11363 }, { "epoch": 2.93, "learning_rate": 1.409513727563594e-07, "loss": 0.0223, "step": 11364 }, { "epoch": 2.93, "learning_rate": 1.3988607894410145e-07, "loss": 0.0188, "step": 11365 }, { "epoch": 2.93, "learning_rate": 1.3882482046006505e-07, "loss": 0.0219, "step": 11366 }, { "epoch": 2.93, "learning_rate": 1.3776759739014267e-07, "loss": 0.0197, "step": 11367 }, { "epoch": 2.93, "learning_rate": 1.3671440981989358e-07, "loss": 0.0248, "step": 11368 }, { "epoch": 2.93, "learning_rate": 1.356652578345552e-07, "loss": 0.0222, "step": 11369 }, { "epoch": 2.93, "learning_rate": 1.346201415190429e-07, "loss": 0.0234, "step": 11370 }, { "epoch": 2.93, "learning_rate": 1.3357906095793903e-07, "loss": 0.0294, "step": 11371 }, { "epoch": 2.93, "learning_rate": 1.3254201623550399e-07, "loss": 0.0269, "step": 11372 }, { "epoch": 2.93, "learning_rate": 1.3150900743566509e-07, "loss": 0.0191, "step": 11373 }, { "epoch": 2.94, "learning_rate": 1.3048003464202207e-07, "loss": 0.0306, "step": 11374 }, { "epoch": 2.94, "learning_rate": 1.2945509793786391e-07, "loss": 0.0232, "step": 11375 }, { "epoch": 2.94, "learning_rate": 1.2843419740613538e-07, "loss": 0.0223, "step": 11376 }, { "epoch": 2.94, "learning_rate": 1.274173331294648e-07, "loss": 0.0233, "step": 11377 }, { "epoch": 2.94, "learning_rate": 1.264045051901419e-07, "loss": 0.0167, "step": 11378 }, { "epoch": 2.94, "learning_rate": 1.2539571367015112e-07, "loss": 0.0277, "step": 11379 }, { "epoch": 2.94, "learning_rate": 1.243909586511216e-07, "loss": 0.0297, "step": 11380 }, { "epoch": 2.94, "learning_rate": 1.2339024021438273e-07, "loss": 0.0219, "step": 11381 }, { "epoch": 2.94, "learning_rate": 1.223935584409197e-07, "loss": 0.0244, "step": 11382 }, { "epoch": 2.94, "learning_rate": 1.2140091341140135e-07, "loss": 0.0254, "step": 11383 }, { "epoch": 2.94, "learning_rate": 1.2041230520616343e-07, "loss": 0.0259, "step": 11384 }, { "epoch": 2.94, "learning_rate": 1.1942773390521412e-07, "loss": 0.026, "step": 11385 }, { "epoch": 2.94, "learning_rate": 1.1844719958823968e-07, "loss": 0.0216, "step": 11386 }, { "epoch": 2.94, "learning_rate": 1.1747070233460444e-07, "loss": 0.0301, "step": 11387 }, { "epoch": 2.94, "learning_rate": 1.1649824222332296e-07, "loss": 0.0298, "step": 11388 }, { "epoch": 2.94, "learning_rate": 1.1552981933311558e-07, "loss": 0.0288, "step": 11389 }, { "epoch": 2.94, "learning_rate": 1.1456543374235295e-07, "loss": 0.0291, "step": 11390 }, { "epoch": 2.94, "learning_rate": 1.1360508552908933e-07, "loss": 0.0284, "step": 11391 }, { "epoch": 2.94, "learning_rate": 1.126487747710403e-07, "loss": 0.0197, "step": 11392 }, { "epoch": 2.94, "learning_rate": 1.1169650154561062e-07, "loss": 0.025, "step": 11393 }, { "epoch": 2.94, "learning_rate": 1.1074826592986642e-07, "loss": 0.0405, "step": 11394 }, { "epoch": 2.94, "learning_rate": 1.0980406800055187e-07, "loss": 0.0308, "step": 11395 }, { "epoch": 2.94, "learning_rate": 1.0886390783408918e-07, "loss": 0.0219, "step": 11396 }, { "epoch": 2.94, "learning_rate": 1.0792778550656191e-07, "loss": 0.0326, "step": 11397 }, { "epoch": 2.94, "learning_rate": 1.0699570109373725e-07, "loss": 0.0234, "step": 11398 }, { "epoch": 2.94, "learning_rate": 1.0606765467104374e-07, "loss": 0.0281, "step": 11399 }, { "epoch": 2.94, "learning_rate": 1.0514364631359908e-07, "loss": 0.0261, "step": 11400 }, { "epoch": 2.94, "learning_rate": 1.0422367609618788e-07, "loss": 0.0223, "step": 11401 }, { "epoch": 2.94, "learning_rate": 1.0330774409325617e-07, "loss": 0.0179, "step": 11402 }, { "epoch": 2.94, "learning_rate": 1.0239585037894462e-07, "loss": 0.028, "step": 11403 }, { "epoch": 2.94, "learning_rate": 1.014879950270442e-07, "loss": 0.021, "step": 11404 }, { "epoch": 2.94, "learning_rate": 1.005841781110406e-07, "loss": 0.0251, "step": 11405 }, { "epoch": 2.94, "learning_rate": 9.968439970407528e-08, "loss": 0.0226, "step": 11406 }, { "epoch": 2.94, "learning_rate": 9.878865987897335e-08, "loss": 0.0227, "step": 11407 }, { "epoch": 2.94, "learning_rate": 9.78969587082268e-08, "loss": 0.0304, "step": 11408 }, { "epoch": 2.94, "learning_rate": 9.700929626400568e-08, "loss": 0.0281, "step": 11409 }, { "epoch": 2.94, "learning_rate": 9.612567261815808e-08, "loss": 0.0248, "step": 11410 }, { "epoch": 2.94, "learning_rate": 9.524608784218236e-08, "loss": 0.0317, "step": 11411 }, { "epoch": 2.95, "learning_rate": 9.437054200728268e-08, "loss": 0.0245, "step": 11412 }, { "epoch": 2.95, "learning_rate": 9.349903518430791e-08, "loss": 0.024, "step": 11413 }, { "epoch": 2.95, "learning_rate": 9.263156744379053e-08, "loss": 0.0306, "step": 11414 }, { "epoch": 2.95, "learning_rate": 9.176813885595214e-08, "loss": 0.0213, "step": 11415 }, { "epoch": 2.95, "learning_rate": 9.090874949065908e-08, "loss": 0.0259, "step": 11416 }, { "epoch": 2.95, "learning_rate": 9.005339941746682e-08, "loss": 0.0262, "step": 11417 }, { "epoch": 2.95, "learning_rate": 8.920208870560331e-08, "loss": 0.0281, "step": 11418 }, { "epoch": 2.95, "learning_rate": 8.835481742396345e-08, "loss": 0.0289, "step": 11419 }, { "epoch": 2.95, "learning_rate": 8.751158564113127e-08, "loss": 0.0277, "step": 11420 }, { "epoch": 2.95, "learning_rate": 8.667239342534106e-08, "loss": 0.025, "step": 11421 }, { "epoch": 2.95, "learning_rate": 8.583724084451072e-08, "loss": 0.0245, "step": 11422 }, { "epoch": 2.95, "learning_rate": 8.50061279662362e-08, "loss": 0.0287, "step": 11423 }, { "epoch": 2.95, "learning_rate": 8.417905485778033e-08, "loss": 0.0247, "step": 11424 }, { "epoch": 2.95, "learning_rate": 8.335602158608402e-08, "loss": 0.0276, "step": 11425 }, { "epoch": 2.95, "learning_rate": 8.253702821774955e-08, "loss": 0.0257, "step": 11426 }, { "epoch": 2.95, "learning_rate": 8.172207481906835e-08, "loss": 0.029, "step": 11427 }, { "epoch": 2.95, "learning_rate": 8.09111614559932e-08, "loss": 0.0248, "step": 11428 }, { "epoch": 2.95, "learning_rate": 8.010428819416049e-08, "loss": 0.0299, "step": 11429 }, { "epoch": 2.95, "learning_rate": 7.930145509886244e-08, "loss": 0.0266, "step": 11430 }, { "epoch": 2.95, "learning_rate": 7.85026622350804e-08, "loss": 0.0292, "step": 11431 }, { "epoch": 2.95, "learning_rate": 7.770790966746266e-08, "loss": 0.0283, "step": 11432 }, { "epoch": 2.95, "learning_rate": 7.691719746033e-08, "loss": 0.0234, "step": 11433 }, { "epoch": 2.95, "learning_rate": 7.613052567767565e-08, "loss": 0.0286, "step": 11434 }, { "epoch": 2.95, "learning_rate": 7.534789438317091e-08, "loss": 0.0256, "step": 11435 }, { "epoch": 2.95, "learning_rate": 7.456930364015402e-08, "loss": 0.0327, "step": 11436 }, { "epoch": 2.95, "learning_rate": 7.379475351164122e-08, "loss": 0.0306, "step": 11437 }, { "epoch": 2.95, "learning_rate": 7.302424406031571e-08, "loss": 0.0276, "step": 11438 }, { "epoch": 2.95, "learning_rate": 7.225777534854428e-08, "loss": 0.03, "step": 11439 }, { "epoch": 2.95, "learning_rate": 7.149534743834951e-08, "loss": 0.0251, "step": 11440 }, { "epoch": 2.95, "learning_rate": 7.073696039143762e-08, "loss": 0.0301, "step": 11441 }, { "epoch": 2.95, "learning_rate": 6.998261426919839e-08, "loss": 0.0291, "step": 11442 }, { "epoch": 2.95, "learning_rate": 6.923230913267187e-08, "loss": 0.0404, "step": 11443 }, { "epoch": 2.95, "learning_rate": 6.848604504258727e-08, "loss": 0.0391, "step": 11444 }, { "epoch": 2.95, "learning_rate": 6.774382205934071e-08, "loss": 0.0289, "step": 11445 }, { "epoch": 2.95, "learning_rate": 6.700564024300637e-08, "loss": 0.0277, "step": 11446 }, { "epoch": 2.95, "learning_rate": 6.627149965332535e-08, "loss": 0.0342, "step": 11447 }, { "epoch": 2.95, "learning_rate": 6.554140034970568e-08, "loss": 0.0335, "step": 11448 }, { "epoch": 2.95, "learning_rate": 6.481534239125009e-08, "loss": 0.0382, "step": 11449 }, { "epoch": 2.95, "learning_rate": 6.409332583671156e-08, "loss": 0.0307, "step": 11450 }, { "epoch": 2.96, "learning_rate": 6.337535074452672e-08, "loss": 0.0399, "step": 11451 }, { "epoch": 2.96, "learning_rate": 6.266141717281015e-08, "loss": 0.0275, "step": 11452 }, { "epoch": 2.96, "learning_rate": 6.195152517933234e-08, "loss": 0.0229, "step": 11453 }, { "epoch": 2.96, "learning_rate": 6.12456748215473e-08, "loss": 0.0207, "step": 11454 }, { "epoch": 2.96, "learning_rate": 6.054386615659268e-08, "loss": 0.0318, "step": 11455 }, { "epoch": 2.96, "learning_rate": 5.984609924125639e-08, "loss": 0.0317, "step": 11456 }, { "epoch": 2.96, "learning_rate": 5.9152374132021016e-08, "loss": 0.025, "step": 11457 }, { "epoch": 2.96, "learning_rate": 5.8462690885019436e-08, "loss": 0.0255, "step": 11458 }, { "epoch": 2.96, "learning_rate": 5.777704955607921e-08, "loss": 0.0235, "step": 11459 }, { "epoch": 2.96, "learning_rate": 5.709545020068929e-08, "loss": 0.0217, "step": 11460 }, { "epoch": 2.96, "learning_rate": 5.6417892874016665e-08, "loss": 0.0262, "step": 11461 }, { "epoch": 2.96, "learning_rate": 5.574437763088969e-08, "loss": 0.0237, "step": 11462 }, { "epoch": 2.96, "learning_rate": 5.507490452582587e-08, "loss": 0.0283, "step": 11463 }, { "epoch": 2.96, "learning_rate": 5.440947361299853e-08, "loss": 0.0179, "step": 11464 }, { "epoch": 2.96, "learning_rate": 5.37480849462757e-08, "loss": 0.0233, "step": 11465 }, { "epoch": 2.96, "learning_rate": 5.309073857917568e-08, "loss": 0.0228, "step": 11466 }, { "epoch": 2.96, "learning_rate": 5.2437434564900355e-08, "loss": 0.0215, "step": 11467 }, { "epoch": 2.96, "learning_rate": 5.178817295632965e-08, "loss": 0.0269, "step": 11468 }, { "epoch": 2.96, "learning_rate": 5.1142953806004867e-08, "loss": 0.033, "step": 11469 }, { "epoch": 2.96, "learning_rate": 5.0501777166150895e-08, "loss": 0.0269, "step": 11470 }, { "epoch": 2.96, "learning_rate": 4.986464308864847e-08, "loss": 0.0194, "step": 11471 }, { "epoch": 2.96, "learning_rate": 4.923155162507853e-08, "loss": 0.0318, "step": 11472 }, { "epoch": 2.96, "learning_rate": 4.860250282666678e-08, "loss": 0.0356, "step": 11473 }, { "epoch": 2.96, "learning_rate": 4.797749674432805e-08, "loss": 0.0243, "step": 11474 }, { "epoch": 2.96, "learning_rate": 4.73565334286441e-08, "loss": 0.0216, "step": 11475 }, { "epoch": 2.96, "learning_rate": 4.673961292988027e-08, "loss": 0.0286, "step": 11476 }, { "epoch": 2.96, "learning_rate": 4.6126735297952195e-08, "loss": 0.022, "step": 11477 }, { "epoch": 2.96, "learning_rate": 4.5517900582470186e-08, "loss": 0.0217, "step": 11478 }, { "epoch": 2.96, "learning_rate": 4.4913108832711494e-08, "loss": 0.0247, "step": 11479 }, { "epoch": 2.96, "learning_rate": 4.4312360097614746e-08, "loss": 0.0285, "step": 11480 }, { "epoch": 2.96, "learning_rate": 4.371565442580772e-08, "loss": 0.0258, "step": 11481 }, { "epoch": 2.96, "learning_rate": 4.31229918655851e-08, "loss": 0.0275, "step": 11482 }, { "epoch": 2.96, "learning_rate": 4.2534372464902995e-08, "loss": 0.0333, "step": 11483 }, { "epoch": 2.96, "learning_rate": 4.1949796271406604e-08, "loss": 0.0229, "step": 11484 }, { "epoch": 2.96, "learning_rate": 4.1369263332408094e-08, "loss": 0.0312, "step": 11485 }, { "epoch": 2.96, "learning_rate": 4.0792773694892096e-08, "loss": 0.0254, "step": 11486 }, { "epoch": 2.96, "learning_rate": 4.0220327405515734e-08, "loss": 0.0232, "step": 11487 }, { "epoch": 2.96, "learning_rate": 3.9651924510603067e-08, "loss": 0.0246, "step": 11488 }, { "epoch": 2.96, "learning_rate": 3.9087565056161735e-08, "loss": 0.0231, "step": 11489 }, { "epoch": 2.97, "learning_rate": 3.8527249087866316e-08, "loss": 0.0283, "step": 11490 }, { "epoch": 2.97, "learning_rate": 3.797097665106386e-08, "loss": 0.0217, "step": 11491 }, { "epoch": 2.97, "learning_rate": 3.741874779077947e-08, "loss": 0.03, "step": 11492 }, { "epoch": 2.97, "learning_rate": 3.687056255169963e-08, "loss": 0.0273, "step": 11493 }, { "epoch": 2.97, "learning_rate": 3.632642097819438e-08, "loss": 0.0251, "step": 11494 }, { "epoch": 2.97, "learning_rate": 3.5786323114306296e-08, "loss": 0.0239, "step": 11495 }, { "epoch": 2.97, "learning_rate": 3.525026900373929e-08, "loss": 0.0256, "step": 11496 }, { "epoch": 2.97, "learning_rate": 3.4718258689880876e-08, "loss": 0.0204, "step": 11497 }, { "epoch": 2.97, "learning_rate": 3.4190292215791064e-08, "loss": 0.0288, "step": 11498 }, { "epoch": 2.97, "learning_rate": 3.3666369624196784e-08, "loss": 0.0212, "step": 11499 }, { "epoch": 2.97, "learning_rate": 3.314649095750299e-08, "loss": 0.0231, "step": 11500 }, { "epoch": 2.97, "learning_rate": 3.2630656257787164e-08, "loss": 0.0328, "step": 11501 }, { "epoch": 2.97, "learning_rate": 3.211886556678812e-08, "loss": 0.027, "step": 11502 }, { "epoch": 2.97, "learning_rate": 3.1611118925933824e-08, "loss": 0.025, "step": 11503 }, { "epoch": 2.97, "learning_rate": 3.11074163763192e-08, "loss": 0.0201, "step": 11504 }, { "epoch": 2.97, "learning_rate": 3.060775795870052e-08, "loss": 0.028, "step": 11505 }, { "epoch": 2.97, "learning_rate": 3.011214371352877e-08, "loss": 0.0253, "step": 11506 }, { "epoch": 2.97, "learning_rate": 2.962057368091076e-08, "loss": 0.027, "step": 11507 }, { "epoch": 2.97, "learning_rate": 2.9133047900631315e-08, "loss": 0.0271, "step": 11508 }, { "epoch": 2.97, "learning_rate": 2.8649566412142226e-08, "loss": 0.0235, "step": 11509 }, { "epoch": 2.97, "learning_rate": 2.81701292545844e-08, "loss": 0.0299, "step": 11510 }, { "epoch": 2.97, "learning_rate": 2.7694736466749028e-08, "loss": 0.0307, "step": 11511 }, { "epoch": 2.97, "learning_rate": 2.722338808711089e-08, "loss": 0.0214, "step": 11512 }, { "epoch": 2.97, "learning_rate": 2.6756084153828355e-08, "loss": 0.0257, "step": 11513 }, { "epoch": 2.97, "learning_rate": 2.629282470471006e-08, "loss": 0.0257, "step": 11514 }, { "epoch": 2.97, "learning_rate": 2.5833609777253797e-08, "loss": 0.0232, "step": 11515 }, { "epoch": 2.97, "learning_rate": 2.5378439408629828e-08, "loss": 0.0274, "step": 11516 }, { "epoch": 2.97, "learning_rate": 2.49273136356698e-08, "loss": 0.0261, "step": 11517 }, { "epoch": 2.97, "learning_rate": 2.4480232494888955e-08, "loss": 0.0264, "step": 11518 }, { "epoch": 2.97, "learning_rate": 2.4037196022463904e-08, "loss": 0.0326, "step": 11519 }, { "epoch": 2.97, "learning_rate": 2.3598204254260404e-08, "loss": 0.0262, "step": 11520 }, { "epoch": 2.97, "learning_rate": 2.3163257225800038e-08, "loss": 0.0263, "step": 11521 }, { "epoch": 2.97, "learning_rate": 2.2732354972287982e-08, "loss": 0.0335, "step": 11522 }, { "epoch": 2.97, "learning_rate": 2.2305497528601892e-08, "loss": 0.0283, "step": 11523 }, { "epoch": 2.97, "learning_rate": 2.1882684929280806e-08, "loss": 0.0301, "step": 11524 }, { "epoch": 2.97, "learning_rate": 2.1463917208547346e-08, "loss": 0.0271, "step": 11525 }, { "epoch": 2.97, "learning_rate": 2.1049194400296625e-08, "loss": 0.0278, "step": 11526 }, { "epoch": 2.97, "learning_rate": 2.063851653808513e-08, "loss": 0.032, "step": 11527 }, { "epoch": 2.97, "learning_rate": 2.023188365516404e-08, "loss": 0.0223, "step": 11528 }, { "epoch": 2.98, "learning_rate": 1.9829295784429268e-08, "loss": 0.0232, "step": 11529 }, { "epoch": 2.98, "learning_rate": 1.943075295847141e-08, "loss": 0.0241, "step": 11530 }, { "epoch": 2.98, "learning_rate": 1.9036255209542443e-08, "loss": 0.0284, "step": 11531 }, { "epoch": 2.98, "learning_rate": 1.864580256957238e-08, "loss": 0.0321, "step": 11532 }, { "epoch": 2.98, "learning_rate": 1.825939507016372e-08, "loss": 0.0299, "step": 11533 }, { "epoch": 2.98, "learning_rate": 1.78770327425859e-08, "loss": 0.0294, "step": 11534 }, { "epoch": 2.98, "learning_rate": 1.7498715617780825e-08, "loss": 0.0262, "step": 11535 }, { "epoch": 2.98, "learning_rate": 1.7124443726374008e-08, "loss": 0.0311, "step": 11536 }, { "epoch": 2.98, "learning_rate": 1.6754217098657877e-08, "loss": 0.025, "step": 11537 }, { "epoch": 2.98, "learning_rate": 1.6388035764591802e-08, "loss": 0.0322, "step": 11538 }, { "epoch": 2.98, "learning_rate": 1.602589975380764e-08, "loss": 0.0274, "step": 11539 }, { "epoch": 2.98, "learning_rate": 1.5667809095620822e-08, "loss": 0.0301, "step": 11540 }, { "epoch": 2.98, "learning_rate": 1.5313763819008175e-08, "loss": 0.0245, "step": 11541 }, { "epoch": 2.98, "learning_rate": 1.4963763952630104e-08, "loss": 0.0299, "step": 11542 }, { "epoch": 2.98, "learning_rate": 1.4617809524808402e-08, "loss": 0.0307, "step": 11543 }, { "epoch": 2.98, "learning_rate": 1.4275900563542888e-08, "loss": 0.0279, "step": 11544 }, { "epoch": 2.98, "learning_rate": 1.3938037096505874e-08, "loss": 0.0249, "step": 11545 }, { "epoch": 2.98, "learning_rate": 1.3604219151036601e-08, "loss": 0.0354, "step": 11546 }, { "epoch": 2.98, "learning_rate": 1.3274446754157899e-08, "loss": 0.0298, "step": 11547 }, { "epoch": 2.98, "learning_rate": 1.2948719932559527e-08, "loss": 0.0265, "step": 11548 }, { "epoch": 2.98, "learning_rate": 1.2627038712603733e-08, "loss": 0.0277, "step": 11549 }, { "epoch": 2.98, "learning_rate": 1.2309403120319696e-08, "loss": 0.0229, "step": 11550 }, { "epoch": 2.98, "learning_rate": 1.1995813181420179e-08, "loss": 0.0215, "step": 11551 }, { "epoch": 2.98, "learning_rate": 1.1686268921284882e-08, "loss": 0.0268, "step": 11552 }, { "epoch": 2.98, "learning_rate": 1.1380770364960436e-08, "loss": 0.0248, "step": 11553 }, { "epoch": 2.98, "learning_rate": 1.1079317537177058e-08, "loss": 0.024, "step": 11554 }, { "epoch": 2.98, "learning_rate": 1.078191046232635e-08, "loss": 0.0183, "step": 11555 }, { "epoch": 2.98, "learning_rate": 1.0488549164489048e-08, "loss": 0.0289, "step": 11556 }, { "epoch": 2.98, "learning_rate": 1.019923366739617e-08, "loss": 0.0242, "step": 11557 }, { "epoch": 2.98, "learning_rate": 9.913963994473418e-09, "loss": 0.026, "step": 11558 }, { "epoch": 2.98, "learning_rate": 9.632740168796783e-09, "loss": 0.0207, "step": 11559 }, { "epoch": 2.98, "learning_rate": 9.355562213136937e-09, "loss": 0.0237, "step": 11560 }, { "epoch": 2.98, "learning_rate": 9.082430149925936e-09, "loss": 0.0254, "step": 11561 }, { "epoch": 2.98, "learning_rate": 8.813344001257218e-09, "loss": 0.0272, "step": 11562 }, { "epoch": 2.98, "learning_rate": 8.548303788924461e-09, "loss": 0.0216, "step": 11563 }, { "epoch": 2.98, "learning_rate": 8.287309534371624e-09, "loss": 0.0202, "step": 11564 }, { "epoch": 2.98, "learning_rate": 8.030361258715147e-09, "loss": 0.0193, "step": 11565 }, { "epoch": 2.98, "learning_rate": 7.77745898276061e-09, "loss": 0.0247, "step": 11566 }, { "epoch": 2.99, "learning_rate": 7.528602726974977e-09, "loss": 0.0266, "step": 11567 }, { "epoch": 2.99, "learning_rate": 7.283792511492138e-09, "loss": 0.0225, "step": 11568 }, { "epoch": 2.99, "learning_rate": 7.043028356129578e-09, "loss": 0.023, "step": 11569 }, { "epoch": 2.99, "learning_rate": 6.806310280371708e-09, "loss": 0.0264, "step": 11570 }, { "epoch": 2.99, "learning_rate": 6.57363830338098e-09, "loss": 0.0304, "step": 11571 }, { "epoch": 2.99, "learning_rate": 6.3450124439867755e-09, "loss": 0.0218, "step": 11572 }, { "epoch": 2.99, "learning_rate": 6.12043272069096e-09, "loss": 0.0248, "step": 11573 }, { "epoch": 2.99, "learning_rate": 5.899899151667887e-09, "loss": 0.0222, "step": 11574 }, { "epoch": 2.99, "learning_rate": 5.683411754764389e-09, "loss": 0.0289, "step": 11575 }, { "epoch": 2.99, "learning_rate": 5.470970547505339e-09, "loss": 0.0236, "step": 11576 }, { "epoch": 2.99, "learning_rate": 5.2625755470880886e-09, "loss": 0.0249, "step": 11577 }, { "epoch": 2.99, "learning_rate": 5.058226770371377e-09, "loss": 0.0224, "step": 11578 }, { "epoch": 2.99, "learning_rate": 4.857924233897526e-09, "loss": 0.0312, "step": 11579 }, { "epoch": 2.99, "learning_rate": 4.661667953875793e-09, "loss": 0.0271, "step": 11580 }, { "epoch": 2.99, "learning_rate": 4.469457946187916e-09, "loss": 0.0291, "step": 11581 }, { "epoch": 2.99, "learning_rate": 4.281294226399224e-09, "loss": 0.0225, "step": 11582 }, { "epoch": 2.99, "learning_rate": 4.0971768097253224e-09, "loss": 0.0218, "step": 11583 }, { "epoch": 2.99, "learning_rate": 3.917105711076508e-09, "loss": 0.0232, "step": 11584 }, { "epoch": 2.99, "learning_rate": 3.741080945024455e-09, "loss": 0.0271, "step": 11585 }, { "epoch": 2.99, "learning_rate": 3.5691025258133282e-09, "loss": 0.0227, "step": 11586 }, { "epoch": 2.99, "learning_rate": 3.401170467365322e-09, "loss": 0.0293, "step": 11587 }, { "epoch": 2.99, "learning_rate": 3.2372847832695674e-09, "loss": 0.0273, "step": 11588 }, { "epoch": 2.99, "learning_rate": 3.077445486787678e-09, "loss": 0.0288, "step": 11589 }, { "epoch": 2.99, "learning_rate": 2.921652590859303e-09, "loss": 0.0247, "step": 11590 }, { "epoch": 2.99, "learning_rate": 2.769906108091025e-09, "loss": 0.0242, "step": 11591 }, { "epoch": 2.99, "learning_rate": 2.6222060507674617e-09, "loss": 0.0247, "step": 11592 }, { "epoch": 2.99, "learning_rate": 2.478552430840164e-09, "loss": 0.0264, "step": 11593 }, { "epoch": 2.99, "learning_rate": 2.3389452599331676e-09, "loss": 0.022, "step": 11594 }, { "epoch": 2.99, "learning_rate": 2.2033845493485416e-09, "loss": 0.0236, "step": 11595 }, { "epoch": 2.99, "learning_rate": 2.0718703100608418e-09, "loss": 0.0254, "step": 11596 }, { "epoch": 2.99, "learning_rate": 1.944402552706004e-09, "loss": 0.0227, "step": 11597 }, { "epoch": 2.99, "learning_rate": 1.8209812876035515e-09, "loss": 0.0285, "step": 11598 }, { "epoch": 2.99, "learning_rate": 1.7016065247399404e-09, "loss": 0.0271, "step": 11599 }, { "epoch": 2.99, "learning_rate": 1.5862782737852134e-09, "loss": 0.0315, "step": 11600 }, { "epoch": 2.99, "learning_rate": 1.4749965440652435e-09, "loss": 0.0272, "step": 11601 }, { "epoch": 2.99, "learning_rate": 1.367761344589491e-09, "loss": 0.0216, "step": 11602 }, { "epoch": 2.99, "learning_rate": 1.2645726840343485e-09, "loss": 0.0302, "step": 11603 }, { "epoch": 2.99, "learning_rate": 1.1654305707542446e-09, "loss": 0.0235, "step": 11604 }, { "epoch": 2.99, "learning_rate": 1.0703350127705403e-09, "loss": 0.0263, "step": 11605 }, { "epoch": 3.0, "learning_rate": 9.792860177826324e-10, "loss": 0.0273, "step": 11606 }, { "epoch": 3.0, "learning_rate": 8.922835931512997e-10, "loss": 0.0302, "step": 11607 }, { "epoch": 3.0, "learning_rate": 8.093277459320092e-10, "loss": 0.0269, "step": 11608 }, { "epoch": 3.0, "learning_rate": 7.304184828249571e-10, "loss": 0.0302, "step": 11609 }, { "epoch": 3.0, "learning_rate": 6.555558102250281e-10, "loss": 0.0274, "step": 11610 }, { "epoch": 3.0, "learning_rate": 5.847397341884886e-10, "loss": 0.0269, "step": 11611 }, { "epoch": 3.0, "learning_rate": 5.179702604440894e-10, "loss": 0.0272, "step": 11612 }, { "epoch": 3.0, "learning_rate": 4.55247394404168e-10, "loss": 0.0291, "step": 11613 }, { "epoch": 3.0, "learning_rate": 3.965711411313411e-10, "loss": 0.0281, "step": 11614 }, { "epoch": 3.0, "learning_rate": 3.4194150538846557e-10, "loss": 0.0335, "step": 11615 }, { "epoch": 3.0, "learning_rate": 2.9135849158867803e-10, "loss": 0.0278, "step": 11616 }, { "epoch": 3.0, "learning_rate": 2.448221038231502e-10, "loss": 0.024, "step": 11617 }, { "epoch": 3.0, "learning_rate": 2.023323458666404e-10, "loss": 0.0279, "step": 11618 }, { "epoch": 3.0, "learning_rate": 1.6388922115528894e-10, "loss": 0.0257, "step": 11619 }, { "epoch": 3.0, "learning_rate": 1.2949273279772022e-10, "loss": 0.0314, "step": 11620 }, { "epoch": 3.0, "learning_rate": 9.914288357504298e-11, "loss": 0.0327, "step": 11621 }, { "epoch": 3.0, "learning_rate": 7.28396759519523e-11, "loss": 0.0283, "step": 11622 }, { "epoch": 3.0, "learning_rate": 5.058311205452526e-11, "loss": 0.0311, "step": 11623 }, { "epoch": 3.0, "learning_rate": 3.237319368132319e-11, "loss": 0.0308, "step": 11624 }, { "epoch": 3.0, "learning_rate": 1.820992230339158e-11, "loss": 0.0355, "step": 11625 } ], "logging_steps": 1, "max_steps": 11625, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.799596171454382e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }