{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7378, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.00900900900901e-08, "loss": 1.5329, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.801801801801802e-07, "loss": 1.4796, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.702702702702703e-07, "loss": 1.5099, "step": 3 }, { "epoch": 0.0, "learning_rate": 3.603603603603604e-07, "loss": 1.5197, "step": 4 }, { "epoch": 0.0, "learning_rate": 4.504504504504505e-07, "loss": 1.5587, "step": 5 }, { "epoch": 0.0, "learning_rate": 5.405405405405406e-07, "loss": 1.4855, "step": 6 }, { "epoch": 0.0, "learning_rate": 6.306306306306306e-07, "loss": 1.5762, "step": 7 }, { "epoch": 0.0, "learning_rate": 7.207207207207208e-07, "loss": 1.5567, "step": 8 }, { "epoch": 0.0, "learning_rate": 8.108108108108109e-07, "loss": 1.5344, "step": 9 }, { "epoch": 0.0, "learning_rate": 9.00900900900901e-07, "loss": 1.478, "step": 10 }, { "epoch": 0.0, "learning_rate": 9.909909909909911e-07, "loss": 1.5364, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.0810810810810812e-06, "loss": 1.4757, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.1711711711711712e-06, "loss": 1.5433, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.2612612612612613e-06, "loss": 1.5281, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.3513513513513515e-06, "loss": 1.525, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.4414414414414416e-06, "loss": 1.3948, "step": 16 }, { "epoch": 0.0, "learning_rate": 1.5315315315315316e-06, "loss": 1.4453, "step": 17 }, { "epoch": 0.0, "learning_rate": 1.6216216216216219e-06, "loss": 1.307, "step": 18 }, { "epoch": 0.0, "learning_rate": 1.711711711711712e-06, "loss": 1.4192, "step": 19 }, { "epoch": 0.0, "learning_rate": 1.801801801801802e-06, "loss": 1.3362, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.8918918918918922e-06, "loss": 1.2985, "step": 21 }, { "epoch": 0.0, "learning_rate": 1.9819819819819822e-06, "loss": 1.3883, "step": 22 }, { "epoch": 0.0, "learning_rate": 2.0720720720720723e-06, "loss": 1.3485, "step": 23 }, { "epoch": 0.0, "learning_rate": 2.1621621621621623e-06, "loss": 1.2902, "step": 24 }, { "epoch": 0.0, "learning_rate": 2.2522522522522524e-06, "loss": 1.2428, "step": 25 }, { "epoch": 0.0, "learning_rate": 2.3423423423423424e-06, "loss": 1.3008, "step": 26 }, { "epoch": 0.0, "learning_rate": 2.432432432432433e-06, "loss": 1.1959, "step": 27 }, { "epoch": 0.0, "learning_rate": 2.5225225225225225e-06, "loss": 1.2508, "step": 28 }, { "epoch": 0.0, "learning_rate": 2.612612612612613e-06, "loss": 1.2069, "step": 29 }, { "epoch": 0.0, "learning_rate": 2.702702702702703e-06, "loss": 1.223, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.7927927927927926e-06, "loss": 1.2771, "step": 31 }, { "epoch": 0.0, "learning_rate": 2.882882882882883e-06, "loss": 1.3161, "step": 32 }, { "epoch": 0.0, "learning_rate": 2.9729729729729736e-06, "loss": 1.2437, "step": 33 }, { "epoch": 0.0, "learning_rate": 3.063063063063063e-06, "loss": 1.2238, "step": 34 }, { "epoch": 0.0, "learning_rate": 3.1531531531531532e-06, "loss": 1.2664, "step": 35 }, { "epoch": 0.0, "learning_rate": 3.2432432432432437e-06, "loss": 1.231, "step": 36 }, { "epoch": 0.01, "learning_rate": 3.3333333333333333e-06, "loss": 1.2872, "step": 37 }, { "epoch": 0.01, "learning_rate": 3.423423423423424e-06, "loss": 1.223, "step": 38 }, { "epoch": 0.01, "learning_rate": 3.513513513513514e-06, "loss": 1.1799, "step": 39 }, { "epoch": 0.01, "learning_rate": 3.603603603603604e-06, "loss": 1.2199, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.693693693693694e-06, "loss": 1.2544, "step": 41 }, { "epoch": 0.01, "learning_rate": 3.7837837837837844e-06, "loss": 1.1518, "step": 42 }, { "epoch": 0.01, "learning_rate": 3.8738738738738744e-06, "loss": 1.2487, "step": 43 }, { "epoch": 0.01, "learning_rate": 3.9639639639639645e-06, "loss": 1.207, "step": 44 }, { "epoch": 0.01, "learning_rate": 4.0540540540540545e-06, "loss": 1.176, "step": 45 }, { "epoch": 0.01, "learning_rate": 4.1441441441441446e-06, "loss": 1.17, "step": 46 }, { "epoch": 0.01, "learning_rate": 4.234234234234235e-06, "loss": 1.2495, "step": 47 }, { "epoch": 0.01, "learning_rate": 4.324324324324325e-06, "loss": 1.1125, "step": 48 }, { "epoch": 0.01, "learning_rate": 4.414414414414415e-06, "loss": 1.1242, "step": 49 }, { "epoch": 0.01, "learning_rate": 4.504504504504505e-06, "loss": 1.1376, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.594594594594596e-06, "loss": 1.1772, "step": 51 }, { "epoch": 0.01, "learning_rate": 4.684684684684685e-06, "loss": 1.1064, "step": 52 }, { "epoch": 0.01, "learning_rate": 4.774774774774775e-06, "loss": 1.2259, "step": 53 }, { "epoch": 0.01, "learning_rate": 4.864864864864866e-06, "loss": 1.1684, "step": 54 }, { "epoch": 0.01, "learning_rate": 4.954954954954955e-06, "loss": 1.1131, "step": 55 }, { "epoch": 0.01, "learning_rate": 5.045045045045045e-06, "loss": 1.1551, "step": 56 }, { "epoch": 0.01, "learning_rate": 5.135135135135135e-06, "loss": 1.174, "step": 57 }, { "epoch": 0.01, "learning_rate": 5.225225225225226e-06, "loss": 1.1585, "step": 58 }, { "epoch": 0.01, "learning_rate": 5.315315315315316e-06, "loss": 1.1465, "step": 59 }, { "epoch": 0.01, "learning_rate": 5.405405405405406e-06, "loss": 1.1368, "step": 60 }, { "epoch": 0.01, "learning_rate": 5.495495495495496e-06, "loss": 1.1044, "step": 61 }, { "epoch": 0.01, "learning_rate": 5.585585585585585e-06, "loss": 1.1823, "step": 62 }, { "epoch": 0.01, "learning_rate": 5.675675675675676e-06, "loss": 1.1132, "step": 63 }, { "epoch": 0.01, "learning_rate": 5.765765765765766e-06, "loss": 1.1564, "step": 64 }, { "epoch": 0.01, "learning_rate": 5.855855855855856e-06, "loss": 1.2014, "step": 65 }, { "epoch": 0.01, "learning_rate": 5.945945945945947e-06, "loss": 1.1205, "step": 66 }, { "epoch": 0.01, "learning_rate": 6.036036036036037e-06, "loss": 1.1792, "step": 67 }, { "epoch": 0.01, "learning_rate": 6.126126126126126e-06, "loss": 1.1222, "step": 68 }, { "epoch": 0.01, "learning_rate": 6.2162162162162164e-06, "loss": 1.1388, "step": 69 }, { "epoch": 0.01, "learning_rate": 6.3063063063063065e-06, "loss": 1.1192, "step": 70 }, { "epoch": 0.01, "learning_rate": 6.396396396396397e-06, "loss": 1.0475, "step": 71 }, { "epoch": 0.01, "learning_rate": 6.486486486486487e-06, "loss": 1.054, "step": 72 }, { "epoch": 0.01, "learning_rate": 6.5765765765765775e-06, "loss": 1.137, "step": 73 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-06, "loss": 1.1725, "step": 74 }, { "epoch": 0.01, "learning_rate": 6.7567567567567575e-06, "loss": 1.1458, "step": 75 }, { "epoch": 0.01, "learning_rate": 6.846846846846848e-06, "loss": 1.1548, "step": 76 }, { "epoch": 0.01, "learning_rate": 6.936936936936938e-06, "loss": 1.088, "step": 77 }, { "epoch": 0.01, "learning_rate": 7.027027027027028e-06, "loss": 1.1419, "step": 78 }, { "epoch": 0.01, "learning_rate": 7.117117117117117e-06, "loss": 1.2025, "step": 79 }, { "epoch": 0.01, "learning_rate": 7.207207207207208e-06, "loss": 1.1473, "step": 80 }, { "epoch": 0.01, "learning_rate": 7.297297297297298e-06, "loss": 1.1163, "step": 81 }, { "epoch": 0.01, "learning_rate": 7.387387387387388e-06, "loss": 1.1433, "step": 82 }, { "epoch": 0.01, "learning_rate": 7.477477477477479e-06, "loss": 1.074, "step": 83 }, { "epoch": 0.01, "learning_rate": 7.567567567567569e-06, "loss": 1.1515, "step": 84 }, { "epoch": 0.01, "learning_rate": 7.657657657657658e-06, "loss": 1.1138, "step": 85 }, { "epoch": 0.01, "learning_rate": 7.747747747747749e-06, "loss": 1.0936, "step": 86 }, { "epoch": 0.01, "learning_rate": 7.837837837837838e-06, "loss": 1.0827, "step": 87 }, { "epoch": 0.01, "learning_rate": 7.927927927927929e-06, "loss": 1.0834, "step": 88 }, { "epoch": 0.01, "learning_rate": 8.018018018018018e-06, "loss": 1.0508, "step": 89 }, { "epoch": 0.01, "learning_rate": 8.108108108108109e-06, "loss": 1.1186, "step": 90 }, { "epoch": 0.01, "learning_rate": 8.198198198198198e-06, "loss": 1.1309, "step": 91 }, { "epoch": 0.01, "learning_rate": 8.288288288288289e-06, "loss": 1.1148, "step": 92 }, { "epoch": 0.01, "learning_rate": 8.378378378378378e-06, "loss": 1.0856, "step": 93 }, { "epoch": 0.01, "learning_rate": 8.46846846846847e-06, "loss": 1.1282, "step": 94 }, { "epoch": 0.01, "learning_rate": 8.55855855855856e-06, "loss": 1.0472, "step": 95 }, { "epoch": 0.01, "learning_rate": 8.64864864864865e-06, "loss": 1.0974, "step": 96 }, { "epoch": 0.01, "learning_rate": 8.738738738738739e-06, "loss": 1.1473, "step": 97 }, { "epoch": 0.01, "learning_rate": 8.82882882882883e-06, "loss": 1.1367, "step": 98 }, { "epoch": 0.01, "learning_rate": 8.91891891891892e-06, "loss": 1.0934, "step": 99 }, { "epoch": 0.01, "learning_rate": 9.00900900900901e-06, "loss": 1.0467, "step": 100 }, { "epoch": 0.01, "learning_rate": 9.0990990990991e-06, "loss": 1.097, "step": 101 }, { "epoch": 0.01, "learning_rate": 9.189189189189191e-06, "loss": 1.1488, "step": 102 }, { "epoch": 0.01, "learning_rate": 9.27927927927928e-06, "loss": 1.0601, "step": 103 }, { "epoch": 0.01, "learning_rate": 9.36936936936937e-06, "loss": 1.0515, "step": 104 }, { "epoch": 0.01, "learning_rate": 9.45945945945946e-06, "loss": 1.0913, "step": 105 }, { "epoch": 0.01, "learning_rate": 9.54954954954955e-06, "loss": 1.0708, "step": 106 }, { "epoch": 0.01, "learning_rate": 9.63963963963964e-06, "loss": 1.1234, "step": 107 }, { "epoch": 0.01, "learning_rate": 9.729729729729732e-06, "loss": 1.1122, "step": 108 }, { "epoch": 0.01, "learning_rate": 9.81981981981982e-06, "loss": 1.1105, "step": 109 }, { "epoch": 0.01, "learning_rate": 9.90990990990991e-06, "loss": 1.0641, "step": 110 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 1.0876, "step": 111 }, { "epoch": 0.02, "learning_rate": 1.009009009009009e-05, "loss": 1.1106, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.0180180180180181e-05, "loss": 1.0211, "step": 113 }, { "epoch": 0.02, "learning_rate": 1.027027027027027e-05, "loss": 1.079, "step": 114 }, { "epoch": 0.02, "learning_rate": 1.0360360360360363e-05, "loss": 1.0706, "step": 115 }, { "epoch": 0.02, "learning_rate": 1.0450450450450452e-05, "loss": 1.1284, "step": 116 }, { "epoch": 0.02, "learning_rate": 1.0540540540540541e-05, "loss": 1.0729, "step": 117 }, { "epoch": 0.02, "learning_rate": 1.0630630630630632e-05, "loss": 1.0762, "step": 118 }, { "epoch": 0.02, "learning_rate": 1.0720720720720721e-05, "loss": 1.0784, "step": 119 }, { "epoch": 0.02, "learning_rate": 1.0810810810810812e-05, "loss": 1.0528, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.0900900900900901e-05, "loss": 1.1541, "step": 121 }, { "epoch": 0.02, "learning_rate": 1.0990990990990992e-05, "loss": 1.0422, "step": 122 }, { "epoch": 0.02, "learning_rate": 1.1081081081081081e-05, "loss": 1.1011, "step": 123 }, { "epoch": 0.02, "learning_rate": 1.117117117117117e-05, "loss": 1.1217, "step": 124 }, { "epoch": 0.02, "learning_rate": 1.1261261261261263e-05, "loss": 1.1304, "step": 125 }, { "epoch": 0.02, "learning_rate": 1.1351351351351352e-05, "loss": 1.0344, "step": 126 }, { "epoch": 0.02, "learning_rate": 1.1441441441441443e-05, "loss": 1.002, "step": 127 }, { "epoch": 0.02, "learning_rate": 1.1531531531531532e-05, "loss": 1.0919, "step": 128 }, { "epoch": 0.02, "learning_rate": 1.1621621621621622e-05, "loss": 1.0649, "step": 129 }, { "epoch": 0.02, "learning_rate": 1.1711711711711713e-05, "loss": 1.0652, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.1801801801801802e-05, "loss": 1.0487, "step": 131 }, { "epoch": 0.02, "learning_rate": 1.1891891891891894e-05, "loss": 1.0465, "step": 132 }, { "epoch": 0.02, "learning_rate": 1.1981981981981983e-05, "loss": 1.0768, "step": 133 }, { "epoch": 0.02, "learning_rate": 1.2072072072072074e-05, "loss": 0.9794, "step": 134 }, { "epoch": 0.02, "learning_rate": 1.2162162162162164e-05, "loss": 1.0697, "step": 135 }, { "epoch": 0.02, "learning_rate": 1.2252252252252253e-05, "loss": 1.0871, "step": 136 }, { "epoch": 0.02, "learning_rate": 1.2342342342342344e-05, "loss": 1.1014, "step": 137 }, { "epoch": 0.02, "learning_rate": 1.2432432432432433e-05, "loss": 1.0593, "step": 138 }, { "epoch": 0.02, "learning_rate": 1.2522522522522524e-05, "loss": 1.1098, "step": 139 }, { "epoch": 0.02, "learning_rate": 1.2612612612612613e-05, "loss": 1.0109, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.2702702702702702e-05, "loss": 1.0694, "step": 141 }, { "epoch": 0.02, "learning_rate": 1.2792792792792795e-05, "loss": 1.1646, "step": 142 }, { "epoch": 0.02, "learning_rate": 1.2882882882882884e-05, "loss": 1.0878, "step": 143 }, { "epoch": 0.02, "learning_rate": 1.2972972972972975e-05, "loss": 1.0761, "step": 144 }, { "epoch": 0.02, "learning_rate": 1.3063063063063064e-05, "loss": 1.16, "step": 145 }, { "epoch": 0.02, "learning_rate": 1.3153153153153155e-05, "loss": 1.0295, "step": 146 }, { "epoch": 0.02, "learning_rate": 1.3243243243243244e-05, "loss": 1.0708, "step": 147 }, { "epoch": 0.02, "learning_rate": 1.3333333333333333e-05, "loss": 1.0838, "step": 148 }, { "epoch": 0.02, "learning_rate": 1.3423423423423426e-05, "loss": 1.0923, "step": 149 }, { "epoch": 0.02, "learning_rate": 1.3513513513513515e-05, "loss": 1.0735, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.3603603603603606e-05, "loss": 1.0285, "step": 151 }, { "epoch": 0.02, "learning_rate": 1.3693693693693695e-05, "loss": 1.0821, "step": 152 }, { "epoch": 0.02, "learning_rate": 1.3783783783783784e-05, "loss": 1.1349, "step": 153 }, { "epoch": 0.02, "learning_rate": 1.3873873873873875e-05, "loss": 1.1298, "step": 154 }, { "epoch": 0.02, "learning_rate": 1.3963963963963964e-05, "loss": 1.0283, "step": 155 }, { "epoch": 0.02, "learning_rate": 1.4054054054054055e-05, "loss": 1.0531, "step": 156 }, { "epoch": 0.02, "learning_rate": 1.4144144144144145e-05, "loss": 1.0433, "step": 157 }, { "epoch": 0.02, "learning_rate": 1.4234234234234234e-05, "loss": 1.0927, "step": 158 }, { "epoch": 0.02, "learning_rate": 1.4324324324324326e-05, "loss": 1.0982, "step": 159 }, { "epoch": 0.02, "learning_rate": 1.4414414414414416e-05, "loss": 1.0949, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.4504504504504506e-05, "loss": 1.1391, "step": 161 }, { "epoch": 0.02, "learning_rate": 1.4594594594594596e-05, "loss": 1.0866, "step": 162 }, { "epoch": 0.02, "learning_rate": 1.4684684684684686e-05, "loss": 1.0582, "step": 163 }, { "epoch": 0.02, "learning_rate": 1.4774774774774776e-05, "loss": 1.1075, "step": 164 }, { "epoch": 0.02, "learning_rate": 1.4864864864864865e-05, "loss": 1.1084, "step": 165 }, { "epoch": 0.02, "learning_rate": 1.4954954954954957e-05, "loss": 1.123, "step": 166 }, { "epoch": 0.02, "learning_rate": 1.5045045045045045e-05, "loss": 1.1115, "step": 167 }, { "epoch": 0.02, "learning_rate": 1.5135135135135138e-05, "loss": 1.1405, "step": 168 }, { "epoch": 0.02, "learning_rate": 1.5225225225225227e-05, "loss": 1.0938, "step": 169 }, { "epoch": 0.02, "learning_rate": 1.5315315315315316e-05, "loss": 1.0397, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.540540540540541e-05, "loss": 1.0375, "step": 171 }, { "epoch": 0.02, "learning_rate": 1.5495495495495498e-05, "loss": 1.0204, "step": 172 }, { "epoch": 0.02, "learning_rate": 1.5585585585585587e-05, "loss": 1.0704, "step": 173 }, { "epoch": 0.02, "learning_rate": 1.5675675675675676e-05, "loss": 1.0751, "step": 174 }, { "epoch": 0.02, "learning_rate": 1.576576576576577e-05, "loss": 1.0699, "step": 175 }, { "epoch": 0.02, "learning_rate": 1.5855855855855858e-05, "loss": 1.154, "step": 176 }, { "epoch": 0.02, "learning_rate": 1.5945945945945947e-05, "loss": 1.0631, "step": 177 }, { "epoch": 0.02, "learning_rate": 1.6036036036036036e-05, "loss": 1.0476, "step": 178 }, { "epoch": 0.02, "learning_rate": 1.6126126126126126e-05, "loss": 1.0438, "step": 179 }, { "epoch": 0.02, "learning_rate": 1.6216216216216218e-05, "loss": 1.0214, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.6306306306306307e-05, "loss": 1.0567, "step": 181 }, { "epoch": 0.02, "learning_rate": 1.6396396396396396e-05, "loss": 1.0637, "step": 182 }, { "epoch": 0.02, "learning_rate": 1.648648648648649e-05, "loss": 1.0573, "step": 183 }, { "epoch": 0.02, "learning_rate": 1.6576576576576578e-05, "loss": 1.064, "step": 184 }, { "epoch": 0.03, "learning_rate": 1.6666666666666667e-05, "loss": 1.0472, "step": 185 }, { "epoch": 0.03, "learning_rate": 1.6756756756756757e-05, "loss": 1.0657, "step": 186 }, { "epoch": 0.03, "learning_rate": 1.6846846846846846e-05, "loss": 1.0572, "step": 187 }, { "epoch": 0.03, "learning_rate": 1.693693693693694e-05, "loss": 1.0894, "step": 188 }, { "epoch": 0.03, "learning_rate": 1.7027027027027028e-05, "loss": 1.0494, "step": 189 }, { "epoch": 0.03, "learning_rate": 1.711711711711712e-05, "loss": 1.0465, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.720720720720721e-05, "loss": 1.0745, "step": 191 }, { "epoch": 0.03, "learning_rate": 1.72972972972973e-05, "loss": 1.0701, "step": 192 }, { "epoch": 0.03, "learning_rate": 1.7387387387387388e-05, "loss": 1.0685, "step": 193 }, { "epoch": 0.03, "learning_rate": 1.7477477477477477e-05, "loss": 1.0849, "step": 194 }, { "epoch": 0.03, "learning_rate": 1.756756756756757e-05, "loss": 0.9969, "step": 195 }, { "epoch": 0.03, "learning_rate": 1.765765765765766e-05, "loss": 1.0896, "step": 196 }, { "epoch": 0.03, "learning_rate": 1.774774774774775e-05, "loss": 1.0565, "step": 197 }, { "epoch": 0.03, "learning_rate": 1.783783783783784e-05, "loss": 1.0938, "step": 198 }, { "epoch": 0.03, "learning_rate": 1.792792792792793e-05, "loss": 1.0666, "step": 199 }, { "epoch": 0.03, "learning_rate": 1.801801801801802e-05, "loss": 1.0856, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.8108108108108108e-05, "loss": 1.0625, "step": 201 }, { "epoch": 0.03, "learning_rate": 1.81981981981982e-05, "loss": 1.1317, "step": 202 }, { "epoch": 0.03, "learning_rate": 1.828828828828829e-05, "loss": 1.0667, "step": 203 }, { "epoch": 0.03, "learning_rate": 1.8378378378378383e-05, "loss": 1.0531, "step": 204 }, { "epoch": 0.03, "learning_rate": 1.8468468468468472e-05, "loss": 1.1046, "step": 205 }, { "epoch": 0.03, "learning_rate": 1.855855855855856e-05, "loss": 1.0332, "step": 206 }, { "epoch": 0.03, "learning_rate": 1.864864864864865e-05, "loss": 1.03, "step": 207 }, { "epoch": 0.03, "learning_rate": 1.873873873873874e-05, "loss": 1.0354, "step": 208 }, { "epoch": 0.03, "learning_rate": 1.8828828828828832e-05, "loss": 1.0382, "step": 209 }, { "epoch": 0.03, "learning_rate": 1.891891891891892e-05, "loss": 1.1094, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.900900900900901e-05, "loss": 1.0549, "step": 211 }, { "epoch": 0.03, "learning_rate": 1.90990990990991e-05, "loss": 1.0446, "step": 212 }, { "epoch": 0.03, "learning_rate": 1.918918918918919e-05, "loss": 1.1234, "step": 213 }, { "epoch": 0.03, "learning_rate": 1.927927927927928e-05, "loss": 1.0339, "step": 214 }, { "epoch": 0.03, "learning_rate": 1.936936936936937e-05, "loss": 1.0804, "step": 215 }, { "epoch": 0.03, "learning_rate": 1.9459459459459463e-05, "loss": 0.9989, "step": 216 }, { "epoch": 0.03, "learning_rate": 1.9549549549549552e-05, "loss": 1.0256, "step": 217 }, { "epoch": 0.03, "learning_rate": 1.963963963963964e-05, "loss": 1.1146, "step": 218 }, { "epoch": 0.03, "learning_rate": 1.972972972972973e-05, "loss": 1.0596, "step": 219 }, { "epoch": 0.03, "learning_rate": 1.981981981981982e-05, "loss": 1.0095, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.9909909909909912e-05, "loss": 1.0552, "step": 221 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 1.1031, "step": 222 }, { "epoch": 0.03, "learning_rate": 1.999999903632836e-05, "loss": 1.0627, "step": 223 }, { "epoch": 0.03, "learning_rate": 1.9999996145313622e-05, "loss": 1.0919, "step": 224 }, { "epoch": 0.03, "learning_rate": 1.9999991326956344e-05, "loss": 1.0429, "step": 225 }, { "epoch": 0.03, "learning_rate": 1.9999984581257452e-05, "loss": 1.0458, "step": 226 }, { "epoch": 0.03, "learning_rate": 1.999997590821825e-05, "loss": 1.0775, "step": 227 }, { "epoch": 0.03, "learning_rate": 1.999996530784041e-05, "loss": 1.1056, "step": 228 }, { "epoch": 0.03, "learning_rate": 1.999995278012597e-05, "loss": 1.0265, "step": 229 }, { "epoch": 0.03, "learning_rate": 1.999993832507735e-05, "loss": 1.1053, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.9999921942697335e-05, "loss": 1.0909, "step": 231 }, { "epoch": 0.03, "learning_rate": 1.999990363298908e-05, "loss": 1.0418, "step": 232 }, { "epoch": 0.03, "learning_rate": 1.9999883395956114e-05, "loss": 1.0644, "step": 233 }, { "epoch": 0.03, "learning_rate": 1.999986123160234e-05, "loss": 1.0771, "step": 234 }, { "epoch": 0.03, "learning_rate": 1.9999837139932027e-05, "loss": 1.0286, "step": 235 }, { "epoch": 0.03, "learning_rate": 1.9999811120949818e-05, "loss": 1.0349, "step": 236 }, { "epoch": 0.03, "learning_rate": 1.9999783174660733e-05, "loss": 1.0942, "step": 237 }, { "epoch": 0.03, "learning_rate": 1.9999753301070156e-05, "loss": 1.0608, "step": 238 }, { "epoch": 0.03, "learning_rate": 1.999972150018384e-05, "loss": 1.043, "step": 239 }, { "epoch": 0.03, "learning_rate": 1.9999687772007917e-05, "loss": 1.0133, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.999965211654889e-05, "loss": 1.0934, "step": 241 }, { "epoch": 0.03, "learning_rate": 1.999961453381363e-05, "loss": 1.055, "step": 242 }, { "epoch": 0.03, "learning_rate": 1.9999575023809377e-05, "loss": 1.0337, "step": 243 }, { "epoch": 0.03, "learning_rate": 1.999953358654375e-05, "loss": 1.0855, "step": 244 }, { "epoch": 0.03, "learning_rate": 1.9999490222024733e-05, "loss": 1.0475, "step": 245 }, { "epoch": 0.03, "learning_rate": 1.9999444930260684e-05, "loss": 1.0356, "step": 246 }, { "epoch": 0.03, "learning_rate": 1.9999397711260334e-05, "loss": 1.0199, "step": 247 }, { "epoch": 0.03, "learning_rate": 1.9999348565032784e-05, "loss": 1.0611, "step": 248 }, { "epoch": 0.03, "learning_rate": 1.9999297491587502e-05, "loss": 1.0693, "step": 249 }, { "epoch": 0.03, "learning_rate": 1.9999244490934337e-05, "loss": 1.0364, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.99991895630835e-05, "loss": 1.1136, "step": 251 }, { "epoch": 0.03, "learning_rate": 1.9999132708045578e-05, "loss": 1.1399, "step": 252 }, { "epoch": 0.03, "learning_rate": 1.999907392583153e-05, "loss": 1.0871, "step": 253 }, { "epoch": 0.03, "learning_rate": 1.9999013216452688e-05, "loss": 1.0569, "step": 254 }, { "epoch": 0.03, "learning_rate": 1.9998950579920748e-05, "loss": 1.1241, "step": 255 }, { "epoch": 0.03, "learning_rate": 1.9998886016247784e-05, "loss": 1.0005, "step": 256 }, { "epoch": 0.03, "learning_rate": 1.999881952544624e-05, "loss": 1.0657, "step": 257 }, { "epoch": 0.03, "learning_rate": 1.9998751107528934e-05, "loss": 1.0499, "step": 258 }, { "epoch": 0.04, "learning_rate": 1.9998680762509045e-05, "loss": 1.0649, "step": 259 }, { "epoch": 0.04, "learning_rate": 1.9998608490400137e-05, "loss": 1.0485, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.999853429121614e-05, "loss": 1.0526, "step": 261 }, { "epoch": 0.04, "learning_rate": 1.999845816497135e-05, "loss": 1.0717, "step": 262 }, { "epoch": 0.04, "learning_rate": 1.999838011168044e-05, "loss": 1.0868, "step": 263 }, { "epoch": 0.04, "learning_rate": 1.9998300131358457e-05, "loss": 1.1233, "step": 264 }, { "epoch": 0.04, "learning_rate": 1.999821822402081e-05, "loss": 1.0684, "step": 265 }, { "epoch": 0.04, "learning_rate": 1.9998134389683295e-05, "loss": 1.1339, "step": 266 }, { "epoch": 0.04, "learning_rate": 1.9998048628362063e-05, "loss": 1.0798, "step": 267 }, { "epoch": 0.04, "learning_rate": 1.9997960940073643e-05, "loss": 1.0522, "step": 268 }, { "epoch": 0.04, "learning_rate": 1.9997871324834937e-05, "loss": 1.0343, "step": 269 }, { "epoch": 0.04, "learning_rate": 1.9997779782663217e-05, "loss": 1.0349, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.9997686313576125e-05, "loss": 1.0716, "step": 271 }, { "epoch": 0.04, "learning_rate": 1.999759091759168e-05, "loss": 1.0758, "step": 272 }, { "epoch": 0.04, "learning_rate": 1.999749359472826e-05, "loss": 1.0757, "step": 273 }, { "epoch": 0.04, "learning_rate": 1.999739434500463e-05, "loss": 1.0894, "step": 274 }, { "epoch": 0.04, "learning_rate": 1.9997293168439915e-05, "loss": 1.0191, "step": 275 }, { "epoch": 0.04, "learning_rate": 1.999719006505362e-05, "loss": 1.1022, "step": 276 }, { "epoch": 0.04, "learning_rate": 1.9997085034865605e-05, "loss": 1.0772, "step": 277 }, { "epoch": 0.04, "learning_rate": 1.999697807789613e-05, "loss": 1.0904, "step": 278 }, { "epoch": 0.04, "learning_rate": 1.9996869194165796e-05, "loss": 1.0335, "step": 279 }, { "epoch": 0.04, "learning_rate": 1.999675838369559e-05, "loss": 1.066, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.9996645646506876e-05, "loss": 1.0192, "step": 281 }, { "epoch": 0.04, "learning_rate": 1.9996530982621376e-05, "loss": 1.0027, "step": 282 }, { "epoch": 0.04, "learning_rate": 1.9996414392061192e-05, "loss": 1.0818, "step": 283 }, { "epoch": 0.04, "learning_rate": 1.9996295874848794e-05, "loss": 1.0507, "step": 284 }, { "epoch": 0.04, "learning_rate": 1.9996175431007025e-05, "loss": 1.0593, "step": 285 }, { "epoch": 0.04, "learning_rate": 1.99960530605591e-05, "loss": 1.0766, "step": 286 }, { "epoch": 0.04, "learning_rate": 1.9995928763528603e-05, "loss": 1.0998, "step": 287 }, { "epoch": 0.04, "learning_rate": 1.999580253993949e-05, "loss": 1.0352, "step": 288 }, { "epoch": 0.04, "learning_rate": 1.9995674389816087e-05, "loss": 1.0655, "step": 289 }, { "epoch": 0.04, "learning_rate": 1.9995544313183096e-05, "loss": 1.0322, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.9995412310065583e-05, "loss": 1.0785, "step": 291 }, { "epoch": 0.04, "learning_rate": 1.9995278380488994e-05, "loss": 1.0531, "step": 292 }, { "epoch": 0.04, "learning_rate": 1.999514252447914e-05, "loss": 1.0203, "step": 293 }, { "epoch": 0.04, "learning_rate": 1.9995004742062206e-05, "loss": 1.0253, "step": 294 }, { "epoch": 0.04, "learning_rate": 1.9994865033264744e-05, "loss": 1.0183, "step": 295 }, { "epoch": 0.04, "learning_rate": 1.9994723398113688e-05, "loss": 1.0847, "step": 296 }, { "epoch": 0.04, "learning_rate": 1.999457983663633e-05, "loss": 1.0552, "step": 297 }, { "epoch": 0.04, "learning_rate": 1.9994434348860337e-05, "loss": 1.0168, "step": 298 }, { "epoch": 0.04, "learning_rate": 1.9994286934813754e-05, "loss": 1.0849, "step": 299 }, { "epoch": 0.04, "learning_rate": 1.9994137594524992e-05, "loss": 1.0604, "step": 300 }, { "epoch": 0.04, "learning_rate": 1.999398632802284e-05, "loss": 1.0002, "step": 301 }, { "epoch": 0.04, "learning_rate": 1.999383313533644e-05, "loss": 0.9884, "step": 302 }, { "epoch": 0.04, "learning_rate": 1.999367801649532e-05, "loss": 0.9971, "step": 303 }, { "epoch": 0.04, "learning_rate": 1.9993520971529388e-05, "loss": 1.065, "step": 304 }, { "epoch": 0.04, "learning_rate": 1.9993362000468897e-05, "loss": 1.0752, "step": 305 }, { "epoch": 0.04, "learning_rate": 1.99932011033445e-05, "loss": 0.969, "step": 306 }, { "epoch": 0.04, "learning_rate": 1.9993038280187197e-05, "loss": 1.0694, "step": 307 }, { "epoch": 0.04, "learning_rate": 1.9992873531028372e-05, "loss": 1.0476, "step": 308 }, { "epoch": 0.04, "learning_rate": 1.9992706855899785e-05, "loss": 1.0069, "step": 309 }, { "epoch": 0.04, "learning_rate": 1.9992538254833548e-05, "loss": 1.089, "step": 310 }, { "epoch": 0.04, "learning_rate": 1.9992367727862166e-05, "loss": 0.9998, "step": 311 }, { "epoch": 0.04, "learning_rate": 1.99921952750185e-05, "loss": 1.0391, "step": 312 }, { "epoch": 0.04, "learning_rate": 1.999202089633579e-05, "loss": 1.0432, "step": 313 }, { "epoch": 0.04, "learning_rate": 1.9991844591847644e-05, "loss": 1.0604, "step": 314 }, { "epoch": 0.04, "learning_rate": 1.9991666361588042e-05, "loss": 1.0441, "step": 315 }, { "epoch": 0.04, "learning_rate": 1.9991486205591334e-05, "loss": 1.0169, "step": 316 }, { "epoch": 0.04, "learning_rate": 1.9991304123892243e-05, "loss": 1.0487, "step": 317 }, { "epoch": 0.04, "learning_rate": 1.9991120116525866e-05, "loss": 1.0348, "step": 318 }, { "epoch": 0.04, "learning_rate": 1.999093418352766e-05, "loss": 1.0473, "step": 319 }, { "epoch": 0.04, "learning_rate": 1.999074632493347e-05, "loss": 1.004, "step": 320 }, { "epoch": 0.04, "learning_rate": 1.9990556540779496e-05, "loss": 0.9759, "step": 321 }, { "epoch": 0.04, "learning_rate": 1.9990364831102317e-05, "loss": 1.0875, "step": 322 }, { "epoch": 0.04, "learning_rate": 1.9990171195938885e-05, "loss": 1.0189, "step": 323 }, { "epoch": 0.04, "learning_rate": 1.9989975635326517e-05, "loss": 1.0514, "step": 324 }, { "epoch": 0.04, "learning_rate": 1.9989778149302902e-05, "loss": 1.0038, "step": 325 }, { "epoch": 0.04, "learning_rate": 1.9989578737906107e-05, "loss": 1.0309, "step": 326 }, { "epoch": 0.04, "learning_rate": 1.9989377401174566e-05, "loss": 1.0321, "step": 327 }, { "epoch": 0.04, "learning_rate": 1.998917413914708e-05, "loss": 1.0524, "step": 328 }, { "epoch": 0.04, "learning_rate": 1.9988968951862823e-05, "loss": 1.0423, "step": 329 }, { "epoch": 0.04, "learning_rate": 1.9988761839361347e-05, "loss": 1.063, "step": 330 }, { "epoch": 0.04, "learning_rate": 1.9988552801682572e-05, "loss": 1.0112, "step": 331 }, { "epoch": 0.04, "learning_rate": 1.9988341838866772e-05, "loss": 1.0071, "step": 332 }, { "epoch": 0.05, "learning_rate": 1.9988128950954623e-05, "loss": 1.1399, "step": 333 }, { "epoch": 0.05, "learning_rate": 1.9987914137987153e-05, "loss": 1.0669, "step": 334 }, { "epoch": 0.05, "learning_rate": 1.9987697400005753e-05, "loss": 1.0604, "step": 335 }, { "epoch": 0.05, "learning_rate": 1.998747873705221e-05, "loss": 1.0665, "step": 336 }, { "epoch": 0.05, "learning_rate": 1.9987258149168654e-05, "loss": 0.9705, "step": 337 }, { "epoch": 0.05, "learning_rate": 1.9987035636397615e-05, "loss": 1.085, "step": 338 }, { "epoch": 0.05, "learning_rate": 1.9986811198781966e-05, "loss": 1.0045, "step": 339 }, { "epoch": 0.05, "learning_rate": 1.998658483636497e-05, "loss": 1.0688, "step": 340 }, { "epoch": 0.05, "learning_rate": 1.9986356549190253e-05, "loss": 1.0142, "step": 341 }, { "epoch": 0.05, "learning_rate": 1.9986126337301814e-05, "loss": 1.0841, "step": 342 }, { "epoch": 0.05, "learning_rate": 1.998589420074402e-05, "loss": 0.9708, "step": 343 }, { "epoch": 0.05, "learning_rate": 1.998566013956162e-05, "loss": 1.0628, "step": 344 }, { "epoch": 0.05, "learning_rate": 1.998542415379972e-05, "loss": 0.9605, "step": 345 }, { "epoch": 0.05, "learning_rate": 1.99851862435038e-05, "loss": 1.0646, "step": 346 }, { "epoch": 0.05, "learning_rate": 1.9984946408719718e-05, "loss": 1.0677, "step": 347 }, { "epoch": 0.05, "learning_rate": 1.9984704649493696e-05, "loss": 1.0728, "step": 348 }, { "epoch": 0.05, "learning_rate": 1.998446096587233e-05, "loss": 1.0541, "step": 349 }, { "epoch": 0.05, "learning_rate": 1.9984215357902586e-05, "loss": 0.9873, "step": 350 }, { "epoch": 0.05, "learning_rate": 1.9983967825631803e-05, "loss": 1.0843, "step": 351 }, { "epoch": 0.05, "learning_rate": 1.9983718369107684e-05, "loss": 0.979, "step": 352 }, { "epoch": 0.05, "learning_rate": 1.9983466988378314e-05, "loss": 0.9984, "step": 353 }, { "epoch": 0.05, "learning_rate": 1.9983213683492143e-05, "loss": 0.9643, "step": 354 }, { "epoch": 0.05, "learning_rate": 1.9982958454497984e-05, "loss": 1.0481, "step": 355 }, { "epoch": 0.05, "learning_rate": 1.9982701301445033e-05, "loss": 1.0205, "step": 356 }, { "epoch": 0.05, "learning_rate": 1.998244222438285e-05, "loss": 0.9949, "step": 357 }, { "epoch": 0.05, "learning_rate": 1.9982181223361373e-05, "loss": 1.1233, "step": 358 }, { "epoch": 0.05, "learning_rate": 1.9981918298430905e-05, "loss": 1.1052, "step": 359 }, { "epoch": 0.05, "learning_rate": 1.9981653449642114e-05, "loss": 1.0404, "step": 360 }, { "epoch": 0.05, "learning_rate": 1.9981386677046052e-05, "loss": 1.0269, "step": 361 }, { "epoch": 0.05, "learning_rate": 1.9981117980694137e-05, "loss": 1.0837, "step": 362 }, { "epoch": 0.05, "learning_rate": 1.9980847360638144e-05, "loss": 1.0084, "step": 363 }, { "epoch": 0.05, "learning_rate": 1.9980574816930245e-05, "loss": 1.032, "step": 364 }, { "epoch": 0.05, "learning_rate": 1.998030034962296e-05, "loss": 0.9997, "step": 365 }, { "epoch": 0.05, "learning_rate": 1.9980023958769195e-05, "loss": 1.0823, "step": 366 }, { "epoch": 0.05, "learning_rate": 1.9979745644422213e-05, "loss": 1.07, "step": 367 }, { "epoch": 0.05, "learning_rate": 1.9979465406635654e-05, "loss": 1.0112, "step": 368 }, { "epoch": 0.05, "learning_rate": 1.9979183245463538e-05, "loss": 1.0289, "step": 369 }, { "epoch": 0.05, "learning_rate": 1.9978899160960238e-05, "loss": 1.0378, "step": 370 }, { "epoch": 0.05, "learning_rate": 1.9978613153180516e-05, "loss": 1.0655, "step": 371 }, { "epoch": 0.05, "learning_rate": 1.997832522217949e-05, "loss": 1.0391, "step": 372 }, { "epoch": 0.05, "learning_rate": 1.997803536801265e-05, "loss": 0.9873, "step": 373 }, { "epoch": 0.05, "learning_rate": 1.9977743590735866e-05, "loss": 1.0518, "step": 374 }, { "epoch": 0.05, "learning_rate": 1.9977449890405378e-05, "loss": 1.1028, "step": 375 }, { "epoch": 0.05, "learning_rate": 1.9977154267077786e-05, "loss": 1.0304, "step": 376 }, { "epoch": 0.05, "learning_rate": 1.9976856720810064e-05, "loss": 1.0742, "step": 377 }, { "epoch": 0.05, "learning_rate": 1.9976557251659567e-05, "loss": 1.0512, "step": 378 }, { "epoch": 0.05, "learning_rate": 1.997625585968401e-05, "loss": 1.0838, "step": 379 }, { "epoch": 0.05, "learning_rate": 1.9975952544941478e-05, "loss": 1.0817, "step": 380 }, { "epoch": 0.05, "learning_rate": 1.9975647307490433e-05, "loss": 1.0442, "step": 381 }, { "epoch": 0.05, "learning_rate": 1.9975340147389707e-05, "loss": 1.052, "step": 382 }, { "epoch": 0.05, "learning_rate": 1.9975031064698497e-05, "loss": 1.1346, "step": 383 }, { "epoch": 0.05, "learning_rate": 1.9974720059476375e-05, "loss": 0.9743, "step": 384 }, { "epoch": 0.05, "learning_rate": 1.997440713178328e-05, "loss": 1.0114, "step": 385 }, { "epoch": 0.05, "learning_rate": 1.997409228167953e-05, "loss": 1.0224, "step": 386 }, { "epoch": 0.05, "learning_rate": 1.99737755092258e-05, "loss": 1.0133, "step": 387 }, { "epoch": 0.05, "learning_rate": 1.997345681448315e-05, "loss": 1.0368, "step": 388 }, { "epoch": 0.05, "learning_rate": 1.9973136197512998e-05, "loss": 1.0455, "step": 389 }, { "epoch": 0.05, "learning_rate": 1.997281365837714e-05, "loss": 1.0234, "step": 390 }, { "epoch": 0.05, "learning_rate": 1.9972489197137742e-05, "loss": 1.0065, "step": 391 }, { "epoch": 0.05, "learning_rate": 1.9972162813857334e-05, "loss": 1.1248, "step": 392 }, { "epoch": 0.05, "learning_rate": 1.9971834508598826e-05, "loss": 1.044, "step": 393 }, { "epoch": 0.05, "learning_rate": 1.997150428142549e-05, "loss": 1.0652, "step": 394 }, { "epoch": 0.05, "learning_rate": 1.9971172132400977e-05, "loss": 1.077, "step": 395 }, { "epoch": 0.05, "learning_rate": 1.99708380615893e-05, "loss": 1.0684, "step": 396 }, { "epoch": 0.05, "learning_rate": 1.9970502069054846e-05, "loss": 1.1024, "step": 397 }, { "epoch": 0.05, "learning_rate": 1.9970164154862375e-05, "loss": 1.0467, "step": 398 }, { "epoch": 0.05, "learning_rate": 1.996982431907701e-05, "loss": 1.0518, "step": 399 }, { "epoch": 0.05, "learning_rate": 1.996948256176425e-05, "loss": 1.0937, "step": 400 }, { "epoch": 0.05, "learning_rate": 1.996913888298997e-05, "loss": 1.0175, "step": 401 }, { "epoch": 0.05, "learning_rate": 1.99687932828204e-05, "loss": 1.0093, "step": 402 }, { "epoch": 0.05, "learning_rate": 1.9968445761322154e-05, "loss": 1.1694, "step": 403 }, { "epoch": 0.05, "learning_rate": 1.996809631856221e-05, "loss": 1.0233, "step": 404 }, { "epoch": 0.05, "learning_rate": 1.9967744954607916e-05, "loss": 1.0505, "step": 405 }, { "epoch": 0.06, "learning_rate": 1.9967391669526995e-05, "loss": 1.0571, "step": 406 }, { "epoch": 0.06, "learning_rate": 1.9967036463387533e-05, "loss": 1.0532, "step": 407 }, { "epoch": 0.06, "learning_rate": 1.9966679336257995e-05, "loss": 1.0945, "step": 408 }, { "epoch": 0.06, "learning_rate": 1.996632028820721e-05, "loss": 1.1641, "step": 409 }, { "epoch": 0.06, "learning_rate": 1.996595931930438e-05, "loss": 0.976, "step": 410 }, { "epoch": 0.06, "learning_rate": 1.996559642961907e-05, "loss": 1.0146, "step": 411 }, { "epoch": 0.06, "learning_rate": 1.9965231619221232e-05, "loss": 1.0399, "step": 412 }, { "epoch": 0.06, "learning_rate": 1.9964864888181168e-05, "loss": 1.063, "step": 413 }, { "epoch": 0.06, "learning_rate": 1.996449623656956e-05, "loss": 1.0476, "step": 414 }, { "epoch": 0.06, "learning_rate": 1.996412566445747e-05, "loss": 1.0336, "step": 415 }, { "epoch": 0.06, "learning_rate": 1.996375317191631e-05, "loss": 1.0315, "step": 416 }, { "epoch": 0.06, "learning_rate": 1.996337875901787e-05, "loss": 1.0405, "step": 417 }, { "epoch": 0.06, "learning_rate": 1.9963002425834322e-05, "loss": 1.0652, "step": 418 }, { "epoch": 0.06, "learning_rate": 1.9962624172438195e-05, "loss": 1.0405, "step": 419 }, { "epoch": 0.06, "learning_rate": 1.996224399890239e-05, "loss": 1.0463, "step": 420 }, { "epoch": 0.06, "learning_rate": 1.9961861905300177e-05, "loss": 1.019, "step": 421 }, { "epoch": 0.06, "learning_rate": 1.9961477891705203e-05, "loss": 1.0251, "step": 422 }, { "epoch": 0.06, "learning_rate": 1.9961091958191476e-05, "loss": 1.0625, "step": 423 }, { "epoch": 0.06, "learning_rate": 1.9960704104833383e-05, "loss": 1.0752, "step": 424 }, { "epoch": 0.06, "learning_rate": 1.9960314331705676e-05, "loss": 0.997, "step": 425 }, { "epoch": 0.06, "learning_rate": 1.9959922638883473e-05, "loss": 0.9821, "step": 426 }, { "epoch": 0.06, "learning_rate": 1.995952902644227e-05, "loss": 1.0249, "step": 427 }, { "epoch": 0.06, "learning_rate": 1.9959133494457936e-05, "loss": 1.0238, "step": 428 }, { "epoch": 0.06, "learning_rate": 1.9958736043006693e-05, "loss": 1.0292, "step": 429 }, { "epoch": 0.06, "learning_rate": 1.9958336672165147e-05, "loss": 1.0137, "step": 430 }, { "epoch": 0.06, "learning_rate": 1.9957935382010273e-05, "loss": 0.9683, "step": 431 }, { "epoch": 0.06, "learning_rate": 1.995753217261941e-05, "loss": 1.0405, "step": 432 }, { "epoch": 0.06, "learning_rate": 1.9957127044070277e-05, "loss": 1.0315, "step": 433 }, { "epoch": 0.06, "learning_rate": 1.9956719996440947e-05, "loss": 1.0685, "step": 434 }, { "epoch": 0.06, "learning_rate": 1.995631102980988e-05, "loss": 1.0424, "step": 435 }, { "epoch": 0.06, "learning_rate": 1.995590014425589e-05, "loss": 1.0789, "step": 436 }, { "epoch": 0.06, "learning_rate": 1.9955487339858174e-05, "loss": 1.0778, "step": 437 }, { "epoch": 0.06, "learning_rate": 1.9955072616696294e-05, "loss": 1.0779, "step": 438 }, { "epoch": 0.06, "learning_rate": 1.9954655974850183e-05, "loss": 1.0272, "step": 439 }, { "epoch": 0.06, "learning_rate": 1.9954237414400133e-05, "loss": 1.0306, "step": 440 }, { "epoch": 0.06, "learning_rate": 1.9953816935426825e-05, "loss": 1.1196, "step": 441 }, { "epoch": 0.06, "learning_rate": 1.9953394538011294e-05, "loss": 1.02, "step": 442 }, { "epoch": 0.06, "learning_rate": 1.995297022223495e-05, "loss": 1.0191, "step": 443 }, { "epoch": 0.06, "learning_rate": 1.9952543988179584e-05, "loss": 1.104, "step": 444 }, { "epoch": 0.06, "learning_rate": 1.995211583592733e-05, "loss": 1.0404, "step": 445 }, { "epoch": 0.06, "learning_rate": 1.9951685765560717e-05, "loss": 1.0442, "step": 446 }, { "epoch": 0.06, "learning_rate": 1.9951253777162634e-05, "loss": 1.016, "step": 447 }, { "epoch": 0.06, "learning_rate": 1.995081987081634e-05, "loss": 1.0114, "step": 448 }, { "epoch": 0.06, "learning_rate": 1.9950384046605458e-05, "loss": 1.0812, "step": 449 }, { "epoch": 0.06, "learning_rate": 1.994994630461399e-05, "loss": 1.0713, "step": 450 }, { "epoch": 0.06, "learning_rate": 1.9949506644926308e-05, "loss": 1.0338, "step": 451 }, { "epoch": 0.06, "learning_rate": 1.9949065067627144e-05, "loss": 1.0879, "step": 452 }, { "epoch": 0.06, "learning_rate": 1.9948621572801604e-05, "loss": 0.9861, "step": 453 }, { "epoch": 0.06, "learning_rate": 1.994817616053517e-05, "loss": 1.0176, "step": 454 }, { "epoch": 0.06, "learning_rate": 1.994772883091369e-05, "loss": 1.0859, "step": 455 }, { "epoch": 0.06, "learning_rate": 1.994727958402337e-05, "loss": 1.0437, "step": 456 }, { "epoch": 0.06, "learning_rate": 1.99468284199508e-05, "loss": 1.0905, "step": 457 }, { "epoch": 0.06, "learning_rate": 1.994637533878294e-05, "loss": 1.0324, "step": 458 }, { "epoch": 0.06, "learning_rate": 1.994592034060711e-05, "loss": 1.0681, "step": 459 }, { "epoch": 0.06, "learning_rate": 1.9945463425511002e-05, "loss": 1.0294, "step": 460 }, { "epoch": 0.06, "learning_rate": 1.9945004593582682e-05, "loss": 1.0353, "step": 461 }, { "epoch": 0.06, "learning_rate": 1.994454384491058e-05, "loss": 1.0344, "step": 462 }, { "epoch": 0.06, "learning_rate": 1.9944081179583503e-05, "loss": 1.1061, "step": 463 }, { "epoch": 0.06, "learning_rate": 1.9943616597690616e-05, "loss": 1.0123, "step": 464 }, { "epoch": 0.06, "learning_rate": 1.9943150099321463e-05, "loss": 0.994, "step": 465 }, { "epoch": 0.06, "learning_rate": 1.9942681684565956e-05, "loss": 1.0334, "step": 466 }, { "epoch": 0.06, "learning_rate": 1.9942211353514375e-05, "loss": 1.0329, "step": 467 }, { "epoch": 0.06, "learning_rate": 1.9941739106257362e-05, "loss": 1.0357, "step": 468 }, { "epoch": 0.06, "learning_rate": 1.9941264942885943e-05, "loss": 1.0676, "step": 469 }, { "epoch": 0.06, "learning_rate": 1.9940788863491503e-05, "loss": 1.0209, "step": 470 }, { "epoch": 0.06, "learning_rate": 1.9940310868165796e-05, "loss": 1.0424, "step": 471 }, { "epoch": 0.06, "learning_rate": 1.9939830957000955e-05, "loss": 1.0643, "step": 472 }, { "epoch": 0.06, "learning_rate": 1.9939349130089466e-05, "loss": 1.07, "step": 473 }, { "epoch": 0.06, "learning_rate": 1.99388653875242e-05, "loss": 1.0121, "step": 474 }, { "epoch": 0.06, "learning_rate": 1.9938379729398392e-05, "loss": 0.9864, "step": 475 }, { "epoch": 0.06, "learning_rate": 1.993789215580564e-05, "loss": 1.087, "step": 476 }, { "epoch": 0.06, "learning_rate": 1.9937402666839924e-05, "loss": 1.0343, "step": 477 }, { "epoch": 0.06, "learning_rate": 1.9936911262595574e-05, "loss": 1.0011, "step": 478 }, { "epoch": 0.06, "learning_rate": 1.9936417943167308e-05, "loss": 1.0531, "step": 479 }, { "epoch": 0.07, "learning_rate": 1.9935922708650203e-05, "loss": 1.0174, "step": 480 }, { "epoch": 0.07, "learning_rate": 1.993542555913971e-05, "loss": 1.0799, "step": 481 }, { "epoch": 0.07, "learning_rate": 1.9934926494731645e-05, "loss": 1.116, "step": 482 }, { "epoch": 0.07, "learning_rate": 1.9934425515522197e-05, "loss": 1.0361, "step": 483 }, { "epoch": 0.07, "learning_rate": 1.9933922621607918e-05, "loss": 1.0759, "step": 484 }, { "epoch": 0.07, "learning_rate": 1.9933417813085735e-05, "loss": 0.9656, "step": 485 }, { "epoch": 0.07, "learning_rate": 1.993291109005294e-05, "loss": 0.966, "step": 486 }, { "epoch": 0.07, "learning_rate": 1.99324024526072e-05, "loss": 1.0282, "step": 487 }, { "epoch": 0.07, "learning_rate": 1.993189190084655e-05, "loss": 0.9562, "step": 488 }, { "epoch": 0.07, "learning_rate": 1.993137943486938e-05, "loss": 0.9875, "step": 489 }, { "epoch": 0.07, "learning_rate": 1.9930865054774466e-05, "loss": 1.0231, "step": 490 }, { "epoch": 0.07, "learning_rate": 1.9930348760660946e-05, "loss": 1.0418, "step": 491 }, { "epoch": 0.07, "learning_rate": 1.992983055262833e-05, "loss": 1.0088, "step": 492 }, { "epoch": 0.07, "learning_rate": 1.992931043077649e-05, "loss": 0.9756, "step": 493 }, { "epoch": 0.07, "learning_rate": 1.9928788395205673e-05, "loss": 1.0101, "step": 494 }, { "epoch": 0.07, "learning_rate": 1.9928264446016496e-05, "loss": 0.9892, "step": 495 }, { "epoch": 0.07, "learning_rate": 1.992773858330994e-05, "loss": 1.0463, "step": 496 }, { "epoch": 0.07, "learning_rate": 1.9927210807187354e-05, "loss": 1.105, "step": 497 }, { "epoch": 0.07, "learning_rate": 1.9926681117750463e-05, "loss": 1.0661, "step": 498 }, { "epoch": 0.07, "learning_rate": 1.9926149515101355e-05, "loss": 1.0493, "step": 499 }, { "epoch": 0.07, "learning_rate": 1.9925615999342484e-05, "loss": 0.9869, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.9925080570576686e-05, "loss": 1.076, "step": 501 }, { "epoch": 0.07, "learning_rate": 1.9924543228907147e-05, "loss": 1.0456, "step": 502 }, { "epoch": 0.07, "learning_rate": 1.9924003974437435e-05, "loss": 0.9969, "step": 503 }, { "epoch": 0.07, "learning_rate": 1.9923462807271482e-05, "loss": 1.0427, "step": 504 }, { "epoch": 0.07, "learning_rate": 1.9922919727513594e-05, "loss": 0.9965, "step": 505 }, { "epoch": 0.07, "learning_rate": 1.9922374735268434e-05, "loss": 0.9807, "step": 506 }, { "epoch": 0.07, "learning_rate": 1.992182783064105e-05, "loss": 0.9608, "step": 507 }, { "epoch": 0.07, "learning_rate": 1.992127901373684e-05, "loss": 1.0351, "step": 508 }, { "epoch": 0.07, "learning_rate": 1.992072828466158e-05, "loss": 1.0445, "step": 509 }, { "epoch": 0.07, "learning_rate": 1.992017564352142e-05, "loss": 0.9485, "step": 510 }, { "epoch": 0.07, "learning_rate": 1.991962109042287e-05, "loss": 1.0186, "step": 511 }, { "epoch": 0.07, "learning_rate": 1.9919064625472813e-05, "loss": 0.9365, "step": 512 }, { "epoch": 0.07, "learning_rate": 1.99185062487785e-05, "loss": 1.072, "step": 513 }, { "epoch": 0.07, "learning_rate": 1.9917945960447546e-05, "loss": 1.0498, "step": 514 }, { "epoch": 0.07, "learning_rate": 1.991738376058794e-05, "loss": 0.9837, "step": 515 }, { "epoch": 0.07, "learning_rate": 1.991681964930803e-05, "loss": 0.9584, "step": 516 }, { "epoch": 0.07, "learning_rate": 1.9916253626716556e-05, "loss": 0.9915, "step": 517 }, { "epoch": 0.07, "learning_rate": 1.9915685692922592e-05, "loss": 0.9978, "step": 518 }, { "epoch": 0.07, "learning_rate": 1.991511584803561e-05, "loss": 1.0235, "step": 519 }, { "epoch": 0.07, "learning_rate": 1.9914544092165436e-05, "loss": 1.0005, "step": 520 }, { "epoch": 0.07, "learning_rate": 1.9913970425422265e-05, "loss": 1.0226, "step": 521 }, { "epoch": 0.07, "learning_rate": 1.9913394847916662e-05, "loss": 1.1143, "step": 522 }, { "epoch": 0.07, "learning_rate": 1.991281735975956e-05, "loss": 1.0101, "step": 523 }, { "epoch": 0.07, "learning_rate": 1.9912237961062268e-05, "loss": 0.9891, "step": 524 }, { "epoch": 0.07, "learning_rate": 1.9911656651936446e-05, "loss": 1.0513, "step": 525 }, { "epoch": 0.07, "learning_rate": 1.9911073432494138e-05, "loss": 1.0178, "step": 526 }, { "epoch": 0.07, "learning_rate": 1.991048830284775e-05, "loss": 0.9987, "step": 527 }, { "epoch": 0.07, "learning_rate": 1.9909901263110053e-05, "loss": 1.0257, "step": 528 }, { "epoch": 0.07, "learning_rate": 1.9909312313394197e-05, "loss": 0.9691, "step": 529 }, { "epoch": 0.07, "learning_rate": 1.9908721453813686e-05, "loss": 1.0017, "step": 530 }, { "epoch": 0.07, "learning_rate": 1.9908128684482398e-05, "loss": 1.0283, "step": 531 }, { "epoch": 0.07, "learning_rate": 1.990753400551459e-05, "loss": 1.0696, "step": 532 }, { "epoch": 0.07, "learning_rate": 1.9906937417024866e-05, "loss": 1.0352, "step": 533 }, { "epoch": 0.07, "learning_rate": 1.9906338919128214e-05, "loss": 1.0331, "step": 534 }, { "epoch": 0.07, "learning_rate": 1.9905738511939983e-05, "loss": 1.0601, "step": 535 }, { "epoch": 0.07, "learning_rate": 1.9905136195575895e-05, "loss": 1.1078, "step": 536 }, { "epoch": 0.07, "learning_rate": 1.9904531970152036e-05, "loss": 1.0484, "step": 537 }, { "epoch": 0.07, "learning_rate": 1.990392583578486e-05, "loss": 1.0626, "step": 538 }, { "epoch": 0.07, "learning_rate": 1.990331779259119e-05, "loss": 1.0116, "step": 539 }, { "epoch": 0.07, "learning_rate": 1.9902707840688217e-05, "loss": 1.0357, "step": 540 }, { "epoch": 0.07, "learning_rate": 1.9902095980193503e-05, "loss": 1.0165, "step": 541 }, { "epoch": 0.07, "learning_rate": 1.990148221122497e-05, "loss": 0.9427, "step": 542 }, { "epoch": 0.07, "learning_rate": 1.9900866533900914e-05, "loss": 0.9817, "step": 543 }, { "epoch": 0.07, "learning_rate": 1.9900248948339996e-05, "loss": 1.0577, "step": 544 }, { "epoch": 0.07, "learning_rate": 1.9899629454661246e-05, "loss": 0.9879, "step": 545 }, { "epoch": 0.07, "learning_rate": 1.9899008052984065e-05, "loss": 0.975, "step": 546 }, { "epoch": 0.07, "learning_rate": 1.9898384743428213e-05, "loss": 1.0412, "step": 547 }, { "epoch": 0.07, "learning_rate": 1.9897759526113826e-05, "loss": 0.9934, "step": 548 }, { "epoch": 0.07, "learning_rate": 1.989713240116141e-05, "loss": 1.0608, "step": 549 }, { "epoch": 0.07, "learning_rate": 1.9896503368691826e-05, "loss": 1.0115, "step": 550 }, { "epoch": 0.07, "learning_rate": 1.9895872428826307e-05, "loss": 0.915, "step": 551 }, { "epoch": 0.07, "learning_rate": 1.989523958168647e-05, "loss": 0.9608, "step": 552 }, { "epoch": 0.07, "learning_rate": 1.9894604827394273e-05, "loss": 1.0228, "step": 553 }, { "epoch": 0.08, "learning_rate": 1.9893968166072067e-05, "loss": 0.9814, "step": 554 }, { "epoch": 0.08, "learning_rate": 1.989332959784255e-05, "loss": 1.0018, "step": 555 }, { "epoch": 0.08, "learning_rate": 1.9892689122828797e-05, "loss": 1.0761, "step": 556 }, { "epoch": 0.08, "learning_rate": 1.989204674115425e-05, "loss": 1.0191, "step": 557 }, { "epoch": 0.08, "learning_rate": 1.989140245294272e-05, "loss": 1.006, "step": 558 }, { "epoch": 0.08, "learning_rate": 1.9890756258318383e-05, "loss": 1.0971, "step": 559 }, { "epoch": 0.08, "learning_rate": 1.9890108157405782e-05, "loss": 1.0149, "step": 560 }, { "epoch": 0.08, "learning_rate": 1.9889458150329827e-05, "loss": 0.9797, "step": 561 }, { "epoch": 0.08, "learning_rate": 1.98888062372158e-05, "loss": 1.0372, "step": 562 }, { "epoch": 0.08, "learning_rate": 1.988815241818934e-05, "loss": 1.034, "step": 563 }, { "epoch": 0.08, "learning_rate": 1.9887496693376473e-05, "loss": 1.0882, "step": 564 }, { "epoch": 0.08, "learning_rate": 1.9886839062903568e-05, "loss": 1.0241, "step": 565 }, { "epoch": 0.08, "learning_rate": 1.988617952689738e-05, "loss": 1.021, "step": 566 }, { "epoch": 0.08, "learning_rate": 1.988551808548502e-05, "loss": 1.0381, "step": 567 }, { "epoch": 0.08, "learning_rate": 1.988485473879397e-05, "loss": 1.0409, "step": 568 }, { "epoch": 0.08, "learning_rate": 1.988418948695208e-05, "loss": 0.947, "step": 569 }, { "epoch": 0.08, "learning_rate": 1.988352233008757e-05, "loss": 1.0503, "step": 570 }, { "epoch": 0.08, "learning_rate": 1.9882853268329027e-05, "loss": 1.0788, "step": 571 }, { "epoch": 0.08, "learning_rate": 1.9882182301805393e-05, "loss": 1.0715, "step": 572 }, { "epoch": 0.08, "learning_rate": 1.988150943064599e-05, "loss": 1.0888, "step": 573 }, { "epoch": 0.08, "learning_rate": 1.988083465498051e-05, "loss": 1.0536, "step": 574 }, { "epoch": 0.08, "learning_rate": 1.9880157974938994e-05, "loss": 1.0392, "step": 575 }, { "epoch": 0.08, "learning_rate": 1.9879479390651867e-05, "loss": 0.9555, "step": 576 }, { "epoch": 0.08, "learning_rate": 1.987879890224992e-05, "loss": 1.0878, "step": 577 }, { "epoch": 0.08, "learning_rate": 1.98781165098643e-05, "loss": 0.9355, "step": 578 }, { "epoch": 0.08, "learning_rate": 1.987743221362653e-05, "loss": 0.9995, "step": 579 }, { "epoch": 0.08, "learning_rate": 1.9876746013668494e-05, "loss": 1.075, "step": 580 }, { "epoch": 0.08, "learning_rate": 1.987605791012245e-05, "loss": 0.9871, "step": 581 }, { "epoch": 0.08, "learning_rate": 1.9875367903121022e-05, "loss": 1.0062, "step": 582 }, { "epoch": 0.08, "learning_rate": 1.987467599279719e-05, "loss": 0.9993, "step": 583 }, { "epoch": 0.08, "learning_rate": 1.9873982179284316e-05, "loss": 0.9668, "step": 584 }, { "epoch": 0.08, "learning_rate": 1.9873286462716118e-05, "loss": 1.0311, "step": 585 }, { "epoch": 0.08, "learning_rate": 1.9872588843226687e-05, "loss": 1.1064, "step": 586 }, { "epoch": 0.08, "learning_rate": 1.9871889320950476e-05, "loss": 1.0561, "step": 587 }, { "epoch": 0.08, "learning_rate": 1.9871187896022305e-05, "loss": 1.0216, "step": 588 }, { "epoch": 0.08, "learning_rate": 1.987048456857737e-05, "loss": 1.0758, "step": 589 }, { "epoch": 0.08, "learning_rate": 1.9869779338751217e-05, "loss": 1.047, "step": 590 }, { "epoch": 0.08, "learning_rate": 1.986907220667978e-05, "loss": 1.0219, "step": 591 }, { "epoch": 0.08, "learning_rate": 1.9868363172499334e-05, "loss": 1.0161, "step": 592 }, { "epoch": 0.08, "learning_rate": 1.986765223634654e-05, "loss": 0.9924, "step": 593 }, { "epoch": 0.08, "learning_rate": 1.986693939835842e-05, "loss": 0.9505, "step": 594 }, { "epoch": 0.08, "learning_rate": 1.9866224658672365e-05, "loss": 1.0483, "step": 595 }, { "epoch": 0.08, "learning_rate": 1.9865508017426127e-05, "loss": 0.9867, "step": 596 }, { "epoch": 0.08, "learning_rate": 1.986478947475783e-05, "loss": 1.0093, "step": 597 }, { "epoch": 0.08, "learning_rate": 1.9864069030805955e-05, "loss": 1.0033, "step": 598 }, { "epoch": 0.08, "learning_rate": 1.9863346685709365e-05, "loss": 0.9763, "step": 599 }, { "epoch": 0.08, "learning_rate": 1.9862622439607276e-05, "loss": 1.0486, "step": 600 }, { "epoch": 0.08, "learning_rate": 1.9861896292639274e-05, "loss": 1.0205, "step": 601 }, { "epoch": 0.08, "learning_rate": 1.9861168244945314e-05, "loss": 0.9948, "step": 602 }, { "epoch": 0.08, "learning_rate": 1.986043829666572e-05, "loss": 1.0173, "step": 603 }, { "epoch": 0.08, "learning_rate": 1.985970644794117e-05, "loss": 0.9682, "step": 604 }, { "epoch": 0.08, "learning_rate": 1.985897269891272e-05, "loss": 1.0461, "step": 605 }, { "epoch": 0.08, "learning_rate": 1.9858237049721793e-05, "loss": 1.0928, "step": 606 }, { "epoch": 0.08, "learning_rate": 1.9857499500510167e-05, "loss": 1.0253, "step": 607 }, { "epoch": 0.08, "learning_rate": 1.9856760051419996e-05, "loss": 1.1206, "step": 608 }, { "epoch": 0.08, "learning_rate": 1.98560187025938e-05, "loss": 1.0481, "step": 609 }, { "epoch": 0.08, "learning_rate": 1.985527545417446e-05, "loss": 1.0026, "step": 610 }, { "epoch": 0.08, "learning_rate": 1.985453030630522e-05, "loss": 1.0065, "step": 611 }, { "epoch": 0.08, "learning_rate": 1.9853783259129703e-05, "loss": 1.017, "step": 612 }, { "epoch": 0.08, "learning_rate": 1.985303431279189e-05, "loss": 1.0411, "step": 613 }, { "epoch": 0.08, "learning_rate": 1.9852283467436124e-05, "loss": 0.9975, "step": 614 }, { "epoch": 0.08, "learning_rate": 1.9851530723207125e-05, "loss": 1.0796, "step": 615 }, { "epoch": 0.08, "learning_rate": 1.9850776080249966e-05, "loss": 1.0502, "step": 616 }, { "epoch": 0.08, "learning_rate": 1.9850019538710098e-05, "loss": 1.0712, "step": 617 }, { "epoch": 0.08, "learning_rate": 1.984926109873333e-05, "loss": 1.072, "step": 618 }, { "epoch": 0.08, "learning_rate": 1.984850076046584e-05, "loss": 1.0177, "step": 619 }, { "epoch": 0.08, "learning_rate": 1.9847738524054172e-05, "loss": 1.0854, "step": 620 }, { "epoch": 0.08, "learning_rate": 1.9846974389645232e-05, "loss": 1.046, "step": 621 }, { "epoch": 0.08, "learning_rate": 1.98462083573863e-05, "loss": 1.0023, "step": 622 }, { "epoch": 0.08, "learning_rate": 1.984544042742501e-05, "loss": 1.0231, "step": 623 }, { "epoch": 0.08, "learning_rate": 1.9844670599909375e-05, "loss": 1.0495, "step": 624 }, { "epoch": 0.08, "learning_rate": 1.9843898874987765e-05, "loss": 1.0442, "step": 625 }, { "epoch": 0.08, "learning_rate": 1.9843125252808914e-05, "loss": 1.0509, "step": 626 }, { "epoch": 0.08, "learning_rate": 1.9842349733521932e-05, "loss": 0.971, "step": 627 }, { "epoch": 0.09, "learning_rate": 1.9841572317276285e-05, "loss": 1.0053, "step": 628 }, { "epoch": 0.09, "learning_rate": 1.984079300422181e-05, "loss": 1.0473, "step": 629 }, { "epoch": 0.09, "learning_rate": 1.9840011794508702e-05, "loss": 1.0652, "step": 630 }, { "epoch": 0.09, "learning_rate": 1.983922868828753e-05, "loss": 1.0565, "step": 631 }, { "epoch": 0.09, "learning_rate": 1.9838443685709228e-05, "loss": 0.998, "step": 632 }, { "epoch": 0.09, "learning_rate": 1.983765678692509e-05, "loss": 0.9352, "step": 633 }, { "epoch": 0.09, "learning_rate": 1.9836867992086777e-05, "loss": 1.0686, "step": 634 }, { "epoch": 0.09, "learning_rate": 1.983607730134632e-05, "loss": 0.9756, "step": 635 }, { "epoch": 0.09, "learning_rate": 1.9835284714856115e-05, "loss": 0.9938, "step": 636 }, { "epoch": 0.09, "learning_rate": 1.983449023276891e-05, "loss": 1.0205, "step": 637 }, { "epoch": 0.09, "learning_rate": 1.983369385523784e-05, "loss": 1.0221, "step": 638 }, { "epoch": 0.09, "learning_rate": 1.983289558241639e-05, "loss": 1.0769, "step": 639 }, { "epoch": 0.09, "learning_rate": 1.9832095414458414e-05, "loss": 1.0569, "step": 640 }, { "epoch": 0.09, "learning_rate": 1.9831293351518136e-05, "loss": 1.0112, "step": 641 }, { "epoch": 0.09, "learning_rate": 1.9830489393750132e-05, "loss": 1.0663, "step": 642 }, { "epoch": 0.09, "learning_rate": 1.982968354130936e-05, "loss": 0.9443, "step": 643 }, { "epoch": 0.09, "learning_rate": 1.982887579435113e-05, "loss": 1.0057, "step": 644 }, { "epoch": 0.09, "learning_rate": 1.9828066153031133e-05, "loss": 0.9894, "step": 645 }, { "epoch": 0.09, "learning_rate": 1.98272546175054e-05, "loss": 1.0022, "step": 646 }, { "epoch": 0.09, "learning_rate": 1.9826441187930356e-05, "loss": 0.9714, "step": 647 }, { "epoch": 0.09, "learning_rate": 1.982562586446276e-05, "loss": 1.0504, "step": 648 }, { "epoch": 0.09, "learning_rate": 1.9824808647259775e-05, "loss": 0.9576, "step": 649 }, { "epoch": 0.09, "learning_rate": 1.9823989536478887e-05, "loss": 1.0795, "step": 650 }, { "epoch": 0.09, "learning_rate": 1.982316853227798e-05, "loss": 1.0202, "step": 651 }, { "epoch": 0.09, "learning_rate": 1.9822345634815278e-05, "loss": 1.0098, "step": 652 }, { "epoch": 0.09, "learning_rate": 1.9821520844249388e-05, "loss": 1.0487, "step": 653 }, { "epoch": 0.09, "learning_rate": 1.982069416073928e-05, "loss": 1.0568, "step": 654 }, { "epoch": 0.09, "learning_rate": 1.9819865584444274e-05, "loss": 1.0102, "step": 655 }, { "epoch": 0.09, "learning_rate": 1.9819035115524076e-05, "loss": 1.0302, "step": 656 }, { "epoch": 0.09, "learning_rate": 1.9818202754138737e-05, "loss": 0.9647, "step": 657 }, { "epoch": 0.09, "learning_rate": 1.9817368500448685e-05, "loss": 1.0891, "step": 658 }, { "epoch": 0.09, "learning_rate": 1.981653235461471e-05, "loss": 0.9806, "step": 659 }, { "epoch": 0.09, "learning_rate": 1.9815694316797967e-05, "loss": 1.0686, "step": 660 }, { "epoch": 0.09, "learning_rate": 1.9814854387159973e-05, "loss": 1.0099, "step": 661 }, { "epoch": 0.09, "learning_rate": 1.9814012565862607e-05, "loss": 1.0246, "step": 662 }, { "epoch": 0.09, "learning_rate": 1.9813168853068126e-05, "loss": 1.0595, "step": 663 }, { "epoch": 0.09, "learning_rate": 1.9812323248939134e-05, "loss": 0.9833, "step": 664 }, { "epoch": 0.09, "learning_rate": 1.981147575363861e-05, "loss": 1.0266, "step": 665 }, { "epoch": 0.09, "learning_rate": 1.9810626367329903e-05, "loss": 1.088, "step": 666 }, { "epoch": 0.09, "learning_rate": 1.980977509017671e-05, "loss": 0.9822, "step": 667 }, { "epoch": 0.09, "learning_rate": 1.9808921922343104e-05, "loss": 1.0356, "step": 668 }, { "epoch": 0.09, "learning_rate": 1.980806686399352e-05, "loss": 1.0243, "step": 669 }, { "epoch": 0.09, "learning_rate": 1.9807209915292754e-05, "loss": 1.0777, "step": 670 }, { "epoch": 0.09, "learning_rate": 1.980635107640598e-05, "loss": 1.0619, "step": 671 }, { "epoch": 0.09, "learning_rate": 1.980549034749871e-05, "loss": 1.0326, "step": 672 }, { "epoch": 0.09, "learning_rate": 1.9804627728736848e-05, "loss": 1.0236, "step": 673 }, { "epoch": 0.09, "learning_rate": 1.9803763220286646e-05, "loss": 1.0152, "step": 674 }, { "epoch": 0.09, "learning_rate": 1.9802896822314726e-05, "loss": 0.9642, "step": 675 }, { "epoch": 0.09, "learning_rate": 1.980202853498807e-05, "loss": 1.0251, "step": 676 }, { "epoch": 0.09, "learning_rate": 1.9801158358474028e-05, "loss": 1.0874, "step": 677 }, { "epoch": 0.09, "learning_rate": 1.9800286292940313e-05, "loss": 0.9847, "step": 678 }, { "epoch": 0.09, "learning_rate": 1.9799412338555005e-05, "loss": 1.1238, "step": 679 }, { "epoch": 0.09, "learning_rate": 1.979853649548654e-05, "loss": 1.0094, "step": 680 }, { "epoch": 0.09, "learning_rate": 1.9797658763903725e-05, "loss": 1.0134, "step": 681 }, { "epoch": 0.09, "learning_rate": 1.9796779143975732e-05, "loss": 0.9804, "step": 682 }, { "epoch": 0.09, "learning_rate": 1.9795897635872085e-05, "loss": 0.9413, "step": 683 }, { "epoch": 0.09, "learning_rate": 1.9795014239762692e-05, "loss": 1.038, "step": 684 }, { "epoch": 0.09, "learning_rate": 1.9794128955817806e-05, "loss": 1.0621, "step": 685 }, { "epoch": 0.09, "learning_rate": 1.9793241784208054e-05, "loss": 0.9563, "step": 686 }, { "epoch": 0.09, "learning_rate": 1.979235272510443e-05, "loss": 1.0274, "step": 687 }, { "epoch": 0.09, "learning_rate": 1.9791461778678278e-05, "loss": 1.0233, "step": 688 }, { "epoch": 0.09, "learning_rate": 1.9790568945101313e-05, "loss": 1.0689, "step": 689 }, { "epoch": 0.09, "learning_rate": 1.9789674224545626e-05, "loss": 1.017, "step": 690 }, { "epoch": 0.09, "learning_rate": 1.978877761718365e-05, "loss": 0.9966, "step": 691 }, { "epoch": 0.09, "learning_rate": 1.9787879123188193e-05, "loss": 1.044, "step": 692 }, { "epoch": 0.09, "learning_rate": 1.978697874273243e-05, "loss": 1.0101, "step": 693 }, { "epoch": 0.09, "learning_rate": 1.978607647598989e-05, "loss": 0.9678, "step": 694 }, { "epoch": 0.09, "learning_rate": 1.9785172323134475e-05, "loss": 1.0326, "step": 695 }, { "epoch": 0.09, "learning_rate": 1.9784266284340446e-05, "loss": 0.9287, "step": 696 }, { "epoch": 0.09, "learning_rate": 1.9783358359782424e-05, "loss": 1.0335, "step": 697 }, { "epoch": 0.09, "learning_rate": 1.9782448549635404e-05, "loss": 1.0173, "step": 698 }, { "epoch": 0.09, "learning_rate": 1.978153685407473e-05, "loss": 1.026, "step": 699 }, { "epoch": 0.09, "learning_rate": 1.9780623273276123e-05, "loss": 1.0467, "step": 700 }, { "epoch": 0.1, "learning_rate": 1.9779707807415657e-05, "loss": 1.0441, "step": 701 }, { "epoch": 0.1, "learning_rate": 1.9778790456669777e-05, "loss": 1.0636, "step": 702 }, { "epoch": 0.1, "learning_rate": 1.977787122121529e-05, "loss": 0.994, "step": 703 }, { "epoch": 0.1, "learning_rate": 1.977695010122936e-05, "loss": 0.9929, "step": 704 }, { "epoch": 0.1, "learning_rate": 1.9776027096889513e-05, "loss": 1.0315, "step": 705 }, { "epoch": 0.1, "learning_rate": 1.9775102208373654e-05, "loss": 1.0661, "step": 706 }, { "epoch": 0.1, "learning_rate": 1.9774175435860037e-05, "loss": 1.106, "step": 707 }, { "epoch": 0.1, "learning_rate": 1.9773246779527282e-05, "loss": 1.0041, "step": 708 }, { "epoch": 0.1, "learning_rate": 1.9772316239554376e-05, "loss": 0.995, "step": 709 }, { "epoch": 0.1, "learning_rate": 1.9771383816120658e-05, "loss": 0.9648, "step": 710 }, { "epoch": 0.1, "learning_rate": 1.977044950940585e-05, "loss": 0.991, "step": 711 }, { "epoch": 0.1, "learning_rate": 1.9769513319590013e-05, "loss": 1.0679, "step": 712 }, { "epoch": 0.1, "learning_rate": 1.976857524685359e-05, "loss": 1.0707, "step": 713 }, { "epoch": 0.1, "learning_rate": 1.976763529137738e-05, "loss": 1.0059, "step": 714 }, { "epoch": 0.1, "learning_rate": 1.9766693453342546e-05, "loss": 0.9914, "step": 715 }, { "epoch": 0.1, "learning_rate": 1.9765749732930603e-05, "loss": 1.0394, "step": 716 }, { "epoch": 0.1, "learning_rate": 1.976480413032345e-05, "loss": 1.0026, "step": 717 }, { "epoch": 0.1, "learning_rate": 1.976385664570333e-05, "loss": 0.9808, "step": 718 }, { "epoch": 0.1, "learning_rate": 1.9762907279252857e-05, "loss": 1.0056, "step": 719 }, { "epoch": 0.1, "learning_rate": 1.9761956031155008e-05, "loss": 1.0792, "step": 720 }, { "epoch": 0.1, "learning_rate": 1.976100290159312e-05, "loss": 1.0365, "step": 721 }, { "epoch": 0.1, "learning_rate": 1.9760047890750895e-05, "loss": 1.0334, "step": 722 }, { "epoch": 0.1, "learning_rate": 1.9759090998812393e-05, "loss": 0.9765, "step": 723 }, { "epoch": 0.1, "learning_rate": 1.9758132225962045e-05, "loss": 1.0733, "step": 724 }, { "epoch": 0.1, "learning_rate": 1.9757171572384637e-05, "loss": 0.9678, "step": 725 }, { "epoch": 0.1, "learning_rate": 1.9756209038265317e-05, "loss": 0.9946, "step": 726 }, { "epoch": 0.1, "learning_rate": 1.9755244623789605e-05, "loss": 0.9709, "step": 727 }, { "epoch": 0.1, "learning_rate": 1.975427832914337e-05, "loss": 1.0217, "step": 728 }, { "epoch": 0.1, "learning_rate": 1.9753310154512853e-05, "loss": 1.0218, "step": 729 }, { "epoch": 0.1, "learning_rate": 1.9752340100084658e-05, "loss": 1.0599, "step": 730 }, { "epoch": 0.1, "learning_rate": 1.9751368166045743e-05, "loss": 1.0408, "step": 731 }, { "epoch": 0.1, "learning_rate": 1.9750394352583434e-05, "loss": 1.0457, "step": 732 }, { "epoch": 0.1, "learning_rate": 1.974941865988542e-05, "loss": 0.9906, "step": 733 }, { "epoch": 0.1, "learning_rate": 1.9748441088139746e-05, "loss": 1.025, "step": 734 }, { "epoch": 0.1, "learning_rate": 1.9747461637534832e-05, "loss": 0.9842, "step": 735 }, { "epoch": 0.1, "learning_rate": 1.974648030825944e-05, "loss": 1.0809, "step": 736 }, { "epoch": 0.1, "learning_rate": 1.9745497100502717e-05, "loss": 1.0247, "step": 737 }, { "epoch": 0.1, "learning_rate": 1.9744512014454153e-05, "loss": 0.995, "step": 738 }, { "epoch": 0.1, "learning_rate": 1.9743525050303613e-05, "loss": 0.9559, "step": 739 }, { "epoch": 0.1, "learning_rate": 1.974253620824132e-05, "loss": 1.0292, "step": 740 }, { "epoch": 0.1, "learning_rate": 1.9741545488457853e-05, "loss": 1.0276, "step": 741 }, { "epoch": 0.1, "learning_rate": 1.9740552891144157e-05, "loss": 1.0143, "step": 742 }, { "epoch": 0.1, "learning_rate": 1.9739558416491547e-05, "loss": 1.0432, "step": 743 }, { "epoch": 0.1, "learning_rate": 1.973856206469168e-05, "loss": 0.9871, "step": 744 }, { "epoch": 0.1, "learning_rate": 1.9737563835936603e-05, "loss": 1.0187, "step": 745 }, { "epoch": 0.1, "learning_rate": 1.9736563730418695e-05, "loss": 0.9981, "step": 746 }, { "epoch": 0.1, "learning_rate": 1.973556174833072e-05, "loss": 0.9558, "step": 747 }, { "epoch": 0.1, "learning_rate": 1.9734557889865792e-05, "loss": 1.0552, "step": 748 }, { "epoch": 0.1, "learning_rate": 1.9733552155217384e-05, "loss": 1.0168, "step": 749 }, { "epoch": 0.1, "learning_rate": 1.973254454457934e-05, "loss": 0.978, "step": 750 }, { "epoch": 0.1, "learning_rate": 1.9731535058145862e-05, "loss": 1.011, "step": 751 }, { "epoch": 0.1, "learning_rate": 1.973052369611151e-05, "loss": 1.0283, "step": 752 }, { "epoch": 0.1, "learning_rate": 1.972951045867121e-05, "loss": 1.0512, "step": 753 }, { "epoch": 0.1, "learning_rate": 1.9728495346020246e-05, "loss": 0.9883, "step": 754 }, { "epoch": 0.1, "learning_rate": 1.972747835835427e-05, "loss": 0.9542, "step": 755 }, { "epoch": 0.1, "learning_rate": 1.9726459495869282e-05, "loss": 0.9886, "step": 756 }, { "epoch": 0.1, "learning_rate": 1.9725438758761658e-05, "loss": 0.9808, "step": 757 }, { "epoch": 0.1, "learning_rate": 1.9724416147228127e-05, "loss": 0.9787, "step": 758 }, { "epoch": 0.1, "learning_rate": 1.972339166146578e-05, "loss": 1.0407, "step": 759 }, { "epoch": 0.1, "learning_rate": 1.9722365301672072e-05, "loss": 1.1015, "step": 760 }, { "epoch": 0.1, "learning_rate": 1.972133706804482e-05, "loss": 0.9961, "step": 761 }, { "epoch": 0.1, "learning_rate": 1.97203069607822e-05, "loss": 1.0187, "step": 762 }, { "epoch": 0.1, "learning_rate": 1.9719274980082746e-05, "loss": 1.0619, "step": 763 }, { "epoch": 0.1, "learning_rate": 1.9718241126145353e-05, "loss": 0.9583, "step": 764 }, { "epoch": 0.1, "learning_rate": 1.971720539916929e-05, "loss": 1.0329, "step": 765 }, { "epoch": 0.1, "learning_rate": 1.971616779935417e-05, "loss": 1.0152, "step": 766 }, { "epoch": 0.1, "learning_rate": 1.9715128326899972e-05, "loss": 1.0031, "step": 767 }, { "epoch": 0.1, "learning_rate": 1.9714086982007044e-05, "loss": 0.9931, "step": 768 }, { "epoch": 0.1, "learning_rate": 1.9713043764876088e-05, "loss": 0.9789, "step": 769 }, { "epoch": 0.1, "learning_rate": 1.9711998675708162e-05, "loss": 1.0244, "step": 770 }, { "epoch": 0.1, "learning_rate": 1.9710951714704697e-05, "loss": 1.0143, "step": 771 }, { "epoch": 0.1, "learning_rate": 1.9709902882067475e-05, "loss": 0.9758, "step": 772 }, { "epoch": 0.1, "learning_rate": 1.9708852177998647e-05, "loss": 1.0384, "step": 773 }, { "epoch": 0.1, "learning_rate": 1.9707799602700712e-05, "loss": 0.9464, "step": 774 }, { "epoch": 0.11, "learning_rate": 1.9706745156376545e-05, "loss": 1.0169, "step": 775 }, { "epoch": 0.11, "learning_rate": 1.9705688839229365e-05, "loss": 1.0296, "step": 776 }, { "epoch": 0.11, "learning_rate": 1.9704630651462767e-05, "loss": 1.0232, "step": 777 }, { "epoch": 0.11, "learning_rate": 1.97035705932807e-05, "loss": 1.0229, "step": 778 }, { "epoch": 0.11, "learning_rate": 1.9702508664887475e-05, "loss": 1.0301, "step": 779 }, { "epoch": 0.11, "learning_rate": 1.9701444866487757e-05, "loss": 0.9776, "step": 780 }, { "epoch": 0.11, "learning_rate": 1.970037919828658e-05, "loss": 0.982, "step": 781 }, { "epoch": 0.11, "learning_rate": 1.9699311660489333e-05, "loss": 1.014, "step": 782 }, { "epoch": 0.11, "learning_rate": 1.969824225330177e-05, "loss": 1.0543, "step": 783 }, { "epoch": 0.11, "learning_rate": 1.9697170976929996e-05, "loss": 1.0018, "step": 784 }, { "epoch": 0.11, "learning_rate": 1.9696097831580492e-05, "loss": 1.0359, "step": 785 }, { "epoch": 0.11, "learning_rate": 1.9695022817460083e-05, "loss": 0.9635, "step": 786 }, { "epoch": 0.11, "learning_rate": 1.9693945934775966e-05, "loss": 0.9907, "step": 787 }, { "epoch": 0.11, "learning_rate": 1.969286718373569e-05, "loss": 1.0542, "step": 788 }, { "epoch": 0.11, "learning_rate": 1.9691786564547163e-05, "loss": 1.049, "step": 789 }, { "epoch": 0.11, "learning_rate": 1.969070407741867e-05, "loss": 1.0037, "step": 790 }, { "epoch": 0.11, "learning_rate": 1.968961972255883e-05, "loss": 0.9895, "step": 791 }, { "epoch": 0.11, "learning_rate": 1.9688533500176645e-05, "loss": 1.0477, "step": 792 }, { "epoch": 0.11, "learning_rate": 1.968744541048146e-05, "loss": 0.9774, "step": 793 }, { "epoch": 0.11, "learning_rate": 1.9686355453682995e-05, "loss": 1.0408, "step": 794 }, { "epoch": 0.11, "learning_rate": 1.9685263629991313e-05, "loss": 0.9522, "step": 795 }, { "epoch": 0.11, "learning_rate": 1.9684169939616856e-05, "loss": 0.9704, "step": 796 }, { "epoch": 0.11, "learning_rate": 1.9683074382770408e-05, "loss": 1.0662, "step": 797 }, { "epoch": 0.11, "learning_rate": 1.968197695966312e-05, "loss": 0.9919, "step": 798 }, { "epoch": 0.11, "learning_rate": 1.9680877670506507e-05, "loss": 1.0196, "step": 799 }, { "epoch": 0.11, "learning_rate": 1.9679776515512443e-05, "loss": 0.9455, "step": 800 }, { "epoch": 0.11, "learning_rate": 1.9678673494893153e-05, "loss": 0.9905, "step": 801 }, { "epoch": 0.11, "learning_rate": 1.9677568608861227e-05, "loss": 0.9367, "step": 802 }, { "epoch": 0.11, "learning_rate": 1.9676461857629614e-05, "loss": 1.0192, "step": 803 }, { "epoch": 0.11, "learning_rate": 1.9675353241411626e-05, "loss": 1.0047, "step": 804 }, { "epoch": 0.11, "learning_rate": 1.967424276042093e-05, "loss": 1.0346, "step": 805 }, { "epoch": 0.11, "learning_rate": 1.9673130414871556e-05, "loss": 1.0052, "step": 806 }, { "epoch": 0.11, "learning_rate": 1.9672016204977885e-05, "loss": 1.0397, "step": 807 }, { "epoch": 0.11, "learning_rate": 1.967090013095467e-05, "loss": 0.9582, "step": 808 }, { "epoch": 0.11, "learning_rate": 1.966978219301701e-05, "loss": 1.0582, "step": 809 }, { "epoch": 0.11, "learning_rate": 1.966866239138038e-05, "loss": 1.053, "step": 810 }, { "epoch": 0.11, "learning_rate": 1.9667540726260595e-05, "loss": 0.9584, "step": 811 }, { "epoch": 0.11, "learning_rate": 1.966641719787384e-05, "loss": 0.9767, "step": 812 }, { "epoch": 0.11, "learning_rate": 1.9665291806436662e-05, "loss": 0.9639, "step": 813 }, { "epoch": 0.11, "learning_rate": 1.9664164552165957e-05, "loss": 1.0833, "step": 814 }, { "epoch": 0.11, "learning_rate": 1.9663035435278994e-05, "loss": 0.9519, "step": 815 }, { "epoch": 0.11, "learning_rate": 1.966190445599338e-05, "loss": 0.9764, "step": 816 }, { "epoch": 0.11, "learning_rate": 1.9660771614527107e-05, "loss": 1.0509, "step": 817 }, { "epoch": 0.11, "learning_rate": 1.9659636911098504e-05, "loss": 1.023, "step": 818 }, { "epoch": 0.11, "learning_rate": 1.965850034592627e-05, "loss": 1.0515, "step": 819 }, { "epoch": 0.11, "learning_rate": 1.9657361919229454e-05, "loss": 1.0602, "step": 820 }, { "epoch": 0.11, "learning_rate": 1.9656221631227483e-05, "loss": 0.9591, "step": 821 }, { "epoch": 0.11, "learning_rate": 1.9655079482140115e-05, "loss": 1.0136, "step": 822 }, { "epoch": 0.11, "learning_rate": 1.9653935472187492e-05, "loss": 0.9635, "step": 823 }, { "epoch": 0.11, "learning_rate": 1.96527896015901e-05, "loss": 0.9748, "step": 824 }, { "epoch": 0.11, "learning_rate": 1.9651641870568787e-05, "loss": 0.9757, "step": 825 }, { "epoch": 0.11, "learning_rate": 1.965049227934476e-05, "loss": 1.0139, "step": 826 }, { "epoch": 0.11, "learning_rate": 1.964934082813959e-05, "loss": 1.0044, "step": 827 }, { "epoch": 0.11, "learning_rate": 1.964818751717519e-05, "loss": 1.029, "step": 828 }, { "epoch": 0.11, "learning_rate": 1.964703234667386e-05, "loss": 1.0532, "step": 829 }, { "epoch": 0.11, "learning_rate": 1.964587531685822e-05, "loss": 0.9728, "step": 830 }, { "epoch": 0.11, "learning_rate": 1.9644716427951286e-05, "loss": 0.9997, "step": 831 }, { "epoch": 0.11, "learning_rate": 1.9643555680176408e-05, "loss": 0.9067, "step": 832 }, { "epoch": 0.11, "learning_rate": 1.9642393073757302e-05, "loss": 1.0183, "step": 833 }, { "epoch": 0.11, "learning_rate": 1.9641228608918044e-05, "loss": 0.9482, "step": 834 }, { "epoch": 0.11, "learning_rate": 1.9640062285883067e-05, "loss": 0.9734, "step": 835 }, { "epoch": 0.11, "learning_rate": 1.963889410487716e-05, "loss": 1.0124, "step": 836 }, { "epoch": 0.11, "learning_rate": 1.9637724066125473e-05, "loss": 0.9575, "step": 837 }, { "epoch": 0.11, "learning_rate": 1.9636552169853514e-05, "loss": 1.0321, "step": 838 }, { "epoch": 0.11, "learning_rate": 1.963537841628714e-05, "loss": 0.9496, "step": 839 }, { "epoch": 0.11, "learning_rate": 1.9634202805652584e-05, "loss": 1.0202, "step": 840 }, { "epoch": 0.11, "learning_rate": 1.963302533817642e-05, "loss": 1.0048, "step": 841 }, { "epoch": 0.11, "learning_rate": 1.9631846014085585e-05, "loss": 0.9997, "step": 842 }, { "epoch": 0.11, "learning_rate": 1.9630664833607377e-05, "loss": 0.9775, "step": 843 }, { "epoch": 0.11, "learning_rate": 1.9629481796969455e-05, "loss": 0.9698, "step": 844 }, { "epoch": 0.11, "learning_rate": 1.9628296904399828e-05, "loss": 0.9839, "step": 845 }, { "epoch": 0.11, "learning_rate": 1.9627110156126862e-05, "loss": 1.0282, "step": 846 }, { "epoch": 0.11, "learning_rate": 1.9625921552379288e-05, "loss": 1.0375, "step": 847 }, { "epoch": 0.11, "learning_rate": 1.962473109338619e-05, "loss": 0.9793, "step": 848 }, { "epoch": 0.12, "learning_rate": 1.9623538779377007e-05, "loss": 1.0907, "step": 849 }, { "epoch": 0.12, "learning_rate": 1.9622344610581542e-05, "loss": 1.017, "step": 850 }, { "epoch": 0.12, "learning_rate": 1.9621148587229954e-05, "loss": 1.068, "step": 851 }, { "epoch": 0.12, "learning_rate": 1.961995070955275e-05, "loss": 1.0127, "step": 852 }, { "epoch": 0.12, "learning_rate": 1.9618750977780813e-05, "loss": 1.0208, "step": 853 }, { "epoch": 0.12, "learning_rate": 1.9617549392145365e-05, "loss": 1.0164, "step": 854 }, { "epoch": 0.12, "learning_rate": 1.9616345952877998e-05, "loss": 1.0445, "step": 855 }, { "epoch": 0.12, "learning_rate": 1.961514066021065e-05, "loss": 0.9747, "step": 856 }, { "epoch": 0.12, "learning_rate": 1.9613933514375623e-05, "loss": 0.9976, "step": 857 }, { "epoch": 0.12, "learning_rate": 1.9612724515605582e-05, "loss": 1.0079, "step": 858 }, { "epoch": 0.12, "learning_rate": 1.9611513664133535e-05, "loss": 1.0388, "step": 859 }, { "epoch": 0.12, "learning_rate": 1.9610300960192864e-05, "loss": 0.9468, "step": 860 }, { "epoch": 0.12, "learning_rate": 1.9609086404017287e-05, "loss": 1.0116, "step": 861 }, { "epoch": 0.12, "learning_rate": 1.96078699958409e-05, "loss": 1.0162, "step": 862 }, { "epoch": 0.12, "learning_rate": 1.9606651735898138e-05, "loss": 0.9958, "step": 863 }, { "epoch": 0.12, "learning_rate": 1.960543162442381e-05, "loss": 0.9455, "step": 864 }, { "epoch": 0.12, "learning_rate": 1.9604209661653067e-05, "loss": 0.9912, "step": 865 }, { "epoch": 0.12, "learning_rate": 1.960298584782143e-05, "loss": 0.9524, "step": 866 }, { "epoch": 0.12, "learning_rate": 1.9601760183164762e-05, "loss": 0.9332, "step": 867 }, { "epoch": 0.12, "learning_rate": 1.96005326679193e-05, "loss": 0.982, "step": 868 }, { "epoch": 0.12, "learning_rate": 1.9599303302321616e-05, "loss": 0.9994, "step": 869 }, { "epoch": 0.12, "learning_rate": 1.9598072086608663e-05, "loss": 1.0111, "step": 870 }, { "epoch": 0.12, "learning_rate": 1.9596839021017732e-05, "loss": 1.0083, "step": 871 }, { "epoch": 0.12, "learning_rate": 1.9595604105786477e-05, "loss": 0.98, "step": 872 }, { "epoch": 0.12, "learning_rate": 1.959436734115291e-05, "loss": 1.0426, "step": 873 }, { "epoch": 0.12, "learning_rate": 1.9593128727355398e-05, "loss": 0.9954, "step": 874 }, { "epoch": 0.12, "learning_rate": 1.9591888264632664e-05, "loss": 0.9766, "step": 875 }, { "epoch": 0.12, "learning_rate": 1.9590645953223792e-05, "loss": 0.9546, "step": 876 }, { "epoch": 0.12, "learning_rate": 1.958940179336821e-05, "loss": 1.0263, "step": 877 }, { "epoch": 0.12, "learning_rate": 1.958815578530572e-05, "loss": 0.9732, "step": 878 }, { "epoch": 0.12, "learning_rate": 1.9586907929276458e-05, "loss": 1.0219, "step": 879 }, { "epoch": 0.12, "learning_rate": 1.958565822552094e-05, "loss": 0.998, "step": 880 }, { "epoch": 0.12, "learning_rate": 1.958440667428002e-05, "loss": 1.0008, "step": 881 }, { "epoch": 0.12, "learning_rate": 1.958315327579492e-05, "loss": 1.0206, "step": 882 }, { "epoch": 0.12, "learning_rate": 1.958189803030721e-05, "loss": 1.0225, "step": 883 }, { "epoch": 0.12, "learning_rate": 1.9580640938058817e-05, "loss": 0.9797, "step": 884 }, { "epoch": 0.12, "learning_rate": 1.957938199929203e-05, "loss": 0.9482, "step": 885 }, { "epoch": 0.12, "learning_rate": 1.9578121214249485e-05, "loss": 0.9692, "step": 886 }, { "epoch": 0.12, "learning_rate": 1.9576858583174185e-05, "loss": 0.988, "step": 887 }, { "epoch": 0.12, "learning_rate": 1.957559410630948e-05, "loss": 0.9533, "step": 888 }, { "epoch": 0.12, "learning_rate": 1.9574327783899073e-05, "loss": 0.9989, "step": 889 }, { "epoch": 0.12, "learning_rate": 1.9573059616187035e-05, "loss": 0.9835, "step": 890 }, { "epoch": 0.12, "learning_rate": 1.9571789603417775e-05, "loss": 1.0132, "step": 891 }, { "epoch": 0.12, "learning_rate": 1.9570517745836083e-05, "loss": 0.9976, "step": 892 }, { "epoch": 0.12, "learning_rate": 1.9569244043687075e-05, "loss": 1.0702, "step": 893 }, { "epoch": 0.12, "learning_rate": 1.956796849721625e-05, "loss": 1.0401, "step": 894 }, { "epoch": 0.12, "learning_rate": 1.956669110666944e-05, "loss": 0.9423, "step": 895 }, { "epoch": 0.12, "learning_rate": 1.9565411872292846e-05, "loss": 1.0266, "step": 896 }, { "epoch": 0.12, "learning_rate": 1.9564130794333024e-05, "loss": 0.9968, "step": 897 }, { "epoch": 0.12, "learning_rate": 1.956284787303687e-05, "loss": 1.015, "step": 898 }, { "epoch": 0.12, "learning_rate": 1.956156310865166e-05, "loss": 0.9663, "step": 899 }, { "epoch": 0.12, "learning_rate": 1.9560276501425003e-05, "loss": 0.9042, "step": 900 }, { "epoch": 0.12, "learning_rate": 1.955898805160488e-05, "loss": 1.0213, "step": 901 }, { "epoch": 0.12, "learning_rate": 1.9557697759439613e-05, "loss": 0.9666, "step": 902 }, { "epoch": 0.12, "learning_rate": 1.9556405625177886e-05, "loss": 0.9583, "step": 903 }, { "epoch": 0.12, "learning_rate": 1.9555111649068746e-05, "loss": 1.0285, "step": 904 }, { "epoch": 0.12, "learning_rate": 1.9553815831361577e-05, "loss": 0.9814, "step": 905 }, { "epoch": 0.12, "learning_rate": 1.955251817230613e-05, "loss": 1.0216, "step": 906 }, { "epoch": 0.12, "learning_rate": 1.955121867215251e-05, "loss": 0.9773, "step": 907 }, { "epoch": 0.12, "learning_rate": 1.9549917331151177e-05, "loss": 0.9833, "step": 908 }, { "epoch": 0.12, "learning_rate": 1.954861414955294e-05, "loss": 0.9411, "step": 909 }, { "epoch": 0.12, "learning_rate": 1.954730912760897e-05, "loss": 1.0174, "step": 910 }, { "epoch": 0.12, "learning_rate": 1.9546002265570786e-05, "loss": 1.0446, "step": 911 }, { "epoch": 0.12, "learning_rate": 1.9544693563690266e-05, "loss": 0.9674, "step": 912 }, { "epoch": 0.12, "learning_rate": 1.9543383022219646e-05, "loss": 1.0111, "step": 913 }, { "epoch": 0.12, "learning_rate": 1.954207064141151e-05, "loss": 0.9865, "step": 914 }, { "epoch": 0.12, "learning_rate": 1.9540756421518798e-05, "loss": 0.9874, "step": 915 }, { "epoch": 0.12, "learning_rate": 1.9539440362794803e-05, "loss": 1.0709, "step": 916 }, { "epoch": 0.12, "learning_rate": 1.953812246549318e-05, "loss": 1.025, "step": 917 }, { "epoch": 0.12, "learning_rate": 1.9536802729867926e-05, "loss": 0.9735, "step": 918 }, { "epoch": 0.12, "learning_rate": 1.9535481156173408e-05, "loss": 0.9636, "step": 919 }, { "epoch": 0.12, "learning_rate": 1.9534157744664336e-05, "loss": 1.0226, "step": 920 }, { "epoch": 0.12, "learning_rate": 1.953283249559577e-05, "loss": 0.9824, "step": 921 }, { "epoch": 0.12, "learning_rate": 1.9531505409223143e-05, "loss": 0.9672, "step": 922 }, { "epoch": 0.13, "learning_rate": 1.9530176485802217e-05, "loss": 0.9614, "step": 923 }, { "epoch": 0.13, "learning_rate": 1.9528845725589126e-05, "loss": 0.9428, "step": 924 }, { "epoch": 0.13, "learning_rate": 1.952751312884036e-05, "loss": 1.0591, "step": 925 }, { "epoch": 0.13, "learning_rate": 1.9526178695812747e-05, "loss": 1.0158, "step": 926 }, { "epoch": 0.13, "learning_rate": 1.9524842426763484e-05, "loss": 1.0827, "step": 927 }, { "epoch": 0.13, "learning_rate": 1.9523504321950113e-05, "loss": 1.0037, "step": 928 }, { "epoch": 0.13, "learning_rate": 1.9522164381630534e-05, "loss": 1.0217, "step": 929 }, { "epoch": 0.13, "learning_rate": 1.9520822606063e-05, "loss": 1.0703, "step": 930 }, { "epoch": 0.13, "learning_rate": 1.9519478995506112e-05, "loss": 1.0513, "step": 931 }, { "epoch": 0.13, "learning_rate": 1.9518133550218836e-05, "loss": 1.0126, "step": 932 }, { "epoch": 0.13, "learning_rate": 1.9516786270460484e-05, "loss": 1.0679, "step": 933 }, { "epoch": 0.13, "learning_rate": 1.9515437156490724e-05, "loss": 0.9376, "step": 934 }, { "epoch": 0.13, "learning_rate": 1.951408620856957e-05, "loss": 0.8973, "step": 935 }, { "epoch": 0.13, "learning_rate": 1.9512733426957403e-05, "loss": 1.0188, "step": 936 }, { "epoch": 0.13, "learning_rate": 1.9511378811914952e-05, "loss": 1.0413, "step": 937 }, { "epoch": 0.13, "learning_rate": 1.951002236370329e-05, "loss": 1.0556, "step": 938 }, { "epoch": 0.13, "learning_rate": 1.950866408258386e-05, "loss": 1.0293, "step": 939 }, { "epoch": 0.13, "learning_rate": 1.9507303968818443e-05, "loss": 0.9976, "step": 940 }, { "epoch": 0.13, "learning_rate": 1.950594202266918e-05, "loss": 0.9468, "step": 941 }, { "epoch": 0.13, "learning_rate": 1.950457824439857e-05, "loss": 0.9589, "step": 942 }, { "epoch": 0.13, "learning_rate": 1.9503212634269454e-05, "loss": 0.9555, "step": 943 }, { "epoch": 0.13, "learning_rate": 1.9501845192545036e-05, "loss": 0.9949, "step": 944 }, { "epoch": 0.13, "learning_rate": 1.9500475919488866e-05, "loss": 1.0147, "step": 945 }, { "epoch": 0.13, "learning_rate": 1.949910481536485e-05, "loss": 0.9917, "step": 946 }, { "epoch": 0.13, "learning_rate": 1.9497731880437246e-05, "loss": 1.0382, "step": 947 }, { "epoch": 0.13, "learning_rate": 1.9496357114970673e-05, "loss": 0.9439, "step": 948 }, { "epoch": 0.13, "learning_rate": 1.9494980519230086e-05, "loss": 0.9883, "step": 949 }, { "epoch": 0.13, "learning_rate": 1.9493602093480807e-05, "loss": 1.0647, "step": 950 }, { "epoch": 0.13, "learning_rate": 1.9492221837988506e-05, "loss": 0.9845, "step": 951 }, { "epoch": 0.13, "learning_rate": 1.9490839753019205e-05, "loss": 0.9672, "step": 952 }, { "epoch": 0.13, "learning_rate": 1.948945583883928e-05, "loss": 0.9921, "step": 953 }, { "epoch": 0.13, "learning_rate": 1.9488070095715457e-05, "loss": 1.0105, "step": 954 }, { "epoch": 0.13, "learning_rate": 1.9486682523914816e-05, "loss": 0.9461, "step": 955 }, { "epoch": 0.13, "learning_rate": 1.948529312370479e-05, "loss": 0.9936, "step": 956 }, { "epoch": 0.13, "learning_rate": 1.948390189535317e-05, "loss": 1.0025, "step": 957 }, { "epoch": 0.13, "learning_rate": 1.9482508839128087e-05, "loss": 0.9671, "step": 958 }, { "epoch": 0.13, "learning_rate": 1.948111395529803e-05, "loss": 1.0978, "step": 959 }, { "epoch": 0.13, "learning_rate": 1.9479717244131845e-05, "loss": 1.0511, "step": 960 }, { "epoch": 0.13, "learning_rate": 1.9478318705898724e-05, "loss": 0.9543, "step": 961 }, { "epoch": 0.13, "learning_rate": 1.9476918340868212e-05, "loss": 0.9817, "step": 962 }, { "epoch": 0.13, "learning_rate": 1.9475516149310208e-05, "loss": 1.0188, "step": 963 }, { "epoch": 0.13, "learning_rate": 1.947411213149497e-05, "loss": 0.9818, "step": 964 }, { "epoch": 0.13, "learning_rate": 1.9472706287693088e-05, "loss": 1.0495, "step": 965 }, { "epoch": 0.13, "learning_rate": 1.9471298618175523e-05, "loss": 1.0227, "step": 966 }, { "epoch": 0.13, "learning_rate": 1.9469889123213585e-05, "loss": 0.9774, "step": 967 }, { "epoch": 0.13, "learning_rate": 1.9468477803078926e-05, "loss": 0.9832, "step": 968 }, { "epoch": 0.13, "learning_rate": 1.9467064658043556e-05, "loss": 0.9179, "step": 969 }, { "epoch": 0.13, "learning_rate": 1.9465649688379837e-05, "loss": 1.0202, "step": 970 }, { "epoch": 0.13, "learning_rate": 1.9464232894360483e-05, "loss": 1.0504, "step": 971 }, { "epoch": 0.13, "learning_rate": 1.946281427625856e-05, "loss": 1.0362, "step": 972 }, { "epoch": 0.13, "learning_rate": 1.9461393834347488e-05, "loss": 1.037, "step": 973 }, { "epoch": 0.13, "learning_rate": 1.9459971568901026e-05, "loss": 0.9938, "step": 974 }, { "epoch": 0.13, "learning_rate": 1.94585474801933e-05, "loss": 0.9531, "step": 975 }, { "epoch": 0.13, "learning_rate": 1.9457121568498778e-05, "loss": 1.0849, "step": 976 }, { "epoch": 0.13, "learning_rate": 1.945569383409228e-05, "loss": 1.0969, "step": 977 }, { "epoch": 0.13, "learning_rate": 1.9454264277248987e-05, "loss": 0.9963, "step": 978 }, { "epoch": 0.13, "learning_rate": 1.945283289824442e-05, "loss": 1.0253, "step": 979 }, { "epoch": 0.13, "learning_rate": 1.945139969735445e-05, "loss": 1.0206, "step": 980 }, { "epoch": 0.13, "learning_rate": 1.9449964674855306e-05, "loss": 0.978, "step": 981 }, { "epoch": 0.13, "learning_rate": 1.944852783102357e-05, "loss": 1.0544, "step": 982 }, { "epoch": 0.13, "learning_rate": 1.9447089166136167e-05, "loss": 0.9721, "step": 983 }, { "epoch": 0.13, "learning_rate": 1.9445648680470383e-05, "loss": 0.8884, "step": 984 }, { "epoch": 0.13, "learning_rate": 1.944420637430384e-05, "loss": 1.0054, "step": 985 }, { "epoch": 0.13, "learning_rate": 1.944276224791453e-05, "loss": 0.9881, "step": 986 }, { "epoch": 0.13, "learning_rate": 1.9441316301580782e-05, "loss": 1.0488, "step": 987 }, { "epoch": 0.13, "learning_rate": 1.9439868535581276e-05, "loss": 1.0555, "step": 988 }, { "epoch": 0.13, "learning_rate": 1.9438418950195048e-05, "loss": 1.0294, "step": 989 }, { "epoch": 0.13, "learning_rate": 1.9436967545701485e-05, "loss": 0.9555, "step": 990 }, { "epoch": 0.13, "learning_rate": 1.9435514322380315e-05, "loss": 1.047, "step": 991 }, { "epoch": 0.13, "learning_rate": 1.9434059280511636e-05, "loss": 1.0269, "step": 992 }, { "epoch": 0.13, "learning_rate": 1.9432602420375875e-05, "loss": 1.055, "step": 993 }, { "epoch": 0.13, "learning_rate": 1.9431143742253825e-05, "loss": 0.9844, "step": 994 }, { "epoch": 0.13, "learning_rate": 1.942968324642662e-05, "loss": 0.9785, "step": 995 }, { "epoch": 0.13, "learning_rate": 1.9428220933175747e-05, "loss": 1.0219, "step": 996 }, { "epoch": 0.14, "learning_rate": 1.942675680278305e-05, "loss": 0.9147, "step": 997 }, { "epoch": 0.14, "learning_rate": 1.9425290855530705e-05, "loss": 1.0024, "step": 998 }, { "epoch": 0.14, "learning_rate": 1.9423823091701262e-05, "loss": 1.0057, "step": 999 }, { "epoch": 0.14, "learning_rate": 1.9422353511577604e-05, "loss": 1.0049, "step": 1000 }, { "epoch": 0.14, "learning_rate": 1.9420882115442974e-05, "loss": 0.9432, "step": 1001 }, { "epoch": 0.14, "learning_rate": 1.9419408903580956e-05, "loss": 1.0241, "step": 1002 }, { "epoch": 0.14, "learning_rate": 1.941793387627549e-05, "loss": 1.0603, "step": 1003 }, { "epoch": 0.14, "learning_rate": 1.9416457033810864e-05, "loss": 0.9996, "step": 1004 }, { "epoch": 0.14, "learning_rate": 1.9414978376471714e-05, "loss": 1.0125, "step": 1005 }, { "epoch": 0.14, "learning_rate": 1.9413497904543033e-05, "loss": 0.9598, "step": 1006 }, { "epoch": 0.14, "learning_rate": 1.9412015618310156e-05, "loss": 0.9883, "step": 1007 }, { "epoch": 0.14, "learning_rate": 1.9410531518058772e-05, "loss": 0.9794, "step": 1008 }, { "epoch": 0.14, "learning_rate": 1.9409045604074916e-05, "loss": 1.0111, "step": 1009 }, { "epoch": 0.14, "learning_rate": 1.9407557876644974e-05, "loss": 0.9618, "step": 1010 }, { "epoch": 0.14, "learning_rate": 1.9406068336055686e-05, "loss": 1.1074, "step": 1011 }, { "epoch": 0.14, "learning_rate": 1.9404576982594135e-05, "loss": 0.9656, "step": 1012 }, { "epoch": 0.14, "learning_rate": 1.9403083816547758e-05, "loss": 0.9542, "step": 1013 }, { "epoch": 0.14, "learning_rate": 1.9401588838204334e-05, "loss": 1.0071, "step": 1014 }, { "epoch": 0.14, "learning_rate": 1.9400092047852e-05, "loss": 1.0351, "step": 1015 }, { "epoch": 0.14, "learning_rate": 1.9398593445779242e-05, "loss": 0.9237, "step": 1016 }, { "epoch": 0.14, "learning_rate": 1.9397093032274888e-05, "loss": 0.9853, "step": 1017 }, { "epoch": 0.14, "learning_rate": 1.939559080762812e-05, "loss": 1.0316, "step": 1018 }, { "epoch": 0.14, "learning_rate": 1.9394086772128468e-05, "loss": 0.9783, "step": 1019 }, { "epoch": 0.14, "learning_rate": 1.9392580926065814e-05, "loss": 1.0, "step": 1020 }, { "epoch": 0.14, "learning_rate": 1.9391073269730382e-05, "loss": 1.0629, "step": 1021 }, { "epoch": 0.14, "learning_rate": 1.9389563803412753e-05, "loss": 1.0417, "step": 1022 }, { "epoch": 0.14, "learning_rate": 1.9388052527403852e-05, "loss": 1.0067, "step": 1023 }, { "epoch": 0.14, "learning_rate": 1.9386539441994953e-05, "loss": 0.9678, "step": 1024 }, { "epoch": 0.14, "learning_rate": 1.9385024547477676e-05, "loss": 0.9966, "step": 1025 }, { "epoch": 0.14, "learning_rate": 1.9383507844144e-05, "loss": 0.9586, "step": 1026 }, { "epoch": 0.14, "learning_rate": 1.938198933228624e-05, "loss": 0.9873, "step": 1027 }, { "epoch": 0.14, "learning_rate": 1.9380469012197068e-05, "loss": 0.9919, "step": 1028 }, { "epoch": 0.14, "learning_rate": 1.9378946884169502e-05, "loss": 0.97, "step": 1029 }, { "epoch": 0.14, "learning_rate": 1.9377422948496912e-05, "loss": 0.953, "step": 1030 }, { "epoch": 0.14, "learning_rate": 1.9375897205473005e-05, "loss": 0.9572, "step": 1031 }, { "epoch": 0.14, "learning_rate": 1.937436965539185e-05, "loss": 0.9904, "step": 1032 }, { "epoch": 0.14, "learning_rate": 1.9372840298547856e-05, "loss": 1.0612, "step": 1033 }, { "epoch": 0.14, "learning_rate": 1.937130913523578e-05, "loss": 1.0135, "step": 1034 }, { "epoch": 0.14, "learning_rate": 1.9369776165750734e-05, "loss": 1.1015, "step": 1035 }, { "epoch": 0.14, "learning_rate": 1.9368241390388172e-05, "loss": 1.0027, "step": 1036 }, { "epoch": 0.14, "learning_rate": 1.9366704809443898e-05, "loss": 0.9299, "step": 1037 }, { "epoch": 0.14, "learning_rate": 1.9365166423214065e-05, "loss": 0.9531, "step": 1038 }, { "epoch": 0.14, "learning_rate": 1.9363626231995175e-05, "loss": 1.034, "step": 1039 }, { "epoch": 0.14, "learning_rate": 1.936208423608407e-05, "loss": 1.0059, "step": 1040 }, { "epoch": 0.14, "learning_rate": 1.9360540435777944e-05, "loss": 1.0449, "step": 1041 }, { "epoch": 0.14, "learning_rate": 1.935899483137435e-05, "loss": 0.9776, "step": 1042 }, { "epoch": 0.14, "learning_rate": 1.9357447423171173e-05, "loss": 1.0216, "step": 1043 }, { "epoch": 0.14, "learning_rate": 1.9355898211466647e-05, "loss": 1.0188, "step": 1044 }, { "epoch": 0.14, "learning_rate": 1.935434719655937e-05, "loss": 1.0407, "step": 1045 }, { "epoch": 0.14, "learning_rate": 1.9352794378748267e-05, "loss": 0.9937, "step": 1046 }, { "epoch": 0.14, "learning_rate": 1.935123975833262e-05, "loss": 1.0054, "step": 1047 }, { "epoch": 0.14, "learning_rate": 1.9349683335612064e-05, "loss": 1.0121, "step": 1048 }, { "epoch": 0.14, "learning_rate": 1.9348125110886564e-05, "loss": 0.9512, "step": 1049 }, { "epoch": 0.14, "learning_rate": 1.9346565084456455e-05, "loss": 1.0258, "step": 1050 }, { "epoch": 0.14, "learning_rate": 1.93450032566224e-05, "loss": 1.0186, "step": 1051 }, { "epoch": 0.14, "learning_rate": 1.9343439627685422e-05, "loss": 1.0299, "step": 1052 }, { "epoch": 0.14, "learning_rate": 1.934187419794688e-05, "loss": 1.0723, "step": 1053 }, { "epoch": 0.14, "learning_rate": 1.934030696770849e-05, "loss": 1.0178, "step": 1054 }, { "epoch": 0.14, "learning_rate": 1.933873793727231e-05, "loss": 1.0041, "step": 1055 }, { "epoch": 0.14, "learning_rate": 1.9337167106940747e-05, "loss": 0.9248, "step": 1056 }, { "epoch": 0.14, "learning_rate": 1.9335594477016557e-05, "loss": 0.9113, "step": 1057 }, { "epoch": 0.14, "learning_rate": 1.9334020047802833e-05, "loss": 1.0606, "step": 1058 }, { "epoch": 0.14, "learning_rate": 1.9332443819603024e-05, "loss": 1.0271, "step": 1059 }, { "epoch": 0.14, "learning_rate": 1.9330865792720926e-05, "loss": 1.0042, "step": 1060 }, { "epoch": 0.14, "learning_rate": 1.9329285967460673e-05, "loss": 1.0239, "step": 1061 }, { "epoch": 0.14, "learning_rate": 1.932770434412676e-05, "loss": 0.9927, "step": 1062 }, { "epoch": 0.14, "learning_rate": 1.9326120923024013e-05, "loss": 1.0187, "step": 1063 }, { "epoch": 0.14, "learning_rate": 1.9324535704457617e-05, "loss": 1.0026, "step": 1064 }, { "epoch": 0.14, "learning_rate": 1.9322948688733093e-05, "loss": 0.9569, "step": 1065 }, { "epoch": 0.14, "learning_rate": 1.9321359876156314e-05, "loss": 0.9482, "step": 1066 }, { "epoch": 0.14, "learning_rate": 1.9319769267033502e-05, "loss": 1.0094, "step": 1067 }, { "epoch": 0.14, "learning_rate": 1.931817686167122e-05, "loss": 0.9551, "step": 1068 }, { "epoch": 0.14, "learning_rate": 1.9316582660376384e-05, "loss": 0.9733, "step": 1069 }, { "epoch": 0.15, "learning_rate": 1.931498666345624e-05, "loss": 0.9925, "step": 1070 }, { "epoch": 0.15, "learning_rate": 1.9313388871218405e-05, "loss": 1.056, "step": 1071 }, { "epoch": 0.15, "learning_rate": 1.9311789283970818e-05, "loss": 0.9728, "step": 1072 }, { "epoch": 0.15, "learning_rate": 1.9310187902021775e-05, "loss": 1.044, "step": 1073 }, { "epoch": 0.15, "learning_rate": 1.9308584725679926e-05, "loss": 0.9706, "step": 1074 }, { "epoch": 0.15, "learning_rate": 1.930697975525425e-05, "loss": 1.0265, "step": 1075 }, { "epoch": 0.15, "learning_rate": 1.9305372991054078e-05, "loss": 0.9461, "step": 1076 }, { "epoch": 0.15, "learning_rate": 1.93037644333891e-05, "loss": 1.0038, "step": 1077 }, { "epoch": 0.15, "learning_rate": 1.9302154082569328e-05, "loss": 0.9872, "step": 1078 }, { "epoch": 0.15, "learning_rate": 1.930054193890514e-05, "loss": 0.9416, "step": 1079 }, { "epoch": 0.15, "learning_rate": 1.929892800270725e-05, "loss": 0.9804, "step": 1080 }, { "epoch": 0.15, "learning_rate": 1.9297312274286716e-05, "loss": 0.9416, "step": 1081 }, { "epoch": 0.15, "learning_rate": 1.9295694753954942e-05, "loss": 0.925, "step": 1082 }, { "epoch": 0.15, "learning_rate": 1.9294075442023687e-05, "loss": 1.0036, "step": 1083 }, { "epoch": 0.15, "learning_rate": 1.9292454338805044e-05, "loss": 0.9783, "step": 1084 }, { "epoch": 0.15, "learning_rate": 1.9290831444611456e-05, "loss": 0.9024, "step": 1085 }, { "epoch": 0.15, "learning_rate": 1.928920675975571e-05, "loss": 0.9858, "step": 1086 }, { "epoch": 0.15, "learning_rate": 1.9287580284550937e-05, "loss": 0.9467, "step": 1087 }, { "epoch": 0.15, "learning_rate": 1.928595201931062e-05, "loss": 1.0999, "step": 1088 }, { "epoch": 0.15, "learning_rate": 1.9284321964348574e-05, "loss": 0.9837, "step": 1089 }, { "epoch": 0.15, "learning_rate": 1.928269011997897e-05, "loss": 1.0191, "step": 1090 }, { "epoch": 0.15, "learning_rate": 1.9281056486516325e-05, "loss": 1.0061, "step": 1091 }, { "epoch": 0.15, "learning_rate": 1.927942106427549e-05, "loss": 1.0394, "step": 1092 }, { "epoch": 0.15, "learning_rate": 1.9277783853571673e-05, "loss": 1.0017, "step": 1093 }, { "epoch": 0.15, "learning_rate": 1.9276144854720412e-05, "loss": 0.9007, "step": 1094 }, { "epoch": 0.15, "learning_rate": 1.9274504068037604e-05, "loss": 0.9975, "step": 1095 }, { "epoch": 0.15, "learning_rate": 1.9272861493839483e-05, "loss": 0.919, "step": 1096 }, { "epoch": 0.15, "learning_rate": 1.9271217132442633e-05, "loss": 0.881, "step": 1097 }, { "epoch": 0.15, "learning_rate": 1.9269570984163974e-05, "loss": 1.0144, "step": 1098 }, { "epoch": 0.15, "learning_rate": 1.926792304932078e-05, "loss": 1.0447, "step": 1099 }, { "epoch": 0.15, "learning_rate": 1.926627332823066e-05, "loss": 0.9869, "step": 1100 }, { "epoch": 0.15, "learning_rate": 1.9264621821211577e-05, "loss": 0.9502, "step": 1101 }, { "epoch": 0.15, "learning_rate": 1.9262968528581828e-05, "loss": 1.03, "step": 1102 }, { "epoch": 0.15, "learning_rate": 1.926131345066006e-05, "loss": 0.9711, "step": 1103 }, { "epoch": 0.15, "learning_rate": 1.925965658776527e-05, "loss": 1.0194, "step": 1104 }, { "epoch": 0.15, "learning_rate": 1.9257997940216783e-05, "loss": 0.9479, "step": 1105 }, { "epoch": 0.15, "learning_rate": 1.9256337508334286e-05, "loss": 1.01, "step": 1106 }, { "epoch": 0.15, "learning_rate": 1.925467529243779e-05, "loss": 0.9625, "step": 1107 }, { "epoch": 0.15, "learning_rate": 1.9253011292847672e-05, "loss": 1.0386, "step": 1108 }, { "epoch": 0.15, "learning_rate": 1.9251345509884638e-05, "loss": 0.9777, "step": 1109 }, { "epoch": 0.15, "learning_rate": 1.9249677943869742e-05, "loss": 0.9692, "step": 1110 }, { "epoch": 0.15, "learning_rate": 1.9248008595124378e-05, "loss": 0.992, "step": 1111 }, { "epoch": 0.15, "learning_rate": 1.924633746397029e-05, "loss": 1.0247, "step": 1112 }, { "epoch": 0.15, "learning_rate": 1.924466455072956e-05, "loss": 1.0234, "step": 1113 }, { "epoch": 0.15, "learning_rate": 1.924298985572462e-05, "loss": 0.9767, "step": 1114 }, { "epoch": 0.15, "learning_rate": 1.9241313379278237e-05, "loss": 1.0239, "step": 1115 }, { "epoch": 0.15, "learning_rate": 1.923963512171353e-05, "loss": 0.9395, "step": 1116 }, { "epoch": 0.15, "learning_rate": 1.923795508335395e-05, "loss": 0.93, "step": 1117 }, { "epoch": 0.15, "learning_rate": 1.9236273264523304e-05, "loss": 1.0109, "step": 1118 }, { "epoch": 0.15, "learning_rate": 1.9234589665545734e-05, "loss": 0.9818, "step": 1119 }, { "epoch": 0.15, "learning_rate": 1.923290428674573e-05, "loss": 0.9709, "step": 1120 }, { "epoch": 0.15, "learning_rate": 1.9231217128448118e-05, "loss": 0.9784, "step": 1121 }, { "epoch": 0.15, "learning_rate": 1.9229528190978072e-05, "loss": 1.006, "step": 1122 }, { "epoch": 0.15, "learning_rate": 1.9227837474661113e-05, "loss": 1.0231, "step": 1123 }, { "epoch": 0.15, "learning_rate": 1.9226144979823094e-05, "loss": 0.9728, "step": 1124 }, { "epoch": 0.15, "learning_rate": 1.9224450706790222e-05, "loss": 0.9578, "step": 1125 }, { "epoch": 0.15, "learning_rate": 1.9222754655889035e-05, "loss": 0.9655, "step": 1126 }, { "epoch": 0.15, "learning_rate": 1.9221056827446426e-05, "loss": 0.9097, "step": 1127 }, { "epoch": 0.15, "learning_rate": 1.9219357221789624e-05, "loss": 0.9363, "step": 1128 }, { "epoch": 0.15, "learning_rate": 1.9217655839246198e-05, "loss": 0.9888, "step": 1129 }, { "epoch": 0.15, "learning_rate": 1.9215952680144067e-05, "loss": 0.936, "step": 1130 }, { "epoch": 0.15, "learning_rate": 1.9214247744811488e-05, "loss": 0.9959, "step": 1131 }, { "epoch": 0.15, "learning_rate": 1.921254103357706e-05, "loss": 1.0479, "step": 1132 }, { "epoch": 0.15, "learning_rate": 1.921083254676972e-05, "loss": 1.0169, "step": 1133 }, { "epoch": 0.15, "learning_rate": 1.9209122284718757e-05, "loss": 1.0315, "step": 1134 }, { "epoch": 0.15, "learning_rate": 1.9207410247753795e-05, "loss": 0.9891, "step": 1135 }, { "epoch": 0.15, "learning_rate": 1.9205696436204807e-05, "loss": 0.9913, "step": 1136 }, { "epoch": 0.15, "learning_rate": 1.92039808504021e-05, "loss": 0.8908, "step": 1137 }, { "epoch": 0.15, "learning_rate": 1.9202263490676323e-05, "loss": 0.9561, "step": 1138 }, { "epoch": 0.15, "learning_rate": 1.920054435735847e-05, "loss": 0.9663, "step": 1139 }, { "epoch": 0.15, "learning_rate": 1.919882345077989e-05, "loss": 1.0056, "step": 1140 }, { "epoch": 0.15, "learning_rate": 1.9197100771272243e-05, "loss": 1.027, "step": 1141 }, { "epoch": 0.15, "learning_rate": 1.919537631916756e-05, "loss": 0.9278, "step": 1142 }, { "epoch": 0.15, "learning_rate": 1.9193650094798198e-05, "loss": 0.9441, "step": 1143 }, { "epoch": 0.16, "learning_rate": 1.919192209849686e-05, "loss": 1.0002, "step": 1144 }, { "epoch": 0.16, "learning_rate": 1.919019233059659e-05, "loss": 0.958, "step": 1145 }, { "epoch": 0.16, "learning_rate": 1.9188460791430775e-05, "loss": 0.9916, "step": 1146 }, { "epoch": 0.16, "learning_rate": 1.918672748133314e-05, "loss": 0.9793, "step": 1147 }, { "epoch": 0.16, "learning_rate": 1.9184992400637753e-05, "loss": 0.9629, "step": 1148 }, { "epoch": 0.16, "learning_rate": 1.9183255549679033e-05, "loss": 1.0401, "step": 1149 }, { "epoch": 0.16, "learning_rate": 1.9181516928791715e-05, "loss": 1.0121, "step": 1150 }, { "epoch": 0.16, "learning_rate": 1.9179776538310902e-05, "loss": 1.052, "step": 1151 }, { "epoch": 0.16, "learning_rate": 1.9178034378572023e-05, "loss": 0.9404, "step": 1152 }, { "epoch": 0.16, "learning_rate": 1.9176290449910854e-05, "loss": 1.0262, "step": 1153 }, { "epoch": 0.16, "learning_rate": 1.9174544752663507e-05, "loss": 1.0174, "step": 1154 }, { "epoch": 0.16, "learning_rate": 1.917279728716644e-05, "loss": 1.0071, "step": 1155 }, { "epoch": 0.16, "learning_rate": 1.9171048053756453e-05, "loss": 0.9307, "step": 1156 }, { "epoch": 0.16, "learning_rate": 1.9169297052770676e-05, "loss": 1.026, "step": 1157 }, { "epoch": 0.16, "learning_rate": 1.916754428454659e-05, "loss": 0.9663, "step": 1158 }, { "epoch": 0.16, "learning_rate": 1.9165789749422014e-05, "loss": 1.007, "step": 1159 }, { "epoch": 0.16, "learning_rate": 1.916403344773511e-05, "loss": 0.9623, "step": 1160 }, { "epoch": 0.16, "learning_rate": 1.9162275379824372e-05, "loss": 1.0081, "step": 1161 }, { "epoch": 0.16, "learning_rate": 1.9160515546028644e-05, "loss": 1.0341, "step": 1162 }, { "epoch": 0.16, "learning_rate": 1.9158753946687104e-05, "loss": 1.0241, "step": 1163 }, { "epoch": 0.16, "learning_rate": 1.9156990582139276e-05, "loss": 0.9999, "step": 1164 }, { "epoch": 0.16, "learning_rate": 1.915522545272502e-05, "loss": 0.9889, "step": 1165 }, { "epoch": 0.16, "learning_rate": 1.9153458558784536e-05, "loss": 0.9676, "step": 1166 }, { "epoch": 0.16, "learning_rate": 1.915168990065836e-05, "loss": 0.9774, "step": 1167 }, { "epoch": 0.16, "learning_rate": 1.9149919478687378e-05, "loss": 1.036, "step": 1168 }, { "epoch": 0.16, "learning_rate": 1.9148147293212817e-05, "loss": 1.0045, "step": 1169 }, { "epoch": 0.16, "learning_rate": 1.914637334457623e-05, "loss": 0.9451, "step": 1170 }, { "epoch": 0.16, "learning_rate": 1.9144597633119518e-05, "loss": 0.9653, "step": 1171 }, { "epoch": 0.16, "learning_rate": 1.914282015918493e-05, "loss": 1.0406, "step": 1172 }, { "epoch": 0.16, "learning_rate": 1.9141040923115034e-05, "loss": 0.936, "step": 1173 }, { "epoch": 0.16, "learning_rate": 1.9139259925252756e-05, "loss": 1.0183, "step": 1174 }, { "epoch": 0.16, "learning_rate": 1.9137477165941355e-05, "loss": 0.9772, "step": 1175 }, { "epoch": 0.16, "learning_rate": 1.913569264552443e-05, "loss": 1.0008, "step": 1176 }, { "epoch": 0.16, "learning_rate": 1.913390636434592e-05, "loss": 0.997, "step": 1177 }, { "epoch": 0.16, "learning_rate": 1.9132118322750106e-05, "loss": 1.0409, "step": 1178 }, { "epoch": 0.16, "learning_rate": 1.9130328521081596e-05, "loss": 0.9947, "step": 1179 }, { "epoch": 0.16, "learning_rate": 1.912853695968535e-05, "loss": 0.9611, "step": 1180 }, { "epoch": 0.16, "learning_rate": 1.912674363890667e-05, "loss": 0.9597, "step": 1181 }, { "epoch": 0.16, "learning_rate": 1.912494855909118e-05, "loss": 0.9834, "step": 1182 }, { "epoch": 0.16, "learning_rate": 1.9123151720584863e-05, "loss": 0.9727, "step": 1183 }, { "epoch": 0.16, "learning_rate": 1.9121353123734025e-05, "loss": 0.9889, "step": 1184 }, { "epoch": 0.16, "learning_rate": 1.9119552768885323e-05, "loss": 1.014, "step": 1185 }, { "epoch": 0.16, "learning_rate": 1.9117750656385738e-05, "loss": 0.9561, "step": 1186 }, { "epoch": 0.16, "learning_rate": 1.9115946786582605e-05, "loss": 0.9829, "step": 1187 }, { "epoch": 0.16, "learning_rate": 1.9114141159823597e-05, "loss": 0.975, "step": 1188 }, { "epoch": 0.16, "learning_rate": 1.911233377645671e-05, "loss": 1.0587, "step": 1189 }, { "epoch": 0.16, "learning_rate": 1.911052463683029e-05, "loss": 0.9281, "step": 1190 }, { "epoch": 0.16, "learning_rate": 1.910871374129303e-05, "loss": 0.963, "step": 1191 }, { "epoch": 0.16, "learning_rate": 1.9106901090193943e-05, "loss": 0.9776, "step": 1192 }, { "epoch": 0.16, "learning_rate": 1.9105086683882394e-05, "loss": 0.9517, "step": 1193 }, { "epoch": 0.16, "learning_rate": 1.9103270522708072e-05, "loss": 1.0158, "step": 1194 }, { "epoch": 0.16, "learning_rate": 1.9101452607021027e-05, "loss": 0.9729, "step": 1195 }, { "epoch": 0.16, "learning_rate": 1.9099632937171625e-05, "loss": 0.9782, "step": 1196 }, { "epoch": 0.16, "learning_rate": 1.909781151351058e-05, "loss": 1.0668, "step": 1197 }, { "epoch": 0.16, "learning_rate": 1.9095988336388945e-05, "loss": 1.0041, "step": 1198 }, { "epoch": 0.16, "learning_rate": 1.9094163406158105e-05, "loss": 1.0189, "step": 1199 }, { "epoch": 0.16, "learning_rate": 1.9092336723169793e-05, "loss": 0.9634, "step": 1200 }, { "epoch": 0.16, "learning_rate": 1.9090508287776067e-05, "loss": 0.9662, "step": 1201 }, { "epoch": 0.16, "learning_rate": 1.9088678100329337e-05, "loss": 0.9892, "step": 1202 }, { "epoch": 0.16, "learning_rate": 1.9086846161182334e-05, "loss": 0.9186, "step": 1203 }, { "epoch": 0.16, "learning_rate": 1.9085012470688142e-05, "loss": 0.9821, "step": 1204 }, { "epoch": 0.16, "learning_rate": 1.9083177029200174e-05, "loss": 1.0267, "step": 1205 }, { "epoch": 0.16, "learning_rate": 1.908133983707218e-05, "loss": 0.9947, "step": 1206 }, { "epoch": 0.16, "learning_rate": 1.907950089465825e-05, "loss": 0.9858, "step": 1207 }, { "epoch": 0.16, "learning_rate": 1.907766020231282e-05, "loss": 1.0471, "step": 1208 }, { "epoch": 0.16, "learning_rate": 1.9075817760390646e-05, "loss": 0.9391, "step": 1209 }, { "epoch": 0.16, "learning_rate": 1.9073973569246832e-05, "loss": 0.9962, "step": 1210 }, { "epoch": 0.16, "learning_rate": 1.9072127629236816e-05, "loss": 1.0017, "step": 1211 }, { "epoch": 0.16, "learning_rate": 1.9070279940716375e-05, "loss": 1.0176, "step": 1212 }, { "epoch": 0.16, "learning_rate": 1.9068430504041623e-05, "loss": 1.0318, "step": 1213 }, { "epoch": 0.16, "learning_rate": 1.906657931956901e-05, "loss": 1.0037, "step": 1214 }, { "epoch": 0.16, "learning_rate": 1.906472638765532e-05, "loss": 1.0125, "step": 1215 }, { "epoch": 0.16, "learning_rate": 1.9062871708657678e-05, "loss": 1.0122, "step": 1216 }, { "epoch": 0.16, "learning_rate": 1.906101528293355e-05, "loss": 0.9609, "step": 1217 }, { "epoch": 0.17, "learning_rate": 1.905915711084072e-05, "loss": 1.0412, "step": 1218 }, { "epoch": 0.17, "learning_rate": 1.9057297192737332e-05, "loss": 1.0281, "step": 1219 }, { "epoch": 0.17, "learning_rate": 1.9055435528981857e-05, "loss": 1.0434, "step": 1220 }, { "epoch": 0.17, "learning_rate": 1.9053572119933093e-05, "loss": 0.9943, "step": 1221 }, { "epoch": 0.17, "learning_rate": 1.9051706965950192e-05, "loss": 1.0159, "step": 1222 }, { "epoch": 0.17, "learning_rate": 1.9049840067392626e-05, "loss": 0.9403, "step": 1223 }, { "epoch": 0.17, "learning_rate": 1.9047971424620214e-05, "loss": 0.9742, "step": 1224 }, { "epoch": 0.17, "learning_rate": 1.9046101037993107e-05, "loss": 0.9922, "step": 1225 }, { "epoch": 0.17, "learning_rate": 1.904422890787179e-05, "loss": 0.9625, "step": 1226 }, { "epoch": 0.17, "learning_rate": 1.9042355034617094e-05, "loss": 1.0602, "step": 1227 }, { "epoch": 0.17, "learning_rate": 1.904047941859017e-05, "loss": 0.9614, "step": 1228 }, { "epoch": 0.17, "learning_rate": 1.903860206015252e-05, "loss": 1.0129, "step": 1229 }, { "epoch": 0.17, "learning_rate": 1.9036722959665975e-05, "loss": 0.9351, "step": 1230 }, { "epoch": 0.17, "learning_rate": 1.9034842117492697e-05, "loss": 0.9484, "step": 1231 }, { "epoch": 0.17, "learning_rate": 1.9032959533995194e-05, "loss": 1.0046, "step": 1232 }, { "epoch": 0.17, "learning_rate": 1.90310752095363e-05, "loss": 0.9768, "step": 1233 }, { "epoch": 0.17, "learning_rate": 1.9029189144479193e-05, "loss": 0.9878, "step": 1234 }, { "epoch": 0.17, "learning_rate": 1.9027301339187384e-05, "loss": 1.001, "step": 1235 }, { "epoch": 0.17, "learning_rate": 1.902541179402471e-05, "loss": 0.9308, "step": 1236 }, { "epoch": 0.17, "learning_rate": 1.902352050935536e-05, "loss": 0.9257, "step": 1237 }, { "epoch": 0.17, "learning_rate": 1.9021627485543844e-05, "loss": 0.947, "step": 1238 }, { "epoch": 0.17, "learning_rate": 1.901973272295502e-05, "loss": 0.9775, "step": 1239 }, { "epoch": 0.17, "learning_rate": 1.9017836221954062e-05, "loss": 0.9381, "step": 1240 }, { "epoch": 0.17, "learning_rate": 1.9015937982906495e-05, "loss": 1.0097, "step": 1241 }, { "epoch": 0.17, "learning_rate": 1.9014038006178182e-05, "loss": 0.9806, "step": 1242 }, { "epoch": 0.17, "learning_rate": 1.9012136292135306e-05, "loss": 0.9726, "step": 1243 }, { "epoch": 0.17, "learning_rate": 1.9010232841144395e-05, "loss": 0.9494, "step": 1244 }, { "epoch": 0.17, "learning_rate": 1.900832765357231e-05, "loss": 0.9767, "step": 1245 }, { "epoch": 0.17, "learning_rate": 1.9006420729786246e-05, "loss": 0.9081, "step": 1246 }, { "epoch": 0.17, "learning_rate": 1.900451207015373e-05, "loss": 1.0149, "step": 1247 }, { "epoch": 0.17, "learning_rate": 1.9002601675042632e-05, "loss": 0.9832, "step": 1248 }, { "epoch": 0.17, "learning_rate": 1.9000689544821145e-05, "loss": 0.9729, "step": 1249 }, { "epoch": 0.17, "learning_rate": 1.8998775679857805e-05, "loss": 1.0071, "step": 1250 }, { "epoch": 0.17, "learning_rate": 1.8996860080521478e-05, "loss": 0.9876, "step": 1251 }, { "epoch": 0.17, "learning_rate": 1.8994942747181368e-05, "loss": 0.933, "step": 1252 }, { "epoch": 0.17, "learning_rate": 1.899302368020701e-05, "loss": 1.0196, "step": 1253 }, { "epoch": 0.17, "learning_rate": 1.899110287996827e-05, "loss": 0.9387, "step": 1254 }, { "epoch": 0.17, "learning_rate": 1.8989180346835356e-05, "loss": 0.9887, "step": 1255 }, { "epoch": 0.17, "learning_rate": 1.8987256081178808e-05, "loss": 0.9822, "step": 1256 }, { "epoch": 0.17, "learning_rate": 1.8985330083369494e-05, "loss": 0.9827, "step": 1257 }, { "epoch": 0.17, "learning_rate": 1.8983402353778625e-05, "loss": 0.9461, "step": 1258 }, { "epoch": 0.17, "learning_rate": 1.8981472892777735e-05, "loss": 1.0513, "step": 1259 }, { "epoch": 0.17, "learning_rate": 1.89795417007387e-05, "loss": 1.0627, "step": 1260 }, { "epoch": 0.17, "learning_rate": 1.8977608778033726e-05, "loss": 0.9924, "step": 1261 }, { "epoch": 0.17, "learning_rate": 1.897567412503536e-05, "loss": 1.0491, "step": 1262 }, { "epoch": 0.17, "learning_rate": 1.8973737742116464e-05, "loss": 0.9909, "step": 1263 }, { "epoch": 0.17, "learning_rate": 1.8971799629650253e-05, "loss": 0.9033, "step": 1264 }, { "epoch": 0.17, "learning_rate": 1.896985978801027e-05, "loss": 0.9846, "step": 1265 }, { "epoch": 0.17, "learning_rate": 1.896791821757038e-05, "loss": 1.0214, "step": 1266 }, { "epoch": 0.17, "learning_rate": 1.8965974918704803e-05, "loss": 0.9803, "step": 1267 }, { "epoch": 0.17, "learning_rate": 1.8964029891788067e-05, "loss": 1.0376, "step": 1268 }, { "epoch": 0.17, "learning_rate": 1.8962083137195054e-05, "loss": 1.005, "step": 1269 }, { "epoch": 0.17, "learning_rate": 1.8960134655300966e-05, "loss": 0.9826, "step": 1270 }, { "epoch": 0.17, "learning_rate": 1.8958184446481343e-05, "loss": 0.9854, "step": 1271 }, { "epoch": 0.17, "learning_rate": 1.8956232511112058e-05, "loss": 0.9638, "step": 1272 }, { "epoch": 0.17, "learning_rate": 1.895427884956932e-05, "loss": 1.0087, "step": 1273 }, { "epoch": 0.17, "learning_rate": 1.8952323462229658e-05, "loss": 0.9667, "step": 1274 }, { "epoch": 0.17, "learning_rate": 1.895036634946995e-05, "loss": 0.9647, "step": 1275 }, { "epoch": 0.17, "learning_rate": 1.894840751166739e-05, "loss": 1.0095, "step": 1276 }, { "epoch": 0.17, "learning_rate": 1.8946446949199525e-05, "loss": 0.9528, "step": 1277 }, { "epoch": 0.17, "learning_rate": 1.894448466244421e-05, "loss": 0.9793, "step": 1278 }, { "epoch": 0.17, "learning_rate": 1.8942520651779657e-05, "loss": 0.9635, "step": 1279 }, { "epoch": 0.17, "learning_rate": 1.8940554917584392e-05, "loss": 1.0434, "step": 1280 }, { "epoch": 0.17, "learning_rate": 1.893858746023728e-05, "loss": 0.9666, "step": 1281 }, { "epoch": 0.17, "learning_rate": 1.8936618280117516e-05, "loss": 0.9938, "step": 1282 }, { "epoch": 0.17, "learning_rate": 1.893464737760463e-05, "loss": 0.9579, "step": 1283 }, { "epoch": 0.17, "learning_rate": 1.8932674753078485e-05, "loss": 0.9353, "step": 1284 }, { "epoch": 0.17, "learning_rate": 1.8930700406919275e-05, "loss": 0.9285, "step": 1285 }, { "epoch": 0.17, "learning_rate": 1.8928724339507515e-05, "loss": 0.9961, "step": 1286 }, { "epoch": 0.17, "learning_rate": 1.892674655122407e-05, "loss": 0.9547, "step": 1287 }, { "epoch": 0.17, "learning_rate": 1.8924767042450122e-05, "loss": 0.9553, "step": 1288 }, { "epoch": 0.17, "learning_rate": 1.8922785813567194e-05, "loss": 1.0202, "step": 1289 }, { "epoch": 0.17, "learning_rate": 1.8920802864957136e-05, "loss": 0.9516, "step": 1290 }, { "epoch": 0.17, "learning_rate": 1.891881819700213e-05, "loss": 0.9853, "step": 1291 }, { "epoch": 0.18, "learning_rate": 1.891683181008469e-05, "loss": 0.978, "step": 1292 }, { "epoch": 0.18, "learning_rate": 1.891484370458766e-05, "loss": 1.0045, "step": 1293 }, { "epoch": 0.18, "learning_rate": 1.8912853880894215e-05, "loss": 0.99, "step": 1294 }, { "epoch": 0.18, "learning_rate": 1.8910862339387865e-05, "loss": 0.9334, "step": 1295 }, { "epoch": 0.18, "learning_rate": 1.890886908045245e-05, "loss": 0.9873, "step": 1296 }, { "epoch": 0.18, "learning_rate": 1.890687410447213e-05, "loss": 0.9816, "step": 1297 }, { "epoch": 0.18, "learning_rate": 1.890487741183142e-05, "loss": 0.9095, "step": 1298 }, { "epoch": 0.18, "learning_rate": 1.890287900291514e-05, "loss": 1.0211, "step": 1299 }, { "epoch": 0.18, "learning_rate": 1.8900878878108452e-05, "loss": 0.9986, "step": 1300 }, { "epoch": 0.18, "learning_rate": 1.8898877037796856e-05, "loss": 1.0077, "step": 1301 }, { "epoch": 0.18, "learning_rate": 1.8896873482366173e-05, "loss": 0.9674, "step": 1302 }, { "epoch": 0.18, "learning_rate": 1.8894868212202553e-05, "loss": 1.0195, "step": 1303 }, { "epoch": 0.18, "learning_rate": 1.8892861227692485e-05, "loss": 0.968, "step": 1304 }, { "epoch": 0.18, "learning_rate": 1.8890852529222778e-05, "loss": 0.9204, "step": 1305 }, { "epoch": 0.18, "learning_rate": 1.8888842117180584e-05, "loss": 0.9163, "step": 1306 }, { "epoch": 0.18, "learning_rate": 1.8886829991953372e-05, "loss": 0.9909, "step": 1307 }, { "epoch": 0.18, "learning_rate": 1.8884816153928953e-05, "loss": 0.9721, "step": 1308 }, { "epoch": 0.18, "learning_rate": 1.888280060349546e-05, "loss": 1.0064, "step": 1309 }, { "epoch": 0.18, "learning_rate": 1.8880783341041357e-05, "loss": 1.0311, "step": 1310 }, { "epoch": 0.18, "learning_rate": 1.8878764366955446e-05, "loss": 1.0153, "step": 1311 }, { "epoch": 0.18, "learning_rate": 1.8876743681626846e-05, "loss": 1.0036, "step": 1312 }, { "epoch": 0.18, "learning_rate": 1.8874721285445016e-05, "loss": 0.9808, "step": 1313 }, { "epoch": 0.18, "learning_rate": 1.887269717879974e-05, "loss": 0.9995, "step": 1314 }, { "epoch": 0.18, "learning_rate": 1.8870671362081133e-05, "loss": 0.952, "step": 1315 }, { "epoch": 0.18, "learning_rate": 1.8868643835679638e-05, "loss": 1.0042, "step": 1316 }, { "epoch": 0.18, "learning_rate": 1.8866614599986032e-05, "loss": 0.9218, "step": 1317 }, { "epoch": 0.18, "learning_rate": 1.8864583655391417e-05, "loss": 1.0127, "step": 1318 }, { "epoch": 0.18, "learning_rate": 1.8862551002287223e-05, "loss": 1.0406, "step": 1319 }, { "epoch": 0.18, "learning_rate": 1.8860516641065218e-05, "loss": 0.9016, "step": 1320 }, { "epoch": 0.18, "learning_rate": 1.8858480572117485e-05, "loss": 1.0408, "step": 1321 }, { "epoch": 0.18, "learning_rate": 1.8856442795836453e-05, "loss": 0.9784, "step": 1322 }, { "epoch": 0.18, "learning_rate": 1.885440331261487e-05, "loss": 0.9588, "step": 1323 }, { "epoch": 0.18, "learning_rate": 1.8852362122845807e-05, "loss": 1.008, "step": 1324 }, { "epoch": 0.18, "learning_rate": 1.8850319226922678e-05, "loss": 0.9318, "step": 1325 }, { "epoch": 0.18, "learning_rate": 1.8848274625239216e-05, "loss": 0.9453, "step": 1326 }, { "epoch": 0.18, "learning_rate": 1.8846228318189488e-05, "loss": 0.9919, "step": 1327 }, { "epoch": 0.18, "learning_rate": 1.884418030616789e-05, "loss": 1.0211, "step": 1328 }, { "epoch": 0.18, "learning_rate": 1.8842130589569137e-05, "loss": 0.9675, "step": 1329 }, { "epoch": 0.18, "learning_rate": 1.8840079168788288e-05, "loss": 1.0402, "step": 1330 }, { "epoch": 0.18, "learning_rate": 1.8838026044220716e-05, "loss": 0.9144, "step": 1331 }, { "epoch": 0.18, "learning_rate": 1.8835971216262132e-05, "loss": 1.0481, "step": 1332 }, { "epoch": 0.18, "learning_rate": 1.8833914685308568e-05, "loss": 0.8566, "step": 1333 }, { "epoch": 0.18, "learning_rate": 1.8831856451756394e-05, "loss": 1.0016, "step": 1334 }, { "epoch": 0.18, "learning_rate": 1.88297965160023e-05, "loss": 0.9803, "step": 1335 }, { "epoch": 0.18, "learning_rate": 1.8827734878443303e-05, "loss": 0.9648, "step": 1336 }, { "epoch": 0.18, "learning_rate": 1.8825671539476754e-05, "loss": 1.0051, "step": 1337 }, { "epoch": 0.18, "learning_rate": 1.882360649950033e-05, "loss": 0.9924, "step": 1338 }, { "epoch": 0.18, "learning_rate": 1.8821539758912033e-05, "loss": 0.9524, "step": 1339 }, { "epoch": 0.18, "learning_rate": 1.8819471318110195e-05, "loss": 1.0171, "step": 1340 }, { "epoch": 0.18, "learning_rate": 1.8817401177493477e-05, "loss": 0.9776, "step": 1341 }, { "epoch": 0.18, "learning_rate": 1.881532933746087e-05, "loss": 0.8741, "step": 1342 }, { "epoch": 0.18, "learning_rate": 1.8813255798411676e-05, "loss": 1.0608, "step": 1343 }, { "epoch": 0.18, "learning_rate": 1.881118056074555e-05, "loss": 0.9578, "step": 1344 }, { "epoch": 0.18, "learning_rate": 1.880910362486246e-05, "loss": 0.9403, "step": 1345 }, { "epoch": 0.18, "learning_rate": 1.88070249911627e-05, "loss": 0.9524, "step": 1346 }, { "epoch": 0.18, "learning_rate": 1.8804944660046887e-05, "loss": 0.9432, "step": 1347 }, { "epoch": 0.18, "learning_rate": 1.8802862631915983e-05, "loss": 0.9095, "step": 1348 }, { "epoch": 0.18, "learning_rate": 1.8800778907171264e-05, "loss": 0.9445, "step": 1349 }, { "epoch": 0.18, "learning_rate": 1.879869348621433e-05, "loss": 0.8942, "step": 1350 }, { "epoch": 0.18, "learning_rate": 1.879660636944712e-05, "loss": 0.9158, "step": 1351 }, { "epoch": 0.18, "learning_rate": 1.879451755727189e-05, "loss": 0.9688, "step": 1352 }, { "epoch": 0.18, "learning_rate": 1.8792427050091225e-05, "loss": 0.9609, "step": 1353 }, { "epoch": 0.18, "learning_rate": 1.879033484830804e-05, "loss": 1.0042, "step": 1354 }, { "epoch": 0.18, "learning_rate": 1.878824095232557e-05, "loss": 0.9471, "step": 1355 }, { "epoch": 0.18, "learning_rate": 1.8786145362547387e-05, "loss": 0.9382, "step": 1356 }, { "epoch": 0.18, "learning_rate": 1.8784048079377375e-05, "loss": 0.9987, "step": 1357 }, { "epoch": 0.18, "learning_rate": 1.8781949103219758e-05, "loss": 0.9822, "step": 1358 }, { "epoch": 0.18, "learning_rate": 1.8779848434479076e-05, "loss": 1.0149, "step": 1359 }, { "epoch": 0.18, "learning_rate": 1.877774607356021e-05, "loss": 0.9149, "step": 1360 }, { "epoch": 0.18, "learning_rate": 1.8775642020868346e-05, "loss": 0.9688, "step": 1361 }, { "epoch": 0.18, "learning_rate": 1.8773536276809016e-05, "loss": 1.0192, "step": 1362 }, { "epoch": 0.18, "learning_rate": 1.877142884178806e-05, "loss": 0.9479, "step": 1363 }, { "epoch": 0.18, "learning_rate": 1.8769319716211658e-05, "loss": 0.9493, "step": 1364 }, { "epoch": 0.19, "learning_rate": 1.8767208900486314e-05, "loss": 0.9378, "step": 1365 }, { "epoch": 0.19, "learning_rate": 1.876509639501885e-05, "loss": 0.9896, "step": 1366 }, { "epoch": 0.19, "learning_rate": 1.8762982200216417e-05, "loss": 0.996, "step": 1367 }, { "epoch": 0.19, "learning_rate": 1.87608663164865e-05, "loss": 0.9659, "step": 1368 }, { "epoch": 0.19, "learning_rate": 1.8758748744236895e-05, "loss": 0.966, "step": 1369 }, { "epoch": 0.19, "learning_rate": 1.8756629483875735e-05, "loss": 0.9217, "step": 1370 }, { "epoch": 0.19, "learning_rate": 1.8754508535811477e-05, "loss": 0.9767, "step": 1371 }, { "epoch": 0.19, "learning_rate": 1.8752385900452892e-05, "loss": 0.9613, "step": 1372 }, { "epoch": 0.19, "learning_rate": 1.875026157820909e-05, "loss": 0.9808, "step": 1373 }, { "epoch": 0.19, "learning_rate": 1.8748135569489504e-05, "loss": 1.0316, "step": 1374 }, { "epoch": 0.19, "learning_rate": 1.8746007874703883e-05, "loss": 0.9029, "step": 1375 }, { "epoch": 0.19, "learning_rate": 1.8743878494262304e-05, "loss": 1.037, "step": 1376 }, { "epoch": 0.19, "learning_rate": 1.8741747428575184e-05, "loss": 0.9697, "step": 1377 }, { "epoch": 0.19, "learning_rate": 1.873961467805324e-05, "loss": 1.047, "step": 1378 }, { "epoch": 0.19, "learning_rate": 1.8737480243107533e-05, "loss": 1.0069, "step": 1379 }, { "epoch": 0.19, "learning_rate": 1.873534412414944e-05, "loss": 1.0108, "step": 1380 }, { "epoch": 0.19, "learning_rate": 1.8733206321590667e-05, "loss": 0.9753, "step": 1381 }, { "epoch": 0.19, "learning_rate": 1.8731066835843237e-05, "loss": 0.9402, "step": 1382 }, { "epoch": 0.19, "learning_rate": 1.8728925667319506e-05, "loss": 0.9343, "step": 1383 }, { "epoch": 0.19, "learning_rate": 1.872678281643215e-05, "loss": 0.9742, "step": 1384 }, { "epoch": 0.19, "learning_rate": 1.872463828359417e-05, "loss": 0.9598, "step": 1385 }, { "epoch": 0.19, "learning_rate": 1.8722492069218886e-05, "loss": 1.0, "step": 1386 }, { "epoch": 0.19, "learning_rate": 1.8720344173719957e-05, "loss": 0.8557, "step": 1387 }, { "epoch": 0.19, "learning_rate": 1.871819459751135e-05, "loss": 0.9556, "step": 1388 }, { "epoch": 0.19, "learning_rate": 1.8716043341007363e-05, "loss": 0.9961, "step": 1389 }, { "epoch": 0.19, "learning_rate": 1.8713890404622618e-05, "loss": 0.9314, "step": 1390 }, { "epoch": 0.19, "learning_rate": 1.8711735788772058e-05, "loss": 0.9739, "step": 1391 }, { "epoch": 0.19, "learning_rate": 1.8709579493870953e-05, "loss": 1.0089, "step": 1392 }, { "epoch": 0.19, "learning_rate": 1.8707421520334895e-05, "loss": 1.026, "step": 1393 }, { "epoch": 0.19, "learning_rate": 1.8705261868579797e-05, "loss": 0.9664, "step": 1394 }, { "epoch": 0.19, "learning_rate": 1.8703100539021902e-05, "loss": 0.9254, "step": 1395 }, { "epoch": 0.19, "learning_rate": 1.870093753207777e-05, "loss": 0.9731, "step": 1396 }, { "epoch": 0.19, "learning_rate": 1.8698772848164286e-05, "loss": 0.9719, "step": 1397 }, { "epoch": 0.19, "learning_rate": 1.869660648769866e-05, "loss": 0.9612, "step": 1398 }, { "epoch": 0.19, "learning_rate": 1.8694438451098423e-05, "loss": 0.9513, "step": 1399 }, { "epoch": 0.19, "learning_rate": 1.8692268738781435e-05, "loss": 1.0137, "step": 1400 }, { "epoch": 0.19, "learning_rate": 1.8690097351165868e-05, "loss": 0.9842, "step": 1401 }, { "epoch": 0.19, "learning_rate": 1.8687924288670224e-05, "loss": 0.9739, "step": 1402 }, { "epoch": 0.19, "learning_rate": 1.8685749551713332e-05, "loss": 0.968, "step": 1403 }, { "epoch": 0.19, "learning_rate": 1.8683573140714332e-05, "loss": 0.9611, "step": 1404 }, { "epoch": 0.19, "learning_rate": 1.8681395056092694e-05, "loss": 0.9144, "step": 1405 }, { "epoch": 0.19, "learning_rate": 1.867921529826821e-05, "loss": 0.9774, "step": 1406 }, { "epoch": 0.19, "learning_rate": 1.8677033867661e-05, "loss": 0.9575, "step": 1407 }, { "epoch": 0.19, "learning_rate": 1.867485076469149e-05, "loss": 1.0695, "step": 1408 }, { "epoch": 0.19, "learning_rate": 1.8672665989780448e-05, "loss": 0.9625, "step": 1409 }, { "epoch": 0.19, "learning_rate": 1.867047954334895e-05, "loss": 0.9892, "step": 1410 }, { "epoch": 0.19, "learning_rate": 1.8668291425818402e-05, "loss": 0.9212, "step": 1411 }, { "epoch": 0.19, "learning_rate": 1.8666101637610533e-05, "loss": 0.9741, "step": 1412 }, { "epoch": 0.19, "learning_rate": 1.866391017914738e-05, "loss": 0.9676, "step": 1413 }, { "epoch": 0.19, "learning_rate": 1.8661717050851323e-05, "loss": 1.0289, "step": 1414 }, { "epoch": 0.19, "learning_rate": 1.8659522253145042e-05, "loss": 0.9626, "step": 1415 }, { "epoch": 0.19, "learning_rate": 1.8657325786451562e-05, "loss": 1.007, "step": 1416 }, { "epoch": 0.19, "learning_rate": 1.8655127651194208e-05, "loss": 0.98, "step": 1417 }, { "epoch": 0.19, "learning_rate": 1.8652927847796642e-05, "loss": 0.9943, "step": 1418 }, { "epoch": 0.19, "learning_rate": 1.8650726376682838e-05, "loss": 0.972, "step": 1419 }, { "epoch": 0.19, "learning_rate": 1.8648523238277096e-05, "loss": 0.9658, "step": 1420 }, { "epoch": 0.19, "learning_rate": 1.864631843300404e-05, "loss": 0.9942, "step": 1421 }, { "epoch": 0.19, "learning_rate": 1.8644111961288605e-05, "loss": 0.9619, "step": 1422 }, { "epoch": 0.19, "learning_rate": 1.8641903823556057e-05, "loss": 1.0156, "step": 1423 }, { "epoch": 0.19, "learning_rate": 1.8639694020231982e-05, "loss": 0.9501, "step": 1424 }, { "epoch": 0.19, "learning_rate": 1.8637482551742283e-05, "loss": 0.9725, "step": 1425 }, { "epoch": 0.19, "learning_rate": 1.8635269418513185e-05, "loss": 0.8909, "step": 1426 }, { "epoch": 0.19, "learning_rate": 1.8633054620971238e-05, "loss": 0.9616, "step": 1427 }, { "epoch": 0.19, "learning_rate": 1.8630838159543306e-05, "loss": 1.0012, "step": 1428 }, { "epoch": 0.19, "learning_rate": 1.8628620034656578e-05, "loss": 0.9806, "step": 1429 }, { "epoch": 0.19, "learning_rate": 1.8626400246738568e-05, "loss": 1.0102, "step": 1430 }, { "epoch": 0.19, "learning_rate": 1.8624178796217096e-05, "loss": 0.9847, "step": 1431 }, { "epoch": 0.19, "learning_rate": 1.8621955683520317e-05, "loss": 1.0644, "step": 1432 }, { "epoch": 0.19, "learning_rate": 1.8619730909076704e-05, "loss": 0.9248, "step": 1433 }, { "epoch": 0.19, "learning_rate": 1.861750447331504e-05, "loss": 0.9626, "step": 1434 }, { "epoch": 0.19, "learning_rate": 1.861527637666444e-05, "loss": 0.9588, "step": 1435 }, { "epoch": 0.19, "learning_rate": 1.8613046619554337e-05, "loss": 0.9254, "step": 1436 }, { "epoch": 0.19, "learning_rate": 1.8610815202414477e-05, "loss": 0.9529, "step": 1437 }, { "epoch": 0.19, "learning_rate": 1.8608582125674933e-05, "loss": 1.0383, "step": 1438 }, { "epoch": 0.2, "learning_rate": 1.8606347389766094e-05, "loss": 0.99, "step": 1439 }, { "epoch": 0.2, "learning_rate": 1.8604110995118675e-05, "loss": 1.007, "step": 1440 }, { "epoch": 0.2, "learning_rate": 1.86018729421637e-05, "loss": 0.9977, "step": 1441 }, { "epoch": 0.2, "learning_rate": 1.8599633231332522e-05, "loss": 1.0187, "step": 1442 }, { "epoch": 0.2, "learning_rate": 1.859739186305681e-05, "loss": 0.9463, "step": 1443 }, { "epoch": 0.2, "learning_rate": 1.8595148837768554e-05, "loss": 0.9417, "step": 1444 }, { "epoch": 0.2, "learning_rate": 1.8592904155900057e-05, "loss": 0.9755, "step": 1445 }, { "epoch": 0.2, "learning_rate": 1.8590657817883952e-05, "loss": 0.9483, "step": 1446 }, { "epoch": 0.2, "learning_rate": 1.858840982415318e-05, "loss": 1.0097, "step": 1447 }, { "epoch": 0.2, "learning_rate": 1.858616017514101e-05, "loss": 1.0105, "step": 1448 }, { "epoch": 0.2, "learning_rate": 1.8583908871281026e-05, "loss": 0.9526, "step": 1449 }, { "epoch": 0.2, "learning_rate": 1.8581655913007136e-05, "loss": 0.9731, "step": 1450 }, { "epoch": 0.2, "learning_rate": 1.8579401300753552e-05, "loss": 0.9791, "step": 1451 }, { "epoch": 0.2, "learning_rate": 1.8577145034954822e-05, "loss": 0.8563, "step": 1452 }, { "epoch": 0.2, "learning_rate": 1.857488711604581e-05, "loss": 0.9661, "step": 1453 }, { "epoch": 0.2, "learning_rate": 1.8572627544461682e-05, "loss": 1.0037, "step": 1454 }, { "epoch": 0.2, "learning_rate": 1.8570366320637947e-05, "loss": 1.0462, "step": 1455 }, { "epoch": 0.2, "learning_rate": 1.8568103445010413e-05, "loss": 0.9621, "step": 1456 }, { "epoch": 0.2, "learning_rate": 1.8565838918015218e-05, "loss": 0.965, "step": 1457 }, { "epoch": 0.2, "learning_rate": 1.8563572740088813e-05, "loss": 1.0093, "step": 1458 }, { "epoch": 0.2, "learning_rate": 1.8561304911667967e-05, "loss": 0.9334, "step": 1459 }, { "epoch": 0.2, "learning_rate": 1.855903543318977e-05, "loss": 1.0164, "step": 1460 }, { "epoch": 0.2, "learning_rate": 1.855676430509163e-05, "loss": 0.9701, "step": 1461 }, { "epoch": 0.2, "learning_rate": 1.8554491527811266e-05, "loss": 0.9676, "step": 1462 }, { "epoch": 0.2, "learning_rate": 1.8552217101786728e-05, "loss": 0.9574, "step": 1463 }, { "epoch": 0.2, "learning_rate": 1.8549941027456365e-05, "loss": 0.9882, "step": 1464 }, { "epoch": 0.2, "learning_rate": 1.8547663305258864e-05, "loss": 0.9819, "step": 1465 }, { "epoch": 0.2, "learning_rate": 1.8545383935633217e-05, "loss": 1.0048, "step": 1466 }, { "epoch": 0.2, "learning_rate": 1.8543102919018738e-05, "loss": 0.94, "step": 1467 }, { "epoch": 0.2, "learning_rate": 1.854082025585506e-05, "loss": 0.966, "step": 1468 }, { "epoch": 0.2, "learning_rate": 1.8538535946582122e-05, "loss": 1.0491, "step": 1469 }, { "epoch": 0.2, "learning_rate": 1.8536249991640192e-05, "loss": 0.9062, "step": 1470 }, { "epoch": 0.2, "learning_rate": 1.8533962391469855e-05, "loss": 0.994, "step": 1471 }, { "epoch": 0.2, "learning_rate": 1.853167314651201e-05, "loss": 0.9644, "step": 1472 }, { "epoch": 0.2, "learning_rate": 1.852938225720787e-05, "loss": 0.9368, "step": 1473 }, { "epoch": 0.2, "learning_rate": 1.8527089723998973e-05, "loss": 1.0137, "step": 1474 }, { "epoch": 0.2, "learning_rate": 1.8524795547327163e-05, "loss": 0.9963, "step": 1475 }, { "epoch": 0.2, "learning_rate": 1.8522499727634612e-05, "loss": 0.9794, "step": 1476 }, { "epoch": 0.2, "learning_rate": 1.85202022653638e-05, "loss": 0.953, "step": 1477 }, { "epoch": 0.2, "learning_rate": 1.8517903160957523e-05, "loss": 0.9373, "step": 1478 }, { "epoch": 0.2, "learning_rate": 1.8515602414858907e-05, "loss": 1.017, "step": 1479 }, { "epoch": 0.2, "learning_rate": 1.8513300027511377e-05, "loss": 0.9645, "step": 1480 }, { "epoch": 0.2, "learning_rate": 1.8510995999358683e-05, "loss": 1.0141, "step": 1481 }, { "epoch": 0.2, "learning_rate": 1.8508690330844893e-05, "loss": 0.938, "step": 1482 }, { "epoch": 0.2, "learning_rate": 1.850638302241439e-05, "loss": 0.9859, "step": 1483 }, { "epoch": 0.2, "learning_rate": 1.8504074074511866e-05, "loss": 0.8839, "step": 1484 }, { "epoch": 0.2, "learning_rate": 1.8501763487582338e-05, "loss": 1.0025, "step": 1485 }, { "epoch": 0.2, "learning_rate": 1.8499451262071134e-05, "loss": 1.0233, "step": 1486 }, { "epoch": 0.2, "learning_rate": 1.8497137398423903e-05, "loss": 0.952, "step": 1487 }, { "epoch": 0.2, "learning_rate": 1.8494821897086603e-05, "loss": 0.9668, "step": 1488 }, { "epoch": 0.2, "learning_rate": 1.8492504758505506e-05, "loss": 0.9884, "step": 1489 }, { "epoch": 0.2, "learning_rate": 1.8490185983127212e-05, "loss": 1.0103, "step": 1490 }, { "epoch": 0.2, "learning_rate": 1.8487865571398625e-05, "loss": 0.9247, "step": 1491 }, { "epoch": 0.2, "learning_rate": 1.8485543523766965e-05, "loss": 1.0044, "step": 1492 }, { "epoch": 0.2, "learning_rate": 1.8483219840679778e-05, "loss": 0.9427, "step": 1493 }, { "epoch": 0.2, "learning_rate": 1.848089452258491e-05, "loss": 0.9575, "step": 1494 }, { "epoch": 0.2, "learning_rate": 1.8478567569930536e-05, "loss": 0.9499, "step": 1495 }, { "epoch": 0.2, "learning_rate": 1.8476238983165137e-05, "loss": 1.0102, "step": 1496 }, { "epoch": 0.2, "learning_rate": 1.847390876273751e-05, "loss": 0.9648, "step": 1497 }, { "epoch": 0.2, "learning_rate": 1.8471576909096768e-05, "loss": 0.9245, "step": 1498 }, { "epoch": 0.2, "learning_rate": 1.846924342269234e-05, "loss": 1.0062, "step": 1499 }, { "epoch": 0.2, "learning_rate": 1.8466908303973968e-05, "loss": 0.9834, "step": 1500 }, { "epoch": 0.2, "learning_rate": 1.8464571553391717e-05, "loss": 0.9351, "step": 1501 }, { "epoch": 0.2, "learning_rate": 1.846223317139595e-05, "loss": 1.0057, "step": 1502 }, { "epoch": 0.2, "learning_rate": 1.8459893158437357e-05, "loss": 0.9774, "step": 1503 }, { "epoch": 0.2, "learning_rate": 1.845755151496694e-05, "loss": 0.9524, "step": 1504 }, { "epoch": 0.2, "learning_rate": 1.8455208241436012e-05, "loss": 1.0041, "step": 1505 }, { "epoch": 0.2, "learning_rate": 1.84528633382962e-05, "loss": 0.9593, "step": 1506 }, { "epoch": 0.2, "learning_rate": 1.8450516805999452e-05, "loss": 0.9774, "step": 1507 }, { "epoch": 0.2, "learning_rate": 1.8448168644998025e-05, "loss": 0.8976, "step": 1508 }, { "epoch": 0.2, "learning_rate": 1.844581885574449e-05, "loss": 0.9814, "step": 1509 }, { "epoch": 0.2, "learning_rate": 1.844346743869173e-05, "loss": 1.0004, "step": 1510 }, { "epoch": 0.2, "learning_rate": 1.8441114394292943e-05, "loss": 1.0417, "step": 1511 }, { "epoch": 0.2, "learning_rate": 1.8438759723001643e-05, "loss": 0.96, "step": 1512 }, { "epoch": 0.21, "learning_rate": 1.8436403425271655e-05, "loss": 1.0401, "step": 1513 }, { "epoch": 0.21, "learning_rate": 1.8434045501557122e-05, "loss": 0.986, "step": 1514 }, { "epoch": 0.21, "learning_rate": 1.8431685952312492e-05, "loss": 0.9728, "step": 1515 }, { "epoch": 0.21, "learning_rate": 1.8429324777992534e-05, "loss": 1.0171, "step": 1516 }, { "epoch": 0.21, "learning_rate": 1.842696197905233e-05, "loss": 1.0106, "step": 1517 }, { "epoch": 0.21, "learning_rate": 1.8424597555947268e-05, "loss": 0.926, "step": 1518 }, { "epoch": 0.21, "learning_rate": 1.8422231509133052e-05, "loss": 0.9696, "step": 1519 }, { "epoch": 0.21, "learning_rate": 1.8419863839065706e-05, "loss": 0.9432, "step": 1520 }, { "epoch": 0.21, "learning_rate": 1.8417494546201557e-05, "loss": 0.9611, "step": 1521 }, { "epoch": 0.21, "learning_rate": 1.8415123630997254e-05, "loss": 0.9985, "step": 1522 }, { "epoch": 0.21, "learning_rate": 1.8412751093909747e-05, "loss": 0.9416, "step": 1523 }, { "epoch": 0.21, "learning_rate": 1.841037693539631e-05, "loss": 0.9793, "step": 1524 }, { "epoch": 0.21, "learning_rate": 1.840800115591452e-05, "loss": 0.9958, "step": 1525 }, { "epoch": 0.21, "learning_rate": 1.840562375592228e-05, "loss": 1.0133, "step": 1526 }, { "epoch": 0.21, "learning_rate": 1.8403244735877787e-05, "loss": 0.9906, "step": 1527 }, { "epoch": 0.21, "learning_rate": 1.840086409623957e-05, "loss": 0.9921, "step": 1528 }, { "epoch": 0.21, "learning_rate": 1.839848183746645e-05, "loss": 0.9457, "step": 1529 }, { "epoch": 0.21, "learning_rate": 1.8396097960017574e-05, "loss": 0.9972, "step": 1530 }, { "epoch": 0.21, "learning_rate": 1.83937124643524e-05, "loss": 0.9417, "step": 1531 }, { "epoch": 0.21, "learning_rate": 1.839132535093069e-05, "loss": 0.8519, "step": 1532 }, { "epoch": 0.21, "learning_rate": 1.8388936620212524e-05, "loss": 0.9379, "step": 1533 }, { "epoch": 0.21, "learning_rate": 1.8386546272658296e-05, "loss": 0.9558, "step": 1534 }, { "epoch": 0.21, "learning_rate": 1.8384154308728703e-05, "loss": 0.9594, "step": 1535 }, { "epoch": 0.21, "learning_rate": 1.8381760728884765e-05, "loss": 0.9744, "step": 1536 }, { "epoch": 0.21, "learning_rate": 1.83793655335878e-05, "loss": 1.0109, "step": 1537 }, { "epoch": 0.21, "learning_rate": 1.8376968723299444e-05, "loss": 0.9497, "step": 1538 }, { "epoch": 0.21, "learning_rate": 1.837457029848165e-05, "loss": 0.9778, "step": 1539 }, { "epoch": 0.21, "learning_rate": 1.8372170259596677e-05, "loss": 0.9851, "step": 1540 }, { "epoch": 0.21, "learning_rate": 1.836976860710709e-05, "loss": 0.9685, "step": 1541 }, { "epoch": 0.21, "learning_rate": 1.8367365341475777e-05, "loss": 0.9293, "step": 1542 }, { "epoch": 0.21, "learning_rate": 1.8364960463165918e-05, "loss": 1.0044, "step": 1543 }, { "epoch": 0.21, "learning_rate": 1.836255397264103e-05, "loss": 0.9748, "step": 1544 }, { "epoch": 0.21, "learning_rate": 1.8360145870364917e-05, "loss": 1.0088, "step": 1545 }, { "epoch": 0.21, "learning_rate": 1.8357736156801703e-05, "loss": 0.9827, "step": 1546 }, { "epoch": 0.21, "learning_rate": 1.8355324832415828e-05, "loss": 0.9944, "step": 1547 }, { "epoch": 0.21, "learning_rate": 1.8352911897672028e-05, "loss": 1.0304, "step": 1548 }, { "epoch": 0.21, "learning_rate": 1.835049735303537e-05, "loss": 0.977, "step": 1549 }, { "epoch": 0.21, "learning_rate": 1.834808119897121e-05, "loss": 0.9998, "step": 1550 }, { "epoch": 0.21, "learning_rate": 1.834566343594523e-05, "loss": 0.9578, "step": 1551 }, { "epoch": 0.21, "learning_rate": 1.834324406442341e-05, "loss": 0.9833, "step": 1552 }, { "epoch": 0.21, "learning_rate": 1.8340823084872053e-05, "loss": 0.9544, "step": 1553 }, { "epoch": 0.21, "learning_rate": 1.8338400497757757e-05, "loss": 0.9151, "step": 1554 }, { "epoch": 0.21, "learning_rate": 1.8335976303547446e-05, "loss": 0.9517, "step": 1555 }, { "epoch": 0.21, "learning_rate": 1.833355050270834e-05, "loss": 1.0371, "step": 1556 }, { "epoch": 0.21, "learning_rate": 1.8331123095707975e-05, "loss": 0.9425, "step": 1557 }, { "epoch": 0.21, "learning_rate": 1.8328694083014196e-05, "loss": 0.962, "step": 1558 }, { "epoch": 0.21, "learning_rate": 1.832626346509516e-05, "loss": 0.9293, "step": 1559 }, { "epoch": 0.21, "learning_rate": 1.8323831242419322e-05, "loss": 0.9924, "step": 1560 }, { "epoch": 0.21, "learning_rate": 1.8321397415455467e-05, "loss": 1.047, "step": 1561 }, { "epoch": 0.21, "learning_rate": 1.8318961984672666e-05, "loss": 0.9994, "step": 1562 }, { "epoch": 0.21, "learning_rate": 1.8316524950540318e-05, "loss": 0.9824, "step": 1563 }, { "epoch": 0.21, "learning_rate": 1.8314086313528117e-05, "loss": 0.9476, "step": 1564 }, { "epoch": 0.21, "learning_rate": 1.8311646074106074e-05, "loss": 0.9966, "step": 1565 }, { "epoch": 0.21, "learning_rate": 1.830920423274451e-05, "loss": 0.917, "step": 1566 }, { "epoch": 0.21, "learning_rate": 1.8306760789914052e-05, "loss": 0.9642, "step": 1567 }, { "epoch": 0.21, "learning_rate": 1.830431574608563e-05, "loss": 1.0249, "step": 1568 }, { "epoch": 0.21, "learning_rate": 1.830186910173049e-05, "loss": 1.0208, "step": 1569 }, { "epoch": 0.21, "learning_rate": 1.8299420857320184e-05, "loss": 1.0034, "step": 1570 }, { "epoch": 0.21, "learning_rate": 1.8296971013326578e-05, "loss": 0.9249, "step": 1571 }, { "epoch": 0.21, "learning_rate": 1.8294519570221832e-05, "loss": 0.9453, "step": 1572 }, { "epoch": 0.21, "learning_rate": 1.8292066528478432e-05, "loss": 0.9487, "step": 1573 }, { "epoch": 0.21, "learning_rate": 1.8289611888569158e-05, "loss": 1.0058, "step": 1574 }, { "epoch": 0.21, "learning_rate": 1.8287155650967104e-05, "loss": 1.0338, "step": 1575 }, { "epoch": 0.21, "learning_rate": 1.828469781614567e-05, "loss": 0.9989, "step": 1576 }, { "epoch": 0.21, "learning_rate": 1.828223838457857e-05, "loss": 0.9635, "step": 1577 }, { "epoch": 0.21, "learning_rate": 1.827977735673982e-05, "loss": 0.9504, "step": 1578 }, { "epoch": 0.21, "learning_rate": 1.827731473310374e-05, "loss": 0.9932, "step": 1579 }, { "epoch": 0.21, "learning_rate": 1.8274850514144967e-05, "loss": 0.9444, "step": 1580 }, { "epoch": 0.21, "learning_rate": 1.8272384700338436e-05, "loss": 0.9744, "step": 1581 }, { "epoch": 0.21, "learning_rate": 1.8269917292159393e-05, "loss": 0.9559, "step": 1582 }, { "epoch": 0.21, "learning_rate": 1.82674482900834e-05, "loss": 0.9774, "step": 1583 }, { "epoch": 0.21, "learning_rate": 1.8264977694586315e-05, "loss": 0.9716, "step": 1584 }, { "epoch": 0.21, "learning_rate": 1.8262505506144304e-05, "loss": 0.9207, "step": 1585 }, { "epoch": 0.21, "learning_rate": 1.826003172523384e-05, "loss": 0.9448, "step": 1586 }, { "epoch": 0.22, "learning_rate": 1.8257556352331715e-05, "loss": 1.0077, "step": 1587 }, { "epoch": 0.22, "learning_rate": 1.8255079387915015e-05, "loss": 1.0002, "step": 1588 }, { "epoch": 0.22, "learning_rate": 1.825260083246113e-05, "loss": 0.883, "step": 1589 }, { "epoch": 0.22, "learning_rate": 1.8250120686447767e-05, "loss": 0.9915, "step": 1590 }, { "epoch": 0.22, "learning_rate": 1.8247638950352934e-05, "loss": 0.9895, "step": 1591 }, { "epoch": 0.22, "learning_rate": 1.824515562465495e-05, "loss": 0.9591, "step": 1592 }, { "epoch": 0.22, "learning_rate": 1.8242670709832436e-05, "loss": 1.0099, "step": 1593 }, { "epoch": 0.22, "learning_rate": 1.824018420636432e-05, "loss": 0.9432, "step": 1594 }, { "epoch": 0.22, "learning_rate": 1.823769611472983e-05, "loss": 1.035, "step": 1595 }, { "epoch": 0.22, "learning_rate": 1.8235206435408516e-05, "loss": 0.9152, "step": 1596 }, { "epoch": 0.22, "learning_rate": 1.8232715168880223e-05, "loss": 0.9665, "step": 1597 }, { "epoch": 0.22, "learning_rate": 1.82302223156251e-05, "loss": 0.9854, "step": 1598 }, { "epoch": 0.22, "learning_rate": 1.8227727876123605e-05, "loss": 1.0087, "step": 1599 }, { "epoch": 0.22, "learning_rate": 1.822523185085651e-05, "loss": 0.9147, "step": 1600 }, { "epoch": 0.22, "learning_rate": 1.8222734240304874e-05, "loss": 0.9421, "step": 1601 }, { "epoch": 0.22, "learning_rate": 1.8220235044950078e-05, "loss": 0.953, "step": 1602 }, { "epoch": 0.22, "learning_rate": 1.8217734265273802e-05, "loss": 1.0089, "step": 1603 }, { "epoch": 0.22, "learning_rate": 1.8215231901758034e-05, "loss": 0.923, "step": 1604 }, { "epoch": 0.22, "learning_rate": 1.8212727954885063e-05, "loss": 0.954, "step": 1605 }, { "epoch": 0.22, "learning_rate": 1.8210222425137485e-05, "loss": 0.9007, "step": 1606 }, { "epoch": 0.22, "learning_rate": 1.8207715312998203e-05, "loss": 0.9301, "step": 1607 }, { "epoch": 0.22, "learning_rate": 1.8205206618950427e-05, "loss": 0.9709, "step": 1608 }, { "epoch": 0.22, "learning_rate": 1.820269634347766e-05, "loss": 1.0042, "step": 1609 }, { "epoch": 0.22, "learning_rate": 1.8200184487063727e-05, "loss": 0.9277, "step": 1610 }, { "epoch": 0.22, "learning_rate": 1.819767105019274e-05, "loss": 1.0323, "step": 1611 }, { "epoch": 0.22, "learning_rate": 1.819515603334913e-05, "loss": 0.9492, "step": 1612 }, { "epoch": 0.22, "learning_rate": 1.819263943701763e-05, "loss": 0.9818, "step": 1613 }, { "epoch": 0.22, "learning_rate": 1.8190121261683268e-05, "loss": 0.9881, "step": 1614 }, { "epoch": 0.22, "learning_rate": 1.8187601507831388e-05, "loss": 0.933, "step": 1615 }, { "epoch": 0.22, "learning_rate": 1.818508017594763e-05, "loss": 1.0289, "step": 1616 }, { "epoch": 0.22, "learning_rate": 1.8182557266517945e-05, "loss": 1.0326, "step": 1617 }, { "epoch": 0.22, "learning_rate": 1.818003278002858e-05, "loss": 0.9888, "step": 1618 }, { "epoch": 0.22, "learning_rate": 1.8177506716966088e-05, "loss": 1.0305, "step": 1619 }, { "epoch": 0.22, "learning_rate": 1.8174979077817338e-05, "loss": 0.9659, "step": 1620 }, { "epoch": 0.22, "learning_rate": 1.817244986306948e-05, "loss": 0.9476, "step": 1621 }, { "epoch": 0.22, "learning_rate": 1.816991907320999e-05, "loss": 0.9193, "step": 1622 }, { "epoch": 0.22, "learning_rate": 1.8167386708726636e-05, "loss": 0.9249, "step": 1623 }, { "epoch": 0.22, "learning_rate": 1.8164852770107487e-05, "loss": 0.9553, "step": 1624 }, { "epoch": 0.22, "learning_rate": 1.8162317257840926e-05, "loss": 1.0112, "step": 1625 }, { "epoch": 0.22, "learning_rate": 1.8159780172415634e-05, "loss": 0.9509, "step": 1626 }, { "epoch": 0.22, "learning_rate": 1.815724151432059e-05, "loss": 0.9379, "step": 1627 }, { "epoch": 0.22, "learning_rate": 1.815470128404508e-05, "loss": 0.9755, "step": 1628 }, { "epoch": 0.22, "learning_rate": 1.8152159482078695e-05, "loss": 0.9166, "step": 1629 }, { "epoch": 0.22, "learning_rate": 1.8149616108911327e-05, "loss": 0.9434, "step": 1630 }, { "epoch": 0.22, "learning_rate": 1.8147071165033177e-05, "loss": 0.9538, "step": 1631 }, { "epoch": 0.22, "learning_rate": 1.8144524650934735e-05, "loss": 0.9452, "step": 1632 }, { "epoch": 0.22, "learning_rate": 1.8141976567106804e-05, "loss": 0.9436, "step": 1633 }, { "epoch": 0.22, "learning_rate": 1.813942691404049e-05, "loss": 0.9752, "step": 1634 }, { "epoch": 0.22, "learning_rate": 1.8136875692227197e-05, "loss": 0.9953, "step": 1635 }, { "epoch": 0.22, "learning_rate": 1.813432290215863e-05, "loss": 0.9839, "step": 1636 }, { "epoch": 0.22, "learning_rate": 1.8131768544326808e-05, "loss": 0.9524, "step": 1637 }, { "epoch": 0.22, "learning_rate": 1.8129212619224034e-05, "loss": 0.9554, "step": 1638 }, { "epoch": 0.22, "learning_rate": 1.8126655127342927e-05, "loss": 0.9564, "step": 1639 }, { "epoch": 0.22, "learning_rate": 1.8124096069176405e-05, "loss": 0.9869, "step": 1640 }, { "epoch": 0.22, "learning_rate": 1.812153544521768e-05, "loss": 0.977, "step": 1641 }, { "epoch": 0.22, "learning_rate": 1.811897325596028e-05, "loss": 1.0392, "step": 1642 }, { "epoch": 0.22, "learning_rate": 1.811640950189802e-05, "loss": 0.9837, "step": 1643 }, { "epoch": 0.22, "learning_rate": 1.8113844183525026e-05, "loss": 0.9432, "step": 1644 }, { "epoch": 0.22, "learning_rate": 1.8111277301335723e-05, "loss": 0.9985, "step": 1645 }, { "epoch": 0.22, "learning_rate": 1.8108708855824838e-05, "loss": 0.9812, "step": 1646 }, { "epoch": 0.22, "learning_rate": 1.81061388474874e-05, "loss": 0.9483, "step": 1647 }, { "epoch": 0.22, "learning_rate": 1.8103567276818736e-05, "loss": 0.9642, "step": 1648 }, { "epoch": 0.22, "learning_rate": 1.8100994144314477e-05, "loss": 0.9898, "step": 1649 }, { "epoch": 0.22, "learning_rate": 1.809841945047055e-05, "loss": 0.9552, "step": 1650 }, { "epoch": 0.22, "learning_rate": 1.809584319578319e-05, "loss": 0.9484, "step": 1651 }, { "epoch": 0.22, "learning_rate": 1.8093265380748932e-05, "loss": 0.9534, "step": 1652 }, { "epoch": 0.22, "learning_rate": 1.8090686005864607e-05, "loss": 1.0134, "step": 1653 }, { "epoch": 0.22, "learning_rate": 1.808810507162735e-05, "loss": 0.9404, "step": 1654 }, { "epoch": 0.22, "learning_rate": 1.8085522578534587e-05, "loss": 0.9459, "step": 1655 }, { "epoch": 0.22, "learning_rate": 1.808293852708407e-05, "loss": 1.0094, "step": 1656 }, { "epoch": 0.22, "learning_rate": 1.808035291777382e-05, "loss": 1.0126, "step": 1657 }, { "epoch": 0.22, "learning_rate": 1.8077765751102183e-05, "loss": 0.8636, "step": 1658 }, { "epoch": 0.22, "learning_rate": 1.8075177027567785e-05, "loss": 0.9545, "step": 1659 }, { "epoch": 0.22, "learning_rate": 1.8072586747669568e-05, "loss": 1.0113, "step": 1660 }, { "epoch": 0.23, "learning_rate": 1.806999491190677e-05, "loss": 1.0, "step": 1661 }, { "epoch": 0.23, "learning_rate": 1.8067401520778918e-05, "loss": 1.0119, "step": 1662 }, { "epoch": 0.23, "learning_rate": 1.8064806574785855e-05, "loss": 0.9158, "step": 1663 }, { "epoch": 0.23, "learning_rate": 1.8062210074427713e-05, "loss": 1.0085, "step": 1664 }, { "epoch": 0.23, "learning_rate": 1.805961202020493e-05, "loss": 0.9351, "step": 1665 }, { "epoch": 0.23, "learning_rate": 1.8057012412618236e-05, "loss": 0.96, "step": 1666 }, { "epoch": 0.23, "learning_rate": 1.8054411252168665e-05, "loss": 0.9378, "step": 1667 }, { "epoch": 0.23, "learning_rate": 1.805180853935755e-05, "loss": 0.9916, "step": 1668 }, { "epoch": 0.23, "learning_rate": 1.804920427468653e-05, "loss": 0.9647, "step": 1669 }, { "epoch": 0.23, "learning_rate": 1.8046598458657528e-05, "loss": 0.9679, "step": 1670 }, { "epoch": 0.23, "learning_rate": 1.8043991091772778e-05, "loss": 0.9828, "step": 1671 }, { "epoch": 0.23, "learning_rate": 1.804138217453481e-05, "loss": 0.9541, "step": 1672 }, { "epoch": 0.23, "learning_rate": 1.8038771707446446e-05, "loss": 0.9654, "step": 1673 }, { "epoch": 0.23, "learning_rate": 1.8036159691010816e-05, "loss": 1.0139, "step": 1674 }, { "epoch": 0.23, "learning_rate": 1.8033546125731347e-05, "loss": 0.9778, "step": 1675 }, { "epoch": 0.23, "learning_rate": 1.8030931012111767e-05, "loss": 0.9353, "step": 1676 }, { "epoch": 0.23, "learning_rate": 1.8028314350656085e-05, "loss": 0.9186, "step": 1677 }, { "epoch": 0.23, "learning_rate": 1.8025696141868635e-05, "loss": 0.9375, "step": 1678 }, { "epoch": 0.23, "learning_rate": 1.8023076386254025e-05, "loss": 0.9865, "step": 1679 }, { "epoch": 0.23, "learning_rate": 1.8020455084317178e-05, "loss": 0.9345, "step": 1680 }, { "epoch": 0.23, "learning_rate": 1.801783223656331e-05, "loss": 0.9977, "step": 1681 }, { "epoch": 0.23, "learning_rate": 1.801520784349793e-05, "loss": 0.9804, "step": 1682 }, { "epoch": 0.23, "learning_rate": 1.8012581905626847e-05, "loss": 0.9714, "step": 1683 }, { "epoch": 0.23, "learning_rate": 1.8009954423456175e-05, "loss": 0.9746, "step": 1684 }, { "epoch": 0.23, "learning_rate": 1.800732539749232e-05, "loss": 0.9291, "step": 1685 }, { "epoch": 0.23, "learning_rate": 1.800469482824198e-05, "loss": 0.9362, "step": 1686 }, { "epoch": 0.23, "learning_rate": 1.8002062716212162e-05, "loss": 0.9606, "step": 1687 }, { "epoch": 0.23, "learning_rate": 1.799942906191016e-05, "loss": 1.0171, "step": 1688 }, { "epoch": 0.23, "learning_rate": 1.7996793865843568e-05, "loss": 0.9834, "step": 1689 }, { "epoch": 0.23, "learning_rate": 1.7994157128520282e-05, "loss": 1.0705, "step": 1690 }, { "epoch": 0.23, "learning_rate": 1.7991518850448494e-05, "loss": 1.0003, "step": 1691 }, { "epoch": 0.23, "learning_rate": 1.7988879032136687e-05, "loss": 0.8986, "step": 1692 }, { "epoch": 0.23, "learning_rate": 1.7986237674093646e-05, "loss": 0.9447, "step": 1693 }, { "epoch": 0.23, "learning_rate": 1.798359477682845e-05, "loss": 0.916, "step": 1694 }, { "epoch": 0.23, "learning_rate": 1.798095034085048e-05, "loss": 0.92, "step": 1695 }, { "epoch": 0.23, "learning_rate": 1.7978304366669407e-05, "loss": 0.9497, "step": 1696 }, { "epoch": 0.23, "learning_rate": 1.79756568547952e-05, "loss": 1.0109, "step": 1697 }, { "epoch": 0.23, "learning_rate": 1.7973007805738124e-05, "loss": 0.9404, "step": 1698 }, { "epoch": 0.23, "learning_rate": 1.7970357220008747e-05, "loss": 0.8838, "step": 1699 }, { "epoch": 0.23, "learning_rate": 1.7967705098117923e-05, "loss": 0.9284, "step": 1700 }, { "epoch": 0.23, "learning_rate": 1.796505144057681e-05, "loss": 1.0323, "step": 1701 }, { "epoch": 0.23, "learning_rate": 1.7962396247896855e-05, "loss": 0.9774, "step": 1702 }, { "epoch": 0.23, "learning_rate": 1.795973952058981e-05, "loss": 1.0104, "step": 1703 }, { "epoch": 0.23, "learning_rate": 1.7957081259167714e-05, "loss": 0.9791, "step": 1704 }, { "epoch": 0.23, "learning_rate": 1.7954421464142908e-05, "loss": 1.0098, "step": 1705 }, { "epoch": 0.23, "learning_rate": 1.7951760136028023e-05, "loss": 0.9812, "step": 1706 }, { "epoch": 0.23, "learning_rate": 1.794909727533599e-05, "loss": 0.9146, "step": 1707 }, { "epoch": 0.23, "learning_rate": 1.7946432882580032e-05, "loss": 0.9642, "step": 1708 }, { "epoch": 0.23, "learning_rate": 1.794376695827367e-05, "loss": 0.9413, "step": 1709 }, { "epoch": 0.23, "learning_rate": 1.7941099502930716e-05, "loss": 0.9628, "step": 1710 }, { "epoch": 0.23, "learning_rate": 1.793843051706529e-05, "loss": 0.9837, "step": 1711 }, { "epoch": 0.23, "learning_rate": 1.7935760001191785e-05, "loss": 0.9106, "step": 1712 }, { "epoch": 0.23, "learning_rate": 1.7933087955824908e-05, "loss": 1.0369, "step": 1713 }, { "epoch": 0.23, "learning_rate": 1.7930414381479653e-05, "loss": 0.9768, "step": 1714 }, { "epoch": 0.23, "learning_rate": 1.7927739278671307e-05, "loss": 0.9501, "step": 1715 }, { "epoch": 0.23, "learning_rate": 1.7925062647915462e-05, "loss": 0.9968, "step": 1716 }, { "epoch": 0.23, "learning_rate": 1.7922384489727985e-05, "loss": 1.0061, "step": 1717 }, { "epoch": 0.23, "learning_rate": 1.7919704804625055e-05, "loss": 0.9709, "step": 1718 }, { "epoch": 0.23, "learning_rate": 1.7917023593123143e-05, "loss": 1.0177, "step": 1719 }, { "epoch": 0.23, "learning_rate": 1.7914340855739004e-05, "loss": 0.9596, "step": 1720 }, { "epoch": 0.23, "learning_rate": 1.7911656592989697e-05, "loss": 0.9936, "step": 1721 }, { "epoch": 0.23, "learning_rate": 1.790897080539257e-05, "loss": 1.0041, "step": 1722 }, { "epoch": 0.23, "learning_rate": 1.790628349346527e-05, "loss": 0.948, "step": 1723 }, { "epoch": 0.23, "learning_rate": 1.7903594657725728e-05, "loss": 0.9936, "step": 1724 }, { "epoch": 0.23, "learning_rate": 1.790090429869218e-05, "loss": 0.9526, "step": 1725 }, { "epoch": 0.23, "learning_rate": 1.789821241688315e-05, "loss": 0.944, "step": 1726 }, { "epoch": 0.23, "learning_rate": 1.7895519012817452e-05, "loss": 1.0678, "step": 1727 }, { "epoch": 0.23, "learning_rate": 1.78928240870142e-05, "loss": 0.9888, "step": 1728 }, { "epoch": 0.23, "learning_rate": 1.7890127639992803e-05, "loss": 0.9616, "step": 1729 }, { "epoch": 0.23, "learning_rate": 1.7887429672272954e-05, "loss": 0.9124, "step": 1730 }, { "epoch": 0.23, "learning_rate": 1.7884730184374645e-05, "loss": 0.93, "step": 1731 }, { "epoch": 0.23, "learning_rate": 1.7882029176818157e-05, "loss": 0.9227, "step": 1732 }, { "epoch": 0.23, "learning_rate": 1.787932665012407e-05, "loss": 0.9626, "step": 1733 }, { "epoch": 0.24, "learning_rate": 1.787662260481326e-05, "loss": 0.9464, "step": 1734 }, { "epoch": 0.24, "learning_rate": 1.7873917041406875e-05, "loss": 0.9718, "step": 1735 }, { "epoch": 0.24, "learning_rate": 1.7871209960426383e-05, "loss": 0.9559, "step": 1736 }, { "epoch": 0.24, "learning_rate": 1.7868501362393525e-05, "loss": 0.9934, "step": 1737 }, { "epoch": 0.24, "learning_rate": 1.7865791247830344e-05, "loss": 0.9212, "step": 1738 }, { "epoch": 0.24, "learning_rate": 1.7863079617259168e-05, "loss": 1.0252, "step": 1739 }, { "epoch": 0.24, "learning_rate": 1.7860366471202622e-05, "loss": 0.9747, "step": 1740 }, { "epoch": 0.24, "learning_rate": 1.785765181018363e-05, "loss": 0.9096, "step": 1741 }, { "epoch": 0.24, "learning_rate": 1.785493563472539e-05, "loss": 0.9142, "step": 1742 }, { "epoch": 0.24, "learning_rate": 1.7852217945351404e-05, "loss": 0.9625, "step": 1743 }, { "epoch": 0.24, "learning_rate": 1.784949874258547e-05, "loss": 0.9543, "step": 1744 }, { "epoch": 0.24, "learning_rate": 1.7846778026951667e-05, "loss": 0.9156, "step": 1745 }, { "epoch": 0.24, "learning_rate": 1.7844055798974372e-05, "loss": 0.9843, "step": 1746 }, { "epoch": 0.24, "learning_rate": 1.7841332059178254e-05, "loss": 0.9463, "step": 1747 }, { "epoch": 0.24, "learning_rate": 1.7838606808088265e-05, "loss": 0.9616, "step": 1748 }, { "epoch": 0.24, "learning_rate": 1.783588004622966e-05, "loss": 0.9714, "step": 1749 }, { "epoch": 0.24, "learning_rate": 1.7833151774127978e-05, "loss": 0.8893, "step": 1750 }, { "epoch": 0.24, "learning_rate": 1.7830421992309047e-05, "loss": 1.0142, "step": 1751 }, { "epoch": 0.24, "learning_rate": 1.7827690701298995e-05, "loss": 0.9657, "step": 1752 }, { "epoch": 0.24, "learning_rate": 1.7824957901624236e-05, "loss": 0.9944, "step": 1753 }, { "epoch": 0.24, "learning_rate": 1.7822223593811468e-05, "loss": 0.8556, "step": 1754 }, { "epoch": 0.24, "learning_rate": 1.781948777838769e-05, "loss": 0.9401, "step": 1755 }, { "epoch": 0.24, "learning_rate": 1.781675045588019e-05, "loss": 0.9469, "step": 1756 }, { "epoch": 0.24, "learning_rate": 1.781401162681654e-05, "loss": 0.9755, "step": 1757 }, { "epoch": 0.24, "learning_rate": 1.781127129172461e-05, "loss": 0.9456, "step": 1758 }, { "epoch": 0.24, "learning_rate": 1.780852945113255e-05, "loss": 0.9016, "step": 1759 }, { "epoch": 0.24, "learning_rate": 1.7805786105568813e-05, "loss": 1.0371, "step": 1760 }, { "epoch": 0.24, "learning_rate": 1.7803041255562137e-05, "loss": 0.901, "step": 1761 }, { "epoch": 0.24, "learning_rate": 1.780029490164154e-05, "loss": 0.9463, "step": 1762 }, { "epoch": 0.24, "learning_rate": 1.779754704433635e-05, "loss": 0.935, "step": 1763 }, { "epoch": 0.24, "learning_rate": 1.7794797684176165e-05, "loss": 0.9288, "step": 1764 }, { "epoch": 0.24, "learning_rate": 1.7792046821690885e-05, "loss": 0.9898, "step": 1765 }, { "epoch": 0.24, "learning_rate": 1.7789294457410693e-05, "loss": 1.0141, "step": 1766 }, { "epoch": 0.24, "learning_rate": 1.7786540591866067e-05, "loss": 0.9858, "step": 1767 }, { "epoch": 0.24, "learning_rate": 1.7783785225587774e-05, "loss": 0.9433, "step": 1768 }, { "epoch": 0.24, "learning_rate": 1.7781028359106856e-05, "loss": 0.9405, "step": 1769 }, { "epoch": 0.24, "learning_rate": 1.777826999295467e-05, "loss": 1.0301, "step": 1770 }, { "epoch": 0.24, "learning_rate": 1.777551012766284e-05, "loss": 0.9365, "step": 1771 }, { "epoch": 0.24, "learning_rate": 1.7772748763763288e-05, "loss": 0.9458, "step": 1772 }, { "epoch": 0.24, "learning_rate": 1.7769985901788223e-05, "loss": 1.0115, "step": 1773 }, { "epoch": 0.24, "learning_rate": 1.7767221542270146e-05, "loss": 0.9445, "step": 1774 }, { "epoch": 0.24, "learning_rate": 1.776445568574184e-05, "loss": 0.9787, "step": 1775 }, { "epoch": 0.24, "learning_rate": 1.7761688332736385e-05, "loss": 0.8866, "step": 1776 }, { "epoch": 0.24, "learning_rate": 1.7758919483787146e-05, "loss": 0.9487, "step": 1777 }, { "epoch": 0.24, "learning_rate": 1.7756149139427764e-05, "loss": 0.9821, "step": 1778 }, { "epoch": 0.24, "learning_rate": 1.7753377300192196e-05, "loss": 0.9851, "step": 1779 }, { "epoch": 0.24, "learning_rate": 1.7750603966614654e-05, "loss": 0.9417, "step": 1780 }, { "epoch": 0.24, "learning_rate": 1.7747829139229664e-05, "loss": 0.9291, "step": 1781 }, { "epoch": 0.24, "learning_rate": 1.7745052818572033e-05, "loss": 0.8792, "step": 1782 }, { "epoch": 0.24, "learning_rate": 1.7742275005176845e-05, "loss": 0.9736, "step": 1783 }, { "epoch": 0.24, "learning_rate": 1.7739495699579488e-05, "loss": 0.9198, "step": 1784 }, { "epoch": 0.24, "learning_rate": 1.7736714902315624e-05, "loss": 1.0218, "step": 1785 }, { "epoch": 0.24, "learning_rate": 1.773393261392121e-05, "loss": 0.9844, "step": 1786 }, { "epoch": 0.24, "learning_rate": 1.773114883493249e-05, "loss": 0.9828, "step": 1787 }, { "epoch": 0.24, "learning_rate": 1.772836356588599e-05, "loss": 0.9449, "step": 1788 }, { "epoch": 0.24, "learning_rate": 1.7725576807318533e-05, "loss": 0.9558, "step": 1789 }, { "epoch": 0.24, "learning_rate": 1.772278855976721e-05, "loss": 0.9777, "step": 1790 }, { "epoch": 0.24, "learning_rate": 1.7719998823769432e-05, "loss": 0.9548, "step": 1791 }, { "epoch": 0.24, "learning_rate": 1.771720759986286e-05, "loss": 0.9543, "step": 1792 }, { "epoch": 0.24, "learning_rate": 1.771441488858547e-05, "loss": 0.9052, "step": 1793 }, { "epoch": 0.24, "learning_rate": 1.7711620690475505e-05, "loss": 0.8831, "step": 1794 }, { "epoch": 0.24, "learning_rate": 1.7708825006071502e-05, "loss": 0.9782, "step": 1795 }, { "epoch": 0.24, "learning_rate": 1.7706027835912296e-05, "loss": 0.9648, "step": 1796 }, { "epoch": 0.24, "learning_rate": 1.7703229180536988e-05, "loss": 0.9639, "step": 1797 }, { "epoch": 0.24, "learning_rate": 1.770042904048498e-05, "loss": 0.9241, "step": 1798 }, { "epoch": 0.24, "learning_rate": 1.7697627416295953e-05, "loss": 0.9141, "step": 1799 }, { "epoch": 0.24, "learning_rate": 1.7694824308509875e-05, "loss": 0.9454, "step": 1800 }, { "epoch": 0.24, "learning_rate": 1.7692019717667002e-05, "loss": 0.9594, "step": 1801 }, { "epoch": 0.24, "learning_rate": 1.7689213644307875e-05, "loss": 1.0156, "step": 1802 }, { "epoch": 0.24, "learning_rate": 1.7686406088973324e-05, "loss": 0.9247, "step": 1803 }, { "epoch": 0.24, "learning_rate": 1.7683597052204456e-05, "loss": 0.9691, "step": 1804 }, { "epoch": 0.24, "learning_rate": 1.7680786534542673e-05, "loss": 0.9701, "step": 1805 }, { "epoch": 0.24, "learning_rate": 1.7677974536529657e-05, "loss": 0.9593, "step": 1806 }, { "epoch": 0.24, "learning_rate": 1.7675161058707372e-05, "loss": 0.9379, "step": 1807 }, { "epoch": 0.25, "learning_rate": 1.767234610161808e-05, "loss": 0.9119, "step": 1808 }, { "epoch": 0.25, "learning_rate": 1.7669529665804312e-05, "loss": 1.0216, "step": 1809 }, { "epoch": 0.25, "learning_rate": 1.76667117518089e-05, "loss": 1.0038, "step": 1810 }, { "epoch": 0.25, "learning_rate": 1.7663892360174943e-05, "loss": 0.9289, "step": 1811 }, { "epoch": 0.25, "learning_rate": 1.7661071491445843e-05, "loss": 0.9195, "step": 1812 }, { "epoch": 0.25, "learning_rate": 1.7658249146165273e-05, "loss": 0.9281, "step": 1813 }, { "epoch": 0.25, "learning_rate": 1.76554253248772e-05, "loss": 0.9527, "step": 1814 }, { "epoch": 0.25, "learning_rate": 1.765260002812587e-05, "loss": 1.0274, "step": 1815 }, { "epoch": 0.25, "learning_rate": 1.7649773256455807e-05, "loss": 0.9906, "step": 1816 }, { "epoch": 0.25, "learning_rate": 1.7646945010411843e-05, "loss": 0.9174, "step": 1817 }, { "epoch": 0.25, "learning_rate": 1.764411529053906e-05, "loss": 0.8993, "step": 1818 }, { "epoch": 0.25, "learning_rate": 1.764128409738286e-05, "loss": 0.8561, "step": 1819 }, { "epoch": 0.25, "learning_rate": 1.7638451431488897e-05, "loss": 0.9091, "step": 1820 }, { "epoch": 0.25, "learning_rate": 1.7635617293403127e-05, "loss": 0.9134, "step": 1821 }, { "epoch": 0.25, "learning_rate": 1.7632781683671787e-05, "loss": 0.9546, "step": 1822 }, { "epoch": 0.25, "learning_rate": 1.7629944602841398e-05, "loss": 0.9377, "step": 1823 }, { "epoch": 0.25, "learning_rate": 1.762710605145876e-05, "loss": 0.9576, "step": 1824 }, { "epoch": 0.25, "learning_rate": 1.762426603007096e-05, "loss": 1.0, "step": 1825 }, { "epoch": 0.25, "learning_rate": 1.7621424539225368e-05, "loss": 0.993, "step": 1826 }, { "epoch": 0.25, "learning_rate": 1.7618581579469638e-05, "loss": 0.9132, "step": 1827 }, { "epoch": 0.25, "learning_rate": 1.7615737151351705e-05, "loss": 0.9448, "step": 1828 }, { "epoch": 0.25, "learning_rate": 1.7612891255419788e-05, "loss": 0.9649, "step": 1829 }, { "epoch": 0.25, "learning_rate": 1.7610043892222382e-05, "loss": 0.9829, "step": 1830 }, { "epoch": 0.25, "learning_rate": 1.7607195062308285e-05, "loss": 0.9216, "step": 1831 }, { "epoch": 0.25, "learning_rate": 1.7604344766226557e-05, "loss": 0.9159, "step": 1832 }, { "epoch": 0.25, "learning_rate": 1.7601493004526546e-05, "loss": 0.9794, "step": 1833 }, { "epoch": 0.25, "learning_rate": 1.7598639777757888e-05, "loss": 0.9193, "step": 1834 }, { "epoch": 0.25, "learning_rate": 1.7595785086470494e-05, "loss": 0.9422, "step": 1835 }, { "epoch": 0.25, "learning_rate": 1.7592928931214567e-05, "loss": 0.9625, "step": 1836 }, { "epoch": 0.25, "learning_rate": 1.759007131254058e-05, "loss": 0.9705, "step": 1837 }, { "epoch": 0.25, "learning_rate": 1.7587212230999298e-05, "loss": 0.8629, "step": 1838 }, { "epoch": 0.25, "learning_rate": 1.758435168714176e-05, "loss": 0.9352, "step": 1839 }, { "epoch": 0.25, "learning_rate": 1.75814896815193e-05, "loss": 0.962, "step": 1840 }, { "epoch": 0.25, "learning_rate": 1.7578626214683515e-05, "loss": 0.9127, "step": 1841 }, { "epoch": 0.25, "learning_rate": 1.7575761287186296e-05, "loss": 0.9365, "step": 1842 }, { "epoch": 0.25, "learning_rate": 1.7572894899579815e-05, "loss": 0.9046, "step": 1843 }, { "epoch": 0.25, "learning_rate": 1.757002705241652e-05, "loss": 0.9117, "step": 1844 }, { "epoch": 0.25, "learning_rate": 1.7567157746249148e-05, "loss": 0.9815, "step": 1845 }, { "epoch": 0.25, "learning_rate": 1.7564286981630713e-05, "loss": 0.959, "step": 1846 }, { "epoch": 0.25, "learning_rate": 1.7561414759114504e-05, "loss": 0.9052, "step": 1847 }, { "epoch": 0.25, "learning_rate": 1.7558541079254098e-05, "loss": 0.9915, "step": 1848 }, { "epoch": 0.25, "learning_rate": 1.7555665942603363e-05, "loss": 0.968, "step": 1849 }, { "epoch": 0.25, "learning_rate": 1.755278934971642e-05, "loss": 0.9288, "step": 1850 }, { "epoch": 0.25, "learning_rate": 1.7549911301147697e-05, "loss": 0.9348, "step": 1851 }, { "epoch": 0.25, "learning_rate": 1.754703179745189e-05, "loss": 0.9339, "step": 1852 }, { "epoch": 0.25, "learning_rate": 1.754415083918398e-05, "loss": 0.9795, "step": 1853 }, { "epoch": 0.25, "learning_rate": 1.7541268426899222e-05, "loss": 0.9446, "step": 1854 }, { "epoch": 0.25, "learning_rate": 1.7538384561153162e-05, "loss": 0.9682, "step": 1855 }, { "epoch": 0.25, "learning_rate": 1.753549924250162e-05, "loss": 0.9915, "step": 1856 }, { "epoch": 0.25, "learning_rate": 1.753261247150069e-05, "loss": 0.9934, "step": 1857 }, { "epoch": 0.25, "learning_rate": 1.7529724248706754e-05, "loss": 0.9665, "step": 1858 }, { "epoch": 0.25, "learning_rate": 1.7526834574676475e-05, "loss": 0.9519, "step": 1859 }, { "epoch": 0.25, "learning_rate": 1.7523943449966786e-05, "loss": 0.97, "step": 1860 }, { "epoch": 0.25, "learning_rate": 1.7521050875134916e-05, "loss": 0.9239, "step": 1861 }, { "epoch": 0.25, "learning_rate": 1.751815685073835e-05, "loss": 0.8877, "step": 1862 }, { "epoch": 0.25, "learning_rate": 1.751526137733488e-05, "loss": 1.0351, "step": 1863 }, { "epoch": 0.25, "learning_rate": 1.7512364455482552e-05, "loss": 1.0145, "step": 1864 }, { "epoch": 0.25, "learning_rate": 1.750946608573971e-05, "loss": 0.9234, "step": 1865 }, { "epoch": 0.25, "learning_rate": 1.7506566268664963e-05, "loss": 0.9957, "step": 1866 }, { "epoch": 0.25, "learning_rate": 1.7503665004817213e-05, "loss": 0.9648, "step": 1867 }, { "epoch": 0.25, "learning_rate": 1.7500762294755624e-05, "loss": 0.9313, "step": 1868 }, { "epoch": 0.25, "learning_rate": 1.7497858139039654e-05, "loss": 0.9404, "step": 1869 }, { "epoch": 0.25, "learning_rate": 1.7494952538229034e-05, "loss": 0.9687, "step": 1870 }, { "epoch": 0.25, "learning_rate": 1.7492045492883764e-05, "loss": 0.9729, "step": 1871 }, { "epoch": 0.25, "learning_rate": 1.7489137003564145e-05, "loss": 0.9031, "step": 1872 }, { "epoch": 0.25, "learning_rate": 1.7486227070830734e-05, "loss": 0.9792, "step": 1873 }, { "epoch": 0.25, "learning_rate": 1.748331569524438e-05, "loss": 1.0459, "step": 1874 }, { "epoch": 0.25, "learning_rate": 1.7480402877366195e-05, "loss": 0.9294, "step": 1875 }, { "epoch": 0.25, "learning_rate": 1.747748861775759e-05, "loss": 0.9221, "step": 1876 }, { "epoch": 0.25, "learning_rate": 1.747457291698024e-05, "loss": 0.9865, "step": 1877 }, { "epoch": 0.25, "learning_rate": 1.7471655775596097e-05, "loss": 0.9457, "step": 1878 }, { "epoch": 0.25, "learning_rate": 1.7468737194167394e-05, "loss": 0.994, "step": 1879 }, { "epoch": 0.25, "learning_rate": 1.746581717325665e-05, "loss": 0.9112, "step": 1880 }, { "epoch": 0.25, "learning_rate": 1.7462895713426647e-05, "loss": 1.0317, "step": 1881 }, { "epoch": 0.26, "learning_rate": 1.7459972815240452e-05, "loss": 0.9749, "step": 1882 }, { "epoch": 0.26, "learning_rate": 1.7457048479261406e-05, "loss": 0.9658, "step": 1883 }, { "epoch": 0.26, "learning_rate": 1.745412270605313e-05, "loss": 1.0088, "step": 1884 }, { "epoch": 0.26, "learning_rate": 1.745119549617952e-05, "loss": 1.0317, "step": 1885 }, { "epoch": 0.26, "learning_rate": 1.7448266850204754e-05, "loss": 0.8992, "step": 1886 }, { "epoch": 0.26, "learning_rate": 1.7445336768693274e-05, "loss": 0.892, "step": 1887 }, { "epoch": 0.26, "learning_rate": 1.744240525220982e-05, "loss": 0.9599, "step": 1888 }, { "epoch": 0.26, "learning_rate": 1.7439472301319385e-05, "loss": 0.977, "step": 1889 }, { "epoch": 0.26, "learning_rate": 1.7436537916587254e-05, "loss": 1.0005, "step": 1890 }, { "epoch": 0.26, "learning_rate": 1.7433602098578983e-05, "loss": 0.9488, "step": 1891 }, { "epoch": 0.26, "learning_rate": 1.74306648478604e-05, "loss": 0.9254, "step": 1892 }, { "epoch": 0.26, "learning_rate": 1.7427726164997624e-05, "loss": 0.9981, "step": 1893 }, { "epoch": 0.26, "learning_rate": 1.7424786050557036e-05, "loss": 0.9518, "step": 1894 }, { "epoch": 0.26, "learning_rate": 1.7421844505105293e-05, "loss": 1.0017, "step": 1895 }, { "epoch": 0.26, "learning_rate": 1.7418901529209336e-05, "loss": 0.9577, "step": 1896 }, { "epoch": 0.26, "learning_rate": 1.7415957123436373e-05, "loss": 0.9804, "step": 1897 }, { "epoch": 0.26, "learning_rate": 1.7413011288353896e-05, "loss": 1.0214, "step": 1898 }, { "epoch": 0.26, "learning_rate": 1.7410064024529667e-05, "loss": 0.8907, "step": 1899 }, { "epoch": 0.26, "learning_rate": 1.740711533253173e-05, "loss": 0.9787, "step": 1900 }, { "epoch": 0.26, "learning_rate": 1.740416521292839e-05, "loss": 0.9777, "step": 1901 }, { "epoch": 0.26, "learning_rate": 1.740121366628824e-05, "loss": 0.9081, "step": 1902 }, { "epoch": 0.26, "learning_rate": 1.7398260693180152e-05, "loss": 0.9206, "step": 1903 }, { "epoch": 0.26, "learning_rate": 1.7395306294173254e-05, "loss": 0.9433, "step": 1904 }, { "epoch": 0.26, "learning_rate": 1.7392350469836965e-05, "loss": 0.921, "step": 1905 }, { "epoch": 0.26, "learning_rate": 1.7389393220740975e-05, "loss": 1.0343, "step": 1906 }, { "epoch": 0.26, "learning_rate": 1.7386434547455246e-05, "loss": 0.9614, "step": 1907 }, { "epoch": 0.26, "learning_rate": 1.7383474450550014e-05, "loss": 0.9873, "step": 1908 }, { "epoch": 0.26, "learning_rate": 1.7380512930595794e-05, "loss": 1.007, "step": 1909 }, { "epoch": 0.26, "learning_rate": 1.7377549988163373e-05, "loss": 1.0504, "step": 1910 }, { "epoch": 0.26, "learning_rate": 1.7374585623823808e-05, "loss": 0.9345, "step": 1911 }, { "epoch": 0.26, "learning_rate": 1.7371619838148436e-05, "loss": 0.9204, "step": 1912 }, { "epoch": 0.26, "learning_rate": 1.736865263170887e-05, "loss": 0.9469, "step": 1913 }, { "epoch": 0.26, "learning_rate": 1.7365684005076985e-05, "loss": 0.9931, "step": 1914 }, { "epoch": 0.26, "learning_rate": 1.7362713958824943e-05, "loss": 0.9929, "step": 1915 }, { "epoch": 0.26, "learning_rate": 1.735974249352517e-05, "loss": 0.9499, "step": 1916 }, { "epoch": 0.26, "learning_rate": 1.7356769609750374e-05, "loss": 0.992, "step": 1917 }, { "epoch": 0.26, "learning_rate": 1.7353795308073526e-05, "loss": 0.9629, "step": 1918 }, { "epoch": 0.26, "learning_rate": 1.735081958906788e-05, "loss": 0.9341, "step": 1919 }, { "epoch": 0.26, "learning_rate": 1.7347842453306953e-05, "loss": 0.9381, "step": 1920 }, { "epoch": 0.26, "learning_rate": 1.7344863901364554e-05, "loss": 0.9408, "step": 1921 }, { "epoch": 0.26, "learning_rate": 1.734188393381474e-05, "loss": 0.9897, "step": 1922 }, { "epoch": 0.26, "learning_rate": 1.733890255123186e-05, "loss": 0.9942, "step": 1923 }, { "epoch": 0.26, "learning_rate": 1.7335919754190523e-05, "loss": 0.9519, "step": 1924 }, { "epoch": 0.26, "learning_rate": 1.7332935543265625e-05, "loss": 0.912, "step": 1925 }, { "epoch": 0.26, "learning_rate": 1.7329949919032315e-05, "loss": 0.9659, "step": 1926 }, { "epoch": 0.26, "learning_rate": 1.732696288206603e-05, "loss": 1.0005, "step": 1927 }, { "epoch": 0.26, "learning_rate": 1.7323974432942478e-05, "loss": 0.9391, "step": 1928 }, { "epoch": 0.26, "learning_rate": 1.7320984572237636e-05, "loss": 0.9929, "step": 1929 }, { "epoch": 0.26, "learning_rate": 1.7317993300527747e-05, "loss": 1.0276, "step": 1930 }, { "epoch": 0.26, "learning_rate": 1.7315000618389335e-05, "loss": 0.9305, "step": 1931 }, { "epoch": 0.26, "learning_rate": 1.7312006526399192e-05, "loss": 0.9748, "step": 1932 }, { "epoch": 0.26, "learning_rate": 1.7309011025134385e-05, "loss": 0.9052, "step": 1933 }, { "epoch": 0.26, "learning_rate": 1.7306014115172244e-05, "loss": 1.0183, "step": 1934 }, { "epoch": 0.26, "learning_rate": 1.730301579709038e-05, "loss": 0.8903, "step": 1935 }, { "epoch": 0.26, "learning_rate": 1.7300016071466674e-05, "loss": 0.9749, "step": 1936 }, { "epoch": 0.26, "learning_rate": 1.7297014938879276e-05, "loss": 0.9377, "step": 1937 }, { "epoch": 0.26, "learning_rate": 1.7294012399906603e-05, "loss": 1.019, "step": 1938 }, { "epoch": 0.26, "learning_rate": 1.7291008455127346e-05, "loss": 0.9613, "step": 1939 }, { "epoch": 0.26, "learning_rate": 1.7288003105120474e-05, "loss": 0.9652, "step": 1940 }, { "epoch": 0.26, "learning_rate": 1.728499635046522e-05, "loss": 0.9083, "step": 1941 }, { "epoch": 0.26, "learning_rate": 1.7281988191741085e-05, "loss": 1.0138, "step": 1942 }, { "epoch": 0.26, "learning_rate": 1.727897862952785e-05, "loss": 0.9977, "step": 1943 }, { "epoch": 0.26, "learning_rate": 1.7275967664405558e-05, "loss": 0.8908, "step": 1944 }, { "epoch": 0.26, "learning_rate": 1.7272955296954524e-05, "loss": 0.9633, "step": 1945 }, { "epoch": 0.26, "learning_rate": 1.7269941527755337e-05, "loss": 0.958, "step": 1946 }, { "epoch": 0.26, "learning_rate": 1.7266926357388852e-05, "loss": 0.9247, "step": 1947 }, { "epoch": 0.26, "learning_rate": 1.7263909786436194e-05, "loss": 0.9867, "step": 1948 }, { "epoch": 0.26, "learning_rate": 1.726089181547877e-05, "loss": 1.002, "step": 1949 }, { "epoch": 0.26, "learning_rate": 1.7257872445098232e-05, "loss": 1.0269, "step": 1950 }, { "epoch": 0.26, "learning_rate": 1.7254851675876526e-05, "loss": 0.9528, "step": 1951 }, { "epoch": 0.26, "learning_rate": 1.7251829508395855e-05, "loss": 1.0324, "step": 1952 }, { "epoch": 0.26, "learning_rate": 1.7248805943238696e-05, "loss": 0.9419, "step": 1953 }, { "epoch": 0.26, "learning_rate": 1.724578098098779e-05, "loss": 1.0024, "step": 1954 }, { "epoch": 0.26, "learning_rate": 1.7242754622226156e-05, "loss": 1.008, "step": 1955 }, { "epoch": 0.27, "learning_rate": 1.7239726867537072e-05, "loss": 0.9286, "step": 1956 }, { "epoch": 0.27, "learning_rate": 1.7236697717504095e-05, "loss": 1.0441, "step": 1957 }, { "epoch": 0.27, "learning_rate": 1.7233667172711045e-05, "loss": 0.9184, "step": 1958 }, { "epoch": 0.27, "learning_rate": 1.723063523374201e-05, "loss": 1.014, "step": 1959 }, { "epoch": 0.27, "learning_rate": 1.722760190118135e-05, "loss": 0.9726, "step": 1960 }, { "epoch": 0.27, "learning_rate": 1.7224567175613692e-05, "loss": 0.9513, "step": 1961 }, { "epoch": 0.27, "learning_rate": 1.7221531057623935e-05, "loss": 0.9068, "step": 1962 }, { "epoch": 0.27, "learning_rate": 1.7218493547797236e-05, "loss": 0.9475, "step": 1963 }, { "epoch": 0.27, "learning_rate": 1.7215454646719036e-05, "loss": 0.9441, "step": 1964 }, { "epoch": 0.27, "learning_rate": 1.721241435497503e-05, "loss": 0.9444, "step": 1965 }, { "epoch": 0.27, "learning_rate": 1.7209372673151186e-05, "loss": 0.9414, "step": 1966 }, { "epoch": 0.27, "learning_rate": 1.7206329601833746e-05, "loss": 0.958, "step": 1967 }, { "epoch": 0.27, "learning_rate": 1.7203285141609205e-05, "loss": 0.9156, "step": 1968 }, { "epoch": 0.27, "learning_rate": 1.7200239293064345e-05, "loss": 0.997, "step": 1969 }, { "epoch": 0.27, "learning_rate": 1.71971920567862e-05, "loss": 0.9919, "step": 1970 }, { "epoch": 0.27, "learning_rate": 1.7194143433362076e-05, "loss": 0.9876, "step": 1971 }, { "epoch": 0.27, "learning_rate": 1.7191093423379555e-05, "loss": 0.9431, "step": 1972 }, { "epoch": 0.27, "learning_rate": 1.718804202742647e-05, "loss": 0.9628, "step": 1973 }, { "epoch": 0.27, "learning_rate": 1.718498924609093e-05, "loss": 0.9972, "step": 1974 }, { "epoch": 0.27, "learning_rate": 1.7181935079961318e-05, "loss": 0.9697, "step": 1975 }, { "epoch": 0.27, "learning_rate": 1.717887952962627e-05, "loss": 0.912, "step": 1976 }, { "epoch": 0.27, "learning_rate": 1.71758225956747e-05, "loss": 0.9867, "step": 1977 }, { "epoch": 0.27, "learning_rate": 1.7172764278695782e-05, "loss": 0.9513, "step": 1978 }, { "epoch": 0.27, "learning_rate": 1.7169704579278955e-05, "loss": 1.0175, "step": 1979 }, { "epoch": 0.27, "learning_rate": 1.7166643498013936e-05, "loss": 0.9827, "step": 1980 }, { "epoch": 0.27, "learning_rate": 1.7163581035490695e-05, "loss": 0.9387, "step": 1981 }, { "epoch": 0.27, "learning_rate": 1.7160517192299474e-05, "loss": 0.9658, "step": 1982 }, { "epoch": 0.27, "learning_rate": 1.7157451969030786e-05, "loss": 1.0047, "step": 1983 }, { "epoch": 0.27, "learning_rate": 1.715438536627539e-05, "loss": 0.9149, "step": 1984 }, { "epoch": 0.27, "learning_rate": 1.7151317384624345e-05, "loss": 0.8913, "step": 1985 }, { "epoch": 0.27, "learning_rate": 1.7148248024668944e-05, "loss": 1.0488, "step": 1986 }, { "epoch": 0.27, "learning_rate": 1.7145177287000763e-05, "loss": 0.9482, "step": 1987 }, { "epoch": 0.27, "learning_rate": 1.7142105172211637e-05, "loss": 0.9639, "step": 1988 }, { "epoch": 0.27, "learning_rate": 1.7139031680893667e-05, "loss": 0.9545, "step": 1989 }, { "epoch": 0.27, "learning_rate": 1.7135956813639222e-05, "loss": 0.9159, "step": 1990 }, { "epoch": 0.27, "learning_rate": 1.7132880571040934e-05, "loss": 0.8955, "step": 1991 }, { "epoch": 0.27, "learning_rate": 1.71298029536917e-05, "loss": 0.8938, "step": 1992 }, { "epoch": 0.27, "learning_rate": 1.712672396218468e-05, "loss": 0.9903, "step": 1993 }, { "epoch": 0.27, "learning_rate": 1.712364359711331e-05, "loss": 0.9713, "step": 1994 }, { "epoch": 0.27, "learning_rate": 1.712056185907127e-05, "loss": 0.9315, "step": 1995 }, { "epoch": 0.27, "learning_rate": 1.7117478748652527e-05, "loss": 0.9323, "step": 1996 }, { "epoch": 0.27, "learning_rate": 1.7114394266451297e-05, "loss": 1.0143, "step": 1997 }, { "epoch": 0.27, "learning_rate": 1.7111308413062063e-05, "loss": 0.9705, "step": 1998 }, { "epoch": 0.27, "learning_rate": 1.7108221189079584e-05, "loss": 0.9768, "step": 1999 }, { "epoch": 0.27, "learning_rate": 1.7105132595098868e-05, "loss": 0.8884, "step": 2000 }, { "epoch": 0.27, "learning_rate": 1.710204263171519e-05, "loss": 1.0135, "step": 2001 }, { "epoch": 0.27, "learning_rate": 1.7098951299524104e-05, "loss": 0.9298, "step": 2002 }, { "epoch": 0.27, "learning_rate": 1.70958585991214e-05, "loss": 0.9453, "step": 2003 }, { "epoch": 0.27, "learning_rate": 1.7092764531103158e-05, "loss": 0.9762, "step": 2004 }, { "epoch": 0.27, "learning_rate": 1.7089669096065705e-05, "loss": 0.9615, "step": 2005 }, { "epoch": 0.27, "learning_rate": 1.7086572294605642e-05, "loss": 1.0102, "step": 2006 }, { "epoch": 0.27, "learning_rate": 1.708347412731983e-05, "loss": 0.9494, "step": 2007 }, { "epoch": 0.27, "learning_rate": 1.7080374594805393e-05, "loss": 0.9672, "step": 2008 }, { "epoch": 0.27, "learning_rate": 1.7077273697659706e-05, "loss": 0.9512, "step": 2009 }, { "epoch": 0.27, "learning_rate": 1.7074171436480432e-05, "loss": 0.9754, "step": 2010 }, { "epoch": 0.27, "learning_rate": 1.7071067811865477e-05, "loss": 0.9207, "step": 2011 }, { "epoch": 0.27, "learning_rate": 1.7067962824413016e-05, "loss": 0.9157, "step": 2012 }, { "epoch": 0.27, "learning_rate": 1.706485647472149e-05, "loss": 0.9665, "step": 2013 }, { "epoch": 0.27, "learning_rate": 1.7061748763389593e-05, "loss": 0.9343, "step": 2014 }, { "epoch": 0.27, "learning_rate": 1.7058639691016295e-05, "loss": 0.917, "step": 2015 }, { "epoch": 0.27, "learning_rate": 1.7055529258200815e-05, "loss": 0.9606, "step": 2016 }, { "epoch": 0.27, "learning_rate": 1.7052417465542643e-05, "loss": 0.9519, "step": 2017 }, { "epoch": 0.27, "learning_rate": 1.7049304313641532e-05, "loss": 0.9183, "step": 2018 }, { "epoch": 0.27, "learning_rate": 1.7046189803097483e-05, "loss": 0.9901, "step": 2019 }, { "epoch": 0.27, "learning_rate": 1.704307393451078e-05, "loss": 0.9107, "step": 2020 }, { "epoch": 0.27, "learning_rate": 1.7039956708481948e-05, "loss": 1.054, "step": 2021 }, { "epoch": 0.27, "learning_rate": 1.703683812561179e-05, "loss": 0.9559, "step": 2022 }, { "epoch": 0.27, "learning_rate": 1.7033718186501366e-05, "loss": 0.9296, "step": 2023 }, { "epoch": 0.27, "learning_rate": 1.703059689175199e-05, "loss": 0.9165, "step": 2024 }, { "epoch": 0.27, "learning_rate": 1.7027474241965242e-05, "loss": 0.9534, "step": 2025 }, { "epoch": 0.27, "learning_rate": 1.7024350237742967e-05, "loss": 0.9853, "step": 2026 }, { "epoch": 0.27, "learning_rate": 1.702122487968727e-05, "loss": 0.9048, "step": 2027 }, { "epoch": 0.27, "learning_rate": 1.701809816840051e-05, "loss": 0.9388, "step": 2028 }, { "epoch": 0.28, "learning_rate": 1.7014970104485316e-05, "loss": 0.9487, "step": 2029 }, { "epoch": 0.28, "learning_rate": 1.701184068854457e-05, "loss": 0.9981, "step": 2030 }, { "epoch": 0.28, "learning_rate": 1.7008709921181415e-05, "loss": 0.9454, "step": 2031 }, { "epoch": 0.28, "learning_rate": 1.7005577802999264e-05, "loss": 1.0028, "step": 2032 }, { "epoch": 0.28, "learning_rate": 1.700244433460178e-05, "loss": 0.9321, "step": 2033 }, { "epoch": 0.28, "learning_rate": 1.699930951659289e-05, "loss": 0.9194, "step": 2034 }, { "epoch": 0.28, "learning_rate": 1.6996173349576783e-05, "loss": 0.9311, "step": 2035 }, { "epoch": 0.28, "learning_rate": 1.6993035834157905e-05, "loss": 0.9206, "step": 2036 }, { "epoch": 0.28, "learning_rate": 1.6989896970940966e-05, "loss": 0.8909, "step": 2037 }, { "epoch": 0.28, "learning_rate": 1.698675676053092e-05, "loss": 0.9685, "step": 2038 }, { "epoch": 0.28, "learning_rate": 1.698361520353301e-05, "loss": 0.9211, "step": 2039 }, { "epoch": 0.28, "learning_rate": 1.6980472300552712e-05, "loss": 0.9583, "step": 2040 }, { "epoch": 0.28, "learning_rate": 1.6977328052195777e-05, "loss": 1.0055, "step": 2041 }, { "epoch": 0.28, "learning_rate": 1.6974182459068203e-05, "loss": 0.947, "step": 2042 }, { "epoch": 0.28, "learning_rate": 1.697103552177626e-05, "loss": 0.8833, "step": 2043 }, { "epoch": 0.28, "learning_rate": 1.6967887240926465e-05, "loss": 0.9747, "step": 2044 }, { "epoch": 0.28, "learning_rate": 1.6964737617125605e-05, "loss": 0.9553, "step": 2045 }, { "epoch": 0.28, "learning_rate": 1.696158665098072e-05, "loss": 0.9135, "step": 2046 }, { "epoch": 0.28, "learning_rate": 1.6958434343099104e-05, "loss": 0.9952, "step": 2047 }, { "epoch": 0.28, "learning_rate": 1.695528069408832e-05, "loss": 0.9518, "step": 2048 }, { "epoch": 0.28, "learning_rate": 1.6952125704556186e-05, "loss": 0.9574, "step": 2049 }, { "epoch": 0.28, "learning_rate": 1.6948969375110772e-05, "loss": 0.9338, "step": 2050 }, { "epoch": 0.28, "learning_rate": 1.6945811706360412e-05, "loss": 0.9753, "step": 2051 }, { "epoch": 0.28, "learning_rate": 1.69426526989137e-05, "loss": 0.9862, "step": 2052 }, { "epoch": 0.28, "learning_rate": 1.6939492353379483e-05, "loss": 0.9149, "step": 2053 }, { "epoch": 0.28, "learning_rate": 1.6936330670366867e-05, "loss": 0.9682, "step": 2054 }, { "epoch": 0.28, "learning_rate": 1.6933167650485222e-05, "loss": 0.9567, "step": 2055 }, { "epoch": 0.28, "learning_rate": 1.6930003294344163e-05, "loss": 1.0304, "step": 2056 }, { "epoch": 0.28, "learning_rate": 1.6926837602553577e-05, "loss": 0.8871, "step": 2057 }, { "epoch": 0.28, "learning_rate": 1.6923670575723595e-05, "loss": 0.9541, "step": 2058 }, { "epoch": 0.28, "learning_rate": 1.692050221446462e-05, "loss": 0.9281, "step": 2059 }, { "epoch": 0.28, "learning_rate": 1.6917332519387294e-05, "loss": 0.9652, "step": 2060 }, { "epoch": 0.28, "learning_rate": 1.6914161491102535e-05, "loss": 0.9368, "step": 2061 }, { "epoch": 0.28, "learning_rate": 1.69109891302215e-05, "loss": 0.9037, "step": 2062 }, { "epoch": 0.28, "learning_rate": 1.6907815437355625e-05, "loss": 1.0039, "step": 2063 }, { "epoch": 0.28, "learning_rate": 1.6904640413116576e-05, "loss": 0.9594, "step": 2064 }, { "epoch": 0.28, "learning_rate": 1.6901464058116298e-05, "loss": 0.9591, "step": 2065 }, { "epoch": 0.28, "learning_rate": 1.6898286372966976e-05, "loss": 0.9618, "step": 2066 }, { "epoch": 0.28, "learning_rate": 1.6895107358281065e-05, "loss": 0.9833, "step": 2067 }, { "epoch": 0.28, "learning_rate": 1.689192701467127e-05, "loss": 0.9644, "step": 2068 }, { "epoch": 0.28, "learning_rate": 1.6888745342750553e-05, "loss": 0.9918, "step": 2069 }, { "epoch": 0.28, "learning_rate": 1.6885562343132124e-05, "loss": 0.9431, "step": 2070 }, { "epoch": 0.28, "learning_rate": 1.6882378016429467e-05, "loss": 1.0071, "step": 2071 }, { "epoch": 0.28, "learning_rate": 1.6879192363256304e-05, "loss": 0.9154, "step": 2072 }, { "epoch": 0.28, "learning_rate": 1.6876005384226623e-05, "loss": 0.9826, "step": 2073 }, { "epoch": 0.28, "learning_rate": 1.687281707995466e-05, "loss": 0.9562, "step": 2074 }, { "epoch": 0.28, "learning_rate": 1.6869627451054917e-05, "loss": 0.9294, "step": 2075 }, { "epoch": 0.28, "learning_rate": 1.6866436498142136e-05, "loss": 0.9337, "step": 2076 }, { "epoch": 0.28, "learning_rate": 1.6863244221831334e-05, "loss": 0.9771, "step": 2077 }, { "epoch": 0.28, "learning_rate": 1.6860050622737764e-05, "loss": 0.9812, "step": 2078 }, { "epoch": 0.28, "learning_rate": 1.6856855701476947e-05, "loss": 0.9236, "step": 2079 }, { "epoch": 0.28, "learning_rate": 1.6853659458664653e-05, "loss": 0.9781, "step": 2080 }, { "epoch": 0.28, "learning_rate": 1.6850461894916903e-05, "loss": 0.9525, "step": 2081 }, { "epoch": 0.28, "learning_rate": 1.6847263010849983e-05, "loss": 0.9498, "step": 2082 }, { "epoch": 0.28, "learning_rate": 1.684406280708043e-05, "loss": 0.9202, "step": 2083 }, { "epoch": 0.28, "learning_rate": 1.6840861284225022e-05, "loss": 0.9157, "step": 2084 }, { "epoch": 0.28, "learning_rate": 1.6837658442900814e-05, "loss": 0.9605, "step": 2085 }, { "epoch": 0.28, "learning_rate": 1.6834454283725094e-05, "loss": 0.916, "step": 2086 }, { "epoch": 0.28, "learning_rate": 1.6831248807315424e-05, "loss": 0.9132, "step": 2087 }, { "epoch": 0.28, "learning_rate": 1.68280420142896e-05, "loss": 0.9546, "step": 2088 }, { "epoch": 0.28, "learning_rate": 1.6824833905265685e-05, "loss": 0.9285, "step": 2089 }, { "epoch": 0.28, "learning_rate": 1.6821624480861994e-05, "loss": 0.9151, "step": 2090 }, { "epoch": 0.28, "learning_rate": 1.6818413741697086e-05, "loss": 0.9832, "step": 2091 }, { "epoch": 0.28, "learning_rate": 1.681520168838979e-05, "loss": 0.9154, "step": 2092 }, { "epoch": 0.28, "learning_rate": 1.6811988321559173e-05, "loss": 0.9728, "step": 2093 }, { "epoch": 0.28, "learning_rate": 1.6808773641824562e-05, "loss": 0.9048, "step": 2094 }, { "epoch": 0.28, "learning_rate": 1.6805557649805536e-05, "loss": 0.9596, "step": 2095 }, { "epoch": 0.28, "learning_rate": 1.680234034612193e-05, "loss": 0.9029, "step": 2096 }, { "epoch": 0.28, "learning_rate": 1.6799121731393825e-05, "loss": 0.89, "step": 2097 }, { "epoch": 0.28, "learning_rate": 1.679590180624156e-05, "loss": 1.0048, "step": 2098 }, { "epoch": 0.28, "learning_rate": 1.6792680571285726e-05, "loss": 1.0034, "step": 2099 }, { "epoch": 0.28, "learning_rate": 1.6789458027147163e-05, "loss": 0.9554, "step": 2100 }, { "epoch": 0.28, "learning_rate": 1.6786234174446966e-05, "loss": 1.0183, "step": 2101 }, { "epoch": 0.28, "learning_rate": 1.678300901380649e-05, "loss": 0.9304, "step": 2102 }, { "epoch": 0.29, "learning_rate": 1.6779782545847322e-05, "loss": 0.9675, "step": 2103 }, { "epoch": 0.29, "learning_rate": 1.6776554771191324e-05, "loss": 0.8904, "step": 2104 }, { "epoch": 0.29, "learning_rate": 1.677332569046059e-05, "loss": 0.8705, "step": 2105 }, { "epoch": 0.29, "learning_rate": 1.6770095304277477e-05, "loss": 0.9655, "step": 2106 }, { "epoch": 0.29, "learning_rate": 1.6766863613264596e-05, "loss": 1.0061, "step": 2107 }, { "epoch": 0.29, "learning_rate": 1.6763630618044802e-05, "loss": 0.9085, "step": 2108 }, { "epoch": 0.29, "learning_rate": 1.6760396319241204e-05, "loss": 0.9488, "step": 2109 }, { "epoch": 0.29, "learning_rate": 1.6757160717477157e-05, "loss": 0.992, "step": 2110 }, { "epoch": 0.29, "learning_rate": 1.6753923813376285e-05, "loss": 0.852, "step": 2111 }, { "epoch": 0.29, "learning_rate": 1.675068560756244e-05, "loss": 0.9943, "step": 2112 }, { "epoch": 0.29, "learning_rate": 1.674744610065974e-05, "loss": 0.9273, "step": 2113 }, { "epoch": 0.29, "learning_rate": 1.674420529329255e-05, "loss": 0.9518, "step": 2114 }, { "epoch": 0.29, "learning_rate": 1.6740963186085478e-05, "loss": 0.951, "step": 2115 }, { "epoch": 0.29, "learning_rate": 1.67377197796634e-05, "loss": 0.9554, "step": 2116 }, { "epoch": 0.29, "learning_rate": 1.6734475074651418e-05, "loss": 0.9607, "step": 2117 }, { "epoch": 0.29, "learning_rate": 1.6731229071674914e-05, "loss": 0.9393, "step": 2118 }, { "epoch": 0.29, "learning_rate": 1.6727981771359492e-05, "loss": 1.0092, "step": 2119 }, { "epoch": 0.29, "learning_rate": 1.6724733174331022e-05, "loss": 0.8795, "step": 2120 }, { "epoch": 0.29, "learning_rate": 1.6721483281215622e-05, "loss": 0.9131, "step": 2121 }, { "epoch": 0.29, "learning_rate": 1.6718232092639657e-05, "loss": 1.01, "step": 2122 }, { "epoch": 0.29, "learning_rate": 1.6714979609229743e-05, "loss": 0.9623, "step": 2123 }, { "epoch": 0.29, "learning_rate": 1.6711725831612743e-05, "loss": 0.9962, "step": 2124 }, { "epoch": 0.29, "learning_rate": 1.6708470760415774e-05, "loss": 0.8996, "step": 2125 }, { "epoch": 0.29, "learning_rate": 1.6705214396266196e-05, "loss": 0.8991, "step": 2126 }, { "epoch": 0.29, "learning_rate": 1.670195673979163e-05, "loss": 0.9254, "step": 2127 }, { "epoch": 0.29, "learning_rate": 1.6698697791619928e-05, "loss": 1.0352, "step": 2128 }, { "epoch": 0.29, "learning_rate": 1.669543755237921e-05, "loss": 0.9539, "step": 2129 }, { "epoch": 0.29, "learning_rate": 1.6692176022697834e-05, "loss": 0.9342, "step": 2130 }, { "epoch": 0.29, "learning_rate": 1.6688913203204404e-05, "loss": 0.9648, "step": 2131 }, { "epoch": 0.29, "learning_rate": 1.668564909452778e-05, "loss": 0.9335, "step": 2132 }, { "epoch": 0.29, "learning_rate": 1.668238369729707e-05, "loss": 0.9249, "step": 2133 }, { "epoch": 0.29, "learning_rate": 1.667911701214163e-05, "loss": 0.9664, "step": 2134 }, { "epoch": 0.29, "learning_rate": 1.6675849039691057e-05, "loss": 0.9683, "step": 2135 }, { "epoch": 0.29, "learning_rate": 1.6672579780575202e-05, "loss": 0.989, "step": 2136 }, { "epoch": 0.29, "learning_rate": 1.6669309235424166e-05, "loss": 0.9839, "step": 2137 }, { "epoch": 0.29, "learning_rate": 1.6666037404868295e-05, "loss": 0.8715, "step": 2138 }, { "epoch": 0.29, "learning_rate": 1.666276428953818e-05, "loss": 1.007, "step": 2139 }, { "epoch": 0.29, "learning_rate": 1.6659489890064666e-05, "loss": 0.84, "step": 2140 }, { "epoch": 0.29, "learning_rate": 1.665621420707884e-05, "loss": 0.973, "step": 2141 }, { "epoch": 0.29, "learning_rate": 1.665293724121204e-05, "loss": 0.9508, "step": 2142 }, { "epoch": 0.29, "learning_rate": 1.6649658993095853e-05, "loss": 0.9167, "step": 2143 }, { "epoch": 0.29, "learning_rate": 1.6646379463362102e-05, "loss": 0.9437, "step": 2144 }, { "epoch": 0.29, "learning_rate": 1.6643098652642875e-05, "loss": 0.9629, "step": 2145 }, { "epoch": 0.29, "learning_rate": 1.663981656157049e-05, "loss": 0.9534, "step": 2146 }, { "epoch": 0.29, "learning_rate": 1.6636533190777515e-05, "loss": 0.9458, "step": 2147 }, { "epoch": 0.29, "learning_rate": 1.6633248540896775e-05, "loss": 0.9539, "step": 2148 }, { "epoch": 0.29, "learning_rate": 1.6629962612561337e-05, "loss": 0.9956, "step": 2149 }, { "epoch": 0.29, "learning_rate": 1.6626675406404503e-05, "loss": 0.9217, "step": 2150 }, { "epoch": 0.29, "learning_rate": 1.662338692305984e-05, "loss": 0.9338, "step": 2151 }, { "epoch": 0.29, "learning_rate": 1.6620097163161143e-05, "loss": 0.895, "step": 2152 }, { "epoch": 0.29, "learning_rate": 1.6616806127342472e-05, "loss": 0.9374, "step": 2153 }, { "epoch": 0.29, "learning_rate": 1.661351381623811e-05, "loss": 0.926, "step": 2154 }, { "epoch": 0.29, "learning_rate": 1.661022023048261e-05, "loss": 0.9883, "step": 2155 }, { "epoch": 0.29, "learning_rate": 1.660692537071075e-05, "loss": 0.9211, "step": 2156 }, { "epoch": 0.29, "learning_rate": 1.6603629237557567e-05, "loss": 0.9509, "step": 2157 }, { "epoch": 0.29, "learning_rate": 1.660033183165834e-05, "loss": 0.954, "step": 2158 }, { "epoch": 0.29, "learning_rate": 1.6597033153648593e-05, "loss": 0.9722, "step": 2159 }, { "epoch": 0.29, "learning_rate": 1.659373320416409e-05, "loss": 0.9562, "step": 2160 }, { "epoch": 0.29, "learning_rate": 1.6590431983840845e-05, "loss": 0.9913, "step": 2161 }, { "epoch": 0.29, "learning_rate": 1.658712949331512e-05, "loss": 1.0556, "step": 2162 }, { "epoch": 0.29, "learning_rate": 1.658382573322342e-05, "loss": 0.9432, "step": 2163 }, { "epoch": 0.29, "learning_rate": 1.6580520704202484e-05, "loss": 0.9602, "step": 2164 }, { "epoch": 0.29, "learning_rate": 1.657721440688931e-05, "loss": 1.0103, "step": 2165 }, { "epoch": 0.29, "learning_rate": 1.6573906841921138e-05, "loss": 0.904, "step": 2166 }, { "epoch": 0.29, "learning_rate": 1.6570598009935447e-05, "loss": 1.0567, "step": 2167 }, { "epoch": 0.29, "learning_rate": 1.6567287911569964e-05, "loss": 0.9303, "step": 2168 }, { "epoch": 0.29, "learning_rate": 1.656397654746265e-05, "loss": 0.9068, "step": 2169 }, { "epoch": 0.29, "learning_rate": 1.656066391825173e-05, "loss": 0.9273, "step": 2170 }, { "epoch": 0.29, "learning_rate": 1.6557350024575656e-05, "loss": 0.9412, "step": 2171 }, { "epoch": 0.29, "learning_rate": 1.6554034867073128e-05, "loss": 0.8792, "step": 2172 }, { "epoch": 0.29, "learning_rate": 1.655071844638309e-05, "loss": 0.889, "step": 2173 }, { "epoch": 0.29, "learning_rate": 1.654740076314474e-05, "loss": 0.9647, "step": 2174 }, { "epoch": 0.29, "learning_rate": 1.6544081817997496e-05, "loss": 0.9762, "step": 2175 }, { "epoch": 0.29, "learning_rate": 1.6540761611581037e-05, "loss": 0.9129, "step": 2176 }, { "epoch": 0.3, "learning_rate": 1.6537440144535288e-05, "loss": 0.9675, "step": 2177 }, { "epoch": 0.3, "learning_rate": 1.65341174175004e-05, "loss": 0.8972, "step": 2178 }, { "epoch": 0.3, "learning_rate": 1.653079343111678e-05, "loss": 0.859, "step": 2179 }, { "epoch": 0.3, "learning_rate": 1.6527468186025077e-05, "loss": 0.9569, "step": 2180 }, { "epoch": 0.3, "learning_rate": 1.6524141682866173e-05, "loss": 0.9356, "step": 2181 }, { "epoch": 0.3, "learning_rate": 1.652081392228121e-05, "loss": 0.9922, "step": 2182 }, { "epoch": 0.3, "learning_rate": 1.6517484904911554e-05, "loss": 0.9715, "step": 2183 }, { "epoch": 0.3, "learning_rate": 1.6514154631398823e-05, "loss": 0.8755, "step": 2184 }, { "epoch": 0.3, "learning_rate": 1.651082310238487e-05, "loss": 0.8882, "step": 2185 }, { "epoch": 0.3, "learning_rate": 1.6507490318511805e-05, "loss": 0.9338, "step": 2186 }, { "epoch": 0.3, "learning_rate": 1.6504156280421963e-05, "loss": 0.9791, "step": 2187 }, { "epoch": 0.3, "learning_rate": 1.650082098875793e-05, "loss": 0.9605, "step": 2188 }, { "epoch": 0.3, "learning_rate": 1.6497484444162528e-05, "loss": 0.9324, "step": 2189 }, { "epoch": 0.3, "learning_rate": 1.649414664727883e-05, "loss": 0.9628, "step": 2190 }, { "epoch": 0.3, "learning_rate": 1.6490807598750135e-05, "loss": 1.0353, "step": 2191 }, { "epoch": 0.3, "learning_rate": 1.648746729922e-05, "loss": 0.9762, "step": 2192 }, { "epoch": 0.3, "learning_rate": 1.6484125749332212e-05, "loss": 0.9362, "step": 2193 }, { "epoch": 0.3, "learning_rate": 1.6480782949730804e-05, "loss": 0.9274, "step": 2194 }, { "epoch": 0.3, "learning_rate": 1.6477438901060042e-05, "loss": 0.9574, "step": 2195 }, { "epoch": 0.3, "learning_rate": 1.6474093603964452e-05, "loss": 0.9475, "step": 2196 }, { "epoch": 0.3, "learning_rate": 1.6470747059088774e-05, "loss": 0.9531, "step": 2197 }, { "epoch": 0.3, "learning_rate": 1.646739926707801e-05, "loss": 0.9432, "step": 2198 }, { "epoch": 0.3, "learning_rate": 1.6464050228577394e-05, "loss": 0.9928, "step": 2199 }, { "epoch": 0.3, "learning_rate": 1.6460699944232397e-05, "loss": 0.965, "step": 2200 }, { "epoch": 0.3, "learning_rate": 1.6457348414688737e-05, "loss": 0.9909, "step": 2201 }, { "epoch": 0.3, "learning_rate": 1.6453995640592368e-05, "loss": 0.9643, "step": 2202 }, { "epoch": 0.3, "learning_rate": 1.6450641622589484e-05, "loss": 0.9105, "step": 2203 }, { "epoch": 0.3, "learning_rate": 1.644728636132652e-05, "loss": 0.8891, "step": 2204 }, { "epoch": 0.3, "learning_rate": 1.644392985745015e-05, "loss": 0.9191, "step": 2205 }, { "epoch": 0.3, "learning_rate": 1.6440572111607287e-05, "loss": 0.9463, "step": 2206 }, { "epoch": 0.3, "learning_rate": 1.6437213124445082e-05, "loss": 0.9686, "step": 2207 }, { "epoch": 0.3, "learning_rate": 1.643385289661093e-05, "loss": 0.9218, "step": 2208 }, { "epoch": 0.3, "learning_rate": 1.6430491428752465e-05, "loss": 0.9822, "step": 2209 }, { "epoch": 0.3, "learning_rate": 1.642712872151755e-05, "loss": 0.9342, "step": 2210 }, { "epoch": 0.3, "learning_rate": 1.6423764775554302e-05, "loss": 0.9248, "step": 2211 }, { "epoch": 0.3, "learning_rate": 1.642039959151106e-05, "loss": 0.9247, "step": 2212 }, { "epoch": 0.3, "learning_rate": 1.641703317003642e-05, "loss": 1.0072, "step": 2213 }, { "epoch": 0.3, "learning_rate": 1.6413665511779197e-05, "loss": 0.9648, "step": 2214 }, { "epoch": 0.3, "learning_rate": 1.641029661738846e-05, "loss": 0.9342, "step": 2215 }, { "epoch": 0.3, "learning_rate": 1.6406926487513514e-05, "loss": 0.9276, "step": 2216 }, { "epoch": 0.3, "learning_rate": 1.6403555122803894e-05, "loss": 1.027, "step": 2217 }, { "epoch": 0.3, "learning_rate": 1.640018252390938e-05, "loss": 0.9508, "step": 2218 }, { "epoch": 0.3, "learning_rate": 1.6396808691479982e-05, "loss": 0.9447, "step": 2219 }, { "epoch": 0.3, "learning_rate": 1.6393433626165957e-05, "loss": 0.9466, "step": 2220 }, { "epoch": 0.3, "learning_rate": 1.6390057328617802e-05, "loss": 0.9519, "step": 2221 }, { "epoch": 0.3, "learning_rate": 1.6386679799486236e-05, "loss": 0.9138, "step": 2222 }, { "epoch": 0.3, "learning_rate": 1.6383301039422234e-05, "loss": 0.9055, "step": 2223 }, { "epoch": 0.3, "learning_rate": 1.6379921049076987e-05, "loss": 0.9315, "step": 2224 }, { "epoch": 0.3, "learning_rate": 1.6376539829101946e-05, "loss": 0.963, "step": 2225 }, { "epoch": 0.3, "learning_rate": 1.6373157380148783e-05, "loss": 0.9298, "step": 2226 }, { "epoch": 0.3, "learning_rate": 1.636977370286941e-05, "loss": 0.9408, "step": 2227 }, { "epoch": 0.3, "learning_rate": 1.6366388797915987e-05, "loss": 0.8853, "step": 2228 }, { "epoch": 0.3, "learning_rate": 1.636300266594089e-05, "loss": 0.8868, "step": 2229 }, { "epoch": 0.3, "learning_rate": 1.635961530759675e-05, "loss": 0.9197, "step": 2230 }, { "epoch": 0.3, "learning_rate": 1.6356226723536427e-05, "loss": 0.9126, "step": 2231 }, { "epoch": 0.3, "learning_rate": 1.6352836914413014e-05, "loss": 0.9261, "step": 2232 }, { "epoch": 0.3, "learning_rate": 1.6349445880879848e-05, "loss": 0.9448, "step": 2233 }, { "epoch": 0.3, "learning_rate": 1.634605362359049e-05, "loss": 0.9443, "step": 2234 }, { "epoch": 0.3, "learning_rate": 1.6342660143198756e-05, "loss": 1.0235, "step": 2235 }, { "epoch": 0.3, "learning_rate": 1.6339265440358676e-05, "loss": 0.931, "step": 2236 }, { "epoch": 0.3, "learning_rate": 1.633586951572453e-05, "loss": 0.9321, "step": 2237 }, { "epoch": 0.3, "learning_rate": 1.6332472369950828e-05, "loss": 0.9764, "step": 2238 }, { "epoch": 0.3, "learning_rate": 1.632907400369232e-05, "loss": 0.9117, "step": 2239 }, { "epoch": 0.3, "learning_rate": 1.632567441760398e-05, "loss": 0.8986, "step": 2240 }, { "epoch": 0.3, "learning_rate": 1.6322273612341033e-05, "loss": 0.9408, "step": 2241 }, { "epoch": 0.3, "learning_rate": 1.631887158855893e-05, "loss": 0.8903, "step": 2242 }, { "epoch": 0.3, "learning_rate": 1.631546834691335e-05, "loss": 0.9877, "step": 2243 }, { "epoch": 0.3, "learning_rate": 1.6312063888060226e-05, "loss": 0.9416, "step": 2244 }, { "epoch": 0.3, "learning_rate": 1.6308658212655706e-05, "loss": 0.9262, "step": 2245 }, { "epoch": 0.3, "learning_rate": 1.6305251321356183e-05, "loss": 0.9014, "step": 2246 }, { "epoch": 0.3, "learning_rate": 1.6301843214818284e-05, "loss": 0.8923, "step": 2247 }, { "epoch": 0.3, "learning_rate": 1.6298433893698862e-05, "loss": 0.9838, "step": 2248 }, { "epoch": 0.3, "learning_rate": 1.6295023358655016e-05, "loss": 1.008, "step": 2249 }, { "epoch": 0.3, "learning_rate": 1.6291611610344073e-05, "loss": 0.9674, "step": 2250 }, { "epoch": 0.31, "learning_rate": 1.6288198649423588e-05, "loss": 0.9499, "step": 2251 }, { "epoch": 0.31, "learning_rate": 1.6284784476551365e-05, "loss": 0.9496, "step": 2252 }, { "epoch": 0.31, "learning_rate": 1.6281369092385424e-05, "loss": 0.9496, "step": 2253 }, { "epoch": 0.31, "learning_rate": 1.6277952497584027e-05, "loss": 0.9144, "step": 2254 }, { "epoch": 0.31, "learning_rate": 1.627453469280568e-05, "loss": 0.9642, "step": 2255 }, { "epoch": 0.31, "learning_rate": 1.6271115678709098e-05, "loss": 0.8707, "step": 2256 }, { "epoch": 0.31, "learning_rate": 1.626769545595325e-05, "loss": 0.9128, "step": 2257 }, { "epoch": 0.31, "learning_rate": 1.6264274025197328e-05, "loss": 0.9016, "step": 2258 }, { "epoch": 0.31, "learning_rate": 1.626085138710076e-05, "loss": 0.9366, "step": 2259 }, { "epoch": 0.31, "learning_rate": 1.6257427542323204e-05, "loss": 0.9422, "step": 2260 }, { "epoch": 0.31, "learning_rate": 1.6254002491524555e-05, "loss": 0.9593, "step": 2261 }, { "epoch": 0.31, "learning_rate": 1.6250576235364938e-05, "loss": 0.8692, "step": 2262 }, { "epoch": 0.31, "learning_rate": 1.6247148774504705e-05, "loss": 0.9025, "step": 2263 }, { "epoch": 0.31, "learning_rate": 1.6243720109604447e-05, "loss": 0.9788, "step": 2264 }, { "epoch": 0.31, "learning_rate": 1.6240290241324993e-05, "loss": 0.9589, "step": 2265 }, { "epoch": 0.31, "learning_rate": 1.6236859170327394e-05, "loss": 0.9386, "step": 2266 }, { "epoch": 0.31, "learning_rate": 1.6233426897272925e-05, "loss": 0.959, "step": 2267 }, { "epoch": 0.31, "learning_rate": 1.6229993422823112e-05, "loss": 0.9638, "step": 2268 }, { "epoch": 0.31, "learning_rate": 1.6226558747639702e-05, "loss": 0.9384, "step": 2269 }, { "epoch": 0.31, "learning_rate": 1.6223122872384675e-05, "loss": 1.0003, "step": 2270 }, { "epoch": 0.31, "learning_rate": 1.6219685797720236e-05, "loss": 0.9453, "step": 2271 }, { "epoch": 0.31, "learning_rate": 1.6216247524308838e-05, "loss": 0.9489, "step": 2272 }, { "epoch": 0.31, "learning_rate": 1.6212808052813148e-05, "loss": 0.9152, "step": 2273 }, { "epoch": 0.31, "learning_rate": 1.6209367383896075e-05, "loss": 0.9959, "step": 2274 }, { "epoch": 0.31, "learning_rate": 1.6205925518220742e-05, "loss": 1.0014, "step": 2275 }, { "epoch": 0.31, "learning_rate": 1.6202482456450524e-05, "loss": 0.9474, "step": 2276 }, { "epoch": 0.31, "learning_rate": 1.6199038199249023e-05, "loss": 0.9128, "step": 2277 }, { "epoch": 0.31, "learning_rate": 1.619559274728005e-05, "loss": 0.9422, "step": 2278 }, { "epoch": 0.31, "learning_rate": 1.6192146101207674e-05, "loss": 0.9841, "step": 2279 }, { "epoch": 0.31, "learning_rate": 1.618869826169618e-05, "loss": 0.9208, "step": 2280 }, { "epoch": 0.31, "learning_rate": 1.618524922941008e-05, "loss": 0.9613, "step": 2281 }, { "epoch": 0.31, "learning_rate": 1.618179900501413e-05, "loss": 0.9714, "step": 2282 }, { "epoch": 0.31, "learning_rate": 1.6178347589173298e-05, "loss": 0.9488, "step": 2283 }, { "epoch": 0.31, "learning_rate": 1.6174894982552788e-05, "loss": 0.9857, "step": 2284 }, { "epoch": 0.31, "learning_rate": 1.6171441185818047e-05, "loss": 0.887, "step": 2285 }, { "epoch": 0.31, "learning_rate": 1.6167986199634732e-05, "loss": 0.9173, "step": 2286 }, { "epoch": 0.31, "learning_rate": 1.6164530024668743e-05, "loss": 0.956, "step": 2287 }, { "epoch": 0.31, "learning_rate": 1.61610726615862e-05, "loss": 0.9873, "step": 2288 }, { "epoch": 0.31, "learning_rate": 1.6157614111053454e-05, "loss": 0.9913, "step": 2289 }, { "epoch": 0.31, "learning_rate": 1.615415437373709e-05, "loss": 0.8723, "step": 2290 }, { "epoch": 0.31, "learning_rate": 1.6150693450303913e-05, "loss": 1.0227, "step": 2291 }, { "epoch": 0.31, "learning_rate": 1.6147231341420968e-05, "loss": 0.998, "step": 2292 }, { "epoch": 0.31, "learning_rate": 1.614376804775552e-05, "loss": 0.9057, "step": 2293 }, { "epoch": 0.31, "learning_rate": 1.6140303569975064e-05, "loss": 0.9888, "step": 2294 }, { "epoch": 0.31, "learning_rate": 1.613683790874732e-05, "loss": 0.9399, "step": 2295 }, { "epoch": 0.31, "learning_rate": 1.6133371064740247e-05, "loss": 1.0009, "step": 2296 }, { "epoch": 0.31, "learning_rate": 1.612990303862202e-05, "loss": 0.8425, "step": 2297 }, { "epoch": 0.31, "learning_rate": 1.6126433831061052e-05, "loss": 0.9879, "step": 2298 }, { "epoch": 0.31, "learning_rate": 1.612296344272597e-05, "loss": 0.9031, "step": 2299 }, { "epoch": 0.31, "learning_rate": 1.6119491874285645e-05, "loss": 0.9366, "step": 2300 }, { "epoch": 0.31, "learning_rate": 1.6116019126409162e-05, "loss": 0.9281, "step": 2301 }, { "epoch": 0.31, "learning_rate": 1.6112545199765844e-05, "loss": 0.9727, "step": 2302 }, { "epoch": 0.31, "learning_rate": 1.610907009502523e-05, "loss": 0.9096, "step": 2303 }, { "epoch": 0.31, "learning_rate": 1.6105593812857097e-05, "loss": 0.941, "step": 2304 }, { "epoch": 0.31, "learning_rate": 1.610211635393144e-05, "loss": 0.9576, "step": 2305 }, { "epoch": 0.31, "learning_rate": 1.6098637718918482e-05, "loss": 0.9704, "step": 2306 }, { "epoch": 0.31, "learning_rate": 1.6095157908488685e-05, "loss": 1.007, "step": 2307 }, { "epoch": 0.31, "learning_rate": 1.609167692331272e-05, "loss": 0.865, "step": 2308 }, { "epoch": 0.31, "learning_rate": 1.60881947640615e-05, "loss": 0.9971, "step": 2309 }, { "epoch": 0.31, "learning_rate": 1.6084711431406144e-05, "loss": 0.9758, "step": 2310 }, { "epoch": 0.31, "learning_rate": 1.608122692601802e-05, "loss": 0.9669, "step": 2311 }, { "epoch": 0.31, "learning_rate": 1.6077741248568712e-05, "loss": 0.9181, "step": 2312 }, { "epoch": 0.31, "learning_rate": 1.6074254399730024e-05, "loss": 0.9467, "step": 2313 }, { "epoch": 0.31, "learning_rate": 1.6070766380173997e-05, "loss": 0.8859, "step": 2314 }, { "epoch": 0.31, "learning_rate": 1.6067277190572887e-05, "loss": 0.9554, "step": 2315 }, { "epoch": 0.31, "learning_rate": 1.6063786831599186e-05, "loss": 0.9322, "step": 2316 }, { "epoch": 0.31, "learning_rate": 1.60602953039256e-05, "loss": 0.9533, "step": 2317 }, { "epoch": 0.31, "learning_rate": 1.605680260822507e-05, "loss": 0.9304, "step": 2318 }, { "epoch": 0.31, "learning_rate": 1.6053308745170757e-05, "loss": 0.9607, "step": 2319 }, { "epoch": 0.31, "learning_rate": 1.6049813715436047e-05, "loss": 0.9239, "step": 2320 }, { "epoch": 0.31, "learning_rate": 1.604631751969456e-05, "loss": 0.9353, "step": 2321 }, { "epoch": 0.31, "learning_rate": 1.6042820158620123e-05, "loss": 0.9309, "step": 2322 }, { "epoch": 0.31, "learning_rate": 1.60393216328868e-05, "loss": 0.9048, "step": 2323 }, { "epoch": 0.31, "learning_rate": 1.6035821943168883e-05, "loss": 0.9669, "step": 2324 }, { "epoch": 0.32, "learning_rate": 1.603232109014088e-05, "loss": 0.9211, "step": 2325 }, { "epoch": 0.32, "learning_rate": 1.6028819074477517e-05, "loss": 0.9341, "step": 2326 }, { "epoch": 0.32, "learning_rate": 1.602531589685376e-05, "loss": 0.9736, "step": 2327 }, { "epoch": 0.32, "learning_rate": 1.6021811557944793e-05, "loss": 0.9653, "step": 2328 }, { "epoch": 0.32, "learning_rate": 1.601830605842602e-05, "loss": 0.8847, "step": 2329 }, { "epoch": 0.32, "learning_rate": 1.6014799398973072e-05, "loss": 0.9341, "step": 2330 }, { "epoch": 0.32, "learning_rate": 1.60112915802618e-05, "loss": 0.8214, "step": 2331 }, { "epoch": 0.32, "learning_rate": 1.6007782602968288e-05, "loss": 0.9451, "step": 2332 }, { "epoch": 0.32, "learning_rate": 1.6004272467768824e-05, "loss": 0.9272, "step": 2333 }, { "epoch": 0.32, "learning_rate": 1.6000761175339944e-05, "loss": 0.9597, "step": 2334 }, { "epoch": 0.32, "learning_rate": 1.599724872635839e-05, "loss": 0.9651, "step": 2335 }, { "epoch": 0.32, "learning_rate": 1.5993735121501128e-05, "loss": 0.9301, "step": 2336 }, { "epoch": 0.32, "learning_rate": 1.5990220361445353e-05, "loss": 0.9031, "step": 2337 }, { "epoch": 0.32, "learning_rate": 1.5986704446868482e-05, "loss": 0.9222, "step": 2338 }, { "epoch": 0.32, "learning_rate": 1.5983187378448152e-05, "loss": 0.9418, "step": 2339 }, { "epoch": 0.32, "learning_rate": 1.5979669156862222e-05, "loss": 0.9074, "step": 2340 }, { "epoch": 0.32, "learning_rate": 1.597614978278877e-05, "loss": 0.9001, "step": 2341 }, { "epoch": 0.32, "learning_rate": 1.5972629256906105e-05, "loss": 0.9296, "step": 2342 }, { "epoch": 0.32, "learning_rate": 1.5969107579892754e-05, "loss": 0.9849, "step": 2343 }, { "epoch": 0.32, "learning_rate": 1.5965584752427463e-05, "loss": 0.9283, "step": 2344 }, { "epoch": 0.32, "learning_rate": 1.59620607751892e-05, "loss": 0.9223, "step": 2345 }, { "epoch": 0.32, "learning_rate": 1.5958535648857157e-05, "loss": 0.9324, "step": 2346 }, { "epoch": 0.32, "learning_rate": 1.595500937411075e-05, "loss": 0.9385, "step": 2347 }, { "epoch": 0.32, "learning_rate": 1.595148195162961e-05, "loss": 0.8863, "step": 2348 }, { "epoch": 0.32, "learning_rate": 1.5947953382093593e-05, "loss": 0.9091, "step": 2349 }, { "epoch": 0.32, "learning_rate": 1.5944423666182776e-05, "loss": 0.9523, "step": 2350 }, { "epoch": 0.32, "learning_rate": 1.594089280457746e-05, "loss": 0.9111, "step": 2351 }, { "epoch": 0.32, "learning_rate": 1.5937360797958157e-05, "loss": 0.88, "step": 2352 }, { "epoch": 0.32, "learning_rate": 1.593382764700561e-05, "loss": 0.9251, "step": 2353 }, { "epoch": 0.32, "learning_rate": 1.5930293352400776e-05, "loss": 0.9011, "step": 2354 }, { "epoch": 0.32, "learning_rate": 1.5926757914824837e-05, "loss": 0.9202, "step": 2355 }, { "epoch": 0.32, "learning_rate": 1.592322133495919e-05, "loss": 0.9548, "step": 2356 }, { "epoch": 0.32, "learning_rate": 1.5919683613485458e-05, "loss": 0.9836, "step": 2357 }, { "epoch": 0.32, "learning_rate": 1.5916144751085485e-05, "loss": 0.9041, "step": 2358 }, { "epoch": 0.32, "learning_rate": 1.5912604748441323e-05, "loss": 0.8995, "step": 2359 }, { "epoch": 0.32, "learning_rate": 1.590906360623526e-05, "loss": 0.9001, "step": 2360 }, { "epoch": 0.32, "learning_rate": 1.5905521325149788e-05, "loss": 1.0633, "step": 2361 }, { "epoch": 0.32, "learning_rate": 1.5901977905867634e-05, "loss": 0.9526, "step": 2362 }, { "epoch": 0.32, "learning_rate": 1.5898433349071727e-05, "loss": 0.9163, "step": 2363 }, { "epoch": 0.32, "learning_rate": 1.589488765544524e-05, "loss": 0.8806, "step": 2364 }, { "epoch": 0.32, "learning_rate": 1.5891340825671533e-05, "loss": 0.9595, "step": 2365 }, { "epoch": 0.32, "learning_rate": 1.5887792860434207e-05, "loss": 0.9507, "step": 2366 }, { "epoch": 0.32, "learning_rate": 1.5884243760417083e-05, "loss": 0.9873, "step": 2367 }, { "epoch": 0.32, "learning_rate": 1.5880693526304192e-05, "loss": 0.9247, "step": 2368 }, { "epoch": 0.32, "learning_rate": 1.587714215877978e-05, "loss": 0.973, "step": 2369 }, { "epoch": 0.32, "learning_rate": 1.5873589658528326e-05, "loss": 0.9022, "step": 2370 }, { "epoch": 0.32, "learning_rate": 1.587003602623451e-05, "loss": 0.9181, "step": 2371 }, { "epoch": 0.32, "learning_rate": 1.5866481262583245e-05, "loss": 0.9688, "step": 2372 }, { "epoch": 0.32, "learning_rate": 1.5862925368259654e-05, "loss": 0.953, "step": 2373 }, { "epoch": 0.32, "learning_rate": 1.5859368343949084e-05, "loss": 0.9066, "step": 2374 }, { "epoch": 0.32, "learning_rate": 1.5855810190337088e-05, "loss": 0.9838, "step": 2375 }, { "epoch": 0.32, "learning_rate": 1.5852250908109448e-05, "loss": 0.9719, "step": 2376 }, { "epoch": 0.32, "learning_rate": 1.5848690497952163e-05, "loss": 0.957, "step": 2377 }, { "epoch": 0.32, "learning_rate": 1.584512896055144e-05, "loss": 0.9664, "step": 2378 }, { "epoch": 0.32, "learning_rate": 1.5841566296593714e-05, "loss": 0.925, "step": 2379 }, { "epoch": 0.32, "learning_rate": 1.583800250676563e-05, "loss": 0.924, "step": 2380 }, { "epoch": 0.32, "learning_rate": 1.5834437591754063e-05, "loss": 0.9136, "step": 2381 }, { "epoch": 0.32, "learning_rate": 1.5830871552246076e-05, "loss": 0.9629, "step": 2382 }, { "epoch": 0.32, "learning_rate": 1.582730438892898e-05, "loss": 0.9498, "step": 2383 }, { "epoch": 0.32, "learning_rate": 1.582373610249029e-05, "loss": 0.9669, "step": 2384 }, { "epoch": 0.32, "learning_rate": 1.582016669361773e-05, "loss": 0.9345, "step": 2385 }, { "epoch": 0.32, "learning_rate": 1.5816596162999252e-05, "loss": 0.9055, "step": 2386 }, { "epoch": 0.32, "learning_rate": 1.5813024511323017e-05, "loss": 0.9495, "step": 2387 }, { "epoch": 0.32, "learning_rate": 1.580945173927741e-05, "loss": 0.9249, "step": 2388 }, { "epoch": 0.32, "learning_rate": 1.5805877847551027e-05, "loss": 0.8522, "step": 2389 }, { "epoch": 0.32, "learning_rate": 1.5802302836832673e-05, "loss": 0.8952, "step": 2390 }, { "epoch": 0.32, "learning_rate": 1.5798726707811383e-05, "loss": 0.9681, "step": 2391 }, { "epoch": 0.32, "learning_rate": 1.5795149461176393e-05, "loss": 0.8728, "step": 2392 }, { "epoch": 0.32, "learning_rate": 1.5791571097617163e-05, "loss": 0.9197, "step": 2393 }, { "epoch": 0.32, "learning_rate": 1.578799161782337e-05, "loss": 0.8987, "step": 2394 }, { "epoch": 0.32, "learning_rate": 1.57844110224849e-05, "loss": 1.0126, "step": 2395 }, { "epoch": 0.32, "learning_rate": 1.5780829312291858e-05, "loss": 0.9188, "step": 2396 }, { "epoch": 0.32, "learning_rate": 1.577724648793456e-05, "loss": 0.9624, "step": 2397 }, { "epoch": 0.33, "learning_rate": 1.577366255010354e-05, "loss": 0.9155, "step": 2398 }, { "epoch": 0.33, "learning_rate": 1.5770077499489546e-05, "loss": 0.9418, "step": 2399 }, { "epoch": 0.33, "learning_rate": 1.5766491336783543e-05, "loss": 0.9946, "step": 2400 }, { "epoch": 0.33, "learning_rate": 1.5762904062676706e-05, "loss": 0.9643, "step": 2401 }, { "epoch": 0.33, "learning_rate": 1.575931567786042e-05, "loss": 0.9167, "step": 2402 }, { "epoch": 0.33, "learning_rate": 1.5755726183026303e-05, "loss": 0.8962, "step": 2403 }, { "epoch": 0.33, "learning_rate": 1.5752135578866162e-05, "loss": 1.0336, "step": 2404 }, { "epoch": 0.33, "learning_rate": 1.5748543866072033e-05, "loss": 0.9439, "step": 2405 }, { "epoch": 0.33, "learning_rate": 1.5744951045336166e-05, "loss": 0.9882, "step": 2406 }, { "epoch": 0.33, "learning_rate": 1.5741357117351018e-05, "loss": 0.9722, "step": 2407 }, { "epoch": 0.33, "learning_rate": 1.573776208280926e-05, "loss": 0.9666, "step": 2408 }, { "epoch": 0.33, "learning_rate": 1.5734165942403782e-05, "loss": 0.9108, "step": 2409 }, { "epoch": 0.33, "learning_rate": 1.5730568696827684e-05, "loss": 0.9482, "step": 2410 }, { "epoch": 0.33, "learning_rate": 1.572697034677428e-05, "loss": 0.9181, "step": 2411 }, { "epoch": 0.33, "learning_rate": 1.5723370892937085e-05, "loss": 0.861, "step": 2412 }, { "epoch": 0.33, "learning_rate": 1.571977033600985e-05, "loss": 0.9743, "step": 2413 }, { "epoch": 0.33, "learning_rate": 1.5716168676686523e-05, "loss": 0.8916, "step": 2414 }, { "epoch": 0.33, "learning_rate": 1.5712565915661264e-05, "loss": 0.9468, "step": 2415 }, { "epoch": 0.33, "learning_rate": 1.570896205362845e-05, "loss": 0.9701, "step": 2416 }, { "epoch": 0.33, "learning_rate": 1.570535709128267e-05, "loss": 0.9105, "step": 2417 }, { "epoch": 0.33, "learning_rate": 1.5701751029318723e-05, "loss": 0.981, "step": 2418 }, { "epoch": 0.33, "learning_rate": 1.5698143868431617e-05, "loss": 0.9145, "step": 2419 }, { "epoch": 0.33, "learning_rate": 1.5694535609316585e-05, "loss": 0.9302, "step": 2420 }, { "epoch": 0.33, "learning_rate": 1.5690926252669058e-05, "loss": 0.9091, "step": 2421 }, { "epoch": 0.33, "learning_rate": 1.568731579918468e-05, "loss": 0.9322, "step": 2422 }, { "epoch": 0.33, "learning_rate": 1.568370424955931e-05, "loss": 0.9288, "step": 2423 }, { "epoch": 0.33, "learning_rate": 1.568009160448902e-05, "loss": 1.0099, "step": 2424 }, { "epoch": 0.33, "learning_rate": 1.5676477864670093e-05, "loss": 0.9776, "step": 2425 }, { "epoch": 0.33, "learning_rate": 1.5672863030799015e-05, "loss": 0.894, "step": 2426 }, { "epoch": 0.33, "learning_rate": 1.5669247103572493e-05, "loss": 0.9252, "step": 2427 }, { "epoch": 0.33, "learning_rate": 1.5665630083687438e-05, "loss": 0.9408, "step": 2428 }, { "epoch": 0.33, "learning_rate": 1.5662011971840972e-05, "loss": 0.9507, "step": 2429 }, { "epoch": 0.33, "learning_rate": 1.5658392768730434e-05, "loss": 0.8781, "step": 2430 }, { "epoch": 0.33, "learning_rate": 1.5654772475053365e-05, "loss": 0.9163, "step": 2431 }, { "epoch": 0.33, "learning_rate": 1.565115109150752e-05, "loss": 0.9338, "step": 2432 }, { "epoch": 0.33, "learning_rate": 1.5647528618790872e-05, "loss": 0.9379, "step": 2433 }, { "epoch": 0.33, "learning_rate": 1.5643905057601583e-05, "loss": 0.9657, "step": 2434 }, { "epoch": 0.33, "learning_rate": 1.5640280408638044e-05, "loss": 0.9409, "step": 2435 }, { "epoch": 0.33, "learning_rate": 1.563665467259885e-05, "loss": 0.9473, "step": 2436 }, { "epoch": 0.33, "learning_rate": 1.5633027850182803e-05, "loss": 0.9655, "step": 2437 }, { "epoch": 0.33, "learning_rate": 1.562939994208892e-05, "loss": 0.9755, "step": 2438 }, { "epoch": 0.33, "learning_rate": 1.562577094901642e-05, "loss": 0.9087, "step": 2439 }, { "epoch": 0.33, "learning_rate": 1.5622140871664733e-05, "loss": 0.9452, "step": 2440 }, { "epoch": 0.33, "learning_rate": 1.5618509710733502e-05, "loss": 0.8927, "step": 2441 }, { "epoch": 0.33, "learning_rate": 1.5614877466922574e-05, "loss": 0.9063, "step": 2442 }, { "epoch": 0.33, "learning_rate": 1.5611244140932013e-05, "loss": 0.9364, "step": 2443 }, { "epoch": 0.33, "learning_rate": 1.5607609733462076e-05, "loss": 0.891, "step": 2444 }, { "epoch": 0.33, "learning_rate": 1.5603974245213247e-05, "loss": 0.9891, "step": 2445 }, { "epoch": 0.33, "learning_rate": 1.5600337676886205e-05, "loss": 0.9304, "step": 2446 }, { "epoch": 0.33, "learning_rate": 1.5596700029181843e-05, "loss": 0.9339, "step": 2447 }, { "epoch": 0.33, "learning_rate": 1.5593061302801263e-05, "loss": 0.9748, "step": 2448 }, { "epoch": 0.33, "learning_rate": 1.5589421498445765e-05, "loss": 0.9309, "step": 2449 }, { "epoch": 0.33, "learning_rate": 1.558578061681687e-05, "loss": 0.9372, "step": 2450 }, { "epoch": 0.33, "learning_rate": 1.55821386586163e-05, "loss": 0.9644, "step": 2451 }, { "epoch": 0.33, "learning_rate": 1.5578495624545988e-05, "loss": 0.9208, "step": 2452 }, { "epoch": 0.33, "learning_rate": 1.5574851515308063e-05, "loss": 0.8819, "step": 2453 }, { "epoch": 0.33, "learning_rate": 1.5571206331604885e-05, "loss": 0.9683, "step": 2454 }, { "epoch": 0.33, "learning_rate": 1.556756007413899e-05, "loss": 0.933, "step": 2455 }, { "epoch": 0.33, "learning_rate": 1.5563912743613143e-05, "loss": 0.8765, "step": 2456 }, { "epoch": 0.33, "learning_rate": 1.5560264340730315e-05, "loss": 0.8722, "step": 2457 }, { "epoch": 0.33, "learning_rate": 1.555661486619367e-05, "loss": 0.8615, "step": 2458 }, { "epoch": 0.33, "learning_rate": 1.5552964320706593e-05, "loss": 0.9498, "step": 2459 }, { "epoch": 0.33, "learning_rate": 1.5549312704972667e-05, "loss": 0.8732, "step": 2460 }, { "epoch": 0.33, "learning_rate": 1.5545660019695684e-05, "loss": 0.9647, "step": 2461 }, { "epoch": 0.33, "learning_rate": 1.5542006265579643e-05, "loss": 0.9478, "step": 2462 }, { "epoch": 0.33, "learning_rate": 1.5538351443328747e-05, "loss": 0.9609, "step": 2463 }, { "epoch": 0.33, "learning_rate": 1.5534695553647404e-05, "loss": 0.933, "step": 2464 }, { "epoch": 0.33, "learning_rate": 1.5531038597240232e-05, "loss": 0.9547, "step": 2465 }, { "epoch": 0.33, "learning_rate": 1.5527380574812054e-05, "loss": 0.9469, "step": 2466 }, { "epoch": 0.33, "learning_rate": 1.5523721487067895e-05, "loss": 0.9866, "step": 2467 }, { "epoch": 0.33, "learning_rate": 1.5520061334712978e-05, "loss": 0.9658, "step": 2468 }, { "epoch": 0.33, "learning_rate": 1.551640011845275e-05, "loss": 0.9347, "step": 2469 }, { "epoch": 0.33, "learning_rate": 1.5512737838992852e-05, "loss": 0.8999, "step": 2470 }, { "epoch": 0.33, "learning_rate": 1.550907449703913e-05, "loss": 0.9462, "step": 2471 }, { "epoch": 0.34, "learning_rate": 1.5505410093297633e-05, "loss": 0.9223, "step": 2472 }, { "epoch": 0.34, "learning_rate": 1.5501744628474623e-05, "loss": 0.933, "step": 2473 }, { "epoch": 0.34, "learning_rate": 1.5498078103276555e-05, "loss": 0.9748, "step": 2474 }, { "epoch": 0.34, "learning_rate": 1.5494410518410096e-05, "loss": 0.9545, "step": 2475 }, { "epoch": 0.34, "learning_rate": 1.5490741874582117e-05, "loss": 0.965, "step": 2476 }, { "epoch": 0.34, "learning_rate": 1.5487072172499696e-05, "loss": 1.0123, "step": 2477 }, { "epoch": 0.34, "learning_rate": 1.5483401412870097e-05, "loss": 0.9425, "step": 2478 }, { "epoch": 0.34, "learning_rate": 1.5479729596400814e-05, "loss": 0.905, "step": 2479 }, { "epoch": 0.34, "learning_rate": 1.5476056723799532e-05, "loss": 0.8689, "step": 2480 }, { "epoch": 0.34, "learning_rate": 1.5472382795774127e-05, "loss": 0.9348, "step": 2481 }, { "epoch": 0.34, "learning_rate": 1.5468707813032705e-05, "loss": 0.9262, "step": 2482 }, { "epoch": 0.34, "learning_rate": 1.5465031776283555e-05, "loss": 1.0186, "step": 2483 }, { "epoch": 0.34, "learning_rate": 1.5461354686235175e-05, "loss": 0.8773, "step": 2484 }, { "epoch": 0.34, "learning_rate": 1.545767654359627e-05, "loss": 0.9058, "step": 2485 }, { "epoch": 0.34, "learning_rate": 1.5453997349075742e-05, "loss": 0.9466, "step": 2486 }, { "epoch": 0.34, "learning_rate": 1.5450317103382695e-05, "loss": 0.9235, "step": 2487 }, { "epoch": 0.34, "learning_rate": 1.5446635807226445e-05, "loss": 0.8745, "step": 2488 }, { "epoch": 0.34, "learning_rate": 1.5442953461316504e-05, "loss": 0.9329, "step": 2489 }, { "epoch": 0.34, "learning_rate": 1.543927006636258e-05, "loss": 0.8889, "step": 2490 }, { "epoch": 0.34, "learning_rate": 1.5435585623074594e-05, "loss": 0.9613, "step": 2491 }, { "epoch": 0.34, "learning_rate": 1.5431900132162666e-05, "loss": 0.995, "step": 2492 }, { "epoch": 0.34, "learning_rate": 1.542821359433711e-05, "loss": 0.9131, "step": 2493 }, { "epoch": 0.34, "learning_rate": 1.5424526010308458e-05, "loss": 0.8437, "step": 2494 }, { "epoch": 0.34, "learning_rate": 1.5420837380787427e-05, "loss": 1.0027, "step": 2495 }, { "epoch": 0.34, "learning_rate": 1.5417147706484944e-05, "loss": 0.9492, "step": 2496 }, { "epoch": 0.34, "learning_rate": 1.5413456988112136e-05, "loss": 1.0278, "step": 2497 }, { "epoch": 0.34, "learning_rate": 1.540976522638033e-05, "loss": 0.943, "step": 2498 }, { "epoch": 0.34, "learning_rate": 1.5406072422001062e-05, "loss": 0.956, "step": 2499 }, { "epoch": 0.34, "learning_rate": 1.5402378575686054e-05, "loss": 1.0266, "step": 2500 }, { "epoch": 0.34, "learning_rate": 1.539868368814724e-05, "loss": 0.8746, "step": 2501 }, { "epoch": 0.34, "learning_rate": 1.539498776009675e-05, "loss": 0.9489, "step": 2502 }, { "epoch": 0.34, "learning_rate": 1.539129079224692e-05, "loss": 0.9323, "step": 2503 }, { "epoch": 0.34, "learning_rate": 1.538759278531028e-05, "loss": 0.9121, "step": 2504 }, { "epoch": 0.34, "learning_rate": 1.538389373999956e-05, "loss": 0.88, "step": 2505 }, { "epoch": 0.34, "learning_rate": 1.5380193657027702e-05, "loss": 1.0041, "step": 2506 }, { "epoch": 0.34, "learning_rate": 1.5376492537107833e-05, "loss": 0.9281, "step": 2507 }, { "epoch": 0.34, "learning_rate": 1.537279038095328e-05, "loss": 0.8948, "step": 2508 }, { "epoch": 0.34, "learning_rate": 1.536908718927759e-05, "loss": 0.9324, "step": 2509 }, { "epoch": 0.34, "learning_rate": 1.536538296279448e-05, "loss": 0.8732, "step": 2510 }, { "epoch": 0.34, "learning_rate": 1.5361677702217895e-05, "loss": 0.8973, "step": 2511 }, { "epoch": 0.34, "learning_rate": 1.5357971408261954e-05, "loss": 0.9309, "step": 2512 }, { "epoch": 0.34, "learning_rate": 1.5354264081640997e-05, "loss": 0.9063, "step": 2513 }, { "epoch": 0.34, "learning_rate": 1.5350555723069545e-05, "loss": 0.984, "step": 2514 }, { "epoch": 0.34, "learning_rate": 1.534684633326233e-05, "loss": 0.9212, "step": 2515 }, { "epoch": 0.34, "learning_rate": 1.534313591293428e-05, "loss": 0.959, "step": 2516 }, { "epoch": 0.34, "learning_rate": 1.533942446280052e-05, "loss": 0.9679, "step": 2517 }, { "epoch": 0.34, "learning_rate": 1.533571198357637e-05, "loss": 0.9335, "step": 2518 }, { "epoch": 0.34, "learning_rate": 1.5331998475977354e-05, "loss": 0.8825, "step": 2519 }, { "epoch": 0.34, "learning_rate": 1.5328283940719196e-05, "loss": 0.9156, "step": 2520 }, { "epoch": 0.34, "learning_rate": 1.532456837851781e-05, "loss": 0.9237, "step": 2521 }, { "epoch": 0.34, "learning_rate": 1.5320851790089314e-05, "loss": 0.9091, "step": 2522 }, { "epoch": 0.34, "learning_rate": 1.5317134176150025e-05, "loss": 0.949, "step": 2523 }, { "epoch": 0.34, "learning_rate": 1.5313415537416448e-05, "loss": 0.9431, "step": 2524 }, { "epoch": 0.34, "learning_rate": 1.53096958746053e-05, "loss": 0.9349, "step": 2525 }, { "epoch": 0.34, "learning_rate": 1.530597518843348e-05, "loss": 0.9239, "step": 2526 }, { "epoch": 0.34, "learning_rate": 1.5302253479618097e-05, "loss": 0.9412, "step": 2527 }, { "epoch": 0.34, "learning_rate": 1.5298530748876453e-05, "loss": 0.8859, "step": 2528 }, { "epoch": 0.34, "learning_rate": 1.5294806996926043e-05, "loss": 0.9332, "step": 2529 }, { "epoch": 0.34, "learning_rate": 1.5291082224484565e-05, "loss": 0.8643, "step": 2530 }, { "epoch": 0.34, "learning_rate": 1.5287356432269907e-05, "loss": 0.9808, "step": 2531 }, { "epoch": 0.34, "learning_rate": 1.528362962100016e-05, "loss": 0.9292, "step": 2532 }, { "epoch": 0.34, "learning_rate": 1.5279901791393605e-05, "loss": 0.8907, "step": 2533 }, { "epoch": 0.34, "learning_rate": 1.5276172944168725e-05, "loss": 0.9141, "step": 2534 }, { "epoch": 0.34, "learning_rate": 1.5272443080044194e-05, "loss": 0.977, "step": 2535 }, { "epoch": 0.34, "learning_rate": 1.5268712199738895e-05, "loss": 0.8782, "step": 2536 }, { "epoch": 0.34, "learning_rate": 1.526498030397188e-05, "loss": 0.8769, "step": 2537 }, { "epoch": 0.34, "learning_rate": 1.5261247393462427e-05, "loss": 0.9716, "step": 2538 }, { "epoch": 0.34, "learning_rate": 1.5257513468929994e-05, "loss": 0.96, "step": 2539 }, { "epoch": 0.34, "learning_rate": 1.525377853109423e-05, "loss": 0.9103, "step": 2540 }, { "epoch": 0.34, "learning_rate": 1.5250042580674993e-05, "loss": 0.9336, "step": 2541 }, { "epoch": 0.34, "learning_rate": 1.5246305618392323e-05, "loss": 0.9198, "step": 2542 }, { "epoch": 0.34, "learning_rate": 1.5242567644966463e-05, "loss": 0.902, "step": 2543 }, { "epoch": 0.34, "learning_rate": 1.5238828661117856e-05, "loss": 0.8853, "step": 2544 }, { "epoch": 0.34, "learning_rate": 1.5235088667567118e-05, "loss": 0.8898, "step": 2545 }, { "epoch": 0.35, "learning_rate": 1.5231347665035084e-05, "loss": 0.9382, "step": 2546 }, { "epoch": 0.35, "learning_rate": 1.5227605654242772e-05, "loss": 0.9217, "step": 2547 }, { "epoch": 0.35, "learning_rate": 1.5223862635911396e-05, "loss": 0.9531, "step": 2548 }, { "epoch": 0.35, "learning_rate": 1.5220118610762362e-05, "loss": 0.936, "step": 2549 }, { "epoch": 0.35, "learning_rate": 1.5216373579517276e-05, "loss": 0.9334, "step": 2550 }, { "epoch": 0.35, "learning_rate": 1.5212627542897934e-05, "loss": 0.9124, "step": 2551 }, { "epoch": 0.35, "learning_rate": 1.520888050162632e-05, "loss": 0.9494, "step": 2552 }, { "epoch": 0.35, "learning_rate": 1.5205132456424622e-05, "loss": 0.9407, "step": 2553 }, { "epoch": 0.35, "learning_rate": 1.5201383408015215e-05, "loss": 0.9653, "step": 2554 }, { "epoch": 0.35, "learning_rate": 1.5197633357120673e-05, "loss": 0.8977, "step": 2555 }, { "epoch": 0.35, "learning_rate": 1.5193882304463756e-05, "loss": 0.9137, "step": 2556 }, { "epoch": 0.35, "learning_rate": 1.5190130250767425e-05, "loss": 0.8435, "step": 2557 }, { "epoch": 0.35, "learning_rate": 1.5186377196754825e-05, "loss": 0.9564, "step": 2558 }, { "epoch": 0.35, "learning_rate": 1.5182623143149297e-05, "loss": 0.9498, "step": 2559 }, { "epoch": 0.35, "learning_rate": 1.5178868090674381e-05, "loss": 0.9317, "step": 2560 }, { "epoch": 0.35, "learning_rate": 1.51751120400538e-05, "loss": 0.8586, "step": 2561 }, { "epoch": 0.35, "learning_rate": 1.5171354992011478e-05, "loss": 0.9703, "step": 2562 }, { "epoch": 0.35, "learning_rate": 1.5167596947271523e-05, "loss": 0.942, "step": 2563 }, { "epoch": 0.35, "learning_rate": 1.5163837906558243e-05, "loss": 0.9203, "step": 2564 }, { "epoch": 0.35, "learning_rate": 1.5160077870596133e-05, "loss": 0.9822, "step": 2565 }, { "epoch": 0.35, "learning_rate": 1.515631684010988e-05, "loss": 0.9645, "step": 2566 }, { "epoch": 0.35, "learning_rate": 1.5152554815824366e-05, "loss": 0.9062, "step": 2567 }, { "epoch": 0.35, "learning_rate": 1.5148791798464654e-05, "loss": 0.9354, "step": 2568 }, { "epoch": 0.35, "learning_rate": 1.5145027788756022e-05, "loss": 0.9188, "step": 2569 }, { "epoch": 0.35, "learning_rate": 1.514126278742391e-05, "loss": 0.954, "step": 2570 }, { "epoch": 0.35, "learning_rate": 1.513749679519397e-05, "loss": 0.9191, "step": 2571 }, { "epoch": 0.35, "learning_rate": 1.5133729812792035e-05, "loss": 0.8969, "step": 2572 }, { "epoch": 0.35, "learning_rate": 1.5129961840944131e-05, "loss": 0.958, "step": 2573 }, { "epoch": 0.35, "learning_rate": 1.512619288037648e-05, "loss": 0.8576, "step": 2574 }, { "epoch": 0.35, "learning_rate": 1.5122422931815487e-05, "loss": 0.9623, "step": 2575 }, { "epoch": 0.35, "learning_rate": 1.5118651995987752e-05, "loss": 0.9294, "step": 2576 }, { "epoch": 0.35, "learning_rate": 1.511488007362006e-05, "loss": 0.892, "step": 2577 }, { "epoch": 0.35, "learning_rate": 1.5111107165439393e-05, "loss": 0.9483, "step": 2578 }, { "epoch": 0.35, "learning_rate": 1.5107333272172922e-05, "loss": 0.8991, "step": 2579 }, { "epoch": 0.35, "learning_rate": 1.5103558394548002e-05, "loss": 0.949, "step": 2580 }, { "epoch": 0.35, "learning_rate": 1.5099782533292184e-05, "loss": 0.9701, "step": 2581 }, { "epoch": 0.35, "learning_rate": 1.5096005689133203e-05, "loss": 0.9482, "step": 2582 }, { "epoch": 0.35, "learning_rate": 1.509222786279899e-05, "loss": 0.945, "step": 2583 }, { "epoch": 0.35, "learning_rate": 1.508844905501766e-05, "loss": 0.9619, "step": 2584 }, { "epoch": 0.35, "learning_rate": 1.5084669266517518e-05, "loss": 1.009, "step": 2585 }, { "epoch": 0.35, "learning_rate": 1.508088849802706e-05, "loss": 0.8854, "step": 2586 }, { "epoch": 0.35, "learning_rate": 1.5077106750274972e-05, "loss": 0.9954, "step": 2587 }, { "epoch": 0.35, "learning_rate": 1.5073324023990124e-05, "loss": 0.9718, "step": 2588 }, { "epoch": 0.35, "learning_rate": 1.5069540319901577e-05, "loss": 0.8426, "step": 2589 }, { "epoch": 0.35, "learning_rate": 1.5065755638738581e-05, "loss": 0.9262, "step": 2590 }, { "epoch": 0.35, "learning_rate": 1.5061969981230577e-05, "loss": 0.8612, "step": 2591 }, { "epoch": 0.35, "learning_rate": 1.505818334810719e-05, "loss": 0.9524, "step": 2592 }, { "epoch": 0.35, "learning_rate": 1.5054395740098228e-05, "loss": 0.9106, "step": 2593 }, { "epoch": 0.35, "learning_rate": 1.5050607157933703e-05, "loss": 0.9522, "step": 2594 }, { "epoch": 0.35, "learning_rate": 1.5046817602343797e-05, "loss": 0.9295, "step": 2595 }, { "epoch": 0.35, "learning_rate": 1.5043027074058891e-05, "loss": 0.9355, "step": 2596 }, { "epoch": 0.35, "learning_rate": 1.503923557380955e-05, "loss": 0.9422, "step": 2597 }, { "epoch": 0.35, "learning_rate": 1.5035443102326523e-05, "loss": 0.858, "step": 2598 }, { "epoch": 0.35, "learning_rate": 1.5031649660340754e-05, "loss": 0.9766, "step": 2599 }, { "epoch": 0.35, "learning_rate": 1.5027855248583368e-05, "loss": 0.8862, "step": 2600 }, { "epoch": 0.35, "learning_rate": 1.5024059867785678e-05, "loss": 0.8797, "step": 2601 }, { "epoch": 0.35, "learning_rate": 1.5020263518679183e-05, "loss": 0.9554, "step": 2602 }, { "epoch": 0.35, "learning_rate": 1.5016466201995572e-05, "loss": 0.9053, "step": 2603 }, { "epoch": 0.35, "learning_rate": 1.5012667918466716e-05, "loss": 0.9149, "step": 2604 }, { "epoch": 0.35, "learning_rate": 1.5008868668824676e-05, "loss": 0.9472, "step": 2605 }, { "epoch": 0.35, "learning_rate": 1.5005068453801697e-05, "loss": 0.9454, "step": 2606 }, { "epoch": 0.35, "learning_rate": 1.500126727413021e-05, "loss": 0.972, "step": 2607 }, { "epoch": 0.35, "learning_rate": 1.4997465130542838e-05, "loss": 0.95, "step": 2608 }, { "epoch": 0.35, "learning_rate": 1.4993662023772379e-05, "loss": 0.9309, "step": 2609 }, { "epoch": 0.35, "learning_rate": 1.4989857954551826e-05, "loss": 0.9892, "step": 2610 }, { "epoch": 0.35, "learning_rate": 1.4986052923614347e-05, "loss": 0.9646, "step": 2611 }, { "epoch": 0.35, "learning_rate": 1.4982246931693309e-05, "loss": 0.9498, "step": 2612 }, { "epoch": 0.35, "learning_rate": 1.4978439979522255e-05, "loss": 0.8891, "step": 2613 }, { "epoch": 0.35, "learning_rate": 1.4974632067834918e-05, "loss": 0.9098, "step": 2614 }, { "epoch": 0.35, "learning_rate": 1.4970823197365208e-05, "loss": 1.0018, "step": 2615 }, { "epoch": 0.35, "learning_rate": 1.4967013368847229e-05, "loss": 0.8946, "step": 2616 }, { "epoch": 0.35, "learning_rate": 1.4963202583015264e-05, "loss": 0.8983, "step": 2617 }, { "epoch": 0.35, "learning_rate": 1.4959390840603786e-05, "loss": 0.9289, "step": 2618 }, { "epoch": 0.35, "learning_rate": 1.4955578142347442e-05, "loss": 0.9451, "step": 2619 }, { "epoch": 0.36, "learning_rate": 1.4951764488981075e-05, "loss": 0.914, "step": 2620 }, { "epoch": 0.36, "learning_rate": 1.4947949881239706e-05, "loss": 0.9966, "step": 2621 }, { "epoch": 0.36, "learning_rate": 1.494413431985854e-05, "loss": 0.9843, "step": 2622 }, { "epoch": 0.36, "learning_rate": 1.4940317805572964e-05, "loss": 0.8443, "step": 2623 }, { "epoch": 0.36, "learning_rate": 1.4936500339118556e-05, "loss": 0.9279, "step": 2624 }, { "epoch": 0.36, "learning_rate": 1.4932681921231072e-05, "loss": 0.8579, "step": 2625 }, { "epoch": 0.36, "learning_rate": 1.4928862552646448e-05, "loss": 0.9139, "step": 2626 }, { "epoch": 0.36, "learning_rate": 1.4925042234100815e-05, "loss": 0.9131, "step": 2627 }, { "epoch": 0.36, "learning_rate": 1.4921220966330472e-05, "loss": 0.9435, "step": 2628 }, { "epoch": 0.36, "learning_rate": 1.4917398750071912e-05, "loss": 0.9874, "step": 2629 }, { "epoch": 0.36, "learning_rate": 1.4913575586061809e-05, "loss": 0.9137, "step": 2630 }, { "epoch": 0.36, "learning_rate": 1.4909751475037014e-05, "loss": 0.9294, "step": 2631 }, { "epoch": 0.36, "learning_rate": 1.4905926417734566e-05, "loss": 1.0156, "step": 2632 }, { "epoch": 0.36, "learning_rate": 1.4902100414891685e-05, "loss": 0.8587, "step": 2633 }, { "epoch": 0.36, "learning_rate": 1.4898273467245775e-05, "loss": 0.9043, "step": 2634 }, { "epoch": 0.36, "learning_rate": 1.4894445575534418e-05, "loss": 0.9506, "step": 2635 }, { "epoch": 0.36, "learning_rate": 1.4890616740495379e-05, "loss": 0.8687, "step": 2636 }, { "epoch": 0.36, "learning_rate": 1.4886786962866608e-05, "loss": 0.8526, "step": 2637 }, { "epoch": 0.36, "learning_rate": 1.4882956243386233e-05, "loss": 0.8854, "step": 2638 }, { "epoch": 0.36, "learning_rate": 1.487912458279257e-05, "loss": 0.8904, "step": 2639 }, { "epoch": 0.36, "learning_rate": 1.4875291981824106e-05, "loss": 0.9659, "step": 2640 }, { "epoch": 0.36, "learning_rate": 1.4871458441219515e-05, "loss": 0.926, "step": 2641 }, { "epoch": 0.36, "learning_rate": 1.4867623961717653e-05, "loss": 0.9069, "step": 2642 }, { "epoch": 0.36, "learning_rate": 1.486378854405756e-05, "loss": 0.9445, "step": 2643 }, { "epoch": 0.36, "learning_rate": 1.4859952188978448e-05, "loss": 0.9402, "step": 2644 }, { "epoch": 0.36, "learning_rate": 1.4856114897219714e-05, "loss": 0.8975, "step": 2645 }, { "epoch": 0.36, "learning_rate": 1.4852276669520938e-05, "loss": 0.9586, "step": 2646 }, { "epoch": 0.36, "learning_rate": 1.4848437506621876e-05, "loss": 0.8987, "step": 2647 }, { "epoch": 0.36, "learning_rate": 1.484459740926247e-05, "loss": 0.9613, "step": 2648 }, { "epoch": 0.36, "learning_rate": 1.4840756378182833e-05, "loss": 0.9887, "step": 2649 }, { "epoch": 0.36, "learning_rate": 1.4836914414123271e-05, "loss": 0.9767, "step": 2650 }, { "epoch": 0.36, "learning_rate": 1.4833071517824254e-05, "loss": 0.8815, "step": 2651 }, { "epoch": 0.36, "learning_rate": 1.4829227690026448e-05, "loss": 0.8904, "step": 2652 }, { "epoch": 0.36, "learning_rate": 1.4825382931470686e-05, "loss": 0.8907, "step": 2653 }, { "epoch": 0.36, "learning_rate": 1.4821537242897985e-05, "loss": 0.9588, "step": 2654 }, { "epoch": 0.36, "learning_rate": 1.4817690625049542e-05, "loss": 0.9682, "step": 2655 }, { "epoch": 0.36, "learning_rate": 1.4813843078666733e-05, "loss": 0.855, "step": 2656 }, { "epoch": 0.36, "learning_rate": 1.4809994604491111e-05, "loss": 0.9194, "step": 2657 }, { "epoch": 0.36, "learning_rate": 1.4806145203264413e-05, "loss": 0.9362, "step": 2658 }, { "epoch": 0.36, "learning_rate": 1.4802294875728542e-05, "loss": 0.8962, "step": 2659 }, { "epoch": 0.36, "learning_rate": 1.4798443622625598e-05, "loss": 0.8856, "step": 2660 }, { "epoch": 0.36, "learning_rate": 1.4794591444697843e-05, "loss": 0.9201, "step": 2661 }, { "epoch": 0.36, "learning_rate": 1.4790738342687729e-05, "loss": 0.9229, "step": 2662 }, { "epoch": 0.36, "learning_rate": 1.4786884317337875e-05, "loss": 0.8821, "step": 2663 }, { "epoch": 0.36, "learning_rate": 1.478302936939109e-05, "loss": 0.9298, "step": 2664 }, { "epoch": 0.36, "learning_rate": 1.4779173499590353e-05, "loss": 0.9247, "step": 2665 }, { "epoch": 0.36, "learning_rate": 1.4775316708678819e-05, "loss": 0.9179, "step": 2666 }, { "epoch": 0.36, "learning_rate": 1.477145899739983e-05, "loss": 0.8788, "step": 2667 }, { "epoch": 0.36, "learning_rate": 1.4767600366496893e-05, "loss": 0.9039, "step": 2668 }, { "epoch": 0.36, "learning_rate": 1.4763740816713704e-05, "loss": 0.8441, "step": 2669 }, { "epoch": 0.36, "learning_rate": 1.4759880348794127e-05, "loss": 0.8893, "step": 2670 }, { "epoch": 0.36, "learning_rate": 1.4756018963482208e-05, "loss": 0.9641, "step": 2671 }, { "epoch": 0.36, "learning_rate": 1.475215666152217e-05, "loss": 0.9978, "step": 2672 }, { "epoch": 0.36, "learning_rate": 1.474829344365841e-05, "loss": 0.9612, "step": 2673 }, { "epoch": 0.36, "learning_rate": 1.4744429310635501e-05, "loss": 0.907, "step": 2674 }, { "epoch": 0.36, "learning_rate": 1.4740564263198196e-05, "loss": 0.9399, "step": 2675 }, { "epoch": 0.36, "learning_rate": 1.4736698302091423e-05, "loss": 0.902, "step": 2676 }, { "epoch": 0.36, "learning_rate": 1.473283142806028e-05, "loss": 0.9196, "step": 2677 }, { "epoch": 0.36, "learning_rate": 1.4728963641850056e-05, "loss": 0.8913, "step": 2678 }, { "epoch": 0.36, "learning_rate": 1.47250949442062e-05, "loss": 0.9872, "step": 2679 }, { "epoch": 0.36, "learning_rate": 1.4721225335874343e-05, "loss": 0.8501, "step": 2680 }, { "epoch": 0.36, "learning_rate": 1.471735481760029e-05, "loss": 0.9287, "step": 2681 }, { "epoch": 0.36, "learning_rate": 1.4713483390130027e-05, "loss": 0.9501, "step": 2682 }, { "epoch": 0.36, "learning_rate": 1.4709611054209711e-05, "loss": 1.0094, "step": 2683 }, { "epoch": 0.36, "learning_rate": 1.4705737810585667e-05, "loss": 0.9132, "step": 2684 }, { "epoch": 0.36, "learning_rate": 1.4701863660004411e-05, "loss": 0.9292, "step": 2685 }, { "epoch": 0.36, "learning_rate": 1.4697988603212619e-05, "loss": 0.8574, "step": 2686 }, { "epoch": 0.36, "learning_rate": 1.469411264095715e-05, "loss": 0.9598, "step": 2687 }, { "epoch": 0.36, "learning_rate": 1.4690235773985035e-05, "loss": 0.9132, "step": 2688 }, { "epoch": 0.36, "learning_rate": 1.4686358003043476e-05, "loss": 0.9144, "step": 2689 }, { "epoch": 0.36, "learning_rate": 1.4682479328879857e-05, "loss": 0.9381, "step": 2690 }, { "epoch": 0.36, "learning_rate": 1.4678599752241728e-05, "loss": 0.8601, "step": 2691 }, { "epoch": 0.36, "learning_rate": 1.467471927387682e-05, "loss": 0.9012, "step": 2692 }, { "epoch": 0.37, "learning_rate": 1.4670837894533032e-05, "loss": 0.933, "step": 2693 }, { "epoch": 0.37, "learning_rate": 1.466695561495844e-05, "loss": 0.9139, "step": 2694 }, { "epoch": 0.37, "learning_rate": 1.4663072435901293e-05, "loss": 0.9297, "step": 2695 }, { "epoch": 0.37, "learning_rate": 1.465918835811001e-05, "loss": 0.9173, "step": 2696 }, { "epoch": 0.37, "learning_rate": 1.465530338233319e-05, "loss": 0.9076, "step": 2697 }, { "epoch": 0.37, "learning_rate": 1.4651417509319598e-05, "loss": 0.9537, "step": 2698 }, { "epoch": 0.37, "learning_rate": 1.4647530739818179e-05, "loss": 0.8366, "step": 2699 }, { "epoch": 0.37, "learning_rate": 1.4643643074578045e-05, "loss": 0.9594, "step": 2700 }, { "epoch": 0.37, "learning_rate": 1.463975451434848e-05, "loss": 0.9231, "step": 2701 }, { "epoch": 0.37, "learning_rate": 1.4635865059878947e-05, "loss": 0.9198, "step": 2702 }, { "epoch": 0.37, "learning_rate": 1.463197471191907e-05, "loss": 0.9061, "step": 2703 }, { "epoch": 0.37, "learning_rate": 1.4628083471218664e-05, "loss": 0.8804, "step": 2704 }, { "epoch": 0.37, "learning_rate": 1.4624191338527698e-05, "loss": 0.9198, "step": 2705 }, { "epoch": 0.37, "learning_rate": 1.462029831459632e-05, "loss": 0.9432, "step": 2706 }, { "epoch": 0.37, "learning_rate": 1.4616404400174848e-05, "loss": 0.9895, "step": 2707 }, { "epoch": 0.37, "learning_rate": 1.4612509596013777e-05, "loss": 0.9014, "step": 2708 }, { "epoch": 0.37, "learning_rate": 1.4608613902863769e-05, "loss": 0.8734, "step": 2709 }, { "epoch": 0.37, "learning_rate": 1.4604717321475652e-05, "loss": 0.9791, "step": 2710 }, { "epoch": 0.37, "learning_rate": 1.4600819852600437e-05, "loss": 0.9415, "step": 2711 }, { "epoch": 0.37, "learning_rate": 1.4596921496989297e-05, "loss": 0.8789, "step": 2712 }, { "epoch": 0.37, "learning_rate": 1.459302225539358e-05, "loss": 0.8883, "step": 2713 }, { "epoch": 0.37, "learning_rate": 1.4589122128564806e-05, "loss": 0.9909, "step": 2714 }, { "epoch": 0.37, "learning_rate": 1.458522111725466e-05, "loss": 0.8637, "step": 2715 }, { "epoch": 0.37, "learning_rate": 1.4581319222215e-05, "loss": 0.9386, "step": 2716 }, { "epoch": 0.37, "learning_rate": 1.4577416444197858e-05, "loss": 0.922, "step": 2717 }, { "epoch": 0.37, "learning_rate": 1.4573512783955435e-05, "loss": 0.9606, "step": 2718 }, { "epoch": 0.37, "learning_rate": 1.4569608242240092e-05, "loss": 0.8813, "step": 2719 }, { "epoch": 0.37, "learning_rate": 1.456570281980438e-05, "loss": 0.9257, "step": 2720 }, { "epoch": 0.37, "learning_rate": 1.4561796517400996e-05, "loss": 0.9289, "step": 2721 }, { "epoch": 0.37, "learning_rate": 1.4557889335782827e-05, "loss": 0.8875, "step": 2722 }, { "epoch": 0.37, "learning_rate": 1.455398127570292e-05, "loss": 0.9182, "step": 2723 }, { "epoch": 0.37, "learning_rate": 1.4550072337914487e-05, "loss": 0.9797, "step": 2724 }, { "epoch": 0.37, "learning_rate": 1.4546162523170922e-05, "loss": 0.9274, "step": 2725 }, { "epoch": 0.37, "learning_rate": 1.4542251832225774e-05, "loss": 0.9414, "step": 2726 }, { "epoch": 0.37, "learning_rate": 1.4538340265832772e-05, "loss": 0.8781, "step": 2727 }, { "epoch": 0.37, "learning_rate": 1.4534427824745804e-05, "loss": 0.9481, "step": 2728 }, { "epoch": 0.37, "learning_rate": 1.4530514509718938e-05, "loss": 0.8783, "step": 2729 }, { "epoch": 0.37, "learning_rate": 1.4526600321506403e-05, "loss": 0.9308, "step": 2730 }, { "epoch": 0.37, "learning_rate": 1.4522685260862593e-05, "loss": 0.998, "step": 2731 }, { "epoch": 0.37, "learning_rate": 1.4518769328542077e-05, "loss": 0.9152, "step": 2732 }, { "epoch": 0.37, "learning_rate": 1.4514852525299587e-05, "loss": 0.9016, "step": 2733 }, { "epoch": 0.37, "learning_rate": 1.4510934851890032e-05, "loss": 0.9112, "step": 2734 }, { "epoch": 0.37, "learning_rate": 1.4507016309068477e-05, "loss": 0.9591, "step": 2735 }, { "epoch": 0.37, "learning_rate": 1.4503096897590161e-05, "loss": 0.9518, "step": 2736 }, { "epoch": 0.37, "learning_rate": 1.4499176618210489e-05, "loss": 0.9192, "step": 2737 }, { "epoch": 0.37, "learning_rate": 1.4495255471685035e-05, "loss": 0.9524, "step": 2738 }, { "epoch": 0.37, "learning_rate": 1.4491333458769536e-05, "loss": 0.8834, "step": 2739 }, { "epoch": 0.37, "learning_rate": 1.44874105802199e-05, "loss": 0.9443, "step": 2740 }, { "epoch": 0.37, "learning_rate": 1.44834868367922e-05, "loss": 0.9139, "step": 2741 }, { "epoch": 0.37, "learning_rate": 1.4479562229242671e-05, "loss": 0.858, "step": 2742 }, { "epoch": 0.37, "learning_rate": 1.4475636758327731e-05, "loss": 0.8872, "step": 2743 }, { "epoch": 0.37, "learning_rate": 1.4471710424803948e-05, "loss": 0.9425, "step": 2744 }, { "epoch": 0.37, "learning_rate": 1.4467783229428056e-05, "loss": 0.8574, "step": 2745 }, { "epoch": 0.37, "learning_rate": 1.4463855172956964e-05, "loss": 0.8943, "step": 2746 }, { "epoch": 0.37, "learning_rate": 1.4459926256147745e-05, "loss": 0.9353, "step": 2747 }, { "epoch": 0.37, "learning_rate": 1.4455996479757634e-05, "loss": 0.9615, "step": 2748 }, { "epoch": 0.37, "learning_rate": 1.4452065844544033e-05, "loss": 0.9419, "step": 2749 }, { "epoch": 0.37, "learning_rate": 1.4448134351264519e-05, "loss": 0.8495, "step": 2750 }, { "epoch": 0.37, "learning_rate": 1.4444202000676812e-05, "loss": 0.9052, "step": 2751 }, { "epoch": 0.37, "learning_rate": 1.4440268793538819e-05, "loss": 0.9294, "step": 2752 }, { "epoch": 0.37, "learning_rate": 1.4436334730608603e-05, "loss": 0.961, "step": 2753 }, { "epoch": 0.37, "learning_rate": 1.4432399812644394e-05, "loss": 0.9043, "step": 2754 }, { "epoch": 0.37, "learning_rate": 1.4428464040404582e-05, "loss": 0.8971, "step": 2755 }, { "epoch": 0.37, "learning_rate": 1.4424527414647726e-05, "loss": 0.9293, "step": 2756 }, { "epoch": 0.37, "learning_rate": 1.4420589936132553e-05, "loss": 0.9519, "step": 2757 }, { "epoch": 0.37, "learning_rate": 1.4416651605617949e-05, "loss": 0.9089, "step": 2758 }, { "epoch": 0.37, "learning_rate": 1.4412712423862964e-05, "loss": 0.9343, "step": 2759 }, { "epoch": 0.37, "learning_rate": 1.4408772391626813e-05, "loss": 0.9964, "step": 2760 }, { "epoch": 0.37, "learning_rate": 1.4404831509668877e-05, "loss": 0.9778, "step": 2761 }, { "epoch": 0.37, "learning_rate": 1.44008897787487e-05, "loss": 0.9353, "step": 2762 }, { "epoch": 0.37, "learning_rate": 1.4396947199625984e-05, "loss": 0.932, "step": 2763 }, { "epoch": 0.37, "learning_rate": 1.4393003773060605e-05, "loss": 0.8756, "step": 2764 }, { "epoch": 0.37, "learning_rate": 1.4389059499812594e-05, "loss": 0.8731, "step": 2765 }, { "epoch": 0.37, "learning_rate": 1.4385114380642148e-05, "loss": 0.9868, "step": 2766 }, { "epoch": 0.38, "learning_rate": 1.438116841630963e-05, "loss": 0.8389, "step": 2767 }, { "epoch": 0.38, "learning_rate": 1.4377221607575558e-05, "loss": 0.8148, "step": 2768 }, { "epoch": 0.38, "learning_rate": 1.437327395520062e-05, "loss": 0.8849, "step": 2769 }, { "epoch": 0.38, "learning_rate": 1.4369325459945661e-05, "loss": 0.9141, "step": 2770 }, { "epoch": 0.38, "learning_rate": 1.4365376122571699e-05, "loss": 0.8951, "step": 2771 }, { "epoch": 0.38, "learning_rate": 1.43614259438399e-05, "loss": 0.9191, "step": 2772 }, { "epoch": 0.38, "learning_rate": 1.4357474924511601e-05, "loss": 0.9698, "step": 2773 }, { "epoch": 0.38, "learning_rate": 1.43535230653483e-05, "loss": 0.9802, "step": 2774 }, { "epoch": 0.38, "learning_rate": 1.4349570367111655e-05, "loss": 0.9864, "step": 2775 }, { "epoch": 0.38, "learning_rate": 1.4345616830563488e-05, "loss": 0.9046, "step": 2776 }, { "epoch": 0.38, "learning_rate": 1.4341662456465777e-05, "loss": 0.936, "step": 2777 }, { "epoch": 0.38, "learning_rate": 1.4337707245580675e-05, "loss": 0.9199, "step": 2778 }, { "epoch": 0.38, "learning_rate": 1.4333751198670474e-05, "loss": 0.9687, "step": 2779 }, { "epoch": 0.38, "learning_rate": 1.432979431649765e-05, "loss": 0.9411, "step": 2780 }, { "epoch": 0.38, "learning_rate": 1.4325836599824828e-05, "loss": 0.9531, "step": 2781 }, { "epoch": 0.38, "learning_rate": 1.4321878049414788e-05, "loss": 0.9203, "step": 2782 }, { "epoch": 0.38, "learning_rate": 1.4317918666030492e-05, "loss": 0.9364, "step": 2783 }, { "epoch": 0.38, "learning_rate": 1.4313958450435039e-05, "loss": 0.9673, "step": 2784 }, { "epoch": 0.38, "learning_rate": 1.4309997403391703e-05, "loss": 0.9642, "step": 2785 }, { "epoch": 0.38, "learning_rate": 1.4306035525663911e-05, "loss": 0.9386, "step": 2786 }, { "epoch": 0.38, "learning_rate": 1.4302072818015253e-05, "loss": 0.9716, "step": 2787 }, { "epoch": 0.38, "learning_rate": 1.4298109281209484e-05, "loss": 0.9336, "step": 2788 }, { "epoch": 0.38, "learning_rate": 1.4294144916010506e-05, "loss": 0.9064, "step": 2789 }, { "epoch": 0.38, "learning_rate": 1.4290179723182395e-05, "loss": 0.8788, "step": 2790 }, { "epoch": 0.38, "learning_rate": 1.4286213703489375e-05, "loss": 0.9442, "step": 2791 }, { "epoch": 0.38, "learning_rate": 1.4282246857695838e-05, "loss": 0.9858, "step": 2792 }, { "epoch": 0.38, "learning_rate": 1.4278279186566326e-05, "loss": 0.9916, "step": 2793 }, { "epoch": 0.38, "learning_rate": 1.4274310690865551e-05, "loss": 0.9639, "step": 2794 }, { "epoch": 0.38, "learning_rate": 1.4270341371358379e-05, "loss": 0.8626, "step": 2795 }, { "epoch": 0.38, "learning_rate": 1.4266371228809825e-05, "loss": 0.9511, "step": 2796 }, { "epoch": 0.38, "learning_rate": 1.4262400263985083e-05, "loss": 0.9249, "step": 2797 }, { "epoch": 0.38, "learning_rate": 1.4258428477649484e-05, "loss": 0.9434, "step": 2798 }, { "epoch": 0.38, "learning_rate": 1.4254455870568538e-05, "loss": 0.8483, "step": 2799 }, { "epoch": 0.38, "learning_rate": 1.4250482443507894e-05, "loss": 0.9288, "step": 2800 }, { "epoch": 0.38, "learning_rate": 1.4246508197233374e-05, "loss": 0.9685, "step": 2801 }, { "epoch": 0.38, "learning_rate": 1.4242533132510945e-05, "loss": 0.9282, "step": 2802 }, { "epoch": 0.38, "learning_rate": 1.4238557250106744e-05, "loss": 0.9348, "step": 2803 }, { "epoch": 0.38, "learning_rate": 1.423458055078706e-05, "loss": 0.8542, "step": 2804 }, { "epoch": 0.38, "learning_rate": 1.4230603035318339e-05, "loss": 0.9015, "step": 2805 }, { "epoch": 0.38, "learning_rate": 1.422662470446718e-05, "loss": 0.8985, "step": 2806 }, { "epoch": 0.38, "learning_rate": 1.4222645559000347e-05, "loss": 0.9096, "step": 2807 }, { "epoch": 0.38, "learning_rate": 1.4218665599684762e-05, "loss": 1.0036, "step": 2808 }, { "epoch": 0.38, "learning_rate": 1.4214684827287495e-05, "loss": 0.9521, "step": 2809 }, { "epoch": 0.38, "learning_rate": 1.4210703242575778e-05, "loss": 0.8436, "step": 2810 }, { "epoch": 0.38, "learning_rate": 1.4206720846317002e-05, "loss": 0.9419, "step": 2811 }, { "epoch": 0.38, "learning_rate": 1.4202737639278708e-05, "loss": 0.9324, "step": 2812 }, { "epoch": 0.38, "learning_rate": 1.4198753622228599e-05, "loss": 0.814, "step": 2813 }, { "epoch": 0.38, "learning_rate": 1.4194768795934529e-05, "loss": 0.9405, "step": 2814 }, { "epoch": 0.38, "learning_rate": 1.4190783161164512e-05, "loss": 0.9124, "step": 2815 }, { "epoch": 0.38, "learning_rate": 1.4186796718686719e-05, "loss": 0.9193, "step": 2816 }, { "epoch": 0.38, "learning_rate": 1.4182809469269474e-05, "loss": 0.975, "step": 2817 }, { "epoch": 0.38, "learning_rate": 1.4178821413681254e-05, "loss": 0.9432, "step": 2818 }, { "epoch": 0.38, "learning_rate": 1.4174832552690697e-05, "loss": 0.9604, "step": 2819 }, { "epoch": 0.38, "learning_rate": 1.4170842887066592e-05, "loss": 0.8941, "step": 2820 }, { "epoch": 0.38, "learning_rate": 1.4166852417577882e-05, "loss": 0.967, "step": 2821 }, { "epoch": 0.38, "learning_rate": 1.4162861144993671e-05, "loss": 0.9185, "step": 2822 }, { "epoch": 0.38, "learning_rate": 1.4158869070083214e-05, "loss": 0.9214, "step": 2823 }, { "epoch": 0.38, "learning_rate": 1.4154876193615921e-05, "loss": 0.9307, "step": 2824 }, { "epoch": 0.38, "learning_rate": 1.4150882516361357e-05, "loss": 0.9081, "step": 2825 }, { "epoch": 0.38, "learning_rate": 1.4146888039089238e-05, "loss": 0.9466, "step": 2826 }, { "epoch": 0.38, "learning_rate": 1.4142892762569439e-05, "loss": 1.0133, "step": 2827 }, { "epoch": 0.38, "learning_rate": 1.4138896687571983e-05, "loss": 0.9769, "step": 2828 }, { "epoch": 0.38, "learning_rate": 1.4134899814867055e-05, "loss": 0.8973, "step": 2829 }, { "epoch": 0.38, "learning_rate": 1.4130902145224989e-05, "loss": 0.9537, "step": 2830 }, { "epoch": 0.38, "learning_rate": 1.4126903679416272e-05, "loss": 0.9564, "step": 2831 }, { "epoch": 0.38, "learning_rate": 1.412290441821155e-05, "loss": 0.8528, "step": 2832 }, { "epoch": 0.38, "learning_rate": 1.4118904362381609e-05, "loss": 0.949, "step": 2833 }, { "epoch": 0.38, "learning_rate": 1.4114903512697407e-05, "loss": 0.9946, "step": 2834 }, { "epoch": 0.38, "learning_rate": 1.4110901869930039e-05, "loss": 0.9903, "step": 2835 }, { "epoch": 0.38, "learning_rate": 1.410689943485076e-05, "loss": 0.9353, "step": 2836 }, { "epoch": 0.38, "learning_rate": 1.4102896208230979e-05, "loss": 0.9503, "step": 2837 }, { "epoch": 0.38, "learning_rate": 1.409889219084225e-05, "loss": 0.9284, "step": 2838 }, { "epoch": 0.38, "learning_rate": 1.409488738345629e-05, "loss": 0.9108, "step": 2839 }, { "epoch": 0.38, "learning_rate": 1.4090881786844958e-05, "loss": 0.9284, "step": 2840 }, { "epoch": 0.39, "learning_rate": 1.4086875401780276e-05, "loss": 0.953, "step": 2841 }, { "epoch": 0.39, "learning_rate": 1.4082868229034405e-05, "loss": 0.974, "step": 2842 }, { "epoch": 0.39, "learning_rate": 1.4078860269379673e-05, "loss": 1.0041, "step": 2843 }, { "epoch": 0.39, "learning_rate": 1.4074851523588542e-05, "loss": 0.9603, "step": 2844 }, { "epoch": 0.39, "learning_rate": 1.4070841992433643e-05, "loss": 0.9928, "step": 2845 }, { "epoch": 0.39, "learning_rate": 1.4066831676687747e-05, "loss": 0.8151, "step": 2846 }, { "epoch": 0.39, "learning_rate": 1.4062820577123778e-05, "loss": 0.8988, "step": 2847 }, { "epoch": 0.39, "learning_rate": 1.4058808694514814e-05, "loss": 0.9052, "step": 2848 }, { "epoch": 0.39, "learning_rate": 1.4054796029634082e-05, "loss": 0.8822, "step": 2849 }, { "epoch": 0.39, "learning_rate": 1.4050782583254963e-05, "loss": 0.9662, "step": 2850 }, { "epoch": 0.39, "learning_rate": 1.404676835615098e-05, "loss": 0.9939, "step": 2851 }, { "epoch": 0.39, "learning_rate": 1.4042753349095817e-05, "loss": 0.9097, "step": 2852 }, { "epoch": 0.39, "learning_rate": 1.4038737562863305e-05, "loss": 0.9292, "step": 2853 }, { "epoch": 0.39, "learning_rate": 1.403472099822742e-05, "loss": 0.9496, "step": 2854 }, { "epoch": 0.39, "learning_rate": 1.4030703655962295e-05, "loss": 0.9204, "step": 2855 }, { "epoch": 0.39, "learning_rate": 1.4026685536842206e-05, "loss": 0.996, "step": 2856 }, { "epoch": 0.39, "learning_rate": 1.4022666641641589e-05, "loss": 0.8997, "step": 2857 }, { "epoch": 0.39, "learning_rate": 1.4018646971135015e-05, "loss": 0.9024, "step": 2858 }, { "epoch": 0.39, "learning_rate": 1.4014626526097218e-05, "loss": 0.9426, "step": 2859 }, { "epoch": 0.39, "learning_rate": 1.4010605307303074e-05, "loss": 0.9377, "step": 2860 }, { "epoch": 0.39, "learning_rate": 1.4006583315527609e-05, "loss": 0.9523, "step": 2861 }, { "epoch": 0.39, "learning_rate": 1.4002560551546001e-05, "loss": 0.8809, "step": 2862 }, { "epoch": 0.39, "learning_rate": 1.3998537016133571e-05, "loss": 0.8433, "step": 2863 }, { "epoch": 0.39, "learning_rate": 1.39945127100658e-05, "loss": 0.9038, "step": 2864 }, { "epoch": 0.39, "learning_rate": 1.3990487634118299e-05, "loss": 0.8968, "step": 2865 }, { "epoch": 0.39, "learning_rate": 1.398646178906685e-05, "loss": 0.781, "step": 2866 }, { "epoch": 0.39, "learning_rate": 1.3982435175687363e-05, "loss": 0.9044, "step": 2867 }, { "epoch": 0.39, "learning_rate": 1.3978407794755906e-05, "loss": 0.8918, "step": 2868 }, { "epoch": 0.39, "learning_rate": 1.3974379647048698e-05, "loss": 0.9969, "step": 2869 }, { "epoch": 0.39, "learning_rate": 1.3970350733342094e-05, "loss": 0.9369, "step": 2870 }, { "epoch": 0.39, "learning_rate": 1.3966321054412613e-05, "loss": 0.8918, "step": 2871 }, { "epoch": 0.39, "learning_rate": 1.3962290611036904e-05, "loss": 0.8738, "step": 2872 }, { "epoch": 0.39, "learning_rate": 1.3958259403991776e-05, "loss": 0.9436, "step": 2873 }, { "epoch": 0.39, "learning_rate": 1.3954227434054182e-05, "loss": 0.8906, "step": 2874 }, { "epoch": 0.39, "learning_rate": 1.395019470200122e-05, "loss": 0.9078, "step": 2875 }, { "epoch": 0.39, "learning_rate": 1.3946161208610134e-05, "loss": 0.9581, "step": 2876 }, { "epoch": 0.39, "learning_rate": 1.3942126954658317e-05, "loss": 0.9409, "step": 2877 }, { "epoch": 0.39, "learning_rate": 1.3938091940923312e-05, "loss": 0.8513, "step": 2878 }, { "epoch": 0.39, "learning_rate": 1.3934056168182802e-05, "loss": 0.8697, "step": 2879 }, { "epoch": 0.39, "learning_rate": 1.3930019637214618e-05, "loss": 0.9269, "step": 2880 }, { "epoch": 0.39, "learning_rate": 1.3925982348796739e-05, "loss": 0.8858, "step": 2881 }, { "epoch": 0.39, "learning_rate": 1.3921944303707287e-05, "loss": 0.9326, "step": 2882 }, { "epoch": 0.39, "learning_rate": 1.3917905502724538e-05, "loss": 0.9362, "step": 2883 }, { "epoch": 0.39, "learning_rate": 1.3913865946626898e-05, "loss": 0.887, "step": 2884 }, { "epoch": 0.39, "learning_rate": 1.390982563619294e-05, "loss": 0.8865, "step": 2885 }, { "epoch": 0.39, "learning_rate": 1.390578457220136e-05, "loss": 0.897, "step": 2886 }, { "epoch": 0.39, "learning_rate": 1.3901742755431016e-05, "loss": 0.9419, "step": 2887 }, { "epoch": 0.39, "learning_rate": 1.38977001866609e-05, "loss": 0.8855, "step": 2888 }, { "epoch": 0.39, "learning_rate": 1.389365686667016e-05, "loss": 0.9702, "step": 2889 }, { "epoch": 0.39, "learning_rate": 1.3889612796238078e-05, "loss": 0.889, "step": 2890 }, { "epoch": 0.39, "learning_rate": 1.3885567976144088e-05, "loss": 0.9297, "step": 2891 }, { "epoch": 0.39, "learning_rate": 1.3881522407167763e-05, "loss": 0.9649, "step": 2892 }, { "epoch": 0.39, "learning_rate": 1.3877476090088822e-05, "loss": 0.858, "step": 2893 }, { "epoch": 0.39, "learning_rate": 1.3873429025687132e-05, "loss": 0.904, "step": 2894 }, { "epoch": 0.39, "learning_rate": 1.3869381214742701e-05, "loss": 0.9243, "step": 2895 }, { "epoch": 0.39, "learning_rate": 1.386533265803568e-05, "loss": 0.9799, "step": 2896 }, { "epoch": 0.39, "learning_rate": 1.3861283356346367e-05, "loss": 0.9265, "step": 2897 }, { "epoch": 0.39, "learning_rate": 1.3857233310455199e-05, "loss": 0.9287, "step": 2898 }, { "epoch": 0.39, "learning_rate": 1.385318252114276e-05, "loss": 0.9674, "step": 2899 }, { "epoch": 0.39, "learning_rate": 1.3849130989189773e-05, "loss": 1.0018, "step": 2900 }, { "epoch": 0.39, "learning_rate": 1.3845078715377116e-05, "loss": 0.9268, "step": 2901 }, { "epoch": 0.39, "learning_rate": 1.3841025700485793e-05, "loss": 0.9123, "step": 2902 }, { "epoch": 0.39, "learning_rate": 1.383697194529696e-05, "loss": 0.8719, "step": 2903 }, { "epoch": 0.39, "learning_rate": 1.3832917450591918e-05, "loss": 0.9056, "step": 2904 }, { "epoch": 0.39, "learning_rate": 1.3828862217152104e-05, "loss": 0.9797, "step": 2905 }, { "epoch": 0.39, "learning_rate": 1.3824806245759107e-05, "loss": 0.917, "step": 2906 }, { "epoch": 0.39, "learning_rate": 1.3820749537194641e-05, "loss": 0.8803, "step": 2907 }, { "epoch": 0.39, "learning_rate": 1.3816692092240584e-05, "loss": 0.9934, "step": 2908 }, { "epoch": 0.39, "learning_rate": 1.3812633911678938e-05, "loss": 0.9539, "step": 2909 }, { "epoch": 0.39, "learning_rate": 1.3808574996291858e-05, "loss": 0.9619, "step": 2910 }, { "epoch": 0.39, "learning_rate": 1.3804515346861633e-05, "loss": 0.9421, "step": 2911 }, { "epoch": 0.39, "learning_rate": 1.3800454964170697e-05, "loss": 0.9528, "step": 2912 }, { "epoch": 0.39, "learning_rate": 1.3796393849001628e-05, "loss": 0.9012, "step": 2913 }, { "epoch": 0.39, "learning_rate": 1.3792332002137138e-05, "loss": 0.8775, "step": 2914 }, { "epoch": 0.4, "learning_rate": 1.378826942436009e-05, "loss": 0.8842, "step": 2915 }, { "epoch": 0.4, "learning_rate": 1.3784206116453475e-05, "loss": 0.8876, "step": 2916 }, { "epoch": 0.4, "learning_rate": 1.3780142079200439e-05, "loss": 0.9735, "step": 2917 }, { "epoch": 0.4, "learning_rate": 1.377607731338426e-05, "loss": 0.9267, "step": 2918 }, { "epoch": 0.4, "learning_rate": 1.3772011819788354e-05, "loss": 0.9181, "step": 2919 }, { "epoch": 0.4, "learning_rate": 1.3767945599196284e-05, "loss": 0.9318, "step": 2920 }, { "epoch": 0.4, "learning_rate": 1.3763878652391749e-05, "loss": 0.9828, "step": 2921 }, { "epoch": 0.4, "learning_rate": 1.3759810980158592e-05, "loss": 0.9257, "step": 2922 }, { "epoch": 0.4, "learning_rate": 1.3755742583280792e-05, "loss": 0.8993, "step": 2923 }, { "epoch": 0.4, "learning_rate": 1.3751673462542465e-05, "loss": 0.954, "step": 2924 }, { "epoch": 0.4, "learning_rate": 1.3747603618727875e-05, "loss": 0.877, "step": 2925 }, { "epoch": 0.4, "learning_rate": 1.3743533052621415e-05, "loss": 0.9397, "step": 2926 }, { "epoch": 0.4, "learning_rate": 1.3739461765007631e-05, "loss": 0.8764, "step": 2927 }, { "epoch": 0.4, "learning_rate": 1.3735389756671193e-05, "loss": 0.9021, "step": 2928 }, { "epoch": 0.4, "learning_rate": 1.3731317028396922e-05, "loss": 0.9344, "step": 2929 }, { "epoch": 0.4, "learning_rate": 1.3727243580969767e-05, "loss": 0.9356, "step": 2930 }, { "epoch": 0.4, "learning_rate": 1.3723169415174827e-05, "loss": 0.9044, "step": 2931 }, { "epoch": 0.4, "learning_rate": 1.371909453179733e-05, "loss": 0.89, "step": 2932 }, { "epoch": 0.4, "learning_rate": 1.3715018931622644e-05, "loss": 0.9255, "step": 2933 }, { "epoch": 0.4, "learning_rate": 1.3710942615436282e-05, "loss": 0.9251, "step": 2934 }, { "epoch": 0.4, "learning_rate": 1.3706865584023884e-05, "loss": 0.9617, "step": 2935 }, { "epoch": 0.4, "learning_rate": 1.3702787838171243e-05, "loss": 0.8676, "step": 2936 }, { "epoch": 0.4, "learning_rate": 1.369870937866427e-05, "loss": 0.9354, "step": 2937 }, { "epoch": 0.4, "learning_rate": 1.3694630206289033e-05, "loss": 0.8587, "step": 2938 }, { "epoch": 0.4, "learning_rate": 1.3690550321831724e-05, "loss": 0.9408, "step": 2939 }, { "epoch": 0.4, "learning_rate": 1.3686469726078676e-05, "loss": 0.9497, "step": 2940 }, { "epoch": 0.4, "learning_rate": 1.3682388419816365e-05, "loss": 0.9425, "step": 2941 }, { "epoch": 0.4, "learning_rate": 1.367830640383139e-05, "loss": 0.9738, "step": 2942 }, { "epoch": 0.4, "learning_rate": 1.3674223678910505e-05, "loss": 0.885, "step": 2943 }, { "epoch": 0.4, "learning_rate": 1.3670140245840584e-05, "loss": 0.9165, "step": 2944 }, { "epoch": 0.4, "learning_rate": 1.3666056105408652e-05, "loss": 0.8608, "step": 2945 }, { "epoch": 0.4, "learning_rate": 1.3661971258401859e-05, "loss": 0.9552, "step": 2946 }, { "epoch": 0.4, "learning_rate": 1.3657885705607492e-05, "loss": 0.9364, "step": 2947 }, { "epoch": 0.4, "learning_rate": 1.365379944781298e-05, "loss": 0.935, "step": 2948 }, { "epoch": 0.4, "learning_rate": 1.3649712485805888e-05, "loss": 0.8993, "step": 2949 }, { "epoch": 0.4, "learning_rate": 1.3645624820373913e-05, "loss": 0.9422, "step": 2950 }, { "epoch": 0.4, "learning_rate": 1.3641536452304884e-05, "loss": 0.9764, "step": 2951 }, { "epoch": 0.4, "learning_rate": 1.3637447382386774e-05, "loss": 0.8978, "step": 2952 }, { "epoch": 0.4, "learning_rate": 1.3633357611407687e-05, "loss": 0.8834, "step": 2953 }, { "epoch": 0.4, "learning_rate": 1.362926714015586e-05, "loss": 0.9305, "step": 2954 }, { "epoch": 0.4, "learning_rate": 1.362517596941967e-05, "loss": 0.9438, "step": 2955 }, { "epoch": 0.4, "learning_rate": 1.3621084099987623e-05, "loss": 0.8996, "step": 2956 }, { "epoch": 0.4, "learning_rate": 1.3616991532648365e-05, "loss": 0.9239, "step": 2957 }, { "epoch": 0.4, "learning_rate": 1.3612898268190671e-05, "loss": 0.9527, "step": 2958 }, { "epoch": 0.4, "learning_rate": 1.3608804307403461e-05, "loss": 0.9944, "step": 2959 }, { "epoch": 0.4, "learning_rate": 1.3604709651075771e-05, "loss": 0.938, "step": 2960 }, { "epoch": 0.4, "learning_rate": 1.3600614299996791e-05, "loss": 0.8975, "step": 2961 }, { "epoch": 0.4, "learning_rate": 1.359651825495583e-05, "loss": 0.9699, "step": 2962 }, { "epoch": 0.4, "learning_rate": 1.3592421516742342e-05, "loss": 0.9017, "step": 2963 }, { "epoch": 0.4, "learning_rate": 1.3588324086145906e-05, "loss": 0.9034, "step": 2964 }, { "epoch": 0.4, "learning_rate": 1.3584225963956235e-05, "loss": 0.9234, "step": 2965 }, { "epoch": 0.4, "learning_rate": 1.3580127150963183e-05, "loss": 0.9199, "step": 2966 }, { "epoch": 0.4, "learning_rate": 1.3576027647956727e-05, "loss": 0.961, "step": 2967 }, { "epoch": 0.4, "learning_rate": 1.3571927455726985e-05, "loss": 0.827, "step": 2968 }, { "epoch": 0.4, "learning_rate": 1.3567826575064205e-05, "loss": 0.9063, "step": 2969 }, { "epoch": 0.4, "learning_rate": 1.3563725006758766e-05, "loss": 0.9154, "step": 2970 }, { "epoch": 0.4, "learning_rate": 1.3559622751601182e-05, "loss": 0.9043, "step": 2971 }, { "epoch": 0.4, "learning_rate": 1.3555519810382095e-05, "loss": 0.8845, "step": 2972 }, { "epoch": 0.4, "learning_rate": 1.355141618389229e-05, "loss": 0.8494, "step": 2973 }, { "epoch": 0.4, "learning_rate": 1.3547311872922668e-05, "loss": 0.9543, "step": 2974 }, { "epoch": 0.4, "learning_rate": 1.354320687826428e-05, "loss": 0.9463, "step": 2975 }, { "epoch": 0.4, "learning_rate": 1.353910120070829e-05, "loss": 0.9925, "step": 2976 }, { "epoch": 0.4, "learning_rate": 1.3534994841046007e-05, "loss": 0.9586, "step": 2977 }, { "epoch": 0.4, "learning_rate": 1.3530887800068872e-05, "loss": 0.9313, "step": 2978 }, { "epoch": 0.4, "learning_rate": 1.3526780078568444e-05, "loss": 0.8975, "step": 2979 }, { "epoch": 0.4, "learning_rate": 1.352267167733643e-05, "loss": 0.9086, "step": 2980 }, { "epoch": 0.4, "learning_rate": 1.3518562597164656e-05, "loss": 0.8339, "step": 2981 }, { "epoch": 0.4, "learning_rate": 1.351445283884508e-05, "loss": 0.8789, "step": 2982 }, { "epoch": 0.4, "learning_rate": 1.3510342403169799e-05, "loss": 0.9497, "step": 2983 }, { "epoch": 0.4, "learning_rate": 1.350623129093103e-05, "loss": 0.9317, "step": 2984 }, { "epoch": 0.4, "learning_rate": 1.3502119502921134e-05, "loss": 0.9169, "step": 2985 }, { "epoch": 0.4, "learning_rate": 1.3498007039932583e-05, "loss": 0.8747, "step": 2986 }, { "epoch": 0.4, "learning_rate": 1.3493893902757997e-05, "loss": 0.9251, "step": 2987 }, { "epoch": 0.4, "learning_rate": 1.3489780092190117e-05, "loss": 0.9184, "step": 2988 }, { "epoch": 0.41, "learning_rate": 1.3485665609021815e-05, "loss": 0.8854, "step": 2989 }, { "epoch": 0.41, "learning_rate": 1.3481550454046094e-05, "loss": 0.9129, "step": 2990 }, { "epoch": 0.41, "learning_rate": 1.3477434628056081e-05, "loss": 0.7553, "step": 2991 }, { "epoch": 0.41, "learning_rate": 1.3473318131845043e-05, "loss": 0.8918, "step": 2992 }, { "epoch": 0.41, "learning_rate": 1.3469200966206366e-05, "loss": 0.9412, "step": 2993 }, { "epoch": 0.41, "learning_rate": 1.3465083131933573e-05, "loss": 0.8953, "step": 2994 }, { "epoch": 0.41, "learning_rate": 1.346096462982031e-05, "loss": 0.9252, "step": 2995 }, { "epoch": 0.41, "learning_rate": 1.3456845460660355e-05, "loss": 0.9183, "step": 2996 }, { "epoch": 0.41, "learning_rate": 1.3452725625247612e-05, "loss": 0.925, "step": 2997 }, { "epoch": 0.41, "learning_rate": 1.3448605124376111e-05, "loss": 0.9549, "step": 2998 }, { "epoch": 0.41, "learning_rate": 1.344448395884002e-05, "loss": 0.9295, "step": 2999 }, { "epoch": 0.41, "learning_rate": 1.3440362129433626e-05, "loss": 0.9212, "step": 3000 }, { "epoch": 0.41, "learning_rate": 1.3436239636951351e-05, "loss": 0.8994, "step": 3001 }, { "epoch": 0.41, "learning_rate": 1.3432116482187734e-05, "loss": 0.8911, "step": 3002 }, { "epoch": 0.41, "learning_rate": 1.3427992665937455e-05, "loss": 0.9164, "step": 3003 }, { "epoch": 0.41, "learning_rate": 1.3423868188995308e-05, "loss": 0.8987, "step": 3004 }, { "epoch": 0.41, "learning_rate": 1.3419743052156229e-05, "loss": 0.8855, "step": 3005 }, { "epoch": 0.41, "learning_rate": 1.341561725621527e-05, "loss": 0.9349, "step": 3006 }, { "epoch": 0.41, "learning_rate": 1.3411490801967611e-05, "loss": 0.9568, "step": 3007 }, { "epoch": 0.41, "learning_rate": 1.3407363690208567e-05, "loss": 0.9233, "step": 3008 }, { "epoch": 0.41, "learning_rate": 1.3403235921733569e-05, "loss": 0.8963, "step": 3009 }, { "epoch": 0.41, "learning_rate": 1.339910749733818e-05, "loss": 0.9043, "step": 3010 }, { "epoch": 0.41, "learning_rate": 1.3394978417818095e-05, "loss": 0.9079, "step": 3011 }, { "epoch": 0.41, "learning_rate": 1.339084868396912e-05, "loss": 0.9304, "step": 3012 }, { "epoch": 0.41, "learning_rate": 1.3386718296587205e-05, "loss": 0.9109, "step": 3013 }, { "epoch": 0.41, "learning_rate": 1.338258725646841e-05, "loss": 0.9229, "step": 3014 }, { "epoch": 0.41, "learning_rate": 1.3378455564408937e-05, "loss": 0.999, "step": 3015 }, { "epoch": 0.41, "learning_rate": 1.3374323221205097e-05, "loss": 0.952, "step": 3016 }, { "epoch": 0.41, "learning_rate": 1.3370190227653339e-05, "loss": 0.9439, "step": 3017 }, { "epoch": 0.41, "learning_rate": 1.336605658455023e-05, "loss": 0.9307, "step": 3018 }, { "epoch": 0.41, "learning_rate": 1.3361922292692469e-05, "loss": 0.8464, "step": 3019 }, { "epoch": 0.41, "learning_rate": 1.3357787352876872e-05, "loss": 0.9008, "step": 3020 }, { "epoch": 0.41, "learning_rate": 1.3353651765900382e-05, "loss": 0.9245, "step": 3021 }, { "epoch": 0.41, "learning_rate": 1.3349515532560074e-05, "loss": 0.8703, "step": 3022 }, { "epoch": 0.41, "learning_rate": 1.3345378653653137e-05, "loss": 0.9066, "step": 3023 }, { "epoch": 0.41, "learning_rate": 1.3341241129976897e-05, "loss": 0.9731, "step": 3024 }, { "epoch": 0.41, "learning_rate": 1.3337102962328787e-05, "loss": 0.9109, "step": 3025 }, { "epoch": 0.41, "learning_rate": 1.3332964151506382e-05, "loss": 0.9454, "step": 3026 }, { "epoch": 0.41, "learning_rate": 1.332882469830737e-05, "loss": 0.884, "step": 3027 }, { "epoch": 0.41, "learning_rate": 1.3324684603529563e-05, "loss": 0.9848, "step": 3028 }, { "epoch": 0.41, "learning_rate": 1.3320543867970907e-05, "loss": 0.9726, "step": 3029 }, { "epoch": 0.41, "learning_rate": 1.3316402492429454e-05, "loss": 0.9734, "step": 3030 }, { "epoch": 0.41, "learning_rate": 1.3312260477703397e-05, "loss": 0.92, "step": 3031 }, { "epoch": 0.41, "learning_rate": 1.3308117824591045e-05, "loss": 0.903, "step": 3032 }, { "epoch": 0.41, "learning_rate": 1.330397453389082e-05, "loss": 0.9896, "step": 3033 }, { "epoch": 0.41, "learning_rate": 1.3299830606401285e-05, "loss": 0.8699, "step": 3034 }, { "epoch": 0.41, "learning_rate": 1.3295686042921115e-05, "loss": 0.9472, "step": 3035 }, { "epoch": 0.41, "learning_rate": 1.3291540844249108e-05, "loss": 0.9931, "step": 3036 }, { "epoch": 0.41, "learning_rate": 1.3287395011184188e-05, "loss": 0.8327, "step": 3037 }, { "epoch": 0.41, "learning_rate": 1.32832485445254e-05, "loss": 0.992, "step": 3038 }, { "epoch": 0.41, "learning_rate": 1.327910144507191e-05, "loss": 0.8624, "step": 3039 }, { "epoch": 0.41, "learning_rate": 1.3274953713623e-05, "loss": 0.9764, "step": 3040 }, { "epoch": 0.41, "learning_rate": 1.327080535097809e-05, "loss": 0.9054, "step": 3041 }, { "epoch": 0.41, "learning_rate": 1.3266656357936705e-05, "loss": 0.9229, "step": 3042 }, { "epoch": 0.41, "learning_rate": 1.3262506735298505e-05, "loss": 0.9384, "step": 3043 }, { "epoch": 0.41, "learning_rate": 1.3258356483863258e-05, "loss": 0.9374, "step": 3044 }, { "epoch": 0.41, "learning_rate": 1.3254205604430862e-05, "loss": 0.8357, "step": 3045 }, { "epoch": 0.41, "learning_rate": 1.3250054097801334e-05, "loss": 0.8558, "step": 3046 }, { "epoch": 0.41, "learning_rate": 1.3245901964774817e-05, "loss": 0.8833, "step": 3047 }, { "epoch": 0.41, "learning_rate": 1.3241749206151561e-05, "loss": 0.9322, "step": 3048 }, { "epoch": 0.41, "learning_rate": 1.323759582273195e-05, "loss": 0.9558, "step": 3049 }, { "epoch": 0.41, "learning_rate": 1.3233441815316486e-05, "loss": 0.9196, "step": 3050 }, { "epoch": 0.41, "learning_rate": 1.3229287184705782e-05, "loss": 0.9058, "step": 3051 }, { "epoch": 0.41, "learning_rate": 1.3225131931700583e-05, "loss": 0.9235, "step": 3052 }, { "epoch": 0.41, "learning_rate": 1.3220976057101749e-05, "loss": 0.8751, "step": 3053 }, { "epoch": 0.41, "learning_rate": 1.3216819561710255e-05, "loss": 0.9235, "step": 3054 }, { "epoch": 0.41, "learning_rate": 1.3212662446327204e-05, "loss": 0.9637, "step": 3055 }, { "epoch": 0.41, "learning_rate": 1.3208504711753815e-05, "loss": 0.9067, "step": 3056 }, { "epoch": 0.41, "learning_rate": 1.3204346358791426e-05, "loss": 0.9085, "step": 3057 }, { "epoch": 0.41, "learning_rate": 1.3200187388241492e-05, "loss": 0.8733, "step": 3058 }, { "epoch": 0.41, "learning_rate": 1.3196027800905596e-05, "loss": 0.9376, "step": 3059 }, { "epoch": 0.41, "learning_rate": 1.3191867597585422e-05, "loss": 0.8975, "step": 3060 }, { "epoch": 0.41, "learning_rate": 1.3187706779082796e-05, "loss": 0.8786, "step": 3061 }, { "epoch": 0.42, "learning_rate": 1.3183545346199641e-05, "loss": 0.8964, "step": 3062 }, { "epoch": 0.42, "learning_rate": 1.3179383299738016e-05, "loss": 0.8741, "step": 3063 }, { "epoch": 0.42, "learning_rate": 1.3175220640500084e-05, "loss": 0.9703, "step": 3064 }, { "epoch": 0.42, "learning_rate": 1.3171057369288134e-05, "loss": 0.8729, "step": 3065 }, { "epoch": 0.42, "learning_rate": 1.3166893486904572e-05, "loss": 0.8923, "step": 3066 }, { "epoch": 0.42, "learning_rate": 1.3162728994151923e-05, "loss": 0.9548, "step": 3067 }, { "epoch": 0.42, "learning_rate": 1.3158563891832825e-05, "loss": 0.8686, "step": 3068 }, { "epoch": 0.42, "learning_rate": 1.3154398180750038e-05, "loss": 0.9331, "step": 3069 }, { "epoch": 0.42, "learning_rate": 1.315023186170643e-05, "loss": 0.9233, "step": 3070 }, { "epoch": 0.42, "learning_rate": 1.3146064935505008e-05, "loss": 0.9078, "step": 3071 }, { "epoch": 0.42, "learning_rate": 1.314189740294887e-05, "loss": 0.9018, "step": 3072 }, { "epoch": 0.42, "learning_rate": 1.3137729264841248e-05, "loss": 0.9298, "step": 3073 }, { "epoch": 0.42, "learning_rate": 1.3133560521985485e-05, "loss": 0.9335, "step": 3074 }, { "epoch": 0.42, "learning_rate": 1.3129391175185035e-05, "loss": 0.8708, "step": 3075 }, { "epoch": 0.42, "learning_rate": 1.3125221225243483e-05, "loss": 0.9509, "step": 3076 }, { "epoch": 0.42, "learning_rate": 1.3121050672964514e-05, "loss": 0.8809, "step": 3077 }, { "epoch": 0.42, "learning_rate": 1.3116879519151944e-05, "loss": 0.9108, "step": 3078 }, { "epoch": 0.42, "learning_rate": 1.3112707764609689e-05, "loss": 0.9023, "step": 3079 }, { "epoch": 0.42, "learning_rate": 1.3108535410141795e-05, "loss": 0.9673, "step": 3080 }, { "epoch": 0.42, "learning_rate": 1.3104362456552418e-05, "loss": 0.9089, "step": 3081 }, { "epoch": 0.42, "learning_rate": 1.310018890464583e-05, "loss": 0.9253, "step": 3082 }, { "epoch": 0.42, "learning_rate": 1.3096014755226414e-05, "loss": 0.9445, "step": 3083 }, { "epoch": 0.42, "learning_rate": 1.3091840009098674e-05, "loss": 0.9249, "step": 3084 }, { "epoch": 0.42, "learning_rate": 1.3087664667067226e-05, "loss": 0.9114, "step": 3085 }, { "epoch": 0.42, "learning_rate": 1.3083488729936802e-05, "loss": 0.8944, "step": 3086 }, { "epoch": 0.42, "learning_rate": 1.3079312198512249e-05, "loss": 0.94, "step": 3087 }, { "epoch": 0.42, "learning_rate": 1.3075135073598525e-05, "loss": 0.8566, "step": 3088 }, { "epoch": 0.42, "learning_rate": 1.3070957356000716e-05, "loss": 0.8789, "step": 3089 }, { "epoch": 0.42, "learning_rate": 1.3066779046523997e-05, "loss": 0.8664, "step": 3090 }, { "epoch": 0.42, "learning_rate": 1.3062600145973678e-05, "loss": 0.8293, "step": 3091 }, { "epoch": 0.42, "learning_rate": 1.305842065515518e-05, "loss": 0.901, "step": 3092 }, { "epoch": 0.42, "learning_rate": 1.3054240574874028e-05, "loss": 0.8042, "step": 3093 }, { "epoch": 0.42, "learning_rate": 1.3050059905935876e-05, "loss": 0.8809, "step": 3094 }, { "epoch": 0.42, "learning_rate": 1.3045878649146476e-05, "loss": 0.823, "step": 3095 }, { "epoch": 0.42, "learning_rate": 1.3041696805311697e-05, "loss": 0.9279, "step": 3096 }, { "epoch": 0.42, "learning_rate": 1.3037514375237527e-05, "loss": 0.8652, "step": 3097 }, { "epoch": 0.42, "learning_rate": 1.3033331359730065e-05, "loss": 0.8817, "step": 3098 }, { "epoch": 0.42, "learning_rate": 1.3029147759595522e-05, "loss": 0.9608, "step": 3099 }, { "epoch": 0.42, "learning_rate": 1.302496357564022e-05, "loss": 0.9505, "step": 3100 }, { "epoch": 0.42, "learning_rate": 1.3020778808670595e-05, "loss": 0.9255, "step": 3101 }, { "epoch": 0.42, "learning_rate": 1.3016593459493194e-05, "loss": 0.885, "step": 3102 }, { "epoch": 0.42, "learning_rate": 1.301240752891468e-05, "loss": 0.9583, "step": 3103 }, { "epoch": 0.42, "learning_rate": 1.3008221017741826e-05, "loss": 0.9427, "step": 3104 }, { "epoch": 0.42, "learning_rate": 1.3004033926781512e-05, "loss": 0.9876, "step": 3105 }, { "epoch": 0.42, "learning_rate": 1.299984625684074e-05, "loss": 0.926, "step": 3106 }, { "epoch": 0.42, "learning_rate": 1.2995658008726611e-05, "loss": 0.9098, "step": 3107 }, { "epoch": 0.42, "learning_rate": 1.299146918324635e-05, "loss": 0.872, "step": 3108 }, { "epoch": 0.42, "learning_rate": 1.2987279781207285e-05, "loss": 0.8907, "step": 3109 }, { "epoch": 0.42, "learning_rate": 1.2983089803416857e-05, "loss": 0.8956, "step": 3110 }, { "epoch": 0.42, "learning_rate": 1.2978899250682619e-05, "loss": 0.9461, "step": 3111 }, { "epoch": 0.42, "learning_rate": 1.2974708123812239e-05, "loss": 0.8656, "step": 3112 }, { "epoch": 0.42, "learning_rate": 1.2970516423613482e-05, "loss": 0.8967, "step": 3113 }, { "epoch": 0.42, "learning_rate": 1.2966324150894238e-05, "loss": 0.9434, "step": 3114 }, { "epoch": 0.42, "learning_rate": 1.2962131306462504e-05, "loss": 0.9268, "step": 3115 }, { "epoch": 0.42, "learning_rate": 1.295793789112638e-05, "loss": 0.9122, "step": 3116 }, { "epoch": 0.42, "learning_rate": 1.2953743905694086e-05, "loss": 0.9017, "step": 3117 }, { "epoch": 0.42, "learning_rate": 1.2949549350973942e-05, "loss": 0.9341, "step": 3118 }, { "epoch": 0.42, "learning_rate": 1.2945354227774385e-05, "loss": 0.9139, "step": 3119 }, { "epoch": 0.42, "learning_rate": 1.2941158536903959e-05, "loss": 0.9048, "step": 3120 }, { "epoch": 0.42, "learning_rate": 1.2936962279171318e-05, "loss": 0.9202, "step": 3121 }, { "epoch": 0.42, "learning_rate": 1.2932765455385228e-05, "loss": 1.0392, "step": 3122 }, { "epoch": 0.42, "learning_rate": 1.2928568066354555e-05, "loss": 0.9262, "step": 3123 }, { "epoch": 0.42, "learning_rate": 1.2924370112888283e-05, "loss": 0.904, "step": 3124 }, { "epoch": 0.42, "learning_rate": 1.2920171595795504e-05, "loss": 0.9327, "step": 3125 }, { "epoch": 0.42, "learning_rate": 1.2915972515885411e-05, "loss": 0.953, "step": 3126 }, { "epoch": 0.42, "learning_rate": 1.2911772873967317e-05, "loss": 0.8285, "step": 3127 }, { "epoch": 0.42, "learning_rate": 1.2907572670850628e-05, "loss": 0.8924, "step": 3128 }, { "epoch": 0.42, "learning_rate": 1.290337190734488e-05, "loss": 0.9368, "step": 3129 }, { "epoch": 0.42, "learning_rate": 1.2899170584259693e-05, "loss": 0.9126, "step": 3130 }, { "epoch": 0.42, "learning_rate": 1.2894968702404813e-05, "loss": 0.9098, "step": 3131 }, { "epoch": 0.42, "learning_rate": 1.2890766262590082e-05, "loss": 0.9318, "step": 3132 }, { "epoch": 0.42, "learning_rate": 1.288656326562546e-05, "loss": 0.9415, "step": 3133 }, { "epoch": 0.42, "learning_rate": 1.2882359712321007e-05, "loss": 0.8996, "step": 3134 }, { "epoch": 0.42, "learning_rate": 1.2878155603486885e-05, "loss": 0.9171, "step": 3135 }, { "epoch": 0.43, "learning_rate": 1.2873950939933382e-05, "loss": 0.9309, "step": 3136 }, { "epoch": 0.43, "learning_rate": 1.2869745722470872e-05, "loss": 0.9592, "step": 3137 }, { "epoch": 0.43, "learning_rate": 1.2865539951909849e-05, "loss": 0.8091, "step": 3138 }, { "epoch": 0.43, "learning_rate": 1.2861333629060911e-05, "loss": 0.9423, "step": 3139 }, { "epoch": 0.43, "learning_rate": 1.2857126754734752e-05, "loss": 0.9902, "step": 3140 }, { "epoch": 0.43, "learning_rate": 1.285291932974219e-05, "loss": 0.9496, "step": 3141 }, { "epoch": 0.43, "learning_rate": 1.2848711354894136e-05, "loss": 0.8848, "step": 3142 }, { "epoch": 0.43, "learning_rate": 1.2844502831001615e-05, "loss": 0.8592, "step": 3143 }, { "epoch": 0.43, "learning_rate": 1.2840293758875751e-05, "loss": 0.9124, "step": 3144 }, { "epoch": 0.43, "learning_rate": 1.2836084139327775e-05, "loss": 0.8813, "step": 3145 }, { "epoch": 0.43, "learning_rate": 1.2831873973169029e-05, "loss": 0.8733, "step": 3146 }, { "epoch": 0.43, "learning_rate": 1.2827663261210956e-05, "loss": 0.9458, "step": 3147 }, { "epoch": 0.43, "learning_rate": 1.2823452004265103e-05, "loss": 0.894, "step": 3148 }, { "epoch": 0.43, "learning_rate": 1.2819240203143123e-05, "loss": 0.9331, "step": 3149 }, { "epoch": 0.43, "learning_rate": 1.2815027858656776e-05, "loss": 0.902, "step": 3150 }, { "epoch": 0.43, "learning_rate": 1.2810814971617927e-05, "loss": 0.8865, "step": 3151 }, { "epoch": 0.43, "learning_rate": 1.2806601542838543e-05, "loss": 0.9926, "step": 3152 }, { "epoch": 0.43, "learning_rate": 1.2802387573130692e-05, "loss": 0.9284, "step": 3153 }, { "epoch": 0.43, "learning_rate": 1.279817306330656e-05, "loss": 0.9005, "step": 3154 }, { "epoch": 0.43, "learning_rate": 1.279395801417842e-05, "loss": 0.8823, "step": 3155 }, { "epoch": 0.43, "learning_rate": 1.2789742426558656e-05, "loss": 1.0008, "step": 3156 }, { "epoch": 0.43, "learning_rate": 1.278552630125976e-05, "loss": 0.9266, "step": 3157 }, { "epoch": 0.43, "learning_rate": 1.2781309639094323e-05, "loss": 0.885, "step": 3158 }, { "epoch": 0.43, "learning_rate": 1.2777092440875045e-05, "loss": 0.8815, "step": 3159 }, { "epoch": 0.43, "learning_rate": 1.277287470741472e-05, "loss": 0.9034, "step": 3160 }, { "epoch": 0.43, "learning_rate": 1.2768656439526248e-05, "loss": 0.8594, "step": 3161 }, { "epoch": 0.43, "learning_rate": 1.2764437638022638e-05, "loss": 0.929, "step": 3162 }, { "epoch": 0.43, "learning_rate": 1.2760218303716995e-05, "loss": 0.8829, "step": 3163 }, { "epoch": 0.43, "learning_rate": 1.2755998437422536e-05, "loss": 0.9257, "step": 3164 }, { "epoch": 0.43, "learning_rate": 1.2751778039952564e-05, "loss": 0.9113, "step": 3165 }, { "epoch": 0.43, "learning_rate": 1.2747557112120503e-05, "loss": 0.9317, "step": 3166 }, { "epoch": 0.43, "learning_rate": 1.2743335654739866e-05, "loss": 0.8975, "step": 3167 }, { "epoch": 0.43, "learning_rate": 1.2739113668624277e-05, "loss": 0.9211, "step": 3168 }, { "epoch": 0.43, "learning_rate": 1.2734891154587454e-05, "loss": 0.8879, "step": 3169 }, { "epoch": 0.43, "learning_rate": 1.2730668113443218e-05, "loss": 0.8822, "step": 3170 }, { "epoch": 0.43, "learning_rate": 1.2726444546005501e-05, "loss": 0.8678, "step": 3171 }, { "epoch": 0.43, "learning_rate": 1.2722220453088323e-05, "loss": 0.8801, "step": 3172 }, { "epoch": 0.43, "learning_rate": 1.2717995835505817e-05, "loss": 0.9605, "step": 3173 }, { "epoch": 0.43, "learning_rate": 1.2713770694072207e-05, "loss": 0.9408, "step": 3174 }, { "epoch": 0.43, "learning_rate": 1.2709545029601827e-05, "loss": 0.8748, "step": 3175 }, { "epoch": 0.43, "learning_rate": 1.2705318842909104e-05, "loss": 0.8765, "step": 3176 }, { "epoch": 0.43, "learning_rate": 1.2701092134808572e-05, "loss": 0.919, "step": 3177 }, { "epoch": 0.43, "learning_rate": 1.2696864906114863e-05, "loss": 0.8796, "step": 3178 }, { "epoch": 0.43, "learning_rate": 1.2692637157642705e-05, "loss": 0.9097, "step": 3179 }, { "epoch": 0.43, "learning_rate": 1.2688408890206934e-05, "loss": 0.8702, "step": 3180 }, { "epoch": 0.43, "learning_rate": 1.2684180104622484e-05, "loss": 0.9291, "step": 3181 }, { "epoch": 0.43, "learning_rate": 1.267995080170438e-05, "loss": 0.8968, "step": 3182 }, { "epoch": 0.43, "learning_rate": 1.267572098226776e-05, "loss": 0.9153, "step": 3183 }, { "epoch": 0.43, "learning_rate": 1.2671490647127856e-05, "loss": 0.9548, "step": 3184 }, { "epoch": 0.43, "learning_rate": 1.2667259797099995e-05, "loss": 0.9197, "step": 3185 }, { "epoch": 0.43, "learning_rate": 1.2663028432999606e-05, "loss": 0.9046, "step": 3186 }, { "epoch": 0.43, "learning_rate": 1.2658796555642225e-05, "loss": 0.89, "step": 3187 }, { "epoch": 0.43, "learning_rate": 1.2654564165843473e-05, "loss": 0.9248, "step": 3188 }, { "epoch": 0.43, "learning_rate": 1.2650331264419083e-05, "loss": 0.8975, "step": 3189 }, { "epoch": 0.43, "learning_rate": 1.2646097852184874e-05, "loss": 0.8861, "step": 3190 }, { "epoch": 0.43, "learning_rate": 1.2641863929956772e-05, "loss": 0.9673, "step": 3191 }, { "epoch": 0.43, "learning_rate": 1.2637629498550803e-05, "loss": 0.8911, "step": 3192 }, { "epoch": 0.43, "learning_rate": 1.263339455878308e-05, "loss": 0.9537, "step": 3193 }, { "epoch": 0.43, "learning_rate": 1.2629159111469831e-05, "loss": 0.8775, "step": 3194 }, { "epoch": 0.43, "learning_rate": 1.2624923157427363e-05, "loss": 0.9034, "step": 3195 }, { "epoch": 0.43, "learning_rate": 1.2620686697472093e-05, "loss": 0.9408, "step": 3196 }, { "epoch": 0.43, "learning_rate": 1.2616449732420532e-05, "loss": 0.9019, "step": 3197 }, { "epoch": 0.43, "learning_rate": 1.2612212263089294e-05, "loss": 0.9435, "step": 3198 }, { "epoch": 0.43, "learning_rate": 1.2607974290295078e-05, "loss": 0.8241, "step": 3199 }, { "epoch": 0.43, "learning_rate": 1.2603735814854687e-05, "loss": 0.9056, "step": 3200 }, { "epoch": 0.43, "learning_rate": 1.259949683758502e-05, "loss": 0.9134, "step": 3201 }, { "epoch": 0.43, "learning_rate": 1.259525735930308e-05, "loss": 0.9304, "step": 3202 }, { "epoch": 0.43, "learning_rate": 1.2591017380825959e-05, "loss": 0.9577, "step": 3203 }, { "epoch": 0.43, "learning_rate": 1.2586776902970841e-05, "loss": 0.8883, "step": 3204 }, { "epoch": 0.43, "learning_rate": 1.258253592655501e-05, "loss": 0.9188, "step": 3205 }, { "epoch": 0.43, "learning_rate": 1.2578294452395858e-05, "loss": 0.9169, "step": 3206 }, { "epoch": 0.43, "learning_rate": 1.2574052481310854e-05, "loss": 0.9401, "step": 3207 }, { "epoch": 0.43, "learning_rate": 1.2569810014117575e-05, "loss": 0.8456, "step": 3208 }, { "epoch": 0.43, "learning_rate": 1.2565567051633685e-05, "loss": 0.9454, "step": 3209 }, { "epoch": 0.44, "learning_rate": 1.2561323594676957e-05, "loss": 0.909, "step": 3210 }, { "epoch": 0.44, "learning_rate": 1.2557079644065247e-05, "loss": 0.9422, "step": 3211 }, { "epoch": 0.44, "learning_rate": 1.2552835200616506e-05, "loss": 0.8936, "step": 3212 }, { "epoch": 0.44, "learning_rate": 1.254859026514879e-05, "loss": 0.946, "step": 3213 }, { "epoch": 0.44, "learning_rate": 1.2544344838480239e-05, "loss": 0.9437, "step": 3214 }, { "epoch": 0.44, "learning_rate": 1.2540098921429096e-05, "loss": 0.8976, "step": 3215 }, { "epoch": 0.44, "learning_rate": 1.2535852514813691e-05, "loss": 0.8671, "step": 3216 }, { "epoch": 0.44, "learning_rate": 1.2531605619452458e-05, "loss": 0.9031, "step": 3217 }, { "epoch": 0.44, "learning_rate": 1.2527358236163916e-05, "loss": 0.9051, "step": 3218 }, { "epoch": 0.44, "learning_rate": 1.252311036576668e-05, "loss": 0.8659, "step": 3219 }, { "epoch": 0.44, "learning_rate": 1.2518862009079464e-05, "loss": 0.9655, "step": 3220 }, { "epoch": 0.44, "learning_rate": 1.251461316692107e-05, "loss": 0.933, "step": 3221 }, { "epoch": 0.44, "learning_rate": 1.2510363840110396e-05, "loss": 0.9445, "step": 3222 }, { "epoch": 0.44, "learning_rate": 1.2506114029466432e-05, "loss": 0.9149, "step": 3223 }, { "epoch": 0.44, "learning_rate": 1.2501863735808267e-05, "loss": 0.9451, "step": 3224 }, { "epoch": 0.44, "learning_rate": 1.2497612959955077e-05, "loss": 0.8664, "step": 3225 }, { "epoch": 0.44, "learning_rate": 1.2493361702726126e-05, "loss": 0.9055, "step": 3226 }, { "epoch": 0.44, "learning_rate": 1.2489109964940784e-05, "loss": 0.9284, "step": 3227 }, { "epoch": 0.44, "learning_rate": 1.2484857747418504e-05, "loss": 0.9147, "step": 3228 }, { "epoch": 0.44, "learning_rate": 1.2480605050978838e-05, "loss": 0.8629, "step": 3229 }, { "epoch": 0.44, "learning_rate": 1.2476351876441419e-05, "loss": 0.9358, "step": 3230 }, { "epoch": 0.44, "learning_rate": 1.2472098224625989e-05, "loss": 0.8728, "step": 3231 }, { "epoch": 0.44, "learning_rate": 1.2467844096352366e-05, "loss": 0.9778, "step": 3232 }, { "epoch": 0.44, "learning_rate": 1.2463589492440468e-05, "loss": 0.9549, "step": 3233 }, { "epoch": 0.44, "learning_rate": 1.2459334413710306e-05, "loss": 0.9131, "step": 3234 }, { "epoch": 0.44, "learning_rate": 1.2455078860981978e-05, "loss": 0.8974, "step": 3235 }, { "epoch": 0.44, "learning_rate": 1.2450822835075672e-05, "loss": 0.9297, "step": 3236 }, { "epoch": 0.44, "learning_rate": 1.2446566336811675e-05, "loss": 0.872, "step": 3237 }, { "epoch": 0.44, "learning_rate": 1.244230936701036e-05, "loss": 0.8978, "step": 3238 }, { "epoch": 0.44, "learning_rate": 1.2438051926492184e-05, "loss": 0.9705, "step": 3239 }, { "epoch": 0.44, "learning_rate": 1.2433794016077713e-05, "loss": 0.8623, "step": 3240 }, { "epoch": 0.44, "learning_rate": 1.2429535636587587e-05, "loss": 0.9188, "step": 3241 }, { "epoch": 0.44, "learning_rate": 1.242527678884254e-05, "loss": 0.8932, "step": 3242 }, { "epoch": 0.44, "learning_rate": 1.2421017473663399e-05, "loss": 0.9046, "step": 3243 }, { "epoch": 0.44, "learning_rate": 1.2416757691871082e-05, "loss": 0.883, "step": 3244 }, { "epoch": 0.44, "learning_rate": 1.2412497444286596e-05, "loss": 0.9582, "step": 3245 }, { "epoch": 0.44, "learning_rate": 1.2408236731731036e-05, "loss": 0.9453, "step": 3246 }, { "epoch": 0.44, "learning_rate": 1.2403975555025584e-05, "loss": 0.9377, "step": 3247 }, { "epoch": 0.44, "learning_rate": 1.2399713914991522e-05, "loss": 0.8359, "step": 3248 }, { "epoch": 0.44, "learning_rate": 1.2395451812450208e-05, "loss": 0.8662, "step": 3249 }, { "epoch": 0.44, "learning_rate": 1.23911892482231e-05, "loss": 0.8852, "step": 3250 }, { "epoch": 0.44, "learning_rate": 1.2386926223131734e-05, "loss": 0.9086, "step": 3251 }, { "epoch": 0.44, "learning_rate": 1.238266273799775e-05, "loss": 0.8756, "step": 3252 }, { "epoch": 0.44, "learning_rate": 1.237839879364286e-05, "loss": 0.9305, "step": 3253 }, { "epoch": 0.44, "learning_rate": 1.2374134390888882e-05, "loss": 0.9526, "step": 3254 }, { "epoch": 0.44, "learning_rate": 1.2369869530557703e-05, "loss": 0.897, "step": 3255 }, { "epoch": 0.44, "learning_rate": 1.2365604213471312e-05, "loss": 0.9369, "step": 3256 }, { "epoch": 0.44, "learning_rate": 1.2361338440451783e-05, "loss": 0.9414, "step": 3257 }, { "epoch": 0.44, "learning_rate": 1.2357072212321272e-05, "loss": 0.8839, "step": 3258 }, { "epoch": 0.44, "learning_rate": 1.2352805529902036e-05, "loss": 0.9006, "step": 3259 }, { "epoch": 0.44, "learning_rate": 1.2348538394016403e-05, "loss": 0.9988, "step": 3260 }, { "epoch": 0.44, "learning_rate": 1.2344270805486804e-05, "loss": 0.8995, "step": 3261 }, { "epoch": 0.44, "learning_rate": 1.2340002765135741e-05, "loss": 0.8851, "step": 3262 }, { "epoch": 0.44, "learning_rate": 1.2335734273785822e-05, "loss": 0.9415, "step": 3263 }, { "epoch": 0.44, "learning_rate": 1.2331465332259724e-05, "loss": 0.9465, "step": 3264 }, { "epoch": 0.44, "learning_rate": 1.2327195941380221e-05, "loss": 0.9202, "step": 3265 }, { "epoch": 0.44, "learning_rate": 1.2322926101970171e-05, "loss": 0.9133, "step": 3266 }, { "epoch": 0.44, "learning_rate": 1.2318655814852519e-05, "loss": 0.8758, "step": 3267 }, { "epoch": 0.44, "learning_rate": 1.2314385080850297e-05, "loss": 0.9324, "step": 3268 }, { "epoch": 0.44, "learning_rate": 1.2310113900786622e-05, "loss": 0.87, "step": 3269 }, { "epoch": 0.44, "learning_rate": 1.2305842275484694e-05, "loss": 0.9004, "step": 3270 }, { "epoch": 0.44, "learning_rate": 1.2301570205767805e-05, "loss": 0.8629, "step": 3271 }, { "epoch": 0.44, "learning_rate": 1.2297297692459326e-05, "loss": 0.9421, "step": 3272 }, { "epoch": 0.44, "learning_rate": 1.2293024736382724e-05, "loss": 0.8831, "step": 3273 }, { "epoch": 0.44, "learning_rate": 1.2288751338361535e-05, "loss": 0.9564, "step": 3274 }, { "epoch": 0.44, "learning_rate": 1.2284477499219399e-05, "loss": 0.9363, "step": 3275 }, { "epoch": 0.44, "learning_rate": 1.2280203219780025e-05, "loss": 0.9098, "step": 3276 }, { "epoch": 0.44, "learning_rate": 1.2275928500867211e-05, "loss": 0.9026, "step": 3277 }, { "epoch": 0.44, "learning_rate": 1.227165334330485e-05, "loss": 0.9214, "step": 3278 }, { "epoch": 0.44, "learning_rate": 1.2267377747916907e-05, "loss": 0.9282, "step": 3279 }, { "epoch": 0.44, "learning_rate": 1.2263101715527437e-05, "loss": 0.8766, "step": 3280 }, { "epoch": 0.44, "learning_rate": 1.2258825246960577e-05, "loss": 0.9115, "step": 3281 }, { "epoch": 0.44, "learning_rate": 1.2254548343040552e-05, "loss": 0.9091, "step": 3282 }, { "epoch": 0.44, "learning_rate": 1.2250271004591663e-05, "loss": 0.9525, "step": 3283 }, { "epoch": 0.45, "learning_rate": 1.2245993232438308e-05, "loss": 0.8885, "step": 3284 }, { "epoch": 0.45, "learning_rate": 1.2241715027404952e-05, "loss": 0.9263, "step": 3285 }, { "epoch": 0.45, "learning_rate": 1.2237436390316158e-05, "loss": 0.9587, "step": 3286 }, { "epoch": 0.45, "learning_rate": 1.2233157321996565e-05, "loss": 0.8836, "step": 3287 }, { "epoch": 0.45, "learning_rate": 1.2228877823270891e-05, "loss": 0.9284, "step": 3288 }, { "epoch": 0.45, "learning_rate": 1.222459789496395e-05, "loss": 0.8929, "step": 3289 }, { "epoch": 0.45, "learning_rate": 1.222031753790063e-05, "loss": 0.9213, "step": 3290 }, { "epoch": 0.45, "learning_rate": 1.2216036752905897e-05, "loss": 0.9082, "step": 3291 }, { "epoch": 0.45, "learning_rate": 1.2211755540804813e-05, "loss": 0.9031, "step": 3292 }, { "epoch": 0.45, "learning_rate": 1.2207473902422506e-05, "loss": 0.9014, "step": 3293 }, { "epoch": 0.45, "learning_rate": 1.2203191838584203e-05, "loss": 0.9909, "step": 3294 }, { "epoch": 0.45, "learning_rate": 1.2198909350115198e-05, "loss": 0.8976, "step": 3295 }, { "epoch": 0.45, "learning_rate": 1.219462643784088e-05, "loss": 0.9092, "step": 3296 }, { "epoch": 0.45, "learning_rate": 1.2190343102586707e-05, "loss": 0.8909, "step": 3297 }, { "epoch": 0.45, "learning_rate": 1.2186059345178228e-05, "loss": 0.9073, "step": 3298 }, { "epoch": 0.45, "learning_rate": 1.2181775166441067e-05, "loss": 0.9063, "step": 3299 }, { "epoch": 0.45, "learning_rate": 1.2177490567200938e-05, "loss": 0.8936, "step": 3300 }, { "epoch": 0.45, "learning_rate": 1.2173205548283626e-05, "loss": 0.8644, "step": 3301 }, { "epoch": 0.45, "learning_rate": 1.2168920110515002e-05, "loss": 0.9504, "step": 3302 }, { "epoch": 0.45, "learning_rate": 1.2164634254721017e-05, "loss": 0.8387, "step": 3303 }, { "epoch": 0.45, "learning_rate": 1.2160347981727704e-05, "loss": 0.91, "step": 3304 }, { "epoch": 0.45, "learning_rate": 1.2156061292361174e-05, "loss": 0.8592, "step": 3305 }, { "epoch": 0.45, "learning_rate": 1.215177418744762e-05, "loss": 0.947, "step": 3306 }, { "epoch": 0.45, "learning_rate": 1.214748666781331e-05, "loss": 0.9196, "step": 3307 }, { "epoch": 0.45, "learning_rate": 1.2143198734284602e-05, "loss": 0.9313, "step": 3308 }, { "epoch": 0.45, "learning_rate": 1.2138910387687926e-05, "loss": 0.8757, "step": 3309 }, { "epoch": 0.45, "learning_rate": 1.2134621628849789e-05, "loss": 0.9585, "step": 3310 }, { "epoch": 0.45, "learning_rate": 1.2130332458596793e-05, "loss": 0.9166, "step": 3311 }, { "epoch": 0.45, "learning_rate": 1.2126042877755595e-05, "loss": 0.9151, "step": 3312 }, { "epoch": 0.45, "learning_rate": 1.2121752887152953e-05, "loss": 0.9496, "step": 3313 }, { "epoch": 0.45, "learning_rate": 1.2117462487615695e-05, "loss": 0.9056, "step": 3314 }, { "epoch": 0.45, "learning_rate": 1.2113171679970725e-05, "loss": 0.8054, "step": 3315 }, { "epoch": 0.45, "learning_rate": 1.2108880465045032e-05, "loss": 0.8887, "step": 3316 }, { "epoch": 0.45, "learning_rate": 1.210458884366568e-05, "loss": 0.9525, "step": 3317 }, { "epoch": 0.45, "learning_rate": 1.2100296816659807e-05, "loss": 0.9563, "step": 3318 }, { "epoch": 0.45, "learning_rate": 1.2096004384854642e-05, "loss": 0.8929, "step": 3319 }, { "epoch": 0.45, "learning_rate": 1.209171154907748e-05, "loss": 0.8596, "step": 3320 }, { "epoch": 0.45, "learning_rate": 1.2087418310155694e-05, "loss": 0.9341, "step": 3321 }, { "epoch": 0.45, "learning_rate": 1.2083124668916745e-05, "loss": 0.9822, "step": 3322 }, { "epoch": 0.45, "learning_rate": 1.207883062618816e-05, "loss": 0.8752, "step": 3323 }, { "epoch": 0.45, "learning_rate": 1.2074536182797551e-05, "loss": 0.9275, "step": 3324 }, { "epoch": 0.45, "learning_rate": 1.2070241339572605e-05, "loss": 0.9321, "step": 3325 }, { "epoch": 0.45, "learning_rate": 1.2065946097341086e-05, "loss": 0.9266, "step": 3326 }, { "epoch": 0.45, "learning_rate": 1.2061650456930834e-05, "loss": 0.8831, "step": 3327 }, { "epoch": 0.45, "learning_rate": 1.2057354419169763e-05, "loss": 0.9192, "step": 3328 }, { "epoch": 0.45, "learning_rate": 1.2053057984885873e-05, "loss": 0.898, "step": 3329 }, { "epoch": 0.45, "learning_rate": 1.2048761154907227e-05, "loss": 0.8484, "step": 3330 }, { "epoch": 0.45, "learning_rate": 1.2044463930061978e-05, "loss": 0.8386, "step": 3331 }, { "epoch": 0.45, "learning_rate": 1.2040166311178347e-05, "loss": 0.9052, "step": 3332 }, { "epoch": 0.45, "learning_rate": 1.2035868299084632e-05, "loss": 0.9236, "step": 3333 }, { "epoch": 0.45, "learning_rate": 1.203156989460921e-05, "loss": 0.8953, "step": 3334 }, { "epoch": 0.45, "learning_rate": 1.2027271098580527e-05, "loss": 0.836, "step": 3335 }, { "epoch": 0.45, "learning_rate": 1.2022971911827113e-05, "loss": 0.935, "step": 3336 }, { "epoch": 0.45, "learning_rate": 1.2018672335177562e-05, "loss": 0.8597, "step": 3337 }, { "epoch": 0.45, "learning_rate": 1.2014372369460559e-05, "loss": 0.8866, "step": 3338 }, { "epoch": 0.45, "learning_rate": 1.2010072015504845e-05, "loss": 0.8911, "step": 3339 }, { "epoch": 0.45, "learning_rate": 1.2005771274139257e-05, "loss": 0.9047, "step": 3340 }, { "epoch": 0.45, "learning_rate": 1.2001470146192689e-05, "loss": 0.8941, "step": 3341 }, { "epoch": 0.45, "learning_rate": 1.1997168632494111e-05, "loss": 0.9296, "step": 3342 }, { "epoch": 0.45, "learning_rate": 1.1992866733872585e-05, "loss": 0.926, "step": 3343 }, { "epoch": 0.45, "learning_rate": 1.1988564451157223e-05, "loss": 0.8999, "step": 3344 }, { "epoch": 0.45, "learning_rate": 1.1984261785177231e-05, "loss": 0.852, "step": 3345 }, { "epoch": 0.45, "learning_rate": 1.1979958736761872e-05, "loss": 0.961, "step": 3346 }, { "epoch": 0.45, "learning_rate": 1.1975655306740501e-05, "loss": 0.8933, "step": 3347 }, { "epoch": 0.45, "learning_rate": 1.1971351495942527e-05, "loss": 1.0203, "step": 3348 }, { "epoch": 0.45, "learning_rate": 1.196704730519745e-05, "loss": 0.9421, "step": 3349 }, { "epoch": 0.45, "learning_rate": 1.196274273533483e-05, "loss": 0.8747, "step": 3350 }, { "epoch": 0.45, "learning_rate": 1.1958437787184306e-05, "loss": 0.9073, "step": 3351 }, { "epoch": 0.45, "learning_rate": 1.1954132461575596e-05, "loss": 0.9912, "step": 3352 }, { "epoch": 0.45, "learning_rate": 1.1949826759338469e-05, "loss": 0.9035, "step": 3353 }, { "epoch": 0.45, "learning_rate": 1.19455206813028e-05, "loss": 0.8662, "step": 3354 }, { "epoch": 0.45, "learning_rate": 1.1941214228298508e-05, "loss": 0.863, "step": 3355 }, { "epoch": 0.45, "learning_rate": 1.1936907401155592e-05, "loss": 0.8644, "step": 3356 }, { "epoch": 0.46, "learning_rate": 1.1932600200704131e-05, "loss": 0.9372, "step": 3357 }, { "epoch": 0.46, "learning_rate": 1.1928292627774268e-05, "loss": 0.9161, "step": 3358 }, { "epoch": 0.46, "learning_rate": 1.1923984683196222e-05, "loss": 0.9044, "step": 3359 }, { "epoch": 0.46, "learning_rate": 1.191967636780028e-05, "loss": 0.8812, "step": 3360 }, { "epoch": 0.46, "learning_rate": 1.1915367682416801e-05, "loss": 0.9383, "step": 3361 }, { "epoch": 0.46, "learning_rate": 1.1911058627876222e-05, "loss": 0.8989, "step": 3362 }, { "epoch": 0.46, "learning_rate": 1.1906749205009036e-05, "loss": 0.9198, "step": 3363 }, { "epoch": 0.46, "learning_rate": 1.1902439414645828e-05, "loss": 0.8973, "step": 3364 }, { "epoch": 0.46, "learning_rate": 1.1898129257617234e-05, "loss": 0.8483, "step": 3365 }, { "epoch": 0.46, "learning_rate": 1.1893818734753975e-05, "loss": 0.9152, "step": 3366 }, { "epoch": 0.46, "learning_rate": 1.1889507846886832e-05, "loss": 0.8998, "step": 3367 }, { "epoch": 0.46, "learning_rate": 1.1885196594846666e-05, "loss": 0.8057, "step": 3368 }, { "epoch": 0.46, "learning_rate": 1.1880884979464398e-05, "loss": 0.8681, "step": 3369 }, { "epoch": 0.46, "learning_rate": 1.1876573001571028e-05, "loss": 0.8571, "step": 3370 }, { "epoch": 0.46, "learning_rate": 1.1872260661997623e-05, "loss": 0.928, "step": 3371 }, { "epoch": 0.46, "learning_rate": 1.1867947961575313e-05, "loss": 0.8981, "step": 3372 }, { "epoch": 0.46, "learning_rate": 1.186363490113531e-05, "loss": 0.8742, "step": 3373 }, { "epoch": 0.46, "learning_rate": 1.1859321481508885e-05, "loss": 1.0032, "step": 3374 }, { "epoch": 0.46, "learning_rate": 1.1855007703527382e-05, "loss": 0.9174, "step": 3375 }, { "epoch": 0.46, "learning_rate": 1.1850693568022218e-05, "loss": 0.9407, "step": 3376 }, { "epoch": 0.46, "learning_rate": 1.184637907582487e-05, "loss": 0.9175, "step": 3377 }, { "epoch": 0.46, "learning_rate": 1.1842064227766891e-05, "loss": 0.9233, "step": 3378 }, { "epoch": 0.46, "learning_rate": 1.1837749024679902e-05, "loss": 0.8898, "step": 3379 }, { "epoch": 0.46, "learning_rate": 1.183343346739559e-05, "loss": 0.931, "step": 3380 }, { "epoch": 0.46, "learning_rate": 1.1829117556745706e-05, "loss": 0.8843, "step": 3381 }, { "epoch": 0.46, "learning_rate": 1.1824801293562082e-05, "loss": 0.9088, "step": 3382 }, { "epoch": 0.46, "learning_rate": 1.1820484678676607e-05, "loss": 0.8611, "step": 3383 }, { "epoch": 0.46, "learning_rate": 1.1816167712921237e-05, "loss": 0.8632, "step": 3384 }, { "epoch": 0.46, "learning_rate": 1.1811850397128007e-05, "loss": 0.9619, "step": 3385 }, { "epoch": 0.46, "learning_rate": 1.1807532732129004e-05, "loss": 1.0046, "step": 3386 }, { "epoch": 0.46, "learning_rate": 1.1803214718756395e-05, "loss": 0.9396, "step": 3387 }, { "epoch": 0.46, "learning_rate": 1.1798896357842406e-05, "loss": 0.8887, "step": 3388 }, { "epoch": 0.46, "learning_rate": 1.179457765021934e-05, "loss": 0.9173, "step": 3389 }, { "epoch": 0.46, "learning_rate": 1.1790258596719553e-05, "loss": 0.9587, "step": 3390 }, { "epoch": 0.46, "learning_rate": 1.1785939198175481e-05, "loss": 0.9395, "step": 3391 }, { "epoch": 0.46, "learning_rate": 1.1781619455419615e-05, "loss": 0.8719, "step": 3392 }, { "epoch": 0.46, "learning_rate": 1.177729936928452e-05, "loss": 0.8534, "step": 3393 }, { "epoch": 0.46, "learning_rate": 1.1772978940602826e-05, "loss": 0.8216, "step": 3394 }, { "epoch": 0.46, "learning_rate": 1.1768658170207225e-05, "loss": 0.8952, "step": 3395 }, { "epoch": 0.46, "learning_rate": 1.1764337058930482e-05, "loss": 0.9584, "step": 3396 }, { "epoch": 0.46, "learning_rate": 1.1760015607605417e-05, "loss": 0.9654, "step": 3397 }, { "epoch": 0.46, "learning_rate": 1.1755693817064927e-05, "loss": 0.873, "step": 3398 }, { "epoch": 0.46, "learning_rate": 1.1751371688141973e-05, "loss": 0.8753, "step": 3399 }, { "epoch": 0.46, "learning_rate": 1.174704922166957e-05, "loss": 0.9378, "step": 3400 }, { "epoch": 0.46, "learning_rate": 1.1742726418480808e-05, "loss": 0.8972, "step": 3401 }, { "epoch": 0.46, "learning_rate": 1.1738403279408841e-05, "loss": 0.8924, "step": 3402 }, { "epoch": 0.46, "learning_rate": 1.1734079805286887e-05, "loss": 0.8818, "step": 3403 }, { "epoch": 0.46, "learning_rate": 1.1729755996948224e-05, "loss": 0.8819, "step": 3404 }, { "epoch": 0.46, "learning_rate": 1.1725431855226203e-05, "loss": 0.9244, "step": 3405 }, { "epoch": 0.46, "learning_rate": 1.1721107380954233e-05, "loss": 0.8588, "step": 3406 }, { "epoch": 0.46, "learning_rate": 1.1716782574965783e-05, "loss": 0.855, "step": 3407 }, { "epoch": 0.46, "learning_rate": 1.1712457438094403e-05, "loss": 0.923, "step": 3408 }, { "epoch": 0.46, "learning_rate": 1.1708131971173685e-05, "loss": 0.9444, "step": 3409 }, { "epoch": 0.46, "learning_rate": 1.17038061750373e-05, "loss": 0.8754, "step": 3410 }, { "epoch": 0.46, "learning_rate": 1.1699480050518974e-05, "loss": 0.9004, "step": 3411 }, { "epoch": 0.46, "learning_rate": 1.1695153598452507e-05, "loss": 0.8649, "step": 3412 }, { "epoch": 0.46, "learning_rate": 1.1690826819671748e-05, "loss": 0.9027, "step": 3413 }, { "epoch": 0.46, "learning_rate": 1.1686499715010616e-05, "loss": 0.9222, "step": 3414 }, { "epoch": 0.46, "learning_rate": 1.1682172285303095e-05, "loss": 0.8741, "step": 3415 }, { "epoch": 0.46, "learning_rate": 1.1677844531383227e-05, "loss": 0.9416, "step": 3416 }, { "epoch": 0.46, "learning_rate": 1.1673516454085123e-05, "loss": 0.8261, "step": 3417 }, { "epoch": 0.46, "learning_rate": 1.1669188054242945e-05, "loss": 0.8749, "step": 3418 }, { "epoch": 0.46, "learning_rate": 1.1664859332690932e-05, "loss": 0.9575, "step": 3419 }, { "epoch": 0.46, "learning_rate": 1.1660530290263375e-05, "loss": 0.9123, "step": 3420 }, { "epoch": 0.46, "learning_rate": 1.1656200927794624e-05, "loss": 0.9829, "step": 3421 }, { "epoch": 0.46, "learning_rate": 1.1651871246119102e-05, "loss": 0.851, "step": 3422 }, { "epoch": 0.46, "learning_rate": 1.1647541246071283e-05, "loss": 0.9433, "step": 3423 }, { "epoch": 0.46, "learning_rate": 1.1643210928485714e-05, "loss": 0.9384, "step": 3424 }, { "epoch": 0.46, "learning_rate": 1.1638880294196984e-05, "loss": 0.9167, "step": 3425 }, { "epoch": 0.46, "learning_rate": 1.1634549344039764e-05, "loss": 0.8938, "step": 3426 }, { "epoch": 0.46, "learning_rate": 1.1630218078848776e-05, "loss": 0.8693, "step": 3427 }, { "epoch": 0.46, "learning_rate": 1.1625886499458798e-05, "loss": 0.8462, "step": 3428 }, { "epoch": 0.46, "learning_rate": 1.1621554606704682e-05, "loss": 0.9171, "step": 3429 }, { "epoch": 0.46, "learning_rate": 1.1617222401421324e-05, "loss": 0.8837, "step": 3430 }, { "epoch": 0.47, "learning_rate": 1.1612889884443694e-05, "loss": 0.909, "step": 3431 }, { "epoch": 0.47, "learning_rate": 1.1608557056606815e-05, "loss": 0.8873, "step": 3432 }, { "epoch": 0.47, "learning_rate": 1.1604223918745775e-05, "loss": 0.9609, "step": 3433 }, { "epoch": 0.47, "learning_rate": 1.1599890471695711e-05, "loss": 0.8746, "step": 3434 }, { "epoch": 0.47, "learning_rate": 1.1595556716291836e-05, "loss": 0.94, "step": 3435 }, { "epoch": 0.47, "learning_rate": 1.1591222653369408e-05, "loss": 0.9138, "step": 3436 }, { "epoch": 0.47, "learning_rate": 1.1586888283763748e-05, "loss": 0.9684, "step": 3437 }, { "epoch": 0.47, "learning_rate": 1.1582553608310243e-05, "loss": 0.9333, "step": 3438 }, { "epoch": 0.47, "learning_rate": 1.1578218627844329e-05, "loss": 0.9171, "step": 3439 }, { "epoch": 0.47, "learning_rate": 1.157388334320151e-05, "loss": 0.9618, "step": 3440 }, { "epoch": 0.47, "learning_rate": 1.156954775521734e-05, "loss": 0.8898, "step": 3441 }, { "epoch": 0.47, "learning_rate": 1.156521186472744e-05, "loss": 0.8892, "step": 3442 }, { "epoch": 0.47, "learning_rate": 1.1560875672567482e-05, "loss": 0.8968, "step": 3443 }, { "epoch": 0.47, "learning_rate": 1.15565391795732e-05, "loss": 0.9177, "step": 3444 }, { "epoch": 0.47, "learning_rate": 1.1552202386580382e-05, "loss": 0.8428, "step": 3445 }, { "epoch": 0.47, "learning_rate": 1.154786529442488e-05, "loss": 0.8822, "step": 3446 }, { "epoch": 0.47, "learning_rate": 1.1543527903942603e-05, "loss": 0.931, "step": 3447 }, { "epoch": 0.47, "learning_rate": 1.1539190215969514e-05, "loss": 0.9716, "step": 3448 }, { "epoch": 0.47, "learning_rate": 1.1534852231341627e-05, "loss": 0.9388, "step": 3449 }, { "epoch": 0.47, "learning_rate": 1.1530513950895031e-05, "loss": 0.8606, "step": 3450 }, { "epoch": 0.47, "learning_rate": 1.1526175375465853e-05, "loss": 0.8785, "step": 3451 }, { "epoch": 0.47, "learning_rate": 1.1521836505890291e-05, "loss": 0.9139, "step": 3452 }, { "epoch": 0.47, "learning_rate": 1.151749734300459e-05, "loss": 0.8646, "step": 3453 }, { "epoch": 0.47, "learning_rate": 1.1513157887645061e-05, "loss": 0.8745, "step": 3454 }, { "epoch": 0.47, "learning_rate": 1.150881814064806e-05, "loss": 0.8726, "step": 3455 }, { "epoch": 0.47, "learning_rate": 1.1504478102850011e-05, "loss": 0.7996, "step": 3456 }, { "epoch": 0.47, "learning_rate": 1.1500137775087388e-05, "loss": 0.8765, "step": 3457 }, { "epoch": 0.47, "learning_rate": 1.1495797158196713e-05, "loss": 0.9003, "step": 3458 }, { "epoch": 0.47, "learning_rate": 1.1491456253014579e-05, "loss": 0.9495, "step": 3459 }, { "epoch": 0.47, "learning_rate": 1.1487115060377625e-05, "loss": 0.9113, "step": 3460 }, { "epoch": 0.47, "learning_rate": 1.148277358112255e-05, "loss": 0.9078, "step": 3461 }, { "epoch": 0.47, "learning_rate": 1.1478431816086104e-05, "loss": 0.9116, "step": 3462 }, { "epoch": 0.47, "learning_rate": 1.1474089766105094e-05, "loss": 0.8424, "step": 3463 }, { "epoch": 0.47, "learning_rate": 1.1469747432016386e-05, "loss": 0.9092, "step": 3464 }, { "epoch": 0.47, "learning_rate": 1.1465404814656893e-05, "loss": 0.8853, "step": 3465 }, { "epoch": 0.47, "learning_rate": 1.1461061914863587e-05, "loss": 0.8779, "step": 3466 }, { "epoch": 0.47, "learning_rate": 1.1456718733473492e-05, "loss": 0.9794, "step": 3467 }, { "epoch": 0.47, "learning_rate": 1.1452375271323695e-05, "loss": 0.8768, "step": 3468 }, { "epoch": 0.47, "learning_rate": 1.1448031529251325e-05, "loss": 0.9064, "step": 3469 }, { "epoch": 0.47, "learning_rate": 1.1443687508093567e-05, "loss": 0.911, "step": 3470 }, { "epoch": 0.47, "learning_rate": 1.143934320868767e-05, "loss": 0.8662, "step": 3471 }, { "epoch": 0.47, "learning_rate": 1.1434998631870923e-05, "loss": 0.9485, "step": 3472 }, { "epoch": 0.47, "learning_rate": 1.1430653778480682e-05, "loss": 0.8511, "step": 3473 }, { "epoch": 0.47, "learning_rate": 1.1426308649354346e-05, "loss": 0.8415, "step": 3474 }, { "epoch": 0.47, "learning_rate": 1.1421963245329368e-05, "loss": 0.8807, "step": 3475 }, { "epoch": 0.47, "learning_rate": 1.141761756724326e-05, "loss": 0.8757, "step": 3476 }, { "epoch": 0.47, "learning_rate": 1.1413271615933582e-05, "loss": 0.8854, "step": 3477 }, { "epoch": 0.47, "learning_rate": 1.1408925392237953e-05, "loss": 0.8723, "step": 3478 }, { "epoch": 0.47, "learning_rate": 1.140457889699403e-05, "loss": 0.8652, "step": 3479 }, { "epoch": 0.47, "learning_rate": 1.140023213103954e-05, "loss": 0.8907, "step": 3480 }, { "epoch": 0.47, "learning_rate": 1.1395885095212247e-05, "loss": 0.976, "step": 3481 }, { "epoch": 0.47, "learning_rate": 1.1391537790349977e-05, "loss": 1.0021, "step": 3482 }, { "epoch": 0.47, "learning_rate": 1.138719021729061e-05, "loss": 0.9294, "step": 3483 }, { "epoch": 0.47, "learning_rate": 1.1382842376872065e-05, "loss": 0.9645, "step": 3484 }, { "epoch": 0.47, "learning_rate": 1.1378494269932326e-05, "loss": 0.8697, "step": 3485 }, { "epoch": 0.47, "learning_rate": 1.1374145897309416e-05, "loss": 0.8856, "step": 3486 }, { "epoch": 0.47, "learning_rate": 1.1369797259841423e-05, "loss": 0.8558, "step": 3487 }, { "epoch": 0.47, "learning_rate": 1.1365448358366473e-05, "loss": 0.8527, "step": 3488 }, { "epoch": 0.47, "learning_rate": 1.1361099193722753e-05, "loss": 0.9936, "step": 3489 }, { "epoch": 0.47, "learning_rate": 1.1356749766748491e-05, "loss": 0.9797, "step": 3490 }, { "epoch": 0.47, "learning_rate": 1.1352400078281977e-05, "loss": 0.8617, "step": 3491 }, { "epoch": 0.47, "learning_rate": 1.1348050129161542e-05, "loss": 0.8886, "step": 3492 }, { "epoch": 0.47, "learning_rate": 1.1343699920225571e-05, "loss": 0.866, "step": 3493 }, { "epoch": 0.47, "learning_rate": 1.1339349452312498e-05, "loss": 0.8669, "step": 3494 }, { "epoch": 0.47, "learning_rate": 1.1334998726260806e-05, "loss": 0.9731, "step": 3495 }, { "epoch": 0.47, "learning_rate": 1.1330647742909035e-05, "loss": 0.9129, "step": 3496 }, { "epoch": 0.47, "learning_rate": 1.1326296503095762e-05, "loss": 0.8536, "step": 3497 }, { "epoch": 0.47, "learning_rate": 1.1321945007659625e-05, "loss": 0.9204, "step": 3498 }, { "epoch": 0.47, "learning_rate": 1.1317593257439305e-05, "loss": 0.8617, "step": 3499 }, { "epoch": 0.47, "learning_rate": 1.131324125327353e-05, "loss": 0.9279, "step": 3500 }, { "epoch": 0.47, "learning_rate": 1.1308888996001089e-05, "loss": 0.8691, "step": 3501 }, { "epoch": 0.47, "learning_rate": 1.1304536486460805e-05, "loss": 0.8628, "step": 3502 }, { "epoch": 0.47, "learning_rate": 1.1300183725491555e-05, "loss": 0.8832, "step": 3503 }, { "epoch": 0.47, "learning_rate": 1.129583071393227e-05, "loss": 0.7868, "step": 3504 }, { "epoch": 0.48, "learning_rate": 1.1291477452621924e-05, "loss": 0.8536, "step": 3505 }, { "epoch": 0.48, "learning_rate": 1.1287123942399537e-05, "loss": 0.8995, "step": 3506 }, { "epoch": 0.48, "learning_rate": 1.128277018410418e-05, "loss": 0.8876, "step": 3507 }, { "epoch": 0.48, "learning_rate": 1.1278416178574976e-05, "loss": 0.9514, "step": 3508 }, { "epoch": 0.48, "learning_rate": 1.1274061926651086e-05, "loss": 0.8809, "step": 3509 }, { "epoch": 0.48, "learning_rate": 1.1269707429171727e-05, "loss": 0.9274, "step": 3510 }, { "epoch": 0.48, "learning_rate": 1.1265352686976161e-05, "loss": 0.9514, "step": 3511 }, { "epoch": 0.48, "learning_rate": 1.1260997700903695e-05, "loss": 0.9206, "step": 3512 }, { "epoch": 0.48, "learning_rate": 1.1256642471793684e-05, "loss": 0.9308, "step": 3513 }, { "epoch": 0.48, "learning_rate": 1.125228700048553e-05, "loss": 0.9385, "step": 3514 }, { "epoch": 0.48, "learning_rate": 1.1247931287818681e-05, "loss": 0.9801, "step": 3515 }, { "epoch": 0.48, "learning_rate": 1.1243575334632633e-05, "loss": 0.8995, "step": 3516 }, { "epoch": 0.48, "learning_rate": 1.1239219141766931e-05, "loss": 0.9329, "step": 3517 }, { "epoch": 0.48, "learning_rate": 1.1234862710061156e-05, "loss": 0.9148, "step": 3518 }, { "epoch": 0.48, "learning_rate": 1.1230506040354952e-05, "loss": 0.9232, "step": 3519 }, { "epoch": 0.48, "learning_rate": 1.1226149133487986e-05, "loss": 0.903, "step": 3520 }, { "epoch": 0.48, "learning_rate": 1.1221791990299995e-05, "loss": 0.9607, "step": 3521 }, { "epoch": 0.48, "learning_rate": 1.1217434611630746e-05, "loss": 0.9234, "step": 3522 }, { "epoch": 0.48, "learning_rate": 1.1213076998320052e-05, "loss": 0.9239, "step": 3523 }, { "epoch": 0.48, "learning_rate": 1.1208719151207779e-05, "loss": 0.9023, "step": 3524 }, { "epoch": 0.48, "learning_rate": 1.1204361071133831e-05, "loss": 0.7977, "step": 3525 }, { "epoch": 0.48, "learning_rate": 1.1200002758938161e-05, "loss": 0.9393, "step": 3526 }, { "epoch": 0.48, "learning_rate": 1.1195644215460766e-05, "loss": 0.9389, "step": 3527 }, { "epoch": 0.48, "learning_rate": 1.1191285441541687e-05, "loss": 0.9048, "step": 3528 }, { "epoch": 0.48, "learning_rate": 1.1186926438021006e-05, "loss": 0.9545, "step": 3529 }, { "epoch": 0.48, "learning_rate": 1.1182567205738856e-05, "loss": 0.9262, "step": 3530 }, { "epoch": 0.48, "learning_rate": 1.1178207745535415e-05, "loss": 0.9304, "step": 3531 }, { "epoch": 0.48, "learning_rate": 1.1173848058250889e-05, "loss": 0.9218, "step": 3532 }, { "epoch": 0.48, "learning_rate": 1.116948814472555e-05, "loss": 0.8926, "step": 3533 }, { "epoch": 0.48, "learning_rate": 1.1165128005799696e-05, "loss": 0.9181, "step": 3534 }, { "epoch": 0.48, "learning_rate": 1.1160767642313681e-05, "loss": 0.9376, "step": 3535 }, { "epoch": 0.48, "learning_rate": 1.1156407055107894e-05, "loss": 0.8918, "step": 3536 }, { "epoch": 0.48, "learning_rate": 1.1152046245022767e-05, "loss": 0.8175, "step": 3537 }, { "epoch": 0.48, "learning_rate": 1.1147685212898784e-05, "loss": 0.9291, "step": 3538 }, { "epoch": 0.48, "learning_rate": 1.114332395957646e-05, "loss": 0.9615, "step": 3539 }, { "epoch": 0.48, "learning_rate": 1.1138962485896363e-05, "loss": 0.9382, "step": 3540 }, { "epoch": 0.48, "learning_rate": 1.1134600792699092e-05, "loss": 0.9375, "step": 3541 }, { "epoch": 0.48, "learning_rate": 1.1130238880825306e-05, "loss": 0.9134, "step": 3542 }, { "epoch": 0.48, "learning_rate": 1.1125876751115686e-05, "loss": 0.9437, "step": 3543 }, { "epoch": 0.48, "learning_rate": 1.1121514404410965e-05, "loss": 0.8966, "step": 3544 }, { "epoch": 0.48, "learning_rate": 1.111715184155192e-05, "loss": 0.862, "step": 3545 }, { "epoch": 0.48, "learning_rate": 1.1112789063379362e-05, "loss": 0.9858, "step": 3546 }, { "epoch": 0.48, "learning_rate": 1.1108426070734156e-05, "loss": 0.918, "step": 3547 }, { "epoch": 0.48, "learning_rate": 1.110406286445719e-05, "loss": 0.7712, "step": 3548 }, { "epoch": 0.48, "learning_rate": 1.1099699445389416e-05, "loss": 0.9658, "step": 3549 }, { "epoch": 0.48, "learning_rate": 1.1095335814371803e-05, "loss": 0.7633, "step": 3550 }, { "epoch": 0.48, "learning_rate": 1.109097197224538e-05, "loss": 0.8942, "step": 3551 }, { "epoch": 0.48, "learning_rate": 1.1086607919851205e-05, "loss": 0.9029, "step": 3552 }, { "epoch": 0.48, "learning_rate": 1.1082243658030382e-05, "loss": 0.9024, "step": 3553 }, { "epoch": 0.48, "learning_rate": 1.107787918762406e-05, "loss": 0.8928, "step": 3554 }, { "epoch": 0.48, "learning_rate": 1.107351450947341e-05, "loss": 0.8859, "step": 3555 }, { "epoch": 0.48, "learning_rate": 1.1069149624419666e-05, "loss": 0.9035, "step": 3556 }, { "epoch": 0.48, "learning_rate": 1.1064784533304087e-05, "loss": 0.8047, "step": 3557 }, { "epoch": 0.48, "learning_rate": 1.1060419236967974e-05, "loss": 0.9298, "step": 3558 }, { "epoch": 0.48, "learning_rate": 1.1056053736252675e-05, "loss": 0.8997, "step": 3559 }, { "epoch": 0.48, "learning_rate": 1.1051688031999565e-05, "loss": 0.9021, "step": 3560 }, { "epoch": 0.48, "learning_rate": 1.1047322125050071e-05, "loss": 0.9687, "step": 3561 }, { "epoch": 0.48, "learning_rate": 1.104295601624565e-05, "loss": 0.8817, "step": 3562 }, { "epoch": 0.48, "learning_rate": 1.1038589706427802e-05, "loss": 0.9124, "step": 3563 }, { "epoch": 0.48, "learning_rate": 1.1034223196438065e-05, "loss": 0.8849, "step": 3564 }, { "epoch": 0.48, "learning_rate": 1.1029856487118013e-05, "loss": 0.8347, "step": 3565 }, { "epoch": 0.48, "learning_rate": 1.1025489579309265e-05, "loss": 0.8629, "step": 3566 }, { "epoch": 0.48, "learning_rate": 1.1021122473853469e-05, "loss": 0.9009, "step": 3567 }, { "epoch": 0.48, "learning_rate": 1.1016755171592322e-05, "loss": 0.9068, "step": 3568 }, { "epoch": 0.48, "learning_rate": 1.1012387673367547e-05, "loss": 0.9387, "step": 3569 }, { "epoch": 0.48, "learning_rate": 1.1008019980020917e-05, "loss": 0.8551, "step": 3570 }, { "epoch": 0.48, "learning_rate": 1.1003652092394228e-05, "loss": 0.914, "step": 3571 }, { "epoch": 0.48, "learning_rate": 1.099928401132933e-05, "loss": 0.8923, "step": 3572 }, { "epoch": 0.48, "learning_rate": 1.0994915737668102e-05, "loss": 0.9094, "step": 3573 }, { "epoch": 0.48, "learning_rate": 1.0990547272252454e-05, "loss": 0.8715, "step": 3574 }, { "epoch": 0.48, "learning_rate": 1.0986178615924346e-05, "loss": 0.8527, "step": 3575 }, { "epoch": 0.48, "learning_rate": 1.098180976952576e-05, "loss": 0.9076, "step": 3576 }, { "epoch": 0.48, "learning_rate": 1.0977440733898733e-05, "loss": 1.0369, "step": 3577 }, { "epoch": 0.48, "learning_rate": 1.097307150988532e-05, "loss": 0.9107, "step": 3578 }, { "epoch": 0.49, "learning_rate": 1.0968702098327624e-05, "loss": 0.9008, "step": 3579 }, { "epoch": 0.49, "learning_rate": 1.096433250006778e-05, "loss": 0.8891, "step": 3580 }, { "epoch": 0.49, "learning_rate": 1.0959962715947956e-05, "loss": 0.9069, "step": 3581 }, { "epoch": 0.49, "learning_rate": 1.0955592746810366e-05, "loss": 0.9079, "step": 3582 }, { "epoch": 0.49, "learning_rate": 1.0951222593497248e-05, "loss": 0.89, "step": 3583 }, { "epoch": 0.49, "learning_rate": 1.0946852256850887e-05, "loss": 0.9605, "step": 3584 }, { "epoch": 0.49, "learning_rate": 1.0942481737713588e-05, "loss": 0.9066, "step": 3585 }, { "epoch": 0.49, "learning_rate": 1.0938111036927705e-05, "loss": 0.9258, "step": 3586 }, { "epoch": 0.49, "learning_rate": 1.0933740155335622e-05, "loss": 0.9225, "step": 3587 }, { "epoch": 0.49, "learning_rate": 1.0929369093779755e-05, "loss": 0.8652, "step": 3588 }, { "epoch": 0.49, "learning_rate": 1.0924997853102563e-05, "loss": 0.909, "step": 3589 }, { "epoch": 0.49, "learning_rate": 1.0920626434146528e-05, "loss": 0.9842, "step": 3590 }, { "epoch": 0.49, "learning_rate": 1.091625483775418e-05, "loss": 0.8847, "step": 3591 }, { "epoch": 0.49, "learning_rate": 1.0911883064768068e-05, "loss": 0.9357, "step": 3592 }, { "epoch": 0.49, "learning_rate": 1.0907511116030785e-05, "loss": 0.8865, "step": 3593 }, { "epoch": 0.49, "learning_rate": 1.0903138992384961e-05, "loss": 0.9076, "step": 3594 }, { "epoch": 0.49, "learning_rate": 1.0898766694673247e-05, "loss": 0.9076, "step": 3595 }, { "epoch": 0.49, "learning_rate": 1.0894394223738338e-05, "loss": 0.9541, "step": 3596 }, { "epoch": 0.49, "learning_rate": 1.0890021580422957e-05, "loss": 0.8933, "step": 3597 }, { "epoch": 0.49, "learning_rate": 1.0885648765569868e-05, "loss": 0.8675, "step": 3598 }, { "epoch": 0.49, "learning_rate": 1.0881275780021859e-05, "loss": 0.9136, "step": 3599 }, { "epoch": 0.49, "learning_rate": 1.0876902624621753e-05, "loss": 0.9342, "step": 3600 }, { "epoch": 0.49, "learning_rate": 1.087252930021241e-05, "loss": 0.9058, "step": 3601 }, { "epoch": 0.49, "learning_rate": 1.0868155807636715e-05, "loss": 0.8735, "step": 3602 }, { "epoch": 0.49, "learning_rate": 1.0863782147737598e-05, "loss": 0.8687, "step": 3603 }, { "epoch": 0.49, "learning_rate": 1.0859408321358005e-05, "loss": 0.9111, "step": 3604 }, { "epoch": 0.49, "learning_rate": 1.085503432934093e-05, "loss": 0.9387, "step": 3605 }, { "epoch": 0.49, "learning_rate": 1.0850660172529383e-05, "loss": 0.929, "step": 3606 }, { "epoch": 0.49, "learning_rate": 1.0846285851766425e-05, "loss": 0.863, "step": 3607 }, { "epoch": 0.49, "learning_rate": 1.084191136789513e-05, "loss": 0.887, "step": 3608 }, { "epoch": 0.49, "learning_rate": 1.083753672175861e-05, "loss": 0.8911, "step": 3609 }, { "epoch": 0.49, "learning_rate": 1.0833161914200017e-05, "loss": 0.8895, "step": 3610 }, { "epoch": 0.49, "learning_rate": 1.0828786946062517e-05, "loss": 0.8863, "step": 3611 }, { "epoch": 0.49, "learning_rate": 1.0824411818189327e-05, "loss": 0.8585, "step": 3612 }, { "epoch": 0.49, "learning_rate": 1.0820036531423675e-05, "loss": 0.8451, "step": 3613 }, { "epoch": 0.49, "learning_rate": 1.0815661086608835e-05, "loss": 0.8672, "step": 3614 }, { "epoch": 0.49, "learning_rate": 1.0811285484588101e-05, "loss": 0.9544, "step": 3615 }, { "epoch": 0.49, "learning_rate": 1.0806909726204805e-05, "loss": 0.9583, "step": 3616 }, { "epoch": 0.49, "learning_rate": 1.0802533812302305e-05, "loss": 0.893, "step": 3617 }, { "epoch": 0.49, "learning_rate": 1.079815774372399e-05, "loss": 0.8516, "step": 3618 }, { "epoch": 0.49, "learning_rate": 1.079378152131328e-05, "loss": 0.9028, "step": 3619 }, { "epoch": 0.49, "learning_rate": 1.078940514591362e-05, "loss": 0.9545, "step": 3620 }, { "epoch": 0.49, "learning_rate": 1.078502861836849e-05, "loss": 0.9375, "step": 3621 }, { "epoch": 0.49, "learning_rate": 1.0780651939521396e-05, "loss": 0.8647, "step": 3622 }, { "epoch": 0.49, "learning_rate": 1.0776275110215875e-05, "loss": 0.8763, "step": 3623 }, { "epoch": 0.49, "learning_rate": 1.0771898131295493e-05, "loss": 0.8202, "step": 3624 }, { "epoch": 0.49, "learning_rate": 1.076752100360384e-05, "loss": 0.8509, "step": 3625 }, { "epoch": 0.49, "learning_rate": 1.0763143727984546e-05, "loss": 0.7827, "step": 3626 }, { "epoch": 0.49, "learning_rate": 1.0758766305281257e-05, "loss": 0.9213, "step": 3627 }, { "epoch": 0.49, "learning_rate": 1.0754388736337652e-05, "loss": 0.9063, "step": 3628 }, { "epoch": 0.49, "learning_rate": 1.0750011021997444e-05, "loss": 0.8269, "step": 3629 }, { "epoch": 0.49, "learning_rate": 1.074563316310436e-05, "loss": 0.8739, "step": 3630 }, { "epoch": 0.49, "learning_rate": 1.0741255160502176e-05, "loss": 0.8634, "step": 3631 }, { "epoch": 0.49, "learning_rate": 1.073687701503467e-05, "loss": 0.8495, "step": 3632 }, { "epoch": 0.49, "learning_rate": 1.0732498727545672e-05, "loss": 0.8682, "step": 3633 }, { "epoch": 0.49, "learning_rate": 1.072812029887902e-05, "loss": 0.8952, "step": 3634 }, { "epoch": 0.49, "learning_rate": 1.0723741729878596e-05, "loss": 0.9379, "step": 3635 }, { "epoch": 0.49, "learning_rate": 1.0719363021388292e-05, "loss": 0.855, "step": 3636 }, { "epoch": 0.49, "learning_rate": 1.071498417425204e-05, "loss": 0.9102, "step": 3637 }, { "epoch": 0.49, "learning_rate": 1.0710605189313794e-05, "loss": 0.8978, "step": 3638 }, { "epoch": 0.49, "learning_rate": 1.0706226067417533e-05, "loss": 0.9721, "step": 3639 }, { "epoch": 0.49, "learning_rate": 1.0701846809407268e-05, "loss": 0.9423, "step": 3640 }, { "epoch": 0.49, "learning_rate": 1.0697467416127028e-05, "loss": 0.8915, "step": 3641 }, { "epoch": 0.49, "learning_rate": 1.0693087888420875e-05, "loss": 0.8674, "step": 3642 }, { "epoch": 0.49, "learning_rate": 1.0688708227132891e-05, "loss": 0.9287, "step": 3643 }, { "epoch": 0.49, "learning_rate": 1.0684328433107192e-05, "loss": 0.8997, "step": 3644 }, { "epoch": 0.49, "learning_rate": 1.0679948507187912e-05, "loss": 0.8662, "step": 3645 }, { "epoch": 0.49, "learning_rate": 1.0675568450219208e-05, "loss": 0.8939, "step": 3646 }, { "epoch": 0.49, "learning_rate": 1.067118826304528e-05, "loss": 0.8806, "step": 3647 }, { "epoch": 0.49, "learning_rate": 1.0666807946510326e-05, "loss": 0.8474, "step": 3648 }, { "epoch": 0.49, "learning_rate": 1.0662427501458596e-05, "loss": 0.9118, "step": 3649 }, { "epoch": 0.49, "learning_rate": 1.0658046928734346e-05, "loss": 0.8579, "step": 3650 }, { "epoch": 0.49, "learning_rate": 1.065366622918186e-05, "loss": 0.8527, "step": 3651 }, { "epoch": 0.49, "learning_rate": 1.0649285403645456e-05, "loss": 0.8842, "step": 3652 }, { "epoch": 0.5, "learning_rate": 1.0644904452969462e-05, "loss": 0.8319, "step": 3653 }, { "epoch": 0.5, "learning_rate": 1.0640523377998245e-05, "loss": 0.8815, "step": 3654 }, { "epoch": 0.5, "learning_rate": 1.0636142179576182e-05, "loss": 0.9145, "step": 3655 }, { "epoch": 0.5, "learning_rate": 1.0631760858547687e-05, "loss": 0.8487, "step": 3656 }, { "epoch": 0.5, "learning_rate": 1.0627379415757183e-05, "loss": 0.879, "step": 3657 }, { "epoch": 0.5, "learning_rate": 1.062299785204913e-05, "loss": 0.9341, "step": 3658 }, { "epoch": 0.5, "learning_rate": 1.0618616168268003e-05, "loss": 0.9295, "step": 3659 }, { "epoch": 0.5, "learning_rate": 1.0614234365258307e-05, "loss": 0.8833, "step": 3660 }, { "epoch": 0.5, "learning_rate": 1.0609852443864563e-05, "loss": 0.884, "step": 3661 }, { "epoch": 0.5, "learning_rate": 1.0605470404931317e-05, "loss": 0.9433, "step": 3662 }, { "epoch": 0.5, "learning_rate": 1.060108824930314e-05, "loss": 0.9367, "step": 3663 }, { "epoch": 0.5, "learning_rate": 1.0596705977824624e-05, "loss": 0.8735, "step": 3664 }, { "epoch": 0.5, "learning_rate": 1.0592323591340378e-05, "loss": 0.897, "step": 3665 }, { "epoch": 0.5, "learning_rate": 1.0587941090695046e-05, "loss": 0.8819, "step": 3666 }, { "epoch": 0.5, "learning_rate": 1.0583558476733282e-05, "loss": 0.9122, "step": 3667 }, { "epoch": 0.5, "learning_rate": 1.0579175750299769e-05, "loss": 0.8777, "step": 3668 }, { "epoch": 0.5, "learning_rate": 1.0574792912239203e-05, "loss": 0.8944, "step": 3669 }, { "epoch": 0.5, "learning_rate": 1.0570409963396313e-05, "loss": 0.8595, "step": 3670 }, { "epoch": 0.5, "learning_rate": 1.0566026904615844e-05, "loss": 0.8768, "step": 3671 }, { "epoch": 0.5, "learning_rate": 1.0561643736742556e-05, "loss": 0.9146, "step": 3672 }, { "epoch": 0.5, "learning_rate": 1.0557260460621242e-05, "loss": 0.8751, "step": 3673 }, { "epoch": 0.5, "learning_rate": 1.0552877077096706e-05, "loss": 0.9073, "step": 3674 }, { "epoch": 0.5, "learning_rate": 1.054849358701378e-05, "loss": 0.9264, "step": 3675 }, { "epoch": 0.5, "learning_rate": 1.0544109991217309e-05, "loss": 0.8852, "step": 3676 }, { "epoch": 0.5, "learning_rate": 1.0539726290552163e-05, "loss": 0.9092, "step": 3677 }, { "epoch": 0.5, "learning_rate": 1.0535342485863235e-05, "loss": 0.9021, "step": 3678 }, { "epoch": 0.5, "learning_rate": 1.0530958577995434e-05, "loss": 0.9339, "step": 3679 }, { "epoch": 0.5, "learning_rate": 1.0526574567793687e-05, "loss": 0.9169, "step": 3680 }, { "epoch": 0.5, "learning_rate": 1.052219045610294e-05, "loss": 0.8246, "step": 3681 }, { "epoch": 0.5, "learning_rate": 1.0517806243768172e-05, "loss": 0.8564, "step": 3682 }, { "epoch": 0.5, "learning_rate": 1.0513421931634362e-05, "loss": 0.8553, "step": 3683 }, { "epoch": 0.5, "learning_rate": 1.0509037520546524e-05, "loss": 0.9152, "step": 3684 }, { "epoch": 0.5, "learning_rate": 1.0504653011349678e-05, "loss": 0.9468, "step": 3685 }, { "epoch": 0.5, "learning_rate": 1.0500268404888873e-05, "loss": 0.8771, "step": 3686 }, { "epoch": 0.5, "learning_rate": 1.0495883702009178e-05, "loss": 0.894, "step": 3687 }, { "epoch": 0.5, "learning_rate": 1.0491498903555667e-05, "loss": 0.8942, "step": 3688 }, { "epoch": 0.5, "learning_rate": 1.0487114010373445e-05, "loss": 0.8843, "step": 3689 }, { "epoch": 0.5, "learning_rate": 1.048272902330763e-05, "loss": 0.9068, "step": 3690 }, { "epoch": 0.5, "learning_rate": 1.0478343943203364e-05, "loss": 0.8633, "step": 3691 }, { "epoch": 0.5, "learning_rate": 1.0473958770905797e-05, "loss": 0.8716, "step": 3692 }, { "epoch": 0.5, "learning_rate": 1.0469573507260107e-05, "loss": 0.8474, "step": 3693 }, { "epoch": 0.5, "learning_rate": 1.0465188153111481e-05, "loss": 0.918, "step": 3694 }, { "epoch": 0.5, "learning_rate": 1.0460802709305126e-05, "loss": 0.8847, "step": 3695 }, { "epoch": 0.5, "learning_rate": 1.0456417176686275e-05, "loss": 0.8688, "step": 3696 }, { "epoch": 0.5, "learning_rate": 1.0452031556100165e-05, "loss": 0.9302, "step": 3697 }, { "epoch": 0.5, "learning_rate": 1.0447645848392057e-05, "loss": 0.8722, "step": 3698 }, { "epoch": 0.5, "learning_rate": 1.0443260054407224e-05, "loss": 0.839, "step": 3699 }, { "epoch": 0.5, "learning_rate": 1.0438874174990966e-05, "loss": 0.922, "step": 3700 }, { "epoch": 0.5, "learning_rate": 1.0434488210988587e-05, "loss": 0.9645, "step": 3701 }, { "epoch": 0.5, "learning_rate": 1.043010216324541e-05, "loss": 0.9197, "step": 3702 }, { "epoch": 0.5, "learning_rate": 1.0425716032606787e-05, "loss": 0.8644, "step": 3703 }, { "epoch": 0.5, "learning_rate": 1.042132981991807e-05, "loss": 0.9018, "step": 3704 }, { "epoch": 0.5, "learning_rate": 1.0416943526024632e-05, "loss": 0.8521, "step": 3705 }, { "epoch": 0.5, "learning_rate": 1.0412557151771865e-05, "loss": 0.9244, "step": 3706 }, { "epoch": 0.5, "learning_rate": 1.0408170698005172e-05, "loss": 0.8626, "step": 3707 }, { "epoch": 0.5, "learning_rate": 1.0403784165569972e-05, "loss": 0.9052, "step": 3708 }, { "epoch": 0.5, "learning_rate": 1.0399397555311701e-05, "loss": 0.9008, "step": 3709 }, { "epoch": 0.5, "learning_rate": 1.0395010868075814e-05, "loss": 0.9028, "step": 3710 }, { "epoch": 0.5, "learning_rate": 1.0390624104707771e-05, "loss": 0.8664, "step": 3711 }, { "epoch": 0.5, "learning_rate": 1.0386237266053054e-05, "loss": 0.8331, "step": 3712 }, { "epoch": 0.5, "learning_rate": 1.0381850352957157e-05, "loss": 0.8297, "step": 3713 }, { "epoch": 0.5, "learning_rate": 1.037746336626559e-05, "loss": 0.8633, "step": 3714 }, { "epoch": 0.5, "learning_rate": 1.0373076306823873e-05, "loss": 0.8639, "step": 3715 }, { "epoch": 0.5, "learning_rate": 1.0368689175477545e-05, "loss": 0.8613, "step": 3716 }, { "epoch": 0.5, "learning_rate": 1.0364301973072156e-05, "loss": 0.897, "step": 3717 }, { "epoch": 0.5, "learning_rate": 1.0359914700453268e-05, "loss": 0.7609, "step": 3718 }, { "epoch": 0.5, "learning_rate": 1.035552735846647e-05, "loss": 0.9183, "step": 3719 }, { "epoch": 0.5, "learning_rate": 1.0351139947957336e-05, "loss": 0.8608, "step": 3720 }, { "epoch": 0.5, "learning_rate": 1.0346752469771485e-05, "loss": 0.8657, "step": 3721 }, { "epoch": 0.5, "learning_rate": 1.0342364924754528e-05, "loss": 0.8433, "step": 3722 }, { "epoch": 0.5, "learning_rate": 1.0337977313752102e-05, "loss": 0.9465, "step": 3723 }, { "epoch": 0.5, "learning_rate": 1.033358963760984e-05, "loss": 0.9378, "step": 3724 }, { "epoch": 0.5, "learning_rate": 1.0329201897173402e-05, "loss": 0.8202, "step": 3725 }, { "epoch": 0.51, "learning_rate": 1.0324814093288463e-05, "loss": 0.8827, "step": 3726 }, { "epoch": 0.51, "learning_rate": 1.0320426226800693e-05, "loss": 0.9214, "step": 3727 }, { "epoch": 0.51, "learning_rate": 1.0316038298555793e-05, "loss": 0.9177, "step": 3728 }, { "epoch": 0.51, "learning_rate": 1.0311650309399463e-05, "loss": 0.9178, "step": 3729 }, { "epoch": 0.51, "learning_rate": 1.030726226017742e-05, "loss": 0.8792, "step": 3730 }, { "epoch": 0.51, "learning_rate": 1.0302874151735391e-05, "loss": 0.8905, "step": 3731 }, { "epoch": 0.51, "learning_rate": 1.0298485984919115e-05, "loss": 0.9318, "step": 3732 }, { "epoch": 0.51, "learning_rate": 1.0294097760574345e-05, "loss": 0.9203, "step": 3733 }, { "epoch": 0.51, "learning_rate": 1.028970947954684e-05, "loss": 0.8937, "step": 3734 }, { "epoch": 0.51, "learning_rate": 1.0285321142682372e-05, "loss": 0.8994, "step": 3735 }, { "epoch": 0.51, "learning_rate": 1.028093275082673e-05, "loss": 0.8883, "step": 3736 }, { "epoch": 0.51, "learning_rate": 1.0276544304825695e-05, "loss": 0.8133, "step": 3737 }, { "epoch": 0.51, "learning_rate": 1.0272155805525085e-05, "loss": 0.8899, "step": 3738 }, { "epoch": 0.51, "learning_rate": 1.0267767253770707e-05, "loss": 0.9308, "step": 3739 }, { "epoch": 0.51, "learning_rate": 1.0263378650408386e-05, "loss": 0.8511, "step": 3740 }, { "epoch": 0.51, "learning_rate": 1.0258989996283959e-05, "loss": 0.8644, "step": 3741 }, { "epoch": 0.51, "learning_rate": 1.025460129224327e-05, "loss": 0.9006, "step": 3742 }, { "epoch": 0.51, "learning_rate": 1.025021253913217e-05, "loss": 0.9053, "step": 3743 }, { "epoch": 0.51, "learning_rate": 1.0245823737796525e-05, "loss": 0.9251, "step": 3744 }, { "epoch": 0.51, "learning_rate": 1.0241434889082207e-05, "loss": 0.8817, "step": 3745 }, { "epoch": 0.51, "learning_rate": 1.0237045993835096e-05, "loss": 0.8897, "step": 3746 }, { "epoch": 0.51, "learning_rate": 1.0232657052901087e-05, "loss": 0.9105, "step": 3747 }, { "epoch": 0.51, "learning_rate": 1.0228268067126074e-05, "loss": 0.8915, "step": 3748 }, { "epoch": 0.51, "learning_rate": 1.0223879037355972e-05, "loss": 0.9654, "step": 3749 }, { "epoch": 0.51, "learning_rate": 1.0219489964436695e-05, "loss": 0.87, "step": 3750 }, { "epoch": 0.51, "learning_rate": 1.0215100849214165e-05, "loss": 0.9191, "step": 3751 }, { "epoch": 0.51, "learning_rate": 1.021071169253432e-05, "loss": 0.8855, "step": 3752 }, { "epoch": 0.51, "learning_rate": 1.0206322495243093e-05, "loss": 0.8965, "step": 3753 }, { "epoch": 0.51, "learning_rate": 1.0201933258186442e-05, "loss": 0.9312, "step": 3754 }, { "epoch": 0.51, "learning_rate": 1.019754398221032e-05, "loss": 0.9064, "step": 3755 }, { "epoch": 0.51, "learning_rate": 1.0193154668160692e-05, "loss": 0.9295, "step": 3756 }, { "epoch": 0.51, "learning_rate": 1.0188765316883529e-05, "loss": 0.8782, "step": 3757 }, { "epoch": 0.51, "learning_rate": 1.0184375929224808e-05, "loss": 0.9389, "step": 3758 }, { "epoch": 0.51, "learning_rate": 1.0179986506030518e-05, "loss": 0.9402, "step": 3759 }, { "epoch": 0.51, "learning_rate": 1.0175597048146647e-05, "loss": 0.8492, "step": 3760 }, { "epoch": 0.51, "learning_rate": 1.0171207556419198e-05, "loss": 0.926, "step": 3761 }, { "epoch": 0.51, "learning_rate": 1.0166818031694174e-05, "loss": 0.8427, "step": 3762 }, { "epoch": 0.51, "learning_rate": 1.0162428474817591e-05, "loss": 0.9128, "step": 3763 }, { "epoch": 0.51, "learning_rate": 1.0158038886635462e-05, "loss": 0.8706, "step": 3764 }, { "epoch": 0.51, "learning_rate": 1.015364926799382e-05, "loss": 0.8527, "step": 3765 }, { "epoch": 0.51, "learning_rate": 1.0149259619738685e-05, "loss": 0.9018, "step": 3766 }, { "epoch": 0.51, "learning_rate": 1.0144869942716098e-05, "loss": 0.8787, "step": 3767 }, { "epoch": 0.51, "learning_rate": 1.0140480237772098e-05, "loss": 0.866, "step": 3768 }, { "epoch": 0.51, "learning_rate": 1.0136090505752736e-05, "loss": 0.8983, "step": 3769 }, { "epoch": 0.51, "learning_rate": 1.0131700747504064e-05, "loss": 0.9114, "step": 3770 }, { "epoch": 0.51, "learning_rate": 1.0127310963872134e-05, "loss": 0.9185, "step": 3771 }, { "epoch": 0.51, "learning_rate": 1.0122921155703011e-05, "loss": 0.925, "step": 3772 }, { "epoch": 0.51, "learning_rate": 1.0118531323842764e-05, "loss": 0.8877, "step": 3773 }, { "epoch": 0.51, "learning_rate": 1.0114141469137459e-05, "loss": 0.8643, "step": 3774 }, { "epoch": 0.51, "learning_rate": 1.0109751592433177e-05, "loss": 0.9232, "step": 3775 }, { "epoch": 0.51, "learning_rate": 1.0105361694575992e-05, "loss": 0.9219, "step": 3776 }, { "epoch": 0.51, "learning_rate": 1.0100971776411996e-05, "loss": 0.8773, "step": 3777 }, { "epoch": 0.51, "learning_rate": 1.009658183878727e-05, "loss": 0.8467, "step": 3778 }, { "epoch": 0.51, "learning_rate": 1.009219188254791e-05, "loss": 0.9013, "step": 3779 }, { "epoch": 0.51, "learning_rate": 1.0087801908540009e-05, "loss": 0.9086, "step": 3780 }, { "epoch": 0.51, "learning_rate": 1.0083411917609664e-05, "loss": 0.973, "step": 3781 }, { "epoch": 0.51, "learning_rate": 1.0079021910602982e-05, "loss": 0.8473, "step": 3782 }, { "epoch": 0.51, "learning_rate": 1.0074631888366063e-05, "loss": 0.8675, "step": 3783 }, { "epoch": 0.51, "learning_rate": 1.0070241851745018e-05, "loss": 0.8592, "step": 3784 }, { "epoch": 0.51, "learning_rate": 1.0065851801585956e-05, "loss": 0.903, "step": 3785 }, { "epoch": 0.51, "learning_rate": 1.006146173873499e-05, "loss": 0.903, "step": 3786 }, { "epoch": 0.51, "learning_rate": 1.005707166403824e-05, "loss": 0.9007, "step": 3787 }, { "epoch": 0.51, "learning_rate": 1.005268157834182e-05, "loss": 0.8287, "step": 3788 }, { "epoch": 0.51, "learning_rate": 1.0048291482491853e-05, "loss": 0.899, "step": 3789 }, { "epoch": 0.51, "learning_rate": 1.0043901377334453e-05, "loss": 0.935, "step": 3790 }, { "epoch": 0.51, "learning_rate": 1.0039511263715757e-05, "loss": 0.9036, "step": 3791 }, { "epoch": 0.51, "learning_rate": 1.0035121142481883e-05, "loss": 0.8375, "step": 3792 }, { "epoch": 0.51, "learning_rate": 1.0030731014478958e-05, "loss": 0.9066, "step": 3793 }, { "epoch": 0.51, "learning_rate": 1.0026340880553114e-05, "loss": 0.8593, "step": 3794 }, { "epoch": 0.51, "learning_rate": 1.0021950741550474e-05, "loss": 0.882, "step": 3795 }, { "epoch": 0.51, "learning_rate": 1.0017560598317178e-05, "loss": 0.8896, "step": 3796 }, { "epoch": 0.51, "learning_rate": 1.0013170451699347e-05, "loss": 0.8654, "step": 3797 }, { "epoch": 0.51, "learning_rate": 1.000878030254312e-05, "loss": 0.8803, "step": 3798 }, { "epoch": 0.51, "learning_rate": 1.0004390151694627e-05, "loss": 0.8844, "step": 3799 }, { "epoch": 0.52, "learning_rate": 1e-05, "loss": 0.8568, "step": 3800 }, { "epoch": 0.52, "learning_rate": 9.995609848305376e-06, "loss": 0.8793, "step": 3801 }, { "epoch": 0.52, "learning_rate": 9.991219697456882e-06, "loss": 0.8458, "step": 3802 }, { "epoch": 0.52, "learning_rate": 9.986829548300656e-06, "loss": 0.8176, "step": 3803 }, { "epoch": 0.52, "learning_rate": 9.982439401682827e-06, "loss": 0.8704, "step": 3804 }, { "epoch": 0.52, "learning_rate": 9.978049258449528e-06, "loss": 0.9181, "step": 3805 }, { "epoch": 0.52, "learning_rate": 9.973659119446889e-06, "loss": 0.9, "step": 3806 }, { "epoch": 0.52, "learning_rate": 9.969268985521044e-06, "loss": 0.8677, "step": 3807 }, { "epoch": 0.52, "learning_rate": 9.964878857518119e-06, "loss": 0.88, "step": 3808 }, { "epoch": 0.52, "learning_rate": 9.960488736284246e-06, "loss": 0.8976, "step": 3809 }, { "epoch": 0.52, "learning_rate": 9.956098622665548e-06, "loss": 0.8672, "step": 3810 }, { "epoch": 0.52, "learning_rate": 9.951708517508152e-06, "loss": 0.9147, "step": 3811 }, { "epoch": 0.52, "learning_rate": 9.947318421658185e-06, "loss": 0.8812, "step": 3812 }, { "epoch": 0.52, "learning_rate": 9.942928335961765e-06, "loss": 0.9152, "step": 3813 }, { "epoch": 0.52, "learning_rate": 9.938538261265014e-06, "loss": 0.8876, "step": 3814 }, { "epoch": 0.52, "learning_rate": 9.93414819841405e-06, "loss": 0.9219, "step": 3815 }, { "epoch": 0.52, "learning_rate": 9.929758148254987e-06, "loss": 0.9005, "step": 3816 }, { "epoch": 0.52, "learning_rate": 9.925368111633944e-06, "loss": 0.9327, "step": 3817 }, { "epoch": 0.52, "learning_rate": 9.920978089397025e-06, "loss": 0.8857, "step": 3818 }, { "epoch": 0.52, "learning_rate": 9.916588082390342e-06, "loss": 0.8458, "step": 3819 }, { "epoch": 0.52, "learning_rate": 9.912198091459996e-06, "loss": 0.8415, "step": 3820 }, { "epoch": 0.52, "learning_rate": 9.907808117452096e-06, "loss": 0.8879, "step": 3821 }, { "epoch": 0.52, "learning_rate": 9.903418161212732e-06, "loss": 0.907, "step": 3822 }, { "epoch": 0.52, "learning_rate": 9.899028223588003e-06, "loss": 0.9357, "step": 3823 }, { "epoch": 0.52, "learning_rate": 9.894638305424007e-06, "loss": 0.8514, "step": 3824 }, { "epoch": 0.52, "learning_rate": 9.890248407566823e-06, "loss": 0.7871, "step": 3825 }, { "epoch": 0.52, "learning_rate": 9.885858530862543e-06, "loss": 0.9047, "step": 3826 }, { "epoch": 0.52, "learning_rate": 9.88146867615724e-06, "loss": 0.8916, "step": 3827 }, { "epoch": 0.52, "learning_rate": 9.877078844296989e-06, "loss": 0.8768, "step": 3828 }, { "epoch": 0.52, "learning_rate": 9.872689036127869e-06, "loss": 0.9386, "step": 3829 }, { "epoch": 0.52, "learning_rate": 9.868299252495938e-06, "loss": 0.9399, "step": 3830 }, { "epoch": 0.52, "learning_rate": 9.863909494247264e-06, "loss": 0.9205, "step": 3831 }, { "epoch": 0.52, "learning_rate": 9.859519762227902e-06, "loss": 0.8552, "step": 3832 }, { "epoch": 0.52, "learning_rate": 9.855130057283905e-06, "loss": 0.9218, "step": 3833 }, { "epoch": 0.52, "learning_rate": 9.850740380261318e-06, "loss": 0.8879, "step": 3834 }, { "epoch": 0.52, "learning_rate": 9.846350732006184e-06, "loss": 0.8938, "step": 3835 }, { "epoch": 0.52, "learning_rate": 9.84196111336454e-06, "loss": 0.9256, "step": 3836 }, { "epoch": 0.52, "learning_rate": 9.837571525182412e-06, "loss": 0.8148, "step": 3837 }, { "epoch": 0.52, "learning_rate": 9.83318196830583e-06, "loss": 0.9566, "step": 3838 }, { "epoch": 0.52, "learning_rate": 9.828792443580805e-06, "loss": 0.8586, "step": 3839 }, { "epoch": 0.52, "learning_rate": 9.824402951853358e-06, "loss": 0.8337, "step": 3840 }, { "epoch": 0.52, "learning_rate": 9.820013493969487e-06, "loss": 0.9127, "step": 3841 }, { "epoch": 0.52, "learning_rate": 9.815624070775195e-06, "loss": 0.8683, "step": 3842 }, { "epoch": 0.52, "learning_rate": 9.811234683116475e-06, "loss": 0.8447, "step": 3843 }, { "epoch": 0.52, "learning_rate": 9.806845331839311e-06, "loss": 0.8713, "step": 3844 }, { "epoch": 0.52, "learning_rate": 9.802456017789683e-06, "loss": 0.8768, "step": 3845 }, { "epoch": 0.52, "learning_rate": 9.79806674181356e-06, "loss": 0.8832, "step": 3846 }, { "epoch": 0.52, "learning_rate": 9.793677504756909e-06, "loss": 0.869, "step": 3847 }, { "epoch": 0.52, "learning_rate": 9.789288307465684e-06, "loss": 0.906, "step": 3848 }, { "epoch": 0.52, "learning_rate": 9.784899150785838e-06, "loss": 0.8592, "step": 3849 }, { "epoch": 0.52, "learning_rate": 9.780510035563306e-06, "loss": 0.9524, "step": 3850 }, { "epoch": 0.52, "learning_rate": 9.77612096264403e-06, "loss": 0.9004, "step": 3851 }, { "epoch": 0.52, "learning_rate": 9.771731932873927e-06, "loss": 0.8822, "step": 3852 }, { "epoch": 0.52, "learning_rate": 9.767342947098916e-06, "loss": 0.8569, "step": 3853 }, { "epoch": 0.52, "learning_rate": 9.762954006164908e-06, "loss": 0.8599, "step": 3854 }, { "epoch": 0.52, "learning_rate": 9.758565110917797e-06, "loss": 0.8808, "step": 3855 }, { "epoch": 0.52, "learning_rate": 9.75417626220348e-06, "loss": 0.9011, "step": 3856 }, { "epoch": 0.52, "learning_rate": 9.749787460867835e-06, "loss": 0.9988, "step": 3857 }, { "epoch": 0.52, "learning_rate": 9.745398707756735e-06, "loss": 0.9001, "step": 3858 }, { "epoch": 0.52, "learning_rate": 9.741010003716045e-06, "loss": 0.9104, "step": 3859 }, { "epoch": 0.52, "learning_rate": 9.736621349591619e-06, "loss": 0.8368, "step": 3860 }, { "epoch": 0.52, "learning_rate": 9.7322327462293e-06, "loss": 0.8815, "step": 3861 }, { "epoch": 0.52, "learning_rate": 9.72784419447492e-06, "loss": 0.8855, "step": 3862 }, { "epoch": 0.52, "learning_rate": 9.72345569517431e-06, "loss": 0.8304, "step": 3863 }, { "epoch": 0.52, "learning_rate": 9.719067249173277e-06, "loss": 0.8006, "step": 3864 }, { "epoch": 0.52, "learning_rate": 9.714678857317632e-06, "loss": 0.8705, "step": 3865 }, { "epoch": 0.52, "learning_rate": 9.710290520453162e-06, "loss": 0.8707, "step": 3866 }, { "epoch": 0.52, "learning_rate": 9.705902239425655e-06, "loss": 0.8525, "step": 3867 }, { "epoch": 0.52, "learning_rate": 9.701514015080886e-06, "loss": 0.8997, "step": 3868 }, { "epoch": 0.52, "learning_rate": 9.697125848264608e-06, "loss": 0.8192, "step": 3869 }, { "epoch": 0.52, "learning_rate": 9.692737739822582e-06, "loss": 0.8497, "step": 3870 }, { "epoch": 0.52, "learning_rate": 9.688349690600538e-06, "loss": 0.9268, "step": 3871 }, { "epoch": 0.52, "learning_rate": 9.683961701444208e-06, "loss": 0.9183, "step": 3872 }, { "epoch": 0.52, "learning_rate": 9.679573773199309e-06, "loss": 0.9048, "step": 3873 }, { "epoch": 0.53, "learning_rate": 9.675185906711539e-06, "loss": 0.8786, "step": 3874 }, { "epoch": 0.53, "learning_rate": 9.670798102826598e-06, "loss": 0.8259, "step": 3875 }, { "epoch": 0.53, "learning_rate": 9.666410362390162e-06, "loss": 0.8422, "step": 3876 }, { "epoch": 0.53, "learning_rate": 9.662022686247903e-06, "loss": 0.915, "step": 3877 }, { "epoch": 0.53, "learning_rate": 9.657635075245473e-06, "loss": 0.9314, "step": 3878 }, { "epoch": 0.53, "learning_rate": 9.653247530228516e-06, "loss": 0.8628, "step": 3879 }, { "epoch": 0.53, "learning_rate": 9.648860052042665e-06, "loss": 0.8671, "step": 3880 }, { "epoch": 0.53, "learning_rate": 9.644472641533536e-06, "loss": 0.8266, "step": 3881 }, { "epoch": 0.53, "learning_rate": 9.640085299546734e-06, "loss": 0.8673, "step": 3882 }, { "epoch": 0.53, "learning_rate": 9.635698026927846e-06, "loss": 0.8252, "step": 3883 }, { "epoch": 0.53, "learning_rate": 9.63131082452246e-06, "loss": 0.9164, "step": 3884 }, { "epoch": 0.53, "learning_rate": 9.62692369317613e-06, "loss": 0.8577, "step": 3885 }, { "epoch": 0.53, "learning_rate": 9.622536633734413e-06, "loss": 0.9194, "step": 3886 }, { "epoch": 0.53, "learning_rate": 9.618149647042847e-06, "loss": 0.8639, "step": 3887 }, { "epoch": 0.53, "learning_rate": 9.61376273394695e-06, "loss": 0.8863, "step": 3888 }, { "epoch": 0.53, "learning_rate": 9.609375895292232e-06, "loss": 0.8574, "step": 3889 }, { "epoch": 0.53, "learning_rate": 9.60498913192419e-06, "loss": 0.8984, "step": 3890 }, { "epoch": 0.53, "learning_rate": 9.6006024446883e-06, "loss": 0.8795, "step": 3891 }, { "epoch": 0.53, "learning_rate": 9.596215834430031e-06, "loss": 0.9228, "step": 3892 }, { "epoch": 0.53, "learning_rate": 9.591829301994833e-06, "loss": 0.9026, "step": 3893 }, { "epoch": 0.53, "learning_rate": 9.587442848228138e-06, "loss": 0.8989, "step": 3894 }, { "epoch": 0.53, "learning_rate": 9.583056473975371e-06, "loss": 0.8825, "step": 3895 }, { "epoch": 0.53, "learning_rate": 9.578670180081935e-06, "loss": 0.8826, "step": 3896 }, { "epoch": 0.53, "learning_rate": 9.574283967393215e-06, "loss": 0.8576, "step": 3897 }, { "epoch": 0.53, "learning_rate": 9.569897836754592e-06, "loss": 0.9944, "step": 3898 }, { "epoch": 0.53, "learning_rate": 9.565511789011418e-06, "loss": 0.8374, "step": 3899 }, { "epoch": 0.53, "learning_rate": 9.56112582500904e-06, "loss": 0.9536, "step": 3900 }, { "epoch": 0.53, "learning_rate": 9.556739945592779e-06, "loss": 0.8932, "step": 3901 }, { "epoch": 0.53, "learning_rate": 9.552354151607948e-06, "loss": 0.9033, "step": 3902 }, { "epoch": 0.53, "learning_rate": 9.54796844389984e-06, "loss": 0.9185, "step": 3903 }, { "epoch": 0.53, "learning_rate": 9.54358282331373e-06, "loss": 0.9268, "step": 3904 }, { "epoch": 0.53, "learning_rate": 9.539197290694877e-06, "loss": 0.8676, "step": 3905 }, { "epoch": 0.53, "learning_rate": 9.534811846888524e-06, "loss": 0.9102, "step": 3906 }, { "epoch": 0.53, "learning_rate": 9.5304264927399e-06, "loss": 0.9226, "step": 3907 }, { "epoch": 0.53, "learning_rate": 9.526041229094206e-06, "loss": 0.863, "step": 3908 }, { "epoch": 0.53, "learning_rate": 9.521656056796643e-06, "loss": 0.9516, "step": 3909 }, { "epoch": 0.53, "learning_rate": 9.51727097669237e-06, "loss": 0.8361, "step": 3910 }, { "epoch": 0.53, "learning_rate": 9.512885989626555e-06, "loss": 0.9514, "step": 3911 }, { "epoch": 0.53, "learning_rate": 9.508501096444335e-06, "loss": 0.9214, "step": 3912 }, { "epoch": 0.53, "learning_rate": 9.504116297990826e-06, "loss": 0.8268, "step": 3913 }, { "epoch": 0.53, "learning_rate": 9.499731595111125e-06, "loss": 0.9295, "step": 3914 }, { "epoch": 0.53, "learning_rate": 9.495346988650323e-06, "loss": 0.8803, "step": 3915 }, { "epoch": 0.53, "learning_rate": 9.490962479453478e-06, "loss": 0.869, "step": 3916 }, { "epoch": 0.53, "learning_rate": 9.48657806836564e-06, "loss": 0.8627, "step": 3917 }, { "epoch": 0.53, "learning_rate": 9.48219375623183e-06, "loss": 0.8802, "step": 3918 }, { "epoch": 0.53, "learning_rate": 9.477809543897061e-06, "loss": 0.9098, "step": 3919 }, { "epoch": 0.53, "learning_rate": 9.473425432206315e-06, "loss": 0.8909, "step": 3920 }, { "epoch": 0.53, "learning_rate": 9.46904142200457e-06, "loss": 0.8807, "step": 3921 }, { "epoch": 0.53, "learning_rate": 9.464657514136768e-06, "loss": 0.8652, "step": 3922 }, { "epoch": 0.53, "learning_rate": 9.460273709447838e-06, "loss": 0.8406, "step": 3923 }, { "epoch": 0.53, "learning_rate": 9.455890008782696e-06, "loss": 0.9005, "step": 3924 }, { "epoch": 0.53, "learning_rate": 9.451506412986223e-06, "loss": 0.8823, "step": 3925 }, { "epoch": 0.53, "learning_rate": 9.447122922903298e-06, "loss": 0.8098, "step": 3926 }, { "epoch": 0.53, "learning_rate": 9.44273953937876e-06, "loss": 0.8591, "step": 3927 }, { "epoch": 0.53, "learning_rate": 9.438356263257446e-06, "loss": 0.9176, "step": 3928 }, { "epoch": 0.53, "learning_rate": 9.43397309538416e-06, "loss": 0.9186, "step": 3929 }, { "epoch": 0.53, "learning_rate": 9.429590036603688e-06, "loss": 0.9453, "step": 3930 }, { "epoch": 0.53, "learning_rate": 9.425207087760799e-06, "loss": 0.8081, "step": 3931 }, { "epoch": 0.53, "learning_rate": 9.420824249700234e-06, "loss": 0.88, "step": 3932 }, { "epoch": 0.53, "learning_rate": 9.41644152326672e-06, "loss": 0.8508, "step": 3933 }, { "epoch": 0.53, "learning_rate": 9.412058909304956e-06, "loss": 0.8438, "step": 3934 }, { "epoch": 0.53, "learning_rate": 9.407676408659623e-06, "loss": 0.8806, "step": 3935 }, { "epoch": 0.53, "learning_rate": 9.40329402217538e-06, "loss": 0.8222, "step": 3936 }, { "epoch": 0.53, "learning_rate": 9.398911750696864e-06, "loss": 0.8937, "step": 3937 }, { "epoch": 0.53, "learning_rate": 9.394529595068686e-06, "loss": 0.8492, "step": 3938 }, { "epoch": 0.53, "learning_rate": 9.39014755613544e-06, "loss": 0.9158, "step": 3939 }, { "epoch": 0.53, "learning_rate": 9.385765634741696e-06, "loss": 0.8605, "step": 3940 }, { "epoch": 0.53, "learning_rate": 9.381383831731998e-06, "loss": 0.8936, "step": 3941 }, { "epoch": 0.53, "learning_rate": 9.377002147950875e-06, "loss": 0.8175, "step": 3942 }, { "epoch": 0.53, "learning_rate": 9.37262058424282e-06, "loss": 0.8661, "step": 3943 }, { "epoch": 0.53, "learning_rate": 9.36823914145232e-06, "loss": 0.9212, "step": 3944 }, { "epoch": 0.53, "learning_rate": 9.36385782042382e-06, "loss": 0.8734, "step": 3945 }, { "epoch": 0.53, "learning_rate": 9.35947662200176e-06, "loss": 0.8282, "step": 3946 }, { "epoch": 0.53, "learning_rate": 9.355095547030543e-06, "loss": 0.8406, "step": 3947 }, { "epoch": 0.54, "learning_rate": 9.35071459635455e-06, "loss": 0.9231, "step": 3948 }, { "epoch": 0.54, "learning_rate": 9.346333770818145e-06, "loss": 0.9101, "step": 3949 }, { "epoch": 0.54, "learning_rate": 9.341953071265659e-06, "loss": 0.9971, "step": 3950 }, { "epoch": 0.54, "learning_rate": 9.33757249854141e-06, "loss": 0.904, "step": 3951 }, { "epoch": 0.54, "learning_rate": 9.333192053489675e-06, "loss": 0.93, "step": 3952 }, { "epoch": 0.54, "learning_rate": 9.328811736954722e-06, "loss": 0.8932, "step": 3953 }, { "epoch": 0.54, "learning_rate": 9.324431549780792e-06, "loss": 0.9432, "step": 3954 }, { "epoch": 0.54, "learning_rate": 9.32005149281209e-06, "loss": 0.9257, "step": 3955 }, { "epoch": 0.54, "learning_rate": 9.315671566892809e-06, "loss": 0.8462, "step": 3956 }, { "epoch": 0.54, "learning_rate": 9.31129177286711e-06, "loss": 0.8762, "step": 3957 }, { "epoch": 0.54, "learning_rate": 9.306912111579127e-06, "loss": 0.8908, "step": 3958 }, { "epoch": 0.54, "learning_rate": 9.302532583872974e-06, "loss": 0.8503, "step": 3959 }, { "epoch": 0.54, "learning_rate": 9.298153190592732e-06, "loss": 0.8407, "step": 3960 }, { "epoch": 0.54, "learning_rate": 9.293773932582467e-06, "loss": 0.9171, "step": 3961 }, { "epoch": 0.54, "learning_rate": 9.289394810686206e-06, "loss": 0.8782, "step": 3962 }, { "epoch": 0.54, "learning_rate": 9.285015825747962e-06, "loss": 0.9366, "step": 3963 }, { "epoch": 0.54, "learning_rate": 9.280636978611712e-06, "loss": 0.8977, "step": 3964 }, { "epoch": 0.54, "learning_rate": 9.276258270121407e-06, "loss": 0.8739, "step": 3965 }, { "epoch": 0.54, "learning_rate": 9.271879701120981e-06, "loss": 0.9149, "step": 3966 }, { "epoch": 0.54, "learning_rate": 9.267501272454331e-06, "loss": 0.8075, "step": 3967 }, { "epoch": 0.54, "learning_rate": 9.263122984965332e-06, "loss": 0.897, "step": 3968 }, { "epoch": 0.54, "learning_rate": 9.258744839497827e-06, "loss": 0.8701, "step": 3969 }, { "epoch": 0.54, "learning_rate": 9.254366836895641e-06, "loss": 0.9392, "step": 3970 }, { "epoch": 0.54, "learning_rate": 9.24998897800256e-06, "loss": 0.8804, "step": 3971 }, { "epoch": 0.54, "learning_rate": 9.245611263662351e-06, "loss": 0.8825, "step": 3972 }, { "epoch": 0.54, "learning_rate": 9.241233694718748e-06, "loss": 0.9351, "step": 3973 }, { "epoch": 0.54, "learning_rate": 9.236856272015457e-06, "loss": 0.8868, "step": 3974 }, { "epoch": 0.54, "learning_rate": 9.232478996396162e-06, "loss": 0.8441, "step": 3975 }, { "epoch": 0.54, "learning_rate": 9.22810186870451e-06, "loss": 0.8887, "step": 3976 }, { "epoch": 0.54, "learning_rate": 9.223724889784128e-06, "loss": 0.8759, "step": 3977 }, { "epoch": 0.54, "learning_rate": 9.219348060478606e-06, "loss": 0.8987, "step": 3978 }, { "epoch": 0.54, "learning_rate": 9.214971381631514e-06, "loss": 0.8182, "step": 3979 }, { "epoch": 0.54, "learning_rate": 9.210594854086382e-06, "loss": 0.9249, "step": 3980 }, { "epoch": 0.54, "learning_rate": 9.206218478686724e-06, "loss": 0.9049, "step": 3981 }, { "epoch": 0.54, "learning_rate": 9.201842256276012e-06, "loss": 0.9409, "step": 3982 }, { "epoch": 0.54, "learning_rate": 9.197466187697697e-06, "loss": 0.955, "step": 3983 }, { "epoch": 0.54, "learning_rate": 9.193090273795199e-06, "loss": 0.8835, "step": 3984 }, { "epoch": 0.54, "learning_rate": 9.188714515411902e-06, "loss": 0.8858, "step": 3985 }, { "epoch": 0.54, "learning_rate": 9.18433891339117e-06, "loss": 0.8838, "step": 3986 }, { "epoch": 0.54, "learning_rate": 9.179963468576328e-06, "loss": 0.8954, "step": 3987 }, { "epoch": 0.54, "learning_rate": 9.175588181810678e-06, "loss": 0.8921, "step": 3988 }, { "epoch": 0.54, "learning_rate": 9.171213053937486e-06, "loss": 0.7093, "step": 3989 }, { "epoch": 0.54, "learning_rate": 9.166838085799988e-06, "loss": 0.9241, "step": 3990 }, { "epoch": 0.54, "learning_rate": 9.162463278241395e-06, "loss": 0.8515, "step": 3991 }, { "epoch": 0.54, "learning_rate": 9.158088632104876e-06, "loss": 0.8144, "step": 3992 }, { "epoch": 0.54, "learning_rate": 9.15371414823358e-06, "loss": 0.855, "step": 3993 }, { "epoch": 0.54, "learning_rate": 9.149339827470619e-06, "loss": 0.8932, "step": 3994 }, { "epoch": 0.54, "learning_rate": 9.144965670659075e-06, "loss": 0.8469, "step": 3995 }, { "epoch": 0.54, "learning_rate": 9.140591678641998e-06, "loss": 0.8284, "step": 3996 }, { "epoch": 0.54, "learning_rate": 9.136217852262404e-06, "loss": 0.9026, "step": 3997 }, { "epoch": 0.54, "learning_rate": 9.131844192363285e-06, "loss": 0.9049, "step": 3998 }, { "epoch": 0.54, "learning_rate": 9.127470699787594e-06, "loss": 0.9069, "step": 3999 }, { "epoch": 0.54, "learning_rate": 9.123097375378249e-06, "loss": 0.8866, "step": 4000 }, { "epoch": 0.54, "learning_rate": 9.118724219978143e-06, "loss": 0.9536, "step": 4001 }, { "epoch": 0.54, "learning_rate": 9.114351234430132e-06, "loss": 0.8977, "step": 4002 }, { "epoch": 0.54, "learning_rate": 9.109978419577044e-06, "loss": 0.8643, "step": 4003 }, { "epoch": 0.54, "learning_rate": 9.105605776261664e-06, "loss": 0.9283, "step": 4004 }, { "epoch": 0.54, "learning_rate": 9.101233305326755e-06, "loss": 0.9064, "step": 4005 }, { "epoch": 0.54, "learning_rate": 9.09686100761504e-06, "loss": 0.8408, "step": 4006 }, { "epoch": 0.54, "learning_rate": 9.092488883969215e-06, "loss": 0.8521, "step": 4007 }, { "epoch": 0.54, "learning_rate": 9.088116935231936e-06, "loss": 0.8903, "step": 4008 }, { "epoch": 0.54, "learning_rate": 9.083745162245823e-06, "loss": 0.9439, "step": 4009 }, { "epoch": 0.54, "learning_rate": 9.079373565853473e-06, "loss": 0.932, "step": 4010 }, { "epoch": 0.54, "learning_rate": 9.075002146897438e-06, "loss": 0.8323, "step": 4011 }, { "epoch": 0.54, "learning_rate": 9.070630906220246e-06, "loss": 0.9231, "step": 4012 }, { "epoch": 0.54, "learning_rate": 9.066259844664382e-06, "loss": 0.8253, "step": 4013 }, { "epoch": 0.54, "learning_rate": 9.061888963072298e-06, "loss": 0.8643, "step": 4014 }, { "epoch": 0.54, "learning_rate": 9.057518262286414e-06, "loss": 0.8937, "step": 4015 }, { "epoch": 0.54, "learning_rate": 9.053147743149118e-06, "loss": 0.844, "step": 4016 }, { "epoch": 0.54, "learning_rate": 9.048777406502754e-06, "loss": 0.9397, "step": 4017 }, { "epoch": 0.54, "learning_rate": 9.044407253189636e-06, "loss": 0.8667, "step": 4018 }, { "epoch": 0.54, "learning_rate": 9.040037284052046e-06, "loss": 0.8505, "step": 4019 }, { "epoch": 0.54, "learning_rate": 9.035667499932224e-06, "loss": 0.8777, "step": 4020 }, { "epoch": 0.54, "learning_rate": 9.03129790167238e-06, "loss": 0.9245, "step": 4021 }, { "epoch": 0.55, "learning_rate": 9.026928490114683e-06, "loss": 0.948, "step": 4022 }, { "epoch": 0.55, "learning_rate": 9.02255926610127e-06, "loss": 0.931, "step": 4023 }, { "epoch": 0.55, "learning_rate": 9.018190230474242e-06, "loss": 0.8365, "step": 4024 }, { "epoch": 0.55, "learning_rate": 9.01382138407566e-06, "loss": 0.9358, "step": 4025 }, { "epoch": 0.55, "learning_rate": 9.00945272774755e-06, "loss": 0.8898, "step": 4026 }, { "epoch": 0.55, "learning_rate": 9.005084262331902e-06, "loss": 0.8438, "step": 4027 }, { "epoch": 0.55, "learning_rate": 9.000715988670672e-06, "loss": 0.9449, "step": 4028 }, { "epoch": 0.55, "learning_rate": 8.996347907605773e-06, "loss": 0.887, "step": 4029 }, { "epoch": 0.55, "learning_rate": 8.99198001997909e-06, "loss": 0.8488, "step": 4030 }, { "epoch": 0.55, "learning_rate": 8.987612326632457e-06, "loss": 0.9015, "step": 4031 }, { "epoch": 0.55, "learning_rate": 8.983244828407683e-06, "loss": 0.8529, "step": 4032 }, { "epoch": 0.55, "learning_rate": 8.978877526146536e-06, "loss": 0.8861, "step": 4033 }, { "epoch": 0.55, "learning_rate": 8.97451042069074e-06, "loss": 0.8062, "step": 4034 }, { "epoch": 0.55, "learning_rate": 8.970143512881992e-06, "loss": 0.8382, "step": 4035 }, { "epoch": 0.55, "learning_rate": 8.965776803561942e-06, "loss": 0.8753, "step": 4036 }, { "epoch": 0.55, "learning_rate": 8.961410293572203e-06, "loss": 0.9268, "step": 4037 }, { "epoch": 0.55, "learning_rate": 8.957043983754355e-06, "loss": 0.923, "step": 4038 }, { "epoch": 0.55, "learning_rate": 8.952677874949934e-06, "loss": 0.8727, "step": 4039 }, { "epoch": 0.55, "learning_rate": 8.948311968000437e-06, "loss": 0.9186, "step": 4040 }, { "epoch": 0.55, "learning_rate": 8.943946263747327e-06, "loss": 0.8294, "step": 4041 }, { "epoch": 0.55, "learning_rate": 8.939580763032026e-06, "loss": 0.8937, "step": 4042 }, { "epoch": 0.55, "learning_rate": 8.935215466695916e-06, "loss": 0.9289, "step": 4043 }, { "epoch": 0.55, "learning_rate": 8.930850375580336e-06, "loss": 0.8725, "step": 4044 }, { "epoch": 0.55, "learning_rate": 8.92648549052659e-06, "loss": 0.8722, "step": 4045 }, { "epoch": 0.55, "learning_rate": 8.922120812375942e-06, "loss": 0.8557, "step": 4046 }, { "epoch": 0.55, "learning_rate": 8.917756341969618e-06, "loss": 0.9892, "step": 4047 }, { "epoch": 0.55, "learning_rate": 8.913392080148795e-06, "loss": 0.8654, "step": 4048 }, { "epoch": 0.55, "learning_rate": 8.909028027754622e-06, "loss": 0.8974, "step": 4049 }, { "epoch": 0.55, "learning_rate": 8.9046641856282e-06, "loss": 0.845, "step": 4050 }, { "epoch": 0.55, "learning_rate": 8.900300554610587e-06, "loss": 0.8737, "step": 4051 }, { "epoch": 0.55, "learning_rate": 8.895937135542812e-06, "loss": 0.9081, "step": 4052 }, { "epoch": 0.55, "learning_rate": 8.891573929265848e-06, "loss": 0.8459, "step": 4053 }, { "epoch": 0.55, "learning_rate": 8.88721093662064e-06, "loss": 0.8954, "step": 4054 }, { "epoch": 0.55, "learning_rate": 8.882848158448084e-06, "loss": 0.8934, "step": 4055 }, { "epoch": 0.55, "learning_rate": 8.878485595589039e-06, "loss": 0.895, "step": 4056 }, { "epoch": 0.55, "learning_rate": 8.874123248884318e-06, "loss": 0.8585, "step": 4057 }, { "epoch": 0.55, "learning_rate": 8.869761119174697e-06, "loss": 0.9325, "step": 4058 }, { "epoch": 0.55, "learning_rate": 8.86539920730091e-06, "loss": 0.8736, "step": 4059 }, { "epoch": 0.55, "learning_rate": 8.86103751410364e-06, "loss": 0.898, "step": 4060 }, { "epoch": 0.55, "learning_rate": 8.856676040423543e-06, "loss": 0.8981, "step": 4061 }, { "epoch": 0.55, "learning_rate": 8.852314787101219e-06, "loss": 0.8711, "step": 4062 }, { "epoch": 0.55, "learning_rate": 8.847953754977236e-06, "loss": 0.8963, "step": 4063 }, { "epoch": 0.55, "learning_rate": 8.84359294489211e-06, "loss": 0.9152, "step": 4064 }, { "epoch": 0.55, "learning_rate": 8.839232357686322e-06, "loss": 0.8798, "step": 4065 }, { "epoch": 0.55, "learning_rate": 8.834871994200305e-06, "loss": 0.8375, "step": 4066 }, { "epoch": 0.55, "learning_rate": 8.830511855274454e-06, "loss": 0.8314, "step": 4067 }, { "epoch": 0.55, "learning_rate": 8.826151941749115e-06, "loss": 0.8776, "step": 4068 }, { "epoch": 0.55, "learning_rate": 8.82179225446459e-06, "loss": 0.906, "step": 4069 }, { "epoch": 0.55, "learning_rate": 8.817432794261145e-06, "loss": 0.8676, "step": 4070 }, { "epoch": 0.55, "learning_rate": 8.813073561978996e-06, "loss": 0.818, "step": 4071 }, { "epoch": 0.55, "learning_rate": 8.808714558458318e-06, "loss": 0.8103, "step": 4072 }, { "epoch": 0.55, "learning_rate": 8.804355784539236e-06, "loss": 0.8495, "step": 4073 }, { "epoch": 0.55, "learning_rate": 8.799997241061844e-06, "loss": 0.9304, "step": 4074 }, { "epoch": 0.55, "learning_rate": 8.795638928866174e-06, "loss": 0.8336, "step": 4075 }, { "epoch": 0.55, "learning_rate": 8.791280848792227e-06, "loss": 0.9305, "step": 4076 }, { "epoch": 0.55, "learning_rate": 8.786923001679953e-06, "loss": 0.8716, "step": 4077 }, { "epoch": 0.55, "learning_rate": 8.78256538836926e-06, "loss": 0.9216, "step": 4078 }, { "epoch": 0.55, "learning_rate": 8.778208009700008e-06, "loss": 0.7834, "step": 4079 }, { "epoch": 0.55, "learning_rate": 8.773850866512016e-06, "loss": 0.8598, "step": 4080 }, { "epoch": 0.55, "learning_rate": 8.769493959645055e-06, "loss": 0.8481, "step": 4081 }, { "epoch": 0.55, "learning_rate": 8.765137289938846e-06, "loss": 0.8641, "step": 4082 }, { "epoch": 0.55, "learning_rate": 8.760780858233074e-06, "loss": 0.9483, "step": 4083 }, { "epoch": 0.55, "learning_rate": 8.756424665367367e-06, "loss": 0.8892, "step": 4084 }, { "epoch": 0.55, "learning_rate": 8.75206871218132e-06, "loss": 0.8296, "step": 4085 }, { "epoch": 0.55, "learning_rate": 8.747712999514472e-06, "loss": 0.8736, "step": 4086 }, { "epoch": 0.55, "learning_rate": 8.74335752820632e-06, "loss": 0.874, "step": 4087 }, { "epoch": 0.55, "learning_rate": 8.739002299096305e-06, "loss": 0.8574, "step": 4088 }, { "epoch": 0.55, "learning_rate": 8.734647313023839e-06, "loss": 0.849, "step": 4089 }, { "epoch": 0.55, "learning_rate": 8.730292570828271e-06, "loss": 0.8409, "step": 4090 }, { "epoch": 0.55, "learning_rate": 8.725938073348916e-06, "loss": 0.833, "step": 4091 }, { "epoch": 0.55, "learning_rate": 8.721583821425025e-06, "loss": 0.8848, "step": 4092 }, { "epoch": 0.55, "learning_rate": 8.71722981589582e-06, "loss": 0.8392, "step": 4093 }, { "epoch": 0.55, "learning_rate": 8.712876057600467e-06, "loss": 0.8957, "step": 4094 }, { "epoch": 0.56, "learning_rate": 8.70852254737808e-06, "loss": 0.9114, "step": 4095 }, { "epoch": 0.56, "learning_rate": 8.704169286067733e-06, "loss": 0.8837, "step": 4096 }, { "epoch": 0.56, "learning_rate": 8.699816274508446e-06, "loss": 0.8257, "step": 4097 }, { "epoch": 0.56, "learning_rate": 8.6954635135392e-06, "loss": 0.9546, "step": 4098 }, { "epoch": 0.56, "learning_rate": 8.691111003998913e-06, "loss": 0.8893, "step": 4099 }, { "epoch": 0.56, "learning_rate": 8.686758746726472e-06, "loss": 0.8882, "step": 4100 }, { "epoch": 0.56, "learning_rate": 8.682406742560698e-06, "loss": 0.8877, "step": 4101 }, { "epoch": 0.56, "learning_rate": 8.678054992340379e-06, "loss": 0.8795, "step": 4102 }, { "epoch": 0.56, "learning_rate": 8.673703496904243e-06, "loss": 0.8562, "step": 4103 }, { "epoch": 0.56, "learning_rate": 8.669352257090968e-06, "loss": 0.878, "step": 4104 }, { "epoch": 0.56, "learning_rate": 8.665001273739197e-06, "loss": 0.7933, "step": 4105 }, { "epoch": 0.56, "learning_rate": 8.660650547687506e-06, "loss": 0.925, "step": 4106 }, { "epoch": 0.56, "learning_rate": 8.656300079774432e-06, "loss": 0.7429, "step": 4107 }, { "epoch": 0.56, "learning_rate": 8.65194987083846e-06, "loss": 0.8926, "step": 4108 }, { "epoch": 0.56, "learning_rate": 8.647599921718025e-06, "loss": 0.919, "step": 4109 }, { "epoch": 0.56, "learning_rate": 8.64325023325151e-06, "loss": 0.8778, "step": 4110 }, { "epoch": 0.56, "learning_rate": 8.63890080627725e-06, "loss": 0.925, "step": 4111 }, { "epoch": 0.56, "learning_rate": 8.63455164163353e-06, "loss": 0.861, "step": 4112 }, { "epoch": 0.56, "learning_rate": 8.63020274015858e-06, "loss": 0.8348, "step": 4113 }, { "epoch": 0.56, "learning_rate": 8.625854102690587e-06, "loss": 0.8978, "step": 4114 }, { "epoch": 0.56, "learning_rate": 8.621505730067678e-06, "loss": 0.8501, "step": 4115 }, { "epoch": 0.56, "learning_rate": 8.617157623127938e-06, "loss": 0.889, "step": 4116 }, { "epoch": 0.56, "learning_rate": 8.612809782709394e-06, "loss": 0.8906, "step": 4117 }, { "epoch": 0.56, "learning_rate": 8.608462209650026e-06, "loss": 0.8515, "step": 4118 }, { "epoch": 0.56, "learning_rate": 8.60411490478776e-06, "loss": 0.8039, "step": 4119 }, { "epoch": 0.56, "learning_rate": 8.599767868960467e-06, "loss": 0.8978, "step": 4120 }, { "epoch": 0.56, "learning_rate": 8.595421103005976e-06, "loss": 0.8667, "step": 4121 }, { "epoch": 0.56, "learning_rate": 8.591074607762054e-06, "loss": 0.8903, "step": 4122 }, { "epoch": 0.56, "learning_rate": 8.586728384066421e-06, "loss": 0.8244, "step": 4123 }, { "epoch": 0.56, "learning_rate": 8.582382432756743e-06, "loss": 0.8697, "step": 4124 }, { "epoch": 0.56, "learning_rate": 8.578036754670635e-06, "loss": 0.8671, "step": 4125 }, { "epoch": 0.56, "learning_rate": 8.57369135064566e-06, "loss": 0.8729, "step": 4126 }, { "epoch": 0.56, "learning_rate": 8.569346221519323e-06, "loss": 0.8991, "step": 4127 }, { "epoch": 0.56, "learning_rate": 8.565001368129077e-06, "loss": 0.8838, "step": 4128 }, { "epoch": 0.56, "learning_rate": 8.560656791312332e-06, "loss": 0.8673, "step": 4129 }, { "epoch": 0.56, "learning_rate": 8.556312491906433e-06, "loss": 0.9292, "step": 4130 }, { "epoch": 0.56, "learning_rate": 8.551968470748679e-06, "loss": 0.8922, "step": 4131 }, { "epoch": 0.56, "learning_rate": 8.547624728676307e-06, "loss": 0.8702, "step": 4132 }, { "epoch": 0.56, "learning_rate": 8.543281266526508e-06, "loss": 0.8353, "step": 4133 }, { "epoch": 0.56, "learning_rate": 8.538938085136413e-06, "loss": 0.9415, "step": 4134 }, { "epoch": 0.56, "learning_rate": 8.534595185343109e-06, "loss": 0.8413, "step": 4135 }, { "epoch": 0.56, "learning_rate": 8.530252567983615e-06, "loss": 0.8849, "step": 4136 }, { "epoch": 0.56, "learning_rate": 8.525910233894906e-06, "loss": 0.8324, "step": 4137 }, { "epoch": 0.56, "learning_rate": 8.521568183913898e-06, "loss": 0.8609, "step": 4138 }, { "epoch": 0.56, "learning_rate": 8.517226418877452e-06, "loss": 0.8794, "step": 4139 }, { "epoch": 0.56, "learning_rate": 8.512884939622377e-06, "loss": 0.8296, "step": 4140 }, { "epoch": 0.56, "learning_rate": 8.508543746985423e-06, "loss": 0.8209, "step": 4141 }, { "epoch": 0.56, "learning_rate": 8.50420284180329e-06, "loss": 0.926, "step": 4142 }, { "epoch": 0.56, "learning_rate": 8.499862224912617e-06, "loss": 0.857, "step": 4143 }, { "epoch": 0.56, "learning_rate": 8.49552189714999e-06, "loss": 0.8908, "step": 4144 }, { "epoch": 0.56, "learning_rate": 8.491181859351941e-06, "loss": 0.8496, "step": 4145 }, { "epoch": 0.56, "learning_rate": 8.48684211235494e-06, "loss": 0.8408, "step": 4146 }, { "epoch": 0.56, "learning_rate": 8.482502656995411e-06, "loss": 0.8394, "step": 4147 }, { "epoch": 0.56, "learning_rate": 8.47816349410971e-06, "loss": 0.8485, "step": 4148 }, { "epoch": 0.56, "learning_rate": 8.47382462453415e-06, "loss": 0.8512, "step": 4149 }, { "epoch": 0.56, "learning_rate": 8.469486049104972e-06, "loss": 0.9355, "step": 4150 }, { "epoch": 0.56, "learning_rate": 8.465147768658374e-06, "loss": 0.876, "step": 4151 }, { "epoch": 0.56, "learning_rate": 8.460809784030491e-06, "loss": 0.8586, "step": 4152 }, { "epoch": 0.56, "learning_rate": 8.456472096057398e-06, "loss": 0.92, "step": 4153 }, { "epoch": 0.56, "learning_rate": 8.452134705575121e-06, "loss": 0.8716, "step": 4154 }, { "epoch": 0.56, "learning_rate": 8.44779761341962e-06, "loss": 0.8784, "step": 4155 }, { "epoch": 0.56, "learning_rate": 8.443460820426806e-06, "loss": 0.8253, "step": 4156 }, { "epoch": 0.56, "learning_rate": 8.439124327432521e-06, "loss": 0.8855, "step": 4157 }, { "epoch": 0.56, "learning_rate": 8.434788135272564e-06, "loss": 0.9093, "step": 4158 }, { "epoch": 0.56, "learning_rate": 8.430452244782663e-06, "loss": 0.9059, "step": 4159 }, { "epoch": 0.56, "learning_rate": 8.426116656798495e-06, "loss": 0.8309, "step": 4160 }, { "epoch": 0.56, "learning_rate": 8.421781372155675e-06, "loss": 0.8584, "step": 4161 }, { "epoch": 0.56, "learning_rate": 8.417446391689762e-06, "loss": 0.8078, "step": 4162 }, { "epoch": 0.56, "learning_rate": 8.413111716236257e-06, "loss": 0.9192, "step": 4163 }, { "epoch": 0.56, "learning_rate": 8.408777346630597e-06, "loss": 0.9081, "step": 4164 }, { "epoch": 0.56, "learning_rate": 8.40444328370817e-06, "loss": 0.9009, "step": 4165 }, { "epoch": 0.56, "learning_rate": 8.400109528304292e-06, "loss": 0.8953, "step": 4166 }, { "epoch": 0.56, "learning_rate": 8.39577608125423e-06, "loss": 0.8744, "step": 4167 }, { "epoch": 0.56, "learning_rate": 8.391442943393188e-06, "loss": 0.9681, "step": 4168 }, { "epoch": 0.57, "learning_rate": 8.387110115556311e-06, "loss": 0.8461, "step": 4169 }, { "epoch": 0.57, "learning_rate": 8.382777598578683e-06, "loss": 0.9374, "step": 4170 }, { "epoch": 0.57, "learning_rate": 8.378445393295321e-06, "loss": 0.8609, "step": 4171 }, { "epoch": 0.57, "learning_rate": 8.374113500541202e-06, "loss": 0.8516, "step": 4172 }, { "epoch": 0.57, "learning_rate": 8.369781921151226e-06, "loss": 0.9053, "step": 4173 }, { "epoch": 0.57, "learning_rate": 8.365450655960236e-06, "loss": 0.8808, "step": 4174 }, { "epoch": 0.57, "learning_rate": 8.361119705803016e-06, "loss": 0.8689, "step": 4175 }, { "epoch": 0.57, "learning_rate": 8.356789071514288e-06, "loss": 0.919, "step": 4176 }, { "epoch": 0.57, "learning_rate": 8.352458753928716e-06, "loss": 0.9124, "step": 4177 }, { "epoch": 0.57, "learning_rate": 8.348128753880898e-06, "loss": 0.9115, "step": 4178 }, { "epoch": 0.57, "learning_rate": 8.343799072205376e-06, "loss": 0.8251, "step": 4179 }, { "epoch": 0.57, "learning_rate": 8.33946970973663e-06, "loss": 0.8885, "step": 4180 }, { "epoch": 0.57, "learning_rate": 8.33514066730907e-06, "loss": 0.9018, "step": 4181 }, { "epoch": 0.57, "learning_rate": 8.330811945757056e-06, "loss": 0.88, "step": 4182 }, { "epoch": 0.57, "learning_rate": 8.32648354591488e-06, "loss": 0.87, "step": 4183 }, { "epoch": 0.57, "learning_rate": 8.322155468616777e-06, "loss": 0.8755, "step": 4184 }, { "epoch": 0.57, "learning_rate": 8.317827714696908e-06, "loss": 0.8069, "step": 4185 }, { "epoch": 0.57, "learning_rate": 8.313500284989388e-06, "loss": 0.8344, "step": 4186 }, { "epoch": 0.57, "learning_rate": 8.309173180328255e-06, "loss": 0.8836, "step": 4187 }, { "epoch": 0.57, "learning_rate": 8.304846401547496e-06, "loss": 0.8351, "step": 4188 }, { "epoch": 0.57, "learning_rate": 8.300519949481028e-06, "loss": 0.8481, "step": 4189 }, { "epoch": 0.57, "learning_rate": 8.296193824962702e-06, "loss": 0.8613, "step": 4190 }, { "epoch": 0.57, "learning_rate": 8.291868028826317e-06, "loss": 0.9082, "step": 4191 }, { "epoch": 0.57, "learning_rate": 8.2875425619056e-06, "loss": 0.8882, "step": 4192 }, { "epoch": 0.57, "learning_rate": 8.283217425034218e-06, "loss": 0.8783, "step": 4193 }, { "epoch": 0.57, "learning_rate": 8.27889261904577e-06, "loss": 0.8863, "step": 4194 }, { "epoch": 0.57, "learning_rate": 8.2745681447738e-06, "loss": 0.9046, "step": 4195 }, { "epoch": 0.57, "learning_rate": 8.27024400305178e-06, "loss": 0.8996, "step": 4196 }, { "epoch": 0.57, "learning_rate": 8.265920194713116e-06, "loss": 0.9026, "step": 4197 }, { "epoch": 0.57, "learning_rate": 8.261596720591164e-06, "loss": 0.9181, "step": 4198 }, { "epoch": 0.57, "learning_rate": 8.257273581519193e-06, "loss": 0.9114, "step": 4199 }, { "epoch": 0.57, "learning_rate": 8.252950778330434e-06, "loss": 0.8047, "step": 4200 }, { "epoch": 0.57, "learning_rate": 8.24862831185803e-06, "loss": 0.8698, "step": 4201 }, { "epoch": 0.57, "learning_rate": 8.244306182935074e-06, "loss": 0.8718, "step": 4202 }, { "epoch": 0.57, "learning_rate": 8.239984392394584e-06, "loss": 0.958, "step": 4203 }, { "epoch": 0.57, "learning_rate": 8.235662941069523e-06, "loss": 0.8382, "step": 4204 }, { "epoch": 0.57, "learning_rate": 8.23134182979278e-06, "loss": 0.8357, "step": 4205 }, { "epoch": 0.57, "learning_rate": 8.22702105939718e-06, "loss": 0.9227, "step": 4206 }, { "epoch": 0.57, "learning_rate": 8.222700630715486e-06, "loss": 0.8631, "step": 4207 }, { "epoch": 0.57, "learning_rate": 8.218380544580388e-06, "loss": 0.9221, "step": 4208 }, { "epoch": 0.57, "learning_rate": 8.214060801824524e-06, "loss": 0.9083, "step": 4209 }, { "epoch": 0.57, "learning_rate": 8.209741403280449e-06, "loss": 0.9612, "step": 4210 }, { "epoch": 0.57, "learning_rate": 8.205422349780664e-06, "loss": 0.8523, "step": 4211 }, { "epoch": 0.57, "learning_rate": 8.201103642157597e-06, "loss": 0.8282, "step": 4212 }, { "epoch": 0.57, "learning_rate": 8.19678528124361e-06, "loss": 0.8548, "step": 4213 }, { "epoch": 0.57, "learning_rate": 8.192467267871003e-06, "loss": 0.8073, "step": 4214 }, { "epoch": 0.57, "learning_rate": 8.188149602871998e-06, "loss": 0.8904, "step": 4215 }, { "epoch": 0.57, "learning_rate": 8.183832287078763e-06, "loss": 0.8528, "step": 4216 }, { "epoch": 0.57, "learning_rate": 8.179515321323397e-06, "loss": 0.8693, "step": 4217 }, { "epoch": 0.57, "learning_rate": 8.175198706437918e-06, "loss": 0.8571, "step": 4218 }, { "epoch": 0.57, "learning_rate": 8.170882443254294e-06, "loss": 0.8121, "step": 4219 }, { "epoch": 0.57, "learning_rate": 8.166566532604411e-06, "loss": 0.9089, "step": 4220 }, { "epoch": 0.57, "learning_rate": 8.1622509753201e-06, "loss": 0.9139, "step": 4221 }, { "epoch": 0.57, "learning_rate": 8.15793577223311e-06, "loss": 0.9147, "step": 4222 }, { "epoch": 0.57, "learning_rate": 8.153620924175132e-06, "loss": 0.9434, "step": 4223 }, { "epoch": 0.57, "learning_rate": 8.149306431977785e-06, "loss": 0.9483, "step": 4224 }, { "epoch": 0.57, "learning_rate": 8.14499229647262e-06, "loss": 0.9072, "step": 4225 }, { "epoch": 0.57, "learning_rate": 8.140678518491118e-06, "loss": 0.8749, "step": 4226 }, { "epoch": 0.57, "learning_rate": 8.136365098864693e-06, "loss": 0.877, "step": 4227 }, { "epoch": 0.57, "learning_rate": 8.132052038424689e-06, "loss": 0.8697, "step": 4228 }, { "epoch": 0.57, "learning_rate": 8.12773933800238e-06, "loss": 0.8313, "step": 4229 }, { "epoch": 0.57, "learning_rate": 8.123426998428974e-06, "loss": 0.8967, "step": 4230 }, { "epoch": 0.57, "learning_rate": 8.119115020535605e-06, "loss": 0.8817, "step": 4231 }, { "epoch": 0.57, "learning_rate": 8.114803405153337e-06, "loss": 0.8546, "step": 4232 }, { "epoch": 0.57, "learning_rate": 8.11049215311317e-06, "loss": 0.8573, "step": 4233 }, { "epoch": 0.57, "learning_rate": 8.106181265246026e-06, "loss": 0.8755, "step": 4234 }, { "epoch": 0.57, "learning_rate": 8.101870742382768e-06, "loss": 0.9138, "step": 4235 }, { "epoch": 0.57, "learning_rate": 8.097560585354176e-06, "loss": 0.829, "step": 4236 }, { "epoch": 0.57, "learning_rate": 8.093250794990966e-06, "loss": 0.8838, "step": 4237 }, { "epoch": 0.57, "learning_rate": 8.088941372123781e-06, "loss": 0.8297, "step": 4238 }, { "epoch": 0.57, "learning_rate": 8.0846323175832e-06, "loss": 0.9179, "step": 4239 }, { "epoch": 0.57, "learning_rate": 8.080323632199724e-06, "loss": 0.9154, "step": 4240 }, { "epoch": 0.57, "learning_rate": 8.076015316803781e-06, "loss": 0.8638, "step": 4241 }, { "epoch": 0.57, "learning_rate": 8.071707372225734e-06, "loss": 0.9439, "step": 4242 }, { "epoch": 0.58, "learning_rate": 8.067399799295872e-06, "loss": 0.8911, "step": 4243 }, { "epoch": 0.58, "learning_rate": 8.06309259884441e-06, "loss": 0.8472, "step": 4244 }, { "epoch": 0.58, "learning_rate": 8.058785771701497e-06, "loss": 0.9364, "step": 4245 }, { "epoch": 0.58, "learning_rate": 8.054479318697203e-06, "loss": 0.8386, "step": 4246 }, { "epoch": 0.58, "learning_rate": 8.050173240661533e-06, "loss": 0.9548, "step": 4247 }, { "epoch": 0.58, "learning_rate": 8.04586753842441e-06, "loss": 0.8963, "step": 4248 }, { "epoch": 0.58, "learning_rate": 8.041562212815699e-06, "loss": 0.9146, "step": 4249 }, { "epoch": 0.58, "learning_rate": 8.037257264665174e-06, "loss": 0.8659, "step": 4250 }, { "epoch": 0.58, "learning_rate": 8.032952694802556e-06, "loss": 0.8829, "step": 4251 }, { "epoch": 0.58, "learning_rate": 8.028648504057477e-06, "loss": 0.8266, "step": 4252 }, { "epoch": 0.58, "learning_rate": 8.024344693259505e-06, "loss": 0.891, "step": 4253 }, { "epoch": 0.58, "learning_rate": 8.02004126323813e-06, "loss": 0.8949, "step": 4254 }, { "epoch": 0.58, "learning_rate": 8.015738214822774e-06, "loss": 0.889, "step": 4255 }, { "epoch": 0.58, "learning_rate": 8.011435548842782e-06, "loss": 0.8751, "step": 4256 }, { "epoch": 0.58, "learning_rate": 8.00713326612742e-06, "loss": 0.8556, "step": 4257 }, { "epoch": 0.58, "learning_rate": 8.002831367505892e-06, "loss": 0.8042, "step": 4258 }, { "epoch": 0.58, "learning_rate": 7.998529853807316e-06, "loss": 0.8739, "step": 4259 }, { "epoch": 0.58, "learning_rate": 7.994228725860744e-06, "loss": 0.9179, "step": 4260 }, { "epoch": 0.58, "learning_rate": 7.989927984495155e-06, "loss": 0.7705, "step": 4261 }, { "epoch": 0.58, "learning_rate": 7.985627630539443e-06, "loss": 0.8478, "step": 4262 }, { "epoch": 0.58, "learning_rate": 7.981327664822438e-06, "loss": 0.8271, "step": 4263 }, { "epoch": 0.58, "learning_rate": 7.977028088172889e-06, "loss": 0.9317, "step": 4264 }, { "epoch": 0.58, "learning_rate": 7.972728901419475e-06, "loss": 0.8659, "step": 4265 }, { "epoch": 0.58, "learning_rate": 7.968430105390792e-06, "loss": 0.8743, "step": 4266 }, { "epoch": 0.58, "learning_rate": 7.964131700915368e-06, "loss": 0.8301, "step": 4267 }, { "epoch": 0.58, "learning_rate": 7.959833688821655e-06, "loss": 0.8899, "step": 4268 }, { "epoch": 0.58, "learning_rate": 7.955536069938022e-06, "loss": 0.8619, "step": 4269 }, { "epoch": 0.58, "learning_rate": 7.951238845092776e-06, "loss": 0.8686, "step": 4270 }, { "epoch": 0.58, "learning_rate": 7.94694201511413e-06, "loss": 0.8574, "step": 4271 }, { "epoch": 0.58, "learning_rate": 7.94264558083024e-06, "loss": 0.8059, "step": 4272 }, { "epoch": 0.58, "learning_rate": 7.93834954306917e-06, "loss": 0.9015, "step": 4273 }, { "epoch": 0.58, "learning_rate": 7.934053902658918e-06, "loss": 0.8386, "step": 4274 }, { "epoch": 0.58, "learning_rate": 7.929758660427398e-06, "loss": 0.855, "step": 4275 }, { "epoch": 0.58, "learning_rate": 7.92546381720245e-06, "loss": 0.9196, "step": 4276 }, { "epoch": 0.58, "learning_rate": 7.921169373811843e-06, "loss": 0.9113, "step": 4277 }, { "epoch": 0.58, "learning_rate": 7.916875331083258e-06, "loss": 0.8776, "step": 4278 }, { "epoch": 0.58, "learning_rate": 7.912581689844309e-06, "loss": 0.9009, "step": 4279 }, { "epoch": 0.58, "learning_rate": 7.908288450922523e-06, "loss": 0.9214, "step": 4280 }, { "epoch": 0.58, "learning_rate": 7.903995615145361e-06, "loss": 0.8714, "step": 4281 }, { "epoch": 0.58, "learning_rate": 7.899703183340195e-06, "loss": 0.907, "step": 4282 }, { "epoch": 0.58, "learning_rate": 7.895411156334322e-06, "loss": 0.8586, "step": 4283 }, { "epoch": 0.58, "learning_rate": 7.89111953495497e-06, "loss": 0.868, "step": 4284 }, { "epoch": 0.58, "learning_rate": 7.886828320029277e-06, "loss": 0.9168, "step": 4285 }, { "epoch": 0.58, "learning_rate": 7.882537512384308e-06, "loss": 0.9084, "step": 4286 }, { "epoch": 0.58, "learning_rate": 7.878247112847049e-06, "loss": 0.8865, "step": 4287 }, { "epoch": 0.58, "learning_rate": 7.873957122244408e-06, "loss": 0.8727, "step": 4288 }, { "epoch": 0.58, "learning_rate": 7.869667541403212e-06, "loss": 0.9685, "step": 4289 }, { "epoch": 0.58, "learning_rate": 7.865378371150213e-06, "loss": 0.9069, "step": 4290 }, { "epoch": 0.58, "learning_rate": 7.86108961231208e-06, "loss": 0.8838, "step": 4291 }, { "epoch": 0.58, "learning_rate": 7.856801265715401e-06, "loss": 0.8286, "step": 4292 }, { "epoch": 0.58, "learning_rate": 7.852513332186695e-06, "loss": 0.9857, "step": 4293 }, { "epoch": 0.58, "learning_rate": 7.848225812552385e-06, "loss": 0.8745, "step": 4294 }, { "epoch": 0.58, "learning_rate": 7.843938707638831e-06, "loss": 0.8889, "step": 4295 }, { "epoch": 0.58, "learning_rate": 7.839652018272299e-06, "loss": 0.8541, "step": 4296 }, { "epoch": 0.58, "learning_rate": 7.835365745278987e-06, "loss": 0.8654, "step": 4297 }, { "epoch": 0.58, "learning_rate": 7.831079889485001e-06, "loss": 0.8416, "step": 4298 }, { "epoch": 0.58, "learning_rate": 7.826794451716379e-06, "loss": 0.7834, "step": 4299 }, { "epoch": 0.58, "learning_rate": 7.822509432799068e-06, "loss": 0.8968, "step": 4300 }, { "epoch": 0.58, "learning_rate": 7.818224833558936e-06, "loss": 0.8328, "step": 4301 }, { "epoch": 0.58, "learning_rate": 7.813940654821774e-06, "loss": 0.8197, "step": 4302 }, { "epoch": 0.58, "learning_rate": 7.809656897413295e-06, "loss": 0.9215, "step": 4303 }, { "epoch": 0.58, "learning_rate": 7.805373562159122e-06, "loss": 0.9039, "step": 4304 }, { "epoch": 0.58, "learning_rate": 7.801090649884802e-06, "loss": 0.8823, "step": 4305 }, { "epoch": 0.58, "learning_rate": 7.796808161415797e-06, "loss": 0.8416, "step": 4306 }, { "epoch": 0.58, "learning_rate": 7.792526097577494e-06, "loss": 0.8886, "step": 4307 }, { "epoch": 0.58, "learning_rate": 7.788244459195192e-06, "loss": 0.8623, "step": 4308 }, { "epoch": 0.58, "learning_rate": 7.783963247094103e-06, "loss": 0.8317, "step": 4309 }, { "epoch": 0.58, "learning_rate": 7.779682462099373e-06, "loss": 0.8195, "step": 4310 }, { "epoch": 0.58, "learning_rate": 7.77540210503605e-06, "loss": 0.8717, "step": 4311 }, { "epoch": 0.58, "learning_rate": 7.77112217672911e-06, "loss": 0.8907, "step": 4312 }, { "epoch": 0.58, "learning_rate": 7.766842678003438e-06, "loss": 0.8726, "step": 4313 }, { "epoch": 0.58, "learning_rate": 7.762563609683846e-06, "loss": 0.8576, "step": 4314 }, { "epoch": 0.58, "learning_rate": 7.758284972595049e-06, "loss": 0.8689, "step": 4315 }, { "epoch": 0.58, "learning_rate": 7.754006767561696e-06, "loss": 0.9177, "step": 4316 }, { "epoch": 0.59, "learning_rate": 7.74972899540834e-06, "loss": 0.8445, "step": 4317 }, { "epoch": 0.59, "learning_rate": 7.745451656959452e-06, "loss": 0.8935, "step": 4318 }, { "epoch": 0.59, "learning_rate": 7.741174753039426e-06, "loss": 0.8825, "step": 4319 }, { "epoch": 0.59, "learning_rate": 7.736898284472566e-06, "loss": 0.8804, "step": 4320 }, { "epoch": 0.59, "learning_rate": 7.732622252083097e-06, "loss": 0.8822, "step": 4321 }, { "epoch": 0.59, "learning_rate": 7.728346656695151e-06, "loss": 0.8795, "step": 4322 }, { "epoch": 0.59, "learning_rate": 7.72407149913279e-06, "loss": 0.8512, "step": 4323 }, { "epoch": 0.59, "learning_rate": 7.719796780219979e-06, "loss": 0.9076, "step": 4324 }, { "epoch": 0.59, "learning_rate": 7.715522500780604e-06, "loss": 0.8735, "step": 4325 }, { "epoch": 0.59, "learning_rate": 7.711248661638467e-06, "loss": 0.9031, "step": 4326 }, { "epoch": 0.59, "learning_rate": 7.70697526361728e-06, "loss": 0.8294, "step": 4327 }, { "epoch": 0.59, "learning_rate": 7.702702307540675e-06, "loss": 0.9313, "step": 4328 }, { "epoch": 0.59, "learning_rate": 7.698429794232196e-06, "loss": 0.8743, "step": 4329 }, { "epoch": 0.59, "learning_rate": 7.694157724515309e-06, "loss": 0.8637, "step": 4330 }, { "epoch": 0.59, "learning_rate": 7.68988609921338e-06, "loss": 0.8527, "step": 4331 }, { "epoch": 0.59, "learning_rate": 7.685614919149705e-06, "loss": 0.8712, "step": 4332 }, { "epoch": 0.59, "learning_rate": 7.681344185147485e-06, "loss": 0.8937, "step": 4333 }, { "epoch": 0.59, "learning_rate": 7.677073898029832e-06, "loss": 0.9265, "step": 4334 }, { "epoch": 0.59, "learning_rate": 7.672804058619784e-06, "loss": 0.867, "step": 4335 }, { "epoch": 0.59, "learning_rate": 7.668534667740281e-06, "loss": 0.9208, "step": 4336 }, { "epoch": 0.59, "learning_rate": 7.664265726214183e-06, "loss": 0.9319, "step": 4337 }, { "epoch": 0.59, "learning_rate": 7.65999723486426e-06, "loss": 0.8612, "step": 4338 }, { "epoch": 0.59, "learning_rate": 7.655729194513201e-06, "loss": 0.8699, "step": 4339 }, { "epoch": 0.59, "learning_rate": 7.651461605983599e-06, "loss": 0.9179, "step": 4340 }, { "epoch": 0.59, "learning_rate": 7.64719447009797e-06, "loss": 0.8499, "step": 4341 }, { "epoch": 0.59, "learning_rate": 7.642927787678733e-06, "loss": 0.9094, "step": 4342 }, { "epoch": 0.59, "learning_rate": 7.638661559548222e-06, "loss": 0.8627, "step": 4343 }, { "epoch": 0.59, "learning_rate": 7.634395786528695e-06, "loss": 0.8603, "step": 4344 }, { "epoch": 0.59, "learning_rate": 7.630130469442302e-06, "loss": 0.8553, "step": 4345 }, { "epoch": 0.59, "learning_rate": 7.625865609111121e-06, "loss": 0.8476, "step": 4346 }, { "epoch": 0.59, "learning_rate": 7.621601206357139e-06, "loss": 0.8802, "step": 4347 }, { "epoch": 0.59, "learning_rate": 7.61733726200225e-06, "loss": 0.83, "step": 4348 }, { "epoch": 0.59, "learning_rate": 7.613073776868266e-06, "loss": 0.8476, "step": 4349 }, { "epoch": 0.59, "learning_rate": 7.608810751776902e-06, "loss": 0.8866, "step": 4350 }, { "epoch": 0.59, "learning_rate": 7.604548187549794e-06, "loss": 0.8453, "step": 4351 }, { "epoch": 0.59, "learning_rate": 7.6002860850084815e-06, "loss": 0.9165, "step": 4352 }, { "epoch": 0.59, "learning_rate": 7.596024444974417e-06, "loss": 0.8362, "step": 4353 }, { "epoch": 0.59, "learning_rate": 7.591763268268968e-06, "loss": 0.9322, "step": 4354 }, { "epoch": 0.59, "learning_rate": 7.587502555713405e-06, "loss": 0.8802, "step": 4355 }, { "epoch": 0.59, "learning_rate": 7.5832423081289195e-06, "loss": 0.8634, "step": 4356 }, { "epoch": 0.59, "learning_rate": 7.5789825263366025e-06, "loss": 0.8906, "step": 4357 }, { "epoch": 0.59, "learning_rate": 7.574723211157464e-06, "loss": 0.8806, "step": 4358 }, { "epoch": 0.59, "learning_rate": 7.5704643634124155e-06, "loss": 0.8886, "step": 4359 }, { "epoch": 0.59, "learning_rate": 7.566205983922289e-06, "loss": 0.8381, "step": 4360 }, { "epoch": 0.59, "learning_rate": 7.561948073507818e-06, "loss": 0.8709, "step": 4361 }, { "epoch": 0.59, "learning_rate": 7.557690632989644e-06, "loss": 0.8586, "step": 4362 }, { "epoch": 0.59, "learning_rate": 7.553433663188328e-06, "loss": 0.9169, "step": 4363 }, { "epoch": 0.59, "learning_rate": 7.549177164924329e-06, "loss": 0.8435, "step": 4364 }, { "epoch": 0.59, "learning_rate": 7.544921139018027e-06, "loss": 0.8838, "step": 4365 }, { "epoch": 0.59, "learning_rate": 7.5406655862896945e-06, "loss": 0.8072, "step": 4366 }, { "epoch": 0.59, "learning_rate": 7.536410507559533e-06, "loss": 0.8764, "step": 4367 }, { "epoch": 0.59, "learning_rate": 7.5321559036476375e-06, "loss": 0.8948, "step": 4368 }, { "epoch": 0.59, "learning_rate": 7.527901775374014e-06, "loss": 0.8928, "step": 4369 }, { "epoch": 0.59, "learning_rate": 7.523648123558582e-06, "loss": 0.8549, "step": 4370 }, { "epoch": 0.59, "learning_rate": 7.519394949021166e-06, "loss": 0.8981, "step": 4371 }, { "epoch": 0.59, "learning_rate": 7.515142252581499e-06, "loss": 0.8881, "step": 4372 }, { "epoch": 0.59, "learning_rate": 7.5108900350592185e-06, "loss": 0.9131, "step": 4373 }, { "epoch": 0.59, "learning_rate": 7.506638297273877e-06, "loss": 0.9115, "step": 4374 }, { "epoch": 0.59, "learning_rate": 7.502387040044927e-06, "loss": 0.8369, "step": 4375 }, { "epoch": 0.59, "learning_rate": 7.498136264191736e-06, "loss": 0.8314, "step": 4376 }, { "epoch": 0.59, "learning_rate": 7.49388597053357e-06, "loss": 0.8316, "step": 4377 }, { "epoch": 0.59, "learning_rate": 7.489636159889607e-06, "loss": 0.8882, "step": 4378 }, { "epoch": 0.59, "learning_rate": 7.4853868330789345e-06, "loss": 0.8709, "step": 4379 }, { "epoch": 0.59, "learning_rate": 7.4811379909205395e-06, "loss": 0.8717, "step": 4380 }, { "epoch": 0.59, "learning_rate": 7.476889634233324e-06, "loss": 0.8622, "step": 4381 }, { "epoch": 0.59, "learning_rate": 7.472641763836088e-06, "loss": 0.8719, "step": 4382 }, { "epoch": 0.59, "learning_rate": 7.4683943805475465e-06, "loss": 0.8846, "step": 4383 }, { "epoch": 0.59, "learning_rate": 7.464147485186311e-06, "loss": 0.8709, "step": 4384 }, { "epoch": 0.59, "learning_rate": 7.459901078570909e-06, "loss": 0.8367, "step": 4385 }, { "epoch": 0.59, "learning_rate": 7.455655161519767e-06, "loss": 0.895, "step": 4386 }, { "epoch": 0.59, "learning_rate": 7.451409734851216e-06, "loss": 0.8351, "step": 4387 }, { "epoch": 0.59, "learning_rate": 7.4471647993835e-06, "loss": 0.8539, "step": 4388 }, { "epoch": 0.59, "learning_rate": 7.442920355934758e-06, "loss": 0.8676, "step": 4389 }, { "epoch": 0.6, "learning_rate": 7.4386764053230434e-06, "loss": 0.9141, "step": 4390 }, { "epoch": 0.6, "learning_rate": 7.434432948366315e-06, "loss": 0.9121, "step": 4391 }, { "epoch": 0.6, "learning_rate": 7.430189985882427e-06, "loss": 0.8664, "step": 4392 }, { "epoch": 0.6, "learning_rate": 7.425947518689147e-06, "loss": 0.8877, "step": 4393 }, { "epoch": 0.6, "learning_rate": 7.421705547604144e-06, "loss": 0.8525, "step": 4394 }, { "epoch": 0.6, "learning_rate": 7.417464073444989e-06, "loss": 0.8732, "step": 4395 }, { "epoch": 0.6, "learning_rate": 7.413223097029163e-06, "loss": 0.8303, "step": 4396 }, { "epoch": 0.6, "learning_rate": 7.4089826191740435e-06, "loss": 0.8634, "step": 4397 }, { "epoch": 0.6, "learning_rate": 7.40474264069692e-06, "loss": 0.7958, "step": 4398 }, { "epoch": 0.6, "learning_rate": 7.400503162414978e-06, "loss": 0.8498, "step": 4399 }, { "epoch": 0.6, "learning_rate": 7.396264185145317e-06, "loss": 0.9235, "step": 4400 }, { "epoch": 0.6, "learning_rate": 7.392025709704924e-06, "loss": 0.8962, "step": 4401 }, { "epoch": 0.6, "learning_rate": 7.38778773691071e-06, "loss": 0.8647, "step": 4402 }, { "epoch": 0.6, "learning_rate": 7.383550267579469e-06, "loss": 0.8888, "step": 4403 }, { "epoch": 0.6, "learning_rate": 7.379313302527908e-06, "loss": 0.8489, "step": 4404 }, { "epoch": 0.6, "learning_rate": 7.375076842572641e-06, "loss": 0.8184, "step": 4405 }, { "epoch": 0.6, "learning_rate": 7.370840888530173e-06, "loss": 0.9001, "step": 4406 }, { "epoch": 0.6, "learning_rate": 7.366605441216922e-06, "loss": 0.8491, "step": 4407 }, { "epoch": 0.6, "learning_rate": 7.362370501449201e-06, "loss": 0.8819, "step": 4408 }, { "epoch": 0.6, "learning_rate": 7.358136070043231e-06, "loss": 0.9172, "step": 4409 }, { "epoch": 0.6, "learning_rate": 7.353902147815128e-06, "loss": 0.7648, "step": 4410 }, { "epoch": 0.6, "learning_rate": 7.349668735580921e-06, "loss": 0.8683, "step": 4411 }, { "epoch": 0.6, "learning_rate": 7.345435834156529e-06, "loss": 0.9005, "step": 4412 }, { "epoch": 0.6, "learning_rate": 7.3412034443577786e-06, "loss": 0.7771, "step": 4413 }, { "epoch": 0.6, "learning_rate": 7.336971567000396e-06, "loss": 0.9075, "step": 4414 }, { "epoch": 0.6, "learning_rate": 7.332740202900008e-06, "loss": 0.8671, "step": 4415 }, { "epoch": 0.6, "learning_rate": 7.328509352872149e-06, "loss": 0.9011, "step": 4416 }, { "epoch": 0.6, "learning_rate": 7.324279017732241e-06, "loss": 0.8509, "step": 4417 }, { "epoch": 0.6, "learning_rate": 7.320049198295622e-06, "loss": 0.8664, "step": 4418 }, { "epoch": 0.6, "learning_rate": 7.31581989537752e-06, "loss": 0.9105, "step": 4419 }, { "epoch": 0.6, "learning_rate": 7.311591109793068e-06, "loss": 0.8519, "step": 4420 }, { "epoch": 0.6, "learning_rate": 7.3073628423573e-06, "loss": 0.8513, "step": 4421 }, { "epoch": 0.6, "learning_rate": 7.303135093885141e-06, "loss": 0.8823, "step": 4422 }, { "epoch": 0.6, "learning_rate": 7.298907865191432e-06, "loss": 0.9063, "step": 4423 }, { "epoch": 0.6, "learning_rate": 7.294681157090899e-06, "loss": 0.7958, "step": 4424 }, { "epoch": 0.6, "learning_rate": 7.290454970398177e-06, "loss": 0.815, "step": 4425 }, { "epoch": 0.6, "learning_rate": 7.286229305927796e-06, "loss": 0.8554, "step": 4426 }, { "epoch": 0.6, "learning_rate": 7.282004164494187e-06, "loss": 0.8208, "step": 4427 }, { "epoch": 0.6, "learning_rate": 7.277779546911682e-06, "loss": 0.9139, "step": 4428 }, { "epoch": 0.6, "learning_rate": 7.273555453994504e-06, "loss": 0.9161, "step": 4429 }, { "epoch": 0.6, "learning_rate": 7.269331886556786e-06, "loss": 0.8516, "step": 4430 }, { "epoch": 0.6, "learning_rate": 7.2651088454125515e-06, "loss": 0.883, "step": 4431 }, { "epoch": 0.6, "learning_rate": 7.260886331375729e-06, "loss": 0.8801, "step": 4432 }, { "epoch": 0.6, "learning_rate": 7.256664345260134e-06, "loss": 0.8485, "step": 4433 }, { "epoch": 0.6, "learning_rate": 7.252442887879496e-06, "loss": 0.8761, "step": 4434 }, { "epoch": 0.6, "learning_rate": 7.248221960047437e-06, "loss": 0.8943, "step": 4435 }, { "epoch": 0.6, "learning_rate": 7.2440015625774655e-06, "loss": 0.8756, "step": 4436 }, { "epoch": 0.6, "learning_rate": 7.239781696283003e-06, "loss": 0.9148, "step": 4437 }, { "epoch": 0.6, "learning_rate": 7.235562361977364e-06, "loss": 0.8265, "step": 4438 }, { "epoch": 0.6, "learning_rate": 7.231343560473753e-06, "loss": 0.878, "step": 4439 }, { "epoch": 0.6, "learning_rate": 7.227125292585283e-06, "loss": 0.9224, "step": 4440 }, { "epoch": 0.6, "learning_rate": 7.222907559124955e-06, "loss": 0.824, "step": 4441 }, { "epoch": 0.6, "learning_rate": 7.218690360905675e-06, "loss": 0.8815, "step": 4442 }, { "epoch": 0.6, "learning_rate": 7.21447369874024e-06, "loss": 0.9056, "step": 4443 }, { "epoch": 0.6, "learning_rate": 7.210257573441346e-06, "loss": 0.8397, "step": 4444 }, { "epoch": 0.6, "learning_rate": 7.206041985821583e-06, "loss": 0.8066, "step": 4445 }, { "epoch": 0.6, "learning_rate": 7.2018269366934435e-06, "loss": 0.8442, "step": 4446 }, { "epoch": 0.6, "learning_rate": 7.197612426869309e-06, "loss": 0.8779, "step": 4447 }, { "epoch": 0.6, "learning_rate": 7.19339845716146e-06, "loss": 0.8704, "step": 4448 }, { "epoch": 0.6, "learning_rate": 7.189185028382076e-06, "loss": 0.7994, "step": 4449 }, { "epoch": 0.6, "learning_rate": 7.184972141343225e-06, "loss": 0.908, "step": 4450 }, { "epoch": 0.6, "learning_rate": 7.18075979685688e-06, "loss": 0.836, "step": 4451 }, { "epoch": 0.6, "learning_rate": 7.1765479957349e-06, "loss": 0.924, "step": 4452 }, { "epoch": 0.6, "learning_rate": 7.172336738789048e-06, "loss": 0.8149, "step": 4453 }, { "epoch": 0.6, "learning_rate": 7.168126026830975e-06, "loss": 0.8698, "step": 4454 }, { "epoch": 0.6, "learning_rate": 7.163915860672227e-06, "loss": 0.8773, "step": 4455 }, { "epoch": 0.6, "learning_rate": 7.159706241124253e-06, "loss": 0.829, "step": 4456 }, { "epoch": 0.6, "learning_rate": 7.155497168998386e-06, "loss": 0.9548, "step": 4457 }, { "epoch": 0.6, "learning_rate": 7.151288645105866e-06, "loss": 0.9244, "step": 4458 }, { "epoch": 0.6, "learning_rate": 7.147080670257811e-06, "loss": 0.8788, "step": 4459 }, { "epoch": 0.6, "learning_rate": 7.14287324526525e-06, "loss": 0.8671, "step": 4460 }, { "epoch": 0.6, "learning_rate": 7.138666370939093e-06, "loss": 0.8137, "step": 4461 }, { "epoch": 0.6, "learning_rate": 7.134460048090153e-06, "loss": 0.927, "step": 4462 }, { "epoch": 0.6, "learning_rate": 7.1302542775291315e-06, "loss": 0.9221, "step": 4463 }, { "epoch": 0.61, "learning_rate": 7.126049060066621e-06, "loss": 0.8953, "step": 4464 }, { "epoch": 0.61, "learning_rate": 7.121844396513117e-06, "loss": 0.9271, "step": 4465 }, { "epoch": 0.61, "learning_rate": 7.117640287678997e-06, "loss": 0.8922, "step": 4466 }, { "epoch": 0.61, "learning_rate": 7.1134367343745436e-06, "loss": 0.8987, "step": 4467 }, { "epoch": 0.61, "learning_rate": 7.109233737409919e-06, "loss": 0.8381, "step": 4468 }, { "epoch": 0.61, "learning_rate": 7.1050312975951915e-06, "loss": 0.8679, "step": 4469 }, { "epoch": 0.61, "learning_rate": 7.1008294157403105e-06, "loss": 0.8759, "step": 4470 }, { "epoch": 0.61, "learning_rate": 7.096628092655126e-06, "loss": 0.8215, "step": 4471 }, { "epoch": 0.61, "learning_rate": 7.092427329149376e-06, "loss": 0.9088, "step": 4472 }, { "epoch": 0.61, "learning_rate": 7.088227126032689e-06, "loss": 0.8904, "step": 4473 }, { "epoch": 0.61, "learning_rate": 7.084027484114595e-06, "loss": 0.848, "step": 4474 }, { "epoch": 0.61, "learning_rate": 7.0798284042045005e-06, "loss": 0.9399, "step": 4475 }, { "epoch": 0.61, "learning_rate": 7.075629887111721e-06, "loss": 0.8922, "step": 4476 }, { "epoch": 0.61, "learning_rate": 7.071431933645446e-06, "loss": 0.8217, "step": 4477 }, { "epoch": 0.61, "learning_rate": 7.067234544614773e-06, "loss": 0.8426, "step": 4478 }, { "epoch": 0.61, "learning_rate": 7.0630377208286816e-06, "loss": 0.8844, "step": 4479 }, { "epoch": 0.61, "learning_rate": 7.058841463096042e-06, "loss": 0.8579, "step": 4480 }, { "epoch": 0.61, "learning_rate": 7.054645772225617e-06, "loss": 0.8694, "step": 4481 }, { "epoch": 0.61, "learning_rate": 7.05045064902606e-06, "loss": 0.8165, "step": 4482 }, { "epoch": 0.61, "learning_rate": 7.046256094305917e-06, "loss": 0.8648, "step": 4483 }, { "epoch": 0.61, "learning_rate": 7.042062108873622e-06, "loss": 0.8031, "step": 4484 }, { "epoch": 0.61, "learning_rate": 7.037868693537497e-06, "loss": 0.8972, "step": 4485 }, { "epoch": 0.61, "learning_rate": 7.033675849105763e-06, "loss": 0.8876, "step": 4486 }, { "epoch": 0.61, "learning_rate": 7.029483576386519e-06, "loss": 0.8453, "step": 4487 }, { "epoch": 0.61, "learning_rate": 7.025291876187765e-06, "loss": 0.9035, "step": 4488 }, { "epoch": 0.61, "learning_rate": 7.021100749317382e-06, "loss": 0.8563, "step": 4489 }, { "epoch": 0.61, "learning_rate": 7.016910196583145e-06, "loss": 0.8394, "step": 4490 }, { "epoch": 0.61, "learning_rate": 7.012720218792719e-06, "loss": 0.8316, "step": 4491 }, { "epoch": 0.61, "learning_rate": 7.008530816753652e-06, "loss": 0.8546, "step": 4492 }, { "epoch": 0.61, "learning_rate": 7.004341991273391e-06, "loss": 0.8323, "step": 4493 }, { "epoch": 0.61, "learning_rate": 7.000153743159263e-06, "loss": 0.879, "step": 4494 }, { "epoch": 0.61, "learning_rate": 6.99596607321849e-06, "loss": 0.8796, "step": 4495 }, { "epoch": 0.61, "learning_rate": 6.991778982258176e-06, "loss": 0.8511, "step": 4496 }, { "epoch": 0.61, "learning_rate": 6.987592471085322e-06, "loss": 0.7962, "step": 4497 }, { "epoch": 0.61, "learning_rate": 6.983406540506809e-06, "loss": 0.8361, "step": 4498 }, { "epoch": 0.61, "learning_rate": 6.979221191329408e-06, "loss": 0.8454, "step": 4499 }, { "epoch": 0.61, "learning_rate": 6.975036424359783e-06, "loss": 0.9253, "step": 4500 }, { "epoch": 0.61, "learning_rate": 6.970852240404479e-06, "loss": 0.8221, "step": 4501 }, { "epoch": 0.61, "learning_rate": 6.966668640269938e-06, "loss": 0.856, "step": 4502 }, { "epoch": 0.61, "learning_rate": 6.962485624762475e-06, "loss": 0.8966, "step": 4503 }, { "epoch": 0.61, "learning_rate": 6.958303194688307e-06, "loss": 0.7687, "step": 4504 }, { "epoch": 0.61, "learning_rate": 6.954121350853529e-06, "loss": 0.8631, "step": 4505 }, { "epoch": 0.61, "learning_rate": 6.949940094064127e-06, "loss": 0.9342, "step": 4506 }, { "epoch": 0.61, "learning_rate": 6.9457594251259734e-06, "loss": 0.8861, "step": 4507 }, { "epoch": 0.61, "learning_rate": 6.941579344844822e-06, "loss": 0.833, "step": 4508 }, { "epoch": 0.61, "learning_rate": 6.937399854026325e-06, "loss": 0.7936, "step": 4509 }, { "epoch": 0.61, "learning_rate": 6.933220953476007e-06, "loss": 0.8387, "step": 4510 }, { "epoch": 0.61, "learning_rate": 6.929042643999291e-06, "loss": 0.9401, "step": 4511 }, { "epoch": 0.61, "learning_rate": 6.924864926401475e-06, "loss": 0.8793, "step": 4512 }, { "epoch": 0.61, "learning_rate": 6.920687801487755e-06, "loss": 0.8758, "step": 4513 }, { "epoch": 0.61, "learning_rate": 6.916511270063204e-06, "loss": 0.906, "step": 4514 }, { "epoch": 0.61, "learning_rate": 6.9123353329327795e-06, "loss": 0.8602, "step": 4515 }, { "epoch": 0.61, "learning_rate": 6.908159990901333e-06, "loss": 0.8504, "step": 4516 }, { "epoch": 0.61, "learning_rate": 6.90398524477359e-06, "loss": 0.8778, "step": 4517 }, { "epoch": 0.61, "learning_rate": 6.8998110953541755e-06, "loss": 0.8086, "step": 4518 }, { "epoch": 0.61, "learning_rate": 6.895637543447584e-06, "loss": 0.8513, "step": 4519 }, { "epoch": 0.61, "learning_rate": 6.891464589858203e-06, "loss": 0.8235, "step": 4520 }, { "epoch": 0.61, "learning_rate": 6.887292235390312e-06, "loss": 0.9382, "step": 4521 }, { "epoch": 0.61, "learning_rate": 6.883120480848058e-06, "loss": 0.7981, "step": 4522 }, { "epoch": 0.61, "learning_rate": 6.878949327035487e-06, "loss": 0.8756, "step": 4523 }, { "epoch": 0.61, "learning_rate": 6.874778774756521e-06, "loss": 0.901, "step": 4524 }, { "epoch": 0.61, "learning_rate": 6.870608824814966e-06, "loss": 0.866, "step": 4525 }, { "epoch": 0.61, "learning_rate": 6.866439478014519e-06, "loss": 0.8385, "step": 4526 }, { "epoch": 0.61, "learning_rate": 6.862270735158754e-06, "loss": 0.8442, "step": 4527 }, { "epoch": 0.61, "learning_rate": 6.858102597051132e-06, "loss": 0.7576, "step": 4528 }, { "epoch": 0.61, "learning_rate": 6.853935064494993e-06, "loss": 0.8754, "step": 4529 }, { "epoch": 0.61, "learning_rate": 6.849768138293569e-06, "loss": 0.8908, "step": 4530 }, { "epoch": 0.61, "learning_rate": 6.8456018192499654e-06, "loss": 0.9668, "step": 4531 }, { "epoch": 0.61, "learning_rate": 6.8414361081671776e-06, "loss": 0.856, "step": 4532 }, { "epoch": 0.61, "learning_rate": 6.837271005848081e-06, "loss": 0.9158, "step": 4533 }, { "epoch": 0.61, "learning_rate": 6.8331065130954285e-06, "loss": 0.8747, "step": 4534 }, { "epoch": 0.61, "learning_rate": 6.828942630711869e-06, "loss": 0.8163, "step": 4535 }, { "epoch": 0.61, "learning_rate": 6.824779359499918e-06, "loss": 0.8873, "step": 4536 }, { "epoch": 0.61, "learning_rate": 6.8206167002619885e-06, "loss": 0.909, "step": 4537 }, { "epoch": 0.62, "learning_rate": 6.816454653800359e-06, "loss": 0.8814, "step": 4538 }, { "epoch": 0.62, "learning_rate": 6.8122932209172075e-06, "loss": 0.908, "step": 4539 }, { "epoch": 0.62, "learning_rate": 6.80813240241458e-06, "loss": 0.8577, "step": 4540 }, { "epoch": 0.62, "learning_rate": 6.803972199094409e-06, "loss": 0.9045, "step": 4541 }, { "epoch": 0.62, "learning_rate": 6.799812611758511e-06, "loss": 0.8381, "step": 4542 }, { "epoch": 0.62, "learning_rate": 6.7956536412085775e-06, "loss": 0.8858, "step": 4543 }, { "epoch": 0.62, "learning_rate": 6.791495288246188e-06, "loss": 0.8542, "step": 4544 }, { "epoch": 0.62, "learning_rate": 6.787337553672798e-06, "loss": 0.9282, "step": 4545 }, { "epoch": 0.62, "learning_rate": 6.783180438289749e-06, "loss": 0.9062, "step": 4546 }, { "epoch": 0.62, "learning_rate": 6.779023942898255e-06, "loss": 0.893, "step": 4547 }, { "epoch": 0.62, "learning_rate": 6.774868068299421e-06, "loss": 0.839, "step": 4548 }, { "epoch": 0.62, "learning_rate": 6.770712815294223e-06, "loss": 0.8892, "step": 4549 }, { "epoch": 0.62, "learning_rate": 6.766558184683518e-06, "loss": 0.8868, "step": 4550 }, { "epoch": 0.62, "learning_rate": 6.762404177268053e-06, "loss": 0.7809, "step": 4551 }, { "epoch": 0.62, "learning_rate": 6.7582507938484406e-06, "loss": 0.8508, "step": 4552 }, { "epoch": 0.62, "learning_rate": 6.754098035225187e-06, "loss": 0.8286, "step": 4553 }, { "epoch": 0.62, "learning_rate": 6.749945902198667e-06, "loss": 0.8688, "step": 4554 }, { "epoch": 0.62, "learning_rate": 6.745794395569142e-06, "loss": 0.8069, "step": 4555 }, { "epoch": 0.62, "learning_rate": 6.741643516136746e-06, "loss": 0.927, "step": 4556 }, { "epoch": 0.62, "learning_rate": 6.7374932647015e-06, "loss": 0.8069, "step": 4557 }, { "epoch": 0.62, "learning_rate": 6.733343642063299e-06, "loss": 0.7927, "step": 4558 }, { "epoch": 0.62, "learning_rate": 6.729194649021915e-06, "loss": 0.8099, "step": 4559 }, { "epoch": 0.62, "learning_rate": 6.725046286377004e-06, "loss": 0.8238, "step": 4560 }, { "epoch": 0.62, "learning_rate": 6.720898554928097e-06, "loss": 0.8771, "step": 4561 }, { "epoch": 0.62, "learning_rate": 6.716751455474606e-06, "loss": 0.8791, "step": 4562 }, { "epoch": 0.62, "learning_rate": 6.712604988815815e-06, "loss": 0.8577, "step": 4563 }, { "epoch": 0.62, "learning_rate": 6.708459155750892e-06, "loss": 0.8101, "step": 4564 }, { "epoch": 0.62, "learning_rate": 6.704313957078886e-06, "loss": 0.8428, "step": 4565 }, { "epoch": 0.62, "learning_rate": 6.700169393598714e-06, "loss": 0.866, "step": 4566 }, { "epoch": 0.62, "learning_rate": 6.696025466109181e-06, "loss": 0.8446, "step": 4567 }, { "epoch": 0.62, "learning_rate": 6.691882175408959e-06, "loss": 0.8587, "step": 4568 }, { "epoch": 0.62, "learning_rate": 6.6877395222966025e-06, "loss": 0.8964, "step": 4569 }, { "epoch": 0.62, "learning_rate": 6.683597507570545e-06, "loss": 0.841, "step": 4570 }, { "epoch": 0.62, "learning_rate": 6.679456132029094e-06, "loss": 0.8393, "step": 4571 }, { "epoch": 0.62, "learning_rate": 6.675315396470437e-06, "loss": 0.8965, "step": 4572 }, { "epoch": 0.62, "learning_rate": 6.6711753016926305e-06, "loss": 0.8399, "step": 4573 }, { "epoch": 0.62, "learning_rate": 6.667035848493619e-06, "loss": 0.849, "step": 4574 }, { "epoch": 0.62, "learning_rate": 6.662897037671215e-06, "loss": 0.9431, "step": 4575 }, { "epoch": 0.62, "learning_rate": 6.658758870023105e-06, "loss": 0.8874, "step": 4576 }, { "epoch": 0.62, "learning_rate": 6.654621346346864e-06, "loss": 0.837, "step": 4577 }, { "epoch": 0.62, "learning_rate": 6.650484467439928e-06, "loss": 0.9072, "step": 4578 }, { "epoch": 0.62, "learning_rate": 6.646348234099621e-06, "loss": 0.8241, "step": 4579 }, { "epoch": 0.62, "learning_rate": 6.642212647123132e-06, "loss": 0.9448, "step": 4580 }, { "epoch": 0.62, "learning_rate": 6.638077707307535e-06, "loss": 0.8418, "step": 4581 }, { "epoch": 0.62, "learning_rate": 6.633943415449771e-06, "loss": 0.8965, "step": 4582 }, { "epoch": 0.62, "learning_rate": 6.6298097723466625e-06, "loss": 0.749, "step": 4583 }, { "epoch": 0.62, "learning_rate": 6.625676778794905e-06, "loss": 0.8583, "step": 4584 }, { "epoch": 0.62, "learning_rate": 6.621544435591065e-06, "loss": 0.9216, "step": 4585 }, { "epoch": 0.62, "learning_rate": 6.617412743531592e-06, "loss": 0.8387, "step": 4586 }, { "epoch": 0.62, "learning_rate": 6.613281703412798e-06, "loss": 0.9307, "step": 4587 }, { "epoch": 0.62, "learning_rate": 6.609151316030883e-06, "loss": 0.8947, "step": 4588 }, { "epoch": 0.62, "learning_rate": 6.60502158218191e-06, "loss": 0.82, "step": 4589 }, { "epoch": 0.62, "learning_rate": 6.600892502661822e-06, "loss": 0.8918, "step": 4590 }, { "epoch": 0.62, "learning_rate": 6.596764078266433e-06, "loss": 0.899, "step": 4591 }, { "epoch": 0.62, "learning_rate": 6.592636309791437e-06, "loss": 0.8899, "step": 4592 }, { "epoch": 0.62, "learning_rate": 6.5885091980323925e-06, "loss": 0.874, "step": 4593 }, { "epoch": 0.62, "learning_rate": 6.584382743784734e-06, "loss": 0.8199, "step": 4594 }, { "epoch": 0.62, "learning_rate": 6.580256947843775e-06, "loss": 0.8664, "step": 4595 }, { "epoch": 0.62, "learning_rate": 6.576131811004693e-06, "loss": 0.8951, "step": 4596 }, { "epoch": 0.62, "learning_rate": 6.5720073340625505e-06, "loss": 0.8207, "step": 4597 }, { "epoch": 0.62, "learning_rate": 6.567883517812268e-06, "loss": 0.8716, "step": 4598 }, { "epoch": 0.62, "learning_rate": 6.5637603630486545e-06, "loss": 0.8716, "step": 4599 }, { "epoch": 0.62, "learning_rate": 6.559637870566378e-06, "loss": 0.9202, "step": 4600 }, { "epoch": 0.62, "learning_rate": 6.555516041159984e-06, "loss": 0.9042, "step": 4601 }, { "epoch": 0.62, "learning_rate": 6.551394875623893e-06, "loss": 0.9137, "step": 4602 }, { "epoch": 0.62, "learning_rate": 6.547274374752395e-06, "loss": 0.9352, "step": 4603 }, { "epoch": 0.62, "learning_rate": 6.5431545393396516e-06, "loss": 0.8514, "step": 4604 }, { "epoch": 0.62, "learning_rate": 6.5390353701796936e-06, "loss": 0.8698, "step": 4605 }, { "epoch": 0.62, "learning_rate": 6.534916868066431e-06, "loss": 0.8633, "step": 4606 }, { "epoch": 0.62, "learning_rate": 6.530799033793636e-06, "loss": 0.8579, "step": 4607 }, { "epoch": 0.62, "learning_rate": 6.526681868154958e-06, "loss": 0.8568, "step": 4608 }, { "epoch": 0.62, "learning_rate": 6.522565371943921e-06, "loss": 0.8457, "step": 4609 }, { "epoch": 0.62, "learning_rate": 6.518449545953911e-06, "loss": 0.8974, "step": 4610 }, { "epoch": 0.62, "learning_rate": 6.514334390978188e-06, "loss": 0.8357, "step": 4611 }, { "epoch": 0.63, "learning_rate": 6.510219907809885e-06, "loss": 0.9024, "step": 4612 }, { "epoch": 0.63, "learning_rate": 6.506106097242003e-06, "loss": 0.8819, "step": 4613 }, { "epoch": 0.63, "learning_rate": 6.501992960067418e-06, "loss": 0.9138, "step": 4614 }, { "epoch": 0.63, "learning_rate": 6.497880497078868e-06, "loss": 0.8323, "step": 4615 }, { "epoch": 0.63, "learning_rate": 6.493768709068969e-06, "loss": 0.8186, "step": 4616 }, { "epoch": 0.63, "learning_rate": 6.489657596830201e-06, "loss": 0.8847, "step": 4617 }, { "epoch": 0.63, "learning_rate": 6.485547161154922e-06, "loss": 0.826, "step": 4618 }, { "epoch": 0.63, "learning_rate": 6.481437402835349e-06, "loss": 0.8761, "step": 4619 }, { "epoch": 0.63, "learning_rate": 6.477328322663572e-06, "loss": 0.847, "step": 4620 }, { "epoch": 0.63, "learning_rate": 6.473219921431557e-06, "loss": 0.9006, "step": 4621 }, { "epoch": 0.63, "learning_rate": 6.469112199931131e-06, "loss": 0.8673, "step": 4622 }, { "epoch": 0.63, "learning_rate": 6.465005158953994e-06, "loss": 0.9043, "step": 4623 }, { "epoch": 0.63, "learning_rate": 6.460898799291711e-06, "loss": 0.8289, "step": 4624 }, { "epoch": 0.63, "learning_rate": 6.456793121735724e-06, "loss": 0.8629, "step": 4625 }, { "epoch": 0.63, "learning_rate": 6.452688127077333e-06, "loss": 0.8579, "step": 4626 }, { "epoch": 0.63, "learning_rate": 6.448583816107713e-06, "loss": 0.8704, "step": 4627 }, { "epoch": 0.63, "learning_rate": 6.444480189617908e-06, "loss": 0.8495, "step": 4628 }, { "epoch": 0.63, "learning_rate": 6.440377248398821e-06, "loss": 0.8461, "step": 4629 }, { "epoch": 0.63, "learning_rate": 6.436274993241238e-06, "loss": 0.8893, "step": 4630 }, { "epoch": 0.63, "learning_rate": 6.432173424935797e-06, "loss": 0.8994, "step": 4631 }, { "epoch": 0.63, "learning_rate": 6.428072544273019e-06, "loss": 0.9168, "step": 4632 }, { "epoch": 0.63, "learning_rate": 6.423972352043275e-06, "loss": 0.7924, "step": 4633 }, { "epoch": 0.63, "learning_rate": 6.419872849036821e-06, "loss": 0.8818, "step": 4634 }, { "epoch": 0.63, "learning_rate": 6.4157740360437695e-06, "loss": 0.8471, "step": 4635 }, { "epoch": 0.63, "learning_rate": 6.411675913854098e-06, "loss": 0.8846, "step": 4636 }, { "epoch": 0.63, "learning_rate": 6.407578483257662e-06, "loss": 0.8736, "step": 4637 }, { "epoch": 0.63, "learning_rate": 6.403481745044171e-06, "loss": 0.9007, "step": 4638 }, { "epoch": 0.63, "learning_rate": 6.399385700003213e-06, "loss": 0.8974, "step": 4639 }, { "epoch": 0.63, "learning_rate": 6.395290348924232e-06, "loss": 0.8659, "step": 4640 }, { "epoch": 0.63, "learning_rate": 6.391195692596546e-06, "loss": 0.8762, "step": 4641 }, { "epoch": 0.63, "learning_rate": 6.387101731809332e-06, "loss": 0.8524, "step": 4642 }, { "epoch": 0.63, "learning_rate": 6.3830084673516415e-06, "loss": 0.8984, "step": 4643 }, { "epoch": 0.63, "learning_rate": 6.378915900012383e-06, "loss": 0.8865, "step": 4644 }, { "epoch": 0.63, "learning_rate": 6.374824030580336e-06, "loss": 0.8347, "step": 4645 }, { "epoch": 0.63, "learning_rate": 6.370732859844145e-06, "loss": 0.8968, "step": 4646 }, { "epoch": 0.63, "learning_rate": 6.366642388592317e-06, "loss": 0.8135, "step": 4647 }, { "epoch": 0.63, "learning_rate": 6.36255261761323e-06, "loss": 0.8917, "step": 4648 }, { "epoch": 0.63, "learning_rate": 6.3584635476951195e-06, "loss": 0.8574, "step": 4649 }, { "epoch": 0.63, "learning_rate": 6.354375179626092e-06, "loss": 0.8465, "step": 4650 }, { "epoch": 0.63, "learning_rate": 6.350287514194112e-06, "loss": 0.8095, "step": 4651 }, { "epoch": 0.63, "learning_rate": 6.346200552187019e-06, "loss": 0.8312, "step": 4652 }, { "epoch": 0.63, "learning_rate": 6.342114294392509e-06, "loss": 0.9097, "step": 4653 }, { "epoch": 0.63, "learning_rate": 6.338028741598144e-06, "loss": 0.8312, "step": 4654 }, { "epoch": 0.63, "learning_rate": 6.333943894591349e-06, "loss": 0.9381, "step": 4655 }, { "epoch": 0.63, "learning_rate": 6.3298597541594155e-06, "loss": 0.8861, "step": 4656 }, { "epoch": 0.63, "learning_rate": 6.325776321089496e-06, "loss": 0.886, "step": 4657 }, { "epoch": 0.63, "learning_rate": 6.321693596168611e-06, "loss": 0.8178, "step": 4658 }, { "epoch": 0.63, "learning_rate": 6.317611580183638e-06, "loss": 0.8096, "step": 4659 }, { "epoch": 0.63, "learning_rate": 6.313530273921325e-06, "loss": 0.8773, "step": 4660 }, { "epoch": 0.63, "learning_rate": 6.30944967816828e-06, "loss": 0.8641, "step": 4661 }, { "epoch": 0.63, "learning_rate": 6.30536979371097e-06, "loss": 0.8115, "step": 4662 }, { "epoch": 0.63, "learning_rate": 6.3012906213357316e-06, "loss": 0.8233, "step": 4663 }, { "epoch": 0.63, "learning_rate": 6.297212161828761e-06, "loss": 0.825, "step": 4664 }, { "epoch": 0.63, "learning_rate": 6.2931344159761165e-06, "loss": 0.8861, "step": 4665 }, { "epoch": 0.63, "learning_rate": 6.289057384563721e-06, "loss": 0.7974, "step": 4666 }, { "epoch": 0.63, "learning_rate": 6.284981068377359e-06, "loss": 0.898, "step": 4667 }, { "epoch": 0.63, "learning_rate": 6.280905468202674e-06, "loss": 0.8817, "step": 4668 }, { "epoch": 0.63, "learning_rate": 6.276830584825175e-06, "loss": 0.7866, "step": 4669 }, { "epoch": 0.63, "learning_rate": 6.272756419030235e-06, "loss": 0.869, "step": 4670 }, { "epoch": 0.63, "learning_rate": 6.268682971603081e-06, "loss": 0.9106, "step": 4671 }, { "epoch": 0.63, "learning_rate": 6.264610243328808e-06, "loss": 0.8655, "step": 4672 }, { "epoch": 0.63, "learning_rate": 6.26053823499237e-06, "loss": 0.935, "step": 4673 }, { "epoch": 0.63, "learning_rate": 6.256466947378586e-06, "loss": 0.8493, "step": 4674 }, { "epoch": 0.63, "learning_rate": 6.252396381272129e-06, "loss": 0.8791, "step": 4675 }, { "epoch": 0.63, "learning_rate": 6.248326537457538e-06, "loss": 0.8266, "step": 4676 }, { "epoch": 0.63, "learning_rate": 6.2442574167192125e-06, "loss": 0.7654, "step": 4677 }, { "epoch": 0.63, "learning_rate": 6.240189019841411e-06, "loss": 0.9173, "step": 4678 }, { "epoch": 0.63, "learning_rate": 6.2361213476082534e-06, "loss": 0.8309, "step": 4679 }, { "epoch": 0.63, "learning_rate": 6.232054400803719e-06, "loss": 0.8896, "step": 4680 }, { "epoch": 0.63, "learning_rate": 6.22798818021165e-06, "loss": 0.9076, "step": 4681 }, { "epoch": 0.63, "learning_rate": 6.223922686615743e-06, "loss": 0.8767, "step": 4682 }, { "epoch": 0.63, "learning_rate": 6.219857920799564e-06, "loss": 0.8575, "step": 4683 }, { "epoch": 0.63, "learning_rate": 6.215793883546526e-06, "loss": 0.8729, "step": 4684 }, { "epoch": 0.63, "learning_rate": 6.211730575639914e-06, "loss": 0.868, "step": 4685 }, { "epoch": 0.64, "learning_rate": 6.207667997862866e-06, "loss": 0.8738, "step": 4686 }, { "epoch": 0.64, "learning_rate": 6.203606150998377e-06, "loss": 0.9031, "step": 4687 }, { "epoch": 0.64, "learning_rate": 6.1995450358293085e-06, "loss": 0.9459, "step": 4688 }, { "epoch": 0.64, "learning_rate": 6.195484653138372e-06, "loss": 0.8462, "step": 4689 }, { "epoch": 0.64, "learning_rate": 6.1914250037081465e-06, "loss": 0.8855, "step": 4690 }, { "epoch": 0.64, "learning_rate": 6.187366088321065e-06, "loss": 0.8667, "step": 4691 }, { "epoch": 0.64, "learning_rate": 6.1833079077594215e-06, "loss": 0.847, "step": 4692 }, { "epoch": 0.64, "learning_rate": 6.179250462805362e-06, "loss": 0.8243, "step": 4693 }, { "epoch": 0.64, "learning_rate": 6.175193754240899e-06, "loss": 0.884, "step": 4694 }, { "epoch": 0.64, "learning_rate": 6.171137782847895e-06, "loss": 0.9502, "step": 4695 }, { "epoch": 0.64, "learning_rate": 6.1670825494080834e-06, "loss": 0.8451, "step": 4696 }, { "epoch": 0.64, "learning_rate": 6.163028054703041e-06, "loss": 0.8336, "step": 4697 }, { "epoch": 0.64, "learning_rate": 6.15897429951421e-06, "loss": 0.8239, "step": 4698 }, { "epoch": 0.64, "learning_rate": 6.154921284622886e-06, "loss": 0.8736, "step": 4699 }, { "epoch": 0.64, "learning_rate": 6.150869010810227e-06, "loss": 0.8201, "step": 4700 }, { "epoch": 0.64, "learning_rate": 6.146817478857241e-06, "loss": 0.8648, "step": 4701 }, { "epoch": 0.64, "learning_rate": 6.142766689544804e-06, "loss": 0.8334, "step": 4702 }, { "epoch": 0.64, "learning_rate": 6.138716643653634e-06, "loss": 0.8276, "step": 4703 }, { "epoch": 0.64, "learning_rate": 6.134667341964321e-06, "loss": 0.8477, "step": 4704 }, { "epoch": 0.64, "learning_rate": 6.130618785257302e-06, "loss": 0.8873, "step": 4705 }, { "epoch": 0.64, "learning_rate": 6.12657097431287e-06, "loss": 0.8293, "step": 4706 }, { "epoch": 0.64, "learning_rate": 6.122523909911182e-06, "loss": 0.8661, "step": 4707 }, { "epoch": 0.64, "learning_rate": 6.11847759283224e-06, "loss": 0.888, "step": 4708 }, { "epoch": 0.64, "learning_rate": 6.114432023855916e-06, "loss": 0.8578, "step": 4709 }, { "epoch": 0.64, "learning_rate": 6.1103872037619225e-06, "loss": 0.8994, "step": 4710 }, { "epoch": 0.64, "learning_rate": 6.106343133329841e-06, "loss": 0.8517, "step": 4711 }, { "epoch": 0.64, "learning_rate": 6.102299813339101e-06, "loss": 0.9191, "step": 4712 }, { "epoch": 0.64, "learning_rate": 6.098257244568986e-06, "loss": 0.8614, "step": 4713 }, { "epoch": 0.64, "learning_rate": 6.094215427798643e-06, "loss": 0.9239, "step": 4714 }, { "epoch": 0.64, "learning_rate": 6.090174363807063e-06, "loss": 0.8529, "step": 4715 }, { "epoch": 0.64, "learning_rate": 6.086134053373103e-06, "loss": 0.9356, "step": 4716 }, { "epoch": 0.64, "learning_rate": 6.082094497275466e-06, "loss": 0.8557, "step": 4717 }, { "epoch": 0.64, "learning_rate": 6.078055696292715e-06, "loss": 0.8808, "step": 4718 }, { "epoch": 0.64, "learning_rate": 6.074017651203265e-06, "loss": 0.8541, "step": 4719 }, { "epoch": 0.64, "learning_rate": 6.069980362785386e-06, "loss": 0.8111, "step": 4720 }, { "epoch": 0.64, "learning_rate": 6.065943831817202e-06, "loss": 0.8115, "step": 4721 }, { "epoch": 0.64, "learning_rate": 6.061908059076691e-06, "loss": 0.9029, "step": 4722 }, { "epoch": 0.64, "learning_rate": 6.057873045341686e-06, "loss": 0.8732, "step": 4723 }, { "epoch": 0.64, "learning_rate": 6.05383879138987e-06, "loss": 0.8695, "step": 4724 }, { "epoch": 0.64, "learning_rate": 6.049805297998785e-06, "loss": 0.7774, "step": 4725 }, { "epoch": 0.64, "learning_rate": 6.04577256594582e-06, "loss": 0.8073, "step": 4726 }, { "epoch": 0.64, "learning_rate": 6.041740596008228e-06, "loss": 0.8984, "step": 4727 }, { "epoch": 0.64, "learning_rate": 6.0377093889631e-06, "loss": 0.8495, "step": 4728 }, { "epoch": 0.64, "learning_rate": 6.033678945587393e-06, "loss": 0.8851, "step": 4729 }, { "epoch": 0.64, "learning_rate": 6.029649266657911e-06, "loss": 0.866, "step": 4730 }, { "epoch": 0.64, "learning_rate": 6.025620352951308e-06, "loss": 0.8232, "step": 4731 }, { "epoch": 0.64, "learning_rate": 6.0215922052441e-06, "loss": 0.8167, "step": 4732 }, { "epoch": 0.64, "learning_rate": 6.0175648243126425e-06, "loss": 0.811, "step": 4733 }, { "epoch": 0.64, "learning_rate": 6.013538210933156e-06, "loss": 0.9556, "step": 4734 }, { "epoch": 0.64, "learning_rate": 6.009512365881703e-06, "loss": 0.8615, "step": 4735 }, { "epoch": 0.64, "learning_rate": 6.0054872899342065e-06, "loss": 0.8465, "step": 4736 }, { "epoch": 0.64, "learning_rate": 6.001462983866433e-06, "loss": 0.8664, "step": 4737 }, { "epoch": 0.64, "learning_rate": 5.997439448454004e-06, "loss": 0.8205, "step": 4738 }, { "epoch": 0.64, "learning_rate": 5.993416684472393e-06, "loss": 0.8931, "step": 4739 }, { "epoch": 0.64, "learning_rate": 5.989394692696928e-06, "loss": 0.8898, "step": 4740 }, { "epoch": 0.64, "learning_rate": 5.985373473902784e-06, "loss": 0.9618, "step": 4741 }, { "epoch": 0.64, "learning_rate": 5.981353028864987e-06, "loss": 0.7998, "step": 4742 }, { "epoch": 0.64, "learning_rate": 5.977333358358412e-06, "loss": 0.9336, "step": 4743 }, { "epoch": 0.64, "learning_rate": 5.9733144631577935e-06, "loss": 0.8778, "step": 4744 }, { "epoch": 0.64, "learning_rate": 5.969296344037705e-06, "loss": 0.9261, "step": 4745 }, { "epoch": 0.64, "learning_rate": 5.96527900177258e-06, "loss": 0.8578, "step": 4746 }, { "epoch": 0.64, "learning_rate": 5.961262437136697e-06, "loss": 0.8074, "step": 4747 }, { "epoch": 0.64, "learning_rate": 5.957246650904183e-06, "loss": 0.8302, "step": 4748 }, { "epoch": 0.64, "learning_rate": 5.953231643849022e-06, "loss": 0.7942, "step": 4749 }, { "epoch": 0.64, "learning_rate": 5.949217416745041e-06, "loss": 0.8641, "step": 4750 }, { "epoch": 0.64, "learning_rate": 5.945203970365922e-06, "loss": 0.8483, "step": 4751 }, { "epoch": 0.64, "learning_rate": 5.941191305485189e-06, "loss": 0.8649, "step": 4752 }, { "epoch": 0.64, "learning_rate": 5.937179422876226e-06, "loss": 0.8677, "step": 4753 }, { "epoch": 0.64, "learning_rate": 5.933168323312256e-06, "loss": 0.9179, "step": 4754 }, { "epoch": 0.64, "learning_rate": 5.92915800756636e-06, "loss": 0.8537, "step": 4755 }, { "epoch": 0.64, "learning_rate": 5.92514847641146e-06, "loss": 0.8553, "step": 4756 }, { "epoch": 0.64, "learning_rate": 5.921139730620331e-06, "loss": 0.806, "step": 4757 }, { "epoch": 0.64, "learning_rate": 5.917131770965596e-06, "loss": 0.9471, "step": 4758 }, { "epoch": 0.65, "learning_rate": 5.913124598219726e-06, "loss": 0.8422, "step": 4759 }, { "epoch": 0.65, "learning_rate": 5.909118213155044e-06, "loss": 0.9093, "step": 4760 }, { "epoch": 0.65, "learning_rate": 5.9051126165437134e-06, "loss": 0.8325, "step": 4761 }, { "epoch": 0.65, "learning_rate": 5.901107809157753e-06, "loss": 0.8705, "step": 4762 }, { "epoch": 0.65, "learning_rate": 5.897103791769024e-06, "loss": 0.881, "step": 4763 }, { "epoch": 0.65, "learning_rate": 5.893100565149243e-06, "loss": 0.8381, "step": 4764 }, { "epoch": 0.65, "learning_rate": 5.889098130069965e-06, "loss": 0.8384, "step": 4765 }, { "epoch": 0.65, "learning_rate": 5.885096487302595e-06, "loss": 0.8991, "step": 4766 }, { "epoch": 0.65, "learning_rate": 5.881095637618392e-06, "loss": 0.8909, "step": 4767 }, { "epoch": 0.65, "learning_rate": 5.877095581788454e-06, "loss": 0.8785, "step": 4768 }, { "epoch": 0.65, "learning_rate": 5.87309632058373e-06, "loss": 0.8622, "step": 4769 }, { "epoch": 0.65, "learning_rate": 5.8690978547750134e-06, "loss": 0.875, "step": 4770 }, { "epoch": 0.65, "learning_rate": 5.865100185132948e-06, "loss": 0.9491, "step": 4771 }, { "epoch": 0.65, "learning_rate": 5.8611033124280225e-06, "loss": 0.8575, "step": 4772 }, { "epoch": 0.65, "learning_rate": 5.857107237430567e-06, "loss": 0.9037, "step": 4773 }, { "epoch": 0.65, "learning_rate": 5.853111960910768e-06, "loss": 0.8684, "step": 4774 }, { "epoch": 0.65, "learning_rate": 5.849117483638648e-06, "loss": 0.8279, "step": 4775 }, { "epoch": 0.65, "learning_rate": 5.845123806384083e-06, "loss": 0.9562, "step": 4776 }, { "epoch": 0.65, "learning_rate": 5.841130929916788e-06, "loss": 0.8831, "step": 4777 }, { "epoch": 0.65, "learning_rate": 5.83713885500633e-06, "loss": 0.8045, "step": 4778 }, { "epoch": 0.65, "learning_rate": 5.8331475824221215e-06, "loss": 0.8493, "step": 4779 }, { "epoch": 0.65, "learning_rate": 5.8291571129334145e-06, "loss": 0.8093, "step": 4780 }, { "epoch": 0.65, "learning_rate": 5.82516744730931e-06, "loss": 0.8213, "step": 4781 }, { "epoch": 0.65, "learning_rate": 5.821178586318747e-06, "loss": 0.8915, "step": 4782 }, { "epoch": 0.65, "learning_rate": 5.81719053073053e-06, "loss": 0.8569, "step": 4783 }, { "epoch": 0.65, "learning_rate": 5.81320328131328e-06, "loss": 0.8467, "step": 4784 }, { "epoch": 0.65, "learning_rate": 5.8092168388354876e-06, "loss": 0.8594, "step": 4785 }, { "epoch": 0.65, "learning_rate": 5.805231204065473e-06, "loss": 0.909, "step": 4786 }, { "epoch": 0.65, "learning_rate": 5.801246377771406e-06, "loss": 0.8398, "step": 4787 }, { "epoch": 0.65, "learning_rate": 5.797262360721292e-06, "loss": 0.8178, "step": 4788 }, { "epoch": 0.65, "learning_rate": 5.793279153682999e-06, "loss": 0.8078, "step": 4789 }, { "epoch": 0.65, "learning_rate": 5.7892967574242235e-06, "loss": 0.8152, "step": 4790 }, { "epoch": 0.65, "learning_rate": 5.785315172712507e-06, "loss": 0.8894, "step": 4791 }, { "epoch": 0.65, "learning_rate": 5.781334400315241e-06, "loss": 0.9019, "step": 4792 }, { "epoch": 0.65, "learning_rate": 5.777354440999652e-06, "loss": 0.8579, "step": 4793 }, { "epoch": 0.65, "learning_rate": 5.773375295532821e-06, "loss": 0.8694, "step": 4794 }, { "epoch": 0.65, "learning_rate": 5.7693969646816665e-06, "loss": 0.7578, "step": 4795 }, { "epoch": 0.65, "learning_rate": 5.765419449212944e-06, "loss": 0.8059, "step": 4796 }, { "epoch": 0.65, "learning_rate": 5.761442749893256e-06, "loss": 0.8644, "step": 4797 }, { "epoch": 0.65, "learning_rate": 5.757466867489056e-06, "loss": 0.9042, "step": 4798 }, { "epoch": 0.65, "learning_rate": 5.753491802766631e-06, "loss": 0.8864, "step": 4799 }, { "epoch": 0.65, "learning_rate": 5.74951755649211e-06, "loss": 0.8343, "step": 4800 }, { "epoch": 0.65, "learning_rate": 5.745544129431467e-06, "loss": 0.8529, "step": 4801 }, { "epoch": 0.65, "learning_rate": 5.741571522350515e-06, "loss": 0.8389, "step": 4802 }, { "epoch": 0.65, "learning_rate": 5.73759973601492e-06, "loss": 0.8622, "step": 4803 }, { "epoch": 0.65, "learning_rate": 5.7336287711901774e-06, "loss": 0.8724, "step": 4804 }, { "epoch": 0.65, "learning_rate": 5.729658628641628e-06, "loss": 0.8995, "step": 4805 }, { "epoch": 0.65, "learning_rate": 5.725689309134448e-06, "loss": 0.9389, "step": 4806 }, { "epoch": 0.65, "learning_rate": 5.721720813433673e-06, "loss": 0.8579, "step": 4807 }, { "epoch": 0.65, "learning_rate": 5.7177531423041655e-06, "loss": 0.8892, "step": 4808 }, { "epoch": 0.65, "learning_rate": 5.7137862965106275e-06, "loss": 0.8445, "step": 4809 }, { "epoch": 0.65, "learning_rate": 5.709820276817609e-06, "loss": 0.8264, "step": 4810 }, { "epoch": 0.65, "learning_rate": 5.705855083989493e-06, "loss": 0.8808, "step": 4811 }, { "epoch": 0.65, "learning_rate": 5.701890718790519e-06, "loss": 0.9204, "step": 4812 }, { "epoch": 0.65, "learning_rate": 5.697927181984749e-06, "loss": 0.8889, "step": 4813 }, { "epoch": 0.65, "learning_rate": 5.693964474336093e-06, "loss": 0.8699, "step": 4814 }, { "epoch": 0.65, "learning_rate": 5.690002596608304e-06, "loss": 0.8536, "step": 4815 }, { "epoch": 0.65, "learning_rate": 5.686041549564964e-06, "loss": 0.8313, "step": 4816 }, { "epoch": 0.65, "learning_rate": 5.682081333969513e-06, "loss": 0.8819, "step": 4817 }, { "epoch": 0.65, "learning_rate": 5.678121950585216e-06, "loss": 0.8531, "step": 4818 }, { "epoch": 0.65, "learning_rate": 5.674163400175181e-06, "loss": 0.9034, "step": 4819 }, { "epoch": 0.65, "learning_rate": 5.670205683502353e-06, "loss": 0.8846, "step": 4820 }, { "epoch": 0.65, "learning_rate": 5.66624880132953e-06, "loss": 0.8237, "step": 4821 }, { "epoch": 0.65, "learning_rate": 5.662292754419332e-06, "loss": 0.8007, "step": 4822 }, { "epoch": 0.65, "learning_rate": 5.658337543534227e-06, "loss": 0.8168, "step": 4823 }, { "epoch": 0.65, "learning_rate": 5.654383169436519e-06, "loss": 0.8584, "step": 4824 }, { "epoch": 0.65, "learning_rate": 5.650429632888348e-06, "loss": 0.9147, "step": 4825 }, { "epoch": 0.65, "learning_rate": 5.646476934651699e-06, "loss": 0.8578, "step": 4826 }, { "epoch": 0.65, "learning_rate": 5.6425250754883985e-06, "loss": 0.8479, "step": 4827 }, { "epoch": 0.65, "learning_rate": 5.638574056160102e-06, "loss": 0.8645, "step": 4828 }, { "epoch": 0.65, "learning_rate": 5.634623877428303e-06, "loss": 0.8399, "step": 4829 }, { "epoch": 0.65, "learning_rate": 5.630674540054337e-06, "loss": 0.8644, "step": 4830 }, { "epoch": 0.65, "learning_rate": 5.626726044799381e-06, "loss": 0.8568, "step": 4831 }, { "epoch": 0.65, "learning_rate": 5.622778392424444e-06, "loss": 0.8341, "step": 4832 }, { "epoch": 0.66, "learning_rate": 5.6188315836903736e-06, "loss": 0.8724, "step": 4833 }, { "epoch": 0.66, "learning_rate": 5.614885619357855e-06, "loss": 0.8012, "step": 4834 }, { "epoch": 0.66, "learning_rate": 5.610940500187406e-06, "loss": 0.8469, "step": 4835 }, { "epoch": 0.66, "learning_rate": 5.606996226939396e-06, "loss": 0.8145, "step": 4836 }, { "epoch": 0.66, "learning_rate": 5.603052800374018e-06, "loss": 0.92, "step": 4837 }, { "epoch": 0.66, "learning_rate": 5.5991102212513045e-06, "loss": 0.8665, "step": 4838 }, { "epoch": 0.66, "learning_rate": 5.595168490331124e-06, "loss": 0.8618, "step": 4839 }, { "epoch": 0.66, "learning_rate": 5.5912276083731884e-06, "loss": 0.8364, "step": 4840 }, { "epoch": 0.66, "learning_rate": 5.5872875761370394e-06, "loss": 0.8568, "step": 4841 }, { "epoch": 0.66, "learning_rate": 5.583348394382055e-06, "loss": 0.8075, "step": 4842 }, { "epoch": 0.66, "learning_rate": 5.57941006386745e-06, "loss": 0.8548, "step": 4843 }, { "epoch": 0.66, "learning_rate": 5.575472585352274e-06, "loss": 0.8538, "step": 4844 }, { "epoch": 0.66, "learning_rate": 5.571535959595422e-06, "loss": 0.7657, "step": 4845 }, { "epoch": 0.66, "learning_rate": 5.5676001873556105e-06, "loss": 0.8897, "step": 4846 }, { "epoch": 0.66, "learning_rate": 5.5636652693914e-06, "loss": 0.8483, "step": 4847 }, { "epoch": 0.66, "learning_rate": 5.559731206461182e-06, "loss": 0.8928, "step": 4848 }, { "epoch": 0.66, "learning_rate": 5.555797999323189e-06, "loss": 0.9358, "step": 4849 }, { "epoch": 0.66, "learning_rate": 5.551865648735485e-06, "loss": 0.861, "step": 4850 }, { "epoch": 0.66, "learning_rate": 5.547934155455967e-06, "loss": 0.8834, "step": 4851 }, { "epoch": 0.66, "learning_rate": 5.544003520242369e-06, "loss": 0.8968, "step": 4852 }, { "epoch": 0.66, "learning_rate": 5.540073743852256e-06, "loss": 0.8506, "step": 4853 }, { "epoch": 0.66, "learning_rate": 5.536144827043037e-06, "loss": 0.8935, "step": 4854 }, { "epoch": 0.66, "learning_rate": 5.532216770571948e-06, "loss": 0.7774, "step": 4855 }, { "epoch": 0.66, "learning_rate": 5.528289575196058e-06, "loss": 0.8497, "step": 4856 }, { "epoch": 0.66, "learning_rate": 5.524363241672268e-06, "loss": 0.8386, "step": 4857 }, { "epoch": 0.66, "learning_rate": 5.520437770757327e-06, "loss": 0.8358, "step": 4858 }, { "epoch": 0.66, "learning_rate": 5.516513163207804e-06, "loss": 0.8523, "step": 4859 }, { "epoch": 0.66, "learning_rate": 5.512589419780106e-06, "loss": 0.8542, "step": 4860 }, { "epoch": 0.66, "learning_rate": 5.50866654123047e-06, "loss": 0.8505, "step": 4861 }, { "epoch": 0.66, "learning_rate": 5.504744528314967e-06, "loss": 0.9459, "step": 4862 }, { "epoch": 0.66, "learning_rate": 5.5008233817895126e-06, "loss": 0.816, "step": 4863 }, { "epoch": 0.66, "learning_rate": 5.496903102409843e-06, "loss": 0.8796, "step": 4864 }, { "epoch": 0.66, "learning_rate": 5.492983690931528e-06, "loss": 0.7979, "step": 4865 }, { "epoch": 0.66, "learning_rate": 5.4890651481099736e-06, "loss": 0.7856, "step": 4866 }, { "epoch": 0.66, "learning_rate": 5.485147474700415e-06, "loss": 0.8659, "step": 4867 }, { "epoch": 0.66, "learning_rate": 5.481230671457929e-06, "loss": 0.8443, "step": 4868 }, { "epoch": 0.66, "learning_rate": 5.4773147391374136e-06, "loss": 0.8251, "step": 4869 }, { "epoch": 0.66, "learning_rate": 5.473399678493601e-06, "loss": 0.8474, "step": 4870 }, { "epoch": 0.66, "learning_rate": 5.469485490281064e-06, "loss": 0.8354, "step": 4871 }, { "epoch": 0.66, "learning_rate": 5.465572175254195e-06, "loss": 0.8699, "step": 4872 }, { "epoch": 0.66, "learning_rate": 5.461659734167229e-06, "loss": 0.8259, "step": 4873 }, { "epoch": 0.66, "learning_rate": 5.457748167774228e-06, "loss": 0.7787, "step": 4874 }, { "epoch": 0.66, "learning_rate": 5.453837476829083e-06, "loss": 0.8552, "step": 4875 }, { "epoch": 0.66, "learning_rate": 5.449927662085517e-06, "loss": 0.7811, "step": 4876 }, { "epoch": 0.66, "learning_rate": 5.446018724297082e-06, "loss": 0.8006, "step": 4877 }, { "epoch": 0.66, "learning_rate": 5.442110664217175e-06, "loss": 0.873, "step": 4878 }, { "epoch": 0.66, "learning_rate": 5.438203482599007e-06, "loss": 0.8782, "step": 4879 }, { "epoch": 0.66, "learning_rate": 5.434297180195626e-06, "loss": 0.8377, "step": 4880 }, { "epoch": 0.66, "learning_rate": 5.430391757759907e-06, "loss": 0.8328, "step": 4881 }, { "epoch": 0.66, "learning_rate": 5.426487216044569e-06, "loss": 0.8551, "step": 4882 }, { "epoch": 0.66, "learning_rate": 5.422583555802144e-06, "loss": 0.8477, "step": 4883 }, { "epoch": 0.66, "learning_rate": 5.418680777785003e-06, "loss": 0.9475, "step": 4884 }, { "epoch": 0.66, "learning_rate": 5.414778882745346e-06, "loss": 0.8654, "step": 4885 }, { "epoch": 0.66, "learning_rate": 5.410877871435196e-06, "loss": 0.8588, "step": 4886 }, { "epoch": 0.66, "learning_rate": 5.406977744606421e-06, "loss": 0.8369, "step": 4887 }, { "epoch": 0.66, "learning_rate": 5.403078503010706e-06, "loss": 0.8519, "step": 4888 }, { "epoch": 0.66, "learning_rate": 5.3991801473995675e-06, "loss": 0.8764, "step": 4889 }, { "epoch": 0.66, "learning_rate": 5.39528267852435e-06, "loss": 0.7837, "step": 4890 }, { "epoch": 0.66, "learning_rate": 5.391386097136234e-06, "loss": 0.8423, "step": 4891 }, { "epoch": 0.66, "learning_rate": 5.387490403986224e-06, "loss": 0.8679, "step": 4892 }, { "epoch": 0.66, "learning_rate": 5.383595599825154e-06, "loss": 0.892, "step": 4893 }, { "epoch": 0.66, "learning_rate": 5.3797016854036845e-06, "loss": 0.7515, "step": 4894 }, { "epoch": 0.66, "learning_rate": 5.375808661472304e-06, "loss": 0.8864, "step": 4895 }, { "epoch": 0.66, "learning_rate": 5.371916528781338e-06, "loss": 0.895, "step": 4896 }, { "epoch": 0.66, "learning_rate": 5.368025288080931e-06, "loss": 0.8516, "step": 4897 }, { "epoch": 0.66, "learning_rate": 5.36413494012106e-06, "loss": 0.8488, "step": 4898 }, { "epoch": 0.66, "learning_rate": 5.360245485651523e-06, "loss": 0.9044, "step": 4899 }, { "epoch": 0.66, "learning_rate": 5.356356925421959e-06, "loss": 0.925, "step": 4900 }, { "epoch": 0.66, "learning_rate": 5.352469260181825e-06, "loss": 0.8601, "step": 4901 }, { "epoch": 0.66, "learning_rate": 5.348582490680405e-06, "loss": 0.8174, "step": 4902 }, { "epoch": 0.66, "learning_rate": 5.344696617666815e-06, "loss": 0.841, "step": 4903 }, { "epoch": 0.66, "learning_rate": 5.340811641889991e-06, "loss": 0.8624, "step": 4904 }, { "epoch": 0.66, "learning_rate": 5.336927564098712e-06, "loss": 0.8772, "step": 4905 }, { "epoch": 0.66, "learning_rate": 5.333044385041565e-06, "loss": 0.9054, "step": 4906 }, { "epoch": 0.67, "learning_rate": 5.329162105466974e-06, "loss": 0.8082, "step": 4907 }, { "epoch": 0.67, "learning_rate": 5.325280726123182e-06, "loss": 0.9273, "step": 4908 }, { "epoch": 0.67, "learning_rate": 5.321400247758275e-06, "loss": 0.8416, "step": 4909 }, { "epoch": 0.67, "learning_rate": 5.317520671120147e-06, "loss": 0.829, "step": 4910 }, { "epoch": 0.67, "learning_rate": 5.313641996956529e-06, "loss": 0.8377, "step": 4911 }, { "epoch": 0.67, "learning_rate": 5.309764226014972e-06, "loss": 0.85, "step": 4912 }, { "epoch": 0.67, "learning_rate": 5.305887359042851e-06, "loss": 0.8243, "step": 4913 }, { "epoch": 0.67, "learning_rate": 5.302011396787379e-06, "loss": 0.8496, "step": 4914 }, { "epoch": 0.67, "learning_rate": 5.298136339995589e-06, "loss": 0.8768, "step": 4915 }, { "epoch": 0.67, "learning_rate": 5.294262189414332e-06, "loss": 0.8563, "step": 4916 }, { "epoch": 0.67, "learning_rate": 5.290388945790292e-06, "loss": 0.8571, "step": 4917 }, { "epoch": 0.67, "learning_rate": 5.28651660986997e-06, "loss": 0.8662, "step": 4918 }, { "epoch": 0.67, "learning_rate": 5.282645182399708e-06, "loss": 0.8158, "step": 4919 }, { "epoch": 0.67, "learning_rate": 5.278774664125659e-06, "loss": 0.8458, "step": 4920 }, { "epoch": 0.67, "learning_rate": 5.274905055793802e-06, "loss": 0.9404, "step": 4921 }, { "epoch": 0.67, "learning_rate": 5.271036358149946e-06, "loss": 0.8904, "step": 4922 }, { "epoch": 0.67, "learning_rate": 5.2671685719397184e-06, "loss": 0.8547, "step": 4923 }, { "epoch": 0.67, "learning_rate": 5.263301697908579e-06, "loss": 0.8475, "step": 4924 }, { "epoch": 0.67, "learning_rate": 5.2594357368018065e-06, "loss": 0.844, "step": 4925 }, { "epoch": 0.67, "learning_rate": 5.255570689364502e-06, "loss": 0.8469, "step": 4926 }, { "epoch": 0.67, "learning_rate": 5.251706556341596e-06, "loss": 0.8542, "step": 4927 }, { "epoch": 0.67, "learning_rate": 5.247843338477832e-06, "loss": 0.8428, "step": 4928 }, { "epoch": 0.67, "learning_rate": 5.243981036517793e-06, "loss": 0.8902, "step": 4929 }, { "epoch": 0.67, "learning_rate": 5.240119651205876e-06, "loss": 0.8223, "step": 4930 }, { "epoch": 0.67, "learning_rate": 5.2362591832863005e-06, "loss": 0.8774, "step": 4931 }, { "epoch": 0.67, "learning_rate": 5.232399633503107e-06, "loss": 0.8605, "step": 4932 }, { "epoch": 0.67, "learning_rate": 5.228541002600172e-06, "loss": 0.8433, "step": 4933 }, { "epoch": 0.67, "learning_rate": 5.224683291321182e-06, "loss": 0.8508, "step": 4934 }, { "epoch": 0.67, "learning_rate": 5.220826500409651e-06, "loss": 0.8667, "step": 4935 }, { "epoch": 0.67, "learning_rate": 5.216970630608913e-06, "loss": 0.8673, "step": 4936 }, { "epoch": 0.67, "learning_rate": 5.213115682662124e-06, "loss": 0.7993, "step": 4937 }, { "epoch": 0.67, "learning_rate": 5.209261657312274e-06, "loss": 0.8164, "step": 4938 }, { "epoch": 0.67, "learning_rate": 5.2054085553021595e-06, "loss": 0.913, "step": 4939 }, { "epoch": 0.67, "learning_rate": 5.201556377374406e-06, "loss": 0.8507, "step": 4940 }, { "epoch": 0.67, "learning_rate": 5.197705124271459e-06, "loss": 0.8465, "step": 4941 }, { "epoch": 0.67, "learning_rate": 5.193854796735592e-06, "loss": 0.8177, "step": 4942 }, { "epoch": 0.67, "learning_rate": 5.190005395508893e-06, "loss": 0.8609, "step": 4943 }, { "epoch": 0.67, "learning_rate": 5.186156921333272e-06, "loss": 0.849, "step": 4944 }, { "epoch": 0.67, "learning_rate": 5.182309374950463e-06, "loss": 0.797, "step": 4945 }, { "epoch": 0.67, "learning_rate": 5.178462757102018e-06, "loss": 0.8473, "step": 4946 }, { "epoch": 0.67, "learning_rate": 5.1746170685293186e-06, "loss": 0.8183, "step": 4947 }, { "epoch": 0.67, "learning_rate": 5.170772309973558e-06, "loss": 0.8507, "step": 4948 }, { "epoch": 0.67, "learning_rate": 5.16692848217575e-06, "loss": 0.838, "step": 4949 }, { "epoch": 0.67, "learning_rate": 5.163085585876733e-06, "loss": 0.8553, "step": 4950 }, { "epoch": 0.67, "learning_rate": 5.159243621817169e-06, "loss": 0.8803, "step": 4951 }, { "epoch": 0.67, "learning_rate": 5.1554025907375345e-06, "loss": 0.906, "step": 4952 }, { "epoch": 0.67, "learning_rate": 5.151562493378128e-06, "loss": 0.8429, "step": 4953 }, { "epoch": 0.67, "learning_rate": 5.147723330479069e-06, "loss": 0.8718, "step": 4954 }, { "epoch": 0.67, "learning_rate": 5.14388510278029e-06, "loss": 0.8814, "step": 4955 }, { "epoch": 0.67, "learning_rate": 5.140047811021556e-06, "loss": 0.872, "step": 4956 }, { "epoch": 0.67, "learning_rate": 5.13621145594244e-06, "loss": 0.8559, "step": 4957 }, { "epoch": 0.67, "learning_rate": 5.132376038282347e-06, "loss": 0.8253, "step": 4958 }, { "epoch": 0.67, "learning_rate": 5.128541558780487e-06, "loss": 0.8548, "step": 4959 }, { "epoch": 0.67, "learning_rate": 5.124708018175894e-06, "loss": 0.8479, "step": 4960 }, { "epoch": 0.67, "learning_rate": 5.120875417207431e-06, "loss": 0.7718, "step": 4961 }, { "epoch": 0.67, "learning_rate": 5.117043756613766e-06, "loss": 0.8442, "step": 4962 }, { "epoch": 0.67, "learning_rate": 5.113213037133395e-06, "loss": 0.8865, "step": 4963 }, { "epoch": 0.67, "learning_rate": 5.109383259504626e-06, "loss": 0.8861, "step": 4964 }, { "epoch": 0.67, "learning_rate": 5.105554424465584e-06, "loss": 0.7997, "step": 4965 }, { "epoch": 0.67, "learning_rate": 5.101726532754228e-06, "loss": 0.8938, "step": 4966 }, { "epoch": 0.67, "learning_rate": 5.0978995851083165e-06, "loss": 0.8685, "step": 4967 }, { "epoch": 0.67, "learning_rate": 5.094073582265437e-06, "loss": 0.9235, "step": 4968 }, { "epoch": 0.67, "learning_rate": 5.090248524962988e-06, "loss": 0.8839, "step": 4969 }, { "epoch": 0.67, "learning_rate": 5.086424413938194e-06, "loss": 0.8939, "step": 4970 }, { "epoch": 0.67, "learning_rate": 5.08260124992809e-06, "loss": 0.8748, "step": 4971 }, { "epoch": 0.67, "learning_rate": 5.078779033669532e-06, "loss": 0.8263, "step": 4972 }, { "epoch": 0.67, "learning_rate": 5.07495776589919e-06, "loss": 0.8225, "step": 4973 }, { "epoch": 0.67, "learning_rate": 5.071137447353551e-06, "loss": 0.8652, "step": 4974 }, { "epoch": 0.67, "learning_rate": 5.06731807876893e-06, "loss": 0.8845, "step": 4975 }, { "epoch": 0.67, "learning_rate": 5.063499660881447e-06, "loss": 0.8081, "step": 4976 }, { "epoch": 0.67, "learning_rate": 5.0596821944270406e-06, "loss": 0.8004, "step": 4977 }, { "epoch": 0.67, "learning_rate": 5.055865680141463e-06, "loss": 0.8098, "step": 4978 }, { "epoch": 0.67, "learning_rate": 5.052050118760297e-06, "loss": 0.845, "step": 4979 }, { "epoch": 0.67, "learning_rate": 5.048235511018928e-06, "loss": 0.8809, "step": 4980 }, { "epoch": 0.68, "learning_rate": 5.044421857652561e-06, "loss": 0.8611, "step": 4981 }, { "epoch": 0.68, "learning_rate": 5.0406091593962195e-06, "loss": 0.8407, "step": 4982 }, { "epoch": 0.68, "learning_rate": 5.036797416984736e-06, "loss": 0.8763, "step": 4983 }, { "epoch": 0.68, "learning_rate": 5.032986631152772e-06, "loss": 0.8809, "step": 4984 }, { "epoch": 0.68, "learning_rate": 5.029176802634794e-06, "loss": 0.8903, "step": 4985 }, { "epoch": 0.68, "learning_rate": 5.025367932165086e-06, "loss": 0.823, "step": 4986 }, { "epoch": 0.68, "learning_rate": 5.021560020477749e-06, "loss": 0.8481, "step": 4987 }, { "epoch": 0.68, "learning_rate": 5.017753068306692e-06, "loss": 0.8881, "step": 4988 }, { "epoch": 0.68, "learning_rate": 5.013947076385657e-06, "loss": 0.8768, "step": 4989 }, { "epoch": 0.68, "learning_rate": 5.010142045448181e-06, "loss": 0.8109, "step": 4990 }, { "epoch": 0.68, "learning_rate": 5.006337976227627e-06, "loss": 0.8738, "step": 4991 }, { "epoch": 0.68, "learning_rate": 5.002534869457165e-06, "loss": 0.8635, "step": 4992 }, { "epoch": 0.68, "learning_rate": 4.998732725869791e-06, "loss": 0.8464, "step": 4993 }, { "epoch": 0.68, "learning_rate": 4.9949315461983075e-06, "loss": 0.8426, "step": 4994 }, { "epoch": 0.68, "learning_rate": 4.99113133117533e-06, "loss": 0.9102, "step": 4995 }, { "epoch": 0.68, "learning_rate": 4.9873320815332906e-06, "loss": 0.8599, "step": 4996 }, { "epoch": 0.68, "learning_rate": 4.9835337980044315e-06, "loss": 0.9293, "step": 4997 }, { "epoch": 0.68, "learning_rate": 4.97973648132082e-06, "loss": 0.8774, "step": 4998 }, { "epoch": 0.68, "learning_rate": 4.975940132214326e-06, "loss": 0.8484, "step": 4999 }, { "epoch": 0.68, "learning_rate": 4.972144751416632e-06, "loss": 0.8861, "step": 5000 }, { "epoch": 0.68, "learning_rate": 4.968350339659247e-06, "loss": 0.9106, "step": 5001 }, { "epoch": 0.68, "learning_rate": 4.964556897673475e-06, "loss": 0.8924, "step": 5002 }, { "epoch": 0.68, "learning_rate": 4.960764426190451e-06, "loss": 0.909, "step": 5003 }, { "epoch": 0.68, "learning_rate": 4.9569729259411104e-06, "loss": 0.8118, "step": 5004 }, { "epoch": 0.68, "learning_rate": 4.953182397656206e-06, "loss": 0.7899, "step": 5005 }, { "epoch": 0.68, "learning_rate": 4.9493928420663e-06, "loss": 0.8256, "step": 5006 }, { "epoch": 0.68, "learning_rate": 4.945604259901771e-06, "loss": 0.7893, "step": 5007 }, { "epoch": 0.68, "learning_rate": 4.941816651892813e-06, "loss": 0.7844, "step": 5008 }, { "epoch": 0.68, "learning_rate": 4.938030018769424e-06, "loss": 0.8418, "step": 5009 }, { "epoch": 0.68, "learning_rate": 4.93424436126142e-06, "loss": 0.9042, "step": 5010 }, { "epoch": 0.68, "learning_rate": 4.930459680098423e-06, "loss": 0.8808, "step": 5011 }, { "epoch": 0.68, "learning_rate": 4.926675976009878e-06, "loss": 0.8376, "step": 5012 }, { "epoch": 0.68, "learning_rate": 4.92289324972503e-06, "loss": 0.8099, "step": 5013 }, { "epoch": 0.68, "learning_rate": 4.919111501972943e-06, "loss": 0.8278, "step": 5014 }, { "epoch": 0.68, "learning_rate": 4.915330733482486e-06, "loss": 0.8734, "step": 5015 }, { "epoch": 0.68, "learning_rate": 4.911550944982343e-06, "loss": 0.8467, "step": 5016 }, { "epoch": 0.68, "learning_rate": 4.9077721372010135e-06, "loss": 0.8299, "step": 5017 }, { "epoch": 0.68, "learning_rate": 4.9039943108668e-06, "loss": 0.8188, "step": 5018 }, { "epoch": 0.68, "learning_rate": 4.90021746670782e-06, "loss": 0.7954, "step": 5019 }, { "epoch": 0.68, "learning_rate": 4.896441605451998e-06, "loss": 0.8416, "step": 5020 }, { "epoch": 0.68, "learning_rate": 4.892666727827079e-06, "loss": 0.8064, "step": 5021 }, { "epoch": 0.68, "learning_rate": 4.888892834560608e-06, "loss": 0.882, "step": 5022 }, { "epoch": 0.68, "learning_rate": 4.885119926379943e-06, "loss": 0.816, "step": 5023 }, { "epoch": 0.68, "learning_rate": 4.8813480040122526e-06, "loss": 0.8899, "step": 5024 }, { "epoch": 0.68, "learning_rate": 4.877577068184513e-06, "loss": 0.8362, "step": 5025 }, { "epoch": 0.68, "learning_rate": 4.873807119623521e-06, "loss": 0.8522, "step": 5026 }, { "epoch": 0.68, "learning_rate": 4.870038159055871e-06, "loss": 0.8403, "step": 5027 }, { "epoch": 0.68, "learning_rate": 4.86627018720797e-06, "loss": 0.9141, "step": 5028 }, { "epoch": 0.68, "learning_rate": 4.862503204806031e-06, "loss": 0.913, "step": 5029 }, { "epoch": 0.68, "learning_rate": 4.858737212576091e-06, "loss": 0.8797, "step": 5030 }, { "epoch": 0.68, "learning_rate": 4.854972211243981e-06, "loss": 0.8454, "step": 5031 }, { "epoch": 0.68, "learning_rate": 4.851208201535347e-06, "loss": 0.8772, "step": 5032 }, { "epoch": 0.68, "learning_rate": 4.84744518417564e-06, "loss": 0.8534, "step": 5033 }, { "epoch": 0.68, "learning_rate": 4.843683159890121e-06, "loss": 0.8397, "step": 5034 }, { "epoch": 0.68, "learning_rate": 4.83992212940387e-06, "loss": 0.8637, "step": 5035 }, { "epoch": 0.68, "learning_rate": 4.83616209344176e-06, "loss": 0.8649, "step": 5036 }, { "epoch": 0.68, "learning_rate": 4.832403052728481e-06, "loss": 0.8894, "step": 5037 }, { "epoch": 0.68, "learning_rate": 4.828645007988524e-06, "loss": 0.8846, "step": 5038 }, { "epoch": 0.68, "learning_rate": 4.824887959946203e-06, "loss": 0.8743, "step": 5039 }, { "epoch": 0.68, "learning_rate": 4.821131909325624e-06, "loss": 0.8237, "step": 5040 }, { "epoch": 0.68, "learning_rate": 4.817376856850707e-06, "loss": 0.8365, "step": 5041 }, { "epoch": 0.68, "learning_rate": 4.813622803245181e-06, "loss": 0.8167, "step": 5042 }, { "epoch": 0.68, "learning_rate": 4.809869749232577e-06, "loss": 0.8454, "step": 5043 }, { "epoch": 0.68, "learning_rate": 4.806117695536241e-06, "loss": 0.8485, "step": 5044 }, { "epoch": 0.68, "learning_rate": 4.802366642879326e-06, "loss": 0.8607, "step": 5045 }, { "epoch": 0.68, "learning_rate": 4.798616591984784e-06, "loss": 0.8918, "step": 5046 }, { "epoch": 0.68, "learning_rate": 4.79486754357538e-06, "loss": 0.8671, "step": 5047 }, { "epoch": 0.68, "learning_rate": 4.791119498373683e-06, "loss": 0.8549, "step": 5048 }, { "epoch": 0.68, "learning_rate": 4.787372457102067e-06, "loss": 0.8766, "step": 5049 }, { "epoch": 0.68, "learning_rate": 4.783626420482724e-06, "loss": 0.9152, "step": 5050 }, { "epoch": 0.68, "learning_rate": 4.779881389237638e-06, "loss": 0.9312, "step": 5051 }, { "epoch": 0.68, "learning_rate": 4.776137364088608e-06, "loss": 0.892, "step": 5052 }, { "epoch": 0.68, "learning_rate": 4.772394345757228e-06, "loss": 0.8482, "step": 5053 }, { "epoch": 0.69, "learning_rate": 4.768652334964919e-06, "loss": 0.8132, "step": 5054 }, { "epoch": 0.69, "learning_rate": 4.7649113324328854e-06, "loss": 0.9198, "step": 5055 }, { "epoch": 0.69, "learning_rate": 4.761171338882151e-06, "loss": 0.8423, "step": 5056 }, { "epoch": 0.69, "learning_rate": 4.75743235503354e-06, "loss": 0.8992, "step": 5057 }, { "epoch": 0.69, "learning_rate": 4.753694381607679e-06, "loss": 0.8899, "step": 5058 }, { "epoch": 0.69, "learning_rate": 4.74995741932501e-06, "loss": 0.8003, "step": 5059 }, { "epoch": 0.69, "learning_rate": 4.746221468905773e-06, "loss": 0.7672, "step": 5060 }, { "epoch": 0.69, "learning_rate": 4.742486531070011e-06, "loss": 0.8026, "step": 5061 }, { "epoch": 0.69, "learning_rate": 4.7387526065375725e-06, "loss": 0.8655, "step": 5062 }, { "epoch": 0.69, "learning_rate": 4.7350196960281205e-06, "loss": 0.8317, "step": 5063 }, { "epoch": 0.69, "learning_rate": 4.73128780026111e-06, "loss": 0.8039, "step": 5064 }, { "epoch": 0.69, "learning_rate": 4.727556919955808e-06, "loss": 0.8692, "step": 5065 }, { "epoch": 0.69, "learning_rate": 4.723827055831281e-06, "loss": 0.8564, "step": 5066 }, { "epoch": 0.69, "learning_rate": 4.720098208606397e-06, "loss": 0.85, "step": 5067 }, { "epoch": 0.69, "learning_rate": 4.716370378999844e-06, "loss": 0.8506, "step": 5068 }, { "epoch": 0.69, "learning_rate": 4.712643567730096e-06, "loss": 0.8537, "step": 5069 }, { "epoch": 0.69, "learning_rate": 4.708917775515439e-06, "loss": 0.853, "step": 5070 }, { "epoch": 0.69, "learning_rate": 4.7051930030739566e-06, "loss": 0.8559, "step": 5071 }, { "epoch": 0.69, "learning_rate": 4.701469251123548e-06, "loss": 0.766, "step": 5072 }, { "epoch": 0.69, "learning_rate": 4.6977465203819036e-06, "loss": 0.8569, "step": 5073 }, { "epoch": 0.69, "learning_rate": 4.694024811566523e-06, "loss": 0.8354, "step": 5074 }, { "epoch": 0.69, "learning_rate": 4.690304125394707e-06, "loss": 0.8145, "step": 5075 }, { "epoch": 0.69, "learning_rate": 4.686584462583555e-06, "loss": 0.8309, "step": 5076 }, { "epoch": 0.69, "learning_rate": 4.6828658238499805e-06, "loss": 0.9131, "step": 5077 }, { "epoch": 0.69, "learning_rate": 4.679148209910689e-06, "loss": 0.8782, "step": 5078 }, { "epoch": 0.69, "learning_rate": 4.675431621482195e-06, "loss": 0.8493, "step": 5079 }, { "epoch": 0.69, "learning_rate": 4.671716059280806e-06, "loss": 0.8125, "step": 5080 }, { "epoch": 0.69, "learning_rate": 4.668001524022648e-06, "loss": 0.8352, "step": 5081 }, { "epoch": 0.69, "learning_rate": 4.664288016423635e-06, "loss": 0.9126, "step": 5082 }, { "epoch": 0.69, "learning_rate": 4.660575537199487e-06, "loss": 0.8327, "step": 5083 }, { "epoch": 0.69, "learning_rate": 4.656864087065726e-06, "loss": 0.8747, "step": 5084 }, { "epoch": 0.69, "learning_rate": 4.653153666737672e-06, "loss": 0.9258, "step": 5085 }, { "epoch": 0.69, "learning_rate": 4.649444276930458e-06, "loss": 0.9098, "step": 5086 }, { "epoch": 0.69, "learning_rate": 4.645735918359009e-06, "loss": 0.8365, "step": 5087 }, { "epoch": 0.69, "learning_rate": 4.642028591738046e-06, "loss": 0.9125, "step": 5088 }, { "epoch": 0.69, "learning_rate": 4.638322297782109e-06, "loss": 0.8638, "step": 5089 }, { "epoch": 0.69, "learning_rate": 4.634617037205517e-06, "loss": 0.7542, "step": 5090 }, { "epoch": 0.69, "learning_rate": 4.630912810722411e-06, "loss": 0.8144, "step": 5091 }, { "epoch": 0.69, "learning_rate": 4.627209619046718e-06, "loss": 0.8374, "step": 5092 }, { "epoch": 0.69, "learning_rate": 4.6235074628921705e-06, "loss": 0.861, "step": 5093 }, { "epoch": 0.69, "learning_rate": 4.6198063429722995e-06, "loss": 0.8807, "step": 5094 }, { "epoch": 0.69, "learning_rate": 4.616106260000437e-06, "loss": 0.839, "step": 5095 }, { "epoch": 0.69, "learning_rate": 4.612407214689721e-06, "loss": 0.8852, "step": 5096 }, { "epoch": 0.69, "learning_rate": 4.608709207753081e-06, "loss": 0.841, "step": 5097 }, { "epoch": 0.69, "learning_rate": 4.605012239903253e-06, "loss": 0.7987, "step": 5098 }, { "epoch": 0.69, "learning_rate": 4.601316311852761e-06, "loss": 0.8783, "step": 5099 }, { "epoch": 0.69, "learning_rate": 4.597621424313948e-06, "loss": 0.8788, "step": 5100 }, { "epoch": 0.69, "learning_rate": 4.593927577998941e-06, "loss": 0.8432, "step": 5101 }, { "epoch": 0.69, "learning_rate": 4.590234773619671e-06, "loss": 0.8785, "step": 5102 }, { "epoch": 0.69, "learning_rate": 4.586543011887869e-06, "loss": 0.8247, "step": 5103 }, { "epoch": 0.69, "learning_rate": 4.582852293515057e-06, "loss": 0.8066, "step": 5104 }, { "epoch": 0.69, "learning_rate": 4.579162619212576e-06, "loss": 0.8549, "step": 5105 }, { "epoch": 0.69, "learning_rate": 4.575473989691546e-06, "loss": 0.8581, "step": 5106 }, { "epoch": 0.69, "learning_rate": 4.571786405662893e-06, "loss": 0.8976, "step": 5107 }, { "epoch": 0.69, "learning_rate": 4.56809986783734e-06, "loss": 0.8746, "step": 5108 }, { "epoch": 0.69, "learning_rate": 4.564414376925407e-06, "loss": 0.8691, "step": 5109 }, { "epoch": 0.69, "learning_rate": 4.560729933637422e-06, "loss": 0.7845, "step": 5110 }, { "epoch": 0.69, "learning_rate": 4.5570465386834995e-06, "loss": 0.8439, "step": 5111 }, { "epoch": 0.69, "learning_rate": 4.553364192773556e-06, "loss": 0.7906, "step": 5112 }, { "epoch": 0.69, "learning_rate": 4.549682896617304e-06, "loss": 0.8408, "step": 5113 }, { "epoch": 0.69, "learning_rate": 4.546002650924261e-06, "loss": 0.845, "step": 5114 }, { "epoch": 0.69, "learning_rate": 4.542323456403733e-06, "loss": 0.8863, "step": 5115 }, { "epoch": 0.69, "learning_rate": 4.538645313764828e-06, "loss": 0.8618, "step": 5116 }, { "epoch": 0.69, "learning_rate": 4.53496822371645e-06, "loss": 0.8489, "step": 5117 }, { "epoch": 0.69, "learning_rate": 4.531292186967298e-06, "loss": 0.8683, "step": 5118 }, { "epoch": 0.69, "learning_rate": 4.527617204225875e-06, "loss": 0.8163, "step": 5119 }, { "epoch": 0.69, "learning_rate": 4.523943276200476e-06, "loss": 0.8007, "step": 5120 }, { "epoch": 0.69, "learning_rate": 4.5202704035991895e-06, "loss": 0.8029, "step": 5121 }, { "epoch": 0.69, "learning_rate": 4.5165985871299045e-06, "loss": 0.8268, "step": 5122 }, { "epoch": 0.69, "learning_rate": 4.51292782750031e-06, "loss": 0.8838, "step": 5123 }, { "epoch": 0.69, "learning_rate": 4.509258125417886e-06, "loss": 0.8224, "step": 5124 }, { "epoch": 0.69, "learning_rate": 4.5055894815899084e-06, "loss": 0.8251, "step": 5125 }, { "epoch": 0.69, "learning_rate": 4.5019218967234515e-06, "loss": 0.8828, "step": 5126 }, { "epoch": 0.69, "learning_rate": 4.4982553715253804e-06, "loss": 0.8714, "step": 5127 }, { "epoch": 0.7, "learning_rate": 4.494589906702369e-06, "loss": 0.8128, "step": 5128 }, { "epoch": 0.7, "learning_rate": 4.490925502960874e-06, "loss": 0.8475, "step": 5129 }, { "epoch": 0.7, "learning_rate": 4.487262161007153e-06, "loss": 0.8846, "step": 5130 }, { "epoch": 0.7, "learning_rate": 4.4835998815472515e-06, "loss": 0.8146, "step": 5131 }, { "epoch": 0.7, "learning_rate": 4.479938665287021e-06, "loss": 0.8855, "step": 5132 }, { "epoch": 0.7, "learning_rate": 4.47627851293211e-06, "loss": 0.9098, "step": 5133 }, { "epoch": 0.7, "learning_rate": 4.472619425187947e-06, "loss": 0.8406, "step": 5134 }, { "epoch": 0.7, "learning_rate": 4.4689614027597685e-06, "loss": 0.8681, "step": 5135 }, { "epoch": 0.7, "learning_rate": 4.4653044463525975e-06, "loss": 0.9118, "step": 5136 }, { "epoch": 0.7, "learning_rate": 4.4616485566712534e-06, "loss": 0.8191, "step": 5137 }, { "epoch": 0.7, "learning_rate": 4.457993734420357e-06, "loss": 0.8175, "step": 5138 }, { "epoch": 0.7, "learning_rate": 4.454339980304317e-06, "loss": 0.8934, "step": 5139 }, { "epoch": 0.7, "learning_rate": 4.450687295027335e-06, "loss": 0.8343, "step": 5140 }, { "epoch": 0.7, "learning_rate": 4.447035679293407e-06, "loss": 0.8088, "step": 5141 }, { "epoch": 0.7, "learning_rate": 4.44338513380633e-06, "loss": 0.8819, "step": 5142 }, { "epoch": 0.7, "learning_rate": 4.439735659269688e-06, "loss": 0.9133, "step": 5143 }, { "epoch": 0.7, "learning_rate": 4.436087256386859e-06, "loss": 0.8666, "step": 5144 }, { "epoch": 0.7, "learning_rate": 4.432439925861015e-06, "loss": 0.8049, "step": 5145 }, { "epoch": 0.7, "learning_rate": 4.428793668395118e-06, "loss": 0.8345, "step": 5146 }, { "epoch": 0.7, "learning_rate": 4.425148484691936e-06, "loss": 0.8111, "step": 5147 }, { "epoch": 0.7, "learning_rate": 4.421504375454016e-06, "loss": 0.8521, "step": 5148 }, { "epoch": 0.7, "learning_rate": 4.417861341383702e-06, "loss": 0.8278, "step": 5149 }, { "epoch": 0.7, "learning_rate": 4.414219383183129e-06, "loss": 0.8454, "step": 5150 }, { "epoch": 0.7, "learning_rate": 4.410578501554236e-06, "loss": 0.8127, "step": 5151 }, { "epoch": 0.7, "learning_rate": 4.406938697198741e-06, "loss": 0.9068, "step": 5152 }, { "epoch": 0.7, "learning_rate": 4.403299970818159e-06, "loss": 0.8666, "step": 5153 }, { "epoch": 0.7, "learning_rate": 4.399662323113798e-06, "loss": 0.7871, "step": 5154 }, { "epoch": 0.7, "learning_rate": 4.396025754786755e-06, "loss": 0.8524, "step": 5155 }, { "epoch": 0.7, "learning_rate": 4.392390266537926e-06, "loss": 0.8545, "step": 5156 }, { "epoch": 0.7, "learning_rate": 4.3887558590679925e-06, "loss": 0.7974, "step": 5157 }, { "epoch": 0.7, "learning_rate": 4.385122533077429e-06, "loss": 0.8218, "step": 5158 }, { "epoch": 0.7, "learning_rate": 4.381490289266505e-06, "loss": 0.8486, "step": 5159 }, { "epoch": 0.7, "learning_rate": 4.37785912833527e-06, "loss": 0.8648, "step": 5160 }, { "epoch": 0.7, "learning_rate": 4.374229050983585e-06, "loss": 0.8794, "step": 5161 }, { "epoch": 0.7, "learning_rate": 4.370600057911084e-06, "loss": 0.8085, "step": 5162 }, { "epoch": 0.7, "learning_rate": 4.366972149817199e-06, "loss": 0.8687, "step": 5163 }, { "epoch": 0.7, "learning_rate": 4.3633453274011506e-06, "loss": 0.9017, "step": 5164 }, { "epoch": 0.7, "learning_rate": 4.359719591361957e-06, "loss": 0.8144, "step": 5165 }, { "epoch": 0.7, "learning_rate": 4.356094942398421e-06, "loss": 0.8379, "step": 5166 }, { "epoch": 0.7, "learning_rate": 4.352471381209134e-06, "loss": 0.8362, "step": 5167 }, { "epoch": 0.7, "learning_rate": 4.3488489084924825e-06, "loss": 0.8553, "step": 5168 }, { "epoch": 0.7, "learning_rate": 4.345227524946637e-06, "loss": 0.9101, "step": 5169 }, { "epoch": 0.7, "learning_rate": 4.341607231269569e-06, "loss": 0.8401, "step": 5170 }, { "epoch": 0.7, "learning_rate": 4.337988028159031e-06, "loss": 0.8871, "step": 5171 }, { "epoch": 0.7, "learning_rate": 4.334369916312569e-06, "loss": 0.7879, "step": 5172 }, { "epoch": 0.7, "learning_rate": 4.330752896427509e-06, "loss": 0.8413, "step": 5173 }, { "epoch": 0.7, "learning_rate": 4.327136969200987e-06, "loss": 0.8355, "step": 5174 }, { "epoch": 0.7, "learning_rate": 4.323522135329907e-06, "loss": 0.8844, "step": 5175 }, { "epoch": 0.7, "learning_rate": 4.3199083955109785e-06, "loss": 0.8359, "step": 5176 }, { "epoch": 0.7, "learning_rate": 4.3162957504406915e-06, "loss": 0.8533, "step": 5177 }, { "epoch": 0.7, "learning_rate": 4.312684200815324e-06, "loss": 0.7955, "step": 5178 }, { "epoch": 0.7, "learning_rate": 4.309073747330943e-06, "loss": 0.8323, "step": 5179 }, { "epoch": 0.7, "learning_rate": 4.3054643906834145e-06, "loss": 0.8722, "step": 5180 }, { "epoch": 0.7, "learning_rate": 4.3018561315683825e-06, "loss": 0.8431, "step": 5181 }, { "epoch": 0.7, "learning_rate": 4.2982489706812815e-06, "loss": 0.8357, "step": 5182 }, { "epoch": 0.7, "learning_rate": 4.294642908717332e-06, "loss": 0.8694, "step": 5183 }, { "epoch": 0.7, "learning_rate": 4.291037946371551e-06, "loss": 0.8477, "step": 5184 }, { "epoch": 0.7, "learning_rate": 4.287434084338739e-06, "loss": 0.8966, "step": 5185 }, { "epoch": 0.7, "learning_rate": 4.28383132331348e-06, "loss": 0.832, "step": 5186 }, { "epoch": 0.7, "learning_rate": 4.280229663990152e-06, "loss": 0.8727, "step": 5187 }, { "epoch": 0.7, "learning_rate": 4.276629107062914e-06, "loss": 0.8631, "step": 5188 }, { "epoch": 0.7, "learning_rate": 4.2730296532257244e-06, "loss": 0.9061, "step": 5189 }, { "epoch": 0.7, "learning_rate": 4.269431303172318e-06, "loss": 0.934, "step": 5190 }, { "epoch": 0.7, "learning_rate": 4.26583405759622e-06, "loss": 0.8551, "step": 5191 }, { "epoch": 0.7, "learning_rate": 4.262237917190739e-06, "loss": 0.8013, "step": 5192 }, { "epoch": 0.7, "learning_rate": 4.258642882648984e-06, "loss": 0.8684, "step": 5193 }, { "epoch": 0.7, "learning_rate": 4.255048954663835e-06, "loss": 0.8303, "step": 5194 }, { "epoch": 0.7, "learning_rate": 4.251456133927968e-06, "loss": 0.8659, "step": 5195 }, { "epoch": 0.7, "learning_rate": 4.247864421133841e-06, "loss": 0.7553, "step": 5196 }, { "epoch": 0.7, "learning_rate": 4.244273816973698e-06, "loss": 0.7693, "step": 5197 }, { "epoch": 0.7, "learning_rate": 4.240684322139579e-06, "loss": 0.8368, "step": 5198 }, { "epoch": 0.7, "learning_rate": 4.237095937323298e-06, "loss": 0.8927, "step": 5199 }, { "epoch": 0.7, "learning_rate": 4.23350866321646e-06, "loss": 0.8224, "step": 5200 }, { "epoch": 0.7, "learning_rate": 4.229922500510454e-06, "loss": 0.8675, "step": 5201 }, { "epoch": 0.71, "learning_rate": 4.226337449896462e-06, "loss": 0.9048, "step": 5202 }, { "epoch": 0.71, "learning_rate": 4.222753512065444e-06, "loss": 0.7897, "step": 5203 }, { "epoch": 0.71, "learning_rate": 4.219170687708147e-06, "loss": 0.859, "step": 5204 }, { "epoch": 0.71, "learning_rate": 4.2155889775151045e-06, "loss": 0.8731, "step": 5205 }, { "epoch": 0.71, "learning_rate": 4.212008382176631e-06, "loss": 0.8437, "step": 5206 }, { "epoch": 0.71, "learning_rate": 4.208428902382839e-06, "loss": 0.8608, "step": 5207 }, { "epoch": 0.71, "learning_rate": 4.204850538823612e-06, "loss": 0.828, "step": 5208 }, { "epoch": 0.71, "learning_rate": 4.201273292188622e-06, "loss": 0.9138, "step": 5209 }, { "epoch": 0.71, "learning_rate": 4.197697163167328e-06, "loss": 0.8361, "step": 5210 }, { "epoch": 0.71, "learning_rate": 4.194122152448976e-06, "loss": 0.7974, "step": 5211 }, { "epoch": 0.71, "learning_rate": 4.190548260722591e-06, "loss": 0.8578, "step": 5212 }, { "epoch": 0.71, "learning_rate": 4.186975488676987e-06, "loss": 0.8392, "step": 5213 }, { "epoch": 0.71, "learning_rate": 4.183403837000755e-06, "loss": 0.881, "step": 5214 }, { "epoch": 0.71, "learning_rate": 4.179833306382275e-06, "loss": 0.8505, "step": 5215 }, { "epoch": 0.71, "learning_rate": 4.176263897509717e-06, "loss": 0.8375, "step": 5216 }, { "epoch": 0.71, "learning_rate": 4.172695611071025e-06, "loss": 0.8067, "step": 5217 }, { "epoch": 0.71, "learning_rate": 4.16912844775393e-06, "loss": 0.8591, "step": 5218 }, { "epoch": 0.71, "learning_rate": 4.165562408245942e-06, "loss": 0.8339, "step": 5219 }, { "epoch": 0.71, "learning_rate": 4.16199749323437e-06, "loss": 0.8879, "step": 5220 }, { "epoch": 0.71, "learning_rate": 4.158433703406285e-06, "loss": 0.8471, "step": 5221 }, { "epoch": 0.71, "learning_rate": 4.154871039448561e-06, "loss": 0.942, "step": 5222 }, { "epoch": 0.71, "learning_rate": 4.15130950204784e-06, "loss": 0.8427, "step": 5223 }, { "epoch": 0.71, "learning_rate": 4.147749091890555e-06, "loss": 0.8923, "step": 5224 }, { "epoch": 0.71, "learning_rate": 4.144189809662913e-06, "loss": 0.7336, "step": 5225 }, { "epoch": 0.71, "learning_rate": 4.140631656050919e-06, "loss": 0.8797, "step": 5226 }, { "epoch": 0.71, "learning_rate": 4.137074631740346e-06, "loss": 0.7941, "step": 5227 }, { "epoch": 0.71, "learning_rate": 4.133518737416757e-06, "loss": 0.8713, "step": 5228 }, { "epoch": 0.71, "learning_rate": 4.129963973765493e-06, "loss": 0.8466, "step": 5229 }, { "epoch": 0.71, "learning_rate": 4.126410341471676e-06, "loss": 0.8663, "step": 5230 }, { "epoch": 0.71, "learning_rate": 4.12285784122022e-06, "loss": 0.9005, "step": 5231 }, { "epoch": 0.71, "learning_rate": 4.119306473695811e-06, "loss": 0.8617, "step": 5232 }, { "epoch": 0.71, "learning_rate": 4.1157562395829186e-06, "loss": 0.8337, "step": 5233 }, { "epoch": 0.71, "learning_rate": 4.112207139565792e-06, "loss": 0.7852, "step": 5234 }, { "epoch": 0.71, "learning_rate": 4.10865917432847e-06, "loss": 0.8554, "step": 5235 }, { "epoch": 0.71, "learning_rate": 4.105112344554765e-06, "loss": 0.9162, "step": 5236 }, { "epoch": 0.71, "learning_rate": 4.101566650928273e-06, "loss": 0.8389, "step": 5237 }, { "epoch": 0.71, "learning_rate": 4.098022094132371e-06, "loss": 0.8517, "step": 5238 }, { "epoch": 0.71, "learning_rate": 4.094478674850212e-06, "loss": 0.83, "step": 5239 }, { "epoch": 0.71, "learning_rate": 4.090936393764743e-06, "loss": 0.807, "step": 5240 }, { "epoch": 0.71, "learning_rate": 4.087395251558679e-06, "loss": 0.8732, "step": 5241 }, { "epoch": 0.71, "learning_rate": 4.083855248914519e-06, "loss": 0.9042, "step": 5242 }, { "epoch": 0.71, "learning_rate": 4.080316386514541e-06, "loss": 0.8467, "step": 5243 }, { "epoch": 0.71, "learning_rate": 4.076778665040811e-06, "loss": 0.8702, "step": 5244 }, { "epoch": 0.71, "learning_rate": 4.073242085175167e-06, "loss": 0.8688, "step": 5245 }, { "epoch": 0.71, "learning_rate": 4.069706647599229e-06, "loss": 0.8295, "step": 5246 }, { "epoch": 0.71, "learning_rate": 4.066172352994395e-06, "loss": 0.8087, "step": 5247 }, { "epoch": 0.71, "learning_rate": 4.062639202041845e-06, "loss": 0.8854, "step": 5248 }, { "epoch": 0.71, "learning_rate": 4.059107195422544e-06, "loss": 0.8373, "step": 5249 }, { "epoch": 0.71, "learning_rate": 4.055576333817226e-06, "loss": 0.7991, "step": 5250 }, { "epoch": 0.71, "learning_rate": 4.052046617906412e-06, "loss": 0.8363, "step": 5251 }, { "epoch": 0.71, "learning_rate": 4.048518048370394e-06, "loss": 0.8053, "step": 5252 }, { "epoch": 0.71, "learning_rate": 4.044990625889255e-06, "loss": 0.8456, "step": 5253 }, { "epoch": 0.71, "learning_rate": 4.041464351142847e-06, "loss": 0.8749, "step": 5254 }, { "epoch": 0.71, "learning_rate": 4.037939224810807e-06, "loss": 0.8644, "step": 5255 }, { "epoch": 0.71, "learning_rate": 4.034415247572545e-06, "loss": 0.8513, "step": 5256 }, { "epoch": 0.71, "learning_rate": 4.0308924201072495e-06, "loss": 0.863, "step": 5257 }, { "epoch": 0.71, "learning_rate": 4.027370743093898e-06, "loss": 0.8421, "step": 5258 }, { "epoch": 0.71, "learning_rate": 4.023850217211234e-06, "loss": 0.9146, "step": 5259 }, { "epoch": 0.71, "learning_rate": 4.020330843137784e-06, "loss": 0.8328, "step": 5260 }, { "epoch": 0.71, "learning_rate": 4.01681262155185e-06, "loss": 0.8472, "step": 5261 }, { "epoch": 0.71, "learning_rate": 4.013295553131515e-06, "loss": 0.8222, "step": 5262 }, { "epoch": 0.71, "learning_rate": 4.009779638554645e-06, "loss": 0.8843, "step": 5263 }, { "epoch": 0.71, "learning_rate": 4.0062648784988735e-06, "loss": 0.828, "step": 5264 }, { "epoch": 0.71, "learning_rate": 4.002751273641613e-06, "loss": 0.8723, "step": 5265 }, { "epoch": 0.71, "learning_rate": 3.999238824660058e-06, "loss": 0.8177, "step": 5266 }, { "epoch": 0.71, "learning_rate": 3.995727532231174e-06, "loss": 0.748, "step": 5267 }, { "epoch": 0.71, "learning_rate": 3.992217397031715e-06, "loss": 0.8343, "step": 5268 }, { "epoch": 0.71, "learning_rate": 3.9887084197382e-06, "loss": 0.786, "step": 5269 }, { "epoch": 0.71, "learning_rate": 3.985200601026931e-06, "loss": 0.8016, "step": 5270 }, { "epoch": 0.71, "learning_rate": 3.981693941573979e-06, "loss": 0.7788, "step": 5271 }, { "epoch": 0.71, "learning_rate": 3.978188442055207e-06, "loss": 0.8068, "step": 5272 }, { "epoch": 0.71, "learning_rate": 3.97468410314624e-06, "loss": 0.8733, "step": 5273 }, { "epoch": 0.71, "learning_rate": 3.971180925522487e-06, "loss": 0.8306, "step": 5274 }, { "epoch": 0.71, "learning_rate": 3.9676789098591275e-06, "loss": 0.8932, "step": 5275 }, { "epoch": 0.72, "learning_rate": 3.964178056831117e-06, "loss": 0.8565, "step": 5276 }, { "epoch": 0.72, "learning_rate": 3.9606783671132e-06, "loss": 0.8332, "step": 5277 }, { "epoch": 0.72, "learning_rate": 3.95717984137988e-06, "loss": 0.8458, "step": 5278 }, { "epoch": 0.72, "learning_rate": 3.953682480305445e-06, "loss": 0.8033, "step": 5279 }, { "epoch": 0.72, "learning_rate": 3.950186284563956e-06, "loss": 0.9582, "step": 5280 }, { "epoch": 0.72, "learning_rate": 3.946691254829246e-06, "loss": 0.8199, "step": 5281 }, { "epoch": 0.72, "learning_rate": 3.9431973917749345e-06, "loss": 0.8552, "step": 5282 }, { "epoch": 0.72, "learning_rate": 3.939704696074405e-06, "loss": 0.8187, "step": 5283 }, { "epoch": 0.72, "learning_rate": 3.936213168400821e-06, "loss": 0.8267, "step": 5284 }, { "epoch": 0.72, "learning_rate": 3.932722809427114e-06, "loss": 0.8428, "step": 5285 }, { "epoch": 0.72, "learning_rate": 3.929233619826006e-06, "loss": 0.8504, "step": 5286 }, { "epoch": 0.72, "learning_rate": 3.925745600269978e-06, "loss": 0.9141, "step": 5287 }, { "epoch": 0.72, "learning_rate": 3.922258751431293e-06, "loss": 0.8517, "step": 5288 }, { "epoch": 0.72, "learning_rate": 3.918773073981983e-06, "loss": 0.8422, "step": 5289 }, { "epoch": 0.72, "learning_rate": 3.915288568593857e-06, "loss": 0.8098, "step": 5290 }, { "epoch": 0.72, "learning_rate": 3.911805235938506e-06, "loss": 0.8419, "step": 5291 }, { "epoch": 0.72, "learning_rate": 3.908323076687282e-06, "loss": 0.8673, "step": 5292 }, { "epoch": 0.72, "learning_rate": 3.90484209151132e-06, "loss": 0.8232, "step": 5293 }, { "epoch": 0.72, "learning_rate": 3.901362281081519e-06, "loss": 0.9096, "step": 5294 }, { "epoch": 0.72, "learning_rate": 3.897883646068565e-06, "loss": 0.7838, "step": 5295 }, { "epoch": 0.72, "learning_rate": 3.894406187142908e-06, "loss": 0.8716, "step": 5296 }, { "epoch": 0.72, "learning_rate": 3.890929904974775e-06, "loss": 0.8081, "step": 5297 }, { "epoch": 0.72, "learning_rate": 3.887454800234161e-06, "loss": 0.8375, "step": 5298 }, { "epoch": 0.72, "learning_rate": 3.883980873590839e-06, "loss": 0.7711, "step": 5299 }, { "epoch": 0.72, "learning_rate": 3.880508125714357e-06, "loss": 0.8799, "step": 5300 }, { "epoch": 0.72, "learning_rate": 3.877036557274032e-06, "loss": 0.8576, "step": 5301 }, { "epoch": 0.72, "learning_rate": 3.8735661689389535e-06, "loss": 0.7843, "step": 5302 }, { "epoch": 0.72, "learning_rate": 3.870096961377981e-06, "loss": 0.8395, "step": 5303 }, { "epoch": 0.72, "learning_rate": 3.866628935259755e-06, "loss": 0.8065, "step": 5304 }, { "epoch": 0.72, "learning_rate": 3.863162091252682e-06, "loss": 0.864, "step": 5305 }, { "epoch": 0.72, "learning_rate": 3.859696430024939e-06, "loss": 0.8305, "step": 5306 }, { "epoch": 0.72, "learning_rate": 3.856231952244483e-06, "loss": 0.9248, "step": 5307 }, { "epoch": 0.72, "learning_rate": 3.8527686585790345e-06, "loss": 0.814, "step": 5308 }, { "epoch": 0.72, "learning_rate": 3.849306549696087e-06, "loss": 0.8714, "step": 5309 }, { "epoch": 0.72, "learning_rate": 3.845845626262913e-06, "loss": 0.8786, "step": 5310 }, { "epoch": 0.72, "learning_rate": 3.842385888946548e-06, "loss": 0.8938, "step": 5311 }, { "epoch": 0.72, "learning_rate": 3.838927338413804e-06, "loss": 0.8474, "step": 5312 }, { "epoch": 0.72, "learning_rate": 3.835469975331256e-06, "loss": 0.8286, "step": 5313 }, { "epoch": 0.72, "learning_rate": 3.832013800365266e-06, "loss": 0.8604, "step": 5314 }, { "epoch": 0.72, "learning_rate": 3.8285588141819545e-06, "loss": 0.7386, "step": 5315 }, { "epoch": 0.72, "learning_rate": 3.825105017447213e-06, "loss": 0.8842, "step": 5316 }, { "epoch": 0.72, "learning_rate": 3.8216524108267085e-06, "loss": 0.8398, "step": 5317 }, { "epoch": 0.72, "learning_rate": 3.818200994985872e-06, "loss": 0.8134, "step": 5318 }, { "epoch": 0.72, "learning_rate": 3.81475077058992e-06, "loss": 0.7939, "step": 5319 }, { "epoch": 0.72, "learning_rate": 3.811301738303823e-06, "loss": 0.8921, "step": 5320 }, { "epoch": 0.72, "learning_rate": 3.8078538987923284e-06, "loss": 0.8272, "step": 5321 }, { "epoch": 0.72, "learning_rate": 3.804407252719949e-06, "loss": 0.7778, "step": 5322 }, { "epoch": 0.72, "learning_rate": 3.8009618007509807e-06, "loss": 0.9102, "step": 5323 }, { "epoch": 0.72, "learning_rate": 3.797517543549476e-06, "loss": 0.9247, "step": 5324 }, { "epoch": 0.72, "learning_rate": 3.794074481779261e-06, "loss": 0.8534, "step": 5325 }, { "epoch": 0.72, "learning_rate": 3.790632616103932e-06, "loss": 0.9264, "step": 5326 }, { "epoch": 0.72, "learning_rate": 3.7871919471868525e-06, "loss": 0.8357, "step": 5327 }, { "epoch": 0.72, "learning_rate": 3.7837524756911625e-06, "loss": 0.8972, "step": 5328 }, { "epoch": 0.72, "learning_rate": 3.7803142022797632e-06, "loss": 0.8905, "step": 5329 }, { "epoch": 0.72, "learning_rate": 3.776877127615329e-06, "loss": 0.8896, "step": 5330 }, { "epoch": 0.72, "learning_rate": 3.7734412523603027e-06, "loss": 0.9127, "step": 5331 }, { "epoch": 0.72, "learning_rate": 3.770006577176889e-06, "loss": 0.8217, "step": 5332 }, { "epoch": 0.72, "learning_rate": 3.766573102727078e-06, "loss": 0.8186, "step": 5333 }, { "epoch": 0.72, "learning_rate": 3.7631408296726126e-06, "loss": 0.8751, "step": 5334 }, { "epoch": 0.72, "learning_rate": 3.7597097586750097e-06, "loss": 0.8536, "step": 5335 }, { "epoch": 0.72, "learning_rate": 3.756279890395551e-06, "loss": 0.8951, "step": 5336 }, { "epoch": 0.72, "learning_rate": 3.7528512254952975e-06, "loss": 0.842, "step": 5337 }, { "epoch": 0.72, "learning_rate": 3.7494237646350675e-06, "loss": 0.8749, "step": 5338 }, { "epoch": 0.72, "learning_rate": 3.74599750847545e-06, "loss": 0.8634, "step": 5339 }, { "epoch": 0.72, "learning_rate": 3.742572457676801e-06, "loss": 0.859, "step": 5340 }, { "epoch": 0.72, "learning_rate": 3.739148612899243e-06, "loss": 0.8242, "step": 5341 }, { "epoch": 0.72, "learning_rate": 3.735725974802675e-06, "loss": 0.8135, "step": 5342 }, { "epoch": 0.72, "learning_rate": 3.7323045440467543e-06, "loss": 0.8541, "step": 5343 }, { "epoch": 0.72, "learning_rate": 3.7288843212909065e-06, "loss": 0.832, "step": 5344 }, { "epoch": 0.72, "learning_rate": 3.7254653071943235e-06, "loss": 0.8528, "step": 5345 }, { "epoch": 0.72, "learning_rate": 3.7220475024159743e-06, "loss": 0.8814, "step": 5346 }, { "epoch": 0.72, "learning_rate": 3.718630907614582e-06, "loss": 0.9062, "step": 5347 }, { "epoch": 0.72, "learning_rate": 3.715215523448642e-06, "loss": 0.9046, "step": 5348 }, { "epoch": 0.72, "learning_rate": 3.711801350576417e-06, "loss": 0.8994, "step": 5349 }, { "epoch": 0.73, "learning_rate": 3.7083883896559326e-06, "loss": 0.9404, "step": 5350 }, { "epoch": 0.73, "learning_rate": 3.704976641344985e-06, "loss": 0.8566, "step": 5351 }, { "epoch": 0.73, "learning_rate": 3.70156610630114e-06, "loss": 0.8608, "step": 5352 }, { "epoch": 0.73, "learning_rate": 3.69815678518172e-06, "loss": 0.8423, "step": 5353 }, { "epoch": 0.73, "learning_rate": 3.6947486786438193e-06, "loss": 0.8569, "step": 5354 }, { "epoch": 0.73, "learning_rate": 3.6913417873442937e-06, "loss": 0.8506, "step": 5355 }, { "epoch": 0.73, "learning_rate": 3.687936111939775e-06, "loss": 0.8334, "step": 5356 }, { "epoch": 0.73, "learning_rate": 3.6845316530866493e-06, "loss": 0.8042, "step": 5357 }, { "epoch": 0.73, "learning_rate": 3.681128411441074e-06, "loss": 0.8966, "step": 5358 }, { "epoch": 0.73, "learning_rate": 3.6777263876589697e-06, "loss": 0.8522, "step": 5359 }, { "epoch": 0.73, "learning_rate": 3.67432558239602e-06, "loss": 0.884, "step": 5360 }, { "epoch": 0.73, "learning_rate": 3.6709259963076836e-06, "loss": 0.8537, "step": 5361 }, { "epoch": 0.73, "learning_rate": 3.6675276300491738e-06, "loss": 0.8245, "step": 5362 }, { "epoch": 0.73, "learning_rate": 3.664130484275473e-06, "loss": 0.8739, "step": 5363 }, { "epoch": 0.73, "learning_rate": 3.6607345596413247e-06, "loss": 0.8382, "step": 5364 }, { "epoch": 0.73, "learning_rate": 3.657339856801245e-06, "loss": 0.861, "step": 5365 }, { "epoch": 0.73, "learning_rate": 3.6539463764095095e-06, "loss": 0.893, "step": 5366 }, { "epoch": 0.73, "learning_rate": 3.6505541191201554e-06, "loss": 0.8401, "step": 5367 }, { "epoch": 0.73, "learning_rate": 3.647163085586989e-06, "loss": 0.8157, "step": 5368 }, { "epoch": 0.73, "learning_rate": 3.6437732764635737e-06, "loss": 0.8577, "step": 5369 }, { "epoch": 0.73, "learning_rate": 3.6403846924032502e-06, "loss": 0.8046, "step": 5370 }, { "epoch": 0.73, "learning_rate": 3.6369973340591114e-06, "loss": 0.8455, "step": 5371 }, { "epoch": 0.73, "learning_rate": 3.6336112020840176e-06, "loss": 0.8845, "step": 5372 }, { "epoch": 0.73, "learning_rate": 3.630226297130589e-06, "loss": 0.8021, "step": 5373 }, { "epoch": 0.73, "learning_rate": 3.6268426198512197e-06, "loss": 0.8881, "step": 5374 }, { "epoch": 0.73, "learning_rate": 3.6234601708980576e-06, "loss": 0.93, "step": 5375 }, { "epoch": 0.73, "learning_rate": 3.620078950923016e-06, "loss": 0.8358, "step": 5376 }, { "epoch": 0.73, "learning_rate": 3.6166989605777727e-06, "loss": 0.8477, "step": 5377 }, { "epoch": 0.73, "learning_rate": 3.6133202005137647e-06, "loss": 0.8221, "step": 5378 }, { "epoch": 0.73, "learning_rate": 3.6099426713822006e-06, "loss": 0.8408, "step": 5379 }, { "epoch": 0.73, "learning_rate": 3.606566373834044e-06, "loss": 0.8908, "step": 5380 }, { "epoch": 0.73, "learning_rate": 3.6031913085200222e-06, "loss": 0.8299, "step": 5381 }, { "epoch": 0.73, "learning_rate": 3.5998174760906233e-06, "loss": 0.8976, "step": 5382 }, { "epoch": 0.73, "learning_rate": 3.596444877196109e-06, "loss": 0.8445, "step": 5383 }, { "epoch": 0.73, "learning_rate": 3.593073512486489e-06, "loss": 0.8552, "step": 5384 }, { "epoch": 0.73, "learning_rate": 3.5897033826115424e-06, "loss": 0.8045, "step": 5385 }, { "epoch": 0.73, "learning_rate": 3.5863344882208084e-06, "loss": 0.8565, "step": 5386 }, { "epoch": 0.73, "learning_rate": 3.5829668299635856e-06, "loss": 0.8889, "step": 5387 }, { "epoch": 0.73, "learning_rate": 3.5796004084889436e-06, "loss": 0.7914, "step": 5388 }, { "epoch": 0.73, "learning_rate": 3.5762352244457045e-06, "loss": 0.7852, "step": 5389 }, { "epoch": 0.73, "learning_rate": 3.572871278482455e-06, "loss": 0.9224, "step": 5390 }, { "epoch": 0.73, "learning_rate": 3.5695085712475417e-06, "loss": 0.8883, "step": 5391 }, { "epoch": 0.73, "learning_rate": 3.5661471033890714e-06, "loss": 0.8119, "step": 5392 }, { "epoch": 0.73, "learning_rate": 3.562786875554918e-06, "loss": 0.8084, "step": 5393 }, { "epoch": 0.73, "learning_rate": 3.559427888392716e-06, "loss": 0.7797, "step": 5394 }, { "epoch": 0.73, "learning_rate": 3.5560701425498536e-06, "loss": 0.8843, "step": 5395 }, { "epoch": 0.73, "learning_rate": 3.5527136386734827e-06, "loss": 0.8323, "step": 5396 }, { "epoch": 0.73, "learning_rate": 3.5493583774105157e-06, "loss": 0.8127, "step": 5397 }, { "epoch": 0.73, "learning_rate": 3.546004359407632e-06, "loss": 0.8233, "step": 5398 }, { "epoch": 0.73, "learning_rate": 3.5426515853112643e-06, "loss": 0.8353, "step": 5399 }, { "epoch": 0.73, "learning_rate": 3.5393000557676037e-06, "loss": 0.7838, "step": 5400 }, { "epoch": 0.73, "learning_rate": 3.5359497714226086e-06, "loss": 0.7634, "step": 5401 }, { "epoch": 0.73, "learning_rate": 3.532600732921989e-06, "loss": 0.8192, "step": 5402 }, { "epoch": 0.73, "learning_rate": 3.5292529409112264e-06, "loss": 0.8906, "step": 5403 }, { "epoch": 0.73, "learning_rate": 3.525906396035552e-06, "loss": 0.7919, "step": 5404 }, { "epoch": 0.73, "learning_rate": 3.5225610989399593e-06, "loss": 0.8148, "step": 5405 }, { "epoch": 0.73, "learning_rate": 3.5192170502691993e-06, "loss": 0.8304, "step": 5406 }, { "epoch": 0.73, "learning_rate": 3.515874250667791e-06, "loss": 0.839, "step": 5407 }, { "epoch": 0.73, "learning_rate": 3.5125327007800037e-06, "loss": 0.8943, "step": 5408 }, { "epoch": 0.73, "learning_rate": 3.509192401249869e-06, "loss": 0.8263, "step": 5409 }, { "epoch": 0.73, "learning_rate": 3.505853352721177e-06, "loss": 0.806, "step": 5410 }, { "epoch": 0.73, "learning_rate": 3.5025155558374735e-06, "loss": 0.8312, "step": 5411 }, { "epoch": 0.73, "learning_rate": 3.499179011242073e-06, "loss": 0.875, "step": 5412 }, { "epoch": 0.73, "learning_rate": 3.4958437195780394e-06, "loss": 0.7987, "step": 5413 }, { "epoch": 0.73, "learning_rate": 3.4925096814881988e-06, "loss": 0.8942, "step": 5414 }, { "epoch": 0.73, "learning_rate": 3.4891768976151284e-06, "loss": 0.8235, "step": 5415 }, { "epoch": 0.73, "learning_rate": 3.4858453686011808e-06, "loss": 0.8504, "step": 5416 }, { "epoch": 0.73, "learning_rate": 3.482515095088449e-06, "loss": 0.8483, "step": 5417 }, { "epoch": 0.73, "learning_rate": 3.4791860777187924e-06, "loss": 0.8942, "step": 5418 }, { "epoch": 0.73, "learning_rate": 3.4758583171338277e-06, "loss": 0.8904, "step": 5419 }, { "epoch": 0.73, "learning_rate": 3.4725318139749255e-06, "loss": 0.8568, "step": 5420 }, { "epoch": 0.73, "learning_rate": 3.4692065688832223e-06, "loss": 0.8221, "step": 5421 }, { "epoch": 0.73, "learning_rate": 3.4658825824996036e-06, "loss": 0.8888, "step": 5422 }, { "epoch": 0.74, "learning_rate": 3.4625598554647177e-06, "loss": 0.8335, "step": 5423 }, { "epoch": 0.74, "learning_rate": 3.459238388418963e-06, "loss": 0.7601, "step": 5424 }, { "epoch": 0.74, "learning_rate": 3.4559181820025067e-06, "loss": 0.8092, "step": 5425 }, { "epoch": 0.74, "learning_rate": 3.4525992368552652e-06, "loss": 0.8092, "step": 5426 }, { "epoch": 0.74, "learning_rate": 3.449281553616911e-06, "loss": 0.9401, "step": 5427 }, { "epoch": 0.74, "learning_rate": 3.445965132926877e-06, "loss": 0.7918, "step": 5428 }, { "epoch": 0.74, "learning_rate": 3.442649975424347e-06, "loss": 0.864, "step": 5429 }, { "epoch": 0.74, "learning_rate": 3.4393360817482733e-06, "loss": 0.824, "step": 5430 }, { "epoch": 0.74, "learning_rate": 3.4360234525373528e-06, "loss": 0.9056, "step": 5431 }, { "epoch": 0.74, "learning_rate": 3.4327120884300437e-06, "loss": 0.9431, "step": 5432 }, { "epoch": 0.74, "learning_rate": 3.429401990064555e-06, "loss": 0.8172, "step": 5433 }, { "epoch": 0.74, "learning_rate": 3.4260931580788635e-06, "loss": 0.8727, "step": 5434 }, { "epoch": 0.74, "learning_rate": 3.422785593110692e-06, "loss": 0.8991, "step": 5435 }, { "epoch": 0.74, "learning_rate": 3.419479295797522e-06, "loss": 0.887, "step": 5436 }, { "epoch": 0.74, "learning_rate": 3.4161742667765853e-06, "loss": 0.8291, "step": 5437 }, { "epoch": 0.74, "learning_rate": 3.4128705066848832e-06, "loss": 0.8881, "step": 5438 }, { "epoch": 0.74, "learning_rate": 3.409568016159155e-06, "loss": 0.8575, "step": 5439 }, { "epoch": 0.74, "learning_rate": 3.406266795835913e-06, "loss": 0.8237, "step": 5440 }, { "epoch": 0.74, "learning_rate": 3.40296684635141e-06, "loss": 0.7551, "step": 5441 }, { "epoch": 0.74, "learning_rate": 3.399668168341662e-06, "loss": 0.7918, "step": 5442 }, { "epoch": 0.74, "learning_rate": 3.3963707624424314e-06, "loss": 0.7842, "step": 5443 }, { "epoch": 0.74, "learning_rate": 3.3930746292892503e-06, "loss": 0.8364, "step": 5444 }, { "epoch": 0.74, "learning_rate": 3.389779769517393e-06, "loss": 0.9097, "step": 5445 }, { "epoch": 0.74, "learning_rate": 3.3864861837618914e-06, "loss": 0.798, "step": 5446 }, { "epoch": 0.74, "learning_rate": 3.383193872657533e-06, "loss": 0.8048, "step": 5447 }, { "epoch": 0.74, "learning_rate": 3.3799028368388554e-06, "loss": 0.8469, "step": 5448 }, { "epoch": 0.74, "learning_rate": 3.3766130769401617e-06, "loss": 0.8565, "step": 5449 }, { "epoch": 0.74, "learning_rate": 3.3733245935954973e-06, "loss": 0.8126, "step": 5450 }, { "epoch": 0.74, "learning_rate": 3.370037387438667e-06, "loss": 0.8622, "step": 5451 }, { "epoch": 0.74, "learning_rate": 3.366751459103227e-06, "loss": 0.8821, "step": 5452 }, { "epoch": 0.74, "learning_rate": 3.3634668092224853e-06, "loss": 0.9038, "step": 5453 }, { "epoch": 0.74, "learning_rate": 3.360183438429514e-06, "loss": 0.9509, "step": 5454 }, { "epoch": 0.74, "learning_rate": 3.3569013473571276e-06, "loss": 0.7578, "step": 5455 }, { "epoch": 0.74, "learning_rate": 3.3536205366378983e-06, "loss": 0.8908, "step": 5456 }, { "epoch": 0.74, "learning_rate": 3.3503410069041473e-06, "loss": 0.8269, "step": 5457 }, { "epoch": 0.74, "learning_rate": 3.347062758787959e-06, "loss": 0.8365, "step": 5458 }, { "epoch": 0.74, "learning_rate": 3.3437857929211604e-06, "loss": 0.8598, "step": 5459 }, { "epoch": 0.74, "learning_rate": 3.3405101099353367e-06, "loss": 0.8359, "step": 5460 }, { "epoch": 0.74, "learning_rate": 3.3372357104618237e-06, "loss": 0.8797, "step": 5461 }, { "epoch": 0.74, "learning_rate": 3.333962595131708e-06, "loss": 0.8347, "step": 5462 }, { "epoch": 0.74, "learning_rate": 3.330690764575837e-06, "loss": 0.7964, "step": 5463 }, { "epoch": 0.74, "learning_rate": 3.3274202194248004e-06, "loss": 0.8002, "step": 5464 }, { "epoch": 0.74, "learning_rate": 3.324150960308947e-06, "loss": 0.8756, "step": 5465 }, { "epoch": 0.74, "learning_rate": 3.3208829878583714e-06, "loss": 0.7843, "step": 5466 }, { "epoch": 0.74, "learning_rate": 3.3176163027029296e-06, "loss": 0.8564, "step": 5467 }, { "epoch": 0.74, "learning_rate": 3.314350905472221e-06, "loss": 0.8673, "step": 5468 }, { "epoch": 0.74, "learning_rate": 3.3110867967955993e-06, "loss": 0.8266, "step": 5469 }, { "epoch": 0.74, "learning_rate": 3.3078239773021726e-06, "loss": 0.7235, "step": 5470 }, { "epoch": 0.74, "learning_rate": 3.3045624476207916e-06, "loss": 0.8078, "step": 5471 }, { "epoch": 0.74, "learning_rate": 3.301302208380074e-06, "loss": 0.9156, "step": 5472 }, { "epoch": 0.74, "learning_rate": 3.2980432602083754e-06, "loss": 0.8148, "step": 5473 }, { "epoch": 0.74, "learning_rate": 3.2947856037338077e-06, "loss": 0.7416, "step": 5474 }, { "epoch": 0.74, "learning_rate": 3.29152923958423e-06, "loss": 0.8231, "step": 5475 }, { "epoch": 0.74, "learning_rate": 3.288274168387261e-06, "loss": 0.8475, "step": 5476 }, { "epoch": 0.74, "learning_rate": 3.2850203907702616e-06, "loss": 0.8159, "step": 5477 }, { "epoch": 0.74, "learning_rate": 3.281767907360347e-06, "loss": 0.8195, "step": 5478 }, { "epoch": 0.74, "learning_rate": 3.2785167187843825e-06, "loss": 0.892, "step": 5479 }, { "epoch": 0.74, "learning_rate": 3.2752668256689803e-06, "loss": 0.846, "step": 5480 }, { "epoch": 0.74, "learning_rate": 3.2720182286405088e-06, "loss": 0.8246, "step": 5481 }, { "epoch": 0.74, "learning_rate": 3.268770928325088e-06, "loss": 0.8219, "step": 5482 }, { "epoch": 0.74, "learning_rate": 3.265524925348582e-06, "loss": 0.8769, "step": 5483 }, { "epoch": 0.74, "learning_rate": 3.2622802203366057e-06, "loss": 0.8572, "step": 5484 }, { "epoch": 0.74, "learning_rate": 3.2590368139145212e-06, "loss": 0.8688, "step": 5485 }, { "epoch": 0.74, "learning_rate": 3.2557947067074524e-06, "loss": 0.9054, "step": 5486 }, { "epoch": 0.74, "learning_rate": 3.2525538993402605e-06, "loss": 0.8099, "step": 5487 }, { "epoch": 0.74, "learning_rate": 3.2493143924375616e-06, "loss": 0.8981, "step": 5488 }, { "epoch": 0.74, "learning_rate": 3.2460761866237177e-06, "loss": 0.8844, "step": 5489 }, { "epoch": 0.74, "learning_rate": 3.2428392825228405e-06, "loss": 0.8283, "step": 5490 }, { "epoch": 0.74, "learning_rate": 3.2396036807587993e-06, "loss": 0.8398, "step": 5491 }, { "epoch": 0.74, "learning_rate": 3.236369381955201e-06, "loss": 0.8445, "step": 5492 }, { "epoch": 0.74, "learning_rate": 3.233136386735407e-06, "loss": 0.8507, "step": 5493 }, { "epoch": 0.74, "learning_rate": 3.2299046957225233e-06, "loss": 0.8269, "step": 5494 }, { "epoch": 0.74, "learning_rate": 3.2266743095394124e-06, "loss": 0.8945, "step": 5495 }, { "epoch": 0.74, "learning_rate": 3.2234452288086802e-06, "loss": 0.8627, "step": 5496 }, { "epoch": 0.75, "learning_rate": 3.2202174541526808e-06, "loss": 0.8521, "step": 5497 }, { "epoch": 0.75, "learning_rate": 3.2169909861935157e-06, "loss": 0.8482, "step": 5498 }, { "epoch": 0.75, "learning_rate": 3.2137658255530325e-06, "loss": 0.8061, "step": 5499 }, { "epoch": 0.75, "learning_rate": 3.2105419728528387e-06, "loss": 0.8764, "step": 5500 }, { "epoch": 0.75, "learning_rate": 3.2073194287142774e-06, "loss": 0.8129, "step": 5501 }, { "epoch": 0.75, "learning_rate": 3.2040981937584435e-06, "loss": 0.9422, "step": 5502 }, { "epoch": 0.75, "learning_rate": 3.200878268606179e-06, "loss": 0.9117, "step": 5503 }, { "epoch": 0.75, "learning_rate": 3.197659653878071e-06, "loss": 0.843, "step": 5504 }, { "epoch": 0.75, "learning_rate": 3.1944423501944643e-06, "loss": 0.8902, "step": 5505 }, { "epoch": 0.75, "learning_rate": 3.1912263581754397e-06, "loss": 0.7894, "step": 5506 }, { "epoch": 0.75, "learning_rate": 3.18801167844083e-06, "loss": 0.8067, "step": 5507 }, { "epoch": 0.75, "learning_rate": 3.184798311610211e-06, "loss": 0.8398, "step": 5508 }, { "epoch": 0.75, "learning_rate": 3.1815862583029143e-06, "loss": 0.8362, "step": 5509 }, { "epoch": 0.75, "learning_rate": 3.1783755191380094e-06, "loss": 0.8443, "step": 5510 }, { "epoch": 0.75, "learning_rate": 3.1751660947343176e-06, "loss": 0.798, "step": 5511 }, { "epoch": 0.75, "learning_rate": 3.1719579857104042e-06, "loss": 0.8223, "step": 5512 }, { "epoch": 0.75, "learning_rate": 3.1687511926845793e-06, "loss": 0.8303, "step": 5513 }, { "epoch": 0.75, "learning_rate": 3.165545716274908e-06, "loss": 0.84, "step": 5514 }, { "epoch": 0.75, "learning_rate": 3.1623415570991923e-06, "loss": 0.909, "step": 5515 }, { "epoch": 0.75, "learning_rate": 3.159138715774983e-06, "loss": 0.7957, "step": 5516 }, { "epoch": 0.75, "learning_rate": 3.1559371929195758e-06, "loss": 0.857, "step": 5517 }, { "epoch": 0.75, "learning_rate": 3.1527369891500194e-06, "loss": 0.8282, "step": 5518 }, { "epoch": 0.75, "learning_rate": 3.149538105083101e-06, "loss": 0.8409, "step": 5519 }, { "epoch": 0.75, "learning_rate": 3.1463405413353533e-06, "loss": 0.8183, "step": 5520 }, { "epoch": 0.75, "learning_rate": 3.1431442985230585e-06, "loss": 0.8521, "step": 5521 }, { "epoch": 0.75, "learning_rate": 3.139949377262238e-06, "loss": 0.8498, "step": 5522 }, { "epoch": 0.75, "learning_rate": 3.1367557781686697e-06, "loss": 0.8883, "step": 5523 }, { "epoch": 0.75, "learning_rate": 3.1335635018578635e-06, "loss": 0.8607, "step": 5524 }, { "epoch": 0.75, "learning_rate": 3.1303725489450864e-06, "loss": 0.8014, "step": 5525 }, { "epoch": 0.75, "learning_rate": 3.127182920045343e-06, "loss": 0.8364, "step": 5526 }, { "epoch": 0.75, "learning_rate": 3.123994615773378e-06, "loss": 0.7851, "step": 5527 }, { "epoch": 0.75, "learning_rate": 3.1208076367436966e-06, "loss": 0.8935, "step": 5528 }, { "epoch": 0.75, "learning_rate": 3.1176219835705345e-06, "loss": 0.8714, "step": 5529 }, { "epoch": 0.75, "learning_rate": 3.1144376568678767e-06, "loss": 0.8363, "step": 5530 }, { "epoch": 0.75, "learning_rate": 3.1112546572494515e-06, "loss": 0.8436, "step": 5531 }, { "epoch": 0.75, "learning_rate": 3.1080729853287293e-06, "loss": 0.8532, "step": 5532 }, { "epoch": 0.75, "learning_rate": 3.1048926417189353e-06, "loss": 0.8954, "step": 5533 }, { "epoch": 0.75, "learning_rate": 3.101713627033026e-06, "loss": 0.8533, "step": 5534 }, { "epoch": 0.75, "learning_rate": 3.098535941883708e-06, "loss": 0.8246, "step": 5535 }, { "epoch": 0.75, "learning_rate": 3.095359586883425e-06, "loss": 0.7758, "step": 5536 }, { "epoch": 0.75, "learning_rate": 3.092184562644378e-06, "loss": 0.842, "step": 5537 }, { "epoch": 0.75, "learning_rate": 3.0890108697785003e-06, "loss": 0.8209, "step": 5538 }, { "epoch": 0.75, "learning_rate": 3.0858385088974696e-06, "loss": 0.8713, "step": 5539 }, { "epoch": 0.75, "learning_rate": 3.08266748061271e-06, "loss": 0.8424, "step": 5540 }, { "epoch": 0.75, "learning_rate": 3.0794977855353835e-06, "loss": 0.8137, "step": 5541 }, { "epoch": 0.75, "learning_rate": 3.0763294242764064e-06, "loss": 0.8804, "step": 5542 }, { "epoch": 0.75, "learning_rate": 3.0731623974464265e-06, "loss": 0.841, "step": 5543 }, { "epoch": 0.75, "learning_rate": 3.06999670565584e-06, "loss": 0.8401, "step": 5544 }, { "epoch": 0.75, "learning_rate": 3.06683234951478e-06, "loss": 0.8464, "step": 5545 }, { "epoch": 0.75, "learning_rate": 3.0636693296331334e-06, "loss": 0.7644, "step": 5546 }, { "epoch": 0.75, "learning_rate": 3.0605076466205196e-06, "loss": 0.8189, "step": 5547 }, { "epoch": 0.75, "learning_rate": 3.0573473010863032e-06, "loss": 0.8414, "step": 5548 }, { "epoch": 0.75, "learning_rate": 3.0541882936395917e-06, "loss": 0.8603, "step": 5549 }, { "epoch": 0.75, "learning_rate": 3.0510306248892307e-06, "loss": 0.8296, "step": 5550 }, { "epoch": 0.75, "learning_rate": 3.0478742954438166e-06, "loss": 0.842, "step": 5551 }, { "epoch": 0.75, "learning_rate": 3.0447193059116818e-06, "loss": 0.8978, "step": 5552 }, { "epoch": 0.75, "learning_rate": 3.0415656569009e-06, "loss": 0.8263, "step": 5553 }, { "epoch": 0.75, "learning_rate": 3.0384133490192836e-06, "loss": 0.7855, "step": 5554 }, { "epoch": 0.75, "learning_rate": 3.0352623828743977e-06, "loss": 0.8494, "step": 5555 }, { "epoch": 0.75, "learning_rate": 3.0321127590735377e-06, "loss": 0.7921, "step": 5556 }, { "epoch": 0.75, "learning_rate": 3.028964478223745e-06, "loss": 0.8219, "step": 5557 }, { "epoch": 0.75, "learning_rate": 3.0258175409318015e-06, "loss": 0.7766, "step": 5558 }, { "epoch": 0.75, "learning_rate": 3.0226719478042267e-06, "loss": 0.8979, "step": 5559 }, { "epoch": 0.75, "learning_rate": 3.01952769944729e-06, "loss": 0.9416, "step": 5560 }, { "epoch": 0.75, "learning_rate": 3.0163847964669933e-06, "loss": 0.8423, "step": 5561 }, { "epoch": 0.75, "learning_rate": 3.0132432394690827e-06, "loss": 0.8109, "step": 5562 }, { "epoch": 0.75, "learning_rate": 3.010103029059043e-06, "loss": 0.8724, "step": 5563 }, { "epoch": 0.75, "learning_rate": 3.0069641658420965e-06, "loss": 0.7778, "step": 5564 }, { "epoch": 0.75, "learning_rate": 3.0038266504232194e-06, "loss": 0.7831, "step": 5565 }, { "epoch": 0.75, "learning_rate": 3.0006904834071126e-06, "loss": 0.8047, "step": 5566 }, { "epoch": 0.75, "learning_rate": 2.9975556653982252e-06, "loss": 0.874, "step": 5567 }, { "epoch": 0.75, "learning_rate": 2.9944221970007382e-06, "loss": 0.8375, "step": 5568 }, { "epoch": 0.75, "learning_rate": 2.991290078818585e-06, "loss": 0.8668, "step": 5569 }, { "epoch": 0.75, "learning_rate": 2.988159311455433e-06, "loss": 0.819, "step": 5570 }, { "epoch": 0.76, "learning_rate": 2.985029895514686e-06, "loss": 0.8374, "step": 5571 }, { "epoch": 0.76, "learning_rate": 2.9819018315994907e-06, "loss": 0.8174, "step": 5572 }, { "epoch": 0.76, "learning_rate": 2.9787751203127323e-06, "loss": 0.8904, "step": 5573 }, { "epoch": 0.76, "learning_rate": 2.975649762257031e-06, "loss": 0.7809, "step": 5574 }, { "epoch": 0.76, "learning_rate": 2.972525758034759e-06, "loss": 0.85, "step": 5575 }, { "epoch": 0.76, "learning_rate": 2.9694031082480135e-06, "loss": 0.8123, "step": 5576 }, { "epoch": 0.76, "learning_rate": 2.966281813498637e-06, "loss": 0.8426, "step": 5577 }, { "epoch": 0.76, "learning_rate": 2.9631618743882086e-06, "loss": 0.8489, "step": 5578 }, { "epoch": 0.76, "learning_rate": 2.960043291518052e-06, "loss": 0.8583, "step": 5579 }, { "epoch": 0.76, "learning_rate": 2.956926065489224e-06, "loss": 0.8374, "step": 5580 }, { "epoch": 0.76, "learning_rate": 2.95381019690252e-06, "loss": 0.9005, "step": 5581 }, { "epoch": 0.76, "learning_rate": 2.9506956863584734e-06, "loss": 0.855, "step": 5582 }, { "epoch": 0.76, "learning_rate": 2.947582534457357e-06, "loss": 0.8526, "step": 5583 }, { "epoch": 0.76, "learning_rate": 2.9444707417991857e-06, "loss": 0.8597, "step": 5584 }, { "epoch": 0.76, "learning_rate": 2.9413603089837084e-06, "loss": 0.7862, "step": 5585 }, { "epoch": 0.76, "learning_rate": 2.938251236610409e-06, "loss": 0.8363, "step": 5586 }, { "epoch": 0.76, "learning_rate": 2.935143525278512e-06, "loss": 0.8657, "step": 5587 }, { "epoch": 0.76, "learning_rate": 2.932037175586985e-06, "loss": 0.89, "step": 5588 }, { "epoch": 0.76, "learning_rate": 2.9289321881345257e-06, "loss": 0.8641, "step": 5589 }, { "epoch": 0.76, "learning_rate": 2.9258285635195717e-06, "loss": 0.865, "step": 5590 }, { "epoch": 0.76, "learning_rate": 2.9227263023402975e-06, "loss": 0.8882, "step": 5591 }, { "epoch": 0.76, "learning_rate": 2.9196254051946127e-06, "loss": 0.8493, "step": 5592 }, { "epoch": 0.76, "learning_rate": 2.9165258726801715e-06, "loss": 0.8425, "step": 5593 }, { "epoch": 0.76, "learning_rate": 2.9134277053943594e-06, "loss": 0.8845, "step": 5594 }, { "epoch": 0.76, "learning_rate": 2.910330903934299e-06, "loss": 0.8904, "step": 5595 }, { "epoch": 0.76, "learning_rate": 2.9072354688968463e-06, "loss": 0.859, "step": 5596 }, { "epoch": 0.76, "learning_rate": 2.904141400878604e-06, "loss": 0.923, "step": 5597 }, { "epoch": 0.76, "learning_rate": 2.9010487004759024e-06, "loss": 0.9006, "step": 5598 }, { "epoch": 0.76, "learning_rate": 2.897957368284812e-06, "loss": 0.8692, "step": 5599 }, { "epoch": 0.76, "learning_rate": 2.894867404901137e-06, "loss": 0.8433, "step": 5600 }, { "epoch": 0.76, "learning_rate": 2.891778810920417e-06, "loss": 0.8526, "step": 5601 }, { "epoch": 0.76, "learning_rate": 2.888691586937937e-06, "loss": 0.7946, "step": 5602 }, { "epoch": 0.76, "learning_rate": 2.8856057335487074e-06, "loss": 0.8324, "step": 5603 }, { "epoch": 0.76, "learning_rate": 2.8825212513474775e-06, "loss": 0.8532, "step": 5604 }, { "epoch": 0.76, "learning_rate": 2.8794381409287307e-06, "loss": 0.7859, "step": 5605 }, { "epoch": 0.76, "learning_rate": 2.876356402886694e-06, "loss": 0.8603, "step": 5606 }, { "epoch": 0.76, "learning_rate": 2.873276037815321e-06, "loss": 0.8478, "step": 5607 }, { "epoch": 0.76, "learning_rate": 2.870197046308304e-06, "loss": 0.8108, "step": 5608 }, { "epoch": 0.76, "learning_rate": 2.867119428959071e-06, "loss": 0.8509, "step": 5609 }, { "epoch": 0.76, "learning_rate": 2.86404318636078e-06, "loss": 0.8309, "step": 5610 }, { "epoch": 0.76, "learning_rate": 2.860968319106332e-06, "loss": 0.8107, "step": 5611 }, { "epoch": 0.76, "learning_rate": 2.857894827788362e-06, "loss": 0.885, "step": 5612 }, { "epoch": 0.76, "learning_rate": 2.8548227129992367e-06, "loss": 0.8205, "step": 5613 }, { "epoch": 0.76, "learning_rate": 2.8517519753310564e-06, "loss": 0.8542, "step": 5614 }, { "epoch": 0.76, "learning_rate": 2.848682615375653e-06, "loss": 0.8374, "step": 5615 }, { "epoch": 0.76, "learning_rate": 2.845614633724607e-06, "loss": 0.7781, "step": 5616 }, { "epoch": 0.76, "learning_rate": 2.8425480309692177e-06, "loss": 0.9255, "step": 5617 }, { "epoch": 0.76, "learning_rate": 2.8394828077005277e-06, "loss": 0.7644, "step": 5618 }, { "epoch": 0.76, "learning_rate": 2.8364189645093076e-06, "loss": 0.8354, "step": 5619 }, { "epoch": 0.76, "learning_rate": 2.8333565019860644e-06, "loss": 0.7367, "step": 5620 }, { "epoch": 0.76, "learning_rate": 2.830295420721044e-06, "loss": 0.9017, "step": 5621 }, { "epoch": 0.76, "learning_rate": 2.82723572130422e-06, "loss": 0.7814, "step": 5622 }, { "epoch": 0.76, "learning_rate": 2.8241774043253023e-06, "loss": 0.7802, "step": 5623 }, { "epoch": 0.76, "learning_rate": 2.821120470373733e-06, "loss": 0.8957, "step": 5624 }, { "epoch": 0.76, "learning_rate": 2.8180649200386835e-06, "loss": 0.91, "step": 5625 }, { "epoch": 0.76, "learning_rate": 2.815010753909071e-06, "loss": 0.8137, "step": 5626 }, { "epoch": 0.76, "learning_rate": 2.811957972573535e-06, "loss": 0.8524, "step": 5627 }, { "epoch": 0.76, "learning_rate": 2.8089065766204504e-06, "loss": 0.8828, "step": 5628 }, { "epoch": 0.76, "learning_rate": 2.8058565666379233e-06, "loss": 0.8772, "step": 5629 }, { "epoch": 0.76, "learning_rate": 2.8028079432138023e-06, "loss": 0.7986, "step": 5630 }, { "epoch": 0.76, "learning_rate": 2.799760706935658e-06, "loss": 0.8761, "step": 5631 }, { "epoch": 0.76, "learning_rate": 2.796714858390798e-06, "loss": 0.8084, "step": 5632 }, { "epoch": 0.76, "learning_rate": 2.7936703981662595e-06, "loss": 0.783, "step": 5633 }, { "epoch": 0.76, "learning_rate": 2.790627326848815e-06, "loss": 0.8026, "step": 5634 }, { "epoch": 0.76, "learning_rate": 2.7875856450249728e-06, "loss": 0.8443, "step": 5635 }, { "epoch": 0.76, "learning_rate": 2.784545353280966e-06, "loss": 0.8984, "step": 5636 }, { "epoch": 0.76, "learning_rate": 2.7815064522027645e-06, "loss": 0.82, "step": 5637 }, { "epoch": 0.76, "learning_rate": 2.7784689423760656e-06, "loss": 0.8125, "step": 5638 }, { "epoch": 0.76, "learning_rate": 2.775432824386307e-06, "loss": 0.7995, "step": 5639 }, { "epoch": 0.76, "learning_rate": 2.7723980988186514e-06, "loss": 0.8924, "step": 5640 }, { "epoch": 0.76, "learning_rate": 2.7693647662579927e-06, "loss": 0.8669, "step": 5641 }, { "epoch": 0.76, "learning_rate": 2.7663328272889588e-06, "loss": 0.8549, "step": 5642 }, { "epoch": 0.76, "learning_rate": 2.7633022824959055e-06, "loss": 0.889, "step": 5643 }, { "epoch": 0.76, "learning_rate": 2.7602731324629294e-06, "loss": 0.7667, "step": 5644 }, { "epoch": 0.77, "learning_rate": 2.7572453777738474e-06, "loss": 0.8392, "step": 5645 }, { "epoch": 0.77, "learning_rate": 2.7542190190122133e-06, "loss": 0.8267, "step": 5646 }, { "epoch": 0.77, "learning_rate": 2.751194056761306e-06, "loss": 0.8457, "step": 5647 }, { "epoch": 0.77, "learning_rate": 2.7481704916041475e-06, "loss": 0.8052, "step": 5648 }, { "epoch": 0.77, "learning_rate": 2.745148324123477e-06, "loss": 0.8311, "step": 5649 }, { "epoch": 0.77, "learning_rate": 2.7421275549017722e-06, "loss": 0.8824, "step": 5650 }, { "epoch": 0.77, "learning_rate": 2.7391081845212376e-06, "loss": 0.8693, "step": 5651 }, { "epoch": 0.77, "learning_rate": 2.7360902135638066e-06, "loss": 0.8439, "step": 5652 }, { "epoch": 0.77, "learning_rate": 2.7330736426111525e-06, "loss": 0.8385, "step": 5653 }, { "epoch": 0.77, "learning_rate": 2.7300584722446676e-06, "loss": 0.8072, "step": 5654 }, { "epoch": 0.77, "learning_rate": 2.7270447030454784e-06, "loss": 0.8555, "step": 5655 }, { "epoch": 0.77, "learning_rate": 2.7240323355944454e-06, "loss": 0.8649, "step": 5656 }, { "epoch": 0.77, "learning_rate": 2.72102137047215e-06, "loss": 0.8922, "step": 5657 }, { "epoch": 0.77, "learning_rate": 2.718011808258915e-06, "loss": 0.8423, "step": 5658 }, { "epoch": 0.77, "learning_rate": 2.715003649534783e-06, "loss": 0.9044, "step": 5659 }, { "epoch": 0.77, "learning_rate": 2.7119968948795285e-06, "loss": 0.8854, "step": 5660 }, { "epoch": 0.77, "learning_rate": 2.708991544872658e-06, "loss": 0.8293, "step": 5661 }, { "epoch": 0.77, "learning_rate": 2.7059876000934006e-06, "loss": 0.7802, "step": 5662 }, { "epoch": 0.77, "learning_rate": 2.7029850611207277e-06, "loss": 0.8593, "step": 5663 }, { "epoch": 0.77, "learning_rate": 2.6999839285333272e-06, "loss": 0.8647, "step": 5664 }, { "epoch": 0.77, "learning_rate": 2.6969842029096217e-06, "loss": 0.8403, "step": 5665 }, { "epoch": 0.77, "learning_rate": 2.6939858848277566e-06, "loss": 0.8827, "step": 5666 }, { "epoch": 0.77, "learning_rate": 2.690988974865617e-06, "loss": 0.9253, "step": 5667 }, { "epoch": 0.77, "learning_rate": 2.6879934736008097e-06, "loss": 0.882, "step": 5668 }, { "epoch": 0.77, "learning_rate": 2.684999381610668e-06, "loss": 0.8534, "step": 5669 }, { "epoch": 0.77, "learning_rate": 2.682006699472256e-06, "loss": 0.8989, "step": 5670 }, { "epoch": 0.77, "learning_rate": 2.679015427762366e-06, "loss": 0.8713, "step": 5671 }, { "epoch": 0.77, "learning_rate": 2.676025567057522e-06, "loss": 0.8603, "step": 5672 }, { "epoch": 0.77, "learning_rate": 2.673037117933971e-06, "loss": 0.8627, "step": 5673 }, { "epoch": 0.77, "learning_rate": 2.670050080967689e-06, "loss": 0.8472, "step": 5674 }, { "epoch": 0.77, "learning_rate": 2.6670644567343793e-06, "loss": 0.8413, "step": 5675 }, { "epoch": 0.77, "learning_rate": 2.6640802458094783e-06, "loss": 0.8914, "step": 5676 }, { "epoch": 0.77, "learning_rate": 2.661097448768144e-06, "loss": 0.8622, "step": 5677 }, { "epoch": 0.77, "learning_rate": 2.6581160661852635e-06, "loss": 0.827, "step": 5678 }, { "epoch": 0.77, "learning_rate": 2.6551360986354514e-06, "loss": 0.8405, "step": 5679 }, { "epoch": 0.77, "learning_rate": 2.652157546693046e-06, "loss": 0.7965, "step": 5680 }, { "epoch": 0.77, "learning_rate": 2.649180410932124e-06, "loss": 0.8747, "step": 5681 }, { "epoch": 0.77, "learning_rate": 2.6462046919264782e-06, "loss": 0.8608, "step": 5682 }, { "epoch": 0.77, "learning_rate": 2.6432303902496315e-06, "loss": 0.7983, "step": 5683 }, { "epoch": 0.77, "learning_rate": 2.6402575064748337e-06, "loss": 0.7796, "step": 5684 }, { "epoch": 0.77, "learning_rate": 2.637286041175059e-06, "loss": 0.8144, "step": 5685 }, { "epoch": 0.77, "learning_rate": 2.634315994923017e-06, "loss": 0.8802, "step": 5686 }, { "epoch": 0.77, "learning_rate": 2.631347368291134e-06, "loss": 0.801, "step": 5687 }, { "epoch": 0.77, "learning_rate": 2.628380161851567e-06, "loss": 0.8482, "step": 5688 }, { "epoch": 0.77, "learning_rate": 2.6254143761761942e-06, "loss": 0.8698, "step": 5689 }, { "epoch": 0.77, "learning_rate": 2.6224500118366313e-06, "loss": 0.8662, "step": 5690 }, { "epoch": 0.77, "learning_rate": 2.6194870694042097e-06, "loss": 0.8876, "step": 5691 }, { "epoch": 0.77, "learning_rate": 2.616525549449991e-06, "loss": 0.9043, "step": 5692 }, { "epoch": 0.77, "learning_rate": 2.6135654525447607e-06, "loss": 0.8786, "step": 5693 }, { "epoch": 0.77, "learning_rate": 2.6106067792590284e-06, "loss": 0.8863, "step": 5694 }, { "epoch": 0.77, "learning_rate": 2.6076495301630387e-06, "loss": 0.8702, "step": 5695 }, { "epoch": 0.77, "learning_rate": 2.604693705826751e-06, "loss": 0.914, "step": 5696 }, { "epoch": 0.77, "learning_rate": 2.601739306819854e-06, "loss": 0.818, "step": 5697 }, { "epoch": 0.77, "learning_rate": 2.5987863337117604e-06, "loss": 0.8338, "step": 5698 }, { "epoch": 0.77, "learning_rate": 2.5958347870716106e-06, "loss": 0.8684, "step": 5699 }, { "epoch": 0.77, "learning_rate": 2.592884667468273e-06, "loss": 0.8694, "step": 5700 }, { "epoch": 0.77, "learning_rate": 2.5899359754703334e-06, "loss": 0.8836, "step": 5701 }, { "epoch": 0.77, "learning_rate": 2.5869887116461055e-06, "loss": 0.8403, "step": 5702 }, { "epoch": 0.77, "learning_rate": 2.5840428765636304e-06, "loss": 0.8651, "step": 5703 }, { "epoch": 0.77, "learning_rate": 2.581098470790667e-06, "loss": 0.8972, "step": 5704 }, { "epoch": 0.77, "learning_rate": 2.5781554948947097e-06, "loss": 0.8448, "step": 5705 }, { "epoch": 0.77, "learning_rate": 2.5752139494429673e-06, "loss": 0.8432, "step": 5706 }, { "epoch": 0.77, "learning_rate": 2.5722738350023768e-06, "loss": 0.8394, "step": 5707 }, { "epoch": 0.77, "learning_rate": 2.569335152139597e-06, "loss": 0.8452, "step": 5708 }, { "epoch": 0.77, "learning_rate": 2.5663979014210194e-06, "loss": 0.7936, "step": 5709 }, { "epoch": 0.77, "learning_rate": 2.5634620834127476e-06, "loss": 0.877, "step": 5710 }, { "epoch": 0.77, "learning_rate": 2.560527698680617e-06, "loss": 0.8329, "step": 5711 }, { "epoch": 0.77, "learning_rate": 2.5575947477901843e-06, "loss": 0.8627, "step": 5712 }, { "epoch": 0.77, "learning_rate": 2.554663231306724e-06, "loss": 0.8283, "step": 5713 }, { "epoch": 0.77, "learning_rate": 2.551733149795249e-06, "loss": 0.8092, "step": 5714 }, { "epoch": 0.77, "learning_rate": 2.5488045038204823e-06, "loss": 0.8968, "step": 5715 }, { "epoch": 0.77, "learning_rate": 2.5458772939468733e-06, "loss": 0.8738, "step": 5716 }, { "epoch": 0.77, "learning_rate": 2.5429515207385957e-06, "loss": 0.8082, "step": 5717 }, { "epoch": 0.78, "learning_rate": 2.5400271847595503e-06, "loss": 0.7874, "step": 5718 }, { "epoch": 0.78, "learning_rate": 2.5371042865733552e-06, "loss": 0.8411, "step": 5719 }, { "epoch": 0.78, "learning_rate": 2.5341828267433523e-06, "loss": 0.8715, "step": 5720 }, { "epoch": 0.78, "learning_rate": 2.531262805832607e-06, "loss": 0.8381, "step": 5721 }, { "epoch": 0.78, "learning_rate": 2.528344224403906e-06, "loss": 0.8295, "step": 5722 }, { "epoch": 0.78, "learning_rate": 2.5254270830197635e-06, "loss": 0.8258, "step": 5723 }, { "epoch": 0.78, "learning_rate": 2.522511382242413e-06, "loss": 0.8348, "step": 5724 }, { "epoch": 0.78, "learning_rate": 2.519597122633809e-06, "loss": 0.7906, "step": 5725 }, { "epoch": 0.78, "learning_rate": 2.5166843047556256e-06, "loss": 0.8066, "step": 5726 }, { "epoch": 0.78, "learning_rate": 2.513772929169268e-06, "loss": 0.8921, "step": 5727 }, { "epoch": 0.78, "learning_rate": 2.5108629964358577e-06, "loss": 0.8737, "step": 5728 }, { "epoch": 0.78, "learning_rate": 2.507954507116237e-06, "loss": 0.8576, "step": 5729 }, { "epoch": 0.78, "learning_rate": 2.5050474617709718e-06, "loss": 0.8507, "step": 5730 }, { "epoch": 0.78, "learning_rate": 2.5021418609603477e-06, "loss": 0.8557, "step": 5731 }, { "epoch": 0.78, "learning_rate": 2.4992377052443783e-06, "loss": 0.8084, "step": 5732 }, { "epoch": 0.78, "learning_rate": 2.4963349951827907e-06, "loss": 0.8426, "step": 5733 }, { "epoch": 0.78, "learning_rate": 2.4934337313350386e-06, "loss": 0.7922, "step": 5734 }, { "epoch": 0.78, "learning_rate": 2.4905339142602938e-06, "loss": 0.7978, "step": 5735 }, { "epoch": 0.78, "learning_rate": 2.487635544517448e-06, "loss": 0.818, "step": 5736 }, { "epoch": 0.78, "learning_rate": 2.4847386226651227e-06, "loss": 0.7469, "step": 5737 }, { "epoch": 0.78, "learning_rate": 2.48184314926165e-06, "loss": 0.8306, "step": 5738 }, { "epoch": 0.78, "learning_rate": 2.47894912486509e-06, "loss": 0.7803, "step": 5739 }, { "epoch": 0.78, "learning_rate": 2.4760565500332135e-06, "loss": 0.8015, "step": 5740 }, { "epoch": 0.78, "learning_rate": 2.473165425323528e-06, "loss": 0.8061, "step": 5741 }, { "epoch": 0.78, "learning_rate": 2.4702757512932463e-06, "loss": 0.8258, "step": 5742 }, { "epoch": 0.78, "learning_rate": 2.467387528499312e-06, "loss": 0.8512, "step": 5743 }, { "epoch": 0.78, "learning_rate": 2.4645007574983827e-06, "loss": 0.8932, "step": 5744 }, { "epoch": 0.78, "learning_rate": 2.4616154388468383e-06, "loss": 0.8311, "step": 5745 }, { "epoch": 0.78, "learning_rate": 2.4587315731007765e-06, "loss": 0.8786, "step": 5746 }, { "epoch": 0.78, "learning_rate": 2.4558491608160217e-06, "loss": 0.8747, "step": 5747 }, { "epoch": 0.78, "learning_rate": 2.4529682025481118e-06, "loss": 0.7877, "step": 5748 }, { "epoch": 0.78, "learning_rate": 2.4500886988523065e-06, "loss": 0.8186, "step": 5749 }, { "epoch": 0.78, "learning_rate": 2.4472106502835815e-06, "loss": 0.7742, "step": 5750 }, { "epoch": 0.78, "learning_rate": 2.444334057396641e-06, "loss": 0.8544, "step": 5751 }, { "epoch": 0.78, "learning_rate": 2.4414589207459018e-06, "loss": 0.8139, "step": 5752 }, { "epoch": 0.78, "learning_rate": 2.4385852408854993e-06, "loss": 0.8022, "step": 5753 }, { "epoch": 0.78, "learning_rate": 2.435713018369292e-06, "loss": 0.8929, "step": 5754 }, { "epoch": 0.78, "learning_rate": 2.4328422537508524e-06, "loss": 0.8827, "step": 5755 }, { "epoch": 0.78, "learning_rate": 2.429972947583481e-06, "loss": 0.8578, "step": 5756 }, { "epoch": 0.78, "learning_rate": 2.4271051004201896e-06, "loss": 0.8202, "step": 5757 }, { "epoch": 0.78, "learning_rate": 2.4242387128137092e-06, "loss": 0.8309, "step": 5758 }, { "epoch": 0.78, "learning_rate": 2.4213737853164887e-06, "loss": 0.8077, "step": 5759 }, { "epoch": 0.78, "learning_rate": 2.4185103184807045e-06, "loss": 0.7861, "step": 5760 }, { "epoch": 0.78, "learning_rate": 2.415648312858241e-06, "loss": 0.8607, "step": 5761 }, { "epoch": 0.78, "learning_rate": 2.412787769000706e-06, "loss": 0.8233, "step": 5762 }, { "epoch": 0.78, "learning_rate": 2.4099286874594243e-06, "loss": 0.8836, "step": 5763 }, { "epoch": 0.78, "learning_rate": 2.407071068785436e-06, "loss": 0.8241, "step": 5764 }, { "epoch": 0.78, "learning_rate": 2.404214913529508e-06, "loss": 0.8841, "step": 5765 }, { "epoch": 0.78, "learning_rate": 2.4013602222421162e-06, "loss": 0.8445, "step": 5766 }, { "epoch": 0.78, "learning_rate": 2.3985069954734576e-06, "loss": 0.8555, "step": 5767 }, { "epoch": 0.78, "learning_rate": 2.395655233773445e-06, "loss": 0.8538, "step": 5768 }, { "epoch": 0.78, "learning_rate": 2.392804937691716e-06, "loss": 0.8689, "step": 5769 }, { "epoch": 0.78, "learning_rate": 2.389956107777618e-06, "loss": 0.8227, "step": 5770 }, { "epoch": 0.78, "learning_rate": 2.3871087445802175e-06, "loss": 0.8421, "step": 5771 }, { "epoch": 0.78, "learning_rate": 2.3842628486483e-06, "loss": 0.8203, "step": 5772 }, { "epoch": 0.78, "learning_rate": 2.381418420530364e-06, "loss": 0.8089, "step": 5773 }, { "epoch": 0.78, "learning_rate": 2.3785754607746327e-06, "loss": 0.7786, "step": 5774 }, { "epoch": 0.78, "learning_rate": 2.3757339699290417e-06, "loss": 0.862, "step": 5775 }, { "epoch": 0.78, "learning_rate": 2.3728939485412437e-06, "loss": 0.8825, "step": 5776 }, { "epoch": 0.78, "learning_rate": 2.370055397158604e-06, "loss": 0.8872, "step": 5777 }, { "epoch": 0.78, "learning_rate": 2.3672183163282146e-06, "loss": 0.7746, "step": 5778 }, { "epoch": 0.78, "learning_rate": 2.3643827065968774e-06, "loss": 0.841, "step": 5779 }, { "epoch": 0.78, "learning_rate": 2.3615485685111083e-06, "loss": 0.8424, "step": 5780 }, { "epoch": 0.78, "learning_rate": 2.3587159026171468e-06, "loss": 0.8431, "step": 5781 }, { "epoch": 0.78, "learning_rate": 2.3558847094609406e-06, "loss": 0.866, "step": 5782 }, { "epoch": 0.78, "learning_rate": 2.353054989588163e-06, "loss": 0.8785, "step": 5783 }, { "epoch": 0.78, "learning_rate": 2.3502267435441938e-06, "loss": 0.7994, "step": 5784 }, { "epoch": 0.78, "learning_rate": 2.347399971874137e-06, "loss": 0.8054, "step": 5785 }, { "epoch": 0.78, "learning_rate": 2.3445746751228025e-06, "loss": 0.8232, "step": 5786 }, { "epoch": 0.78, "learning_rate": 2.3417508538347265e-06, "loss": 0.906, "step": 5787 }, { "epoch": 0.78, "learning_rate": 2.338928508554158e-06, "loss": 0.9109, "step": 5788 }, { "epoch": 0.78, "learning_rate": 2.336107639825058e-06, "loss": 0.8567, "step": 5789 }, { "epoch": 0.78, "learning_rate": 2.3332882481911032e-06, "loss": 0.8162, "step": 5790 }, { "epoch": 0.78, "learning_rate": 2.3304703341956893e-06, "loss": 0.7795, "step": 5791 }, { "epoch": 0.79, "learning_rate": 2.327653898381921e-06, "loss": 0.8322, "step": 5792 }, { "epoch": 0.79, "learning_rate": 2.3248389412926277e-06, "loss": 0.8547, "step": 5793 }, { "epoch": 0.79, "learning_rate": 2.3220254634703452e-06, "loss": 0.8694, "step": 5794 }, { "epoch": 0.79, "learning_rate": 2.3192134654573285e-06, "loss": 0.8498, "step": 5795 }, { "epoch": 0.79, "learning_rate": 2.3164029477955454e-06, "loss": 0.8053, "step": 5796 }, { "epoch": 0.79, "learning_rate": 2.313593911026676e-06, "loss": 0.8565, "step": 5797 }, { "epoch": 0.79, "learning_rate": 2.310786355692124e-06, "loss": 0.8889, "step": 5798 }, { "epoch": 0.79, "learning_rate": 2.3079802823329987e-06, "loss": 0.8464, "step": 5799 }, { "epoch": 0.79, "learning_rate": 2.305175691490128e-06, "loss": 0.8387, "step": 5800 }, { "epoch": 0.79, "learning_rate": 2.302372583704048e-06, "loss": 0.8358, "step": 5801 }, { "epoch": 0.79, "learning_rate": 2.2995709595150208e-06, "loss": 0.8939, "step": 5802 }, { "epoch": 0.79, "learning_rate": 2.2967708194630122e-06, "loss": 0.8071, "step": 5803 }, { "epoch": 0.79, "learning_rate": 2.2939721640877054e-06, "loss": 0.8346, "step": 5804 }, { "epoch": 0.79, "learning_rate": 2.291174993928499e-06, "loss": 0.909, "step": 5805 }, { "epoch": 0.79, "learning_rate": 2.288379309524498e-06, "loss": 0.8705, "step": 5806 }, { "epoch": 0.79, "learning_rate": 2.2855851114145333e-06, "loss": 0.869, "step": 5807 }, { "epoch": 0.79, "learning_rate": 2.2827924001371405e-06, "loss": 0.8186, "step": 5808 }, { "epoch": 0.79, "learning_rate": 2.280001176230572e-06, "loss": 0.7965, "step": 5809 }, { "epoch": 0.79, "learning_rate": 2.277211440232787e-06, "loss": 0.804, "step": 5810 }, { "epoch": 0.79, "learning_rate": 2.274423192681472e-06, "loss": 0.7799, "step": 5811 }, { "epoch": 0.79, "learning_rate": 2.271636434114013e-06, "loss": 0.8371, "step": 5812 }, { "epoch": 0.79, "learning_rate": 2.268851165067514e-06, "loss": 0.8262, "step": 5813 }, { "epoch": 0.79, "learning_rate": 2.2660673860787942e-06, "loss": 0.8215, "step": 5814 }, { "epoch": 0.79, "learning_rate": 2.2632850976843777e-06, "loss": 0.833, "step": 5815 }, { "epoch": 0.79, "learning_rate": 2.260504300420515e-06, "loss": 0.8038, "step": 5816 }, { "epoch": 0.79, "learning_rate": 2.257724994823157e-06, "loss": 0.769, "step": 5817 }, { "epoch": 0.79, "learning_rate": 2.254947181427971e-06, "loss": 0.8763, "step": 5818 }, { "epoch": 0.79, "learning_rate": 2.252170860770336e-06, "loss": 0.812, "step": 5819 }, { "epoch": 0.79, "learning_rate": 2.2493960333853482e-06, "loss": 0.8298, "step": 5820 }, { "epoch": 0.79, "learning_rate": 2.24662269980781e-06, "loss": 0.7819, "step": 5821 }, { "epoch": 0.79, "learning_rate": 2.243850860572239e-06, "loss": 0.78, "step": 5822 }, { "epoch": 0.79, "learning_rate": 2.2410805162128603e-06, "loss": 0.8726, "step": 5823 }, { "epoch": 0.79, "learning_rate": 2.238311667263615e-06, "loss": 0.7987, "step": 5824 }, { "epoch": 0.79, "learning_rate": 2.23554431425816e-06, "loss": 0.8497, "step": 5825 }, { "epoch": 0.79, "learning_rate": 2.2327784577298562e-06, "loss": 0.7692, "step": 5826 }, { "epoch": 0.79, "learning_rate": 2.230014098211779e-06, "loss": 0.8708, "step": 5827 }, { "epoch": 0.79, "learning_rate": 2.2272512362367126e-06, "loss": 0.8471, "step": 5828 }, { "epoch": 0.79, "learning_rate": 2.2244898723371587e-06, "loss": 0.879, "step": 5829 }, { "epoch": 0.79, "learning_rate": 2.2217300070453298e-06, "loss": 0.8821, "step": 5830 }, { "epoch": 0.79, "learning_rate": 2.2189716408931415e-06, "loss": 0.7781, "step": 5831 }, { "epoch": 0.79, "learning_rate": 2.21621477441223e-06, "loss": 0.8229, "step": 5832 }, { "epoch": 0.79, "learning_rate": 2.2134594081339335e-06, "loss": 0.746, "step": 5833 }, { "epoch": 0.79, "learning_rate": 2.2107055425893052e-06, "loss": 0.8488, "step": 5834 }, { "epoch": 0.79, "learning_rate": 2.207953178309116e-06, "loss": 0.8681, "step": 5835 }, { "epoch": 0.79, "learning_rate": 2.2052023158238366e-06, "loss": 0.8799, "step": 5836 }, { "epoch": 0.79, "learning_rate": 2.202452955663653e-06, "loss": 0.772, "step": 5837 }, { "epoch": 0.79, "learning_rate": 2.1997050983584588e-06, "loss": 0.8474, "step": 5838 }, { "epoch": 0.79, "learning_rate": 2.196958744437866e-06, "loss": 0.8395, "step": 5839 }, { "epoch": 0.79, "learning_rate": 2.1942138944311875e-06, "loss": 0.7879, "step": 5840 }, { "epoch": 0.79, "learning_rate": 2.1914705488674515e-06, "loss": 0.8199, "step": 5841 }, { "epoch": 0.79, "learning_rate": 2.188728708275395e-06, "loss": 0.8856, "step": 5842 }, { "epoch": 0.79, "learning_rate": 2.185988373183461e-06, "loss": 0.8222, "step": 5843 }, { "epoch": 0.79, "learning_rate": 2.183249544119811e-06, "loss": 0.8421, "step": 5844 }, { "epoch": 0.79, "learning_rate": 2.180512221612311e-06, "loss": 0.8733, "step": 5845 }, { "epoch": 0.79, "learning_rate": 2.177776406188534e-06, "loss": 0.8365, "step": 5846 }, { "epoch": 0.79, "learning_rate": 2.175042098375766e-06, "loss": 0.7745, "step": 5847 }, { "epoch": 0.79, "learning_rate": 2.1723092987010053e-06, "loss": 0.8299, "step": 5848 }, { "epoch": 0.79, "learning_rate": 2.1695780076909543e-06, "loss": 0.7871, "step": 5849 }, { "epoch": 0.79, "learning_rate": 2.166848225872026e-06, "loss": 0.8745, "step": 5850 }, { "epoch": 0.79, "learning_rate": 2.164119953770344e-06, "loss": 0.8198, "step": 5851 }, { "epoch": 0.79, "learning_rate": 2.161393191911736e-06, "loss": 0.8357, "step": 5852 }, { "epoch": 0.79, "learning_rate": 2.1586679408217494e-06, "loss": 0.7773, "step": 5853 }, { "epoch": 0.79, "learning_rate": 2.1559442010256292e-06, "loss": 0.8888, "step": 5854 }, { "epoch": 0.79, "learning_rate": 2.153221973048335e-06, "loss": 0.8627, "step": 5855 }, { "epoch": 0.79, "learning_rate": 2.1505012574145335e-06, "loss": 0.8513, "step": 5856 }, { "epoch": 0.79, "learning_rate": 2.147782054648597e-06, "loss": 0.8089, "step": 5857 }, { "epoch": 0.79, "learning_rate": 2.145064365274615e-06, "loss": 0.8056, "step": 5858 }, { "epoch": 0.79, "learning_rate": 2.1423481898163754e-06, "loss": 0.8357, "step": 5859 }, { "epoch": 0.79, "learning_rate": 2.13963352879738e-06, "loss": 0.8296, "step": 5860 }, { "epoch": 0.79, "learning_rate": 2.1369203827408348e-06, "loss": 0.8869, "step": 5861 }, { "epoch": 0.79, "learning_rate": 2.1342087521696597e-06, "loss": 0.8847, "step": 5862 }, { "epoch": 0.79, "learning_rate": 2.131498637606477e-06, "loss": 0.8311, "step": 5863 }, { "epoch": 0.79, "learning_rate": 2.1287900395736207e-06, "loss": 0.8402, "step": 5864 }, { "epoch": 0.79, "learning_rate": 2.1260829585931277e-06, "loss": 0.8516, "step": 5865 }, { "epoch": 0.8, "learning_rate": 2.1233773951867442e-06, "loss": 0.8088, "step": 5866 }, { "epoch": 0.8, "learning_rate": 2.1206733498759312e-06, "loss": 0.9028, "step": 5867 }, { "epoch": 0.8, "learning_rate": 2.117970823181846e-06, "loss": 0.8538, "step": 5868 }, { "epoch": 0.8, "learning_rate": 2.1152698156253617e-06, "loss": 0.8583, "step": 5869 }, { "epoch": 0.8, "learning_rate": 2.1125703277270502e-06, "loss": 0.8184, "step": 5870 }, { "epoch": 0.8, "learning_rate": 2.1098723600072015e-06, "loss": 0.8226, "step": 5871 }, { "epoch": 0.8, "learning_rate": 2.1071759129858026e-06, "loss": 0.9014, "step": 5872 }, { "epoch": 0.8, "learning_rate": 2.104480987182551e-06, "loss": 0.8326, "step": 5873 }, { "epoch": 0.8, "learning_rate": 2.1017875831168553e-06, "loss": 0.8616, "step": 5874 }, { "epoch": 0.8, "learning_rate": 2.099095701307824e-06, "loss": 0.7845, "step": 5875 }, { "epoch": 0.8, "learning_rate": 2.0964053422742736e-06, "loss": 0.8213, "step": 5876 }, { "epoch": 0.8, "learning_rate": 2.093716506534733e-06, "loss": 0.8192, "step": 5877 }, { "epoch": 0.8, "learning_rate": 2.0910291946074312e-06, "loss": 0.9111, "step": 5878 }, { "epoch": 0.8, "learning_rate": 2.088343407010306e-06, "loss": 0.8336, "step": 5879 }, { "epoch": 0.8, "learning_rate": 2.0856591442609965e-06, "loss": 0.8303, "step": 5880 }, { "epoch": 0.8, "learning_rate": 2.0829764068768586e-06, "loss": 0.8701, "step": 5881 }, { "epoch": 0.8, "learning_rate": 2.080295195374945e-06, "loss": 0.8364, "step": 5882 }, { "epoch": 0.8, "learning_rate": 2.0776155102720174e-06, "loss": 0.8164, "step": 5883 }, { "epoch": 0.8, "learning_rate": 2.074937352084543e-06, "loss": 0.821, "step": 5884 }, { "epoch": 0.8, "learning_rate": 2.0722607213286917e-06, "loss": 0.8732, "step": 5885 }, { "epoch": 0.8, "learning_rate": 2.069585618520349e-06, "loss": 0.8654, "step": 5886 }, { "epoch": 0.8, "learning_rate": 2.0669120441750945e-06, "loss": 0.8245, "step": 5887 }, { "epoch": 0.8, "learning_rate": 2.0642399988082186e-06, "loss": 0.8181, "step": 5888 }, { "epoch": 0.8, "learning_rate": 2.061569482934713e-06, "loss": 0.782, "step": 5889 }, { "epoch": 0.8, "learning_rate": 2.058900497069284e-06, "loss": 0.8369, "step": 5890 }, { "epoch": 0.8, "learning_rate": 2.0562330417263344e-06, "loss": 0.8566, "step": 5891 }, { "epoch": 0.8, "learning_rate": 2.0535671174199722e-06, "loss": 0.8085, "step": 5892 }, { "epoch": 0.8, "learning_rate": 2.0509027246640157e-06, "loss": 0.7946, "step": 5893 }, { "epoch": 0.8, "learning_rate": 2.048239863971979e-06, "loss": 0.8407, "step": 5894 }, { "epoch": 0.8, "learning_rate": 2.0455785358570945e-06, "loss": 0.7878, "step": 5895 }, { "epoch": 0.8, "learning_rate": 2.042918740832288e-06, "loss": 0.8568, "step": 5896 }, { "epoch": 0.8, "learning_rate": 2.040260479410193e-06, "loss": 0.8723, "step": 5897 }, { "epoch": 0.8, "learning_rate": 2.0376037521031456e-06, "loss": 0.8454, "step": 5898 }, { "epoch": 0.8, "learning_rate": 2.034948559423193e-06, "loss": 0.8936, "step": 5899 }, { "epoch": 0.8, "learning_rate": 2.0322949018820802e-06, "loss": 0.8063, "step": 5900 }, { "epoch": 0.8, "learning_rate": 2.0296427799912575e-06, "loss": 0.8048, "step": 5901 }, { "epoch": 0.8, "learning_rate": 2.02699219426188e-06, "loss": 0.8665, "step": 5902 }, { "epoch": 0.8, "learning_rate": 2.0243431452048036e-06, "loss": 0.8346, "step": 5903 }, { "epoch": 0.8, "learning_rate": 2.021695633330596e-06, "loss": 0.7846, "step": 5904 }, { "epoch": 0.8, "learning_rate": 2.0190496591495224e-06, "loss": 0.8732, "step": 5905 }, { "epoch": 0.8, "learning_rate": 2.0164052231715516e-06, "loss": 0.8444, "step": 5906 }, { "epoch": 0.8, "learning_rate": 2.0137623259063576e-06, "loss": 0.8409, "step": 5907 }, { "epoch": 0.8, "learning_rate": 2.0111209678633147e-06, "loss": 0.8797, "step": 5908 }, { "epoch": 0.8, "learning_rate": 2.0084811495515087e-06, "loss": 0.9067, "step": 5909 }, { "epoch": 0.8, "learning_rate": 2.0058428714797206e-06, "loss": 0.8158, "step": 5910 }, { "epoch": 0.8, "learning_rate": 2.003206134156437e-06, "loss": 0.8207, "step": 5911 }, { "epoch": 0.8, "learning_rate": 2.0005709380898454e-06, "loss": 0.8827, "step": 5912 }, { "epoch": 0.8, "learning_rate": 1.997937283787843e-06, "loss": 0.7631, "step": 5913 }, { "epoch": 0.8, "learning_rate": 1.995305171758023e-06, "loss": 0.8114, "step": 5914 }, { "epoch": 0.8, "learning_rate": 1.992674602507685e-06, "loss": 0.8272, "step": 5915 }, { "epoch": 0.8, "learning_rate": 1.9900455765438288e-06, "loss": 0.8265, "step": 5916 }, { "epoch": 0.8, "learning_rate": 1.987418094373155e-06, "loss": 0.8178, "step": 5917 }, { "epoch": 0.8, "learning_rate": 1.9847921565020724e-06, "loss": 0.8461, "step": 5918 }, { "epoch": 0.8, "learning_rate": 1.9821677634366932e-06, "loss": 0.796, "step": 5919 }, { "epoch": 0.8, "learning_rate": 1.979544915682824e-06, "loss": 0.8208, "step": 5920 }, { "epoch": 0.8, "learning_rate": 1.9769236137459778e-06, "loss": 0.8239, "step": 5921 }, { "epoch": 0.8, "learning_rate": 1.974303858131369e-06, "loss": 0.9125, "step": 5922 }, { "epoch": 0.8, "learning_rate": 1.971685649343916e-06, "loss": 0.8142, "step": 5923 }, { "epoch": 0.8, "learning_rate": 1.9690689878882375e-06, "loss": 0.8047, "step": 5924 }, { "epoch": 0.8, "learning_rate": 1.9664538742686533e-06, "loss": 0.7896, "step": 5925 }, { "epoch": 0.8, "learning_rate": 1.9638403089891857e-06, "loss": 0.7759, "step": 5926 }, { "epoch": 0.8, "learning_rate": 1.961228292553555e-06, "loss": 0.8206, "step": 5927 }, { "epoch": 0.8, "learning_rate": 1.9586178254651934e-06, "loss": 0.7451, "step": 5928 }, { "epoch": 0.8, "learning_rate": 1.956008908227224e-06, "loss": 0.8264, "step": 5929 }, { "epoch": 0.8, "learning_rate": 1.9534015413424734e-06, "loss": 0.8228, "step": 5930 }, { "epoch": 0.8, "learning_rate": 1.95079572531347e-06, "loss": 0.8352, "step": 5931 }, { "epoch": 0.8, "learning_rate": 1.948191460642448e-06, "loss": 0.8425, "step": 5932 }, { "epoch": 0.8, "learning_rate": 1.9455887478313374e-06, "loss": 0.9124, "step": 5933 }, { "epoch": 0.8, "learning_rate": 1.942987587381768e-06, "loss": 0.8082, "step": 5934 }, { "epoch": 0.8, "learning_rate": 1.9403879797950753e-06, "loss": 0.8556, "step": 5935 }, { "epoch": 0.8, "learning_rate": 1.9377899255722886e-06, "loss": 0.8973, "step": 5936 }, { "epoch": 0.8, "learning_rate": 1.935193425214148e-06, "loss": 0.8359, "step": 5937 }, { "epoch": 0.8, "learning_rate": 1.932598479221085e-06, "loss": 0.8496, "step": 5938 }, { "epoch": 0.8, "learning_rate": 1.9300050880932354e-06, "loss": 0.857, "step": 5939 }, { "epoch": 0.81, "learning_rate": 1.9274132523304324e-06, "loss": 0.775, "step": 5940 }, { "epoch": 0.81, "learning_rate": 1.9248229724322164e-06, "loss": 0.901, "step": 5941 }, { "epoch": 0.81, "learning_rate": 1.9222342488978208e-06, "loss": 0.8667, "step": 5942 }, { "epoch": 0.81, "learning_rate": 1.9196470822261816e-06, "loss": 0.8059, "step": 5943 }, { "epoch": 0.81, "learning_rate": 1.917061472915933e-06, "loss": 0.8311, "step": 5944 }, { "epoch": 0.81, "learning_rate": 1.9144774214654118e-06, "loss": 0.847, "step": 5945 }, { "epoch": 0.81, "learning_rate": 1.911894928372655e-06, "loss": 0.8076, "step": 5946 }, { "epoch": 0.81, "learning_rate": 1.9093139941353968e-06, "loss": 0.8757, "step": 5947 }, { "epoch": 0.81, "learning_rate": 1.9067346192510705e-06, "loss": 0.8434, "step": 5948 }, { "epoch": 0.81, "learning_rate": 1.90415680421681e-06, "loss": 0.8689, "step": 5949 }, { "epoch": 0.81, "learning_rate": 1.9015805495294515e-06, "loss": 0.8795, "step": 5950 }, { "epoch": 0.81, "learning_rate": 1.8990058556855274e-06, "loss": 0.8278, "step": 5951 }, { "epoch": 0.81, "learning_rate": 1.8964327231812674e-06, "loss": 0.8296, "step": 5952 }, { "epoch": 0.81, "learning_rate": 1.8938611525126026e-06, "loss": 0.901, "step": 5953 }, { "epoch": 0.81, "learning_rate": 1.8912911441751625e-06, "loss": 0.8209, "step": 5954 }, { "epoch": 0.81, "learning_rate": 1.8887226986642792e-06, "loss": 0.8753, "step": 5955 }, { "epoch": 0.81, "learning_rate": 1.8861558164749782e-06, "loss": 0.864, "step": 5956 }, { "epoch": 0.81, "learning_rate": 1.8835904981019858e-06, "loss": 0.8879, "step": 5957 }, { "epoch": 0.81, "learning_rate": 1.8810267440397245e-06, "loss": 0.7984, "step": 5958 }, { "epoch": 0.81, "learning_rate": 1.8784645547823233e-06, "loss": 0.8352, "step": 5959 }, { "epoch": 0.81, "learning_rate": 1.8759039308235972e-06, "loss": 0.8812, "step": 5960 }, { "epoch": 0.81, "learning_rate": 1.8733448726570736e-06, "loss": 0.8666, "step": 5961 }, { "epoch": 0.81, "learning_rate": 1.8707873807759668e-06, "loss": 0.8707, "step": 5962 }, { "epoch": 0.81, "learning_rate": 1.868231455673194e-06, "loss": 0.8342, "step": 5963 }, { "epoch": 0.81, "learning_rate": 1.8656770978413674e-06, "loss": 0.8659, "step": 5964 }, { "epoch": 0.81, "learning_rate": 1.8631243077728045e-06, "loss": 0.8263, "step": 5965 }, { "epoch": 0.81, "learning_rate": 1.8605730859595116e-06, "loss": 0.8465, "step": 5966 }, { "epoch": 0.81, "learning_rate": 1.8580234328931979e-06, "loss": 0.8248, "step": 5967 }, { "epoch": 0.81, "learning_rate": 1.8554753490652689e-06, "loss": 0.8173, "step": 5968 }, { "epoch": 0.81, "learning_rate": 1.8529288349668261e-06, "loss": 0.8456, "step": 5969 }, { "epoch": 0.81, "learning_rate": 1.850383891088674e-06, "loss": 0.7836, "step": 5970 }, { "epoch": 0.81, "learning_rate": 1.8478405179213076e-06, "loss": 0.8156, "step": 5971 }, { "epoch": 0.81, "learning_rate": 1.845298715954925e-06, "loss": 0.8283, "step": 5972 }, { "epoch": 0.81, "learning_rate": 1.8427584856794134e-06, "loss": 0.791, "step": 5973 }, { "epoch": 0.81, "learning_rate": 1.8402198275843687e-06, "loss": 0.8958, "step": 5974 }, { "epoch": 0.81, "learning_rate": 1.8376827421590737e-06, "loss": 0.8186, "step": 5975 }, { "epoch": 0.81, "learning_rate": 1.8351472298925143e-06, "loss": 0.8806, "step": 5976 }, { "epoch": 0.81, "learning_rate": 1.8326132912733685e-06, "loss": 0.8482, "step": 5977 }, { "epoch": 0.81, "learning_rate": 1.830080926790011e-06, "loss": 0.7898, "step": 5978 }, { "epoch": 0.81, "learning_rate": 1.8275501369305214e-06, "loss": 0.8054, "step": 5979 }, { "epoch": 0.81, "learning_rate": 1.8250209221826675e-06, "loss": 0.8122, "step": 5980 }, { "epoch": 0.81, "learning_rate": 1.8224932830339137e-06, "loss": 0.7779, "step": 5981 }, { "epoch": 0.81, "learning_rate": 1.8199672199714224e-06, "loss": 0.8015, "step": 5982 }, { "epoch": 0.81, "learning_rate": 1.8174427334820565e-06, "loss": 0.8139, "step": 5983 }, { "epoch": 0.81, "learning_rate": 1.8149198240523702e-06, "loss": 0.8425, "step": 5984 }, { "epoch": 0.81, "learning_rate": 1.8123984921686134e-06, "loss": 0.8495, "step": 5985 }, { "epoch": 0.81, "learning_rate": 1.8098787383167327e-06, "loss": 0.7886, "step": 5986 }, { "epoch": 0.81, "learning_rate": 1.807360562982371e-06, "loss": 0.7882, "step": 5987 }, { "epoch": 0.81, "learning_rate": 1.80484396665087e-06, "loss": 0.8239, "step": 5988 }, { "epoch": 0.81, "learning_rate": 1.8023289498072626e-06, "loss": 0.8019, "step": 5989 }, { "epoch": 0.81, "learning_rate": 1.7998155129362783e-06, "loss": 0.8883, "step": 5990 }, { "epoch": 0.81, "learning_rate": 1.7973036565223411e-06, "loss": 0.7964, "step": 5991 }, { "epoch": 0.81, "learning_rate": 1.7947933810495755e-06, "loss": 0.8455, "step": 5992 }, { "epoch": 0.81, "learning_rate": 1.7922846870017974e-06, "loss": 0.8784, "step": 5993 }, { "epoch": 0.81, "learning_rate": 1.7897775748625169e-06, "loss": 0.8747, "step": 5994 }, { "epoch": 0.81, "learning_rate": 1.7872720451149406e-06, "loss": 0.8001, "step": 5995 }, { "epoch": 0.81, "learning_rate": 1.7847680982419668e-06, "loss": 0.8153, "step": 5996 }, { "epoch": 0.81, "learning_rate": 1.7822657347261985e-06, "loss": 0.8226, "step": 5997 }, { "epoch": 0.81, "learning_rate": 1.779764955049925e-06, "loss": 0.7758, "step": 5998 }, { "epoch": 0.81, "learning_rate": 1.7772657596951304e-06, "loss": 0.8808, "step": 5999 }, { "epoch": 0.81, "learning_rate": 1.7747681491434943e-06, "loss": 0.8709, "step": 6000 }, { "epoch": 0.81, "learning_rate": 1.7722721238763963e-06, "loss": 0.8488, "step": 6001 }, { "epoch": 0.81, "learning_rate": 1.7697776843749037e-06, "loss": 0.8465, "step": 6002 }, { "epoch": 0.81, "learning_rate": 1.767284831119782e-06, "loss": 0.7972, "step": 6003 }, { "epoch": 0.81, "learning_rate": 1.7647935645914848e-06, "loss": 0.8386, "step": 6004 }, { "epoch": 0.81, "learning_rate": 1.7623038852701724e-06, "loss": 0.8348, "step": 6005 }, { "epoch": 0.81, "learning_rate": 1.759815793635683e-06, "loss": 0.771, "step": 6006 }, { "epoch": 0.81, "learning_rate": 1.7573292901675654e-06, "loss": 0.8374, "step": 6007 }, { "epoch": 0.81, "learning_rate": 1.7548443753450506e-06, "loss": 0.7368, "step": 6008 }, { "epoch": 0.81, "learning_rate": 1.7523610496470667e-06, "loss": 0.7811, "step": 6009 }, { "epoch": 0.81, "learning_rate": 1.7498793135522329e-06, "loss": 0.8696, "step": 6010 }, { "epoch": 0.81, "learning_rate": 1.7473991675388714e-06, "loss": 0.8628, "step": 6011 }, { "epoch": 0.81, "learning_rate": 1.7449206120849881e-06, "loss": 0.9091, "step": 6012 }, { "epoch": 0.81, "learning_rate": 1.742443647668285e-06, "loss": 0.8555, "step": 6013 }, { "epoch": 0.82, "learning_rate": 1.7399682747661595e-06, "loss": 0.8195, "step": 6014 }, { "epoch": 0.82, "learning_rate": 1.7374944938556982e-06, "loss": 0.8598, "step": 6015 }, { "epoch": 0.82, "learning_rate": 1.7350223054136871e-06, "loss": 0.8344, "step": 6016 }, { "epoch": 0.82, "learning_rate": 1.7325517099166012e-06, "loss": 0.8534, "step": 6017 }, { "epoch": 0.82, "learning_rate": 1.730082707840608e-06, "loss": 0.8406, "step": 6018 }, { "epoch": 0.82, "learning_rate": 1.727615299661567e-06, "loss": 0.8488, "step": 6019 }, { "epoch": 0.82, "learning_rate": 1.7251494858550366e-06, "loss": 0.8246, "step": 6020 }, { "epoch": 0.82, "learning_rate": 1.7226852668962625e-06, "loss": 0.8499, "step": 6021 }, { "epoch": 0.82, "learning_rate": 1.7202226432601833e-06, "loss": 0.8717, "step": 6022 }, { "epoch": 0.82, "learning_rate": 1.7177616154214316e-06, "loss": 0.8193, "step": 6023 }, { "epoch": 0.82, "learning_rate": 1.7153021838543294e-06, "loss": 0.8527, "step": 6024 }, { "epoch": 0.82, "learning_rate": 1.7128443490328983e-06, "loss": 0.7974, "step": 6025 }, { "epoch": 0.82, "learning_rate": 1.7103881114308451e-06, "loss": 0.7991, "step": 6026 }, { "epoch": 0.82, "learning_rate": 1.7079334715215724e-06, "loss": 0.8259, "step": 6027 }, { "epoch": 0.82, "learning_rate": 1.7054804297781714e-06, "loss": 0.7385, "step": 6028 }, { "epoch": 0.82, "learning_rate": 1.703028986673425e-06, "loss": 0.7864, "step": 6029 }, { "epoch": 0.82, "learning_rate": 1.7005791426798168e-06, "loss": 0.8191, "step": 6030 }, { "epoch": 0.82, "learning_rate": 1.6981308982695133e-06, "loss": 0.8254, "step": 6031 }, { "epoch": 0.82, "learning_rate": 1.6956842539143747e-06, "loss": 0.8167, "step": 6032 }, { "epoch": 0.82, "learning_rate": 1.6932392100859506e-06, "loss": 0.8273, "step": 6033 }, { "epoch": 0.82, "learning_rate": 1.690795767255491e-06, "loss": 0.8087, "step": 6034 }, { "epoch": 0.82, "learning_rate": 1.6883539258939275e-06, "loss": 0.9045, "step": 6035 }, { "epoch": 0.82, "learning_rate": 1.6859136864718873e-06, "loss": 0.8691, "step": 6036 }, { "epoch": 0.82, "learning_rate": 1.6834750494596874e-06, "loss": 0.8594, "step": 6037 }, { "epoch": 0.82, "learning_rate": 1.6810380153273365e-06, "loss": 0.8365, "step": 6038 }, { "epoch": 0.82, "learning_rate": 1.6786025845445375e-06, "loss": 0.86, "step": 6039 }, { "epoch": 0.82, "learning_rate": 1.6761687575806796e-06, "loss": 0.8476, "step": 6040 }, { "epoch": 0.82, "learning_rate": 1.6737365349048463e-06, "loss": 0.833, "step": 6041 }, { "epoch": 0.82, "learning_rate": 1.6713059169858058e-06, "loss": 0.8154, "step": 6042 }, { "epoch": 0.82, "learning_rate": 1.6688769042920283e-06, "loss": 0.857, "step": 6043 }, { "epoch": 0.82, "learning_rate": 1.6664494972916645e-06, "loss": 0.8491, "step": 6044 }, { "epoch": 0.82, "learning_rate": 1.6640236964525581e-06, "loss": 0.8724, "step": 6045 }, { "epoch": 0.82, "learning_rate": 1.6615995022422472e-06, "loss": 0.8453, "step": 6046 }, { "epoch": 0.82, "learning_rate": 1.6591769151279513e-06, "loss": 0.827, "step": 6047 }, { "epoch": 0.82, "learning_rate": 1.6567559355765905e-06, "loss": 0.8191, "step": 6048 }, { "epoch": 0.82, "learning_rate": 1.6543365640547737e-06, "loss": 0.8138, "step": 6049 }, { "epoch": 0.82, "learning_rate": 1.6519188010287923e-06, "loss": 0.8388, "step": 6050 }, { "epoch": 0.82, "learning_rate": 1.6495026469646347e-06, "loss": 0.8679, "step": 6051 }, { "epoch": 0.82, "learning_rate": 1.6470881023279717e-06, "loss": 0.8555, "step": 6052 }, { "epoch": 0.82, "learning_rate": 1.6446751675841755e-06, "loss": 0.8082, "step": 6053 }, { "epoch": 0.82, "learning_rate": 1.6422638431982995e-06, "loss": 0.9013, "step": 6054 }, { "epoch": 0.82, "learning_rate": 1.6398541296350868e-06, "loss": 0.8089, "step": 6055 }, { "epoch": 0.82, "learning_rate": 1.6374460273589732e-06, "loss": 0.7807, "step": 6056 }, { "epoch": 0.82, "learning_rate": 1.635039536834081e-06, "loss": 0.8618, "step": 6057 }, { "epoch": 0.82, "learning_rate": 1.632634658524226e-06, "loss": 0.7787, "step": 6058 }, { "epoch": 0.82, "learning_rate": 1.6302313928929104e-06, "loss": 0.7973, "step": 6059 }, { "epoch": 0.82, "learning_rate": 1.627829740403325e-06, "loss": 0.8499, "step": 6060 }, { "epoch": 0.82, "learning_rate": 1.6254297015183496e-06, "loss": 0.8326, "step": 6061 }, { "epoch": 0.82, "learning_rate": 1.6230312767005574e-06, "loss": 0.7834, "step": 6062 }, { "epoch": 0.82, "learning_rate": 1.6206344664122042e-06, "loss": 0.8265, "step": 6063 }, { "epoch": 0.82, "learning_rate": 1.6182392711152406e-06, "loss": 0.8842, "step": 6064 }, { "epoch": 0.82, "learning_rate": 1.6158456912712995e-06, "loss": 0.8143, "step": 6065 }, { "epoch": 0.82, "learning_rate": 1.613453727341706e-06, "loss": 0.8035, "step": 6066 }, { "epoch": 0.82, "learning_rate": 1.6110633797874776e-06, "loss": 0.852, "step": 6067 }, { "epoch": 0.82, "learning_rate": 1.608674649069313e-06, "loss": 0.8644, "step": 6068 }, { "epoch": 0.82, "learning_rate": 1.606287535647605e-06, "loss": 0.8462, "step": 6069 }, { "epoch": 0.82, "learning_rate": 1.6039020399824268e-06, "loss": 0.8311, "step": 6070 }, { "epoch": 0.82, "learning_rate": 1.601518162533553e-06, "loss": 0.7514, "step": 6071 }, { "epoch": 0.82, "learning_rate": 1.5991359037604338e-06, "loss": 0.8206, "step": 6072 }, { "epoch": 0.82, "learning_rate": 1.596755264122214e-06, "loss": 0.8986, "step": 6073 }, { "epoch": 0.82, "learning_rate": 1.5943762440777243e-06, "loss": 0.8768, "step": 6074 }, { "epoch": 0.82, "learning_rate": 1.5919988440854805e-06, "loss": 0.8487, "step": 6075 }, { "epoch": 0.82, "learning_rate": 1.5896230646036937e-06, "loss": 0.8861, "step": 6076 }, { "epoch": 0.82, "learning_rate": 1.5872489060902562e-06, "loss": 0.8264, "step": 6077 }, { "epoch": 0.82, "learning_rate": 1.5848763690027514e-06, "loss": 0.7998, "step": 6078 }, { "epoch": 0.82, "learning_rate": 1.5825054537984464e-06, "loss": 0.8323, "step": 6079 }, { "epoch": 0.82, "learning_rate": 1.5801361609342958e-06, "loss": 0.8223, "step": 6080 }, { "epoch": 0.82, "learning_rate": 1.5777684908669499e-06, "loss": 0.7954, "step": 6081 }, { "epoch": 0.82, "learning_rate": 1.575402444052736e-06, "loss": 0.7889, "step": 6082 }, { "epoch": 0.82, "learning_rate": 1.5730380209476737e-06, "loss": 0.8784, "step": 6083 }, { "epoch": 0.82, "learning_rate": 1.5706752220074661e-06, "loss": 0.7896, "step": 6084 }, { "epoch": 0.82, "learning_rate": 1.5683140476875092e-06, "loss": 0.847, "step": 6085 }, { "epoch": 0.82, "learning_rate": 1.565954498442882e-06, "loss": 0.8259, "step": 6086 }, { "epoch": 0.83, "learning_rate": 1.5635965747283488e-06, "loss": 0.8463, "step": 6087 }, { "epoch": 0.83, "learning_rate": 1.5612402769983625e-06, "loss": 0.8008, "step": 6088 }, { "epoch": 0.83, "learning_rate": 1.5588856057070612e-06, "loss": 0.8344, "step": 6089 }, { "epoch": 0.83, "learning_rate": 1.556532561308275e-06, "loss": 0.8266, "step": 6090 }, { "epoch": 0.83, "learning_rate": 1.5541811442555122e-06, "loss": 0.8559, "step": 6091 }, { "epoch": 0.83, "learning_rate": 1.551831355001976e-06, "loss": 0.7712, "step": 6092 }, { "epoch": 0.83, "learning_rate": 1.5494831940005484e-06, "loss": 0.7804, "step": 6093 }, { "epoch": 0.83, "learning_rate": 1.5471366617037998e-06, "loss": 0.8477, "step": 6094 }, { "epoch": 0.83, "learning_rate": 1.5447917585639905e-06, "loss": 0.8336, "step": 6095 }, { "epoch": 0.83, "learning_rate": 1.5424484850330623e-06, "loss": 0.8591, "step": 6096 }, { "epoch": 0.83, "learning_rate": 1.5401068415626442e-06, "loss": 0.8722, "step": 6097 }, { "epoch": 0.83, "learning_rate": 1.5377668286040525e-06, "loss": 0.8492, "step": 6098 }, { "epoch": 0.83, "learning_rate": 1.5354284466082836e-06, "loss": 0.8364, "step": 6099 }, { "epoch": 0.83, "learning_rate": 1.5330916960260312e-06, "loss": 0.775, "step": 6100 }, { "epoch": 0.83, "learning_rate": 1.5307565773076626e-06, "loss": 0.7859, "step": 6101 }, { "epoch": 0.83, "learning_rate": 1.528423090903236e-06, "loss": 0.7891, "step": 6102 }, { "epoch": 0.83, "learning_rate": 1.5260912372624925e-06, "loss": 0.8385, "step": 6103 }, { "epoch": 0.83, "learning_rate": 1.523761016834866e-06, "loss": 0.8293, "step": 6104 }, { "epoch": 0.83, "learning_rate": 1.521432430069465e-06, "loss": 0.8139, "step": 6105 }, { "epoch": 0.83, "learning_rate": 1.5191054774150905e-06, "loss": 0.8666, "step": 6106 }, { "epoch": 0.83, "learning_rate": 1.5167801593202248e-06, "loss": 0.7698, "step": 6107 }, { "epoch": 0.83, "learning_rate": 1.514456476233035e-06, "loss": 0.8775, "step": 6108 }, { "epoch": 0.83, "learning_rate": 1.5121344286013784e-06, "loss": 0.7366, "step": 6109 }, { "epoch": 0.83, "learning_rate": 1.5098140168727916e-06, "loss": 0.8156, "step": 6110 }, { "epoch": 0.83, "learning_rate": 1.5074952414944976e-06, "loss": 0.8397, "step": 6111 }, { "epoch": 0.83, "learning_rate": 1.5051781029134016e-06, "loss": 0.7739, "step": 6112 }, { "epoch": 0.83, "learning_rate": 1.5028626015760995e-06, "loss": 0.7751, "step": 6113 }, { "epoch": 0.83, "learning_rate": 1.5005487379288675e-06, "loss": 0.7963, "step": 6114 }, { "epoch": 0.83, "learning_rate": 1.4982365124176645e-06, "loss": 0.8438, "step": 6115 }, { "epoch": 0.83, "learning_rate": 1.4959259254881375e-06, "loss": 0.8224, "step": 6116 }, { "epoch": 0.83, "learning_rate": 1.4936169775856124e-06, "loss": 0.7586, "step": 6117 }, { "epoch": 0.83, "learning_rate": 1.4913096691551077e-06, "loss": 0.9051, "step": 6118 }, { "epoch": 0.83, "learning_rate": 1.4890040006413187e-06, "loss": 0.8061, "step": 6119 }, { "epoch": 0.83, "learning_rate": 1.4866999724886277e-06, "loss": 0.8069, "step": 6120 }, { "epoch": 0.83, "learning_rate": 1.4843975851410964e-06, "loss": 0.8011, "step": 6121 }, { "epoch": 0.83, "learning_rate": 1.4820968390424783e-06, "loss": 0.7832, "step": 6122 }, { "epoch": 0.83, "learning_rate": 1.4797977346362046e-06, "loss": 0.9299, "step": 6123 }, { "epoch": 0.83, "learning_rate": 1.477500272365392e-06, "loss": 0.8429, "step": 6124 }, { "epoch": 0.83, "learning_rate": 1.4752044526728393e-06, "loss": 0.8496, "step": 6125 }, { "epoch": 0.83, "learning_rate": 1.4729102760010282e-06, "loss": 0.8257, "step": 6126 }, { "epoch": 0.83, "learning_rate": 1.4706177427921297e-06, "loss": 0.8211, "step": 6127 }, { "epoch": 0.83, "learning_rate": 1.4683268534879925e-06, "loss": 0.8447, "step": 6128 }, { "epoch": 0.83, "learning_rate": 1.4660376085301476e-06, "loss": 0.713, "step": 6129 }, { "epoch": 0.83, "learning_rate": 1.46375000835981e-06, "loss": 0.8041, "step": 6130 }, { "epoch": 0.83, "learning_rate": 1.4614640534178825e-06, "loss": 0.7965, "step": 6131 }, { "epoch": 0.83, "learning_rate": 1.459179744144945e-06, "loss": 0.9326, "step": 6132 }, { "epoch": 0.83, "learning_rate": 1.4568970809812643e-06, "loss": 0.8148, "step": 6133 }, { "epoch": 0.83, "learning_rate": 1.454616064366785e-06, "loss": 0.8451, "step": 6134 }, { "epoch": 0.83, "learning_rate": 1.4523366947411366e-06, "loss": 0.8667, "step": 6135 }, { "epoch": 0.83, "learning_rate": 1.4500589725436344e-06, "loss": 0.7899, "step": 6136 }, { "epoch": 0.83, "learning_rate": 1.4477828982132758e-06, "loss": 0.8487, "step": 6137 }, { "epoch": 0.83, "learning_rate": 1.4455084721887346e-06, "loss": 0.8307, "step": 6138 }, { "epoch": 0.83, "learning_rate": 1.4432356949083726e-06, "loss": 0.8165, "step": 6139 }, { "epoch": 0.83, "learning_rate": 1.4409645668102313e-06, "loss": 0.8333, "step": 6140 }, { "epoch": 0.83, "learning_rate": 1.4386950883320327e-06, "loss": 0.7985, "step": 6141 }, { "epoch": 0.83, "learning_rate": 1.4364272599111883e-06, "loss": 0.8274, "step": 6142 }, { "epoch": 0.83, "learning_rate": 1.434161081984784e-06, "loss": 0.842, "step": 6143 }, { "epoch": 0.83, "learning_rate": 1.4318965549895903e-06, "loss": 0.8308, "step": 6144 }, { "epoch": 0.83, "learning_rate": 1.4296336793620557e-06, "loss": 0.8164, "step": 6145 }, { "epoch": 0.83, "learning_rate": 1.4273724555383195e-06, "loss": 0.8915, "step": 6146 }, { "epoch": 0.83, "learning_rate": 1.425112883954195e-06, "loss": 0.8504, "step": 6147 }, { "epoch": 0.83, "learning_rate": 1.4228549650451794e-06, "loss": 0.8036, "step": 6148 }, { "epoch": 0.83, "learning_rate": 1.4205986992464515e-06, "loss": 0.9054, "step": 6149 }, { "epoch": 0.83, "learning_rate": 1.4183440869928678e-06, "loss": 0.8533, "step": 6150 }, { "epoch": 0.83, "learning_rate": 1.4160911287189737e-06, "loss": 0.8702, "step": 6151 }, { "epoch": 0.83, "learning_rate": 1.4138398248589913e-06, "loss": 0.8413, "step": 6152 }, { "epoch": 0.83, "learning_rate": 1.411590175846822e-06, "loss": 0.8212, "step": 6153 }, { "epoch": 0.83, "learning_rate": 1.4093421821160502e-06, "loss": 0.8621, "step": 6154 }, { "epoch": 0.83, "learning_rate": 1.4070958440999438e-06, "loss": 0.8266, "step": 6155 }, { "epoch": 0.83, "learning_rate": 1.4048511622314488e-06, "loss": 0.8243, "step": 6156 }, { "epoch": 0.83, "learning_rate": 1.4026081369431909e-06, "loss": 0.8166, "step": 6157 }, { "epoch": 0.83, "learning_rate": 1.4003667686674792e-06, "loss": 0.8354, "step": 6158 }, { "epoch": 0.83, "learning_rate": 1.3981270578363004e-06, "loss": 0.8118, "step": 6159 }, { "epoch": 0.83, "learning_rate": 1.3958890048813267e-06, "loss": 0.8613, "step": 6160 }, { "epoch": 0.84, "learning_rate": 1.3936526102339077e-06, "loss": 0.8634, "step": 6161 }, { "epoch": 0.84, "learning_rate": 1.3914178743250707e-06, "loss": 0.7601, "step": 6162 }, { "epoch": 0.84, "learning_rate": 1.3891847975855255e-06, "loss": 0.8339, "step": 6163 }, { "epoch": 0.84, "learning_rate": 1.386953380445667e-06, "loss": 0.8474, "step": 6164 }, { "epoch": 0.84, "learning_rate": 1.3847236233355621e-06, "loss": 0.8201, "step": 6165 }, { "epoch": 0.84, "learning_rate": 1.3824955266849637e-06, "loss": 0.8318, "step": 6166 }, { "epoch": 0.84, "learning_rate": 1.380269090923302e-06, "loss": 0.8002, "step": 6167 }, { "epoch": 0.84, "learning_rate": 1.3780443164796854e-06, "loss": 0.7685, "step": 6168 }, { "epoch": 0.84, "learning_rate": 1.3758212037829076e-06, "loss": 0.8321, "step": 6169 }, { "epoch": 0.84, "learning_rate": 1.3735997532614375e-06, "loss": 0.8284, "step": 6170 }, { "epoch": 0.84, "learning_rate": 1.3713799653434246e-06, "loss": 0.8532, "step": 6171 }, { "epoch": 0.84, "learning_rate": 1.3691618404566954e-06, "loss": 0.7689, "step": 6172 }, { "epoch": 0.84, "learning_rate": 1.3669453790287646e-06, "loss": 0.8231, "step": 6173 }, { "epoch": 0.84, "learning_rate": 1.3647305814868173e-06, "loss": 0.8183, "step": 6174 }, { "epoch": 0.84, "learning_rate": 1.3625174482577208e-06, "loss": 0.7564, "step": 6175 }, { "epoch": 0.84, "learning_rate": 1.3603059797680218e-06, "loss": 0.8128, "step": 6176 }, { "epoch": 0.84, "learning_rate": 1.3580961764439449e-06, "loss": 0.8323, "step": 6177 }, { "epoch": 0.84, "learning_rate": 1.3558880387113993e-06, "loss": 0.8417, "step": 6178 }, { "epoch": 0.84, "learning_rate": 1.3536815669959635e-06, "loss": 0.8365, "step": 6179 }, { "epoch": 0.84, "learning_rate": 1.3514767617229051e-06, "loss": 0.8086, "step": 6180 }, { "epoch": 0.84, "learning_rate": 1.3492736233171644e-06, "loss": 0.8619, "step": 6181 }, { "epoch": 0.84, "learning_rate": 1.3470721522033592e-06, "loss": 0.9033, "step": 6182 }, { "epoch": 0.84, "learning_rate": 1.3448723488057925e-06, "loss": 0.8005, "step": 6183 }, { "epoch": 0.84, "learning_rate": 1.3426742135484405e-06, "loss": 0.8529, "step": 6184 }, { "epoch": 0.84, "learning_rate": 1.340477746854959e-06, "loss": 0.8341, "step": 6185 }, { "epoch": 0.84, "learning_rate": 1.3382829491486814e-06, "loss": 0.8785, "step": 6186 }, { "epoch": 0.84, "learning_rate": 1.3360898208526207e-06, "loss": 0.822, "step": 6187 }, { "epoch": 0.84, "learning_rate": 1.3338983623894696e-06, "loss": 0.8279, "step": 6188 }, { "epoch": 0.84, "learning_rate": 1.3317085741815972e-06, "loss": 0.8275, "step": 6189 }, { "epoch": 0.84, "learning_rate": 1.3295204566510511e-06, "loss": 0.8255, "step": 6190 }, { "epoch": 0.84, "learning_rate": 1.3273340102195532e-06, "loss": 0.8593, "step": 6191 }, { "epoch": 0.84, "learning_rate": 1.3251492353085116e-06, "loss": 0.8838, "step": 6192 }, { "epoch": 0.84, "learning_rate": 1.3229661323390042e-06, "loss": 0.8067, "step": 6193 }, { "epoch": 0.84, "learning_rate": 1.320784701731792e-06, "loss": 0.9004, "step": 6194 }, { "epoch": 0.84, "learning_rate": 1.3186049439073112e-06, "loss": 0.9063, "step": 6195 }, { "epoch": 0.84, "learning_rate": 1.3164268592856722e-06, "loss": 0.8208, "step": 6196 }, { "epoch": 0.84, "learning_rate": 1.3142504482866714e-06, "loss": 0.7894, "step": 6197 }, { "epoch": 0.84, "learning_rate": 1.3120757113297777e-06, "loss": 0.8652, "step": 6198 }, { "epoch": 0.84, "learning_rate": 1.3099026488341348e-06, "loss": 0.7961, "step": 6199 }, { "epoch": 0.84, "learning_rate": 1.3077312612185688e-06, "loss": 0.8228, "step": 6200 }, { "epoch": 0.84, "learning_rate": 1.3055615489015771e-06, "loss": 0.8235, "step": 6201 }, { "epoch": 0.84, "learning_rate": 1.303393512301342e-06, "loss": 0.8192, "step": 6202 }, { "epoch": 0.84, "learning_rate": 1.3012271518357177e-06, "loss": 0.8039, "step": 6203 }, { "epoch": 0.84, "learning_rate": 1.2990624679222341e-06, "loss": 0.813, "step": 6204 }, { "epoch": 0.84, "learning_rate": 1.2968994609781005e-06, "loss": 0.7918, "step": 6205 }, { "epoch": 0.84, "learning_rate": 1.2947381314202046e-06, "loss": 0.766, "step": 6206 }, { "epoch": 0.84, "learning_rate": 1.2925784796651086e-06, "loss": 0.776, "step": 6207 }, { "epoch": 0.84, "learning_rate": 1.2904205061290497e-06, "loss": 0.8095, "step": 6208 }, { "epoch": 0.84, "learning_rate": 1.2882642112279454e-06, "loss": 0.8442, "step": 6209 }, { "epoch": 0.84, "learning_rate": 1.286109595377384e-06, "loss": 0.7611, "step": 6210 }, { "epoch": 0.84, "learning_rate": 1.283956658992639e-06, "loss": 0.8133, "step": 6211 }, { "epoch": 0.84, "learning_rate": 1.2818054024886517e-06, "loss": 0.8677, "step": 6212 }, { "epoch": 0.84, "learning_rate": 1.279655826280045e-06, "loss": 0.8978, "step": 6213 }, { "epoch": 0.84, "learning_rate": 1.2775079307811133e-06, "loss": 0.8298, "step": 6214 }, { "epoch": 0.84, "learning_rate": 1.275361716405834e-06, "loss": 0.8745, "step": 6215 }, { "epoch": 0.84, "learning_rate": 1.2732171835678531e-06, "loss": 0.8341, "step": 6216 }, { "epoch": 0.84, "learning_rate": 1.2710743326804974e-06, "loss": 0.8262, "step": 6217 }, { "epoch": 0.84, "learning_rate": 1.268933164156767e-06, "loss": 0.8121, "step": 6218 }, { "epoch": 0.84, "learning_rate": 1.266793678409336e-06, "loss": 0.8574, "step": 6219 }, { "epoch": 0.84, "learning_rate": 1.2646558758505622e-06, "loss": 0.8088, "step": 6220 }, { "epoch": 0.84, "learning_rate": 1.2625197568924696e-06, "loss": 0.8698, "step": 6221 }, { "epoch": 0.84, "learning_rate": 1.260385321946761e-06, "loss": 0.7737, "step": 6222 }, { "epoch": 0.84, "learning_rate": 1.2582525714248195e-06, "loss": 0.7965, "step": 6223 }, { "epoch": 0.84, "learning_rate": 1.2561215057376953e-06, "loss": 0.8732, "step": 6224 }, { "epoch": 0.84, "learning_rate": 1.2539921252961207e-06, "loss": 0.8162, "step": 6225 }, { "epoch": 0.84, "learning_rate": 1.2518644305104987e-06, "loss": 0.8936, "step": 6226 }, { "epoch": 0.84, "learning_rate": 1.2497384217909102e-06, "loss": 0.8324, "step": 6227 }, { "epoch": 0.84, "learning_rate": 1.2476140995471097e-06, "loss": 0.8961, "step": 6228 }, { "epoch": 0.84, "learning_rate": 1.2454914641885251e-06, "loss": 0.8264, "step": 6229 }, { "epoch": 0.84, "learning_rate": 1.2433705161242638e-06, "loss": 0.7329, "step": 6230 }, { "epoch": 0.84, "learning_rate": 1.241251255763105e-06, "loss": 0.7196, "step": 6231 }, { "epoch": 0.84, "learning_rate": 1.2391336835135015e-06, "loss": 0.8247, "step": 6232 }, { "epoch": 0.84, "learning_rate": 1.237017799783582e-06, "loss": 0.8058, "step": 6233 }, { "epoch": 0.84, "learning_rate": 1.2349036049811513e-06, "loss": 0.8308, "step": 6234 }, { "epoch": 0.85, "learning_rate": 1.2327910995136883e-06, "loss": 0.765, "step": 6235 }, { "epoch": 0.85, "learning_rate": 1.2306802837883436e-06, "loss": 0.8299, "step": 6236 }, { "epoch": 0.85, "learning_rate": 1.228571158211943e-06, "loss": 0.8201, "step": 6237 }, { "epoch": 0.85, "learning_rate": 1.2264637231909871e-06, "loss": 0.782, "step": 6238 }, { "epoch": 0.85, "learning_rate": 1.2243579791316552e-06, "loss": 0.8431, "step": 6239 }, { "epoch": 0.85, "learning_rate": 1.2222539264397925e-06, "loss": 0.882, "step": 6240 }, { "epoch": 0.85, "learning_rate": 1.220151565520924e-06, "loss": 0.7504, "step": 6241 }, { "epoch": 0.85, "learning_rate": 1.218050896780244e-06, "loss": 0.8598, "step": 6242 }, { "epoch": 0.85, "learning_rate": 1.2159519206226268e-06, "loss": 0.812, "step": 6243 }, { "epoch": 0.85, "learning_rate": 1.2138546374526172e-06, "loss": 0.8405, "step": 6244 }, { "epoch": 0.85, "learning_rate": 1.2117590476744311e-06, "loss": 0.8359, "step": 6245 }, { "epoch": 0.85, "learning_rate": 1.2096651516919634e-06, "loss": 0.7907, "step": 6246 }, { "epoch": 0.85, "learning_rate": 1.2075729499087752e-06, "loss": 0.8474, "step": 6247 }, { "epoch": 0.85, "learning_rate": 1.2054824427281108e-06, "loss": 0.8596, "step": 6248 }, { "epoch": 0.85, "learning_rate": 1.2033936305528815e-06, "loss": 0.8074, "step": 6249 }, { "epoch": 0.85, "learning_rate": 1.2013065137856716e-06, "loss": 0.7737, "step": 6250 }, { "epoch": 0.85, "learning_rate": 1.1992210928287385e-06, "loss": 0.8271, "step": 6251 }, { "epoch": 0.85, "learning_rate": 1.1971373680840182e-06, "loss": 0.8343, "step": 6252 }, { "epoch": 0.85, "learning_rate": 1.195055339953115e-06, "loss": 0.8382, "step": 6253 }, { "epoch": 0.85, "learning_rate": 1.1929750088373071e-06, "loss": 0.832, "step": 6254 }, { "epoch": 0.85, "learning_rate": 1.1908963751375446e-06, "loss": 0.7767, "step": 6255 }, { "epoch": 0.85, "learning_rate": 1.1888194392544504e-06, "loss": 0.7651, "step": 6256 }, { "epoch": 0.85, "learning_rate": 1.1867442015883247e-06, "loss": 0.838, "step": 6257 }, { "epoch": 0.85, "learning_rate": 1.1846706625391358e-06, "loss": 0.8712, "step": 6258 }, { "epoch": 0.85, "learning_rate": 1.1825988225065266e-06, "loss": 0.8178, "step": 6259 }, { "epoch": 0.85, "learning_rate": 1.18052868188981e-06, "loss": 0.8703, "step": 6260 }, { "epoch": 0.85, "learning_rate": 1.1784602410879708e-06, "loss": 0.8502, "step": 6261 }, { "epoch": 0.85, "learning_rate": 1.1763935004996751e-06, "loss": 0.8172, "step": 6262 }, { "epoch": 0.85, "learning_rate": 1.1743284605232508e-06, "loss": 0.8107, "step": 6263 }, { "epoch": 0.85, "learning_rate": 1.1722651215567016e-06, "loss": 0.8137, "step": 6264 }, { "epoch": 0.85, "learning_rate": 1.1702034839977039e-06, "loss": 0.8261, "step": 6265 }, { "epoch": 0.85, "learning_rate": 1.1681435482436066e-06, "loss": 0.8631, "step": 6266 }, { "epoch": 0.85, "learning_rate": 1.166085314691432e-06, "loss": 0.8135, "step": 6267 }, { "epoch": 0.85, "learning_rate": 1.1640287837378706e-06, "loss": 0.8627, "step": 6268 }, { "epoch": 0.85, "learning_rate": 1.1619739557792863e-06, "loss": 0.8017, "step": 6269 }, { "epoch": 0.85, "learning_rate": 1.159920831211715e-06, "loss": 0.8477, "step": 6270 }, { "epoch": 0.85, "learning_rate": 1.157869410430863e-06, "loss": 0.8819, "step": 6271 }, { "epoch": 0.85, "learning_rate": 1.155819693832112e-06, "loss": 0.8363, "step": 6272 }, { "epoch": 0.85, "learning_rate": 1.1537716818105126e-06, "loss": 0.8425, "step": 6273 }, { "epoch": 0.85, "learning_rate": 1.151725374760786e-06, "loss": 0.7853, "step": 6274 }, { "epoch": 0.85, "learning_rate": 1.1496807730773242e-06, "loss": 0.8033, "step": 6275 }, { "epoch": 0.85, "learning_rate": 1.1476378771541953e-06, "loss": 0.8426, "step": 6276 }, { "epoch": 0.85, "learning_rate": 1.1455966873851343e-06, "loss": 0.8743, "step": 6277 }, { "epoch": 0.85, "learning_rate": 1.143557204163549e-06, "loss": 0.8294, "step": 6278 }, { "epoch": 0.85, "learning_rate": 1.1415194278825159e-06, "loss": 0.7843, "step": 6279 }, { "epoch": 0.85, "learning_rate": 1.1394833589347843e-06, "loss": 0.7662, "step": 6280 }, { "epoch": 0.85, "learning_rate": 1.1374489977127779e-06, "loss": 0.8417, "step": 6281 }, { "epoch": 0.85, "learning_rate": 1.1354163446085864e-06, "loss": 0.846, "step": 6282 }, { "epoch": 0.85, "learning_rate": 1.133385400013971e-06, "loss": 0.8486, "step": 6283 }, { "epoch": 0.85, "learning_rate": 1.131356164320363e-06, "loss": 0.7861, "step": 6284 }, { "epoch": 0.85, "learning_rate": 1.1293286379188695e-06, "loss": 0.8242, "step": 6285 }, { "epoch": 0.85, "learning_rate": 1.1273028212002623e-06, "loss": 0.8114, "step": 6286 }, { "epoch": 0.85, "learning_rate": 1.1252787145549871e-06, "loss": 0.8869, "step": 6287 }, { "epoch": 0.85, "learning_rate": 1.123256318373157e-06, "loss": 0.8085, "step": 6288 }, { "epoch": 0.85, "learning_rate": 1.1212356330445562e-06, "loss": 0.8342, "step": 6289 }, { "epoch": 0.85, "learning_rate": 1.1192166589586428e-06, "loss": 0.8379, "step": 6290 }, { "epoch": 0.85, "learning_rate": 1.1171993965045424e-06, "loss": 0.7861, "step": 6291 }, { "epoch": 0.85, "learning_rate": 1.1151838460710495e-06, "loss": 0.8537, "step": 6292 }, { "epoch": 0.85, "learning_rate": 1.113170008046629e-06, "loss": 0.8335, "step": 6293 }, { "epoch": 0.85, "learning_rate": 1.1111578828194192e-06, "loss": 0.7965, "step": 6294 }, { "epoch": 0.85, "learning_rate": 1.1091474707772242e-06, "loss": 0.8541, "step": 6295 }, { "epoch": 0.85, "learning_rate": 1.107138772307519e-06, "loss": 0.8189, "step": 6296 }, { "epoch": 0.85, "learning_rate": 1.10513178779745e-06, "loss": 0.8938, "step": 6297 }, { "epoch": 0.85, "learning_rate": 1.103126517633829e-06, "loss": 0.8297, "step": 6298 }, { "epoch": 0.85, "learning_rate": 1.101122962203144e-06, "loss": 0.805, "step": 6299 }, { "epoch": 0.85, "learning_rate": 1.0991211218915475e-06, "loss": 0.8241, "step": 6300 }, { "epoch": 0.85, "learning_rate": 1.097120997084864e-06, "loss": 0.7618, "step": 6301 }, { "epoch": 0.85, "learning_rate": 1.0951225881685823e-06, "loss": 0.883, "step": 6302 }, { "epoch": 0.85, "learning_rate": 1.09312589552787e-06, "loss": 0.8509, "step": 6303 }, { "epoch": 0.85, "learning_rate": 1.091130919547555e-06, "loss": 0.8148, "step": 6304 }, { "epoch": 0.85, "learning_rate": 1.0891376606121385e-06, "loss": 0.8344, "step": 6305 }, { "epoch": 0.85, "learning_rate": 1.0871461191057885e-06, "loss": 0.8915, "step": 6306 }, { "epoch": 0.85, "learning_rate": 1.085156295412344e-06, "loss": 0.7811, "step": 6307 }, { "epoch": 0.85, "learning_rate": 1.0831681899153135e-06, "loss": 0.895, "step": 6308 }, { "epoch": 0.86, "learning_rate": 1.0811818029978715e-06, "loss": 0.8196, "step": 6309 }, { "epoch": 0.86, "learning_rate": 1.0791971350428654e-06, "loss": 0.7652, "step": 6310 }, { "epoch": 0.86, "learning_rate": 1.0772141864328078e-06, "loss": 0.8845, "step": 6311 }, { "epoch": 0.86, "learning_rate": 1.0752329575498789e-06, "loss": 0.8404, "step": 6312 }, { "epoch": 0.86, "learning_rate": 1.0732534487759327e-06, "loss": 0.8372, "step": 6313 }, { "epoch": 0.86, "learning_rate": 1.0712756604924868e-06, "loss": 0.7659, "step": 6314 }, { "epoch": 0.86, "learning_rate": 1.0692995930807292e-06, "loss": 0.8715, "step": 6315 }, { "epoch": 0.86, "learning_rate": 1.0673252469215155e-06, "loss": 0.8158, "step": 6316 }, { "epoch": 0.86, "learning_rate": 1.0653526223953692e-06, "loss": 0.8175, "step": 6317 }, { "epoch": 0.86, "learning_rate": 1.0633817198824859e-06, "loss": 0.8382, "step": 6318 }, { "epoch": 0.86, "learning_rate": 1.0614125397627229e-06, "loss": 0.8577, "step": 6319 }, { "epoch": 0.86, "learning_rate": 1.0594450824156111e-06, "loss": 0.8571, "step": 6320 }, { "epoch": 0.86, "learning_rate": 1.057479348220346e-06, "loss": 0.829, "step": 6321 }, { "epoch": 0.86, "learning_rate": 1.055515337555789e-06, "loss": 0.7801, "step": 6322 }, { "epoch": 0.86, "learning_rate": 1.0535530508004789e-06, "loss": 0.8764, "step": 6323 }, { "epoch": 0.86, "learning_rate": 1.051592488332611e-06, "loss": 0.8603, "step": 6324 }, { "epoch": 0.86, "learning_rate": 1.0496336505300552e-06, "loss": 0.8764, "step": 6325 }, { "epoch": 0.86, "learning_rate": 1.0476765377703435e-06, "loss": 0.8432, "step": 6326 }, { "epoch": 0.86, "learning_rate": 1.045721150430683e-06, "loss": 0.8762, "step": 6327 }, { "epoch": 0.86, "learning_rate": 1.0437674888879424e-06, "loss": 0.7337, "step": 6328 }, { "epoch": 0.86, "learning_rate": 1.0418155535186591e-06, "loss": 0.8324, "step": 6329 }, { "epoch": 0.86, "learning_rate": 1.039865344699037e-06, "loss": 0.8365, "step": 6330 }, { "epoch": 0.86, "learning_rate": 1.0379168628049475e-06, "loss": 0.8775, "step": 6331 }, { "epoch": 0.86, "learning_rate": 1.0359701082119345e-06, "loss": 0.8188, "step": 6332 }, { "epoch": 0.86, "learning_rate": 1.0340250812952e-06, "loss": 0.8493, "step": 6333 }, { "epoch": 0.86, "learning_rate": 1.0320817824296202e-06, "loss": 0.8204, "step": 6334 }, { "epoch": 0.86, "learning_rate": 1.030140211989733e-06, "loss": 0.8092, "step": 6335 }, { "epoch": 0.86, "learning_rate": 1.0282003703497478e-06, "loss": 0.8537, "step": 6336 }, { "epoch": 0.86, "learning_rate": 1.026262257883538e-06, "loss": 0.786, "step": 6337 }, { "epoch": 0.86, "learning_rate": 1.0243258749646445e-06, "loss": 0.891, "step": 6338 }, { "epoch": 0.86, "learning_rate": 1.0223912219662746e-06, "loss": 0.8491, "step": 6339 }, { "epoch": 0.86, "learning_rate": 1.020458299261301e-06, "loss": 0.8509, "step": 6340 }, { "epoch": 0.86, "learning_rate": 1.0185271072222668e-06, "loss": 0.8637, "step": 6341 }, { "epoch": 0.86, "learning_rate": 1.0165976462213779e-06, "loss": 0.8694, "step": 6342 }, { "epoch": 0.86, "learning_rate": 1.0146699166305073e-06, "loss": 0.8717, "step": 6343 }, { "epoch": 0.86, "learning_rate": 1.0127439188211941e-06, "loss": 0.8053, "step": 6344 }, { "epoch": 0.86, "learning_rate": 1.0108196531646464e-06, "loss": 0.8206, "step": 6345 }, { "epoch": 0.86, "learning_rate": 1.0088971200317344e-06, "loss": 0.8775, "step": 6346 }, { "epoch": 0.86, "learning_rate": 1.006976319792996e-06, "loss": 0.8777, "step": 6347 }, { "epoch": 0.86, "learning_rate": 1.0050572528186375e-06, "loss": 0.8795, "step": 6348 }, { "epoch": 0.86, "learning_rate": 1.0031399194785252e-06, "loss": 0.7818, "step": 6349 }, { "epoch": 0.86, "learning_rate": 1.001224320142199e-06, "loss": 0.842, "step": 6350 }, { "epoch": 0.86, "learning_rate": 9.99310455178859e-07, "loss": 0.8508, "step": 6351 }, { "epoch": 0.86, "learning_rate": 9.973983249573726e-07, "loss": 0.7614, "step": 6352 }, { "epoch": 0.86, "learning_rate": 9.954879298462717e-07, "loss": 0.823, "step": 6353 }, { "epoch": 0.86, "learning_rate": 9.935792702137558e-07, "loss": 0.8593, "step": 6354 }, { "epoch": 0.86, "learning_rate": 9.916723464276924e-07, "loss": 0.8206, "step": 6355 }, { "epoch": 0.86, "learning_rate": 9.89767158855608e-07, "loss": 0.842, "step": 6356 }, { "epoch": 0.86, "learning_rate": 9.878637078646968e-07, "loss": 0.8763, "step": 6357 }, { "epoch": 0.86, "learning_rate": 9.859619938218223e-07, "loss": 0.8213, "step": 6358 }, { "epoch": 0.86, "learning_rate": 9.840620170935057e-07, "loss": 0.807, "step": 6359 }, { "epoch": 0.86, "learning_rate": 9.821637780459426e-07, "loss": 0.8705, "step": 6360 }, { "epoch": 0.86, "learning_rate": 9.80267277044985e-07, "loss": 0.8417, "step": 6361 }, { "epoch": 0.86, "learning_rate": 9.783725144561574e-07, "loss": 0.7678, "step": 6362 }, { "epoch": 0.86, "learning_rate": 9.764794906446395e-07, "loss": 0.8308, "step": 6363 }, { "epoch": 0.86, "learning_rate": 9.745882059752886e-07, "loss": 0.8165, "step": 6364 }, { "epoch": 0.86, "learning_rate": 9.726986608126176e-07, "loss": 0.812, "step": 6365 }, { "epoch": 0.86, "learning_rate": 9.708108555208073e-07, "loss": 0.826, "step": 6366 }, { "epoch": 0.86, "learning_rate": 9.68924790463701e-07, "loss": 0.8836, "step": 6367 }, { "epoch": 0.86, "learning_rate": 9.670404660048072e-07, "loss": 0.8609, "step": 6368 }, { "epoch": 0.86, "learning_rate": 9.65157882507305e-07, "loss": 0.9124, "step": 6369 }, { "epoch": 0.86, "learning_rate": 9.632770403340275e-07, "loss": 0.7769, "step": 6370 }, { "epoch": 0.86, "learning_rate": 9.613979398474815e-07, "loss": 0.8124, "step": 6371 }, { "epoch": 0.86, "learning_rate": 9.59520581409832e-07, "loss": 0.8424, "step": 6372 }, { "epoch": 0.86, "learning_rate": 9.57644965382908e-07, "loss": 0.8275, "step": 6373 }, { "epoch": 0.86, "learning_rate": 9.557710921282105e-07, "loss": 0.8156, "step": 6374 }, { "epoch": 0.86, "learning_rate": 9.53898962006896e-07, "loss": 0.816, "step": 6375 }, { "epoch": 0.86, "learning_rate": 9.520285753797897e-07, "loss": 0.8645, "step": 6376 }, { "epoch": 0.86, "learning_rate": 9.501599326073762e-07, "loss": 0.865, "step": 6377 }, { "epoch": 0.86, "learning_rate": 9.482930340498109e-07, "loss": 0.8827, "step": 6378 }, { "epoch": 0.86, "learning_rate": 9.46427880066908e-07, "loss": 0.859, "step": 6379 }, { "epoch": 0.86, "learning_rate": 9.445644710181467e-07, "loss": 0.794, "step": 6380 }, { "epoch": 0.86, "learning_rate": 9.427028072626687e-07, "loss": 0.8914, "step": 6381 }, { "epoch": 0.87, "learning_rate": 9.408428891592802e-07, "loss": 0.8456, "step": 6382 }, { "epoch": 0.87, "learning_rate": 9.389847170664546e-07, "loss": 0.8447, "step": 6383 }, { "epoch": 0.87, "learning_rate": 9.37128291342323e-07, "loss": 0.8528, "step": 6384 }, { "epoch": 0.87, "learning_rate": 9.352736123446827e-07, "loss": 0.84, "step": 6385 }, { "epoch": 0.87, "learning_rate": 9.334206804309919e-07, "loss": 0.8216, "step": 6386 }, { "epoch": 0.87, "learning_rate": 9.315694959583788e-07, "loss": 0.8215, "step": 6387 }, { "epoch": 0.87, "learning_rate": 9.297200592836264e-07, "loss": 0.8526, "step": 6388 }, { "epoch": 0.87, "learning_rate": 9.278723707631865e-07, "loss": 0.8341, "step": 6389 }, { "epoch": 0.87, "learning_rate": 9.260264307531719e-07, "loss": 0.7936, "step": 6390 }, { "epoch": 0.87, "learning_rate": 9.241822396093569e-07, "loss": 0.851, "step": 6391 }, { "epoch": 0.87, "learning_rate": 9.223397976871829e-07, "loss": 0.8131, "step": 6392 }, { "epoch": 0.87, "learning_rate": 9.2049910534175e-07, "loss": 0.8218, "step": 6393 }, { "epoch": 0.87, "learning_rate": 9.186601629278236e-07, "loss": 0.8776, "step": 6394 }, { "epoch": 0.87, "learning_rate": 9.1682297079983e-07, "loss": 0.8037, "step": 6395 }, { "epoch": 0.87, "learning_rate": 9.149875293118604e-07, "loss": 0.8858, "step": 6396 }, { "epoch": 0.87, "learning_rate": 9.131538388176664e-07, "loss": 0.8628, "step": 6397 }, { "epoch": 0.87, "learning_rate": 9.113218996706652e-07, "loss": 0.8611, "step": 6398 }, { "epoch": 0.87, "learning_rate": 9.094917122239322e-07, "loss": 0.8201, "step": 6399 }, { "epoch": 0.87, "learning_rate": 9.076632768302085e-07, "loss": 0.7699, "step": 6400 }, { "epoch": 0.87, "learning_rate": 9.058365938418945e-07, "loss": 0.8616, "step": 6401 }, { "epoch": 0.87, "learning_rate": 9.040116636110574e-07, "loss": 0.8206, "step": 6402 }, { "epoch": 0.87, "learning_rate": 9.021884864894226e-07, "loss": 0.81, "step": 6403 }, { "epoch": 0.87, "learning_rate": 9.003670628283789e-07, "loss": 0.838, "step": 6404 }, { "epoch": 0.87, "learning_rate": 8.985473929789746e-07, "loss": 0.783, "step": 6405 }, { "epoch": 0.87, "learning_rate": 8.967294772919277e-07, "loss": 0.8076, "step": 6406 }, { "epoch": 0.87, "learning_rate": 8.949133161176104e-07, "loss": 0.789, "step": 6407 }, { "epoch": 0.87, "learning_rate": 8.930989098060594e-07, "loss": 0.8989, "step": 6408 }, { "epoch": 0.87, "learning_rate": 8.912862587069726e-07, "loss": 0.8988, "step": 6409 }, { "epoch": 0.87, "learning_rate": 8.894753631697095e-07, "loss": 0.8435, "step": 6410 }, { "epoch": 0.87, "learning_rate": 8.876662235432931e-07, "loss": 0.8011, "step": 6411 }, { "epoch": 0.87, "learning_rate": 8.858588401764079e-07, "loss": 0.8767, "step": 6412 }, { "epoch": 0.87, "learning_rate": 8.840532134173963e-07, "loss": 0.8464, "step": 6413 }, { "epoch": 0.87, "learning_rate": 8.822493436142643e-07, "loss": 0.8127, "step": 6414 }, { "epoch": 0.87, "learning_rate": 8.804472311146817e-07, "loss": 0.8078, "step": 6415 }, { "epoch": 0.87, "learning_rate": 8.786468762659772e-07, "loss": 0.784, "step": 6416 }, { "epoch": 0.87, "learning_rate": 8.768482794151389e-07, "loss": 0.7991, "step": 6417 }, { "epoch": 0.87, "learning_rate": 8.750514409088206e-07, "loss": 0.898, "step": 6418 }, { "epoch": 0.87, "learning_rate": 8.73256361093332e-07, "loss": 0.7635, "step": 6419 }, { "epoch": 0.87, "learning_rate": 8.714630403146496e-07, "loss": 0.7895, "step": 6420 }, { "epoch": 0.87, "learning_rate": 8.696714789184069e-07, "loss": 0.8588, "step": 6421 }, { "epoch": 0.87, "learning_rate": 8.678816772498988e-07, "loss": 0.8182, "step": 6422 }, { "epoch": 0.87, "learning_rate": 8.660936356540794e-07, "loss": 0.8029, "step": 6423 }, { "epoch": 0.87, "learning_rate": 8.643073544755709e-07, "loss": 0.8405, "step": 6424 }, { "epoch": 0.87, "learning_rate": 8.625228340586467e-07, "loss": 0.8106, "step": 6425 }, { "epoch": 0.87, "learning_rate": 8.607400747472471e-07, "loss": 0.8088, "step": 6426 }, { "epoch": 0.87, "learning_rate": 8.589590768849698e-07, "loss": 0.83, "step": 6427 }, { "epoch": 0.87, "learning_rate": 8.571798408150745e-07, "loss": 0.7976, "step": 6428 }, { "epoch": 0.87, "learning_rate": 8.554023668804812e-07, "loss": 0.8033, "step": 6429 }, { "epoch": 0.87, "learning_rate": 8.536266554237715e-07, "loss": 0.788, "step": 6430 }, { "epoch": 0.87, "learning_rate": 8.518527067871851e-07, "loss": 0.8813, "step": 6431 }, { "epoch": 0.87, "learning_rate": 8.500805213126217e-07, "loss": 0.778, "step": 6432 }, { "epoch": 0.87, "learning_rate": 8.483100993416415e-07, "loss": 0.9024, "step": 6433 }, { "epoch": 0.87, "learning_rate": 8.465414412154693e-07, "loss": 0.8142, "step": 6434 }, { "epoch": 0.87, "learning_rate": 8.447745472749836e-07, "loss": 0.7577, "step": 6435 }, { "epoch": 0.87, "learning_rate": 8.430094178607262e-07, "loss": 0.8088, "step": 6436 }, { "epoch": 0.87, "learning_rate": 8.412460533128964e-07, "loss": 0.8246, "step": 6437 }, { "epoch": 0.87, "learning_rate": 8.394844539713586e-07, "loss": 0.8014, "step": 6438 }, { "epoch": 0.87, "learning_rate": 8.377246201756306e-07, "loss": 0.8364, "step": 6439 }, { "epoch": 0.87, "learning_rate": 8.35966552264893e-07, "loss": 0.7923, "step": 6440 }, { "epoch": 0.87, "learning_rate": 8.34210250577987e-07, "loss": 0.8306, "step": 6441 }, { "epoch": 0.87, "learning_rate": 8.32455715453413e-07, "loss": 0.8302, "step": 6442 }, { "epoch": 0.87, "learning_rate": 8.307029472293271e-07, "loss": 0.8619, "step": 6443 }, { "epoch": 0.87, "learning_rate": 8.289519462435502e-07, "loss": 0.8414, "step": 6444 }, { "epoch": 0.87, "learning_rate": 8.272027128335602e-07, "loss": 0.7911, "step": 6445 }, { "epoch": 0.87, "learning_rate": 8.254552473364952e-07, "loss": 0.7729, "step": 6446 }, { "epoch": 0.87, "learning_rate": 8.237095500891479e-07, "loss": 0.8446, "step": 6447 }, { "epoch": 0.87, "learning_rate": 8.21965621427978e-07, "loss": 0.8919, "step": 6448 }, { "epoch": 0.87, "learning_rate": 8.202234616891002e-07, "loss": 0.7878, "step": 6449 }, { "epoch": 0.87, "learning_rate": 8.18483071208287e-07, "loss": 0.8048, "step": 6450 }, { "epoch": 0.87, "learning_rate": 8.167444503209721e-07, "loss": 0.7813, "step": 6451 }, { "epoch": 0.87, "learning_rate": 8.150075993622452e-07, "loss": 0.831, "step": 6452 }, { "epoch": 0.87, "learning_rate": 8.13272518666861e-07, "loss": 0.864, "step": 6453 }, { "epoch": 0.87, "learning_rate": 8.115392085692275e-07, "loss": 0.8182, "step": 6454 }, { "epoch": 0.87, "learning_rate": 8.098076694034129e-07, "loss": 0.7653, "step": 6455 }, { "epoch": 0.88, "learning_rate": 8.080779015031426e-07, "loss": 0.8677, "step": 6456 }, { "epoch": 0.88, "learning_rate": 8.063499052018042e-07, "loss": 0.843, "step": 6457 }, { "epoch": 0.88, "learning_rate": 8.046236808324426e-07, "loss": 0.894, "step": 6458 }, { "epoch": 0.88, "learning_rate": 8.028992287277593e-07, "loss": 0.7732, "step": 6459 }, { "epoch": 0.88, "learning_rate": 8.011765492201151e-07, "loss": 0.8447, "step": 6460 }, { "epoch": 0.88, "learning_rate": 7.994556426415279e-07, "loss": 0.888, "step": 6461 }, { "epoch": 0.88, "learning_rate": 7.9773650932368e-07, "loss": 0.8659, "step": 6462 }, { "epoch": 0.88, "learning_rate": 7.960191495979041e-07, "loss": 0.8411, "step": 6463 }, { "epoch": 0.88, "learning_rate": 7.943035637951957e-07, "loss": 0.8491, "step": 6464 }, { "epoch": 0.88, "learning_rate": 7.925897522462045e-07, "loss": 0.8085, "step": 6465 }, { "epoch": 0.88, "learning_rate": 7.908777152812452e-07, "loss": 0.8685, "step": 6466 }, { "epoch": 0.88, "learning_rate": 7.891674532302828e-07, "loss": 0.8541, "step": 6467 }, { "epoch": 0.88, "learning_rate": 7.874589664229448e-07, "loss": 0.8662, "step": 6468 }, { "epoch": 0.88, "learning_rate": 7.857522551885155e-07, "loss": 0.859, "step": 6469 }, { "epoch": 0.88, "learning_rate": 7.840473198559339e-07, "loss": 0.8203, "step": 6470 }, { "epoch": 0.88, "learning_rate": 7.823441607538029e-07, "loss": 0.8163, "step": 6471 }, { "epoch": 0.88, "learning_rate": 7.806427782103798e-07, "loss": 0.7904, "step": 6472 }, { "epoch": 0.88, "learning_rate": 7.789431725535768e-07, "loss": 0.7726, "step": 6473 }, { "epoch": 0.88, "learning_rate": 7.772453441109674e-07, "loss": 0.8347, "step": 6474 }, { "epoch": 0.88, "learning_rate": 7.75549293209783e-07, "loss": 0.7999, "step": 6475 }, { "epoch": 0.88, "learning_rate": 7.738550201769091e-07, "loss": 0.867, "step": 6476 }, { "epoch": 0.88, "learning_rate": 7.721625253388909e-07, "loss": 0.8505, "step": 6477 }, { "epoch": 0.88, "learning_rate": 7.704718090219299e-07, "loss": 0.8025, "step": 6478 }, { "epoch": 0.88, "learning_rate": 7.687828715518842e-07, "loss": 0.8996, "step": 6479 }, { "epoch": 0.88, "learning_rate": 7.670957132542722e-07, "loss": 0.8091, "step": 6480 }, { "epoch": 0.88, "learning_rate": 7.654103344542674e-07, "loss": 0.8252, "step": 6481 }, { "epoch": 0.88, "learning_rate": 7.637267354766975e-07, "loss": 0.8184, "step": 6482 }, { "epoch": 0.88, "learning_rate": 7.62044916646052e-07, "loss": 0.8484, "step": 6483 }, { "epoch": 0.88, "learning_rate": 7.603648782864714e-07, "loss": 0.865, "step": 6484 }, { "epoch": 0.88, "learning_rate": 7.586866207217625e-07, "loss": 0.7578, "step": 6485 }, { "epoch": 0.88, "learning_rate": 7.570101442753808e-07, "loss": 0.8161, "step": 6486 }, { "epoch": 0.88, "learning_rate": 7.553354492704401e-07, "loss": 0.8435, "step": 6487 }, { "epoch": 0.88, "learning_rate": 7.536625360297122e-07, "loss": 0.8113, "step": 6488 }, { "epoch": 0.88, "learning_rate": 7.519914048756238e-07, "loss": 0.8348, "step": 6489 }, { "epoch": 0.88, "learning_rate": 7.503220561302604e-07, "loss": 0.755, "step": 6490 }, { "epoch": 0.88, "learning_rate": 7.486544901153637e-07, "loss": 0.8152, "step": 6491 }, { "epoch": 0.88, "learning_rate": 7.469887071523297e-07, "loss": 0.8427, "step": 6492 }, { "epoch": 0.88, "learning_rate": 7.453247075622117e-07, "loss": 0.8695, "step": 6493 }, { "epoch": 0.88, "learning_rate": 7.436624916657176e-07, "loss": 0.7865, "step": 6494 }, { "epoch": 0.88, "learning_rate": 7.420020597832178e-07, "loss": 0.8404, "step": 6495 }, { "epoch": 0.88, "learning_rate": 7.40343412234733e-07, "loss": 0.7938, "step": 6496 }, { "epoch": 0.88, "learning_rate": 7.386865493399398e-07, "loss": 0.8825, "step": 6497 }, { "epoch": 0.88, "learning_rate": 7.370314714181726e-07, "loss": 0.8984, "step": 6498 }, { "epoch": 0.88, "learning_rate": 7.353781787884251e-07, "loss": 0.7388, "step": 6499 }, { "epoch": 0.88, "learning_rate": 7.337266717693414e-07, "loss": 0.7566, "step": 6500 }, { "epoch": 0.88, "learning_rate": 7.320769506792225e-07, "loss": 0.8291, "step": 6501 }, { "epoch": 0.88, "learning_rate": 7.304290158360283e-07, "loss": 0.8079, "step": 6502 }, { "epoch": 0.88, "learning_rate": 7.287828675573694e-07, "loss": 0.8509, "step": 6503 }, { "epoch": 0.88, "learning_rate": 7.271385061605185e-07, "loss": 0.8203, "step": 6504 }, { "epoch": 0.88, "learning_rate": 7.254959319623989e-07, "loss": 0.8695, "step": 6505 }, { "epoch": 0.88, "learning_rate": 7.238551452795917e-07, "loss": 0.8398, "step": 6506 }, { "epoch": 0.88, "learning_rate": 7.222161464283307e-07, "loss": 0.8579, "step": 6507 }, { "epoch": 0.88, "learning_rate": 7.205789357245097e-07, "loss": 0.7866, "step": 6508 }, { "epoch": 0.88, "learning_rate": 7.189435134836753e-07, "loss": 0.8366, "step": 6509 }, { "epoch": 0.88, "learning_rate": 7.173098800210287e-07, "loss": 0.7214, "step": 6510 }, { "epoch": 0.88, "learning_rate": 7.15678035651427e-07, "loss": 0.8264, "step": 6511 }, { "epoch": 0.88, "learning_rate": 7.140479806893818e-07, "loss": 0.8224, "step": 6512 }, { "epoch": 0.88, "learning_rate": 7.124197154490631e-07, "loss": 0.8174, "step": 6513 }, { "epoch": 0.88, "learning_rate": 7.107932402442919e-07, "loss": 0.8605, "step": 6514 }, { "epoch": 0.88, "learning_rate": 7.091685553885464e-07, "loss": 0.8522, "step": 6515 }, { "epoch": 0.88, "learning_rate": 7.075456611949572e-07, "loss": 0.8592, "step": 6516 }, { "epoch": 0.88, "learning_rate": 7.059245579763141e-07, "loss": 0.7964, "step": 6517 }, { "epoch": 0.88, "learning_rate": 7.043052460450595e-07, "loss": 0.869, "step": 6518 }, { "epoch": 0.88, "learning_rate": 7.026877257132891e-07, "loss": 0.8128, "step": 6519 }, { "epoch": 0.88, "learning_rate": 7.010719972927549e-07, "loss": 0.8183, "step": 6520 }, { "epoch": 0.88, "learning_rate": 6.99458061094861e-07, "loss": 0.883, "step": 6521 }, { "epoch": 0.88, "learning_rate": 6.978459174306729e-07, "loss": 0.8246, "step": 6522 }, { "epoch": 0.88, "learning_rate": 6.962355666109033e-07, "loss": 0.831, "step": 6523 }, { "epoch": 0.88, "learning_rate": 6.946270089459228e-07, "loss": 0.7291, "step": 6524 }, { "epoch": 0.88, "learning_rate": 6.930202447457535e-07, "loss": 0.8017, "step": 6525 }, { "epoch": 0.88, "learning_rate": 6.914152743200775e-07, "loss": 0.858, "step": 6526 }, { "epoch": 0.88, "learning_rate": 6.898120979782264e-07, "loss": 0.8424, "step": 6527 }, { "epoch": 0.88, "learning_rate": 6.882107160291851e-07, "loss": 0.8084, "step": 6528 }, { "epoch": 0.88, "learning_rate": 6.866111287815991e-07, "loss": 0.9332, "step": 6529 }, { "epoch": 0.89, "learning_rate": 6.850133365437605e-07, "loss": 0.821, "step": 6530 }, { "epoch": 0.89, "learning_rate": 6.834173396236188e-07, "loss": 0.8616, "step": 6531 }, { "epoch": 0.89, "learning_rate": 6.818231383287788e-07, "loss": 0.8355, "step": 6532 }, { "epoch": 0.89, "learning_rate": 6.802307329664981e-07, "loss": 0.797, "step": 6533 }, { "epoch": 0.89, "learning_rate": 6.786401238436869e-07, "loss": 0.8009, "step": 6534 }, { "epoch": 0.89, "learning_rate": 6.770513112669086e-07, "loss": 0.8142, "step": 6535 }, { "epoch": 0.89, "learning_rate": 6.754642955423852e-07, "loss": 0.8699, "step": 6536 }, { "epoch": 0.89, "learning_rate": 6.738790769759873e-07, "loss": 0.8238, "step": 6537 }, { "epoch": 0.89, "learning_rate": 6.722956558732419e-07, "loss": 0.8104, "step": 6538 }, { "epoch": 0.89, "learning_rate": 6.707140325393269e-07, "loss": 0.8285, "step": 6539 }, { "epoch": 0.89, "learning_rate": 6.691342072790763e-07, "loss": 0.8435, "step": 6540 }, { "epoch": 0.89, "learning_rate": 6.675561803969765e-07, "loss": 0.8452, "step": 6541 }, { "epoch": 0.89, "learning_rate": 6.659799521971688e-07, "loss": 0.8341, "step": 6542 }, { "epoch": 0.89, "learning_rate": 6.644055229834457e-07, "loss": 0.8076, "step": 6543 }, { "epoch": 0.89, "learning_rate": 6.628328930592532e-07, "loss": 0.8154, "step": 6544 }, { "epoch": 0.89, "learning_rate": 6.612620627276889e-07, "loss": 0.8926, "step": 6545 }, { "epoch": 0.89, "learning_rate": 6.596930322915107e-07, "loss": 0.7637, "step": 6546 }, { "epoch": 0.89, "learning_rate": 6.581258020531223e-07, "loss": 0.8461, "step": 6547 }, { "epoch": 0.89, "learning_rate": 6.565603723145819e-07, "loss": 0.9226, "step": 6548 }, { "epoch": 0.89, "learning_rate": 6.549967433776005e-07, "loss": 0.8691, "step": 6549 }, { "epoch": 0.89, "learning_rate": 6.534349155435471e-07, "loss": 0.8244, "step": 6550 }, { "epoch": 0.89, "learning_rate": 6.518748891134364e-07, "loss": 0.8754, "step": 6551 }, { "epoch": 0.89, "learning_rate": 6.5031666438794e-07, "loss": 0.8129, "step": 6552 }, { "epoch": 0.89, "learning_rate": 6.487602416673811e-07, "loss": 0.7365, "step": 6553 }, { "epoch": 0.89, "learning_rate": 6.472056212517352e-07, "loss": 0.9028, "step": 6554 }, { "epoch": 0.89, "learning_rate": 6.456528034406317e-07, "loss": 0.7965, "step": 6555 }, { "epoch": 0.89, "learning_rate": 6.441017885333534e-07, "loss": 0.8997, "step": 6556 }, { "epoch": 0.89, "learning_rate": 6.42552576828831e-07, "loss": 0.7987, "step": 6557 }, { "epoch": 0.89, "learning_rate": 6.410051686256524e-07, "loss": 0.82, "step": 6558 }, { "epoch": 0.89, "learning_rate": 6.394595642220569e-07, "loss": 0.8413, "step": 6559 }, { "epoch": 0.89, "learning_rate": 6.37915763915935e-07, "loss": 0.8263, "step": 6560 }, { "epoch": 0.89, "learning_rate": 6.363737680048299e-07, "loss": 0.8472, "step": 6561 }, { "epoch": 0.89, "learning_rate": 6.348335767859371e-07, "loss": 0.8393, "step": 6562 }, { "epoch": 0.89, "learning_rate": 6.332951905561025e-07, "loss": 0.7993, "step": 6563 }, { "epoch": 0.89, "learning_rate": 6.3175860961183e-07, "loss": 0.7969, "step": 6564 }, { "epoch": 0.89, "learning_rate": 6.302238342492683e-07, "loss": 0.8055, "step": 6565 }, { "epoch": 0.89, "learning_rate": 6.286908647642231e-07, "loss": 0.8481, "step": 6566 }, { "epoch": 0.89, "learning_rate": 6.27159701452148e-07, "loss": 0.7997, "step": 6567 }, { "epoch": 0.89, "learning_rate": 6.256303446081535e-07, "loss": 0.8517, "step": 6568 }, { "epoch": 0.89, "learning_rate": 6.241027945269973e-07, "loss": 0.8457, "step": 6569 }, { "epoch": 0.89, "learning_rate": 6.225770515030916e-07, "loss": 0.7734, "step": 6570 }, { "epoch": 0.89, "learning_rate": 6.210531158304977e-07, "loss": 0.9277, "step": 6571 }, { "epoch": 0.89, "learning_rate": 6.195309878029332e-07, "loss": 0.8711, "step": 6572 }, { "epoch": 0.89, "learning_rate": 6.18010667713761e-07, "loss": 0.8805, "step": 6573 }, { "epoch": 0.89, "learning_rate": 6.164921558560033e-07, "loss": 0.8594, "step": 6574 }, { "epoch": 0.89, "learning_rate": 6.149754525223262e-07, "loss": 0.7498, "step": 6575 }, { "epoch": 0.89, "learning_rate": 6.134605580050523e-07, "loss": 0.8795, "step": 6576 }, { "epoch": 0.89, "learning_rate": 6.119474725961505e-07, "loss": 0.8567, "step": 6577 }, { "epoch": 0.89, "learning_rate": 6.104361965872485e-07, "loss": 0.8579, "step": 6578 }, { "epoch": 0.89, "learning_rate": 6.089267302696189e-07, "loss": 0.8149, "step": 6579 }, { "epoch": 0.89, "learning_rate": 6.07419073934189e-07, "loss": 0.8119, "step": 6580 }, { "epoch": 0.89, "learning_rate": 6.05913227871534e-07, "loss": 0.771, "step": 6581 }, { "epoch": 0.89, "learning_rate": 6.04409192371882e-07, "loss": 0.8547, "step": 6582 }, { "epoch": 0.89, "learning_rate": 6.029069677251143e-07, "loss": 0.823, "step": 6583 }, { "epoch": 0.89, "learning_rate": 6.014065542207603e-07, "loss": 0.8482, "step": 6584 }, { "epoch": 0.89, "learning_rate": 5.999079521480011e-07, "loss": 0.8599, "step": 6585 }, { "epoch": 0.89, "learning_rate": 5.984111617956678e-07, "loss": 0.8072, "step": 6586 }, { "epoch": 0.89, "learning_rate": 5.969161834522452e-07, "loss": 0.8259, "step": 6587 }, { "epoch": 0.89, "learning_rate": 5.954230174058662e-07, "loss": 0.8099, "step": 6588 }, { "epoch": 0.89, "learning_rate": 5.939316639443149e-07, "loss": 0.8686, "step": 6589 }, { "epoch": 0.89, "learning_rate": 5.92442123355027e-07, "loss": 0.8526, "step": 6590 }, { "epoch": 0.89, "learning_rate": 5.909543959250852e-07, "loss": 0.8169, "step": 6591 }, { "epoch": 0.89, "learning_rate": 5.894684819412289e-07, "loss": 0.8054, "step": 6592 }, { "epoch": 0.89, "learning_rate": 5.879843816898445e-07, "loss": 0.8213, "step": 6593 }, { "epoch": 0.89, "learning_rate": 5.865020954569689e-07, "loss": 0.7794, "step": 6594 }, { "epoch": 0.89, "learning_rate": 5.850216235282858e-07, "loss": 0.8749, "step": 6595 }, { "epoch": 0.89, "learning_rate": 5.83542966189139e-07, "loss": 0.8264, "step": 6596 }, { "epoch": 0.89, "learning_rate": 5.820661237245128e-07, "loss": 0.7975, "step": 6597 }, { "epoch": 0.89, "learning_rate": 5.805910964190465e-07, "loss": 0.845, "step": 6598 }, { "epoch": 0.89, "learning_rate": 5.791178845570288e-07, "loss": 0.847, "step": 6599 }, { "epoch": 0.89, "learning_rate": 5.776464884223954e-07, "loss": 0.8546, "step": 6600 }, { "epoch": 0.89, "learning_rate": 5.76176908298739e-07, "loss": 0.7853, "step": 6601 }, { "epoch": 0.89, "learning_rate": 5.747091444692953e-07, "loss": 0.8581, "step": 6602 }, { "epoch": 0.89, "learning_rate": 5.73243197216955e-07, "loss": 0.7972, "step": 6603 }, { "epoch": 0.9, "learning_rate": 5.717790668242551e-07, "loss": 0.7992, "step": 6604 }, { "epoch": 0.9, "learning_rate": 5.703167535733811e-07, "loss": 0.8343, "step": 6605 }, { "epoch": 0.9, "learning_rate": 5.688562577461765e-07, "loss": 0.845, "step": 6606 }, { "epoch": 0.9, "learning_rate": 5.67397579624126e-07, "loss": 0.8224, "step": 6607 }, { "epoch": 0.9, "learning_rate": 5.659407194883671e-07, "loss": 0.8061, "step": 6608 }, { "epoch": 0.9, "learning_rate": 5.644856776196849e-07, "loss": 0.8928, "step": 6609 }, { "epoch": 0.9, "learning_rate": 5.6303245429852e-07, "loss": 0.8208, "step": 6610 }, { "epoch": 0.9, "learning_rate": 5.615810498049557e-07, "loss": 0.8409, "step": 6611 }, { "epoch": 0.9, "learning_rate": 5.601314644187283e-07, "loss": 0.7815, "step": 6612 }, { "epoch": 0.9, "learning_rate": 5.586836984192223e-07, "loss": 0.8532, "step": 6613 }, { "epoch": 0.9, "learning_rate": 5.572377520854699e-07, "loss": 0.874, "step": 6614 }, { "epoch": 0.9, "learning_rate": 5.557936256961571e-07, "loss": 0.8306, "step": 6615 }, { "epoch": 0.9, "learning_rate": 5.54351319529618e-07, "loss": 0.8609, "step": 6616 }, { "epoch": 0.9, "learning_rate": 5.529108338638334e-07, "loss": 0.8816, "step": 6617 }, { "epoch": 0.9, "learning_rate": 5.514721689764325e-07, "loss": 0.868, "step": 6618 }, { "epoch": 0.9, "learning_rate": 5.500353251446955e-07, "loss": 0.8123, "step": 6619 }, { "epoch": 0.9, "learning_rate": 5.486003026455544e-07, "loss": 0.8115, "step": 6620 }, { "epoch": 0.9, "learning_rate": 5.471671017555846e-07, "loss": 0.8356, "step": 6621 }, { "epoch": 0.9, "learning_rate": 5.457357227510152e-07, "loss": 0.8598, "step": 6622 }, { "epoch": 0.9, "learning_rate": 5.443061659077198e-07, "loss": 0.8417, "step": 6623 }, { "epoch": 0.9, "learning_rate": 5.428784315012236e-07, "loss": 0.7996, "step": 6624 }, { "epoch": 0.9, "learning_rate": 5.414525198067011e-07, "loss": 0.8152, "step": 6625 }, { "epoch": 0.9, "learning_rate": 5.400284310989746e-07, "loss": 0.8719, "step": 6626 }, { "epoch": 0.9, "learning_rate": 5.386061656525143e-07, "loss": 0.8903, "step": 6627 }, { "epoch": 0.9, "learning_rate": 5.371857237414379e-07, "loss": 0.7947, "step": 6628 }, { "epoch": 0.9, "learning_rate": 5.357671056395164e-07, "loss": 0.8332, "step": 6629 }, { "epoch": 0.9, "learning_rate": 5.343503116201643e-07, "loss": 0.7791, "step": 6630 }, { "epoch": 0.9, "learning_rate": 5.329353419564476e-07, "loss": 0.8147, "step": 6631 }, { "epoch": 0.9, "learning_rate": 5.315221969210782e-07, "loss": 0.8126, "step": 6632 }, { "epoch": 0.9, "learning_rate": 5.301108767864171e-07, "loss": 0.8301, "step": 6633 }, { "epoch": 0.9, "learning_rate": 5.287013818244768e-07, "loss": 0.7842, "step": 6634 }, { "epoch": 0.9, "learning_rate": 5.272937123069133e-07, "loss": 0.8089, "step": 6635 }, { "epoch": 0.9, "learning_rate": 5.258878685050339e-07, "loss": 0.8178, "step": 6636 }, { "epoch": 0.9, "learning_rate": 5.244838506897909e-07, "loss": 0.8158, "step": 6637 }, { "epoch": 0.9, "learning_rate": 5.230816591317899e-07, "loss": 0.8304, "step": 6638 }, { "epoch": 0.9, "learning_rate": 5.216812941012794e-07, "loss": 0.8049, "step": 6639 }, { "epoch": 0.9, "learning_rate": 5.202827558681589e-07, "loss": 0.8488, "step": 6640 }, { "epoch": 0.9, "learning_rate": 5.188860447019728e-07, "loss": 0.8351, "step": 6641 }, { "epoch": 0.9, "learning_rate": 5.174911608719157e-07, "loss": 0.8814, "step": 6642 }, { "epoch": 0.9, "learning_rate": 5.160981046468317e-07, "loss": 0.8574, "step": 6643 }, { "epoch": 0.9, "learning_rate": 5.14706876295209e-07, "loss": 0.8392, "step": 6644 }, { "epoch": 0.9, "learning_rate": 5.133174760851856e-07, "loss": 0.7854, "step": 6645 }, { "epoch": 0.9, "learning_rate": 5.119299042845449e-07, "loss": 0.8799, "step": 6646 }, { "epoch": 0.9, "learning_rate": 5.10544161160722e-07, "loss": 0.8124, "step": 6647 }, { "epoch": 0.9, "learning_rate": 5.091602469807965e-07, "loss": 0.835, "step": 6648 }, { "epoch": 0.9, "learning_rate": 5.077781620114952e-07, "loss": 0.8266, "step": 6649 }, { "epoch": 0.9, "learning_rate": 5.063979065191948e-07, "loss": 0.793, "step": 6650 }, { "epoch": 0.9, "learning_rate": 5.050194807699149e-07, "loss": 0.8265, "step": 6651 }, { "epoch": 0.9, "learning_rate": 5.036428850293295e-07, "loss": 0.7499, "step": 6652 }, { "epoch": 0.9, "learning_rate": 5.022681195627543e-07, "loss": 0.8768, "step": 6653 }, { "epoch": 0.9, "learning_rate": 5.00895184635154e-07, "loss": 0.8802, "step": 6654 }, { "epoch": 0.9, "learning_rate": 4.99524080511139e-07, "loss": 0.805, "step": 6655 }, { "epoch": 0.9, "learning_rate": 4.981548074549669e-07, "loss": 0.8302, "step": 6656 }, { "epoch": 0.9, "learning_rate": 4.967873657305478e-07, "loss": 0.8745, "step": 6657 }, { "epoch": 0.9, "learning_rate": 4.954217556014318e-07, "loss": 0.8302, "step": 6658 }, { "epoch": 0.9, "learning_rate": 4.940579773308196e-07, "loss": 0.824, "step": 6659 }, { "epoch": 0.9, "learning_rate": 4.926960311815587e-07, "loss": 0.8625, "step": 6660 }, { "epoch": 0.9, "learning_rate": 4.913359174161403e-07, "loss": 0.83, "step": 6661 }, { "epoch": 0.9, "learning_rate": 4.89977636296709e-07, "loss": 0.7951, "step": 6662 }, { "epoch": 0.9, "learning_rate": 4.8862118808505e-07, "loss": 0.8653, "step": 6663 }, { "epoch": 0.9, "learning_rate": 4.872665730425973e-07, "loss": 0.8597, "step": 6664 }, { "epoch": 0.9, "learning_rate": 4.859137914304313e-07, "loss": 0.7936, "step": 6665 }, { "epoch": 0.9, "learning_rate": 4.845628435092797e-07, "loss": 0.8591, "step": 6666 }, { "epoch": 0.9, "learning_rate": 4.832137295395189e-07, "loss": 0.7625, "step": 6667 }, { "epoch": 0.9, "learning_rate": 4.818664497811664e-07, "loss": 0.8621, "step": 6668 }, { "epoch": 0.9, "learning_rate": 4.805210044938913e-07, "loss": 0.8342, "step": 6669 }, { "epoch": 0.9, "learning_rate": 4.791773939370048e-07, "loss": 0.704, "step": 6670 }, { "epoch": 0.9, "learning_rate": 4.778356183694688e-07, "loss": 0.7714, "step": 6671 }, { "epoch": 0.9, "learning_rate": 4.764956780498897e-07, "loss": 0.828, "step": 6672 }, { "epoch": 0.9, "learning_rate": 4.7515757323651877e-07, "loss": 0.7995, "step": 6673 }, { "epoch": 0.9, "learning_rate": 4.738213041872552e-07, "loss": 0.8212, "step": 6674 }, { "epoch": 0.9, "learning_rate": 4.72486871159642e-07, "loss": 0.7482, "step": 6675 }, { "epoch": 0.9, "learning_rate": 4.711542744108744e-07, "loss": 0.8172, "step": 6676 }, { "epoch": 0.9, "learning_rate": 4.6982351419778695e-07, "loss": 0.8797, "step": 6677 }, { "epoch": 0.91, "learning_rate": 4.684945907768623e-07, "loss": 0.8592, "step": 6678 }, { "epoch": 0.91, "learning_rate": 4.671675044042301e-07, "loss": 0.8052, "step": 6679 }, { "epoch": 0.91, "learning_rate": 4.6584225533566674e-07, "loss": 0.7954, "step": 6680 }, { "epoch": 0.91, "learning_rate": 4.645188438265924e-07, "loss": 0.8024, "step": 6681 }, { "epoch": 0.91, "learning_rate": 4.6319727013207416e-07, "loss": 0.8196, "step": 6682 }, { "epoch": 0.91, "learning_rate": 4.618775345068238e-07, "loss": 0.8011, "step": 6683 }, { "epoch": 0.91, "learning_rate": 4.605596372051979e-07, "loss": 0.8431, "step": 6684 }, { "epoch": 0.91, "learning_rate": 4.592435784812055e-07, "loss": 0.8344, "step": 6685 }, { "epoch": 0.91, "learning_rate": 4.579293585884925e-07, "loss": 0.8139, "step": 6686 }, { "epoch": 0.91, "learning_rate": 4.5661697778035643e-07, "loss": 0.8714, "step": 6687 }, { "epoch": 0.91, "learning_rate": 4.553064363097337e-07, "loss": 0.9062, "step": 6688 }, { "epoch": 0.91, "learning_rate": 4.539977344292168e-07, "loss": 0.7868, "step": 6689 }, { "epoch": 0.91, "learning_rate": 4.526908723910339e-07, "loss": 0.7873, "step": 6690 }, { "epoch": 0.91, "learning_rate": 4.513858504470625e-07, "loss": 0.7806, "step": 6691 }, { "epoch": 0.91, "learning_rate": 4.500826688488269e-07, "loss": 0.7792, "step": 6692 }, { "epoch": 0.91, "learning_rate": 4.4878132784749063e-07, "loss": 0.8066, "step": 6693 }, { "epoch": 0.91, "learning_rate": 4.4748182769387196e-07, "loss": 0.8203, "step": 6694 }, { "epoch": 0.91, "learning_rate": 4.4618416863842606e-07, "loss": 0.6858, "step": 6695 }, { "epoch": 0.91, "learning_rate": 4.4488835093125736e-07, "loss": 0.8872, "step": 6696 }, { "epoch": 0.91, "learning_rate": 4.4359437482211276e-07, "loss": 0.8744, "step": 6697 }, { "epoch": 0.91, "learning_rate": 4.423022405603894e-07, "loss": 0.8001, "step": 6698 }, { "epoch": 0.91, "learning_rate": 4.4101194839512364e-07, "loss": 0.814, "step": 6699 }, { "epoch": 0.91, "learning_rate": 4.3972349857499874e-07, "loss": 0.7918, "step": 6700 }, { "epoch": 0.91, "learning_rate": 4.384368913483439e-07, "loss": 0.8327, "step": 6701 }, { "epoch": 0.91, "learning_rate": 4.371521269631307e-07, "loss": 0.8253, "step": 6702 }, { "epoch": 0.91, "learning_rate": 4.3586920566698e-07, "loss": 0.831, "step": 6703 }, { "epoch": 0.91, "learning_rate": 4.34588127707154e-07, "loss": 0.8097, "step": 6704 }, { "epoch": 0.91, "learning_rate": 4.333088933305607e-07, "loss": 0.8213, "step": 6705 }, { "epoch": 0.91, "learning_rate": 4.3203150278375184e-07, "loss": 0.8359, "step": 6706 }, { "epoch": 0.91, "learning_rate": 4.307559563129238e-07, "loss": 0.7805, "step": 6707 }, { "epoch": 0.91, "learning_rate": 4.2948225416391986e-07, "loss": 0.8327, "step": 6708 }, { "epoch": 0.91, "learning_rate": 4.2821039658222483e-07, "loss": 0.8101, "step": 6709 }, { "epoch": 0.91, "learning_rate": 4.269403838129704e-07, "loss": 0.7936, "step": 6710 }, { "epoch": 0.91, "learning_rate": 4.2567221610092966e-07, "loss": 0.7546, "step": 6711 }, { "epoch": 0.91, "learning_rate": 4.2440589369052265e-07, "loss": 0.7783, "step": 6712 }, { "epoch": 0.91, "learning_rate": 4.231414168258163e-07, "loss": 0.7925, "step": 6713 }, { "epoch": 0.91, "learning_rate": 4.2187878575051466e-07, "loss": 0.8739, "step": 6714 }, { "epoch": 0.91, "learning_rate": 4.2061800070797186e-07, "loss": 0.8125, "step": 6715 }, { "epoch": 0.91, "learning_rate": 4.193590619411847e-07, "loss": 0.7577, "step": 6716 }, { "epoch": 0.91, "learning_rate": 4.181019696927924e-07, "loss": 0.8581, "step": 6717 }, { "epoch": 0.91, "learning_rate": 4.168467242050822e-07, "loss": 0.8528, "step": 6718 }, { "epoch": 0.91, "learning_rate": 4.155933257199807e-07, "loss": 0.8172, "step": 6719 }, { "epoch": 0.91, "learning_rate": 4.1434177447906343e-07, "loss": 0.7794, "step": 6720 }, { "epoch": 0.91, "learning_rate": 4.1309207072354305e-07, "loss": 0.8387, "step": 6721 }, { "epoch": 0.91, "learning_rate": 4.118442146942847e-07, "loss": 0.8281, "step": 6722 }, { "epoch": 0.91, "learning_rate": 4.105982066317904e-07, "loss": 0.8687, "step": 6723 }, { "epoch": 0.91, "learning_rate": 4.0935404677621025e-07, "loss": 0.8384, "step": 6724 }, { "epoch": 0.91, "learning_rate": 4.0811173536733586e-07, "loss": 0.7626, "step": 6725 }, { "epoch": 0.91, "learning_rate": 4.0687127264460224e-07, "loss": 0.8315, "step": 6726 }, { "epoch": 0.91, "learning_rate": 4.0563265884709157e-07, "loss": 0.7657, "step": 6727 }, { "epoch": 0.91, "learning_rate": 4.043958942135262e-07, "loss": 0.903, "step": 6728 }, { "epoch": 0.91, "learning_rate": 4.0316097898227215e-07, "loss": 0.8333, "step": 6729 }, { "epoch": 0.91, "learning_rate": 4.0192791339133896e-07, "loss": 0.7478, "step": 6730 }, { "epoch": 0.91, "learning_rate": 4.0069669767838436e-07, "loss": 0.8912, "step": 6731 }, { "epoch": 0.91, "learning_rate": 3.994673320807041e-07, "loss": 0.8716, "step": 6732 }, { "epoch": 0.91, "learning_rate": 3.982398168352386e-07, "loss": 0.8462, "step": 6733 }, { "epoch": 0.91, "learning_rate": 3.970141521785731e-07, "loss": 0.8962, "step": 6734 }, { "epoch": 0.91, "learning_rate": 3.9579033834693303e-07, "loss": 0.8398, "step": 6735 }, { "epoch": 0.91, "learning_rate": 3.94568375576192e-07, "loss": 0.783, "step": 6736 }, { "epoch": 0.91, "learning_rate": 3.9334826410186377e-07, "loss": 0.8206, "step": 6737 }, { "epoch": 0.91, "learning_rate": 3.9213000415910473e-07, "loss": 0.8243, "step": 6738 }, { "epoch": 0.91, "learning_rate": 3.9091359598271483e-07, "loss": 0.8266, "step": 6739 }, { "epoch": 0.91, "learning_rate": 3.896990398071399e-07, "loss": 0.8689, "step": 6740 }, { "epoch": 0.91, "learning_rate": 3.884863358664648e-07, "loss": 0.831, "step": 6741 }, { "epoch": 0.91, "learning_rate": 3.872754843944204e-07, "loss": 0.8406, "step": 6742 }, { "epoch": 0.91, "learning_rate": 3.8606648562437787e-07, "loss": 0.8348, "step": 6743 }, { "epoch": 0.91, "learning_rate": 3.8485933978935297e-07, "loss": 0.8694, "step": 6744 }, { "epoch": 0.91, "learning_rate": 3.8365404712200624e-07, "loss": 0.841, "step": 6745 }, { "epoch": 0.91, "learning_rate": 3.824506078546353e-07, "loss": 0.9009, "step": 6746 }, { "epoch": 0.91, "learning_rate": 3.8124902221918783e-07, "loss": 0.7981, "step": 6747 }, { "epoch": 0.91, "learning_rate": 3.800492904472497e-07, "loss": 0.8208, "step": 6748 }, { "epoch": 0.91, "learning_rate": 3.788514127700493e-07, "loss": 0.8502, "step": 6749 }, { "epoch": 0.91, "learning_rate": 3.776553894184598e-07, "loss": 0.8555, "step": 6750 }, { "epoch": 0.92, "learning_rate": 3.764612206229956e-07, "loss": 0.8339, "step": 6751 }, { "epoch": 0.92, "learning_rate": 3.7526890661381375e-07, "loss": 0.8374, "step": 6752 }, { "epoch": 0.92, "learning_rate": 3.740784476207149e-07, "loss": 0.8421, "step": 6753 }, { "epoch": 0.92, "learning_rate": 3.728898438731388e-07, "loss": 0.8067, "step": 6754 }, { "epoch": 0.92, "learning_rate": 3.7170309560017327e-07, "loss": 0.7838, "step": 6755 }, { "epoch": 0.92, "learning_rate": 3.7051820303054544e-07, "loss": 0.8306, "step": 6756 }, { "epoch": 0.92, "learning_rate": 3.6933516639262257e-07, "loss": 0.7777, "step": 6757 }, { "epoch": 0.92, "learning_rate": 3.681539859144168e-07, "loss": 0.7609, "step": 6758 }, { "epoch": 0.92, "learning_rate": 3.6697466182358366e-07, "loss": 0.8149, "step": 6759 }, { "epoch": 0.92, "learning_rate": 3.6579719434741924e-07, "loss": 0.8275, "step": 6760 }, { "epoch": 0.92, "learning_rate": 3.6462158371286194e-07, "loss": 0.8158, "step": 6761 }, { "epoch": 0.92, "learning_rate": 3.6344783014649054e-07, "loss": 0.8411, "step": 6762 }, { "epoch": 0.92, "learning_rate": 3.6227593387452743e-07, "loss": 0.8032, "step": 6763 }, { "epoch": 0.92, "learning_rate": 3.6110589512284076e-07, "loss": 0.8178, "step": 6764 }, { "epoch": 0.92, "learning_rate": 3.599377141169336e-07, "loss": 0.8287, "step": 6765 }, { "epoch": 0.92, "learning_rate": 3.587713910819568e-07, "loss": 0.7287, "step": 6766 }, { "epoch": 0.92, "learning_rate": 3.5760692624269956e-07, "loss": 0.8423, "step": 6767 }, { "epoch": 0.92, "learning_rate": 3.564443198235945e-07, "loss": 0.7728, "step": 6768 }, { "epoch": 0.92, "learning_rate": 3.5528357204871686e-07, "loss": 0.8093, "step": 6769 }, { "epoch": 0.92, "learning_rate": 3.541246831417811e-07, "loss": 0.8461, "step": 6770 }, { "epoch": 0.92, "learning_rate": 3.5296765332614615e-07, "loss": 0.844, "step": 6771 }, { "epoch": 0.92, "learning_rate": 3.5181248282480815e-07, "loss": 0.8569, "step": 6772 }, { "epoch": 0.92, "learning_rate": 3.506591718604124e-07, "loss": 0.8874, "step": 6773 }, { "epoch": 0.92, "learning_rate": 3.4950772065523996e-07, "loss": 0.8763, "step": 6774 }, { "epoch": 0.92, "learning_rate": 3.483581294312155e-07, "loss": 0.8142, "step": 6775 }, { "epoch": 0.92, "learning_rate": 3.472103984099029e-07, "loss": 0.8347, "step": 6776 }, { "epoch": 0.92, "learning_rate": 3.4606452781250966e-07, "loss": 0.8312, "step": 6777 }, { "epoch": 0.92, "learning_rate": 3.449205178598869e-07, "loss": 0.8684, "step": 6778 }, { "epoch": 0.92, "learning_rate": 3.4377836877252156e-07, "loss": 0.8845, "step": 6779 }, { "epoch": 0.92, "learning_rate": 3.426380807705476e-07, "loss": 0.8971, "step": 6780 }, { "epoch": 0.92, "learning_rate": 3.4149965407373474e-07, "loss": 0.8044, "step": 6781 }, { "epoch": 0.92, "learning_rate": 3.403630889014986e-07, "loss": 0.8392, "step": 6782 }, { "epoch": 0.92, "learning_rate": 3.3922838547289507e-07, "loss": 0.8344, "step": 6783 }, { "epoch": 0.92, "learning_rate": 3.380955440066203e-07, "loss": 0.8721, "step": 6784 }, { "epoch": 0.92, "learning_rate": 3.369645647210096e-07, "loss": 0.8319, "step": 6785 }, { "epoch": 0.92, "learning_rate": 3.358354478340431e-07, "loss": 0.8611, "step": 6786 }, { "epoch": 0.92, "learning_rate": 3.3470819356334003e-07, "loss": 0.7999, "step": 6787 }, { "epoch": 0.92, "learning_rate": 3.335828021261622e-07, "loss": 0.8706, "step": 6788 }, { "epoch": 0.92, "learning_rate": 3.324592737394083e-07, "loss": 0.8154, "step": 6789 }, { "epoch": 0.92, "learning_rate": 3.3133760861962404e-07, "loss": 0.8748, "step": 6790 }, { "epoch": 0.92, "learning_rate": 3.302178069829909e-07, "loss": 0.7973, "step": 6791 }, { "epoch": 0.92, "learning_rate": 3.29099869045334e-07, "loss": 0.7202, "step": 6792 }, { "epoch": 0.92, "learning_rate": 3.279837950221176e-07, "loss": 0.8582, "step": 6793 }, { "epoch": 0.92, "learning_rate": 3.2686958512844867e-07, "loss": 0.8501, "step": 6794 }, { "epoch": 0.92, "learning_rate": 3.25757239579072e-07, "loss": 0.7897, "step": 6795 }, { "epoch": 0.92, "learning_rate": 3.24646758588375e-07, "loss": 0.8523, "step": 6796 }, { "epoch": 0.92, "learning_rate": 3.235381423703865e-07, "loss": 0.8474, "step": 6797 }, { "epoch": 0.92, "learning_rate": 3.224313911387755e-07, "loss": 0.8473, "step": 6798 }, { "epoch": 0.92, "learning_rate": 3.2132650510684924e-07, "loss": 0.7914, "step": 6799 }, { "epoch": 0.92, "learning_rate": 3.202234844875574e-07, "loss": 0.7803, "step": 6800 }, { "epoch": 0.92, "learning_rate": 3.19122329493492e-07, "loss": 0.8627, "step": 6801 }, { "epoch": 0.92, "learning_rate": 3.1802304033688004e-07, "loss": 0.8842, "step": 6802 }, { "epoch": 0.92, "learning_rate": 3.169256172295954e-07, "loss": 0.8192, "step": 6803 }, { "epoch": 0.92, "learning_rate": 3.1583006038314767e-07, "loss": 0.8389, "step": 6804 }, { "epoch": 0.92, "learning_rate": 3.1473637000868694e-07, "loss": 0.8289, "step": 6805 }, { "epoch": 0.92, "learning_rate": 3.136445463170079e-07, "loss": 0.8068, "step": 6806 }, { "epoch": 0.92, "learning_rate": 3.1255458951854113e-07, "loss": 0.8364, "step": 6807 }, { "epoch": 0.92, "learning_rate": 3.114664998233585e-07, "loss": 0.8161, "step": 6808 }, { "epoch": 0.92, "learning_rate": 3.103802774411702e-07, "loss": 0.7577, "step": 6809 }, { "epoch": 0.92, "learning_rate": 3.0929592258133303e-07, "loss": 0.7875, "step": 6810 }, { "epoch": 0.92, "learning_rate": 3.0821343545283657e-07, "loss": 0.8168, "step": 6811 }, { "epoch": 0.92, "learning_rate": 3.071328162643139e-07, "loss": 0.8065, "step": 6812 }, { "epoch": 0.92, "learning_rate": 3.0605406522403624e-07, "loss": 0.8004, "step": 6813 }, { "epoch": 0.92, "learning_rate": 3.0497718253991724e-07, "loss": 0.7936, "step": 6814 }, { "epoch": 0.92, "learning_rate": 3.0390216841950873e-07, "loss": 0.8469, "step": 6815 }, { "epoch": 0.92, "learning_rate": 3.0282902307000375e-07, "loss": 0.7927, "step": 6816 }, { "epoch": 0.92, "learning_rate": 3.0175774669823356e-07, "loss": 0.8395, "step": 6817 }, { "epoch": 0.92, "learning_rate": 3.0068833951066747e-07, "loss": 0.8123, "step": 6818 }, { "epoch": 0.92, "learning_rate": 2.996208017134217e-07, "loss": 0.851, "step": 6819 }, { "epoch": 0.92, "learning_rate": 2.9855513351224494e-07, "loss": 0.8391, "step": 6820 }, { "epoch": 0.92, "learning_rate": 2.974913351125275e-07, "loss": 0.9114, "step": 6821 }, { "epoch": 0.92, "learning_rate": 2.964294067193008e-07, "loss": 0.8258, "step": 6822 }, { "epoch": 0.92, "learning_rate": 2.953693485372333e-07, "loss": 0.8568, "step": 6823 }, { "epoch": 0.92, "learning_rate": 2.9431116077063726e-07, "loss": 0.8393, "step": 6824 }, { "epoch": 0.93, "learning_rate": 2.9325484362345945e-07, "loss": 0.8486, "step": 6825 }, { "epoch": 0.93, "learning_rate": 2.922003972992904e-07, "loss": 0.832, "step": 6826 }, { "epoch": 0.93, "learning_rate": 2.9114782200135525e-07, "loss": 0.7729, "step": 6827 }, { "epoch": 0.93, "learning_rate": 2.9009711793252516e-07, "loss": 0.8275, "step": 6828 }, { "epoch": 0.93, "learning_rate": 2.8904828529530473e-07, "loss": 0.8272, "step": 6829 }, { "epoch": 0.93, "learning_rate": 2.8800132429184004e-07, "loss": 0.8079, "step": 6830 }, { "epoch": 0.93, "learning_rate": 2.8695623512391634e-07, "loss": 0.8767, "step": 6831 }, { "epoch": 0.93, "learning_rate": 2.859130179929581e-07, "loss": 0.8078, "step": 6832 }, { "epoch": 0.93, "learning_rate": 2.8487167310002894e-07, "loss": 0.8268, "step": 6833 }, { "epoch": 0.93, "learning_rate": 2.838322006458327e-07, "loss": 0.8249, "step": 6834 }, { "epoch": 0.93, "learning_rate": 2.8279460083071255e-07, "loss": 0.8658, "step": 6835 }, { "epoch": 0.93, "learning_rate": 2.817588738546473e-07, "loss": 0.8424, "step": 6836 }, { "epoch": 0.93, "learning_rate": 2.807250199172573e-07, "loss": 0.8853, "step": 6837 }, { "epoch": 0.93, "learning_rate": 2.79693039217801e-07, "loss": 0.7838, "step": 6838 }, { "epoch": 0.93, "learning_rate": 2.7866293195517923e-07, "loss": 0.8057, "step": 6839 }, { "epoch": 0.93, "learning_rate": 2.7763469832792767e-07, "loss": 0.7929, "step": 6840 }, { "epoch": 0.93, "learning_rate": 2.766083385342222e-07, "loss": 0.8965, "step": 6841 }, { "epoch": 0.93, "learning_rate": 2.755838527718757e-07, "loss": 0.8552, "step": 6842 }, { "epoch": 0.93, "learning_rate": 2.745612412383447e-07, "loss": 0.8659, "step": 6843 }, { "epoch": 0.93, "learning_rate": 2.735405041307215e-07, "loss": 0.7805, "step": 6844 }, { "epoch": 0.93, "learning_rate": 2.725216416457344e-07, "loss": 0.7712, "step": 6845 }, { "epoch": 0.93, "learning_rate": 2.7150465397975613e-07, "loss": 0.8315, "step": 6846 }, { "epoch": 0.93, "learning_rate": 2.7048954132879115e-07, "loss": 0.7735, "step": 6847 }, { "epoch": 0.93, "learning_rate": 2.6947630388849175e-07, "loss": 0.8773, "step": 6848 }, { "epoch": 0.93, "learning_rate": 2.6846494185414076e-07, "loss": 0.8481, "step": 6849 }, { "epoch": 0.93, "learning_rate": 2.674554554206621e-07, "loss": 0.848, "step": 6850 }, { "epoch": 0.93, "learning_rate": 2.6644784478261797e-07, "loss": 0.8615, "step": 6851 }, { "epoch": 0.93, "learning_rate": 2.6544211013421084e-07, "loss": 0.791, "step": 6852 }, { "epoch": 0.93, "learning_rate": 2.644382516692812e-07, "loss": 0.768, "step": 6853 }, { "epoch": 0.93, "learning_rate": 2.634362695813053e-07, "loss": 0.773, "step": 6854 }, { "epoch": 0.93, "learning_rate": 2.624361640633999e-07, "loss": 0.8516, "step": 6855 }, { "epoch": 0.93, "learning_rate": 2.6143793530831853e-07, "loss": 0.8486, "step": 6856 }, { "epoch": 0.93, "learning_rate": 2.604415835084562e-07, "loss": 0.8677, "step": 6857 }, { "epoch": 0.93, "learning_rate": 2.594471088558437e-07, "loss": 0.817, "step": 6858 }, { "epoch": 0.93, "learning_rate": 2.5845451154214994e-07, "loss": 0.7704, "step": 6859 }, { "epoch": 0.93, "learning_rate": 2.57463791758682e-07, "loss": 0.839, "step": 6860 }, { "epoch": 0.93, "learning_rate": 2.56474949696387e-07, "loss": 0.8318, "step": 6861 }, { "epoch": 0.93, "learning_rate": 2.5548798554584695e-07, "loss": 0.8023, "step": 6862 }, { "epoch": 0.93, "learning_rate": 2.5450289949728536e-07, "loss": 0.92, "step": 6863 }, { "epoch": 0.93, "learning_rate": 2.5351969174056133e-07, "loss": 0.7912, "step": 6864 }, { "epoch": 0.93, "learning_rate": 2.525383624651723e-07, "loss": 0.8927, "step": 6865 }, { "epoch": 0.93, "learning_rate": 2.515589118602557e-07, "loss": 0.8561, "step": 6866 }, { "epoch": 0.93, "learning_rate": 2.50581340114584e-07, "loss": 0.8178, "step": 6867 }, { "epoch": 0.93, "learning_rate": 2.496056474165687e-07, "loss": 0.8321, "step": 6868 }, { "epoch": 0.93, "learning_rate": 2.4863183395425816e-07, "loss": 0.8127, "step": 6869 }, { "epoch": 0.93, "learning_rate": 2.4765989991534344e-07, "loss": 0.8613, "step": 6870 }, { "epoch": 0.93, "learning_rate": 2.466898454871469e-07, "loss": 0.8479, "step": 6871 }, { "epoch": 0.93, "learning_rate": 2.4572167085663124e-07, "loss": 0.7997, "step": 6872 }, { "epoch": 0.93, "learning_rate": 2.4475537621039715e-07, "loss": 0.8451, "step": 6873 }, { "epoch": 0.93, "learning_rate": 2.4379096173468343e-07, "loss": 0.8501, "step": 6874 }, { "epoch": 0.93, "learning_rate": 2.4282842761536586e-07, "loss": 0.8047, "step": 6875 }, { "epoch": 0.93, "learning_rate": 2.4186777403795714e-07, "loss": 0.8425, "step": 6876 }, { "epoch": 0.93, "learning_rate": 2.409090011876081e-07, "loss": 0.8196, "step": 6877 }, { "epoch": 0.93, "learning_rate": 2.399521092491075e-07, "loss": 0.811, "step": 6878 }, { "epoch": 0.93, "learning_rate": 2.3899709840688124e-07, "loss": 0.7934, "step": 6879 }, { "epoch": 0.93, "learning_rate": 2.3804396884499313e-07, "loss": 0.7988, "step": 6880 }, { "epoch": 0.93, "learning_rate": 2.3709272074714408e-07, "loss": 0.7859, "step": 6881 }, { "epoch": 0.93, "learning_rate": 2.361433542966718e-07, "loss": 0.8652, "step": 6882 }, { "epoch": 0.93, "learning_rate": 2.3519586967655217e-07, "loss": 0.8342, "step": 6883 }, { "epoch": 0.93, "learning_rate": 2.3425026706939692e-07, "loss": 0.8762, "step": 6884 }, { "epoch": 0.93, "learning_rate": 2.333065466574569e-07, "loss": 0.8268, "step": 6885 }, { "epoch": 0.93, "learning_rate": 2.3236470862261996e-07, "loss": 0.7855, "step": 6886 }, { "epoch": 0.93, "learning_rate": 2.3142475314640867e-07, "loss": 0.8692, "step": 6887 }, { "epoch": 0.93, "learning_rate": 2.30486680409987e-07, "loss": 0.7871, "step": 6888 }, { "epoch": 0.93, "learning_rate": 2.2955049059415258e-07, "loss": 0.9185, "step": 6889 }, { "epoch": 0.93, "learning_rate": 2.2861618387934213e-07, "loss": 0.8405, "step": 6890 }, { "epoch": 0.93, "learning_rate": 2.2768376044562834e-07, "loss": 0.7703, "step": 6891 }, { "epoch": 0.93, "learning_rate": 2.2675322047271963e-07, "loss": 0.8055, "step": 6892 }, { "epoch": 0.93, "learning_rate": 2.258245641399648e-07, "loss": 0.8315, "step": 6893 }, { "epoch": 0.93, "learning_rate": 2.248977916263473e-07, "loss": 0.8881, "step": 6894 }, { "epoch": 0.93, "learning_rate": 2.2397290311048868e-07, "loss": 0.7871, "step": 6895 }, { "epoch": 0.93, "learning_rate": 2.2304989877064643e-07, "loss": 0.8638, "step": 6896 }, { "epoch": 0.93, "learning_rate": 2.2212877878471372e-07, "loss": 0.8615, "step": 6897 }, { "epoch": 0.93, "learning_rate": 2.2120954333022304e-07, "loss": 0.7755, "step": 6898 }, { "epoch": 0.94, "learning_rate": 2.2029219258434376e-07, "loss": 0.8721, "step": 6899 }, { "epoch": 0.94, "learning_rate": 2.193767267238789e-07, "loss": 0.8182, "step": 6900 }, { "epoch": 0.94, "learning_rate": 2.1846314592527172e-07, "loss": 0.8566, "step": 6901 }, { "epoch": 0.94, "learning_rate": 2.1755145036459814e-07, "loss": 0.7991, "step": 6902 }, { "epoch": 0.94, "learning_rate": 2.1664164021757638e-07, "loss": 0.9166, "step": 6903 }, { "epoch": 0.94, "learning_rate": 2.1573371565955736e-07, "loss": 0.824, "step": 6904 }, { "epoch": 0.94, "learning_rate": 2.1482767686552774e-07, "loss": 0.8568, "step": 6905 }, { "epoch": 0.94, "learning_rate": 2.139235240101134e-07, "loss": 0.8076, "step": 6906 }, { "epoch": 0.94, "learning_rate": 2.1302125726757383e-07, "loss": 0.8449, "step": 6907 }, { "epoch": 0.94, "learning_rate": 2.1212087681180993e-07, "loss": 0.8272, "step": 6908 }, { "epoch": 0.94, "learning_rate": 2.112223828163551e-07, "loss": 0.8455, "step": 6909 }, { "epoch": 0.94, "learning_rate": 2.103257754543786e-07, "loss": 0.8029, "step": 6910 }, { "epoch": 0.94, "learning_rate": 2.0943105489868666e-07, "loss": 0.8222, "step": 6911 }, { "epoch": 0.94, "learning_rate": 2.0853822132172574e-07, "loss": 0.8504, "step": 6912 }, { "epoch": 0.94, "learning_rate": 2.076472748955727e-07, "loss": 0.8182, "step": 6913 }, { "epoch": 0.94, "learning_rate": 2.0675821579194567e-07, "loss": 0.7834, "step": 6914 }, { "epoch": 0.94, "learning_rate": 2.058710441821954e-07, "loss": 0.8253, "step": 6915 }, { "epoch": 0.94, "learning_rate": 2.0498576023731064e-07, "loss": 0.7785, "step": 6916 }, { "epoch": 0.94, "learning_rate": 2.0410236412791606e-07, "loss": 0.8501, "step": 6917 }, { "epoch": 0.94, "learning_rate": 2.032208560242732e-07, "loss": 0.8232, "step": 6918 }, { "epoch": 0.94, "learning_rate": 2.0234123609627732e-07, "loss": 0.8201, "step": 6919 }, { "epoch": 0.94, "learning_rate": 2.0146350451346275e-07, "loss": 0.8193, "step": 6920 }, { "epoch": 0.94, "learning_rate": 2.0058766144499642e-07, "loss": 0.8223, "step": 6921 }, { "epoch": 0.94, "learning_rate": 1.9971370705968663e-07, "loss": 0.8342, "step": 6922 }, { "epoch": 0.94, "learning_rate": 1.9884164152597307e-07, "loss": 0.8148, "step": 6923 }, { "epoch": 0.94, "learning_rate": 1.9797146501193243e-07, "loss": 0.7939, "step": 6924 }, { "epoch": 0.94, "learning_rate": 1.971031776852772e-07, "loss": 0.8342, "step": 6925 }, { "epoch": 0.94, "learning_rate": 1.9623677971335464e-07, "loss": 0.8405, "step": 6926 }, { "epoch": 0.94, "learning_rate": 1.9537227126315338e-07, "loss": 0.832, "step": 6927 }, { "epoch": 0.94, "learning_rate": 1.9450965250129127e-07, "loss": 0.7946, "step": 6928 }, { "epoch": 0.94, "learning_rate": 1.936489235940242e-07, "loss": 0.8265, "step": 6929 }, { "epoch": 0.94, "learning_rate": 1.9279008470724502e-07, "loss": 0.8715, "step": 6930 }, { "epoch": 0.94, "learning_rate": 1.9193313600648244e-07, "loss": 0.7975, "step": 6931 }, { "epoch": 0.94, "learning_rate": 1.910780776568977e-07, "loss": 0.8433, "step": 6932 }, { "epoch": 0.94, "learning_rate": 1.9022490982329221e-07, "loss": 0.8264, "step": 6933 }, { "epoch": 0.94, "learning_rate": 1.8937363267009901e-07, "loss": 0.8614, "step": 6934 }, { "epoch": 0.94, "learning_rate": 1.88524246361389e-07, "loss": 0.7763, "step": 6935 }, { "epoch": 0.94, "learning_rate": 1.876767510608679e-07, "loss": 0.8131, "step": 6936 }, { "epoch": 0.94, "learning_rate": 1.8683114693187731e-07, "loss": 0.85, "step": 6937 }, { "epoch": 0.94, "learning_rate": 1.8598743413739462e-07, "loss": 0.8341, "step": 6938 }, { "epoch": 0.94, "learning_rate": 1.8514561284003085e-07, "loss": 0.8238, "step": 6939 }, { "epoch": 0.94, "learning_rate": 1.8430568320203512e-07, "loss": 0.8561, "step": 6940 }, { "epoch": 0.94, "learning_rate": 1.8346764538529127e-07, "loss": 0.8263, "step": 6941 }, { "epoch": 0.94, "learning_rate": 1.8263149955131564e-07, "loss": 0.8035, "step": 6942 }, { "epoch": 0.94, "learning_rate": 1.817972458612649e-07, "loss": 0.7378, "step": 6943 }, { "epoch": 0.94, "learning_rate": 1.8096488447592598e-07, "loss": 0.8044, "step": 6944 }, { "epoch": 0.94, "learning_rate": 1.8013441555572607e-07, "loss": 0.8109, "step": 6945 }, { "epoch": 0.94, "learning_rate": 1.7930583926072275e-07, "loss": 0.8844, "step": 6946 }, { "epoch": 0.94, "learning_rate": 1.784791557506127e-07, "loss": 0.834, "step": 6947 }, { "epoch": 0.94, "learning_rate": 1.776543651847251e-07, "loss": 0.8429, "step": 6948 }, { "epoch": 0.94, "learning_rate": 1.7683146772202508e-07, "loss": 0.8415, "step": 6949 }, { "epoch": 0.94, "learning_rate": 1.760104635211146e-07, "loss": 0.7712, "step": 6950 }, { "epoch": 0.94, "learning_rate": 1.7519135274022824e-07, "loss": 0.8754, "step": 6951 }, { "epoch": 0.94, "learning_rate": 1.7437413553723749e-07, "loss": 0.8524, "step": 6952 }, { "epoch": 0.94, "learning_rate": 1.7355881206964742e-07, "loss": 0.8117, "step": 6953 }, { "epoch": 0.94, "learning_rate": 1.7274538249460015e-07, "loss": 0.8381, "step": 6954 }, { "epoch": 0.94, "learning_rate": 1.719338469688714e-07, "loss": 0.8361, "step": 6955 }, { "epoch": 0.94, "learning_rate": 1.7112420564887046e-07, "loss": 0.7829, "step": 6956 }, { "epoch": 0.94, "learning_rate": 1.703164586906436e-07, "loss": 0.869, "step": 6957 }, { "epoch": 0.94, "learning_rate": 1.6951060624987082e-07, "loss": 0.7521, "step": 6958 }, { "epoch": 0.94, "learning_rate": 1.6870664848186891e-07, "loss": 0.8638, "step": 6959 }, { "epoch": 0.94, "learning_rate": 1.6790458554158728e-07, "loss": 0.8124, "step": 6960 }, { "epoch": 0.94, "learning_rate": 1.6710441758361117e-07, "loss": 0.8258, "step": 6961 }, { "epoch": 0.94, "learning_rate": 1.6630614476216056e-07, "loss": 0.7698, "step": 6962 }, { "epoch": 0.94, "learning_rate": 1.6550976723109013e-07, "loss": 0.835, "step": 6963 }, { "epoch": 0.94, "learning_rate": 1.6471528514388824e-07, "loss": 0.8746, "step": 6964 }, { "epoch": 0.94, "learning_rate": 1.6392269865368015e-07, "loss": 0.8606, "step": 6965 }, { "epoch": 0.94, "learning_rate": 1.631320079132237e-07, "loss": 0.8332, "step": 6966 }, { "epoch": 0.94, "learning_rate": 1.623432130749125e-07, "loss": 0.8549, "step": 6967 }, { "epoch": 0.94, "learning_rate": 1.6155631429077389e-07, "loss": 0.8117, "step": 6968 }, { "epoch": 0.94, "learning_rate": 1.6077131171247096e-07, "loss": 0.8447, "step": 6969 }, { "epoch": 0.94, "learning_rate": 1.5998820549130046e-07, "loss": 0.8202, "step": 6970 }, { "epoch": 0.94, "learning_rate": 1.5920699577819388e-07, "loss": 0.8366, "step": 6971 }, { "epoch": 0.94, "learning_rate": 1.5842768272371523e-07, "loss": 0.8135, "step": 6972 }, { "epoch": 0.95, "learning_rate": 1.576502664780688e-07, "loss": 0.7665, "step": 6973 }, { "epoch": 0.95, "learning_rate": 1.5687474719108586e-07, "loss": 0.8345, "step": 6974 }, { "epoch": 0.95, "learning_rate": 1.5610112501223796e-07, "loss": 0.8571, "step": 6975 }, { "epoch": 0.95, "learning_rate": 1.553294000906269e-07, "loss": 0.8307, "step": 6976 }, { "epoch": 0.95, "learning_rate": 1.5455957257499043e-07, "loss": 0.7859, "step": 6977 }, { "epoch": 0.95, "learning_rate": 1.5379164261370317e-07, "loss": 0.8512, "step": 6978 }, { "epoch": 0.95, "learning_rate": 1.5302561035477003e-07, "loss": 0.8292, "step": 6979 }, { "epoch": 0.95, "learning_rate": 1.5226147594583073e-07, "loss": 0.7969, "step": 6980 }, { "epoch": 0.95, "learning_rate": 1.5149923953416078e-07, "loss": 0.7618, "step": 6981 }, { "epoch": 0.95, "learning_rate": 1.5073890126667156e-07, "loss": 0.8547, "step": 6982 }, { "epoch": 0.95, "learning_rate": 1.4998046128990362e-07, "loss": 0.8388, "step": 6983 }, { "epoch": 0.95, "learning_rate": 1.492239197500356e-07, "loss": 0.7907, "step": 6984 }, { "epoch": 0.95, "learning_rate": 1.4846927679287747e-07, "loss": 0.8345, "step": 6985 }, { "epoch": 0.95, "learning_rate": 1.477165325638763e-07, "loss": 0.8027, "step": 6986 }, { "epoch": 0.95, "learning_rate": 1.4696568720811266e-07, "loss": 0.8274, "step": 6987 }, { "epoch": 0.95, "learning_rate": 1.462167408702986e-07, "loss": 0.8248, "step": 6988 }, { "epoch": 0.95, "learning_rate": 1.4546969369478191e-07, "loss": 0.755, "step": 6989 }, { "epoch": 0.95, "learning_rate": 1.4472454582554418e-07, "loss": 0.7475, "step": 6990 }, { "epoch": 0.95, "learning_rate": 1.439812974062016e-07, "loss": 0.8074, "step": 6991 }, { "epoch": 0.95, "learning_rate": 1.43239948580004e-07, "loss": 0.8073, "step": 6992 }, { "epoch": 0.95, "learning_rate": 1.4250049948983491e-07, "loss": 0.7822, "step": 6993 }, { "epoch": 0.95, "learning_rate": 1.417629502782092e-07, "loss": 0.824, "step": 6994 }, { "epoch": 0.95, "learning_rate": 1.410273010872798e-07, "loss": 0.8075, "step": 6995 }, { "epoch": 0.95, "learning_rate": 1.4029355205883222e-07, "loss": 0.9294, "step": 6996 }, { "epoch": 0.95, "learning_rate": 1.3956170333428332e-07, "loss": 0.8531, "step": 6997 }, { "epoch": 0.95, "learning_rate": 1.3883175505468693e-07, "loss": 0.8473, "step": 6998 }, { "epoch": 0.95, "learning_rate": 1.381037073607272e-07, "loss": 0.8457, "step": 6999 }, { "epoch": 0.95, "learning_rate": 1.3737756039272632e-07, "loss": 0.8836, "step": 7000 }, { "epoch": 0.95, "learning_rate": 1.3665331429063678e-07, "loss": 0.8615, "step": 7001 }, { "epoch": 0.95, "learning_rate": 1.359309691940458e-07, "loss": 0.8082, "step": 7002 }, { "epoch": 0.95, "learning_rate": 1.3521052524217315e-07, "loss": 0.8044, "step": 7003 }, { "epoch": 0.95, "learning_rate": 1.344919825738733e-07, "loss": 0.7977, "step": 7004 }, { "epoch": 0.95, "learning_rate": 1.3377534132763548e-07, "loss": 0.8713, "step": 7005 }, { "epoch": 0.95, "learning_rate": 1.330606016415803e-07, "loss": 0.8604, "step": 7006 }, { "epoch": 0.95, "learning_rate": 1.3234776365346313e-07, "loss": 0.811, "step": 7007 }, { "epoch": 0.95, "learning_rate": 1.3163682750066964e-07, "loss": 0.8298, "step": 7008 }, { "epoch": 0.95, "learning_rate": 1.3092779332022465e-07, "loss": 0.776, "step": 7009 }, { "epoch": 0.95, "learning_rate": 1.3022066124878218e-07, "loss": 0.814, "step": 7010 }, { "epoch": 0.95, "learning_rate": 1.2951543142263101e-07, "loss": 0.8257, "step": 7011 }, { "epoch": 0.95, "learning_rate": 1.2881210397769461e-07, "loss": 0.8688, "step": 7012 }, { "epoch": 0.95, "learning_rate": 1.2811067904952567e-07, "loss": 0.8378, "step": 7013 }, { "epoch": 0.95, "learning_rate": 1.2741115677331383e-07, "loss": 0.8226, "step": 7014 }, { "epoch": 0.95, "learning_rate": 1.2671353728388237e-07, "loss": 0.8131, "step": 7015 }, { "epoch": 0.95, "learning_rate": 1.260178207156848e-07, "loss": 0.8354, "step": 7016 }, { "epoch": 0.95, "learning_rate": 1.2532400720281057e-07, "loss": 0.8395, "step": 7017 }, { "epoch": 0.95, "learning_rate": 1.2463209687898047e-07, "loss": 0.814, "step": 7018 }, { "epoch": 0.95, "learning_rate": 1.2394208987755008e-07, "loss": 0.8862, "step": 7019 }, { "epoch": 0.95, "learning_rate": 1.2325398633150742e-07, "loss": 0.8385, "step": 7020 }, { "epoch": 0.95, "learning_rate": 1.2256778637347422e-07, "loss": 0.8581, "step": 7021 }, { "epoch": 0.95, "learning_rate": 1.2188349013570356e-07, "loss": 0.8119, "step": 7022 }, { "epoch": 0.95, "learning_rate": 1.2120109775008215e-07, "loss": 0.7355, "step": 7023 }, { "epoch": 0.95, "learning_rate": 1.205206093481337e-07, "loss": 0.852, "step": 7024 }, { "epoch": 0.95, "learning_rate": 1.1984202506100883e-07, "loss": 0.846, "step": 7025 }, { "epoch": 0.95, "learning_rate": 1.1916534501949406e-07, "loss": 0.8101, "step": 7026 }, { "epoch": 0.95, "learning_rate": 1.184905693540106e-07, "loss": 0.78, "step": 7027 }, { "epoch": 0.95, "learning_rate": 1.1781769819460887e-07, "loss": 0.7813, "step": 7028 }, { "epoch": 0.95, "learning_rate": 1.1714673167097624e-07, "loss": 0.8062, "step": 7029 }, { "epoch": 0.95, "learning_rate": 1.1647766991243037e-07, "loss": 0.8591, "step": 7030 }, { "epoch": 0.95, "learning_rate": 1.1581051304792146e-07, "loss": 0.7786, "step": 7031 }, { "epoch": 0.95, "learning_rate": 1.1514526120603331e-07, "loss": 0.8188, "step": 7032 }, { "epoch": 0.95, "learning_rate": 1.1448191451498448e-07, "loss": 0.8697, "step": 7033 }, { "epoch": 0.95, "learning_rate": 1.1382047310262379e-07, "loss": 0.8125, "step": 7034 }, { "epoch": 0.95, "learning_rate": 1.1316093709643372e-07, "loss": 0.8084, "step": 7035 }, { "epoch": 0.95, "learning_rate": 1.1250330662352926e-07, "loss": 0.8424, "step": 7036 }, { "epoch": 0.95, "learning_rate": 1.1184758181065902e-07, "loss": 0.8541, "step": 7037 }, { "epoch": 0.95, "learning_rate": 1.1119376278420301e-07, "loss": 0.8362, "step": 7038 }, { "epoch": 0.95, "learning_rate": 1.105418496701749e-07, "loss": 0.7839, "step": 7039 }, { "epoch": 0.95, "learning_rate": 1.0989184259422081e-07, "loss": 0.8385, "step": 7040 }, { "epoch": 0.95, "learning_rate": 1.0924374168161833e-07, "loss": 0.8235, "step": 7041 }, { "epoch": 0.95, "learning_rate": 1.0859754705728087e-07, "loss": 0.8398, "step": 7042 }, { "epoch": 0.95, "learning_rate": 1.0795325884575103e-07, "loss": 0.8742, "step": 7043 }, { "epoch": 0.95, "learning_rate": 1.0731087717120503e-07, "loss": 0.8127, "step": 7044 }, { "epoch": 0.95, "learning_rate": 1.0667040215745272e-07, "loss": 0.803, "step": 7045 }, { "epoch": 0.96, "learning_rate": 1.0603183392793536e-07, "loss": 0.8439, "step": 7046 }, { "epoch": 0.96, "learning_rate": 1.0539517260572562e-07, "loss": 0.8338, "step": 7047 }, { "epoch": 0.96, "learning_rate": 1.0476041831353201e-07, "loss": 0.8486, "step": 7048 }, { "epoch": 0.96, "learning_rate": 1.0412757117369222e-07, "loss": 0.8435, "step": 7049 }, { "epoch": 0.96, "learning_rate": 1.0349663130817866e-07, "loss": 0.7974, "step": 7050 }, { "epoch": 0.96, "learning_rate": 1.0286759883859298e-07, "loss": 0.8335, "step": 7051 }, { "epoch": 0.96, "learning_rate": 1.0224047388617375e-07, "loss": 0.7892, "step": 7052 }, { "epoch": 0.96, "learning_rate": 1.0161525657178872e-07, "loss": 0.8534, "step": 7053 }, { "epoch": 0.96, "learning_rate": 1.0099194701593817e-07, "loss": 0.8054, "step": 7054 }, { "epoch": 0.96, "learning_rate": 1.0037054533875601e-07, "loss": 0.8422, "step": 7055 }, { "epoch": 0.96, "learning_rate": 9.975105166000642e-08, "loss": 0.798, "step": 7056 }, { "epoch": 0.96, "learning_rate": 9.913346609908836e-08, "loss": 0.8621, "step": 7057 }, { "epoch": 0.96, "learning_rate": 9.851778877503215e-08, "loss": 0.825, "step": 7058 }, { "epoch": 0.96, "learning_rate": 9.790401980649844e-08, "loss": 0.8764, "step": 7059 }, { "epoch": 0.96, "learning_rate": 9.729215931178149e-08, "loss": 0.7969, "step": 7060 }, { "epoch": 0.96, "learning_rate": 9.668220740881029e-08, "loss": 0.825, "step": 7061 }, { "epoch": 0.96, "learning_rate": 9.607416421514081e-08, "loss": 0.8391, "step": 7062 }, { "epoch": 0.96, "learning_rate": 9.546802984796489e-08, "loss": 0.8522, "step": 7063 }, { "epoch": 0.96, "learning_rate": 9.48638044241057e-08, "loss": 0.7452, "step": 7064 }, { "epoch": 0.96, "learning_rate": 9.426148806001789e-08, "loss": 0.799, "step": 7065 }, { "epoch": 0.96, "learning_rate": 9.36610808717886e-08, "loss": 0.8871, "step": 7066 }, { "epoch": 0.96, "learning_rate": 9.306258297513637e-08, "loss": 0.8231, "step": 7067 }, { "epoch": 0.96, "learning_rate": 9.246599448541337e-08, "loss": 0.8866, "step": 7068 }, { "epoch": 0.96, "learning_rate": 9.18713155176021e-08, "loss": 0.797, "step": 7069 }, { "epoch": 0.96, "learning_rate": 9.127854618631637e-08, "loss": 0.747, "step": 7070 }, { "epoch": 0.96, "learning_rate": 9.068768660580595e-08, "loss": 0.8488, "step": 7071 }, { "epoch": 0.96, "learning_rate": 9.009873688994753e-08, "loss": 0.8482, "step": 7072 }, { "epoch": 0.96, "learning_rate": 8.951169715225249e-08, "loss": 0.8773, "step": 7073 }, { "epoch": 0.96, "learning_rate": 8.89265675058637e-08, "loss": 0.802, "step": 7074 }, { "epoch": 0.96, "learning_rate": 8.834334806355649e-08, "loss": 0.781, "step": 7075 }, { "epoch": 0.96, "learning_rate": 8.776203893773539e-08, "loss": 0.845, "step": 7076 }, { "epoch": 0.96, "learning_rate": 8.718264024044077e-08, "loss": 0.8725, "step": 7077 }, { "epoch": 0.96, "learning_rate": 8.660515208334108e-08, "loss": 0.7933, "step": 7078 }, { "epoch": 0.96, "learning_rate": 8.602957457773842e-08, "loss": 0.8144, "step": 7079 }, { "epoch": 0.96, "learning_rate": 8.545590783456625e-08, "loss": 0.8044, "step": 7080 }, { "epoch": 0.96, "learning_rate": 8.488415196439059e-08, "loss": 0.8246, "step": 7081 }, { "epoch": 0.96, "learning_rate": 8.431430707740773e-08, "loss": 0.8332, "step": 7082 }, { "epoch": 0.96, "learning_rate": 8.374637328344648e-08, "loss": 0.833, "step": 7083 }, { "epoch": 0.96, "learning_rate": 8.318035069196817e-08, "loss": 0.7843, "step": 7084 }, { "epoch": 0.96, "learning_rate": 8.261623941206331e-08, "loss": 0.8536, "step": 7085 }, { "epoch": 0.96, "learning_rate": 8.205403955245606e-08, "loss": 0.7729, "step": 7086 }, { "epoch": 0.96, "learning_rate": 8.149375122150193e-08, "loss": 0.8758, "step": 7087 }, { "epoch": 0.96, "learning_rate": 8.09353745271868e-08, "loss": 0.8925, "step": 7088 }, { "epoch": 0.96, "learning_rate": 8.037890957713013e-08, "loss": 0.8398, "step": 7089 }, { "epoch": 0.96, "learning_rate": 7.982435647858167e-08, "loss": 0.8633, "step": 7090 }, { "epoch": 0.96, "learning_rate": 7.92717153384226e-08, "loss": 0.8352, "step": 7091 }, { "epoch": 0.96, "learning_rate": 7.872098626316438e-08, "loss": 0.8567, "step": 7092 }, { "epoch": 0.96, "learning_rate": 7.817216935895434e-08, "loss": 0.8043, "step": 7093 }, { "epoch": 0.96, "learning_rate": 7.762526473156561e-08, "loss": 0.7984, "step": 7094 }, { "epoch": 0.96, "learning_rate": 7.708027248640726e-08, "loss": 0.7929, "step": 7095 }, { "epoch": 0.96, "learning_rate": 7.653719272851745e-08, "loss": 0.8056, "step": 7096 }, { "epoch": 0.96, "learning_rate": 7.59960255625658e-08, "loss": 0.828, "step": 7097 }, { "epoch": 0.96, "learning_rate": 7.545677109285443e-08, "loss": 0.7831, "step": 7098 }, { "epoch": 0.96, "learning_rate": 7.491942942331687e-08, "loss": 0.8455, "step": 7099 }, { "epoch": 0.96, "learning_rate": 7.438400065751584e-08, "loss": 0.8549, "step": 7100 }, { "epoch": 0.96, "learning_rate": 7.385048489864765e-08, "loss": 0.8116, "step": 7101 }, { "epoch": 0.96, "learning_rate": 7.331888224953787e-08, "loss": 0.8002, "step": 7102 }, { "epoch": 0.96, "learning_rate": 7.278919281264673e-08, "loss": 0.8101, "step": 7103 }, { "epoch": 0.96, "learning_rate": 7.226141669006259e-08, "loss": 0.7692, "step": 7104 }, { "epoch": 0.96, "learning_rate": 7.173555398350518e-08, "loss": 0.8377, "step": 7105 }, { "epoch": 0.96, "learning_rate": 7.121160479432787e-08, "loss": 0.8583, "step": 7106 }, { "epoch": 0.96, "learning_rate": 7.068956922351211e-08, "loss": 0.8385, "step": 7107 }, { "epoch": 0.96, "learning_rate": 7.016944737167297e-08, "loss": 0.839, "step": 7108 }, { "epoch": 0.96, "learning_rate": 6.965123933905583e-08, "loss": 0.8651, "step": 7109 }, { "epoch": 0.96, "learning_rate": 6.913494522553632e-08, "loss": 0.8508, "step": 7110 }, { "epoch": 0.96, "learning_rate": 6.862056513062266e-08, "loss": 0.8506, "step": 7111 }, { "epoch": 0.96, "learning_rate": 6.810809915345328e-08, "loss": 0.8401, "step": 7112 }, { "epoch": 0.96, "learning_rate": 6.759754739279923e-08, "loss": 0.8312, "step": 7113 }, { "epoch": 0.96, "learning_rate": 6.708890994705952e-08, "loss": 0.8156, "step": 7114 }, { "epoch": 0.96, "learning_rate": 6.6582186914268e-08, "loss": 0.868, "step": 7115 }, { "epoch": 0.96, "learning_rate": 6.607737839208428e-08, "loss": 0.8675, "step": 7116 }, { "epoch": 0.96, "learning_rate": 6.557448447780612e-08, "loss": 0.888, "step": 7117 }, { "epoch": 0.96, "learning_rate": 6.507350526835709e-08, "loss": 0.7612, "step": 7118 }, { "epoch": 0.96, "learning_rate": 6.457444086029219e-08, "loss": 0.7992, "step": 7119 }, { "epoch": 0.97, "learning_rate": 6.40772913497989e-08, "loss": 0.8266, "step": 7120 }, { "epoch": 0.97, "learning_rate": 6.358205683269392e-08, "loss": 0.8088, "step": 7121 }, { "epoch": 0.97, "learning_rate": 6.308873740442867e-08, "loss": 0.9127, "step": 7122 }, { "epoch": 0.97, "learning_rate": 6.259733316007932e-08, "loss": 0.7669, "step": 7123 }, { "epoch": 0.97, "learning_rate": 6.21078441943601e-08, "loss": 0.8122, "step": 7124 }, { "epoch": 0.97, "learning_rate": 6.162027060160891e-08, "loss": 0.8135, "step": 7125 }, { "epoch": 0.97, "learning_rate": 6.113461247579944e-08, "loss": 0.8595, "step": 7126 }, { "epoch": 0.97, "learning_rate": 6.065086991053459e-08, "loss": 0.803, "step": 7127 }, { "epoch": 0.97, "learning_rate": 6.016904299904869e-08, "loss": 0.7641, "step": 7128 }, { "epoch": 0.97, "learning_rate": 5.968913183420521e-08, "loss": 0.846, "step": 7129 }, { "epoch": 0.97, "learning_rate": 5.921113650849908e-08, "loss": 0.813, "step": 7130 }, { "epoch": 0.97, "learning_rate": 5.87350571140588e-08, "loss": 0.8091, "step": 7131 }, { "epoch": 0.97, "learning_rate": 5.826089374263988e-08, "loss": 0.8753, "step": 7132 }, { "epoch": 0.97, "learning_rate": 5.778864648562921e-08, "loss": 0.8373, "step": 7133 }, { "epoch": 0.97, "learning_rate": 5.731831543404509e-08, "loss": 0.8356, "step": 7134 }, { "epoch": 0.97, "learning_rate": 5.684990067853835e-08, "loss": 0.8459, "step": 7135 }, { "epoch": 0.97, "learning_rate": 5.638340230938677e-08, "loss": 0.8435, "step": 7136 }, { "epoch": 0.97, "learning_rate": 5.5918820416500653e-08, "loss": 0.8776, "step": 7137 }, { "epoch": 0.97, "learning_rate": 5.54561550894217e-08, "loss": 0.8723, "step": 7138 }, { "epoch": 0.97, "learning_rate": 5.499540641731971e-08, "loss": 0.8808, "step": 7139 }, { "epoch": 0.97, "learning_rate": 5.4536574488999185e-08, "loss": 0.897, "step": 7140 }, { "epoch": 0.97, "learning_rate": 5.407965939289161e-08, "loss": 0.8132, "step": 7141 }, { "epoch": 0.97, "learning_rate": 5.3624661217059895e-08, "loss": 0.8098, "step": 7142 }, { "epoch": 0.97, "learning_rate": 5.3171580049199425e-08, "loss": 0.8263, "step": 7143 }, { "epoch": 0.97, "learning_rate": 5.2720415976631465e-08, "loss": 0.807, "step": 7144 }, { "epoch": 0.97, "learning_rate": 5.227116908631314e-08, "loss": 0.8256, "step": 7145 }, { "epoch": 0.97, "learning_rate": 5.1823839464829605e-08, "loss": 0.7592, "step": 7146 }, { "epoch": 0.97, "learning_rate": 5.1378427198396364e-08, "loss": 0.7606, "step": 7147 }, { "epoch": 0.97, "learning_rate": 5.093493237285918e-08, "loss": 0.7788, "step": 7148 }, { "epoch": 0.97, "learning_rate": 5.049335507369524e-08, "loss": 0.8799, "step": 7149 }, { "epoch": 0.97, "learning_rate": 5.005369538601201e-08, "loss": 0.7887, "step": 7150 }, { "epoch": 0.97, "learning_rate": 4.9615953394545056e-08, "loss": 0.8637, "step": 7151 }, { "epoch": 0.97, "learning_rate": 4.918012918366466e-08, "loss": 0.7901, "step": 7152 }, { "epoch": 0.97, "learning_rate": 4.874622283736807e-08, "loss": 0.7695, "step": 7153 }, { "epoch": 0.97, "learning_rate": 4.831423443928396e-08, "loss": 0.8245, "step": 7154 }, { "epoch": 0.97, "learning_rate": 4.788416407267127e-08, "loss": 0.8139, "step": 7155 }, { "epoch": 0.97, "learning_rate": 4.745601182042037e-08, "loss": 0.8373, "step": 7156 }, { "epoch": 0.97, "learning_rate": 4.702977776504858e-08, "loss": 0.8079, "step": 7157 }, { "epoch": 0.97, "learning_rate": 4.6605461988707967e-08, "loss": 0.8107, "step": 7158 }, { "epoch": 0.97, "learning_rate": 4.618306457317756e-08, "loss": 0.8488, "step": 7159 }, { "epoch": 0.97, "learning_rate": 4.57625855998689e-08, "loss": 0.8747, "step": 7160 }, { "epoch": 0.97, "learning_rate": 4.5344025149821616e-08, "loss": 0.8416, "step": 7161 }, { "epoch": 0.97, "learning_rate": 4.4927383303706716e-08, "loss": 0.8565, "step": 7162 }, { "epoch": 0.97, "learning_rate": 4.451266014182665e-08, "loss": 0.8281, "step": 7163 }, { "epoch": 0.97, "learning_rate": 4.4099855744110796e-08, "loss": 0.7411, "step": 7164 }, { "epoch": 0.97, "learning_rate": 4.36889701901233e-08, "loss": 0.8546, "step": 7165 }, { "epoch": 0.97, "learning_rate": 4.328000355905415e-08, "loss": 0.8502, "step": 7166 }, { "epoch": 0.97, "learning_rate": 4.2872955929724736e-08, "loss": 0.8062, "step": 7167 }, { "epoch": 0.97, "learning_rate": 4.2467827380588964e-08, "loss": 0.815, "step": 7168 }, { "epoch": 0.97, "learning_rate": 4.206461798972772e-08, "loss": 0.8556, "step": 7169 }, { "epoch": 0.97, "learning_rate": 4.166332783485438e-08, "loss": 0.7489, "step": 7170 }, { "epoch": 0.97, "learning_rate": 4.126395699330932e-08, "loss": 0.8278, "step": 7171 }, { "epoch": 0.97, "learning_rate": 4.0866505542066506e-08, "loss": 0.8181, "step": 7172 }, { "epoch": 0.97, "learning_rate": 4.047097355772911e-08, "loss": 0.8449, "step": 7173 }, { "epoch": 0.97, "learning_rate": 4.007736111652838e-08, "loss": 0.8655, "step": 7174 }, { "epoch": 0.97, "learning_rate": 3.968566829432807e-08, "loss": 0.909, "step": 7175 }, { "epoch": 0.97, "learning_rate": 3.929589516661891e-08, "loss": 0.8159, "step": 7176 }, { "epoch": 0.97, "learning_rate": 3.890804180852525e-08, "loss": 0.8247, "step": 7177 }, { "epoch": 0.97, "learning_rate": 3.852210829479952e-08, "loss": 0.8163, "step": 7178 }, { "epoch": 0.97, "learning_rate": 3.8138094699824435e-08, "loss": 0.8438, "step": 7179 }, { "epoch": 0.97, "learning_rate": 3.7756001097611906e-08, "loss": 0.8303, "step": 7180 }, { "epoch": 0.97, "learning_rate": 3.737582756180525e-08, "loss": 0.852, "step": 7181 }, { "epoch": 0.97, "learning_rate": 3.699757416567584e-08, "loss": 0.8095, "step": 7182 }, { "epoch": 0.97, "learning_rate": 3.6621240982127606e-08, "loss": 0.8046, "step": 7183 }, { "epoch": 0.97, "learning_rate": 3.624682808369251e-08, "loss": 0.8163, "step": 7184 }, { "epoch": 0.97, "learning_rate": 3.587433554253172e-08, "loss": 0.8137, "step": 7185 }, { "epoch": 0.97, "learning_rate": 3.5503763430437823e-08, "loss": 0.8374, "step": 7186 }, { "epoch": 0.97, "learning_rate": 3.513511181883367e-08, "loss": 0.8821, "step": 7187 }, { "epoch": 0.97, "learning_rate": 3.4768380778770204e-08, "loss": 0.8228, "step": 7188 }, { "epoch": 0.97, "learning_rate": 3.4403570380929785e-08, "loss": 0.8393, "step": 7189 }, { "epoch": 0.97, "learning_rate": 3.404068069562283e-08, "loss": 0.8024, "step": 7190 }, { "epoch": 0.97, "learning_rate": 3.367971179279006e-08, "loss": 0.8697, "step": 7191 }, { "epoch": 0.97, "learning_rate": 3.332066374200582e-08, "loss": 0.8276, "step": 7192 }, { "epoch": 0.97, "learning_rate": 3.2963536612466986e-08, "loss": 0.7983, "step": 7193 }, { "epoch": 0.98, "learning_rate": 3.2608330473007374e-08, "loss": 0.828, "step": 7194 }, { "epoch": 0.98, "learning_rate": 3.2255045392085574e-08, "loss": 0.8725, "step": 7195 }, { "epoch": 0.98, "learning_rate": 3.190368143779266e-08, "loss": 0.8179, "step": 7196 }, { "epoch": 0.98, "learning_rate": 3.155423867784779e-08, "loss": 0.8735, "step": 7197 }, { "epoch": 0.98, "learning_rate": 3.120671717960155e-08, "loss": 0.7973, "step": 7198 }, { "epoch": 0.98, "learning_rate": 3.0861117010032584e-08, "loss": 0.9121, "step": 7199 }, { "epoch": 0.98, "learning_rate": 3.051743823574982e-08, "loss": 0.8009, "step": 7200 }, { "epoch": 0.98, "learning_rate": 3.017568092299139e-08, "loss": 0.8796, "step": 7201 }, { "epoch": 0.98, "learning_rate": 2.983584513762794e-08, "loss": 0.7934, "step": 7202 }, { "epoch": 0.98, "learning_rate": 2.949793094515485e-08, "loss": 0.8192, "step": 7203 }, { "epoch": 0.98, "learning_rate": 2.916193841070114e-08, "loss": 0.7936, "step": 7204 }, { "epoch": 0.98, "learning_rate": 2.8827867599023896e-08, "loss": 0.8428, "step": 7205 }, { "epoch": 0.98, "learning_rate": 2.849571857450939e-08, "loss": 0.8318, "step": 7206 }, { "epoch": 0.98, "learning_rate": 2.8165491401176415e-08, "loss": 0.8344, "step": 7207 }, { "epoch": 0.98, "learning_rate": 2.7837186142668505e-08, "loss": 0.7933, "step": 7208 }, { "epoch": 0.98, "learning_rate": 2.751080286226171e-08, "loss": 0.8595, "step": 7209 }, { "epoch": 0.98, "learning_rate": 2.7186341622862378e-08, "loss": 0.7728, "step": 7210 }, { "epoch": 0.98, "learning_rate": 2.686380248700493e-08, "loss": 0.7807, "step": 7211 }, { "epoch": 0.98, "learning_rate": 2.6543185516852977e-08, "loss": 0.8536, "step": 7212 }, { "epoch": 0.98, "learning_rate": 2.622449077420153e-08, "loss": 0.7927, "step": 7213 }, { "epoch": 0.98, "learning_rate": 2.5907718320473674e-08, "loss": 0.8124, "step": 7214 }, { "epoch": 0.98, "learning_rate": 2.5592868216721688e-08, "loss": 0.8441, "step": 7215 }, { "epoch": 0.98, "learning_rate": 2.5279940523629253e-08, "loss": 0.8555, "step": 7216 }, { "epoch": 0.98, "learning_rate": 2.4968935301507015e-08, "loss": 0.8163, "step": 7217 }, { "epoch": 0.98, "learning_rate": 2.465985261029591e-08, "loss": 0.8898, "step": 7218 }, { "epoch": 0.98, "learning_rate": 2.4352692509569397e-08, "loss": 0.831, "step": 7219 }, { "epoch": 0.98, "learning_rate": 2.404745505852457e-08, "loss": 0.8613, "step": 7220 }, { "epoch": 0.98, "learning_rate": 2.374414031599437e-08, "loss": 0.8776, "step": 7221 }, { "epoch": 0.98, "learning_rate": 2.344274834043425e-08, "loss": 0.8331, "step": 7222 }, { "epoch": 0.98, "learning_rate": 2.314327918993664e-08, "loss": 0.9017, "step": 7223 }, { "epoch": 0.98, "learning_rate": 2.284573292221759e-08, "loss": 0.9078, "step": 7224 }, { "epoch": 0.98, "learning_rate": 2.2550109594623447e-08, "loss": 0.8032, "step": 7225 }, { "epoch": 0.98, "learning_rate": 2.2256409264133082e-08, "loss": 0.8637, "step": 7226 }, { "epoch": 0.98, "learning_rate": 2.1964631987351214e-08, "loss": 0.7649, "step": 7227 }, { "epoch": 0.98, "learning_rate": 2.167477782051286e-08, "loss": 0.8857, "step": 7228 }, { "epoch": 0.98, "learning_rate": 2.1386846819485552e-08, "loss": 0.8116, "step": 7229 }, { "epoch": 0.98, "learning_rate": 2.1100839039761566e-08, "loss": 0.8522, "step": 7230 }, { "epoch": 0.98, "learning_rate": 2.0816754536463478e-08, "loss": 0.8312, "step": 7231 }, { "epoch": 0.98, "learning_rate": 2.0534593364345267e-08, "loss": 0.8254, "step": 7232 }, { "epoch": 0.98, "learning_rate": 2.0254355577790096e-08, "loss": 0.7988, "step": 7233 }, { "epoch": 0.98, "learning_rate": 1.9976041230808097e-08, "loss": 0.8037, "step": 7234 }, { "epoch": 0.98, "learning_rate": 1.9699650377039692e-08, "loss": 0.8296, "step": 7235 }, { "epoch": 0.98, "learning_rate": 1.9425183069756716e-08, "loss": 0.8295, "step": 7236 }, { "epoch": 0.98, "learning_rate": 1.915263936185574e-08, "loss": 0.8433, "step": 7237 }, { "epoch": 0.98, "learning_rate": 1.888201930586697e-08, "loss": 0.8232, "step": 7238 }, { "epoch": 0.98, "learning_rate": 1.8613322953948688e-08, "loss": 0.8599, "step": 7239 }, { "epoch": 0.98, "learning_rate": 1.834655035788613e-08, "loss": 0.8312, "step": 7240 }, { "epoch": 0.98, "learning_rate": 1.8081701569097055e-08, "loss": 0.8542, "step": 7241 }, { "epoch": 0.98, "learning_rate": 1.781877663862619e-08, "loss": 0.79, "step": 7242 }, { "epoch": 0.98, "learning_rate": 1.7557775617149663e-08, "loss": 0.9334, "step": 7243 }, { "epoch": 0.98, "learning_rate": 1.7298698554968352e-08, "loss": 0.7865, "step": 7244 }, { "epoch": 0.98, "learning_rate": 1.7041545502018976e-08, "loss": 0.7423, "step": 7245 }, { "epoch": 0.98, "learning_rate": 1.6786316507859667e-08, "loss": 0.8509, "step": 7246 }, { "epoch": 0.98, "learning_rate": 1.6533011621685523e-08, "loss": 0.8124, "step": 7247 }, { "epoch": 0.98, "learning_rate": 1.628163089231527e-08, "loss": 0.838, "step": 7248 }, { "epoch": 0.98, "learning_rate": 1.6032174368197927e-08, "loss": 0.8299, "step": 7249 }, { "epoch": 0.98, "learning_rate": 1.5784642097413928e-08, "loss": 0.7864, "step": 7250 }, { "epoch": 0.98, "learning_rate": 1.553903412767066e-08, "loss": 0.8551, "step": 7251 }, { "epoch": 0.98, "learning_rate": 1.5295350506305816e-08, "loss": 0.8178, "step": 7252 }, { "epoch": 0.98, "learning_rate": 1.505359128028405e-08, "loss": 0.7877, "step": 7253 }, { "epoch": 0.98, "learning_rate": 1.4813756496201426e-08, "loss": 0.8723, "step": 7254 }, { "epoch": 0.98, "learning_rate": 1.4575846200282073e-08, "loss": 0.8142, "step": 7255 }, { "epoch": 0.98, "learning_rate": 1.4339860438381536e-08, "loss": 0.8119, "step": 7256 }, { "epoch": 0.98, "learning_rate": 1.4105799255978991e-08, "loss": 0.8463, "step": 7257 }, { "epoch": 0.98, "learning_rate": 1.3873662698188351e-08, "loss": 0.8564, "step": 7258 }, { "epoch": 0.98, "learning_rate": 1.364345080975049e-08, "loss": 0.8742, "step": 7259 }, { "epoch": 0.98, "learning_rate": 1.3415163635033257e-08, "loss": 0.8002, "step": 7260 }, { "epoch": 0.98, "learning_rate": 1.3188801218037007e-08, "loss": 0.8617, "step": 7261 }, { "epoch": 0.98, "learning_rate": 1.2964363602387953e-08, "loss": 0.7785, "step": 7262 }, { "epoch": 0.98, "learning_rate": 1.2741850831345936e-08, "loss": 0.8402, "step": 7263 }, { "epoch": 0.98, "learning_rate": 1.2521262947793322e-08, "loss": 0.8113, "step": 7264 }, { "epoch": 0.98, "learning_rate": 1.2302599994247211e-08, "loss": 0.7939, "step": 7265 }, { "epoch": 0.98, "learning_rate": 1.2085862012850557e-08, "loss": 0.8203, "step": 7266 }, { "epoch": 0.98, "learning_rate": 1.1871049045376615e-08, "loss": 0.8727, "step": 7267 }, { "epoch": 0.99, "learning_rate": 1.1658161133227818e-08, "loss": 0.8485, "step": 7268 }, { "epoch": 0.99, "learning_rate": 1.1447198317433573e-08, "loss": 0.8762, "step": 7269 }, { "epoch": 0.99, "learning_rate": 1.1238160638653572e-08, "loss": 0.7732, "step": 7270 }, { "epoch": 0.99, "learning_rate": 1.1031048137177813e-08, "loss": 0.8067, "step": 7271 }, { "epoch": 0.99, "learning_rate": 1.0825860852923253e-08, "loss": 0.883, "step": 7272 }, { "epoch": 0.99, "learning_rate": 1.0622598825437147e-08, "loss": 0.8686, "step": 7273 }, { "epoch": 0.99, "learning_rate": 1.0421262093894823e-08, "loss": 0.8844, "step": 7274 }, { "epoch": 0.99, "learning_rate": 1.0221850697100799e-08, "loss": 0.9056, "step": 7275 }, { "epoch": 0.99, "learning_rate": 1.0024364673487663e-08, "loss": 0.778, "step": 7276 }, { "epoch": 0.99, "learning_rate": 9.828804061118303e-09, "loss": 0.8337, "step": 7277 }, { "epoch": 0.99, "learning_rate": 9.635168897684788e-09, "loss": 0.7705, "step": 7278 }, { "epoch": 0.99, "learning_rate": 9.443459220505046e-09, "loss": 0.8448, "step": 7279 }, { "epoch": 0.99, "learning_rate": 9.253675066530632e-09, "loss": 0.8417, "step": 7280 }, { "epoch": 0.99, "learning_rate": 9.06581647233895e-09, "loss": 0.8745, "step": 7281 }, { "epoch": 0.99, "learning_rate": 8.879883474135487e-09, "loss": 0.8396, "step": 7282 }, { "epoch": 0.99, "learning_rate": 8.695876107757129e-09, "loss": 0.8136, "step": 7283 }, { "epoch": 0.99, "learning_rate": 8.513794408667731e-09, "loss": 0.8055, "step": 7284 }, { "epoch": 0.99, "learning_rate": 8.333638411960333e-09, "loss": 0.8181, "step": 7285 }, { "epoch": 0.99, "learning_rate": 8.155408152358268e-09, "loss": 0.8503, "step": 7286 }, { "epoch": 0.99, "learning_rate": 7.97910366421295e-09, "loss": 0.7958, "step": 7287 }, { "epoch": 0.99, "learning_rate": 7.804724981501644e-09, "loss": 0.845, "step": 7288 }, { "epoch": 0.99, "learning_rate": 7.632272137836349e-09, "loss": 0.8806, "step": 7289 }, { "epoch": 0.99, "learning_rate": 7.461745166453815e-09, "loss": 0.8139, "step": 7290 }, { "epoch": 0.99, "learning_rate": 7.293144100218863e-09, "loss": 0.7879, "step": 7291 }, { "epoch": 0.99, "learning_rate": 7.12646897162772e-09, "loss": 0.8449, "step": 7292 }, { "epoch": 0.99, "learning_rate": 6.96171981280469e-09, "loss": 0.8236, "step": 7293 }, { "epoch": 0.99, "learning_rate": 6.798896655502152e-09, "loss": 0.7644, "step": 7294 }, { "epoch": 0.99, "learning_rate": 6.637999531101669e-09, "loss": 0.8267, "step": 7295 }, { "epoch": 0.99, "learning_rate": 6.479028470615101e-09, "loss": 0.8568, "step": 7296 }, { "epoch": 0.99, "learning_rate": 6.321983504679052e-09, "loss": 0.8703, "step": 7297 }, { "epoch": 0.99, "learning_rate": 6.166864663562644e-09, "loss": 0.7836, "step": 7298 }, { "epoch": 0.99, "learning_rate": 6.013671977164182e-09, "loss": 0.8031, "step": 7299 }, { "epoch": 0.99, "learning_rate": 5.862405475006716e-09, "loss": 0.8273, "step": 7300 }, { "epoch": 0.99, "learning_rate": 5.713065186245814e-09, "loss": 0.7894, "step": 7301 }, { "epoch": 0.99, "learning_rate": 5.565651139664008e-09, "loss": 0.7824, "step": 7302 }, { "epoch": 0.99, "learning_rate": 5.420163363673014e-09, "loss": 0.8712, "step": 7303 }, { "epoch": 0.99, "learning_rate": 5.276601886313737e-09, "loss": 0.8627, "step": 7304 }, { "epoch": 0.99, "learning_rate": 5.1349667352551535e-09, "loss": 0.7863, "step": 7305 }, { "epoch": 0.99, "learning_rate": 4.995257937795428e-09, "loss": 0.8036, "step": 7306 }, { "epoch": 0.99, "learning_rate": 4.8574755208608e-09, "loss": 0.7841, "step": 7307 }, { "epoch": 0.99, "learning_rate": 4.721619511006692e-09, "loss": 0.8218, "step": 7308 }, { "epoch": 0.99, "learning_rate": 4.587689934418826e-09, "loss": 0.8398, "step": 7309 }, { "epoch": 0.99, "learning_rate": 4.4556868169076674e-09, "loss": 0.8148, "step": 7310 }, { "epoch": 0.99, "learning_rate": 4.325610183915086e-09, "loss": 0.736, "step": 7311 }, { "epoch": 0.99, "learning_rate": 4.197460060513248e-09, "loss": 0.8013, "step": 7312 }, { "epoch": 0.99, "learning_rate": 4.071236471399065e-09, "loss": 0.8136, "step": 7313 }, { "epoch": 0.99, "learning_rate": 3.946939440900855e-09, "loss": 0.8691, "step": 7314 }, { "epoch": 0.99, "learning_rate": 3.8245689929750084e-09, "loss": 0.9061, "step": 7315 }, { "epoch": 0.99, "learning_rate": 3.7041251512071053e-09, "loss": 0.8352, "step": 7316 }, { "epoch": 0.99, "learning_rate": 3.5856079388096874e-09, "loss": 0.8511, "step": 7317 }, { "epoch": 0.99, "learning_rate": 3.4690173786255943e-09, "loss": 0.8103, "step": 7318 }, { "epoch": 0.99, "learning_rate": 3.3543534931257395e-09, "loss": 0.786, "step": 7319 }, { "epoch": 0.99, "learning_rate": 3.241616304410222e-09, "loss": 0.8559, "step": 7320 }, { "epoch": 0.99, "learning_rate": 3.1308058342072177e-09, "loss": 0.8531, "step": 7321 }, { "epoch": 0.99, "learning_rate": 3.0219221038729764e-09, "loss": 0.9096, "step": 7322 }, { "epoch": 0.99, "learning_rate": 2.9149651343940433e-09, "loss": 0.8309, "step": 7323 }, { "epoch": 0.99, "learning_rate": 2.80993494638393e-09, "loss": 0.7837, "step": 7324 }, { "epoch": 0.99, "learning_rate": 2.706831560085332e-09, "loss": 0.7817, "step": 7325 }, { "epoch": 0.99, "learning_rate": 2.605654995371243e-09, "loss": 0.7888, "step": 7326 }, { "epoch": 0.99, "learning_rate": 2.506405271741619e-09, "loss": 0.7894, "step": 7327 }, { "epoch": 0.99, "learning_rate": 2.409082408323382e-09, "loss": 0.8228, "step": 7328 }, { "epoch": 0.99, "learning_rate": 2.3136864238759714e-09, "loss": 0.767, "step": 7329 }, { "epoch": 0.99, "learning_rate": 2.22021733678468e-09, "loss": 0.8369, "step": 7330 }, { "epoch": 0.99, "learning_rate": 2.128675165065097e-09, "loss": 0.8784, "step": 7331 }, { "epoch": 0.99, "learning_rate": 2.0390599263586662e-09, "loss": 0.7636, "step": 7332 }, { "epoch": 0.99, "learning_rate": 1.951371637939348e-09, "loss": 0.7776, "step": 7333 }, { "epoch": 0.99, "learning_rate": 1.8656103167058458e-09, "loss": 0.7876, "step": 7334 }, { "epoch": 0.99, "learning_rate": 1.781775979189382e-09, "loss": 0.7809, "step": 7335 }, { "epoch": 0.99, "learning_rate": 1.699868641545921e-09, "loss": 0.7767, "step": 7336 }, { "epoch": 0.99, "learning_rate": 1.6198883195617244e-09, "loss": 0.8178, "step": 7337 }, { "epoch": 0.99, "learning_rate": 1.541835028653349e-09, "loss": 0.79, "step": 7338 }, { "epoch": 0.99, "learning_rate": 1.4657087838632067e-09, "loss": 0.8185, "step": 7339 }, { "epoch": 0.99, "learning_rate": 1.3915095998640049e-09, "loss": 0.8623, "step": 7340 }, { "epoch": 0.99, "learning_rate": 1.3192374909565265e-09, "loss": 0.8326, "step": 7341 }, { "epoch": 1.0, "learning_rate": 1.2488924710696294e-09, "loss": 0.8145, "step": 7342 }, { "epoch": 1.0, "learning_rate": 1.1804745537602468e-09, "loss": 0.8627, "step": 7343 }, { "epoch": 1.0, "learning_rate": 1.1139837522167186e-09, "loss": 0.793, "step": 7344 }, { "epoch": 1.0, "learning_rate": 1.0494200792532383e-09, "loss": 0.8294, "step": 7345 }, { "epoch": 1.0, "learning_rate": 9.867835473142962e-10, "loss": 0.8447, "step": 7346 }, { "epoch": 1.0, "learning_rate": 9.260741684702368e-10, "loss": 0.8316, "step": 7347 }, { "epoch": 1.0, "learning_rate": 8.672919544228109e-10, "loss": 0.8354, "step": 7348 }, { "epoch": 1.0, "learning_rate": 8.104369165018444e-10, "loss": 0.8166, "step": 7349 }, { "epoch": 1.0, "learning_rate": 7.555090656652386e-10, "loss": 0.8262, "step": 7350 }, { "epoch": 1.0, "learning_rate": 7.025084124989701e-10, "loss": 0.9155, "step": 7351 }, { "epoch": 1.0, "learning_rate": 6.514349672182008e-10, "loss": 0.9051, "step": 7352 }, { "epoch": 1.0, "learning_rate": 6.022887396672782e-10, "loss": 0.8245, "step": 7353 }, { "epoch": 1.0, "learning_rate": 5.550697393175153e-10, "loss": 0.8624, "step": 7354 }, { "epoch": 1.0, "learning_rate": 5.097779752694099e-10, "loss": 0.8777, "step": 7355 }, { "epoch": 1.0, "learning_rate": 4.664134562526456e-10, "loss": 0.7761, "step": 7356 }, { "epoch": 1.0, "learning_rate": 4.2497619062498164e-10, "loss": 0.8483, "step": 7357 }, { "epoch": 1.0, "learning_rate": 3.85466186372252e-10, "loss": 0.7474, "step": 7358 }, { "epoch": 1.0, "learning_rate": 3.4788345111058666e-10, "loss": 0.8053, "step": 7359 }, { "epoch": 1.0, "learning_rate": 3.122279920830806e-10, "loss": 0.8867, "step": 7360 }, { "epoch": 1.0, "learning_rate": 2.784998161620145e-10, "loss": 0.8262, "step": 7361 }, { "epoch": 1.0, "learning_rate": 2.466989298466338e-10, "loss": 0.8354, "step": 7362 }, { "epoch": 1.0, "learning_rate": 2.1682533926759008e-10, "loss": 0.7985, "step": 7363 }, { "epoch": 1.0, "learning_rate": 1.8887905018138975e-10, "loss": 0.877, "step": 7364 }, { "epoch": 1.0, "learning_rate": 1.628600679748349e-10, "loss": 0.8187, "step": 7365 }, { "epoch": 1.0, "learning_rate": 1.3876839766280292e-10, "loss": 0.8731, "step": 7366 }, { "epoch": 1.0, "learning_rate": 1.1660404388824653e-10, "loss": 0.8464, "step": 7367 }, { "epoch": 1.0, "learning_rate": 9.636701092330392e-11, "loss": 0.8202, "step": 7368 }, { "epoch": 1.0, "learning_rate": 7.805730266818856e-11, "loss": 0.8156, "step": 7369 }, { "epoch": 1.0, "learning_rate": 6.167492265118924e-11, "loss": 0.7839, "step": 7370 }, { "epoch": 1.0, "learning_rate": 4.721987403089046e-11, "loss": 0.8173, "step": 7371 }, { "epoch": 1.0, "learning_rate": 3.469215959284178e-11, "loss": 0.8454, "step": 7372 }, { "epoch": 1.0, "learning_rate": 2.4091781751778287e-11, "loss": 0.8187, "step": 7373 }, { "epoch": 1.0, "learning_rate": 1.5418742549400122e-11, "loss": 0.82, "step": 7374 }, { "epoch": 1.0, "learning_rate": 8.673043658813385e-12, "loss": 0.8875, "step": 7375 }, { "epoch": 1.0, "learning_rate": 3.854686380089234e-12, "loss": 0.8321, "step": 7376 }, { "epoch": 1.0, "learning_rate": 9.636716413741198e-13, "loss": 0.8637, "step": 7377 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.82, "step": 7378 }, { "epoch": 1.0, "step": 7378, "total_flos": 8905470619811840.0, "train_loss": 0.9097490718804843, "train_runtime": 67278.4724, "train_samples_per_second": 14.037, "train_steps_per_second": 0.11 } ], "logging_steps": 1.0, "max_steps": 7378, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "total_flos": 8905470619811840.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }