{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.993604502430289, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3513513513513515e-07, "loss": 1.6001, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.702702702702703e-07, "loss": 1.6438, "step": 2 }, { "epoch": 0.01, "learning_rate": 4.0540540540540546e-07, "loss": 1.1229, "step": 3 }, { "epoch": 0.01, "learning_rate": 5.405405405405406e-07, "loss": 1.0384, "step": 4 }, { "epoch": 0.01, "learning_rate": 6.756756756756758e-07, "loss": 1.0047, "step": 5 }, { "epoch": 0.01, "learning_rate": 8.108108108108109e-07, "loss": 0.9613, "step": 6 }, { "epoch": 0.01, "learning_rate": 9.459459459459461e-07, "loss": 0.9528, "step": 7 }, { "epoch": 0.02, "learning_rate": 1.0810810810810812e-06, "loss": 0.9506, "step": 8 }, { "epoch": 0.02, "learning_rate": 1.2162162162162164e-06, "loss": 0.9616, "step": 9 }, { "epoch": 0.02, "learning_rate": 1.3513513513513515e-06, "loss": 0.9739, "step": 10 }, { "epoch": 0.02, "learning_rate": 1.4864864864864868e-06, "loss": 0.8957, "step": 11 }, { "epoch": 0.02, "learning_rate": 1.6216216216216219e-06, "loss": 1.0051, "step": 12 }, { "epoch": 0.03, "learning_rate": 1.756756756756757e-06, "loss": 0.9498, "step": 13 }, { "epoch": 0.03, "learning_rate": 1.8918918918918922e-06, "loss": 0.8877, "step": 14 }, { "epoch": 0.03, "learning_rate": 2.0270270270270273e-06, "loss": 0.8949, "step": 15 }, { "epoch": 0.03, "learning_rate": 2.1621621621621623e-06, "loss": 0.9001, "step": 16 }, { "epoch": 0.03, "learning_rate": 2.297297297297298e-06, "loss": 0.9405, "step": 17 }, { "epoch": 0.04, "learning_rate": 2.432432432432433e-06, "loss": 0.9437, "step": 18 }, { "epoch": 0.04, "learning_rate": 2.5675675675675675e-06, "loss": 0.9176, "step": 19 }, { "epoch": 0.04, "learning_rate": 2.702702702702703e-06, "loss": 0.9338, "step": 20 }, { "epoch": 0.04, "learning_rate": 2.837837837837838e-06, "loss": 0.8636, "step": 21 }, { "epoch": 0.05, "learning_rate": 2.9729729729729736e-06, "loss": 0.8741, "step": 22 }, { "epoch": 0.05, "learning_rate": 3.1081081081081082e-06, "loss": 0.8108, "step": 23 }, { "epoch": 0.05, "learning_rate": 3.2432432432432437e-06, "loss": 0.8591, "step": 24 }, { "epoch": 0.05, "learning_rate": 3.3783783783783788e-06, "loss": 0.8376, "step": 25 }, { "epoch": 0.05, "learning_rate": 3.513513513513514e-06, "loss": 0.8863, "step": 26 }, { "epoch": 0.06, "learning_rate": 3.648648648648649e-06, "loss": 0.8204, "step": 27 }, { "epoch": 0.06, "learning_rate": 3.7837837837837844e-06, "loss": 0.8501, "step": 28 }, { "epoch": 0.06, "learning_rate": 3.918918918918919e-06, "loss": 0.8401, "step": 29 }, { "epoch": 0.06, "learning_rate": 4.0540540540540545e-06, "loss": 0.8302, "step": 30 }, { "epoch": 0.06, "learning_rate": 4.189189189189189e-06, "loss": 0.7676, "step": 31 }, { "epoch": 0.07, "learning_rate": 4.324324324324325e-06, "loss": 0.7955, "step": 32 }, { "epoch": 0.07, "learning_rate": 4.45945945945946e-06, "loss": 0.7783, "step": 33 }, { "epoch": 0.07, "learning_rate": 4.594594594594596e-06, "loss": 0.8573, "step": 34 }, { "epoch": 0.07, "learning_rate": 4.72972972972973e-06, "loss": 0.7894, "step": 35 }, { "epoch": 0.07, "learning_rate": 4.864864864864866e-06, "loss": 0.8103, "step": 36 }, { "epoch": 0.08, "learning_rate": 5e-06, "loss": 0.7767, "step": 37 }, { "epoch": 0.08, "learning_rate": 5.135135135135135e-06, "loss": 0.8475, "step": 38 }, { "epoch": 0.08, "learning_rate": 5.2702702702702705e-06, "loss": 0.798, "step": 39 }, { "epoch": 0.08, "learning_rate": 5.405405405405406e-06, "loss": 0.801, "step": 40 }, { "epoch": 0.08, "learning_rate": 5.540540540540541e-06, "loss": 0.7508, "step": 41 }, { "epoch": 0.09, "learning_rate": 5.675675675675676e-06, "loss": 0.8079, "step": 42 }, { "epoch": 0.09, "learning_rate": 5.810810810810811e-06, "loss": 0.7663, "step": 43 }, { "epoch": 0.09, "learning_rate": 5.945945945945947e-06, "loss": 0.7769, "step": 44 }, { "epoch": 0.09, "learning_rate": 6.081081081081082e-06, "loss": 0.7921, "step": 45 }, { "epoch": 0.09, "learning_rate": 6.2162162162162164e-06, "loss": 0.8231, "step": 46 }, { "epoch": 0.1, "learning_rate": 6.351351351351351e-06, "loss": 0.8689, "step": 47 }, { "epoch": 0.1, "learning_rate": 6.486486486486487e-06, "loss": 0.8001, "step": 48 }, { "epoch": 0.1, "learning_rate": 6.621621621621622e-06, "loss": 0.7654, "step": 49 }, { "epoch": 0.1, "learning_rate": 6.7567567567567575e-06, "loss": 0.7373, "step": 50 }, { "epoch": 0.1, "learning_rate": 6.891891891891892e-06, "loss": 0.7286, "step": 51 }, { "epoch": 0.11, "learning_rate": 7.027027027027028e-06, "loss": 0.7519, "step": 52 }, { "epoch": 0.11, "learning_rate": 7.162162162162163e-06, "loss": 0.6419, "step": 53 }, { "epoch": 0.11, "learning_rate": 7.297297297297298e-06, "loss": 0.7268, "step": 54 }, { "epoch": 0.11, "learning_rate": 7.4324324324324324e-06, "loss": 0.7436, "step": 55 }, { "epoch": 0.11, "learning_rate": 7.567567567567569e-06, "loss": 0.7937, "step": 56 }, { "epoch": 0.12, "learning_rate": 7.702702702702704e-06, "loss": 0.7329, "step": 57 }, { "epoch": 0.12, "learning_rate": 7.837837837837838e-06, "loss": 0.7614, "step": 58 }, { "epoch": 0.12, "learning_rate": 7.972972972972974e-06, "loss": 0.7907, "step": 59 }, { "epoch": 0.12, "learning_rate": 8.108108108108109e-06, "loss": 0.7404, "step": 60 }, { "epoch": 0.12, "learning_rate": 8.243243243243245e-06, "loss": 0.7253, "step": 61 }, { "epoch": 0.13, "learning_rate": 8.378378378378378e-06, "loss": 0.6684, "step": 62 }, { "epoch": 0.13, "learning_rate": 8.513513513513514e-06, "loss": 0.7579, "step": 63 }, { "epoch": 0.13, "learning_rate": 8.64864864864865e-06, "loss": 0.6923, "step": 64 }, { "epoch": 0.13, "learning_rate": 8.783783783783785e-06, "loss": 0.7583, "step": 65 }, { "epoch": 0.14, "learning_rate": 8.91891891891892e-06, "loss": 0.7905, "step": 66 }, { "epoch": 0.14, "learning_rate": 9.054054054054054e-06, "loss": 0.7864, "step": 67 }, { "epoch": 0.14, "learning_rate": 9.189189189189191e-06, "loss": 0.8015, "step": 68 }, { "epoch": 0.14, "learning_rate": 9.324324324324325e-06, "loss": 0.7648, "step": 69 }, { "epoch": 0.14, "learning_rate": 9.45945945945946e-06, "loss": 0.6806, "step": 70 }, { "epoch": 0.15, "learning_rate": 9.594594594594594e-06, "loss": 0.772, "step": 71 }, { "epoch": 0.15, "learning_rate": 9.729729729729732e-06, "loss": 0.708, "step": 72 }, { "epoch": 0.15, "learning_rate": 9.864864864864865e-06, "loss": 0.7278, "step": 73 }, { "epoch": 0.15, "learning_rate": 1e-05, "loss": 0.703, "step": 74 }, { "epoch": 0.15, "learning_rate": 9.999995592318451e-06, "loss": 0.6937, "step": 75 }, { "epoch": 0.16, "learning_rate": 9.999982369281574e-06, "loss": 0.7493, "step": 76 }, { "epoch": 0.16, "learning_rate": 9.999960330912679e-06, "loss": 0.7467, "step": 77 }, { "epoch": 0.16, "learning_rate": 9.999929477250626e-06, "loss": 0.6794, "step": 78 }, { "epoch": 0.16, "learning_rate": 9.99988980834981e-06, "loss": 0.7672, "step": 79 }, { "epoch": 0.16, "learning_rate": 9.99984132428017e-06, "loss": 0.6718, "step": 80 }, { "epoch": 0.17, "learning_rate": 9.999784025127187e-06, "loss": 0.7496, "step": 81 }, { "epoch": 0.17, "learning_rate": 9.999717910991883e-06, "loss": 0.6751, "step": 82 }, { "epoch": 0.17, "learning_rate": 9.999642981990825e-06, "loss": 0.7292, "step": 83 }, { "epoch": 0.17, "learning_rate": 9.999559238256115e-06, "loss": 0.6885, "step": 84 }, { "epoch": 0.17, "learning_rate": 9.999466679935402e-06, "loss": 0.7022, "step": 85 }, { "epoch": 0.18, "learning_rate": 9.999365307191869e-06, "loss": 0.6794, "step": 86 }, { "epoch": 0.18, "learning_rate": 9.999255120204248e-06, "loss": 0.7694, "step": 87 }, { "epoch": 0.18, "learning_rate": 9.999136119166803e-06, "loss": 0.7489, "step": 88 }, { "epoch": 0.18, "learning_rate": 9.999008304289345e-06, "loss": 0.6952, "step": 89 }, { "epoch": 0.18, "learning_rate": 9.998871675797216e-06, "loss": 0.7223, "step": 90 }, { "epoch": 0.19, "learning_rate": 9.998726233931309e-06, "loss": 0.715, "step": 91 }, { "epoch": 0.19, "learning_rate": 9.998571978948043e-06, "loss": 0.6327, "step": 92 }, { "epoch": 0.19, "learning_rate": 9.998408911119381e-06, "loss": 0.6819, "step": 93 }, { "epoch": 0.19, "learning_rate": 9.998237030732827e-06, "loss": 0.7071, "step": 94 }, { "epoch": 0.19, "learning_rate": 9.998056338091415e-06, "loss": 0.7289, "step": 95 }, { "epoch": 0.2, "learning_rate": 9.99786683351372e-06, "loss": 0.6837, "step": 96 }, { "epoch": 0.2, "learning_rate": 9.997668517333855e-06, "loss": 0.7543, "step": 97 }, { "epoch": 0.2, "learning_rate": 9.99746138990146e-06, "loss": 0.7411, "step": 98 }, { "epoch": 0.2, "learning_rate": 9.997245451581722e-06, "loss": 0.6604, "step": 99 }, { "epoch": 0.2, "learning_rate": 9.997020702755353e-06, "loss": 0.6707, "step": 100 }, { "epoch": 0.21, "learning_rate": 9.996787143818603e-06, "loss": 0.7377, "step": 101 }, { "epoch": 0.21, "learning_rate": 9.99654477518325e-06, "loss": 0.6555, "step": 102 }, { "epoch": 0.21, "learning_rate": 9.996293597276611e-06, "loss": 0.6904, "step": 103 }, { "epoch": 0.21, "learning_rate": 9.996033610541529e-06, "loss": 0.6803, "step": 104 }, { "epoch": 0.21, "learning_rate": 9.995764815436382e-06, "loss": 0.661, "step": 105 }, { "epoch": 0.22, "learning_rate": 9.995487212435071e-06, "loss": 0.7236, "step": 106 }, { "epoch": 0.22, "learning_rate": 9.995200802027035e-06, "loss": 0.7147, "step": 107 }, { "epoch": 0.22, "learning_rate": 9.994905584717232e-06, "loss": 0.7008, "step": 108 }, { "epoch": 0.22, "learning_rate": 9.994601561026156e-06, "loss": 0.7284, "step": 109 }, { "epoch": 0.23, "learning_rate": 9.994288731489819e-06, "loss": 0.6762, "step": 110 }, { "epoch": 0.23, "learning_rate": 9.993967096659762e-06, "loss": 0.6894, "step": 111 }, { "epoch": 0.23, "learning_rate": 9.993636657103055e-06, "loss": 0.6775, "step": 112 }, { "epoch": 0.23, "learning_rate": 9.993297413402282e-06, "loss": 0.6522, "step": 113 }, { "epoch": 0.23, "learning_rate": 9.992949366155558e-06, "loss": 0.6685, "step": 114 }, { "epoch": 0.24, "learning_rate": 9.992592515976511e-06, "loss": 0.7217, "step": 115 }, { "epoch": 0.24, "learning_rate": 9.9922268634943e-06, "loss": 0.6733, "step": 116 }, { "epoch": 0.24, "learning_rate": 9.991852409353594e-06, "loss": 0.6896, "step": 117 }, { "epoch": 0.24, "learning_rate": 9.991469154214579e-06, "loss": 0.6353, "step": 118 }, { "epoch": 0.24, "learning_rate": 9.991077098752968e-06, "loss": 0.6423, "step": 119 }, { "epoch": 0.25, "learning_rate": 9.99067624365998e-06, "loss": 0.6908, "step": 120 }, { "epoch": 0.25, "learning_rate": 9.99026658964235e-06, "loss": 0.6634, "step": 121 }, { "epoch": 0.25, "learning_rate": 9.989848137422332e-06, "loss": 0.645, "step": 122 }, { "epoch": 0.25, "learning_rate": 9.989420887737684e-06, "loss": 0.6679, "step": 123 }, { "epoch": 0.25, "learning_rate": 9.988984841341682e-06, "loss": 0.7149, "step": 124 }, { "epoch": 0.26, "learning_rate": 9.988539999003104e-06, "loss": 0.6779, "step": 125 }, { "epoch": 0.26, "learning_rate": 9.98808636150624e-06, "loss": 0.6918, "step": 126 }, { "epoch": 0.26, "learning_rate": 9.987623929650884e-06, "loss": 0.6915, "step": 127 }, { "epoch": 0.26, "learning_rate": 9.987152704252342e-06, "loss": 0.6482, "step": 128 }, { "epoch": 0.26, "learning_rate": 9.986672686141416e-06, "loss": 0.6646, "step": 129 }, { "epoch": 0.27, "learning_rate": 9.986183876164412e-06, "loss": 0.6833, "step": 130 }, { "epoch": 0.27, "learning_rate": 9.985686275183139e-06, "loss": 0.6961, "step": 131 }, { "epoch": 0.27, "learning_rate": 9.985179884074902e-06, "loss": 0.6644, "step": 132 }, { "epoch": 0.27, "learning_rate": 9.984664703732505e-06, "loss": 0.6932, "step": 133 }, { "epoch": 0.27, "learning_rate": 9.984140735064252e-06, "loss": 0.7162, "step": 134 }, { "epoch": 0.28, "learning_rate": 9.983607978993933e-06, "loss": 0.6961, "step": 135 }, { "epoch": 0.28, "learning_rate": 9.98306643646084e-06, "loss": 0.6712, "step": 136 }, { "epoch": 0.28, "learning_rate": 9.982516108419746e-06, "loss": 0.684, "step": 137 }, { "epoch": 0.28, "learning_rate": 9.981956995840927e-06, "loss": 0.6423, "step": 138 }, { "epoch": 0.28, "learning_rate": 9.981389099710132e-06, "loss": 0.7091, "step": 139 }, { "epoch": 0.29, "learning_rate": 9.98081242102861e-06, "loss": 0.6628, "step": 140 }, { "epoch": 0.29, "learning_rate": 9.98022696081308e-06, "loss": 0.7019, "step": 141 }, { "epoch": 0.29, "learning_rate": 9.979632720095756e-06, "loss": 0.6056, "step": 142 }, { "epoch": 0.29, "learning_rate": 9.979029699924327e-06, "loss": 0.6157, "step": 143 }, { "epoch": 0.29, "learning_rate": 9.978417901361958e-06, "loss": 0.7407, "step": 144 }, { "epoch": 0.3, "learning_rate": 9.977797325487299e-06, "loss": 0.6844, "step": 145 }, { "epoch": 0.3, "learning_rate": 9.977167973394465e-06, "loss": 0.6879, "step": 146 }, { "epoch": 0.3, "learning_rate": 9.976529846193056e-06, "loss": 0.6261, "step": 147 }, { "epoch": 0.3, "learning_rate": 9.975882945008131e-06, "loss": 0.6189, "step": 148 }, { "epoch": 0.3, "learning_rate": 9.975227270980227e-06, "loss": 0.6998, "step": 149 }, { "epoch": 0.31, "learning_rate": 9.974562825265343e-06, "loss": 0.7177, "step": 150 }, { "epoch": 0.31, "learning_rate": 9.973889609034945e-06, "loss": 0.6759, "step": 151 }, { "epoch": 0.31, "learning_rate": 9.973207623475964e-06, "loss": 0.6594, "step": 152 }, { "epoch": 0.31, "learning_rate": 9.97251686979079e-06, "loss": 0.6454, "step": 153 }, { "epoch": 0.32, "learning_rate": 9.971817349197267e-06, "loss": 0.6127, "step": 154 }, { "epoch": 0.32, "learning_rate": 9.971109062928708e-06, "loss": 0.696, "step": 155 }, { "epoch": 0.32, "learning_rate": 9.97039201223387e-06, "loss": 0.7095, "step": 156 }, { "epoch": 0.32, "learning_rate": 9.969666198376962e-06, "loss": 0.658, "step": 157 }, { "epoch": 0.32, "learning_rate": 9.968931622637652e-06, "loss": 0.6323, "step": 158 }, { "epoch": 0.33, "learning_rate": 9.968188286311047e-06, "loss": 0.649, "step": 159 }, { "epoch": 0.33, "learning_rate": 9.967436190707705e-06, "loss": 0.6619, "step": 160 }, { "epoch": 0.33, "learning_rate": 9.966675337153622e-06, "loss": 0.654, "step": 161 }, { "epoch": 0.33, "learning_rate": 9.965905726990241e-06, "loss": 0.6198, "step": 162 }, { "epoch": 0.33, "learning_rate": 9.965127361574441e-06, "loss": 0.6387, "step": 163 }, { "epoch": 0.34, "learning_rate": 9.964340242278535e-06, "loss": 0.6494, "step": 164 }, { "epoch": 0.34, "learning_rate": 9.96354437049027e-06, "loss": 0.6287, "step": 165 }, { "epoch": 0.34, "learning_rate": 9.962739747612832e-06, "loss": 0.6289, "step": 166 }, { "epoch": 0.34, "learning_rate": 9.961926375064822e-06, "loss": 0.6171, "step": 167 }, { "epoch": 0.34, "learning_rate": 9.961104254280278e-06, "loss": 0.6364, "step": 168 }, { "epoch": 0.35, "learning_rate": 9.960273386708658e-06, "loss": 0.6498, "step": 169 }, { "epoch": 0.35, "learning_rate": 9.959433773814843e-06, "loss": 0.6115, "step": 170 }, { "epoch": 0.35, "learning_rate": 9.958585417079134e-06, "loss": 0.6405, "step": 171 }, { "epoch": 0.35, "learning_rate": 9.95772831799724e-06, "loss": 0.6699, "step": 172 }, { "epoch": 0.35, "learning_rate": 9.956862478080294e-06, "loss": 0.6311, "step": 173 }, { "epoch": 0.36, "learning_rate": 9.95598789885483e-06, "loss": 0.6, "step": 174 }, { "epoch": 0.36, "learning_rate": 9.955104581862798e-06, "loss": 0.6686, "step": 175 }, { "epoch": 0.36, "learning_rate": 9.954212528661548e-06, "loss": 0.6001, "step": 176 }, { "epoch": 0.36, "learning_rate": 9.953311740823837e-06, "loss": 0.6686, "step": 177 }, { "epoch": 0.36, "learning_rate": 9.952402219937817e-06, "loss": 0.6815, "step": 178 }, { "epoch": 0.37, "learning_rate": 9.95148396760704e-06, "loss": 0.6993, "step": 179 }, { "epoch": 0.37, "learning_rate": 9.950556985450452e-06, "loss": 0.7152, "step": 180 }, { "epoch": 0.37, "learning_rate": 9.94962127510239e-06, "loss": 0.6259, "step": 181 }, { "epoch": 0.37, "learning_rate": 9.94867683821258e-06, "loss": 0.6344, "step": 182 }, { "epoch": 0.37, "learning_rate": 9.94772367644613e-06, "loss": 0.6919, "step": 183 }, { "epoch": 0.38, "learning_rate": 9.946761791483537e-06, "loss": 0.683, "step": 184 }, { "epoch": 0.38, "learning_rate": 9.94579118502067e-06, "loss": 0.6452, "step": 185 }, { "epoch": 0.38, "learning_rate": 9.944811858768782e-06, "loss": 0.6601, "step": 186 }, { "epoch": 0.38, "learning_rate": 9.943823814454496e-06, "loss": 0.6454, "step": 187 }, { "epoch": 0.38, "learning_rate": 9.942827053819804e-06, "loss": 0.6543, "step": 188 }, { "epoch": 0.39, "learning_rate": 9.941821578622068e-06, "loss": 0.5887, "step": 189 }, { "epoch": 0.39, "learning_rate": 9.940807390634017e-06, "loss": 0.6302, "step": 190 }, { "epoch": 0.39, "learning_rate": 9.939784491643734e-06, "loss": 0.6476, "step": 191 }, { "epoch": 0.39, "learning_rate": 9.938752883454663e-06, "loss": 0.6775, "step": 192 }, { "epoch": 0.39, "learning_rate": 9.93771256788561e-06, "loss": 0.6252, "step": 193 }, { "epoch": 0.4, "learning_rate": 9.936663546770723e-06, "loss": 0.7002, "step": 194 }, { "epoch": 0.4, "learning_rate": 9.935605821959502e-06, "loss": 0.6547, "step": 195 }, { "epoch": 0.4, "learning_rate": 9.934539395316795e-06, "loss": 0.612, "step": 196 }, { "epoch": 0.4, "learning_rate": 9.933464268722787e-06, "loss": 0.6254, "step": 197 }, { "epoch": 0.41, "learning_rate": 9.932380444073006e-06, "loss": 0.6121, "step": 198 }, { "epoch": 0.41, "learning_rate": 9.931287923278313e-06, "loss": 0.6389, "step": 199 }, { "epoch": 0.41, "learning_rate": 9.930186708264902e-06, "loss": 0.6196, "step": 200 }, { "epoch": 0.41, "learning_rate": 9.929076800974295e-06, "loss": 0.5824, "step": 201 }, { "epoch": 0.41, "learning_rate": 9.927958203363336e-06, "loss": 0.6153, "step": 202 }, { "epoch": 0.42, "learning_rate": 9.9268309174042e-06, "loss": 0.5734, "step": 203 }, { "epoch": 0.42, "learning_rate": 9.925694945084369e-06, "loss": 0.6199, "step": 204 }, { "epoch": 0.42, "learning_rate": 9.924550288406648e-06, "loss": 0.6296, "step": 205 }, { "epoch": 0.42, "learning_rate": 9.923396949389147e-06, "loss": 0.6536, "step": 206 }, { "epoch": 0.42, "learning_rate": 9.922234930065286e-06, "loss": 0.6259, "step": 207 }, { "epoch": 0.43, "learning_rate": 9.921064232483795e-06, "loss": 0.6591, "step": 208 }, { "epoch": 0.43, "learning_rate": 9.919884858708691e-06, "loss": 0.6223, "step": 209 }, { "epoch": 0.43, "learning_rate": 9.918696810819302e-06, "loss": 0.7134, "step": 210 }, { "epoch": 0.43, "learning_rate": 9.91750009091024e-06, "loss": 0.6403, "step": 211 }, { "epoch": 0.43, "learning_rate": 9.916294701091407e-06, "loss": 0.6306, "step": 212 }, { "epoch": 0.44, "learning_rate": 9.915080643487997e-06, "loss": 0.6049, "step": 213 }, { "epoch": 0.44, "learning_rate": 9.913857920240481e-06, "loss": 0.6587, "step": 214 }, { "epoch": 0.44, "learning_rate": 9.912626533504607e-06, "loss": 0.6325, "step": 215 }, { "epoch": 0.44, "learning_rate": 9.911386485451397e-06, "loss": 0.5978, "step": 216 }, { "epoch": 0.44, "learning_rate": 9.910137778267153e-06, "loss": 0.6451, "step": 217 }, { "epoch": 0.45, "learning_rate": 9.90888041415343e-06, "loss": 0.6503, "step": 218 }, { "epoch": 0.45, "learning_rate": 9.907614395327054e-06, "loss": 0.5977, "step": 219 }, { "epoch": 0.45, "learning_rate": 9.90633972402011e-06, "loss": 0.6502, "step": 220 }, { "epoch": 0.45, "learning_rate": 9.905056402479933e-06, "loss": 0.5484, "step": 221 }, { "epoch": 0.45, "learning_rate": 9.903764432969115e-06, "loss": 0.6077, "step": 222 }, { "epoch": 0.46, "learning_rate": 9.902463817765492e-06, "loss": 0.6677, "step": 223 }, { "epoch": 0.46, "learning_rate": 9.90115455916214e-06, "loss": 0.6368, "step": 224 }, { "epoch": 0.46, "learning_rate": 9.899836659467379e-06, "loss": 0.6297, "step": 225 }, { "epoch": 0.46, "learning_rate": 9.898510121004761e-06, "loss": 0.6233, "step": 226 }, { "epoch": 0.46, "learning_rate": 9.89717494611307e-06, "loss": 0.6197, "step": 227 }, { "epoch": 0.47, "learning_rate": 9.895831137146319e-06, "loss": 0.6012, "step": 228 }, { "epoch": 0.47, "learning_rate": 9.894478696473737e-06, "loss": 0.6264, "step": 229 }, { "epoch": 0.47, "learning_rate": 9.893117626479778e-06, "loss": 0.5927, "step": 230 }, { "epoch": 0.47, "learning_rate": 9.891747929564105e-06, "loss": 0.6323, "step": 231 }, { "epoch": 0.47, "learning_rate": 9.890369608141592e-06, "loss": 0.5862, "step": 232 }, { "epoch": 0.48, "learning_rate": 9.888982664642324e-06, "loss": 0.6188, "step": 233 }, { "epoch": 0.48, "learning_rate": 9.88758710151158e-06, "loss": 0.5929, "step": 234 }, { "epoch": 0.48, "learning_rate": 9.88618292120984e-06, "loss": 0.5958, "step": 235 }, { "epoch": 0.48, "learning_rate": 9.884770126212775e-06, "loss": 0.6402, "step": 236 }, { "epoch": 0.49, "learning_rate": 9.883348719011246e-06, "loss": 0.6367, "step": 237 }, { "epoch": 0.49, "learning_rate": 9.881918702111299e-06, "loss": 0.6781, "step": 238 }, { "epoch": 0.49, "learning_rate": 9.880480078034154e-06, "loss": 0.7319, "step": 239 }, { "epoch": 0.49, "learning_rate": 9.879032849316212e-06, "loss": 0.6652, "step": 240 }, { "epoch": 0.49, "learning_rate": 9.877577018509041e-06, "loss": 0.6344, "step": 241 }, { "epoch": 0.5, "learning_rate": 9.876112588179378e-06, "loss": 0.6475, "step": 242 }, { "epoch": 0.5, "learning_rate": 9.874639560909118e-06, "loss": 0.6647, "step": 243 }, { "epoch": 0.5, "learning_rate": 9.873157939295318e-06, "loss": 0.6218, "step": 244 }, { "epoch": 0.5, "learning_rate": 9.871667725950182e-06, "loss": 0.5731, "step": 245 }, { "epoch": 0.5, "learning_rate": 9.870168923501066e-06, "loss": 0.6234, "step": 246 }, { "epoch": 0.51, "learning_rate": 9.868661534590465e-06, "loss": 0.6632, "step": 247 }, { "epoch": 0.51, "learning_rate": 9.867145561876018e-06, "loss": 0.661, "step": 248 }, { "epoch": 0.51, "learning_rate": 9.865621008030492e-06, "loss": 0.6418, "step": 249 }, { "epoch": 0.51, "learning_rate": 9.86408787574179e-06, "loss": 0.6276, "step": 250 }, { "epoch": 0.51, "learning_rate": 9.862546167712934e-06, "loss": 0.6229, "step": 251 }, { "epoch": 0.52, "learning_rate": 9.860995886662065e-06, "loss": 0.6642, "step": 252 }, { "epoch": 0.52, "learning_rate": 9.859437035322443e-06, "loss": 0.5459, "step": 253 }, { "epoch": 0.52, "learning_rate": 9.857869616442437e-06, "loss": 0.584, "step": 254 }, { "epoch": 0.52, "learning_rate": 9.856293632785518e-06, "loss": 0.6123, "step": 255 }, { "epoch": 0.52, "learning_rate": 9.854709087130261e-06, "loss": 0.6528, "step": 256 }, { "epoch": 0.53, "learning_rate": 9.853115982270334e-06, "loss": 0.5406, "step": 257 }, { "epoch": 0.53, "learning_rate": 9.8515143210145e-06, "loss": 0.6084, "step": 258 }, { "epoch": 0.53, "learning_rate": 9.8499041061866e-06, "loss": 0.6228, "step": 259 }, { "epoch": 0.53, "learning_rate": 9.848285340625561e-06, "loss": 0.6009, "step": 260 }, { "epoch": 0.53, "learning_rate": 9.846658027185385e-06, "loss": 0.6207, "step": 261 }, { "epoch": 0.54, "learning_rate": 9.845022168735143e-06, "loss": 0.5946, "step": 262 }, { "epoch": 0.54, "learning_rate": 9.843377768158972e-06, "loss": 0.5877, "step": 263 }, { "epoch": 0.54, "learning_rate": 9.84172482835607e-06, "loss": 0.5906, "step": 264 }, { "epoch": 0.54, "learning_rate": 9.840063352240692e-06, "loss": 0.5748, "step": 265 }, { "epoch": 0.54, "learning_rate": 9.838393342742136e-06, "loss": 0.6218, "step": 266 }, { "epoch": 0.55, "learning_rate": 9.836714802804756e-06, "loss": 0.5488, "step": 267 }, { "epoch": 0.55, "learning_rate": 9.835027735387935e-06, "loss": 0.5417, "step": 268 }, { "epoch": 0.55, "learning_rate": 9.833332143466099e-06, "loss": 0.5969, "step": 269 }, { "epoch": 0.55, "learning_rate": 9.831628030028698e-06, "loss": 0.6188, "step": 270 }, { "epoch": 0.55, "learning_rate": 9.829915398080205e-06, "loss": 0.6096, "step": 271 }, { "epoch": 0.56, "learning_rate": 9.82819425064012e-06, "loss": 0.6049, "step": 272 }, { "epoch": 0.56, "learning_rate": 9.826464590742946e-06, "loss": 0.6315, "step": 273 }, { "epoch": 0.56, "learning_rate": 9.824726421438202e-06, "loss": 0.5657, "step": 274 }, { "epoch": 0.56, "learning_rate": 9.822979745790406e-06, "loss": 0.5693, "step": 275 }, { "epoch": 0.56, "learning_rate": 9.821224566879072e-06, "loss": 0.6177, "step": 276 }, { "epoch": 0.57, "learning_rate": 9.819460887798714e-06, "loss": 0.6594, "step": 277 }, { "epoch": 0.57, "learning_rate": 9.817688711658818e-06, "loss": 0.5297, "step": 278 }, { "epoch": 0.57, "learning_rate": 9.815908041583865e-06, "loss": 0.5616, "step": 279 }, { "epoch": 0.57, "learning_rate": 9.814118880713304e-06, "loss": 0.6024, "step": 280 }, { "epoch": 0.58, "learning_rate": 9.812321232201555e-06, "loss": 0.5257, "step": 281 }, { "epoch": 0.58, "learning_rate": 9.810515099218004e-06, "loss": 0.6291, "step": 282 }, { "epoch": 0.58, "learning_rate": 9.808700484946994e-06, "loss": 0.5729, "step": 283 }, { "epoch": 0.58, "learning_rate": 9.80687739258782e-06, "loss": 0.5889, "step": 284 }, { "epoch": 0.58, "learning_rate": 9.805045825354728e-06, "loss": 0.5888, "step": 285 }, { "epoch": 0.59, "learning_rate": 9.803205786476907e-06, "loss": 0.6354, "step": 286 }, { "epoch": 0.59, "learning_rate": 9.801357279198473e-06, "loss": 0.5026, "step": 287 }, { "epoch": 0.59, "learning_rate": 9.799500306778483e-06, "loss": 0.5485, "step": 288 }, { "epoch": 0.59, "learning_rate": 9.797634872490913e-06, "loss": 0.534, "step": 289 }, { "epoch": 0.59, "learning_rate": 9.795760979624658e-06, "loss": 0.6189, "step": 290 }, { "epoch": 0.6, "learning_rate": 9.79387863148353e-06, "loss": 0.6162, "step": 291 }, { "epoch": 0.6, "learning_rate": 9.791987831386241e-06, "loss": 0.5305, "step": 292 }, { "epoch": 0.6, "learning_rate": 9.790088582666412e-06, "loss": 0.5872, "step": 293 }, { "epoch": 0.6, "learning_rate": 9.788180888672558e-06, "loss": 0.5939, "step": 294 }, { "epoch": 0.6, "learning_rate": 9.78626475276808e-06, "loss": 0.6107, "step": 295 }, { "epoch": 0.61, "learning_rate": 9.784340178331264e-06, "loss": 0.6448, "step": 296 }, { "epoch": 0.61, "learning_rate": 9.782407168755274e-06, "loss": 0.5683, "step": 297 }, { "epoch": 0.61, "learning_rate": 9.78046572744815e-06, "loss": 0.6133, "step": 298 }, { "epoch": 0.61, "learning_rate": 9.778515857832789e-06, "loss": 0.5174, "step": 299 }, { "epoch": 0.61, "learning_rate": 9.776557563346957e-06, "loss": 0.5426, "step": 300 }, { "epoch": 0.62, "learning_rate": 9.774590847443268e-06, "loss": 0.5982, "step": 301 }, { "epoch": 0.62, "learning_rate": 9.772615713589184e-06, "loss": 0.5162, "step": 302 }, { "epoch": 0.62, "learning_rate": 9.770632165267009e-06, "loss": 0.5761, "step": 303 }, { "epoch": 0.62, "learning_rate": 9.768640205973885e-06, "loss": 0.5686, "step": 304 }, { "epoch": 0.62, "learning_rate": 9.76663983922178e-06, "loss": 0.542, "step": 305 }, { "epoch": 0.63, "learning_rate": 9.764631068537484e-06, "loss": 0.5851, "step": 306 }, { "epoch": 0.63, "learning_rate": 9.76261389746261e-06, "loss": 0.6023, "step": 307 }, { "epoch": 0.63, "learning_rate": 9.76058832955357e-06, "loss": 0.5793, "step": 308 }, { "epoch": 0.63, "learning_rate": 9.758554368381596e-06, "loss": 0.5933, "step": 309 }, { "epoch": 0.63, "learning_rate": 9.756512017532703e-06, "loss": 0.5559, "step": 310 }, { "epoch": 0.64, "learning_rate": 9.754461280607705e-06, "loss": 0.6319, "step": 311 }, { "epoch": 0.64, "learning_rate": 9.7524021612222e-06, "loss": 0.5846, "step": 312 }, { "epoch": 0.64, "learning_rate": 9.750334663006566e-06, "loss": 0.6373, "step": 313 }, { "epoch": 0.64, "learning_rate": 9.748258789605954e-06, "loss": 0.5273, "step": 314 }, { "epoch": 0.64, "learning_rate": 9.746174544680278e-06, "loss": 0.5791, "step": 315 }, { "epoch": 0.65, "learning_rate": 9.74408193190421e-06, "loss": 0.6071, "step": 316 }, { "epoch": 0.65, "learning_rate": 9.741980954967186e-06, "loss": 0.5951, "step": 317 }, { "epoch": 0.65, "learning_rate": 9.739871617573372e-06, "loss": 0.5656, "step": 318 }, { "epoch": 0.65, "learning_rate": 9.737753923441689e-06, "loss": 0.6172, "step": 319 }, { "epoch": 0.65, "learning_rate": 9.735627876305782e-06, "loss": 0.5975, "step": 320 }, { "epoch": 0.66, "learning_rate": 9.733493479914031e-06, "loss": 0.5788, "step": 321 }, { "epoch": 0.66, "learning_rate": 9.731350738029527e-06, "loss": 0.6118, "step": 322 }, { "epoch": 0.66, "learning_rate": 9.729199654430083e-06, "loss": 0.5779, "step": 323 }, { "epoch": 0.66, "learning_rate": 9.727040232908214e-06, "loss": 0.5732, "step": 324 }, { "epoch": 0.67, "learning_rate": 9.724872477271135e-06, "loss": 0.5887, "step": 325 }, { "epoch": 0.67, "learning_rate": 9.722696391340762e-06, "loss": 0.5703, "step": 326 }, { "epoch": 0.67, "learning_rate": 9.720511978953687e-06, "loss": 0.5983, "step": 327 }, { "epoch": 0.67, "learning_rate": 9.718319243961191e-06, "loss": 0.6587, "step": 328 }, { "epoch": 0.67, "learning_rate": 9.716118190229225e-06, "loss": 0.5839, "step": 329 }, { "epoch": 0.68, "learning_rate": 9.713908821638402e-06, "loss": 0.5759, "step": 330 }, { "epoch": 0.68, "learning_rate": 9.711691142084006e-06, "loss": 0.5759, "step": 331 }, { "epoch": 0.68, "learning_rate": 9.709465155475964e-06, "loss": 0.5814, "step": 332 }, { "epoch": 0.68, "learning_rate": 9.70723086573885e-06, "loss": 0.5827, "step": 333 }, { "epoch": 0.68, "learning_rate": 9.704988276811883e-06, "loss": 0.5535, "step": 334 }, { "epoch": 0.69, "learning_rate": 9.702737392648908e-06, "loss": 0.6675, "step": 335 }, { "epoch": 0.69, "learning_rate": 9.700478217218397e-06, "loss": 0.6807, "step": 336 }, { "epoch": 0.69, "learning_rate": 9.69821075450344e-06, "loss": 0.6128, "step": 337 }, { "epoch": 0.69, "learning_rate": 9.69593500850174e-06, "loss": 0.6097, "step": 338 }, { "epoch": 0.69, "learning_rate": 9.693650983225603e-06, "loss": 0.5696, "step": 339 }, { "epoch": 0.7, "learning_rate": 9.691358682701927e-06, "loss": 0.5591, "step": 340 }, { "epoch": 0.7, "learning_rate": 9.68905811097221e-06, "loss": 0.6614, "step": 341 }, { "epoch": 0.7, "learning_rate": 9.686749272092523e-06, "loss": 0.6147, "step": 342 }, { "epoch": 0.7, "learning_rate": 9.684432170133517e-06, "loss": 0.551, "step": 343 }, { "epoch": 0.7, "learning_rate": 9.682106809180413e-06, "loss": 0.5752, "step": 344 }, { "epoch": 0.71, "learning_rate": 9.679773193332991e-06, "loss": 0.6045, "step": 345 }, { "epoch": 0.71, "learning_rate": 9.677431326705585e-06, "loss": 0.5113, "step": 346 }, { "epoch": 0.71, "learning_rate": 9.675081213427076e-06, "loss": 0.5594, "step": 347 }, { "epoch": 0.71, "learning_rate": 9.672722857640881e-06, "loss": 0.5765, "step": 348 }, { "epoch": 0.71, "learning_rate": 9.670356263504956e-06, "loss": 0.6392, "step": 349 }, { "epoch": 0.72, "learning_rate": 9.66798143519178e-06, "loss": 0.5873, "step": 350 }, { "epoch": 0.72, "learning_rate": 9.665598376888344e-06, "loss": 0.6056, "step": 351 }, { "epoch": 0.72, "learning_rate": 9.663207092796152e-06, "loss": 0.6466, "step": 352 }, { "epoch": 0.72, "learning_rate": 9.660807587131216e-06, "loss": 0.6057, "step": 353 }, { "epoch": 0.72, "learning_rate": 9.658399864124037e-06, "loss": 0.6125, "step": 354 }, { "epoch": 0.73, "learning_rate": 9.655983928019604e-06, "loss": 0.5623, "step": 355 }, { "epoch": 0.73, "learning_rate": 9.65355978307739e-06, "loss": 0.6181, "step": 356 }, { "epoch": 0.73, "learning_rate": 9.651127433571336e-06, "loss": 0.5794, "step": 357 }, { "epoch": 0.73, "learning_rate": 9.648686883789854e-06, "loss": 0.5335, "step": 358 }, { "epoch": 0.73, "learning_rate": 9.646238138035806e-06, "loss": 0.5598, "step": 359 }, { "epoch": 0.74, "learning_rate": 9.643781200626512e-06, "loss": 0.661, "step": 360 }, { "epoch": 0.74, "learning_rate": 9.641316075893731e-06, "loss": 0.5399, "step": 361 }, { "epoch": 0.74, "learning_rate": 9.638842768183656e-06, "loss": 0.5537, "step": 362 }, { "epoch": 0.74, "learning_rate": 9.63636128185691e-06, "loss": 0.5558, "step": 363 }, { "epoch": 0.74, "learning_rate": 9.63387162128853e-06, "loss": 0.6492, "step": 364 }, { "epoch": 0.75, "learning_rate": 9.63137379086797e-06, "loss": 0.5655, "step": 365 }, { "epoch": 0.75, "learning_rate": 9.628867794999088e-06, "loss": 0.6258, "step": 366 }, { "epoch": 0.75, "learning_rate": 9.626353638100136e-06, "loss": 0.549, "step": 367 }, { "epoch": 0.75, "learning_rate": 9.623831324603755e-06, "loss": 0.5519, "step": 368 }, { "epoch": 0.76, "learning_rate": 9.621300858956963e-06, "loss": 0.586, "step": 369 }, { "epoch": 0.76, "learning_rate": 9.618762245621162e-06, "loss": 0.5578, "step": 370 }, { "epoch": 0.76, "learning_rate": 9.616215489072106e-06, "loss": 0.5673, "step": 371 }, { "epoch": 0.76, "learning_rate": 9.613660593799915e-06, "loss": 0.5978, "step": 372 }, { "epoch": 0.76, "learning_rate": 9.611097564309054e-06, "loss": 0.595, "step": 373 }, { "epoch": 0.77, "learning_rate": 9.608526405118327e-06, "loss": 0.5044, "step": 374 }, { "epoch": 0.77, "learning_rate": 9.605947120760878e-06, "loss": 0.5456, "step": 375 }, { "epoch": 0.77, "learning_rate": 9.603359715784174e-06, "loss": 0.5637, "step": 376 }, { "epoch": 0.77, "learning_rate": 9.600764194749993e-06, "loss": 0.5576, "step": 377 }, { "epoch": 0.77, "learning_rate": 9.598160562234429e-06, "loss": 0.6121, "step": 378 }, { "epoch": 0.78, "learning_rate": 9.595548822827877e-06, "loss": 0.5872, "step": 379 }, { "epoch": 0.78, "learning_rate": 9.592928981135023e-06, "loss": 0.572, "step": 380 }, { "epoch": 0.78, "learning_rate": 9.590301041774836e-06, "loss": 0.4991, "step": 381 }, { "epoch": 0.78, "learning_rate": 9.587665009380565e-06, "loss": 0.5545, "step": 382 }, { "epoch": 0.78, "learning_rate": 9.585020888599727e-06, "loss": 0.5612, "step": 383 }, { "epoch": 0.79, "learning_rate": 9.582368684094099e-06, "loss": 0.5659, "step": 384 }, { "epoch": 0.79, "learning_rate": 9.57970840053971e-06, "loss": 0.5585, "step": 385 }, { "epoch": 0.79, "learning_rate": 9.577040042626832e-06, "loss": 0.586, "step": 386 }, { "epoch": 0.79, "learning_rate": 9.574363615059976e-06, "loss": 0.5158, "step": 387 }, { "epoch": 0.79, "learning_rate": 9.571679122557876e-06, "loss": 0.5666, "step": 388 }, { "epoch": 0.8, "learning_rate": 9.568986569853487e-06, "loss": 0.5659, "step": 389 }, { "epoch": 0.8, "learning_rate": 9.566285961693977e-06, "loss": 0.5403, "step": 390 }, { "epoch": 0.8, "learning_rate": 9.563577302840714e-06, "loss": 0.5557, "step": 391 }, { "epoch": 0.8, "learning_rate": 9.560860598069258e-06, "loss": 0.5641, "step": 392 }, { "epoch": 0.8, "learning_rate": 9.558135852169358e-06, "loss": 0.6047, "step": 393 }, { "epoch": 0.81, "learning_rate": 9.555403069944942e-06, "loss": 0.5842, "step": 394 }, { "epoch": 0.81, "learning_rate": 9.552662256214097e-06, "loss": 0.5659, "step": 395 }, { "epoch": 0.81, "learning_rate": 9.549913415809084e-06, "loss": 0.5804, "step": 396 }, { "epoch": 0.81, "learning_rate": 9.547156553576303e-06, "loss": 0.5723, "step": 397 }, { "epoch": 0.81, "learning_rate": 9.544391674376306e-06, "loss": 0.552, "step": 398 }, { "epoch": 0.82, "learning_rate": 9.54161878308377e-06, "loss": 0.5845, "step": 399 }, { "epoch": 0.82, "learning_rate": 9.53883788458751e-06, "loss": 0.6074, "step": 400 }, { "epoch": 0.82, "learning_rate": 9.53604898379045e-06, "loss": 0.5706, "step": 401 }, { "epoch": 0.82, "learning_rate": 9.533252085609623e-06, "loss": 0.5215, "step": 402 }, { "epoch": 0.82, "learning_rate": 9.530447194976164e-06, "loss": 0.5783, "step": 403 }, { "epoch": 0.83, "learning_rate": 9.5276343168353e-06, "loss": 0.5655, "step": 404 }, { "epoch": 0.83, "learning_rate": 9.52481345614634e-06, "loss": 0.5814, "step": 405 }, { "epoch": 0.83, "learning_rate": 9.521984617882665e-06, "loss": 0.6218, "step": 406 }, { "epoch": 0.83, "learning_rate": 9.519147807031722e-06, "loss": 0.5591, "step": 407 }, { "epoch": 0.83, "learning_rate": 9.516303028595015e-06, "loss": 0.5296, "step": 408 }, { "epoch": 0.84, "learning_rate": 9.513450287588097e-06, "loss": 0.5285, "step": 409 }, { "epoch": 0.84, "learning_rate": 9.510589589040554e-06, "loss": 0.5757, "step": 410 }, { "epoch": 0.84, "learning_rate": 9.507720937996007e-06, "loss": 0.5435, "step": 411 }, { "epoch": 0.84, "learning_rate": 9.504844339512096e-06, "loss": 0.5691, "step": 412 }, { "epoch": 0.85, "learning_rate": 9.501959798660474e-06, "loss": 0.5382, "step": 413 }, { "epoch": 0.85, "learning_rate": 9.499067320526793e-06, "loss": 0.5761, "step": 414 }, { "epoch": 0.85, "learning_rate": 9.496166910210706e-06, "loss": 0.5986, "step": 415 }, { "epoch": 0.85, "learning_rate": 9.493258572825846e-06, "loss": 0.567, "step": 416 }, { "epoch": 0.85, "learning_rate": 9.49034231349982e-06, "loss": 0.5547, "step": 417 }, { "epoch": 0.86, "learning_rate": 9.487418137374208e-06, "loss": 0.5822, "step": 418 }, { "epoch": 0.86, "learning_rate": 9.484486049604544e-06, "loss": 0.6069, "step": 419 }, { "epoch": 0.86, "learning_rate": 9.481546055360311e-06, "loss": 0.5195, "step": 420 }, { "epoch": 0.86, "learning_rate": 9.478598159824935e-06, "loss": 0.5267, "step": 421 }, { "epoch": 0.86, "learning_rate": 9.475642368195765e-06, "loss": 0.5303, "step": 422 }, { "epoch": 0.87, "learning_rate": 9.47267868568408e-06, "loss": 0.5138, "step": 423 }, { "epoch": 0.87, "learning_rate": 9.469707117515068e-06, "loss": 0.5699, "step": 424 }, { "epoch": 0.87, "learning_rate": 9.466727668927817e-06, "loss": 0.5274, "step": 425 }, { "epoch": 0.87, "learning_rate": 9.463740345175311e-06, "loss": 0.5769, "step": 426 }, { "epoch": 0.87, "learning_rate": 9.460745151524422e-06, "loss": 0.552, "step": 427 }, { "epoch": 0.88, "learning_rate": 9.457742093255892e-06, "loss": 0.5646, "step": 428 }, { "epoch": 0.88, "learning_rate": 9.454731175664331e-06, "loss": 0.55, "step": 429 }, { "epoch": 0.88, "learning_rate": 9.451712404058205e-06, "loss": 0.6068, "step": 430 }, { "epoch": 0.88, "learning_rate": 9.448685783759825e-06, "loss": 0.5599, "step": 431 }, { "epoch": 0.88, "learning_rate": 9.44565132010535e-06, "loss": 0.5309, "step": 432 }, { "epoch": 0.89, "learning_rate": 9.44260901844475e-06, "loss": 0.5534, "step": 433 }, { "epoch": 0.89, "learning_rate": 9.43955888414183e-06, "loss": 0.585, "step": 434 }, { "epoch": 0.89, "learning_rate": 9.436500922574198e-06, "loss": 0.5664, "step": 435 }, { "epoch": 0.89, "learning_rate": 9.433435139133262e-06, "loss": 0.5303, "step": 436 }, { "epoch": 0.89, "learning_rate": 9.43036153922422e-06, "loss": 0.5545, "step": 437 }, { "epoch": 0.9, "learning_rate": 9.427280128266049e-06, "loss": 0.6481, "step": 438 }, { "epoch": 0.9, "learning_rate": 9.424190911691507e-06, "loss": 0.6095, "step": 439 }, { "epoch": 0.9, "learning_rate": 9.4210938949471e-06, "loss": 0.5583, "step": 440 }, { "epoch": 0.9, "learning_rate": 9.4179890834931e-06, "loss": 0.572, "step": 441 }, { "epoch": 0.9, "learning_rate": 9.41487648280351e-06, "loss": 0.5717, "step": 442 }, { "epoch": 0.91, "learning_rate": 9.411756098366073e-06, "loss": 0.5764, "step": 443 }, { "epoch": 0.91, "learning_rate": 9.408627935682252e-06, "loss": 0.5712, "step": 444 }, { "epoch": 0.91, "learning_rate": 9.405492000267228e-06, "loss": 0.5493, "step": 445 }, { "epoch": 0.91, "learning_rate": 9.402348297649881e-06, "loss": 0.5403, "step": 446 }, { "epoch": 0.91, "learning_rate": 9.399196833372788e-06, "loss": 0.587, "step": 447 }, { "epoch": 0.92, "learning_rate": 9.396037612992208e-06, "loss": 0.5556, "step": 448 }, { "epoch": 0.92, "learning_rate": 9.392870642078076e-06, "loss": 0.4806, "step": 449 }, { "epoch": 0.92, "learning_rate": 9.389695926213993e-06, "loss": 0.5284, "step": 450 }, { "epoch": 0.92, "learning_rate": 9.38651347099721e-06, "loss": 0.5314, "step": 451 }, { "epoch": 0.93, "learning_rate": 9.383323282038632e-06, "loss": 0.5415, "step": 452 }, { "epoch": 0.93, "learning_rate": 9.38012536496279e-06, "loss": 0.5519, "step": 453 }, { "epoch": 0.93, "learning_rate": 9.376919725407847e-06, "loss": 0.5551, "step": 454 }, { "epoch": 0.93, "learning_rate": 9.373706369025575e-06, "loss": 0.5659, "step": 455 }, { "epoch": 0.93, "learning_rate": 9.370485301481357e-06, "loss": 0.5632, "step": 456 }, { "epoch": 0.94, "learning_rate": 9.367256528454168e-06, "loss": 0.5873, "step": 457 }, { "epoch": 0.94, "learning_rate": 9.36402005563657e-06, "loss": 0.577, "step": 458 }, { "epoch": 0.94, "learning_rate": 9.360775888734699e-06, "loss": 0.5237, "step": 459 }, { "epoch": 0.94, "learning_rate": 9.357524033468258e-06, "loss": 0.5018, "step": 460 }, { "epoch": 0.94, "learning_rate": 9.354264495570501e-06, "loss": 0.5267, "step": 461 }, { "epoch": 0.95, "learning_rate": 9.350997280788231e-06, "loss": 0.5551, "step": 462 }, { "epoch": 0.95, "learning_rate": 9.34772239488179e-06, "loss": 0.5382, "step": 463 }, { "epoch": 0.95, "learning_rate": 9.344439843625034e-06, "loss": 0.5559, "step": 464 }, { "epoch": 0.95, "learning_rate": 9.34114963280534e-06, "loss": 0.5173, "step": 465 }, { "epoch": 0.95, "learning_rate": 9.337851768223589e-06, "loss": 0.539, "step": 466 }, { "epoch": 0.96, "learning_rate": 9.334546255694159e-06, "loss": 0.5219, "step": 467 }, { "epoch": 0.96, "learning_rate": 9.331233101044903e-06, "loss": 0.5724, "step": 468 }, { "epoch": 0.96, "learning_rate": 9.327912310117158e-06, "loss": 0.5259, "step": 469 }, { "epoch": 0.96, "learning_rate": 9.324583888765717e-06, "loss": 0.5099, "step": 470 }, { "epoch": 0.96, "learning_rate": 9.32124784285883e-06, "loss": 0.5503, "step": 471 }, { "epoch": 0.97, "learning_rate": 9.317904178278186e-06, "loss": 0.5615, "step": 472 }, { "epoch": 0.97, "learning_rate": 9.31455290091891e-06, "loss": 0.5714, "step": 473 }, { "epoch": 0.97, "learning_rate": 9.31119401668955e-06, "loss": 0.537, "step": 474 }, { "epoch": 0.97, "learning_rate": 9.307827531512058e-06, "loss": 0.5303, "step": 475 }, { "epoch": 0.97, "learning_rate": 9.304453451321792e-06, "loss": 0.5682, "step": 476 }, { "epoch": 0.98, "learning_rate": 9.301071782067504e-06, "loss": 0.5433, "step": 477 }, { "epoch": 0.98, "learning_rate": 9.29768252971132e-06, "loss": 0.5386, "step": 478 }, { "epoch": 0.98, "learning_rate": 9.294285700228742e-06, "loss": 0.5346, "step": 479 }, { "epoch": 0.98, "learning_rate": 9.29088129960862e-06, "loss": 0.5894, "step": 480 }, { "epoch": 0.98, "learning_rate": 9.287469333853165e-06, "loss": 0.5771, "step": 481 }, { "epoch": 0.99, "learning_rate": 9.284049808977919e-06, "loss": 0.6191, "step": 482 }, { "epoch": 0.99, "learning_rate": 9.280622731011754e-06, "loss": 0.5543, "step": 483 }, { "epoch": 0.99, "learning_rate": 9.277188105996853e-06, "loss": 0.5242, "step": 484 }, { "epoch": 0.99, "learning_rate": 9.273745939988715e-06, "loss": 0.5108, "step": 485 }, { "epoch": 0.99, "learning_rate": 9.270296239056122e-06, "loss": 0.5528, "step": 486 }, { "epoch": 1.0, "learning_rate": 9.266839009281154e-06, "loss": 0.5583, "step": 487 }, { "epoch": 1.0, "learning_rate": 9.263374256759155e-06, "loss": 0.5203, "step": 488 }, { "epoch": 1.0, "learning_rate": 9.259901987598734e-06, "loss": 0.43, "step": 489 }, { "epoch": 1.0, "learning_rate": 9.256422207921757e-06, "loss": 0.4095, "step": 490 }, { "epoch": 1.0, "learning_rate": 9.252934923863325e-06, "loss": 0.3913, "step": 491 }, { "epoch": 1.01, "learning_rate": 9.249440141571775e-06, "loss": 0.4195, "step": 492 }, { "epoch": 1.01, "learning_rate": 9.245937867208662e-06, "loss": 0.4101, "step": 493 }, { "epoch": 1.01, "learning_rate": 9.242428106948748e-06, "loss": 0.3789, "step": 494 }, { "epoch": 1.01, "learning_rate": 9.23891086698e-06, "loss": 0.399, "step": 495 }, { "epoch": 1.02, "learning_rate": 9.235386153503561e-06, "loss": 0.4332, "step": 496 }, { "epoch": 1.02, "learning_rate": 9.231853972733762e-06, "loss": 0.3863, "step": 497 }, { "epoch": 1.02, "learning_rate": 9.22831433089809e-06, "loss": 0.3602, "step": 498 }, { "epoch": 1.02, "learning_rate": 9.224767234237195e-06, "loss": 0.3808, "step": 499 }, { "epoch": 1.02, "learning_rate": 9.221212689004863e-06, "loss": 0.3727, "step": 500 }, { "epoch": 1.03, "learning_rate": 9.217650701468016e-06, "loss": 0.4253, "step": 501 }, { "epoch": 1.03, "learning_rate": 9.214081277906697e-06, "loss": 0.3816, "step": 502 }, { "epoch": 1.03, "learning_rate": 9.21050442461406e-06, "loss": 0.346, "step": 503 }, { "epoch": 1.03, "learning_rate": 9.206920147896355e-06, "loss": 0.3737, "step": 504 }, { "epoch": 1.03, "learning_rate": 9.203328454072923e-06, "loss": 0.3961, "step": 505 }, { "epoch": 1.04, "learning_rate": 9.19972934947618e-06, "loss": 0.3944, "step": 506 }, { "epoch": 1.04, "learning_rate": 9.196122840451611e-06, "loss": 0.3726, "step": 507 }, { "epoch": 1.04, "learning_rate": 9.192508933357753e-06, "loss": 0.3798, "step": 508 }, { "epoch": 1.04, "learning_rate": 9.188887634566187e-06, "loss": 0.4185, "step": 509 }, { "epoch": 1.04, "learning_rate": 9.185258950461521e-06, "loss": 0.4229, "step": 510 }, { "epoch": 1.05, "learning_rate": 9.181622887441397e-06, "loss": 0.404, "step": 511 }, { "epoch": 1.05, "learning_rate": 9.17797945191645e-06, "loss": 0.3722, "step": 512 }, { "epoch": 1.05, "learning_rate": 9.174328650310324e-06, "loss": 0.3656, "step": 513 }, { "epoch": 1.05, "learning_rate": 9.17067048905965e-06, "loss": 0.3932, "step": 514 }, { "epoch": 1.05, "learning_rate": 9.16700497461403e-06, "loss": 0.4122, "step": 515 }, { "epoch": 1.06, "learning_rate": 9.163332113436031e-06, "loss": 0.3996, "step": 516 }, { "epoch": 1.06, "learning_rate": 9.159651912001178e-06, "loss": 0.3343, "step": 517 }, { "epoch": 1.06, "learning_rate": 9.15596437679793e-06, "loss": 0.3845, "step": 518 }, { "epoch": 1.06, "learning_rate": 9.152269514327678e-06, "loss": 0.3957, "step": 519 }, { "epoch": 1.06, "learning_rate": 9.148567331104736e-06, "loss": 0.352, "step": 520 }, { "epoch": 1.07, "learning_rate": 9.14485783365632e-06, "loss": 0.3776, "step": 521 }, { "epoch": 1.07, "learning_rate": 9.141141028522544e-06, "loss": 0.3941, "step": 522 }, { "epoch": 1.07, "learning_rate": 9.137416922256407e-06, "loss": 0.4041, "step": 523 }, { "epoch": 1.07, "learning_rate": 9.133685521423776e-06, "loss": 0.3633, "step": 524 }, { "epoch": 1.07, "learning_rate": 9.129946832603383e-06, "loss": 0.4231, "step": 525 }, { "epoch": 1.08, "learning_rate": 9.126200862386808e-06, "loss": 0.3766, "step": 526 }, { "epoch": 1.08, "learning_rate": 9.122447617378467e-06, "loss": 0.3487, "step": 527 }, { "epoch": 1.08, "learning_rate": 9.118687104195607e-06, "loss": 0.3891, "step": 528 }, { "epoch": 1.08, "learning_rate": 9.114919329468283e-06, "loss": 0.4305, "step": 529 }, { "epoch": 1.08, "learning_rate": 9.111144299839357e-06, "loss": 0.3928, "step": 530 }, { "epoch": 1.09, "learning_rate": 9.107362021964477e-06, "loss": 0.3616, "step": 531 }, { "epoch": 1.09, "learning_rate": 9.103572502512076e-06, "loss": 0.3676, "step": 532 }, { "epoch": 1.09, "learning_rate": 9.099775748163356e-06, "loss": 0.3829, "step": 533 }, { "epoch": 1.09, "learning_rate": 9.095971765612262e-06, "loss": 0.3843, "step": 534 }, { "epoch": 1.09, "learning_rate": 9.092160561565498e-06, "loss": 0.3782, "step": 535 }, { "epoch": 1.1, "learning_rate": 9.088342142742493e-06, "loss": 0.3788, "step": 536 }, { "epoch": 1.1, "learning_rate": 9.084516515875394e-06, "loss": 0.3837, "step": 537 }, { "epoch": 1.1, "learning_rate": 9.08068368770906e-06, "loss": 0.3996, "step": 538 }, { "epoch": 1.1, "learning_rate": 9.076843665001046e-06, "loss": 0.4126, "step": 539 }, { "epoch": 1.11, "learning_rate": 9.072996454521591e-06, "loss": 0.417, "step": 540 }, { "epoch": 1.11, "learning_rate": 9.069142063053606e-06, "loss": 0.4064, "step": 541 }, { "epoch": 1.11, "learning_rate": 9.065280497392663e-06, "loss": 0.3958, "step": 542 }, { "epoch": 1.11, "learning_rate": 9.061411764346983e-06, "loss": 0.3547, "step": 543 }, { "epoch": 1.11, "learning_rate": 9.057535870737423e-06, "loss": 0.3223, "step": 544 }, { "epoch": 1.12, "learning_rate": 9.053652823397465e-06, "loss": 0.4229, "step": 545 }, { "epoch": 1.12, "learning_rate": 9.049762629173204e-06, "loss": 0.3805, "step": 546 }, { "epoch": 1.12, "learning_rate": 9.045865294923333e-06, "loss": 0.3707, "step": 547 }, { "epoch": 1.12, "learning_rate": 9.04196082751914e-06, "loss": 0.4207, "step": 548 }, { "epoch": 1.12, "learning_rate": 9.038049233844476e-06, "loss": 0.3302, "step": 549 }, { "epoch": 1.13, "learning_rate": 9.034130520795774e-06, "loss": 0.3304, "step": 550 }, { "epoch": 1.13, "learning_rate": 9.030204695282004e-06, "loss": 0.4036, "step": 551 }, { "epoch": 1.13, "learning_rate": 9.026271764224683e-06, "loss": 0.405, "step": 552 }, { "epoch": 1.13, "learning_rate": 9.022331734557856e-06, "loss": 0.3878, "step": 553 }, { "epoch": 1.13, "learning_rate": 9.018384613228079e-06, "loss": 0.4104, "step": 554 }, { "epoch": 1.14, "learning_rate": 9.014430407194413e-06, "loss": 0.34, "step": 555 }, { "epoch": 1.14, "learning_rate": 9.010469123428413e-06, "loss": 0.3769, "step": 556 }, { "epoch": 1.14, "learning_rate": 9.006500768914106e-06, "loss": 0.3421, "step": 557 }, { "epoch": 1.14, "learning_rate": 9.002525350647995e-06, "loss": 0.4346, "step": 558 }, { "epoch": 1.14, "learning_rate": 8.998542875639025e-06, "loss": 0.3393, "step": 559 }, { "epoch": 1.15, "learning_rate": 8.994553350908594e-06, "loss": 0.387, "step": 560 }, { "epoch": 1.15, "learning_rate": 8.99055678349052e-06, "loss": 0.3626, "step": 561 }, { "epoch": 1.15, "learning_rate": 8.986553180431043e-06, "loss": 0.4028, "step": 562 }, { "epoch": 1.15, "learning_rate": 8.982542548788805e-06, "loss": 0.3904, "step": 563 }, { "epoch": 1.15, "learning_rate": 8.978524895634842e-06, "loss": 0.3817, "step": 564 }, { "epoch": 1.16, "learning_rate": 8.97450022805257e-06, "loss": 0.4056, "step": 565 }, { "epoch": 1.16, "learning_rate": 8.970468553137765e-06, "loss": 0.3192, "step": 566 }, { "epoch": 1.16, "learning_rate": 8.966429877998568e-06, "loss": 0.3544, "step": 567 }, { "epoch": 1.16, "learning_rate": 8.962384209755453e-06, "loss": 0.3848, "step": 568 }, { "epoch": 1.16, "learning_rate": 8.958331555541229e-06, "loss": 0.4019, "step": 569 }, { "epoch": 1.17, "learning_rate": 8.954271922501019e-06, "loss": 0.3318, "step": 570 }, { "epoch": 1.17, "learning_rate": 8.95020531779225e-06, "loss": 0.3318, "step": 571 }, { "epoch": 1.17, "learning_rate": 8.946131748584643e-06, "loss": 0.4275, "step": 572 }, { "epoch": 1.17, "learning_rate": 8.942051222060195e-06, "loss": 0.3833, "step": 573 }, { "epoch": 1.17, "learning_rate": 8.937963745413172e-06, "loss": 0.4169, "step": 574 }, { "epoch": 1.18, "learning_rate": 8.933869325850088e-06, "loss": 0.3857, "step": 575 }, { "epoch": 1.18, "learning_rate": 8.92976797058971e-06, "loss": 0.3988, "step": 576 }, { "epoch": 1.18, "learning_rate": 8.925659686863018e-06, "loss": 0.3683, "step": 577 }, { "epoch": 1.18, "learning_rate": 8.921544481913218e-06, "loss": 0.374, "step": 578 }, { "epoch": 1.18, "learning_rate": 8.917422362995714e-06, "loss": 0.3438, "step": 579 }, { "epoch": 1.19, "learning_rate": 8.913293337378099e-06, "loss": 0.3375, "step": 580 }, { "epoch": 1.19, "learning_rate": 8.90915741234015e-06, "loss": 0.3728, "step": 581 }, { "epoch": 1.19, "learning_rate": 8.9050145951738e-06, "loss": 0.3702, "step": 582 }, { "epoch": 1.19, "learning_rate": 8.900864893183136e-06, "loss": 0.3556, "step": 583 }, { "epoch": 1.2, "learning_rate": 8.896708313684386e-06, "loss": 0.4395, "step": 584 }, { "epoch": 1.2, "learning_rate": 8.892544864005899e-06, "loss": 0.3665, "step": 585 }, { "epoch": 1.2, "learning_rate": 8.888374551488141e-06, "loss": 0.3871, "step": 586 }, { "epoch": 1.2, "learning_rate": 8.884197383483676e-06, "loss": 0.3805, "step": 587 }, { "epoch": 1.2, "learning_rate": 8.880013367357153e-06, "loss": 0.4102, "step": 588 }, { "epoch": 1.21, "learning_rate": 8.875822510485298e-06, "loss": 0.3846, "step": 589 }, { "epoch": 1.21, "learning_rate": 8.871624820256895e-06, "loss": 0.3469, "step": 590 }, { "epoch": 1.21, "learning_rate": 8.867420304072775e-06, "loss": 0.3761, "step": 591 }, { "epoch": 1.21, "learning_rate": 8.86320896934581e-06, "loss": 0.3455, "step": 592 }, { "epoch": 1.21, "learning_rate": 8.858990823500885e-06, "loss": 0.332, "step": 593 }, { "epoch": 1.22, "learning_rate": 8.854765873974898e-06, "loss": 0.3608, "step": 594 }, { "epoch": 1.22, "learning_rate": 8.850534128216743e-06, "loss": 0.4233, "step": 595 }, { "epoch": 1.22, "learning_rate": 8.846295593687295e-06, "loss": 0.4056, "step": 596 }, { "epoch": 1.22, "learning_rate": 8.842050277859398e-06, "loss": 0.4021, "step": 597 }, { "epoch": 1.22, "learning_rate": 8.837798188217853e-06, "loss": 0.3946, "step": 598 }, { "epoch": 1.23, "learning_rate": 8.833539332259398e-06, "loss": 0.3393, "step": 599 }, { "epoch": 1.23, "learning_rate": 8.82927371749271e-06, "loss": 0.3807, "step": 600 }, { "epoch": 1.23, "learning_rate": 8.825001351438378e-06, "loss": 0.3663, "step": 601 }, { "epoch": 1.23, "learning_rate": 8.820722241628891e-06, "loss": 0.3618, "step": 602 }, { "epoch": 1.23, "learning_rate": 8.816436395608633e-06, "loss": 0.3865, "step": 603 }, { "epoch": 1.24, "learning_rate": 8.812143820933859e-06, "loss": 0.3026, "step": 604 }, { "epoch": 1.24, "learning_rate": 8.807844525172692e-06, "loss": 0.4093, "step": 605 }, { "epoch": 1.24, "learning_rate": 8.803538515905102e-06, "loss": 0.3282, "step": 606 }, { "epoch": 1.24, "learning_rate": 8.799225800722895e-06, "loss": 0.3441, "step": 607 }, { "epoch": 1.24, "learning_rate": 8.794906387229703e-06, "loss": 0.3741, "step": 608 }, { "epoch": 1.25, "learning_rate": 8.790580283040964e-06, "loss": 0.3889, "step": 609 }, { "epoch": 1.25, "learning_rate": 8.786247495783913e-06, "loss": 0.3418, "step": 610 }, { "epoch": 1.25, "learning_rate": 8.781908033097572e-06, "loss": 0.3701, "step": 611 }, { "epoch": 1.25, "learning_rate": 8.777561902632726e-06, "loss": 0.3805, "step": 612 }, { "epoch": 1.25, "learning_rate": 8.773209112051919e-06, "loss": 0.331, "step": 613 }, { "epoch": 1.26, "learning_rate": 8.768849669029438e-06, "loss": 0.4034, "step": 614 }, { "epoch": 1.26, "learning_rate": 8.764483581251297e-06, "loss": 0.3489, "step": 615 }, { "epoch": 1.26, "learning_rate": 8.760110856415226e-06, "loss": 0.401, "step": 616 }, { "epoch": 1.26, "learning_rate": 8.755731502230656e-06, "loss": 0.3789, "step": 617 }, { "epoch": 1.26, "learning_rate": 8.751345526418707e-06, "loss": 0.3119, "step": 618 }, { "epoch": 1.27, "learning_rate": 8.746952936712172e-06, "loss": 0.3428, "step": 619 }, { "epoch": 1.27, "learning_rate": 8.742553740855507e-06, "loss": 0.3796, "step": 620 }, { "epoch": 1.27, "learning_rate": 8.738147946604811e-06, "loss": 0.373, "step": 621 }, { "epoch": 1.27, "learning_rate": 8.733735561727824e-06, "loss": 0.3261, "step": 622 }, { "epoch": 1.28, "learning_rate": 8.729316594003896e-06, "loss": 0.3674, "step": 623 }, { "epoch": 1.28, "learning_rate": 8.724891051223991e-06, "loss": 0.3269, "step": 624 }, { "epoch": 1.28, "learning_rate": 8.720458941190662e-06, "loss": 0.386, "step": 625 }, { "epoch": 1.28, "learning_rate": 8.71602027171804e-06, "loss": 0.3816, "step": 626 }, { "epoch": 1.28, "learning_rate": 8.711575050631823e-06, "loss": 0.3863, "step": 627 }, { "epoch": 1.29, "learning_rate": 8.707123285769255e-06, "loss": 0.3619, "step": 628 }, { "epoch": 1.29, "learning_rate": 8.702664984979124e-06, "loss": 0.3692, "step": 629 }, { "epoch": 1.29, "learning_rate": 8.698200156121737e-06, "loss": 0.3746, "step": 630 }, { "epoch": 1.29, "learning_rate": 8.693728807068912e-06, "loss": 0.4184, "step": 631 }, { "epoch": 1.29, "learning_rate": 8.689250945703964e-06, "loss": 0.3654, "step": 632 }, { "epoch": 1.3, "learning_rate": 8.684766579921684e-06, "loss": 0.3812, "step": 633 }, { "epoch": 1.3, "learning_rate": 8.680275717628336e-06, "loss": 0.4212, "step": 634 }, { "epoch": 1.3, "learning_rate": 8.675778366741638e-06, "loss": 0.3903, "step": 635 }, { "epoch": 1.3, "learning_rate": 8.671274535190744e-06, "loss": 0.3518, "step": 636 }, { "epoch": 1.3, "learning_rate": 8.666764230916236e-06, "loss": 0.3946, "step": 637 }, { "epoch": 1.31, "learning_rate": 8.66224746187011e-06, "loss": 0.3992, "step": 638 }, { "epoch": 1.31, "learning_rate": 8.657724236015757e-06, "loss": 0.4097, "step": 639 }, { "epoch": 1.31, "learning_rate": 8.65319456132795e-06, "loss": 0.3729, "step": 640 }, { "epoch": 1.31, "learning_rate": 8.64865844579284e-06, "loss": 0.3332, "step": 641 }, { "epoch": 1.31, "learning_rate": 8.644115897407921e-06, "loss": 0.3731, "step": 642 }, { "epoch": 1.32, "learning_rate": 8.639566924182043e-06, "loss": 0.3792, "step": 643 }, { "epoch": 1.32, "learning_rate": 8.63501153413537e-06, "loss": 0.3908, "step": 644 }, { "epoch": 1.32, "learning_rate": 8.63044973529939e-06, "loss": 0.3885, "step": 645 }, { "epoch": 1.32, "learning_rate": 8.625881535716883e-06, "loss": 0.3661, "step": 646 }, { "epoch": 1.32, "learning_rate": 8.621306943441919e-06, "loss": 0.382, "step": 647 }, { "epoch": 1.33, "learning_rate": 8.616725966539831e-06, "loss": 0.3282, "step": 648 }, { "epoch": 1.33, "learning_rate": 8.61213861308722e-06, "loss": 0.3292, "step": 649 }, { "epoch": 1.33, "learning_rate": 8.607544891171921e-06, "loss": 0.3868, "step": 650 }, { "epoch": 1.33, "learning_rate": 8.602944808893e-06, "loss": 0.3622, "step": 651 }, { "epoch": 1.33, "learning_rate": 8.598338374360735e-06, "loss": 0.3712, "step": 652 }, { "epoch": 1.34, "learning_rate": 8.593725595696605e-06, "loss": 0.4036, "step": 653 }, { "epoch": 1.34, "learning_rate": 8.589106481033272e-06, "loss": 0.3755, "step": 654 }, { "epoch": 1.34, "learning_rate": 8.584481038514573e-06, "loss": 0.3472, "step": 655 }, { "epoch": 1.34, "learning_rate": 8.579849276295501e-06, "loss": 0.3652, "step": 656 }, { "epoch": 1.34, "learning_rate": 8.575211202542185e-06, "loss": 0.3032, "step": 657 }, { "epoch": 1.35, "learning_rate": 8.570566825431887e-06, "loss": 0.4141, "step": 658 }, { "epoch": 1.35, "learning_rate": 8.565916153152982e-06, "loss": 0.371, "step": 659 }, { "epoch": 1.35, "learning_rate": 8.561259193904942e-06, "loss": 0.359, "step": 660 }, { "epoch": 1.35, "learning_rate": 8.556595955898326e-06, "loss": 0.3605, "step": 661 }, { "epoch": 1.35, "learning_rate": 8.551926447354759e-06, "loss": 0.3933, "step": 662 }, { "epoch": 1.36, "learning_rate": 8.547250676506926e-06, "loss": 0.3439, "step": 663 }, { "epoch": 1.36, "learning_rate": 8.542568651598547e-06, "loss": 0.358, "step": 664 }, { "epoch": 1.36, "learning_rate": 8.537880380884376e-06, "loss": 0.3542, "step": 665 }, { "epoch": 1.36, "learning_rate": 8.533185872630172e-06, "loss": 0.4072, "step": 666 }, { "epoch": 1.37, "learning_rate": 8.528485135112696e-06, "loss": 0.4065, "step": 667 }, { "epoch": 1.37, "learning_rate": 8.523778176619688e-06, "loss": 0.3544, "step": 668 }, { "epoch": 1.37, "learning_rate": 8.519065005449858e-06, "loss": 0.3352, "step": 669 }, { "epoch": 1.37, "learning_rate": 8.51434562991287e-06, "loss": 0.3704, "step": 670 }, { "epoch": 1.37, "learning_rate": 8.509620058329325e-06, "loss": 0.3517, "step": 671 }, { "epoch": 1.38, "learning_rate": 8.504888299030748e-06, "loss": 0.3777, "step": 672 }, { "epoch": 1.38, "learning_rate": 8.500150360359576e-06, "loss": 0.3468, "step": 673 }, { "epoch": 1.38, "learning_rate": 8.495406250669138e-06, "loss": 0.3418, "step": 674 }, { "epoch": 1.38, "learning_rate": 8.490655978323644e-06, "loss": 0.3766, "step": 675 }, { "epoch": 1.38, "learning_rate": 8.485899551698166e-06, "loss": 0.3854, "step": 676 }, { "epoch": 1.39, "learning_rate": 8.481136979178636e-06, "loss": 0.366, "step": 677 }, { "epoch": 1.39, "learning_rate": 8.476368269161812e-06, "loss": 0.3965, "step": 678 }, { "epoch": 1.39, "learning_rate": 8.471593430055276e-06, "loss": 0.3348, "step": 679 }, { "epoch": 1.39, "learning_rate": 8.466812470277415e-06, "loss": 0.4073, "step": 680 }, { "epoch": 1.39, "learning_rate": 8.46202539825741e-06, "loss": 0.3152, "step": 681 }, { "epoch": 1.4, "learning_rate": 8.457232222435216e-06, "loss": 0.3885, "step": 682 }, { "epoch": 1.4, "learning_rate": 8.452432951261549e-06, "loss": 0.3521, "step": 683 }, { "epoch": 1.4, "learning_rate": 8.447627593197875e-06, "loss": 0.4161, "step": 684 }, { "epoch": 1.4, "learning_rate": 8.442816156716386e-06, "loss": 0.3391, "step": 685 }, { "epoch": 1.4, "learning_rate": 8.437998650299997e-06, "loss": 0.3419, "step": 686 }, { "epoch": 1.41, "learning_rate": 8.433175082442319e-06, "loss": 0.3949, "step": 687 }, { "epoch": 1.41, "learning_rate": 8.428345461647656e-06, "loss": 0.3682, "step": 688 }, { "epoch": 1.41, "learning_rate": 8.423509796430978e-06, "loss": 0.3778, "step": 689 }, { "epoch": 1.41, "learning_rate": 8.418668095317912e-06, "loss": 0.3848, "step": 690 }, { "epoch": 1.41, "learning_rate": 8.413820366844732e-06, "loss": 0.3128, "step": 691 }, { "epoch": 1.42, "learning_rate": 8.408966619558332e-06, "loss": 0.389, "step": 692 }, { "epoch": 1.42, "learning_rate": 8.404106862016226e-06, "loss": 0.3975, "step": 693 }, { "epoch": 1.42, "learning_rate": 8.399241102786514e-06, "loss": 0.3712, "step": 694 }, { "epoch": 1.42, "learning_rate": 8.394369350447885e-06, "loss": 0.3946, "step": 695 }, { "epoch": 1.42, "learning_rate": 8.389491613589593e-06, "loss": 0.3895, "step": 696 }, { "epoch": 1.43, "learning_rate": 8.384607900811442e-06, "loss": 0.3736, "step": 697 }, { "epoch": 1.43, "learning_rate": 8.379718220723772e-06, "loss": 0.3794, "step": 698 }, { "epoch": 1.43, "learning_rate": 8.374822581947444e-06, "loss": 0.3321, "step": 699 }, { "epoch": 1.43, "learning_rate": 8.369920993113825e-06, "loss": 0.3849, "step": 700 }, { "epoch": 1.43, "learning_rate": 8.365013462864774e-06, "loss": 0.3775, "step": 701 }, { "epoch": 1.44, "learning_rate": 8.360099999852616e-06, "loss": 0.414, "step": 702 }, { "epoch": 1.44, "learning_rate": 8.35518061274015e-06, "loss": 0.3829, "step": 703 }, { "epoch": 1.44, "learning_rate": 8.350255310200611e-06, "loss": 0.3106, "step": 704 }, { "epoch": 1.44, "learning_rate": 8.345324100917667e-06, "loss": 0.366, "step": 705 }, { "epoch": 1.44, "learning_rate": 8.340386993585394e-06, "loss": 0.3634, "step": 706 }, { "epoch": 1.45, "learning_rate": 8.335443996908274e-06, "loss": 0.3931, "step": 707 }, { "epoch": 1.45, "learning_rate": 8.330495119601168e-06, "loss": 0.3576, "step": 708 }, { "epoch": 1.45, "learning_rate": 8.325540370389303e-06, "loss": 0.3158, "step": 709 }, { "epoch": 1.45, "learning_rate": 8.320579758008267e-06, "loss": 0.3655, "step": 710 }, { "epoch": 1.46, "learning_rate": 8.315613291203977e-06, "loss": 0.4109, "step": 711 }, { "epoch": 1.46, "learning_rate": 8.310640978732677e-06, "loss": 0.3472, "step": 712 }, { "epoch": 1.46, "learning_rate": 8.30566282936091e-06, "loss": 0.3613, "step": 713 }, { "epoch": 1.46, "learning_rate": 8.300678851865517e-06, "loss": 0.3895, "step": 714 }, { "epoch": 1.46, "learning_rate": 8.295689055033615e-06, "loss": 0.33, "step": 715 }, { "epoch": 1.47, "learning_rate": 8.290693447662577e-06, "loss": 0.3652, "step": 716 }, { "epoch": 1.47, "learning_rate": 8.28569203856002e-06, "loss": 0.3682, "step": 717 }, { "epoch": 1.47, "learning_rate": 8.280684836543794e-06, "loss": 0.3132, "step": 718 }, { "epoch": 1.47, "learning_rate": 8.275671850441957e-06, "loss": 0.3792, "step": 719 }, { "epoch": 1.47, "learning_rate": 8.270653089092769e-06, "loss": 0.345, "step": 720 }, { "epoch": 1.48, "learning_rate": 8.26562856134467e-06, "loss": 0.3467, "step": 721 }, { "epoch": 1.48, "learning_rate": 8.260598276056269e-06, "loss": 0.4121, "step": 722 }, { "epoch": 1.48, "learning_rate": 8.255562242096321e-06, "loss": 0.3836, "step": 723 }, { "epoch": 1.48, "learning_rate": 8.250520468343722e-06, "loss": 0.3368, "step": 724 }, { "epoch": 1.48, "learning_rate": 8.245472963687484e-06, "loss": 0.3816, "step": 725 }, { "epoch": 1.49, "learning_rate": 8.240419737026729e-06, "loss": 0.3675, "step": 726 }, { "epoch": 1.49, "learning_rate": 8.235360797270656e-06, "loss": 0.351, "step": 727 }, { "epoch": 1.49, "learning_rate": 8.230296153338544e-06, "loss": 0.3714, "step": 728 }, { "epoch": 1.49, "learning_rate": 8.225225814159731e-06, "loss": 0.393, "step": 729 }, { "epoch": 1.49, "learning_rate": 8.220149788673595e-06, "loss": 0.3059, "step": 730 }, { "epoch": 1.5, "learning_rate": 8.215068085829531e-06, "loss": 0.3567, "step": 731 }, { "epoch": 1.5, "learning_rate": 8.209980714586955e-06, "loss": 0.3875, "step": 732 }, { "epoch": 1.5, "learning_rate": 8.20488768391527e-06, "loss": 0.3651, "step": 733 }, { "epoch": 1.5, "learning_rate": 8.19978900279386e-06, "loss": 0.362, "step": 734 }, { "epoch": 1.5, "learning_rate": 8.19468468021207e-06, "loss": 0.4117, "step": 735 }, { "epoch": 1.51, "learning_rate": 8.189574725169193e-06, "loss": 0.356, "step": 736 }, { "epoch": 1.51, "learning_rate": 8.184459146674447e-06, "loss": 0.3893, "step": 737 }, { "epoch": 1.51, "learning_rate": 8.17933795374697e-06, "loss": 0.3459, "step": 738 }, { "epoch": 1.51, "learning_rate": 8.1742111554158e-06, "loss": 0.387, "step": 739 }, { "epoch": 1.51, "learning_rate": 8.169078760719849e-06, "loss": 0.3432, "step": 740 }, { "epoch": 1.52, "learning_rate": 8.163940778707905e-06, "loss": 0.3531, "step": 741 }, { "epoch": 1.52, "learning_rate": 8.158797218438603e-06, "loss": 0.4015, "step": 742 }, { "epoch": 1.52, "learning_rate": 8.153648088980414e-06, "loss": 0.3865, "step": 743 }, { "epoch": 1.52, "learning_rate": 8.148493399411626e-06, "loss": 0.3291, "step": 744 }, { "epoch": 1.52, "learning_rate": 8.143333158820332e-06, "loss": 0.3579, "step": 745 }, { "epoch": 1.53, "learning_rate": 8.138167376304411e-06, "loss": 0.3482, "step": 746 }, { "epoch": 1.53, "learning_rate": 8.132996060971512e-06, "loss": 0.4218, "step": 747 }, { "epoch": 1.53, "learning_rate": 8.12781922193904e-06, "loss": 0.381, "step": 748 }, { "epoch": 1.53, "learning_rate": 8.122636868334136e-06, "loss": 0.3499, "step": 749 }, { "epoch": 1.53, "learning_rate": 8.117449009293668e-06, "loss": 0.366, "step": 750 }, { "epoch": 1.54, "learning_rate": 8.11225565396421e-06, "loss": 0.3483, "step": 751 }, { "epoch": 1.54, "learning_rate": 8.10705681150202e-06, "loss": 0.3802, "step": 752 }, { "epoch": 1.54, "learning_rate": 8.101852491073036e-06, "loss": 0.3792, "step": 753 }, { "epoch": 1.54, "learning_rate": 8.096642701852857e-06, "loss": 0.3238, "step": 754 }, { "epoch": 1.55, "learning_rate": 8.091427453026716e-06, "loss": 0.3993, "step": 755 }, { "epoch": 1.55, "learning_rate": 8.086206753789475e-06, "loss": 0.3644, "step": 756 }, { "epoch": 1.55, "learning_rate": 8.080980613345608e-06, "loss": 0.3528, "step": 757 }, { "epoch": 1.55, "learning_rate": 8.07574904090918e-06, "loss": 0.368, "step": 758 }, { "epoch": 1.55, "learning_rate": 8.07051204570383e-06, "loss": 0.3824, "step": 759 }, { "epoch": 1.56, "learning_rate": 8.065269636962765e-06, "loss": 0.3999, "step": 760 }, { "epoch": 1.56, "learning_rate": 8.06002182392873e-06, "loss": 0.3937, "step": 761 }, { "epoch": 1.56, "learning_rate": 8.054768615854e-06, "loss": 0.3565, "step": 762 }, { "epoch": 1.56, "learning_rate": 8.049510022000365e-06, "loss": 0.3757, "step": 763 }, { "epoch": 1.56, "learning_rate": 8.044246051639104e-06, "loss": 0.3545, "step": 764 }, { "epoch": 1.57, "learning_rate": 8.038976714050983e-06, "loss": 0.366, "step": 765 }, { "epoch": 1.57, "learning_rate": 8.033702018526224e-06, "loss": 0.4051, "step": 766 }, { "epoch": 1.57, "learning_rate": 8.0284219743645e-06, "loss": 0.3434, "step": 767 }, { "epoch": 1.57, "learning_rate": 8.023136590874912e-06, "loss": 0.3575, "step": 768 }, { "epoch": 1.57, "learning_rate": 8.01784587737597e-06, "loss": 0.3743, "step": 769 }, { "epoch": 1.58, "learning_rate": 8.012549843195596e-06, "loss": 0.389, "step": 770 }, { "epoch": 1.58, "learning_rate": 8.007248497671076e-06, "loss": 0.3654, "step": 771 }, { "epoch": 1.58, "learning_rate": 8.001941850149067e-06, "loss": 0.3335, "step": 772 }, { "epoch": 1.58, "learning_rate": 7.996629909985576e-06, "loss": 0.3884, "step": 773 }, { "epoch": 1.58, "learning_rate": 7.991312686545939e-06, "loss": 0.3506, "step": 774 }, { "epoch": 1.59, "learning_rate": 7.985990189204806e-06, "loss": 0.3581, "step": 775 }, { "epoch": 1.59, "learning_rate": 7.980662427346127e-06, "loss": 0.385, "step": 776 }, { "epoch": 1.59, "learning_rate": 7.975329410363135e-06, "loss": 0.399, "step": 777 }, { "epoch": 1.59, "learning_rate": 7.969991147658323e-06, "loss": 0.3535, "step": 778 }, { "epoch": 1.59, "learning_rate": 7.964647648643438e-06, "loss": 0.371, "step": 779 }, { "epoch": 1.6, "learning_rate": 7.959298922739456e-06, "loss": 0.324, "step": 780 }, { "epoch": 1.6, "learning_rate": 7.953944979376567e-06, "loss": 0.3455, "step": 781 }, { "epoch": 1.6, "learning_rate": 7.948585827994167e-06, "loss": 0.3502, "step": 782 }, { "epoch": 1.6, "learning_rate": 7.943221478040824e-06, "loss": 0.3392, "step": 783 }, { "epoch": 1.6, "learning_rate": 7.93785193897428e-06, "loss": 0.3739, "step": 784 }, { "epoch": 1.61, "learning_rate": 7.93247722026142e-06, "loss": 0.3478, "step": 785 }, { "epoch": 1.61, "learning_rate": 7.927097331378267e-06, "loss": 0.3451, "step": 786 }, { "epoch": 1.61, "learning_rate": 7.92171228180995e-06, "loss": 0.3218, "step": 787 }, { "epoch": 1.61, "learning_rate": 7.916322081050708e-06, "loss": 0.3883, "step": 788 }, { "epoch": 1.61, "learning_rate": 7.910926738603855e-06, "loss": 0.3679, "step": 789 }, { "epoch": 1.62, "learning_rate": 7.905526263981769e-06, "loss": 0.4027, "step": 790 }, { "epoch": 1.62, "learning_rate": 7.90012066670588e-06, "loss": 0.3847, "step": 791 }, { "epoch": 1.62, "learning_rate": 7.89470995630665e-06, "loss": 0.3525, "step": 792 }, { "epoch": 1.62, "learning_rate": 7.889294142323554e-06, "loss": 0.3672, "step": 793 }, { "epoch": 1.62, "learning_rate": 7.883873234305063e-06, "loss": 0.3611, "step": 794 }, { "epoch": 1.63, "learning_rate": 7.878447241808634e-06, "loss": 0.3444, "step": 795 }, { "epoch": 1.63, "learning_rate": 7.873016174400687e-06, "loss": 0.3409, "step": 796 }, { "epoch": 1.63, "learning_rate": 7.867580041656583e-06, "loss": 0.3354, "step": 797 }, { "epoch": 1.63, "learning_rate": 7.862138853160625e-06, "loss": 0.4051, "step": 798 }, { "epoch": 1.64, "learning_rate": 7.85669261850602e-06, "loss": 0.343, "step": 799 }, { "epoch": 1.64, "learning_rate": 7.851241347294876e-06, "loss": 0.383, "step": 800 }, { "epoch": 1.64, "learning_rate": 7.845785049138178e-06, "loss": 0.3844, "step": 801 }, { "epoch": 1.64, "learning_rate": 7.84032373365578e-06, "loss": 0.3381, "step": 802 }, { "epoch": 1.64, "learning_rate": 7.834857410476373e-06, "loss": 0.3694, "step": 803 }, { "epoch": 1.65, "learning_rate": 7.829386089237487e-06, "loss": 0.4042, "step": 804 }, { "epoch": 1.65, "learning_rate": 7.823909779585456e-06, "loss": 0.3715, "step": 805 }, { "epoch": 1.65, "learning_rate": 7.81842849117541e-06, "loss": 0.3202, "step": 806 }, { "epoch": 1.65, "learning_rate": 7.81294223367126e-06, "loss": 0.3766, "step": 807 }, { "epoch": 1.65, "learning_rate": 7.807451016745677e-06, "loss": 0.373, "step": 808 }, { "epoch": 1.66, "learning_rate": 7.801954850080075e-06, "loss": 0.3378, "step": 809 }, { "epoch": 1.66, "learning_rate": 7.796453743364594e-06, "loss": 0.3696, "step": 810 }, { "epoch": 1.66, "learning_rate": 7.790947706298085e-06, "loss": 0.3578, "step": 811 }, { "epoch": 1.66, "learning_rate": 7.785436748588092e-06, "loss": 0.3753, "step": 812 }, { "epoch": 1.66, "learning_rate": 7.779920879950832e-06, "loss": 0.3282, "step": 813 }, { "epoch": 1.67, "learning_rate": 7.774400110111184e-06, "loss": 0.3322, "step": 814 }, { "epoch": 1.67, "learning_rate": 7.768874448802665e-06, "loss": 0.3635, "step": 815 }, { "epoch": 1.67, "learning_rate": 7.76334390576742e-06, "loss": 0.3487, "step": 816 }, { "epoch": 1.67, "learning_rate": 7.757808490756191e-06, "loss": 0.333, "step": 817 }, { "epoch": 1.67, "learning_rate": 7.752268213528325e-06, "loss": 0.2983, "step": 818 }, { "epoch": 1.68, "learning_rate": 7.746723083851726e-06, "loss": 0.4085, "step": 819 }, { "epoch": 1.68, "learning_rate": 7.741173111502864e-06, "loss": 0.35, "step": 820 }, { "epoch": 1.68, "learning_rate": 7.735618306266743e-06, "loss": 0.3762, "step": 821 }, { "epoch": 1.68, "learning_rate": 7.73005867793689e-06, "loss": 0.3242, "step": 822 }, { "epoch": 1.68, "learning_rate": 7.724494236315327e-06, "loss": 0.3586, "step": 823 }, { "epoch": 1.69, "learning_rate": 7.718924991212575e-06, "loss": 0.3622, "step": 824 }, { "epoch": 1.69, "learning_rate": 7.713350952447615e-06, "loss": 0.3595, "step": 825 }, { "epoch": 1.69, "learning_rate": 7.707772129847884e-06, "loss": 0.3885, "step": 826 }, { "epoch": 1.69, "learning_rate": 7.702188533249249e-06, "loss": 0.3605, "step": 827 }, { "epoch": 1.69, "learning_rate": 7.696600172495997e-06, "loss": 0.3465, "step": 828 }, { "epoch": 1.7, "learning_rate": 7.691007057440813e-06, "loss": 0.3722, "step": 829 }, { "epoch": 1.7, "learning_rate": 7.685409197944768e-06, "loss": 0.3424, "step": 830 }, { "epoch": 1.7, "learning_rate": 7.679806603877292e-06, "loss": 0.3424, "step": 831 }, { "epoch": 1.7, "learning_rate": 7.674199285116166e-06, "loss": 0.3735, "step": 832 }, { "epoch": 1.7, "learning_rate": 7.668587251547502e-06, "loss": 0.3913, "step": 833 }, { "epoch": 1.71, "learning_rate": 7.662970513065717e-06, "loss": 0.3714, "step": 834 }, { "epoch": 1.71, "learning_rate": 7.657349079573537e-06, "loss": 0.3331, "step": 835 }, { "epoch": 1.71, "learning_rate": 7.65172296098195e-06, "loss": 0.3465, "step": 836 }, { "epoch": 1.71, "learning_rate": 7.646092167210217e-06, "loss": 0.3378, "step": 837 }, { "epoch": 1.72, "learning_rate": 7.640456708185833e-06, "loss": 0.3897, "step": 838 }, { "epoch": 1.72, "learning_rate": 7.634816593844524e-06, "loss": 0.3264, "step": 839 }, { "epoch": 1.72, "learning_rate": 7.629171834130219e-06, "loss": 0.3702, "step": 840 }, { "epoch": 1.72, "learning_rate": 7.62352243899504e-06, "loss": 0.3346, "step": 841 }, { "epoch": 1.72, "learning_rate": 7.617868418399282e-06, "loss": 0.3338, "step": 842 }, { "epoch": 1.73, "learning_rate": 7.612209782311393e-06, "loss": 0.3467, "step": 843 }, { "epoch": 1.73, "learning_rate": 7.60654654070796e-06, "loss": 0.3875, "step": 844 }, { "epoch": 1.73, "learning_rate": 7.600878703573687e-06, "loss": 0.369, "step": 845 }, { "epoch": 1.73, "learning_rate": 7.595206280901384e-06, "loss": 0.3391, "step": 846 }, { "epoch": 1.73, "learning_rate": 7.5895292826919455e-06, "loss": 0.3701, "step": 847 }, { "epoch": 1.74, "learning_rate": 7.583847718954329e-06, "loss": 0.3886, "step": 848 }, { "epoch": 1.74, "learning_rate": 7.578161599705546e-06, "loss": 0.3167, "step": 849 }, { "epoch": 1.74, "learning_rate": 7.572470934970636e-06, "loss": 0.373, "step": 850 }, { "epoch": 1.74, "learning_rate": 7.566775734782656e-06, "loss": 0.3811, "step": 851 }, { "epoch": 1.74, "learning_rate": 7.561076009182656e-06, "loss": 0.3315, "step": 852 }, { "epoch": 1.75, "learning_rate": 7.555371768219667e-06, "loss": 0.3852, "step": 853 }, { "epoch": 1.75, "learning_rate": 7.5496630219506805e-06, "loss": 0.3566, "step": 854 }, { "epoch": 1.75, "learning_rate": 7.5439497804406296e-06, "loss": 0.3968, "step": 855 }, { "epoch": 1.75, "learning_rate": 7.538232053762373e-06, "loss": 0.3012, "step": 856 }, { "epoch": 1.75, "learning_rate": 7.532509851996681e-06, "loss": 0.3664, "step": 857 }, { "epoch": 1.76, "learning_rate": 7.526783185232208e-06, "loss": 0.3329, "step": 858 }, { "epoch": 1.76, "learning_rate": 7.521052063565486e-06, "loss": 0.3408, "step": 859 }, { "epoch": 1.76, "learning_rate": 7.515316497100898e-06, "loss": 0.3885, "step": 860 }, { "epoch": 1.76, "learning_rate": 7.5095764959506615e-06, "loss": 0.3637, "step": 861 }, { "epoch": 1.76, "learning_rate": 7.5038320702348176e-06, "loss": 0.3471, "step": 862 }, { "epoch": 1.77, "learning_rate": 7.4980832300812065e-06, "loss": 0.4077, "step": 863 }, { "epoch": 1.77, "learning_rate": 7.492329985625452e-06, "loss": 0.3399, "step": 864 }, { "epoch": 1.77, "learning_rate": 7.486572347010937e-06, "loss": 0.3272, "step": 865 }, { "epoch": 1.77, "learning_rate": 7.480810324388803e-06, "loss": 0.3923, "step": 866 }, { "epoch": 1.77, "learning_rate": 7.475043927917908e-06, "loss": 0.314, "step": 867 }, { "epoch": 1.78, "learning_rate": 7.469273167764831e-06, "loss": 0.3671, "step": 868 }, { "epoch": 1.78, "learning_rate": 7.463498054103842e-06, "loss": 0.3301, "step": 869 }, { "epoch": 1.78, "learning_rate": 7.457718597116883e-06, "loss": 0.3596, "step": 870 }, { "epoch": 1.78, "learning_rate": 7.4519348069935595e-06, "loss": 0.3338, "step": 871 }, { "epoch": 1.78, "learning_rate": 7.446146693931111e-06, "loss": 0.3703, "step": 872 }, { "epoch": 1.79, "learning_rate": 7.440354268134404e-06, "loss": 0.3382, "step": 873 }, { "epoch": 1.79, "learning_rate": 7.434557539815902e-06, "loss": 0.3982, "step": 874 }, { "epoch": 1.79, "learning_rate": 7.42875651919566e-06, "loss": 0.372, "step": 875 }, { "epoch": 1.79, "learning_rate": 7.4229512165013e-06, "loss": 0.3955, "step": 876 }, { "epoch": 1.79, "learning_rate": 7.4171416419679895e-06, "loss": 0.3805, "step": 877 }, { "epoch": 1.8, "learning_rate": 7.411327805838433e-06, "loss": 0.3378, "step": 878 }, { "epoch": 1.8, "learning_rate": 7.405509718362842e-06, "loss": 0.3212, "step": 879 }, { "epoch": 1.8, "learning_rate": 7.399687389798933e-06, "loss": 0.3712, "step": 880 }, { "epoch": 1.8, "learning_rate": 7.3938608304118885e-06, "loss": 0.3463, "step": 881 }, { "epoch": 1.81, "learning_rate": 7.388030050474358e-06, "loss": 0.3301, "step": 882 }, { "epoch": 1.81, "learning_rate": 7.382195060266431e-06, "loss": 0.3247, "step": 883 }, { "epoch": 1.81, "learning_rate": 7.376355870075618e-06, "loss": 0.2999, "step": 884 }, { "epoch": 1.81, "learning_rate": 7.370512490196835e-06, "loss": 0.3658, "step": 885 }, { "epoch": 1.81, "learning_rate": 7.364664930932385e-06, "loss": 0.385, "step": 886 }, { "epoch": 1.82, "learning_rate": 7.3588132025919405e-06, "loss": 0.3718, "step": 887 }, { "epoch": 1.82, "learning_rate": 7.352957315492522e-06, "loss": 0.3525, "step": 888 }, { "epoch": 1.82, "learning_rate": 7.347097279958485e-06, "loss": 0.3122, "step": 889 }, { "epoch": 1.82, "learning_rate": 7.3412331063214995e-06, "loss": 0.3265, "step": 890 }, { "epoch": 1.82, "learning_rate": 7.335364804920525e-06, "loss": 0.3285, "step": 891 }, { "epoch": 1.83, "learning_rate": 7.329492386101807e-06, "loss": 0.399, "step": 892 }, { "epoch": 1.83, "learning_rate": 7.323615860218844e-06, "loss": 0.3278, "step": 893 }, { "epoch": 1.83, "learning_rate": 7.317735237632379e-06, "loss": 0.4163, "step": 894 }, { "epoch": 1.83, "learning_rate": 7.3118505287103756e-06, "loss": 0.357, "step": 895 }, { "epoch": 1.83, "learning_rate": 7.305961743828005e-06, "loss": 0.3471, "step": 896 }, { "epoch": 1.84, "learning_rate": 7.30006889336762e-06, "loss": 0.3161, "step": 897 }, { "epoch": 1.84, "learning_rate": 7.294171987718745e-06, "loss": 0.3746, "step": 898 }, { "epoch": 1.84, "learning_rate": 7.2882710372780555e-06, "loss": 0.3721, "step": 899 }, { "epoch": 1.84, "learning_rate": 7.282366052449351e-06, "loss": 0.3602, "step": 900 }, { "epoch": 1.84, "learning_rate": 7.276457043643551e-06, "loss": 0.3839, "step": 901 }, { "epoch": 1.85, "learning_rate": 7.270544021278668e-06, "loss": 0.3488, "step": 902 }, { "epoch": 1.85, "learning_rate": 7.264626995779789e-06, "loss": 0.3887, "step": 903 }, { "epoch": 1.85, "learning_rate": 7.258705977579059e-06, "loss": 0.3109, "step": 904 }, { "epoch": 1.85, "learning_rate": 7.252780977115663e-06, "loss": 0.3781, "step": 905 }, { "epoch": 1.85, "learning_rate": 7.246852004835807e-06, "loss": 0.3687, "step": 906 }, { "epoch": 1.86, "learning_rate": 7.2409190711927015e-06, "loss": 0.3554, "step": 907 }, { "epoch": 1.86, "learning_rate": 7.2349821866465374e-06, "loss": 0.3385, "step": 908 }, { "epoch": 1.86, "learning_rate": 7.229041361664475e-06, "loss": 0.3257, "step": 909 }, { "epoch": 1.86, "learning_rate": 7.2230966067206185e-06, "loss": 0.3604, "step": 910 }, { "epoch": 1.86, "learning_rate": 7.217147932296005e-06, "loss": 0.38, "step": 911 }, { "epoch": 1.87, "learning_rate": 7.211195348878575e-06, "loss": 0.3543, "step": 912 }, { "epoch": 1.87, "learning_rate": 7.205238866963169e-06, "loss": 0.3359, "step": 913 }, { "epoch": 1.87, "learning_rate": 7.199278497051498e-06, "loss": 0.3615, "step": 914 }, { "epoch": 1.87, "learning_rate": 7.193314249652124e-06, "loss": 0.3694, "step": 915 }, { "epoch": 1.87, "learning_rate": 7.187346135280448e-06, "loss": 0.3858, "step": 916 }, { "epoch": 1.88, "learning_rate": 7.181374164458693e-06, "loss": 0.3232, "step": 917 }, { "epoch": 1.88, "learning_rate": 7.175398347715874e-06, "loss": 0.3555, "step": 918 }, { "epoch": 1.88, "learning_rate": 7.169418695587791e-06, "loss": 0.3422, "step": 919 }, { "epoch": 1.88, "learning_rate": 7.1634352186170054e-06, "loss": 0.3538, "step": 920 }, { "epoch": 1.88, "learning_rate": 7.157447927352821e-06, "loss": 0.3355, "step": 921 }, { "epoch": 1.89, "learning_rate": 7.151456832351265e-06, "loss": 0.3706, "step": 922 }, { "epoch": 1.89, "learning_rate": 7.145461944175076e-06, "loss": 0.3574, "step": 923 }, { "epoch": 1.89, "learning_rate": 7.1394632733936764e-06, "loss": 0.3954, "step": 924 }, { "epoch": 1.89, "learning_rate": 7.1334608305831584e-06, "loss": 0.3747, "step": 925 }, { "epoch": 1.9, "learning_rate": 7.127454626326263e-06, "loss": 0.3817, "step": 926 }, { "epoch": 1.9, "learning_rate": 7.121444671212367e-06, "loss": 0.3834, "step": 927 }, { "epoch": 1.9, "learning_rate": 7.115430975837457e-06, "loss": 0.3366, "step": 928 }, { "epoch": 1.9, "learning_rate": 7.109413550804114e-06, "loss": 0.3842, "step": 929 }, { "epoch": 1.9, "learning_rate": 7.1033924067214944e-06, "loss": 0.379, "step": 930 }, { "epoch": 1.91, "learning_rate": 7.0973675542053155e-06, "loss": 0.3545, "step": 931 }, { "epoch": 1.91, "learning_rate": 7.091339003877826e-06, "loss": 0.3378, "step": 932 }, { "epoch": 1.91, "learning_rate": 7.085306766367802e-06, "loss": 0.2926, "step": 933 }, { "epoch": 1.91, "learning_rate": 7.079270852310513e-06, "loss": 0.392, "step": 934 }, { "epoch": 1.91, "learning_rate": 7.073231272347714e-06, "loss": 0.3355, "step": 935 }, { "epoch": 1.92, "learning_rate": 7.067188037127624e-06, "loss": 0.3631, "step": 936 }, { "epoch": 1.92, "learning_rate": 7.061141157304906e-06, "loss": 0.3094, "step": 937 }, { "epoch": 1.92, "learning_rate": 7.055090643540649e-06, "loss": 0.3249, "step": 938 }, { "epoch": 1.92, "learning_rate": 7.0490365065023454e-06, "loss": 0.3097, "step": 939 }, { "epoch": 1.92, "learning_rate": 7.042978756863882e-06, "loss": 0.3937, "step": 940 }, { "epoch": 1.93, "learning_rate": 7.036917405305507e-06, "loss": 0.3279, "step": 941 }, { "epoch": 1.93, "learning_rate": 7.030852462513827e-06, "loss": 0.3221, "step": 942 }, { "epoch": 1.93, "learning_rate": 7.024783939181775e-06, "loss": 0.2899, "step": 943 }, { "epoch": 1.93, "learning_rate": 7.018711846008598e-06, "loss": 0.3236, "step": 944 }, { "epoch": 1.93, "learning_rate": 7.012636193699838e-06, "loss": 0.3699, "step": 945 }, { "epoch": 1.94, "learning_rate": 7.0065569929673114e-06, "loss": 0.3676, "step": 946 }, { "epoch": 1.94, "learning_rate": 7.000474254529091e-06, "loss": 0.3451, "step": 947 }, { "epoch": 1.94, "learning_rate": 6.994387989109485e-06, "loss": 0.3889, "step": 948 }, { "epoch": 1.94, "learning_rate": 6.988298207439022e-06, "loss": 0.3516, "step": 949 }, { "epoch": 1.94, "learning_rate": 6.982204920254428e-06, "loss": 0.294, "step": 950 }, { "epoch": 1.95, "learning_rate": 6.976108138298614e-06, "loss": 0.2859, "step": 951 }, { "epoch": 1.95, "learning_rate": 6.970007872320647e-06, "loss": 0.342, "step": 952 }, { "epoch": 1.95, "learning_rate": 6.963904133075738e-06, "loss": 0.3928, "step": 953 }, { "epoch": 1.95, "learning_rate": 6.957796931325225e-06, "loss": 0.3335, "step": 954 }, { "epoch": 1.95, "learning_rate": 6.951686277836547e-06, "loss": 0.3449, "step": 955 }, { "epoch": 1.96, "learning_rate": 6.945572183383229e-06, "loss": 0.3537, "step": 956 }, { "epoch": 1.96, "learning_rate": 6.939454658744865e-06, "loss": 0.399, "step": 957 }, { "epoch": 1.96, "learning_rate": 6.933333714707094e-06, "loss": 0.3423, "step": 958 }, { "epoch": 1.96, "learning_rate": 6.927209362061588e-06, "loss": 0.3624, "step": 959 }, { "epoch": 1.96, "learning_rate": 6.921081611606021e-06, "loss": 0.3569, "step": 960 }, { "epoch": 1.97, "learning_rate": 6.914950474144063e-06, "loss": 0.343, "step": 961 }, { "epoch": 1.97, "learning_rate": 6.908815960485358e-06, "loss": 0.361, "step": 962 }, { "epoch": 1.97, "learning_rate": 6.902678081445495e-06, "loss": 0.3592, "step": 963 }, { "epoch": 1.97, "learning_rate": 6.8965368478460016e-06, "loss": 0.3532, "step": 964 }, { "epoch": 1.97, "learning_rate": 6.89039227051432e-06, "loss": 0.3157, "step": 965 }, { "epoch": 1.98, "learning_rate": 6.884244360283786e-06, "loss": 0.3742, "step": 966 }, { "epoch": 1.98, "learning_rate": 6.87809312799361e-06, "loss": 0.4242, "step": 967 }, { "epoch": 1.98, "learning_rate": 6.871938584488862e-06, "loss": 0.353, "step": 968 }, { "epoch": 1.98, "learning_rate": 6.865780740620451e-06, "loss": 0.3077, "step": 969 }, { "epoch": 1.99, "learning_rate": 6.859619607245102e-06, "loss": 0.4021, "step": 970 }, { "epoch": 1.99, "learning_rate": 6.8534551952253395e-06, "loss": 0.3598, "step": 971 }, { "epoch": 1.99, "learning_rate": 6.847287515429468e-06, "loss": 0.4025, "step": 972 }, { "epoch": 1.99, "learning_rate": 6.841116578731559e-06, "loss": 0.3199, "step": 973 }, { "epoch": 1.99, "learning_rate": 6.834942396011419e-06, "loss": 0.3941, "step": 974 }, { "epoch": 2.0, "learning_rate": 6.828764978154581e-06, "loss": 0.3396, "step": 975 }, { "epoch": 2.0, "learning_rate": 6.8225843360522844e-06, "loss": 0.3125, "step": 976 }, { "epoch": 2.0, "learning_rate": 6.816400480601445e-06, "loss": 0.3414, "step": 977 }, { "epoch": 2.0, "learning_rate": 6.810213422704652e-06, "loss": 0.2488, "step": 978 }, { "epoch": 2.0, "learning_rate": 6.804023173270138e-06, "loss": 0.2118, "step": 979 }, { "epoch": 2.01, "learning_rate": 6.797829743211761e-06, "loss": 0.1845, "step": 980 }, { "epoch": 2.01, "learning_rate": 6.79163314344899e-06, "loss": 0.1893, "step": 981 }, { "epoch": 2.01, "learning_rate": 6.785433384906877e-06, "loss": 0.1533, "step": 982 }, { "epoch": 2.01, "learning_rate": 6.779230478516048e-06, "loss": 0.2071, "step": 983 }, { "epoch": 2.01, "learning_rate": 6.773024435212678e-06, "loss": 0.1579, "step": 984 }, { "epoch": 2.02, "learning_rate": 6.7668152659384735e-06, "loss": 0.1663, "step": 985 }, { "epoch": 2.02, "learning_rate": 6.760602981640648e-06, "loss": 0.1528, "step": 986 }, { "epoch": 2.02, "learning_rate": 6.754387593271911e-06, "loss": 0.1737, "step": 987 }, { "epoch": 2.02, "learning_rate": 6.748169111790445e-06, "loss": 0.1648, "step": 988 }, { "epoch": 2.02, "learning_rate": 6.741947548159882e-06, "loss": 0.1799, "step": 989 }, { "epoch": 2.03, "learning_rate": 6.735722913349291e-06, "loss": 0.1605, "step": 990 }, { "epoch": 2.03, "learning_rate": 6.729495218333157e-06, "loss": 0.1405, "step": 991 }, { "epoch": 2.03, "learning_rate": 6.723264474091355e-06, "loss": 0.1622, "step": 992 }, { "epoch": 2.03, "learning_rate": 6.717030691609144e-06, "loss": 0.1556, "step": 993 }, { "epoch": 2.03, "learning_rate": 6.710793881877131e-06, "loss": 0.1959, "step": 994 }, { "epoch": 2.04, "learning_rate": 6.704554055891269e-06, "loss": 0.1497, "step": 995 }, { "epoch": 2.04, "learning_rate": 6.698311224652822e-06, "loss": 0.1752, "step": 996 }, { "epoch": 2.04, "learning_rate": 6.692065399168352e-06, "loss": 0.1543, "step": 997 }, { "epoch": 2.04, "learning_rate": 6.685816590449708e-06, "loss": 0.1531, "step": 998 }, { "epoch": 2.04, "learning_rate": 6.679564809513991e-06, "loss": 0.1592, "step": 999 }, { "epoch": 2.05, "learning_rate": 6.673310067383545e-06, "loss": 0.1607, "step": 1000 }, { "epoch": 2.05, "learning_rate": 6.667052375085934e-06, "loss": 0.1587, "step": 1001 }, { "epoch": 2.05, "learning_rate": 6.660791743653924e-06, "loss": 0.1411, "step": 1002 }, { "epoch": 2.05, "learning_rate": 6.6545281841254635e-06, "loss": 0.1817, "step": 1003 }, { "epoch": 2.05, "learning_rate": 6.648261707543664e-06, "loss": 0.1834, "step": 1004 }, { "epoch": 2.06, "learning_rate": 6.641992324956776e-06, "loss": 0.142, "step": 1005 }, { "epoch": 2.06, "learning_rate": 6.6357200474181796e-06, "loss": 0.1715, "step": 1006 }, { "epoch": 2.06, "learning_rate": 6.6294448859863534e-06, "loss": 0.1445, "step": 1007 }, { "epoch": 2.06, "learning_rate": 6.623166851724862e-06, "loss": 0.1481, "step": 1008 }, { "epoch": 2.06, "learning_rate": 6.616885955702337e-06, "loss": 0.1719, "step": 1009 }, { "epoch": 2.07, "learning_rate": 6.6106022089924535e-06, "loss": 0.1805, "step": 1010 }, { "epoch": 2.07, "learning_rate": 6.604315622673914e-06, "loss": 0.1496, "step": 1011 }, { "epoch": 2.07, "learning_rate": 6.598026207830428e-06, "loss": 0.2071, "step": 1012 }, { "epoch": 2.07, "learning_rate": 6.591733975550687e-06, "loss": 0.1507, "step": 1013 }, { "epoch": 2.08, "learning_rate": 6.585438936928358e-06, "loss": 0.1763, "step": 1014 }, { "epoch": 2.08, "learning_rate": 6.579141103062047e-06, "loss": 0.1765, "step": 1015 }, { "epoch": 2.08, "learning_rate": 6.572840485055294e-06, "loss": 0.1543, "step": 1016 }, { "epoch": 2.08, "learning_rate": 6.5665370940165475e-06, "loss": 0.1613, "step": 1017 }, { "epoch": 2.08, "learning_rate": 6.560230941059141e-06, "loss": 0.1549, "step": 1018 }, { "epoch": 2.09, "learning_rate": 6.553922037301283e-06, "loss": 0.1629, "step": 1019 }, { "epoch": 2.09, "learning_rate": 6.547610393866027e-06, "loss": 0.1516, "step": 1020 }, { "epoch": 2.09, "learning_rate": 6.541296021881259e-06, "loss": 0.1764, "step": 1021 }, { "epoch": 2.09, "learning_rate": 6.534978932479677e-06, "loss": 0.1978, "step": 1022 }, { "epoch": 2.09, "learning_rate": 6.5286591367987655e-06, "loss": 0.173, "step": 1023 }, { "epoch": 2.1, "learning_rate": 6.522336645980785e-06, "loss": 0.1935, "step": 1024 }, { "epoch": 2.1, "learning_rate": 6.516011471172745e-06, "loss": 0.1406, "step": 1025 }, { "epoch": 2.1, "learning_rate": 6.5096836235263904e-06, "loss": 0.1922, "step": 1026 }, { "epoch": 2.1, "learning_rate": 6.503353114198175e-06, "loss": 0.1459, "step": 1027 }, { "epoch": 2.1, "learning_rate": 6.497019954349244e-06, "loss": 0.1593, "step": 1028 }, { "epoch": 2.11, "learning_rate": 6.490684155145422e-06, "loss": 0.1783, "step": 1029 }, { "epoch": 2.11, "learning_rate": 6.484345727757178e-06, "loss": 0.1855, "step": 1030 }, { "epoch": 2.11, "learning_rate": 6.4780046833596246e-06, "loss": 0.1508, "step": 1031 }, { "epoch": 2.11, "learning_rate": 6.471661033132482e-06, "loss": 0.1437, "step": 1032 }, { "epoch": 2.11, "learning_rate": 6.465314788260067e-06, "loss": 0.1621, "step": 1033 }, { "epoch": 2.12, "learning_rate": 6.458965959931267e-06, "loss": 0.142, "step": 1034 }, { "epoch": 2.12, "learning_rate": 6.452614559339532e-06, "loss": 0.1764, "step": 1035 }, { "epoch": 2.12, "learning_rate": 6.4462605976828395e-06, "loss": 0.1459, "step": 1036 }, { "epoch": 2.12, "learning_rate": 6.4399040861636855e-06, "loss": 0.1945, "step": 1037 }, { "epoch": 2.12, "learning_rate": 6.433545035989063e-06, "loss": 0.1727, "step": 1038 }, { "epoch": 2.13, "learning_rate": 6.427183458370436e-06, "loss": 0.1417, "step": 1039 }, { "epoch": 2.13, "learning_rate": 6.4208193645237314e-06, "loss": 0.1481, "step": 1040 }, { "epoch": 2.13, "learning_rate": 6.414452765669307e-06, "loss": 0.1576, "step": 1041 }, { "epoch": 2.13, "learning_rate": 6.40808367303194e-06, "loss": 0.1522, "step": 1042 }, { "epoch": 2.13, "learning_rate": 6.401712097840802e-06, "loss": 0.1619, "step": 1043 }, { "epoch": 2.14, "learning_rate": 6.395338051329443e-06, "loss": 0.1712, "step": 1044 }, { "epoch": 2.14, "learning_rate": 6.38896154473577e-06, "loss": 0.1566, "step": 1045 }, { "epoch": 2.14, "learning_rate": 6.382582589302029e-06, "loss": 0.1448, "step": 1046 }, { "epoch": 2.14, "learning_rate": 6.376201196274778e-06, "loss": 0.1889, "step": 1047 }, { "epoch": 2.14, "learning_rate": 6.36981737690488e-06, "loss": 0.1572, "step": 1048 }, { "epoch": 2.15, "learning_rate": 6.363431142447469e-06, "loss": 0.169, "step": 1049 }, { "epoch": 2.15, "learning_rate": 6.357042504161943e-06, "loss": 0.175, "step": 1050 }, { "epoch": 2.15, "learning_rate": 6.350651473311933e-06, "loss": 0.1896, "step": 1051 }, { "epoch": 2.15, "learning_rate": 6.3442580611652915e-06, "loss": 0.1593, "step": 1052 }, { "epoch": 2.16, "learning_rate": 6.337862278994069e-06, "loss": 0.1646, "step": 1053 }, { "epoch": 2.16, "learning_rate": 6.331464138074493e-06, "loss": 0.1585, "step": 1054 }, { "epoch": 2.16, "learning_rate": 6.325063649686951e-06, "loss": 0.1728, "step": 1055 }, { "epoch": 2.16, "learning_rate": 6.318660825115968e-06, "loss": 0.1621, "step": 1056 }, { "epoch": 2.16, "learning_rate": 6.312255675650188e-06, "loss": 0.1561, "step": 1057 }, { "epoch": 2.17, "learning_rate": 6.305848212582359e-06, "loss": 0.1602, "step": 1058 }, { "epoch": 2.17, "learning_rate": 6.299438447209298e-06, "loss": 0.123, "step": 1059 }, { "epoch": 2.17, "learning_rate": 6.29302639083189e-06, "loss": 0.1545, "step": 1060 }, { "epoch": 2.17, "learning_rate": 6.286612054755056e-06, "loss": 0.161, "step": 1061 }, { "epoch": 2.17, "learning_rate": 6.280195450287736e-06, "loss": 0.1556, "step": 1062 }, { "epoch": 2.18, "learning_rate": 6.273776588742869e-06, "loss": 0.1509, "step": 1063 }, { "epoch": 2.18, "learning_rate": 6.267355481437375e-06, "loss": 0.1895, "step": 1064 }, { "epoch": 2.18, "learning_rate": 6.260932139692133e-06, "loss": 0.1819, "step": 1065 }, { "epoch": 2.18, "learning_rate": 6.254506574831958e-06, "loss": 0.1905, "step": 1066 }, { "epoch": 2.18, "learning_rate": 6.248078798185591e-06, "loss": 0.184, "step": 1067 }, { "epoch": 2.19, "learning_rate": 6.241648821085666e-06, "loss": 0.1725, "step": 1068 }, { "epoch": 2.19, "learning_rate": 6.2352166548687006e-06, "loss": 0.1842, "step": 1069 }, { "epoch": 2.19, "learning_rate": 6.228782310875072e-06, "loss": 0.1567, "step": 1070 }, { "epoch": 2.19, "learning_rate": 6.222345800448996e-06, "loss": 0.1823, "step": 1071 }, { "epoch": 2.19, "learning_rate": 6.215907134938508e-06, "loss": 0.1557, "step": 1072 }, { "epoch": 2.2, "learning_rate": 6.2094663256954405e-06, "loss": 0.1582, "step": 1073 }, { "epoch": 2.2, "learning_rate": 6.20302338407541e-06, "loss": 0.1665, "step": 1074 }, { "epoch": 2.2, "learning_rate": 6.1965783214377895e-06, "loss": 0.1888, "step": 1075 }, { "epoch": 2.2, "learning_rate": 6.190131149145693e-06, "loss": 0.1511, "step": 1076 }, { "epoch": 2.2, "learning_rate": 6.183681878565955e-06, "loss": 0.1557, "step": 1077 }, { "epoch": 2.21, "learning_rate": 6.177230521069104e-06, "loss": 0.185, "step": 1078 }, { "epoch": 2.21, "learning_rate": 6.170777088029355e-06, "loss": 0.1419, "step": 1079 }, { "epoch": 2.21, "learning_rate": 6.164321590824577e-06, "loss": 0.1672, "step": 1080 }, { "epoch": 2.21, "learning_rate": 6.157864040836282e-06, "loss": 0.1699, "step": 1081 }, { "epoch": 2.21, "learning_rate": 6.1514044494496e-06, "loss": 0.1792, "step": 1082 }, { "epoch": 2.22, "learning_rate": 6.144942828053257e-06, "loss": 0.1568, "step": 1083 }, { "epoch": 2.22, "learning_rate": 6.138479188039564e-06, "loss": 0.1698, "step": 1084 }, { "epoch": 2.22, "learning_rate": 6.132013540804386e-06, "loss": 0.1626, "step": 1085 }, { "epoch": 2.22, "learning_rate": 6.12554589774713e-06, "loss": 0.1534, "step": 1086 }, { "epoch": 2.22, "learning_rate": 6.119076270270718e-06, "loss": 0.1413, "step": 1087 }, { "epoch": 2.23, "learning_rate": 6.112604669781572e-06, "loss": 0.1368, "step": 1088 }, { "epoch": 2.23, "learning_rate": 6.106131107689599e-06, "loss": 0.1569, "step": 1089 }, { "epoch": 2.23, "learning_rate": 6.099655595408154e-06, "loss": 0.1679, "step": 1090 }, { "epoch": 2.23, "learning_rate": 6.093178144354039e-06, "loss": 0.1483, "step": 1091 }, { "epoch": 2.23, "learning_rate": 6.086698765947468e-06, "loss": 0.1828, "step": 1092 }, { "epoch": 2.24, "learning_rate": 6.080217471612059e-06, "loss": 0.1347, "step": 1093 }, { "epoch": 2.24, "learning_rate": 6.0737342727748e-06, "loss": 0.1709, "step": 1094 }, { "epoch": 2.24, "learning_rate": 6.067249180866044e-06, "loss": 0.1724, "step": 1095 }, { "epoch": 2.24, "learning_rate": 6.060762207319479e-06, "loss": 0.1901, "step": 1096 }, { "epoch": 2.25, "learning_rate": 6.05427336357211e-06, "loss": 0.162, "step": 1097 }, { "epoch": 2.25, "learning_rate": 6.0477826610642394e-06, "loss": 0.1504, "step": 1098 }, { "epoch": 2.25, "learning_rate": 6.041290111239446e-06, "loss": 0.1242, "step": 1099 }, { "epoch": 2.25, "learning_rate": 6.034795725544571e-06, "loss": 0.1649, "step": 1100 }, { "epoch": 2.25, "learning_rate": 6.028299515429683e-06, "loss": 0.1403, "step": 1101 }, { "epoch": 2.26, "learning_rate": 6.021801492348075e-06, "loss": 0.1687, "step": 1102 }, { "epoch": 2.26, "learning_rate": 6.015301667756234e-06, "loss": 0.1715, "step": 1103 }, { "epoch": 2.26, "learning_rate": 6.00880005311382e-06, "loss": 0.1352, "step": 1104 }, { "epoch": 2.26, "learning_rate": 6.002296659883654e-06, "loss": 0.1769, "step": 1105 }, { "epoch": 2.26, "learning_rate": 5.995791499531691e-06, "loss": 0.1332, "step": 1106 }, { "epoch": 2.27, "learning_rate": 5.9892845835269986e-06, "loss": 0.1785, "step": 1107 }, { "epoch": 2.27, "learning_rate": 5.982775923341744e-06, "loss": 0.1961, "step": 1108 }, { "epoch": 2.27, "learning_rate": 5.976265530451166e-06, "loss": 0.1783, "step": 1109 }, { "epoch": 2.27, "learning_rate": 5.9697534163335645e-06, "loss": 0.1801, "step": 1110 }, { "epoch": 2.27, "learning_rate": 5.963239592470265e-06, "loss": 0.1712, "step": 1111 }, { "epoch": 2.28, "learning_rate": 5.956724070345613e-06, "loss": 0.1462, "step": 1112 }, { "epoch": 2.28, "learning_rate": 5.95020686144695e-06, "loss": 0.1752, "step": 1113 }, { "epoch": 2.28, "learning_rate": 5.943687977264584e-06, "loss": 0.1386, "step": 1114 }, { "epoch": 2.28, "learning_rate": 5.937167429291785e-06, "loss": 0.1161, "step": 1115 }, { "epoch": 2.28, "learning_rate": 5.93064522902475e-06, "loss": 0.1747, "step": 1116 }, { "epoch": 2.29, "learning_rate": 5.924121387962594e-06, "loss": 0.1442, "step": 1117 }, { "epoch": 2.29, "learning_rate": 5.9175959176073195e-06, "loss": 0.1837, "step": 1118 }, { "epoch": 2.29, "learning_rate": 5.9110688294638085e-06, "loss": 0.1426, "step": 1119 }, { "epoch": 2.29, "learning_rate": 5.904540135039789e-06, "loss": 0.1409, "step": 1120 }, { "epoch": 2.29, "learning_rate": 5.8980098458458235e-06, "loss": 0.1555, "step": 1121 }, { "epoch": 2.3, "learning_rate": 5.891477973395287e-06, "loss": 0.1623, "step": 1122 }, { "epoch": 2.3, "learning_rate": 5.8849445292043436e-06, "loss": 0.1669, "step": 1123 }, { "epoch": 2.3, "learning_rate": 5.878409524791931e-06, "loss": 0.1414, "step": 1124 }, { "epoch": 2.3, "learning_rate": 5.871872971679736e-06, "loss": 0.1456, "step": 1125 }, { "epoch": 2.3, "learning_rate": 5.865334881392177e-06, "loss": 0.1776, "step": 1126 }, { "epoch": 2.31, "learning_rate": 5.858795265456382e-06, "loss": 0.1894, "step": 1127 }, { "epoch": 2.31, "learning_rate": 5.852254135402165e-06, "loss": 0.153, "step": 1128 }, { "epoch": 2.31, "learning_rate": 5.8457115027620215e-06, "loss": 0.1788, "step": 1129 }, { "epoch": 2.31, "learning_rate": 5.83916737907108e-06, "loss": 0.1476, "step": 1130 }, { "epoch": 2.31, "learning_rate": 5.83262177586711e-06, "loss": 0.1363, "step": 1131 }, { "epoch": 2.32, "learning_rate": 5.826074704690484e-06, "loss": 0.1596, "step": 1132 }, { "epoch": 2.32, "learning_rate": 5.819526177084165e-06, "loss": 0.1847, "step": 1133 }, { "epoch": 2.32, "learning_rate": 5.812976204593681e-06, "loss": 0.1474, "step": 1134 }, { "epoch": 2.32, "learning_rate": 5.80642479876711e-06, "loss": 0.16, "step": 1135 }, { "epoch": 2.32, "learning_rate": 5.799871971155056e-06, "loss": 0.1529, "step": 1136 }, { "epoch": 2.33, "learning_rate": 5.793317733310631e-06, "loss": 0.1635, "step": 1137 }, { "epoch": 2.33, "learning_rate": 5.786762096789431e-06, "loss": 0.1392, "step": 1138 }, { "epoch": 2.33, "learning_rate": 5.780205073149521e-06, "loss": 0.1364, "step": 1139 }, { "epoch": 2.33, "learning_rate": 5.773646673951406e-06, "loss": 0.1785, "step": 1140 }, { "epoch": 2.34, "learning_rate": 5.767086910758026e-06, "loss": 0.1463, "step": 1141 }, { "epoch": 2.34, "learning_rate": 5.760525795134714e-06, "loss": 0.1574, "step": 1142 }, { "epoch": 2.34, "learning_rate": 5.753963338649197e-06, "loss": 0.1271, "step": 1143 }, { "epoch": 2.34, "learning_rate": 5.747399552871561e-06, "loss": 0.1578, "step": 1144 }, { "epoch": 2.34, "learning_rate": 5.740834449374237e-06, "loss": 0.1731, "step": 1145 }, { "epoch": 2.35, "learning_rate": 5.7342680397319806e-06, "loss": 0.1596, "step": 1146 }, { "epoch": 2.35, "learning_rate": 5.7277003355218454e-06, "loss": 0.1672, "step": 1147 }, { "epoch": 2.35, "learning_rate": 5.721131348323174e-06, "loss": 0.1472, "step": 1148 }, { "epoch": 2.35, "learning_rate": 5.7145610897175674e-06, "loss": 0.1645, "step": 1149 }, { "epoch": 2.35, "learning_rate": 5.7079895712888685e-06, "loss": 0.1392, "step": 1150 }, { "epoch": 2.36, "learning_rate": 5.701416804623142e-06, "loss": 0.1529, "step": 1151 }, { "epoch": 2.36, "learning_rate": 5.694842801308651e-06, "loss": 0.1596, "step": 1152 }, { "epoch": 2.36, "learning_rate": 5.688267572935843e-06, "loss": 0.1339, "step": 1153 }, { "epoch": 2.36, "learning_rate": 5.68169113109732e-06, "loss": 0.1464, "step": 1154 }, { "epoch": 2.36, "learning_rate": 5.67511348738783e-06, "loss": 0.162, "step": 1155 }, { "epoch": 2.37, "learning_rate": 5.668534653404235e-06, "loss": 0.1736, "step": 1156 }, { "epoch": 2.37, "learning_rate": 5.661954640745495e-06, "loss": 0.2031, "step": 1157 }, { "epoch": 2.37, "learning_rate": 5.655373461012656e-06, "loss": 0.1651, "step": 1158 }, { "epoch": 2.37, "learning_rate": 5.648791125808809e-06, "loss": 0.1466, "step": 1159 }, { "epoch": 2.37, "learning_rate": 5.642207646739094e-06, "loss": 0.1964, "step": 1160 }, { "epoch": 2.38, "learning_rate": 5.63562303541066e-06, "loss": 0.1635, "step": 1161 }, { "epoch": 2.38, "learning_rate": 5.629037303432655e-06, "loss": 0.169, "step": 1162 }, { "epoch": 2.38, "learning_rate": 5.622450462416205e-06, "loss": 0.1572, "step": 1163 }, { "epoch": 2.38, "learning_rate": 5.615862523974386e-06, "loss": 0.1461, "step": 1164 }, { "epoch": 2.38, "learning_rate": 5.609273499722213e-06, "loss": 0.1487, "step": 1165 }, { "epoch": 2.39, "learning_rate": 5.6026834012766155e-06, "loss": 0.18, "step": 1166 }, { "epoch": 2.39, "learning_rate": 5.596092240256414e-06, "loss": 0.1711, "step": 1167 }, { "epoch": 2.39, "learning_rate": 5.589500028282306e-06, "loss": 0.1767, "step": 1168 }, { "epoch": 2.39, "learning_rate": 5.582906776976839e-06, "loss": 0.1569, "step": 1169 }, { "epoch": 2.39, "learning_rate": 5.576312497964394e-06, "loss": 0.1488, "step": 1170 }, { "epoch": 2.4, "learning_rate": 5.569717202871161e-06, "loss": 0.1423, "step": 1171 }, { "epoch": 2.4, "learning_rate": 5.563120903325126e-06, "loss": 0.145, "step": 1172 }, { "epoch": 2.4, "learning_rate": 5.556523610956049e-06, "loss": 0.1691, "step": 1173 }, { "epoch": 2.4, "learning_rate": 5.549925337395428e-06, "loss": 0.1629, "step": 1174 }, { "epoch": 2.4, "learning_rate": 5.543326094276502e-06, "loss": 0.1772, "step": 1175 }, { "epoch": 2.41, "learning_rate": 5.536725893234214e-06, "loss": 0.1686, "step": 1176 }, { "epoch": 2.41, "learning_rate": 5.530124745905202e-06, "loss": 0.1884, "step": 1177 }, { "epoch": 2.41, "learning_rate": 5.523522663927764e-06, "loss": 0.1369, "step": 1178 }, { "epoch": 2.41, "learning_rate": 5.51691965894185e-06, "loss": 0.1712, "step": 1179 }, { "epoch": 2.41, "learning_rate": 5.510315742589042e-06, "loss": 0.1254, "step": 1180 }, { "epoch": 2.42, "learning_rate": 5.503710926512519e-06, "loss": 0.1487, "step": 1181 }, { "epoch": 2.42, "learning_rate": 5.497105222357053e-06, "loss": 0.1672, "step": 1182 }, { "epoch": 2.42, "learning_rate": 5.4904986417689795e-06, "loss": 0.1655, "step": 1183 }, { "epoch": 2.42, "learning_rate": 5.483891196396182e-06, "loss": 0.1712, "step": 1184 }, { "epoch": 2.43, "learning_rate": 5.477282897888064e-06, "loss": 0.1704, "step": 1185 }, { "epoch": 2.43, "learning_rate": 5.470673757895537e-06, "loss": 0.198, "step": 1186 }, { "epoch": 2.43, "learning_rate": 5.464063788070996e-06, "loss": 0.174, "step": 1187 }, { "epoch": 2.43, "learning_rate": 5.457453000068294e-06, "loss": 0.1623, "step": 1188 }, { "epoch": 2.43, "learning_rate": 5.450841405542735e-06, "loss": 0.1659, "step": 1189 }, { "epoch": 2.44, "learning_rate": 5.444229016151037e-06, "loss": 0.154, "step": 1190 }, { "epoch": 2.44, "learning_rate": 5.437615843551323e-06, "loss": 0.1495, "step": 1191 }, { "epoch": 2.44, "learning_rate": 5.4310018994030974e-06, "loss": 0.1522, "step": 1192 }, { "epoch": 2.44, "learning_rate": 5.424387195367225e-06, "loss": 0.1486, "step": 1193 }, { "epoch": 2.44, "learning_rate": 5.417771743105908e-06, "loss": 0.1658, "step": 1194 }, { "epoch": 2.45, "learning_rate": 5.411155554282666e-06, "loss": 0.1526, "step": 1195 }, { "epoch": 2.45, "learning_rate": 5.404538640562328e-06, "loss": 0.1617, "step": 1196 }, { "epoch": 2.45, "learning_rate": 5.397921013610987e-06, "loss": 0.1604, "step": 1197 }, { "epoch": 2.45, "learning_rate": 5.3913026850960026e-06, "loss": 0.1705, "step": 1198 }, { "epoch": 2.45, "learning_rate": 5.384683666685969e-06, "loss": 0.1648, "step": 1199 }, { "epoch": 2.46, "learning_rate": 5.378063970050694e-06, "loss": 0.1774, "step": 1200 }, { "epoch": 2.46, "learning_rate": 5.371443606861186e-06, "loss": 0.1379, "step": 1201 }, { "epoch": 2.46, "learning_rate": 5.364822588789624e-06, "loss": 0.1928, "step": 1202 }, { "epoch": 2.46, "learning_rate": 5.358200927509344e-06, "loss": 0.1826, "step": 1203 }, { "epoch": 2.46, "learning_rate": 5.351578634694818e-06, "loss": 0.1617, "step": 1204 }, { "epoch": 2.47, "learning_rate": 5.3449557220216245e-06, "loss": 0.1389, "step": 1205 }, { "epoch": 2.47, "learning_rate": 5.338332201166445e-06, "loss": 0.1871, "step": 1206 }, { "epoch": 2.47, "learning_rate": 5.331708083807025e-06, "loss": 0.144, "step": 1207 }, { "epoch": 2.47, "learning_rate": 5.325083381622165e-06, "loss": 0.1793, "step": 1208 }, { "epoch": 2.47, "learning_rate": 5.318458106291694e-06, "loss": 0.1425, "step": 1209 }, { "epoch": 2.48, "learning_rate": 5.311832269496455e-06, "loss": 0.1568, "step": 1210 }, { "epoch": 2.48, "learning_rate": 5.3052058829182815e-06, "loss": 0.1344, "step": 1211 }, { "epoch": 2.48, "learning_rate": 5.298578958239972e-06, "loss": 0.1462, "step": 1212 }, { "epoch": 2.48, "learning_rate": 5.291951507145276e-06, "loss": 0.1809, "step": 1213 }, { "epoch": 2.48, "learning_rate": 5.2853235413188696e-06, "loss": 0.1769, "step": 1214 }, { "epoch": 2.49, "learning_rate": 5.278695072446342e-06, "loss": 0.1574, "step": 1215 }, { "epoch": 2.49, "learning_rate": 5.272066112214163e-06, "loss": 0.1358, "step": 1216 }, { "epoch": 2.49, "learning_rate": 5.26543667230967e-06, "loss": 0.1768, "step": 1217 }, { "epoch": 2.49, "learning_rate": 5.258806764421048e-06, "loss": 0.1486, "step": 1218 }, { "epoch": 2.49, "learning_rate": 5.252176400237306e-06, "loss": 0.1769, "step": 1219 }, { "epoch": 2.5, "learning_rate": 5.2455455914482565e-06, "loss": 0.1736, "step": 1220 }, { "epoch": 2.5, "learning_rate": 5.238914349744498e-06, "loss": 0.1741, "step": 1221 }, { "epoch": 2.5, "learning_rate": 5.232282686817392e-06, "loss": 0.1784, "step": 1222 }, { "epoch": 2.5, "learning_rate": 5.22565061435904e-06, "loss": 0.1449, "step": 1223 }, { "epoch": 2.5, "learning_rate": 5.219018144062266e-06, "loss": 0.168, "step": 1224 }, { "epoch": 2.51, "learning_rate": 5.212385287620602e-06, "loss": 0.1718, "step": 1225 }, { "epoch": 2.51, "learning_rate": 5.205752056728252e-06, "loss": 0.192, "step": 1226 }, { "epoch": 2.51, "learning_rate": 5.199118463080083e-06, "loss": 0.1304, "step": 1227 }, { "epoch": 2.51, "learning_rate": 5.192484518371606e-06, "loss": 0.1726, "step": 1228 }, { "epoch": 2.52, "learning_rate": 5.185850234298943e-06, "loss": 0.187, "step": 1229 }, { "epoch": 2.52, "learning_rate": 5.179215622558821e-06, "loss": 0.1288, "step": 1230 }, { "epoch": 2.52, "learning_rate": 5.172580694848541e-06, "loss": 0.1569, "step": 1231 }, { "epoch": 2.52, "learning_rate": 5.165945462865964e-06, "loss": 0.157, "step": 1232 }, { "epoch": 2.52, "learning_rate": 5.159309938309485e-06, "loss": 0.1463, "step": 1233 }, { "epoch": 2.53, "learning_rate": 5.152674132878015e-06, "loss": 0.1567, "step": 1234 }, { "epoch": 2.53, "learning_rate": 5.1460380582709634e-06, "loss": 0.1847, "step": 1235 }, { "epoch": 2.53, "learning_rate": 5.139401726188208e-06, "loss": 0.1224, "step": 1236 }, { "epoch": 2.53, "learning_rate": 5.132765148330087e-06, "loss": 0.1844, "step": 1237 }, { "epoch": 2.53, "learning_rate": 5.126128336397368e-06, "loss": 0.1488, "step": 1238 }, { "epoch": 2.54, "learning_rate": 5.119491302091232e-06, "loss": 0.1689, "step": 1239 }, { "epoch": 2.54, "learning_rate": 5.112854057113254e-06, "loss": 0.1676, "step": 1240 }, { "epoch": 2.54, "learning_rate": 5.106216613165376e-06, "loss": 0.1443, "step": 1241 }, { "epoch": 2.54, "learning_rate": 5.099578981949898e-06, "loss": 0.1688, "step": 1242 }, { "epoch": 2.54, "learning_rate": 5.09294117516944e-06, "loss": 0.1445, "step": 1243 }, { "epoch": 2.55, "learning_rate": 5.0863032045269435e-06, "loss": 0.1867, "step": 1244 }, { "epoch": 2.55, "learning_rate": 5.079665081725629e-06, "loss": 0.1673, "step": 1245 }, { "epoch": 2.55, "learning_rate": 5.07302681846899e-06, "loss": 0.1411, "step": 1246 }, { "epoch": 2.55, "learning_rate": 5.066388426460768e-06, "loss": 0.1447, "step": 1247 }, { "epoch": 2.55, "learning_rate": 5.059749917404928e-06, "loss": 0.1709, "step": 1248 }, { "epoch": 2.56, "learning_rate": 5.0531113030056455e-06, "loss": 0.1673, "step": 1249 }, { "epoch": 2.56, "learning_rate": 5.046472594967279e-06, "loss": 0.1412, "step": 1250 }, { "epoch": 2.56, "learning_rate": 5.0398338049943504e-06, "loss": 0.1485, "step": 1251 }, { "epoch": 2.56, "learning_rate": 5.033194944791534e-06, "loss": 0.1919, "step": 1252 }, { "epoch": 2.56, "learning_rate": 5.026556026063616e-06, "loss": 0.1608, "step": 1253 }, { "epoch": 2.57, "learning_rate": 5.019917060515498e-06, "loss": 0.1752, "step": 1254 }, { "epoch": 2.57, "learning_rate": 5.013278059852156e-06, "loss": 0.1548, "step": 1255 }, { "epoch": 2.57, "learning_rate": 5.00663903577863e-06, "loss": 0.1686, "step": 1256 }, { "epoch": 2.57, "learning_rate": 5e-06, "loss": 0.1533, "step": 1257 }, { "epoch": 2.57, "learning_rate": 4.993360964221372e-06, "loss": 0.1701, "step": 1258 }, { "epoch": 2.58, "learning_rate": 4.986721940147846e-06, "loss": 0.172, "step": 1259 }, { "epoch": 2.58, "learning_rate": 4.980082939484502e-06, "loss": 0.1865, "step": 1260 }, { "epoch": 2.58, "learning_rate": 4.9734439739363855e-06, "loss": 0.1411, "step": 1261 }, { "epoch": 2.58, "learning_rate": 4.966805055208467e-06, "loss": 0.1667, "step": 1262 }, { "epoch": 2.58, "learning_rate": 4.96016619500565e-06, "loss": 0.1487, "step": 1263 }, { "epoch": 2.59, "learning_rate": 4.953527405032723e-06, "loss": 0.1528, "step": 1264 }, { "epoch": 2.59, "learning_rate": 4.946888696994357e-06, "loss": 0.1979, "step": 1265 }, { "epoch": 2.59, "learning_rate": 4.940250082595074e-06, "loss": 0.1718, "step": 1266 }, { "epoch": 2.59, "learning_rate": 4.933611573539232e-06, "loss": 0.1612, "step": 1267 }, { "epoch": 2.6, "learning_rate": 4.926973181531011e-06, "loss": 0.1565, "step": 1268 }, { "epoch": 2.6, "learning_rate": 4.920334918274372e-06, "loss": 0.151, "step": 1269 }, { "epoch": 2.6, "learning_rate": 4.913696795473058e-06, "loss": 0.1703, "step": 1270 }, { "epoch": 2.6, "learning_rate": 4.90705882483056e-06, "loss": 0.1521, "step": 1271 }, { "epoch": 2.6, "learning_rate": 4.900421018050106e-06, "loss": 0.1638, "step": 1272 }, { "epoch": 2.61, "learning_rate": 4.893783386834626e-06, "loss": 0.1409, "step": 1273 }, { "epoch": 2.61, "learning_rate": 4.887145942886749e-06, "loss": 0.1411, "step": 1274 }, { "epoch": 2.61, "learning_rate": 4.8805086979087704e-06, "loss": 0.1921, "step": 1275 }, { "epoch": 2.61, "learning_rate": 4.873871663602635e-06, "loss": 0.1602, "step": 1276 }, { "epoch": 2.61, "learning_rate": 4.867234851669914e-06, "loss": 0.1721, "step": 1277 }, { "epoch": 2.62, "learning_rate": 4.860598273811793e-06, "loss": 0.1907, "step": 1278 }, { "epoch": 2.62, "learning_rate": 4.853961941729038e-06, "loss": 0.1399, "step": 1279 }, { "epoch": 2.62, "learning_rate": 4.8473258671219855e-06, "loss": 0.1413, "step": 1280 }, { "epoch": 2.62, "learning_rate": 4.840690061690517e-06, "loss": 0.1944, "step": 1281 }, { "epoch": 2.62, "learning_rate": 4.834054537134038e-06, "loss": 0.1599, "step": 1282 }, { "epoch": 2.63, "learning_rate": 4.827419305151461e-06, "loss": 0.1352, "step": 1283 }, { "epoch": 2.63, "learning_rate": 4.8207843774411805e-06, "loss": 0.1544, "step": 1284 }, { "epoch": 2.63, "learning_rate": 4.814149765701059e-06, "loss": 0.1435, "step": 1285 }, { "epoch": 2.63, "learning_rate": 4.807515481628396e-06, "loss": 0.1558, "step": 1286 }, { "epoch": 2.63, "learning_rate": 4.800881536919918e-06, "loss": 0.1702, "step": 1287 }, { "epoch": 2.64, "learning_rate": 4.79424794327175e-06, "loss": 0.2012, "step": 1288 }, { "epoch": 2.64, "learning_rate": 4.787614712379399e-06, "loss": 0.1481, "step": 1289 }, { "epoch": 2.64, "learning_rate": 4.780981855937735e-06, "loss": 0.1542, "step": 1290 }, { "epoch": 2.64, "learning_rate": 4.774349385640962e-06, "loss": 0.1513, "step": 1291 }, { "epoch": 2.64, "learning_rate": 4.767717313182611e-06, "loss": 0.1758, "step": 1292 }, { "epoch": 2.65, "learning_rate": 4.761085650255503e-06, "loss": 0.1341, "step": 1293 }, { "epoch": 2.65, "learning_rate": 4.754454408551746e-06, "loss": 0.1523, "step": 1294 }, { "epoch": 2.65, "learning_rate": 4.747823599762697e-06, "loss": 0.1295, "step": 1295 }, { "epoch": 2.65, "learning_rate": 4.741193235578953e-06, "loss": 0.1885, "step": 1296 }, { "epoch": 2.65, "learning_rate": 4.734563327690332e-06, "loss": 0.1712, "step": 1297 }, { "epoch": 2.66, "learning_rate": 4.727933887785838e-06, "loss": 0.1516, "step": 1298 }, { "epoch": 2.66, "learning_rate": 4.721304927553659e-06, "loss": 0.164, "step": 1299 }, { "epoch": 2.66, "learning_rate": 4.71467645868113e-06, "loss": 0.1801, "step": 1300 }, { "epoch": 2.66, "learning_rate": 4.708048492854726e-06, "loss": 0.1729, "step": 1301 }, { "epoch": 2.66, "learning_rate": 4.70142104176003e-06, "loss": 0.1554, "step": 1302 }, { "epoch": 2.67, "learning_rate": 4.694794117081721e-06, "loss": 0.1334, "step": 1303 }, { "epoch": 2.67, "learning_rate": 4.688167730503546e-06, "loss": 0.1925, "step": 1304 }, { "epoch": 2.67, "learning_rate": 4.681541893708309e-06, "loss": 0.1592, "step": 1305 }, { "epoch": 2.67, "learning_rate": 4.6749166183778375e-06, "loss": 0.1677, "step": 1306 }, { "epoch": 2.67, "learning_rate": 4.668291916192976e-06, "loss": 0.1564, "step": 1307 }, { "epoch": 2.68, "learning_rate": 4.661667798833556e-06, "loss": 0.1286, "step": 1308 }, { "epoch": 2.68, "learning_rate": 4.6550442779783755e-06, "loss": 0.1733, "step": 1309 }, { "epoch": 2.68, "learning_rate": 4.648421365305185e-06, "loss": 0.1551, "step": 1310 }, { "epoch": 2.68, "learning_rate": 4.641799072490658e-06, "loss": 0.1336, "step": 1311 }, { "epoch": 2.69, "learning_rate": 4.635177411210379e-06, "loss": 0.1532, "step": 1312 }, { "epoch": 2.69, "learning_rate": 4.628556393138816e-06, "loss": 0.1774, "step": 1313 }, { "epoch": 2.69, "learning_rate": 4.621936029949309e-06, "loss": 0.1586, "step": 1314 }, { "epoch": 2.69, "learning_rate": 4.615316333314033e-06, "loss": 0.1621, "step": 1315 }, { "epoch": 2.69, "learning_rate": 4.608697314903997e-06, "loss": 0.1718, "step": 1316 }, { "epoch": 2.7, "learning_rate": 4.6020789863890144e-06, "loss": 0.1647, "step": 1317 }, { "epoch": 2.7, "learning_rate": 4.595461359437673e-06, "loss": 0.1599, "step": 1318 }, { "epoch": 2.7, "learning_rate": 4.588844445717335e-06, "loss": 0.1462, "step": 1319 }, { "epoch": 2.7, "learning_rate": 4.582228256894093e-06, "loss": 0.1638, "step": 1320 }, { "epoch": 2.7, "learning_rate": 4.575612804632778e-06, "loss": 0.1385, "step": 1321 }, { "epoch": 2.71, "learning_rate": 4.568998100596903e-06, "loss": 0.1473, "step": 1322 }, { "epoch": 2.71, "learning_rate": 4.562384156448679e-06, "loss": 0.1746, "step": 1323 }, { "epoch": 2.71, "learning_rate": 4.555770983848965e-06, "loss": 0.1521, "step": 1324 }, { "epoch": 2.71, "learning_rate": 4.5491585944572664e-06, "loss": 0.1551, "step": 1325 }, { "epoch": 2.71, "learning_rate": 4.5425469999317075e-06, "loss": 0.1474, "step": 1326 }, { "epoch": 2.72, "learning_rate": 4.535936211929005e-06, "loss": 0.1542, "step": 1327 }, { "epoch": 2.72, "learning_rate": 4.529326242104465e-06, "loss": 0.1722, "step": 1328 }, { "epoch": 2.72, "learning_rate": 4.522717102111937e-06, "loss": 0.1629, "step": 1329 }, { "epoch": 2.72, "learning_rate": 4.51610880360382e-06, "loss": 0.1707, "step": 1330 }, { "epoch": 2.72, "learning_rate": 4.509501358231021e-06, "loss": 0.1237, "step": 1331 }, { "epoch": 2.73, "learning_rate": 4.502894777642949e-06, "loss": 0.1948, "step": 1332 }, { "epoch": 2.73, "learning_rate": 4.496289073487483e-06, "loss": 0.1491, "step": 1333 }, { "epoch": 2.73, "learning_rate": 4.489684257410959e-06, "loss": 0.1737, "step": 1334 }, { "epoch": 2.73, "learning_rate": 4.4830803410581506e-06, "loss": 0.1546, "step": 1335 }, { "epoch": 2.73, "learning_rate": 4.476477336072237e-06, "loss": 0.1258, "step": 1336 }, { "epoch": 2.74, "learning_rate": 4.4698752540947995e-06, "loss": 0.1755, "step": 1337 }, { "epoch": 2.74, "learning_rate": 4.4632741067657855e-06, "loss": 0.1226, "step": 1338 }, { "epoch": 2.74, "learning_rate": 4.4566739057235005e-06, "loss": 0.127, "step": 1339 }, { "epoch": 2.74, "learning_rate": 4.450074662604575e-06, "loss": 0.1634, "step": 1340 }, { "epoch": 2.74, "learning_rate": 4.443476389043955e-06, "loss": 0.195, "step": 1341 }, { "epoch": 2.75, "learning_rate": 4.4368790966748746e-06, "loss": 0.2002, "step": 1342 }, { "epoch": 2.75, "learning_rate": 4.430282797128842e-06, "loss": 0.152, "step": 1343 }, { "epoch": 2.75, "learning_rate": 4.423687502035609e-06, "loss": 0.15, "step": 1344 }, { "epoch": 2.75, "learning_rate": 4.417093223023161e-06, "loss": 0.159, "step": 1345 }, { "epoch": 2.75, "learning_rate": 4.410499971717695e-06, "loss": 0.1463, "step": 1346 }, { "epoch": 2.76, "learning_rate": 4.403907759743586e-06, "loss": 0.1396, "step": 1347 }, { "epoch": 2.76, "learning_rate": 4.397316598723385e-06, "loss": 0.1564, "step": 1348 }, { "epoch": 2.76, "learning_rate": 4.390726500277788e-06, "loss": 0.2015, "step": 1349 }, { "epoch": 2.76, "learning_rate": 4.384137476025617e-06, "loss": 0.1516, "step": 1350 }, { "epoch": 2.76, "learning_rate": 4.377549537583797e-06, "loss": 0.1708, "step": 1351 }, { "epoch": 2.77, "learning_rate": 4.3709626965673464e-06, "loss": 0.1571, "step": 1352 }, { "epoch": 2.77, "learning_rate": 4.364376964589342e-06, "loss": 0.1596, "step": 1353 }, { "epoch": 2.77, "learning_rate": 4.357792353260907e-06, "loss": 0.1734, "step": 1354 }, { "epoch": 2.77, "learning_rate": 4.351208874191192e-06, "loss": 0.1611, "step": 1355 }, { "epoch": 2.78, "learning_rate": 4.344626538987345e-06, "loss": 0.1528, "step": 1356 }, { "epoch": 2.78, "learning_rate": 4.338045359254506e-06, "loss": 0.168, "step": 1357 }, { "epoch": 2.78, "learning_rate": 4.331465346595767e-06, "loss": 0.141, "step": 1358 }, { "epoch": 2.78, "learning_rate": 4.324886512612172e-06, "loss": 0.1431, "step": 1359 }, { "epoch": 2.78, "learning_rate": 4.318308868902681e-06, "loss": 0.147, "step": 1360 }, { "epoch": 2.79, "learning_rate": 4.31173242706416e-06, "loss": 0.1452, "step": 1361 }, { "epoch": 2.79, "learning_rate": 4.305157198691351e-06, "loss": 0.1611, "step": 1362 }, { "epoch": 2.79, "learning_rate": 4.2985831953768584e-06, "loss": 0.1476, "step": 1363 }, { "epoch": 2.79, "learning_rate": 4.292010428711133e-06, "loss": 0.1623, "step": 1364 }, { "epoch": 2.79, "learning_rate": 4.2854389102824325e-06, "loss": 0.1529, "step": 1365 }, { "epoch": 2.8, "learning_rate": 4.2788686516768265e-06, "loss": 0.1677, "step": 1366 }, { "epoch": 2.8, "learning_rate": 4.272299664478155e-06, "loss": 0.1647, "step": 1367 }, { "epoch": 2.8, "learning_rate": 4.265731960268022e-06, "loss": 0.1563, "step": 1368 }, { "epoch": 2.8, "learning_rate": 4.259165550625765e-06, "loss": 0.1472, "step": 1369 }, { "epoch": 2.8, "learning_rate": 4.252600447128441e-06, "loss": 0.1134, "step": 1370 }, { "epoch": 2.81, "learning_rate": 4.246036661350805e-06, "loss": 0.1612, "step": 1371 }, { "epoch": 2.81, "learning_rate": 4.239474204865288e-06, "loss": 0.1712, "step": 1372 }, { "epoch": 2.81, "learning_rate": 4.232913089241975e-06, "loss": 0.1441, "step": 1373 }, { "epoch": 2.81, "learning_rate": 4.226353326048594e-06, "loss": 0.1593, "step": 1374 }, { "epoch": 2.81, "learning_rate": 4.2197949268504815e-06, "loss": 0.1278, "step": 1375 }, { "epoch": 2.82, "learning_rate": 4.2132379032105695e-06, "loss": 0.159, "step": 1376 }, { "epoch": 2.82, "learning_rate": 4.206682266689371e-06, "loss": 0.1767, "step": 1377 }, { "epoch": 2.82, "learning_rate": 4.200128028844945e-06, "loss": 0.1315, "step": 1378 }, { "epoch": 2.82, "learning_rate": 4.193575201232893e-06, "loss": 0.1536, "step": 1379 }, { "epoch": 2.82, "learning_rate": 4.18702379540632e-06, "loss": 0.1711, "step": 1380 }, { "epoch": 2.83, "learning_rate": 4.1804738229158384e-06, "loss": 0.1625, "step": 1381 }, { "epoch": 2.83, "learning_rate": 4.173925295309517e-06, "loss": 0.1659, "step": 1382 }, { "epoch": 2.83, "learning_rate": 4.167378224132891e-06, "loss": 0.1774, "step": 1383 }, { "epoch": 2.83, "learning_rate": 4.160832620928921e-06, "loss": 0.1715, "step": 1384 }, { "epoch": 2.83, "learning_rate": 4.15428849723798e-06, "loss": 0.1515, "step": 1385 }, { "epoch": 2.84, "learning_rate": 4.1477458645978355e-06, "loss": 0.1322, "step": 1386 }, { "epoch": 2.84, "learning_rate": 4.14120473454362e-06, "loss": 0.1671, "step": 1387 }, { "epoch": 2.84, "learning_rate": 4.134665118607826e-06, "loss": 0.1563, "step": 1388 }, { "epoch": 2.84, "learning_rate": 4.128127028320265e-06, "loss": 0.1224, "step": 1389 }, { "epoch": 2.84, "learning_rate": 4.121590475208071e-06, "loss": 0.166, "step": 1390 }, { "epoch": 2.85, "learning_rate": 4.115055470795658e-06, "loss": 0.1367, "step": 1391 }, { "epoch": 2.85, "learning_rate": 4.108522026604714e-06, "loss": 0.1358, "step": 1392 }, { "epoch": 2.85, "learning_rate": 4.101990154154178e-06, "loss": 0.1419, "step": 1393 }, { "epoch": 2.85, "learning_rate": 4.095459864960211e-06, "loss": 0.1758, "step": 1394 }, { "epoch": 2.85, "learning_rate": 4.088931170536193e-06, "loss": 0.1231, "step": 1395 }, { "epoch": 2.86, "learning_rate": 4.0824040823926805e-06, "loss": 0.1686, "step": 1396 }, { "epoch": 2.86, "learning_rate": 4.075878612037408e-06, "loss": 0.1787, "step": 1397 }, { "epoch": 2.86, "learning_rate": 4.069354770975251e-06, "loss": 0.1649, "step": 1398 }, { "epoch": 2.86, "learning_rate": 4.0628325707082175e-06, "loss": 0.1786, "step": 1399 }, { "epoch": 2.87, "learning_rate": 4.056312022735417e-06, "loss": 0.1589, "step": 1400 }, { "epoch": 2.87, "learning_rate": 4.049793138553053e-06, "loss": 0.1719, "step": 1401 }, { "epoch": 2.87, "learning_rate": 4.043275929654388e-06, "loss": 0.1762, "step": 1402 }, { "epoch": 2.87, "learning_rate": 4.036760407529735e-06, "loss": 0.1275, "step": 1403 }, { "epoch": 2.87, "learning_rate": 4.030246583666437e-06, "loss": 0.1517, "step": 1404 }, { "epoch": 2.88, "learning_rate": 4.023734469548833e-06, "loss": 0.1571, "step": 1405 }, { "epoch": 2.88, "learning_rate": 4.017224076658258e-06, "loss": 0.135, "step": 1406 }, { "epoch": 2.88, "learning_rate": 4.010715416473003e-06, "loss": 0.1592, "step": 1407 }, { "epoch": 2.88, "learning_rate": 4.0042085004683114e-06, "loss": 0.1567, "step": 1408 }, { "epoch": 2.88, "learning_rate": 3.997703340116347e-06, "loss": 0.1732, "step": 1409 }, { "epoch": 2.89, "learning_rate": 3.991199946886182e-06, "loss": 0.1555, "step": 1410 }, { "epoch": 2.89, "learning_rate": 3.984698332243767e-06, "loss": 0.136, "step": 1411 }, { "epoch": 2.89, "learning_rate": 3.978198507651925e-06, "loss": 0.1671, "step": 1412 }, { "epoch": 2.89, "learning_rate": 3.9717004845703175e-06, "loss": 0.1227, "step": 1413 }, { "epoch": 2.89, "learning_rate": 3.96520427445543e-06, "loss": 0.1675, "step": 1414 }, { "epoch": 2.9, "learning_rate": 3.9587098887605545e-06, "loss": 0.1315, "step": 1415 }, { "epoch": 2.9, "learning_rate": 3.952217338935761e-06, "loss": 0.1448, "step": 1416 }, { "epoch": 2.9, "learning_rate": 3.945726636427893e-06, "loss": 0.1443, "step": 1417 }, { "epoch": 2.9, "learning_rate": 3.9392377926805226e-06, "loss": 0.1193, "step": 1418 }, { "epoch": 2.9, "learning_rate": 3.932750819133958e-06, "loss": 0.1665, "step": 1419 }, { "epoch": 2.91, "learning_rate": 3.926265727225201e-06, "loss": 0.1856, "step": 1420 }, { "epoch": 2.91, "learning_rate": 3.919782528387942e-06, "loss": 0.1253, "step": 1421 }, { "epoch": 2.91, "learning_rate": 3.9133012340525325e-06, "loss": 0.1502, "step": 1422 }, { "epoch": 2.91, "learning_rate": 3.906821855645961e-06, "loss": 0.1825, "step": 1423 }, { "epoch": 2.91, "learning_rate": 3.900344404591847e-06, "loss": 0.1284, "step": 1424 }, { "epoch": 2.92, "learning_rate": 3.8938688923104015e-06, "loss": 0.1565, "step": 1425 }, { "epoch": 2.92, "learning_rate": 3.887395330218429e-06, "loss": 0.1573, "step": 1426 }, { "epoch": 2.92, "learning_rate": 3.880923729729285e-06, "loss": 0.1424, "step": 1427 }, { "epoch": 2.92, "learning_rate": 3.874454102252873e-06, "loss": 0.1474, "step": 1428 }, { "epoch": 2.92, "learning_rate": 3.867986459195615e-06, "loss": 0.1341, "step": 1429 }, { "epoch": 2.93, "learning_rate": 3.861520811960438e-06, "loss": 0.1477, "step": 1430 }, { "epoch": 2.93, "learning_rate": 3.855057171946744e-06, "loss": 0.1564, "step": 1431 }, { "epoch": 2.93, "learning_rate": 3.848595550550401e-06, "loss": 0.1435, "step": 1432 }, { "epoch": 2.93, "learning_rate": 3.8421359591637185e-06, "loss": 0.142, "step": 1433 }, { "epoch": 2.93, "learning_rate": 3.835678409175424e-06, "loss": 0.1372, "step": 1434 }, { "epoch": 2.94, "learning_rate": 3.829222911970647e-06, "loss": 0.1525, "step": 1435 }, { "epoch": 2.94, "learning_rate": 3.822769478930898e-06, "loss": 0.1454, "step": 1436 }, { "epoch": 2.94, "learning_rate": 3.816318121434049e-06, "loss": 0.1818, "step": 1437 }, { "epoch": 2.94, "learning_rate": 3.8098688508543086e-06, "loss": 0.1287, "step": 1438 }, { "epoch": 2.94, "learning_rate": 3.803421678562213e-06, "loss": 0.1718, "step": 1439 }, { "epoch": 2.95, "learning_rate": 3.7969766159245913e-06, "loss": 0.1497, "step": 1440 }, { "epoch": 2.95, "learning_rate": 3.79053367430456e-06, "loss": 0.1624, "step": 1441 }, { "epoch": 2.95, "learning_rate": 3.784092865061494e-06, "loss": 0.1583, "step": 1442 }, { "epoch": 2.95, "learning_rate": 3.777654199551004e-06, "loss": 0.154, "step": 1443 }, { "epoch": 2.96, "learning_rate": 3.7712176891249286e-06, "loss": 0.155, "step": 1444 }, { "epoch": 2.96, "learning_rate": 3.7647833451313e-06, "loss": 0.1512, "step": 1445 }, { "epoch": 2.96, "learning_rate": 3.758351178914336e-06, "loss": 0.1568, "step": 1446 }, { "epoch": 2.96, "learning_rate": 3.751921201814411e-06, "loss": 0.1322, "step": 1447 }, { "epoch": 2.96, "learning_rate": 3.7454934251680447e-06, "loss": 0.1517, "step": 1448 }, { "epoch": 2.97, "learning_rate": 3.7390678603078687e-06, "loss": 0.1722, "step": 1449 }, { "epoch": 2.97, "learning_rate": 3.7326445185626255e-06, "loss": 0.1585, "step": 1450 }, { "epoch": 2.97, "learning_rate": 3.7262234112571316e-06, "loss": 0.1854, "step": 1451 }, { "epoch": 2.97, "learning_rate": 3.7198045497122647e-06, "loss": 0.1372, "step": 1452 }, { "epoch": 2.97, "learning_rate": 3.713387945244945e-06, "loss": 0.1462, "step": 1453 }, { "epoch": 2.98, "learning_rate": 3.7069736091681095e-06, "loss": 0.1749, "step": 1454 }, { "epoch": 2.98, "learning_rate": 3.7005615527907045e-06, "loss": 0.1818, "step": 1455 }, { "epoch": 2.98, "learning_rate": 3.6941517874176426e-06, "loss": 0.1305, "step": 1456 }, { "epoch": 2.98, "learning_rate": 3.687744324349813e-06, "loss": 0.1641, "step": 1457 }, { "epoch": 2.98, "learning_rate": 3.6813391748840337e-06, "loss": 0.1725, "step": 1458 }, { "epoch": 2.99, "learning_rate": 3.6749363503130508e-06, "loss": 0.1622, "step": 1459 }, { "epoch": 2.99, "learning_rate": 3.668535861925509e-06, "loss": 0.1741, "step": 1460 }, { "epoch": 2.99, "learning_rate": 3.6621377210059313e-06, "loss": 0.1727, "step": 1461 }, { "epoch": 2.99, "learning_rate": 3.6557419388347106e-06, "loss": 0.1914, "step": 1462 }, { "epoch": 2.99, "learning_rate": 3.6493485266880673e-06, "loss": 0.1353, "step": 1463 }, { "epoch": 3.0, "learning_rate": 3.642957495838059e-06, "loss": 0.1822, "step": 1464 }, { "epoch": 3.0, "learning_rate": 3.6365688575525315e-06, "loss": 0.1598, "step": 1465 }, { "epoch": 3.0, "learning_rate": 3.630182623095122e-06, "loss": 0.1395, "step": 1466 }, { "epoch": 3.0, "learning_rate": 3.623798803725223e-06, "loss": 0.0616, "step": 1467 }, { "epoch": 3.0, "learning_rate": 3.6174174106979743e-06, "loss": 0.0757, "step": 1468 }, { "epoch": 3.01, "learning_rate": 3.611038455264231e-06, "loss": 0.0787, "step": 1469 }, { "epoch": 3.01, "learning_rate": 3.6046619486705575e-06, "loss": 0.0675, "step": 1470 }, { "epoch": 3.01, "learning_rate": 3.5982879021591997e-06, "loss": 0.0777, "step": 1471 }, { "epoch": 3.01, "learning_rate": 3.5919163269680613e-06, "loss": 0.0755, "step": 1472 }, { "epoch": 3.01, "learning_rate": 3.5855472343306946e-06, "loss": 0.0578, "step": 1473 }, { "epoch": 3.02, "learning_rate": 3.5791806354762702e-06, "loss": 0.0638, "step": 1474 }, { "epoch": 3.02, "learning_rate": 3.572816541629566e-06, "loss": 0.0742, "step": 1475 }, { "epoch": 3.02, "learning_rate": 3.566454964010939e-06, "loss": 0.0886, "step": 1476 }, { "epoch": 3.02, "learning_rate": 3.560095913836317e-06, "loss": 0.0647, "step": 1477 }, { "epoch": 3.02, "learning_rate": 3.553739402317162e-06, "loss": 0.0381, "step": 1478 }, { "epoch": 3.03, "learning_rate": 3.547385440660469e-06, "loss": 0.0617, "step": 1479 }, { "epoch": 3.03, "learning_rate": 3.5410340400687336e-06, "loss": 0.0745, "step": 1480 }, { "epoch": 3.03, "learning_rate": 3.534685211739935e-06, "loss": 0.065, "step": 1481 }, { "epoch": 3.03, "learning_rate": 3.5283389668675196e-06, "loss": 0.0585, "step": 1482 }, { "epoch": 3.04, "learning_rate": 3.5219953166403754e-06, "loss": 0.0763, "step": 1483 }, { "epoch": 3.04, "learning_rate": 3.5156542722428242e-06, "loss": 0.0455, "step": 1484 }, { "epoch": 3.04, "learning_rate": 3.5093158448545807e-06, "loss": 0.0696, "step": 1485 }, { "epoch": 3.04, "learning_rate": 3.5029800456507577e-06, "loss": 0.0629, "step": 1486 }, { "epoch": 3.04, "learning_rate": 3.4966468858018275e-06, "loss": 0.0534, "step": 1487 }, { "epoch": 3.05, "learning_rate": 3.4903163764736104e-06, "loss": 0.068, "step": 1488 }, { "epoch": 3.05, "learning_rate": 3.4839885288272555e-06, "loss": 0.0676, "step": 1489 }, { "epoch": 3.05, "learning_rate": 3.477663354019215e-06, "loss": 0.0661, "step": 1490 }, { "epoch": 3.05, "learning_rate": 3.471340863201237e-06, "loss": 0.0717, "step": 1491 }, { "epoch": 3.05, "learning_rate": 3.4650210675203244e-06, "loss": 0.0645, "step": 1492 }, { "epoch": 3.06, "learning_rate": 3.458703978118742e-06, "loss": 0.0637, "step": 1493 }, { "epoch": 3.06, "learning_rate": 3.452389606133974e-06, "loss": 0.074, "step": 1494 }, { "epoch": 3.06, "learning_rate": 3.4460779626987186e-06, "loss": 0.0586, "step": 1495 }, { "epoch": 3.06, "learning_rate": 3.43976905894086e-06, "loss": 0.0608, "step": 1496 }, { "epoch": 3.06, "learning_rate": 3.433462905983455e-06, "loss": 0.0713, "step": 1497 }, { "epoch": 3.07, "learning_rate": 3.427159514944708e-06, "loss": 0.058, "step": 1498 }, { "epoch": 3.07, "learning_rate": 3.420858896937954e-06, "loss": 0.0869, "step": 1499 }, { "epoch": 3.07, "learning_rate": 3.414561063071644e-06, "loss": 0.0647, "step": 1500 }, { "epoch": 3.07, "learning_rate": 3.4082660244493125e-06, "loss": 0.0619, "step": 1501 }, { "epoch": 3.07, "learning_rate": 3.401973792169574e-06, "loss": 0.0659, "step": 1502 }, { "epoch": 3.08, "learning_rate": 3.395684377326086e-06, "loss": 0.0522, "step": 1503 }, { "epoch": 3.08, "learning_rate": 3.389397791007548e-06, "loss": 0.0539, "step": 1504 }, { "epoch": 3.08, "learning_rate": 3.383114044297665e-06, "loss": 0.0544, "step": 1505 }, { "epoch": 3.08, "learning_rate": 3.376833148275141e-06, "loss": 0.0676, "step": 1506 }, { "epoch": 3.08, "learning_rate": 3.3705551140136482e-06, "loss": 0.056, "step": 1507 }, { "epoch": 3.09, "learning_rate": 3.3642799525818217e-06, "loss": 0.0726, "step": 1508 }, { "epoch": 3.09, "learning_rate": 3.3580076750432244e-06, "loss": 0.0562, "step": 1509 }, { "epoch": 3.09, "learning_rate": 3.351738292456337e-06, "loss": 0.0694, "step": 1510 }, { "epoch": 3.09, "learning_rate": 3.3454718158745377e-06, "loss": 0.0694, "step": 1511 }, { "epoch": 3.09, "learning_rate": 3.339208256346077e-06, "loss": 0.0602, "step": 1512 }, { "epoch": 3.1, "learning_rate": 3.3329476249140687e-06, "loss": 0.0416, "step": 1513 }, { "epoch": 3.1, "learning_rate": 3.3266899326164563e-06, "loss": 0.0755, "step": 1514 }, { "epoch": 3.1, "learning_rate": 3.320435190486012e-06, "loss": 0.0324, "step": 1515 }, { "epoch": 3.1, "learning_rate": 3.314183409550293e-06, "loss": 0.057, "step": 1516 }, { "epoch": 3.1, "learning_rate": 3.307934600831648e-06, "loss": 0.0703, "step": 1517 }, { "epoch": 3.11, "learning_rate": 3.3016887753471803e-06, "loss": 0.0573, "step": 1518 }, { "epoch": 3.11, "learning_rate": 3.2954459441087315e-06, "loss": 0.047, "step": 1519 }, { "epoch": 3.11, "learning_rate": 3.2892061181228695e-06, "loss": 0.0448, "step": 1520 }, { "epoch": 3.11, "learning_rate": 3.2829693083908564e-06, "loss": 0.0608, "step": 1521 }, { "epoch": 3.11, "learning_rate": 3.276735525908647e-06, "loss": 0.039, "step": 1522 }, { "epoch": 3.12, "learning_rate": 3.270504781666845e-06, "loss": 0.0589, "step": 1523 }, { "epoch": 3.12, "learning_rate": 3.2642770866507114e-06, "loss": 0.0656, "step": 1524 }, { "epoch": 3.12, "learning_rate": 3.25805245184012e-06, "loss": 0.0501, "step": 1525 }, { "epoch": 3.12, "learning_rate": 3.251830888209558e-06, "loss": 0.0527, "step": 1526 }, { "epoch": 3.13, "learning_rate": 3.2456124067280903e-06, "loss": 0.0811, "step": 1527 }, { "epoch": 3.13, "learning_rate": 3.239397018359352e-06, "loss": 0.066, "step": 1528 }, { "epoch": 3.13, "learning_rate": 3.2331847340615286e-06, "loss": 0.0455, "step": 1529 }, { "epoch": 3.13, "learning_rate": 3.226975564787322e-06, "loss": 0.0506, "step": 1530 }, { "epoch": 3.13, "learning_rate": 3.220769521483953e-06, "loss": 0.0678, "step": 1531 }, { "epoch": 3.14, "learning_rate": 3.2145666150931247e-06, "loss": 0.0638, "step": 1532 }, { "epoch": 3.14, "learning_rate": 3.208366856551013e-06, "loss": 0.0597, "step": 1533 }, { "epoch": 3.14, "learning_rate": 3.20217025678824e-06, "loss": 0.0549, "step": 1534 }, { "epoch": 3.14, "learning_rate": 3.195976826729864e-06, "loss": 0.0665, "step": 1535 }, { "epoch": 3.14, "learning_rate": 3.1897865772953492e-06, "loss": 0.0547, "step": 1536 }, { "epoch": 3.15, "learning_rate": 3.1835995193985548e-06, "loss": 0.0478, "step": 1537 }, { "epoch": 3.15, "learning_rate": 3.1774156639477172e-06, "loss": 0.0528, "step": 1538 }, { "epoch": 3.15, "learning_rate": 3.1712350218454186e-06, "loss": 0.048, "step": 1539 }, { "epoch": 3.15, "learning_rate": 3.1650576039885824e-06, "loss": 0.0651, "step": 1540 }, { "epoch": 3.15, "learning_rate": 3.158883421268443e-06, "loss": 0.0551, "step": 1541 }, { "epoch": 3.16, "learning_rate": 3.1527124845705337e-06, "loss": 0.0423, "step": 1542 }, { "epoch": 3.16, "learning_rate": 3.1465448047746626e-06, "loss": 0.0578, "step": 1543 }, { "epoch": 3.16, "learning_rate": 3.140380392754901e-06, "loss": 0.0566, "step": 1544 }, { "epoch": 3.16, "learning_rate": 3.1342192593795496e-06, "loss": 0.0618, "step": 1545 }, { "epoch": 3.16, "learning_rate": 3.128061415511138e-06, "loss": 0.0822, "step": 1546 }, { "epoch": 3.17, "learning_rate": 3.1219068720063918e-06, "loss": 0.0618, "step": 1547 }, { "epoch": 3.17, "learning_rate": 3.115755639716216e-06, "loss": 0.046, "step": 1548 }, { "epoch": 3.17, "learning_rate": 3.1096077294856817e-06, "loss": 0.0653, "step": 1549 }, { "epoch": 3.17, "learning_rate": 3.103463152153998e-06, "loss": 0.0502, "step": 1550 }, { "epoch": 3.17, "learning_rate": 3.0973219185545077e-06, "loss": 0.0374, "step": 1551 }, { "epoch": 3.18, "learning_rate": 3.0911840395146436e-06, "loss": 0.0637, "step": 1552 }, { "epoch": 3.18, "learning_rate": 3.0850495258559383e-06, "loss": 0.0546, "step": 1553 }, { "epoch": 3.18, "learning_rate": 3.078918388393981e-06, "loss": 0.0566, "step": 1554 }, { "epoch": 3.18, "learning_rate": 3.072790637938415e-06, "loss": 0.0602, "step": 1555 }, { "epoch": 3.18, "learning_rate": 3.0666662852929063e-06, "loss": 0.0457, "step": 1556 }, { "epoch": 3.19, "learning_rate": 3.060545341255135e-06, "loss": 0.0551, "step": 1557 }, { "epoch": 3.19, "learning_rate": 3.054427816616773e-06, "loss": 0.049, "step": 1558 }, { "epoch": 3.19, "learning_rate": 3.048313722163454e-06, "loss": 0.0429, "step": 1559 }, { "epoch": 3.19, "learning_rate": 3.0422030686747765e-06, "loss": 0.0612, "step": 1560 }, { "epoch": 3.19, "learning_rate": 3.036095866924263e-06, "loss": 0.0603, "step": 1561 }, { "epoch": 3.2, "learning_rate": 3.0299921276793553e-06, "loss": 0.0417, "step": 1562 }, { "epoch": 3.2, "learning_rate": 3.0238918617013877e-06, "loss": 0.0604, "step": 1563 }, { "epoch": 3.2, "learning_rate": 3.0177950797455734e-06, "loss": 0.0558, "step": 1564 }, { "epoch": 3.2, "learning_rate": 3.0117017925609802e-06, "loss": 0.0701, "step": 1565 }, { "epoch": 3.2, "learning_rate": 3.0056120108905156e-06, "loss": 0.0457, "step": 1566 }, { "epoch": 3.21, "learning_rate": 2.9995257454709104e-06, "loss": 0.0531, "step": 1567 }, { "epoch": 3.21, "learning_rate": 2.9934430070326894e-06, "loss": 0.0511, "step": 1568 }, { "epoch": 3.21, "learning_rate": 2.9873638063001633e-06, "loss": 0.056, "step": 1569 }, { "epoch": 3.21, "learning_rate": 2.9812881539914028e-06, "loss": 0.067, "step": 1570 }, { "epoch": 3.22, "learning_rate": 2.975216060818227e-06, "loss": 0.0465, "step": 1571 }, { "epoch": 3.22, "learning_rate": 2.969147537486175e-06, "loss": 0.0569, "step": 1572 }, { "epoch": 3.22, "learning_rate": 2.9630825946944953e-06, "loss": 0.0512, "step": 1573 }, { "epoch": 3.22, "learning_rate": 2.95702124313612e-06, "loss": 0.0538, "step": 1574 }, { "epoch": 3.22, "learning_rate": 2.950963493497655e-06, "loss": 0.0528, "step": 1575 }, { "epoch": 3.23, "learning_rate": 2.9449093564593523e-06, "loss": 0.0463, "step": 1576 }, { "epoch": 3.23, "learning_rate": 2.9388588426950942e-06, "loss": 0.0681, "step": 1577 }, { "epoch": 3.23, "learning_rate": 2.932811962872377e-06, "loss": 0.0675, "step": 1578 }, { "epoch": 3.23, "learning_rate": 2.9267687276522876e-06, "loss": 0.0511, "step": 1579 }, { "epoch": 3.23, "learning_rate": 2.92072914768949e-06, "loss": 0.0596, "step": 1580 }, { "epoch": 3.24, "learning_rate": 2.9146932336322005e-06, "loss": 0.0429, "step": 1581 }, { "epoch": 3.24, "learning_rate": 2.9086609961221758e-06, "loss": 0.0571, "step": 1582 }, { "epoch": 3.24, "learning_rate": 2.9026324457946857e-06, "loss": 0.0472, "step": 1583 }, { "epoch": 3.24, "learning_rate": 2.8966075932785064e-06, "loss": 0.0616, "step": 1584 }, { "epoch": 3.24, "learning_rate": 2.8905864491958867e-06, "loss": 0.0571, "step": 1585 }, { "epoch": 3.25, "learning_rate": 2.8845690241625437e-06, "loss": 0.0599, "step": 1586 }, { "epoch": 3.25, "learning_rate": 2.878555328787634e-06, "loss": 0.0691, "step": 1587 }, { "epoch": 3.25, "learning_rate": 2.872545373673736e-06, "loss": 0.0715, "step": 1588 }, { "epoch": 3.25, "learning_rate": 2.8665391694168432e-06, "loss": 0.0378, "step": 1589 }, { "epoch": 3.25, "learning_rate": 2.8605367266063257e-06, "loss": 0.0612, "step": 1590 }, { "epoch": 3.26, "learning_rate": 2.8545380558249263e-06, "loss": 0.055, "step": 1591 }, { "epoch": 3.26, "learning_rate": 2.848543167648736e-06, "loss": 0.0543, "step": 1592 }, { "epoch": 3.26, "learning_rate": 2.842552072647182e-06, "loss": 0.0581, "step": 1593 }, { "epoch": 3.26, "learning_rate": 2.8365647813829954e-06, "loss": 0.0513, "step": 1594 }, { "epoch": 3.26, "learning_rate": 2.83058130441221e-06, "loss": 0.0516, "step": 1595 }, { "epoch": 3.27, "learning_rate": 2.824601652284128e-06, "loss": 0.0575, "step": 1596 }, { "epoch": 3.27, "learning_rate": 2.8186258355413076e-06, "loss": 0.0436, "step": 1597 }, { "epoch": 3.27, "learning_rate": 2.812653864719552e-06, "loss": 0.0543, "step": 1598 }, { "epoch": 3.27, "learning_rate": 2.8066857503478768e-06, "loss": 0.0533, "step": 1599 }, { "epoch": 3.27, "learning_rate": 2.800721502948506e-06, "loss": 0.057, "step": 1600 }, { "epoch": 3.28, "learning_rate": 2.794761133036832e-06, "loss": 0.0477, "step": 1601 }, { "epoch": 3.28, "learning_rate": 2.788804651121427e-06, "loss": 0.0555, "step": 1602 }, { "epoch": 3.28, "learning_rate": 2.782852067703997e-06, "loss": 0.0576, "step": 1603 }, { "epoch": 3.28, "learning_rate": 2.776903393279383e-06, "loss": 0.0505, "step": 1604 }, { "epoch": 3.28, "learning_rate": 2.770958638335527e-06, "loss": 0.0523, "step": 1605 }, { "epoch": 3.29, "learning_rate": 2.7650178133534634e-06, "loss": 0.0575, "step": 1606 }, { "epoch": 3.29, "learning_rate": 2.7590809288073e-06, "loss": 0.058, "step": 1607 }, { "epoch": 3.29, "learning_rate": 2.7531479951641928e-06, "loss": 0.0681, "step": 1608 }, { "epoch": 3.29, "learning_rate": 2.7472190228843387e-06, "loss": 0.0505, "step": 1609 }, { "epoch": 3.29, "learning_rate": 2.741294022420944e-06, "loss": 0.062, "step": 1610 }, { "epoch": 3.3, "learning_rate": 2.735373004220214e-06, "loss": 0.0482, "step": 1611 }, { "epoch": 3.3, "learning_rate": 2.7294559787213325e-06, "loss": 0.0466, "step": 1612 }, { "epoch": 3.3, "learning_rate": 2.7235429563564486e-06, "loss": 0.0469, "step": 1613 }, { "epoch": 3.3, "learning_rate": 2.7176339475506515e-06, "loss": 0.0438, "step": 1614 }, { "epoch": 3.31, "learning_rate": 2.7117289627219466e-06, "loss": 0.0642, "step": 1615 }, { "epoch": 3.31, "learning_rate": 2.7058280122812564e-06, "loss": 0.0609, "step": 1616 }, { "epoch": 3.31, "learning_rate": 2.699931106632381e-06, "loss": 0.0562, "step": 1617 }, { "epoch": 3.31, "learning_rate": 2.694038256171997e-06, "loss": 0.0725, "step": 1618 }, { "epoch": 3.31, "learning_rate": 2.688149471289626e-06, "loss": 0.0775, "step": 1619 }, { "epoch": 3.32, "learning_rate": 2.682264762367624e-06, "loss": 0.0645, "step": 1620 }, { "epoch": 3.32, "learning_rate": 2.6763841397811576e-06, "loss": 0.0558, "step": 1621 }, { "epoch": 3.32, "learning_rate": 2.6705076138981955e-06, "loss": 0.0549, "step": 1622 }, { "epoch": 3.32, "learning_rate": 2.6646351950794756e-06, "loss": 0.0483, "step": 1623 }, { "epoch": 3.32, "learning_rate": 2.6587668936785025e-06, "loss": 0.0628, "step": 1624 }, { "epoch": 3.33, "learning_rate": 2.6529027200415162e-06, "loss": 0.0588, "step": 1625 }, { "epoch": 3.33, "learning_rate": 2.6470426845074787e-06, "loss": 0.0413, "step": 1626 }, { "epoch": 3.33, "learning_rate": 2.641186797408062e-06, "loss": 0.0597, "step": 1627 }, { "epoch": 3.33, "learning_rate": 2.635335069067617e-06, "loss": 0.0596, "step": 1628 }, { "epoch": 3.33, "learning_rate": 2.629487509803168e-06, "loss": 0.0659, "step": 1629 }, { "epoch": 3.34, "learning_rate": 2.623644129924384e-06, "loss": 0.0576, "step": 1630 }, { "epoch": 3.34, "learning_rate": 2.6178049397335716e-06, "loss": 0.0759, "step": 1631 }, { "epoch": 3.34, "learning_rate": 2.6119699495256423e-06, "loss": 0.0688, "step": 1632 }, { "epoch": 3.34, "learning_rate": 2.606139169588113e-06, "loss": 0.0479, "step": 1633 }, { "epoch": 3.34, "learning_rate": 2.6003126102010696e-06, "loss": 0.0845, "step": 1634 }, { "epoch": 3.35, "learning_rate": 2.5944902816371573e-06, "loss": 0.0428, "step": 1635 }, { "epoch": 3.35, "learning_rate": 2.588672194161569e-06, "loss": 0.0485, "step": 1636 }, { "epoch": 3.35, "learning_rate": 2.58285835803201e-06, "loss": 0.0402, "step": 1637 }, { "epoch": 3.35, "learning_rate": 2.5770487834987013e-06, "loss": 0.0465, "step": 1638 }, { "epoch": 3.35, "learning_rate": 2.5712434808043406e-06, "loss": 0.0528, "step": 1639 }, { "epoch": 3.36, "learning_rate": 2.5654424601841e-06, "loss": 0.0536, "step": 1640 }, { "epoch": 3.36, "learning_rate": 2.559645731865598e-06, "loss": 0.063, "step": 1641 }, { "epoch": 3.36, "learning_rate": 2.553853306068888e-06, "loss": 0.0518, "step": 1642 }, { "epoch": 3.36, "learning_rate": 2.5480651930064426e-06, "loss": 0.0454, "step": 1643 }, { "epoch": 3.36, "learning_rate": 2.5422814028831173e-06, "loss": 0.0519, "step": 1644 }, { "epoch": 3.37, "learning_rate": 2.5365019458961604e-06, "loss": 0.057, "step": 1645 }, { "epoch": 3.37, "learning_rate": 2.530726832235169e-06, "loss": 0.0571, "step": 1646 }, { "epoch": 3.37, "learning_rate": 2.524956072082093e-06, "loss": 0.0553, "step": 1647 }, { "epoch": 3.37, "learning_rate": 2.5191896756112e-06, "loss": 0.0337, "step": 1648 }, { "epoch": 3.37, "learning_rate": 2.5134276529890646e-06, "loss": 0.0758, "step": 1649 }, { "epoch": 3.38, "learning_rate": 2.5076700143745495e-06, "loss": 0.0499, "step": 1650 }, { "epoch": 3.38, "learning_rate": 2.501916769918794e-06, "loss": 0.0642, "step": 1651 }, { "epoch": 3.38, "learning_rate": 2.496167929765182e-06, "loss": 0.0469, "step": 1652 }, { "epoch": 3.38, "learning_rate": 2.4904235040493397e-06, "loss": 0.046, "step": 1653 }, { "epoch": 3.39, "learning_rate": 2.4846835028991045e-06, "loss": 0.0526, "step": 1654 }, { "epoch": 3.39, "learning_rate": 2.4789479364345143e-06, "loss": 0.0541, "step": 1655 }, { "epoch": 3.39, "learning_rate": 2.4732168147677927e-06, "loss": 0.0417, "step": 1656 }, { "epoch": 3.39, "learning_rate": 2.4674901480033207e-06, "loss": 0.0553, "step": 1657 }, { "epoch": 3.39, "learning_rate": 2.4617679462376294e-06, "loss": 0.0781, "step": 1658 }, { "epoch": 3.4, "learning_rate": 2.456050219559372e-06, "loss": 0.0514, "step": 1659 }, { "epoch": 3.4, "learning_rate": 2.450336978049322e-06, "loss": 0.05, "step": 1660 }, { "epoch": 3.4, "learning_rate": 2.4446282317803332e-06, "loss": 0.0723, "step": 1661 }, { "epoch": 3.4, "learning_rate": 2.438923990817345e-06, "loss": 0.0649, "step": 1662 }, { "epoch": 3.4, "learning_rate": 2.433224265217346e-06, "loss": 0.0406, "step": 1663 }, { "epoch": 3.41, "learning_rate": 2.4275290650293644e-06, "loss": 0.0616, "step": 1664 }, { "epoch": 3.41, "learning_rate": 2.4218384002944556e-06, "loss": 0.0606, "step": 1665 }, { "epoch": 3.41, "learning_rate": 2.416152281045671e-06, "loss": 0.0489, "step": 1666 }, { "epoch": 3.41, "learning_rate": 2.410470717308058e-06, "loss": 0.0534, "step": 1667 }, { "epoch": 3.41, "learning_rate": 2.404793719098617e-06, "loss": 0.0572, "step": 1668 }, { "epoch": 3.42, "learning_rate": 2.3991212964263157e-06, "loss": 0.0522, "step": 1669 }, { "epoch": 3.42, "learning_rate": 2.3934534592920416e-06, "loss": 0.0529, "step": 1670 }, { "epoch": 3.42, "learning_rate": 2.387790217688608e-06, "loss": 0.0724, "step": 1671 }, { "epoch": 3.42, "learning_rate": 2.3821315816007197e-06, "loss": 0.0597, "step": 1672 }, { "epoch": 3.42, "learning_rate": 2.37647756100496e-06, "loss": 0.0744, "step": 1673 }, { "epoch": 3.43, "learning_rate": 2.3708281658697824e-06, "loss": 0.0481, "step": 1674 }, { "epoch": 3.43, "learning_rate": 2.3651834061554763e-06, "loss": 0.0815, "step": 1675 }, { "epoch": 3.43, "learning_rate": 2.3595432918141677e-06, "loss": 0.0641, "step": 1676 }, { "epoch": 3.43, "learning_rate": 2.3539078327897846e-06, "loss": 0.0694, "step": 1677 }, { "epoch": 3.43, "learning_rate": 2.3482770390180514e-06, "loss": 0.0496, "step": 1678 }, { "epoch": 3.44, "learning_rate": 2.342650920426465e-06, "loss": 0.054, "step": 1679 }, { "epoch": 3.44, "learning_rate": 2.3370294869342835e-06, "loss": 0.0773, "step": 1680 }, { "epoch": 3.44, "learning_rate": 2.331412748452502e-06, "loss": 0.0376, "step": 1681 }, { "epoch": 3.44, "learning_rate": 2.3258007148838345e-06, "loss": 0.0429, "step": 1682 }, { "epoch": 3.44, "learning_rate": 2.3201933961227097e-06, "loss": 0.057, "step": 1683 }, { "epoch": 3.45, "learning_rate": 2.314590802055232e-06, "loss": 0.0508, "step": 1684 }, { "epoch": 3.45, "learning_rate": 2.3089929425591873e-06, "loss": 0.0438, "step": 1685 }, { "epoch": 3.45, "learning_rate": 2.3033998275040047e-06, "loss": 0.0582, "step": 1686 }, { "epoch": 3.45, "learning_rate": 2.297811466750754e-06, "loss": 0.0401, "step": 1687 }, { "epoch": 3.45, "learning_rate": 2.2922278701521173e-06, "loss": 0.0451, "step": 1688 }, { "epoch": 3.46, "learning_rate": 2.286649047552386e-06, "loss": 0.0522, "step": 1689 }, { "epoch": 3.46, "learning_rate": 2.281075008787425e-06, "loss": 0.04, "step": 1690 }, { "epoch": 3.46, "learning_rate": 2.275505763684674e-06, "loss": 0.0483, "step": 1691 }, { "epoch": 3.46, "learning_rate": 2.2699413220631135e-06, "loss": 0.0623, "step": 1692 }, { "epoch": 3.46, "learning_rate": 2.2643816937332568e-06, "loss": 0.0394, "step": 1693 }, { "epoch": 3.47, "learning_rate": 2.258826888497137e-06, "loss": 0.0677, "step": 1694 }, { "epoch": 3.47, "learning_rate": 2.253276916148274e-06, "loss": 0.0624, "step": 1695 }, { "epoch": 3.47, "learning_rate": 2.2477317864716788e-06, "loss": 0.0561, "step": 1696 }, { "epoch": 3.47, "learning_rate": 2.2421915092438094e-06, "loss": 0.0552, "step": 1697 }, { "epoch": 3.48, "learning_rate": 2.2366560942325833e-06, "loss": 0.0684, "step": 1698 }, { "epoch": 3.48, "learning_rate": 2.2311255511973347e-06, "loss": 0.0538, "step": 1699 }, { "epoch": 3.48, "learning_rate": 2.2255998898888165e-06, "loss": 0.0552, "step": 1700 }, { "epoch": 3.48, "learning_rate": 2.2200791200491688e-06, "loss": 0.0513, "step": 1701 }, { "epoch": 3.48, "learning_rate": 2.2145632514119085e-06, "loss": 0.0571, "step": 1702 }, { "epoch": 3.49, "learning_rate": 2.2090522937019165e-06, "loss": 0.0556, "step": 1703 }, { "epoch": 3.49, "learning_rate": 2.2035462566354067e-06, "loss": 0.0448, "step": 1704 }, { "epoch": 3.49, "learning_rate": 2.1980451499199262e-06, "loss": 0.0515, "step": 1705 }, { "epoch": 3.49, "learning_rate": 2.1925489832543247e-06, "loss": 0.0444, "step": 1706 }, { "epoch": 3.49, "learning_rate": 2.1870577663287416e-06, "loss": 0.0617, "step": 1707 }, { "epoch": 3.5, "learning_rate": 2.1815715088245916e-06, "loss": 0.0591, "step": 1708 }, { "epoch": 3.5, "learning_rate": 2.1760902204145463e-06, "loss": 0.0468, "step": 1709 }, { "epoch": 3.5, "learning_rate": 2.1706139107625146e-06, "loss": 0.0553, "step": 1710 }, { "epoch": 3.5, "learning_rate": 2.1651425895236265e-06, "loss": 0.0508, "step": 1711 }, { "epoch": 3.5, "learning_rate": 2.159676266344222e-06, "loss": 0.0547, "step": 1712 }, { "epoch": 3.51, "learning_rate": 2.1542149508618216e-06, "loss": 0.0643, "step": 1713 }, { "epoch": 3.51, "learning_rate": 2.1487586527051257e-06, "loss": 0.0577, "step": 1714 }, { "epoch": 3.51, "learning_rate": 2.143307381493982e-06, "loss": 0.0717, "step": 1715 }, { "epoch": 3.51, "learning_rate": 2.1378611468393772e-06, "loss": 0.0479, "step": 1716 }, { "epoch": 3.51, "learning_rate": 2.1324199583434175e-06, "loss": 0.0561, "step": 1717 }, { "epoch": 3.52, "learning_rate": 2.126983825599316e-06, "loss": 0.0513, "step": 1718 }, { "epoch": 3.52, "learning_rate": 2.121552758191366e-06, "loss": 0.0618, "step": 1719 }, { "epoch": 3.52, "learning_rate": 2.116126765694938e-06, "loss": 0.0556, "step": 1720 }, { "epoch": 3.52, "learning_rate": 2.110705857676449e-06, "loss": 0.0571, "step": 1721 }, { "epoch": 3.52, "learning_rate": 2.105290043693351e-06, "loss": 0.0655, "step": 1722 }, { "epoch": 3.53, "learning_rate": 2.0998793332941215e-06, "loss": 0.0668, "step": 1723 }, { "epoch": 3.53, "learning_rate": 2.0944737360182333e-06, "loss": 0.0703, "step": 1724 }, { "epoch": 3.53, "learning_rate": 2.089073261396148e-06, "loss": 0.0626, "step": 1725 }, { "epoch": 3.53, "learning_rate": 2.0836779189492925e-06, "loss": 0.0587, "step": 1726 }, { "epoch": 3.53, "learning_rate": 2.0782877181900503e-06, "loss": 0.0543, "step": 1727 }, { "epoch": 3.54, "learning_rate": 2.072902668621734e-06, "loss": 0.0589, "step": 1728 }, { "epoch": 3.54, "learning_rate": 2.06752277973858e-06, "loss": 0.0523, "step": 1729 }, { "epoch": 3.54, "learning_rate": 2.0621480610257217e-06, "loss": 0.0561, "step": 1730 }, { "epoch": 3.54, "learning_rate": 2.056778521959176e-06, "loss": 0.0814, "step": 1731 }, { "epoch": 3.54, "learning_rate": 2.051414172005835e-06, "loss": 0.0637, "step": 1732 }, { "epoch": 3.55, "learning_rate": 2.0460550206234324e-06, "loss": 0.0414, "step": 1733 }, { "epoch": 3.55, "learning_rate": 2.040701077260548e-06, "loss": 0.0467, "step": 1734 }, { "epoch": 3.55, "learning_rate": 2.035352351356564e-06, "loss": 0.0349, "step": 1735 }, { "epoch": 3.55, "learning_rate": 2.0300088523416795e-06, "loss": 0.0613, "step": 1736 }, { "epoch": 3.55, "learning_rate": 2.024670589636866e-06, "loss": 0.0495, "step": 1737 }, { "epoch": 3.56, "learning_rate": 2.0193375726538737e-06, "loss": 0.0438, "step": 1738 }, { "epoch": 3.56, "learning_rate": 2.014009810795196e-06, "loss": 0.0562, "step": 1739 }, { "epoch": 3.56, "learning_rate": 2.0086873134540626e-06, "loss": 0.0701, "step": 1740 }, { "epoch": 3.56, "learning_rate": 2.003370090014426e-06, "loss": 0.0465, "step": 1741 }, { "epoch": 3.57, "learning_rate": 1.9980581498509336e-06, "loss": 0.047, "step": 1742 }, { "epoch": 3.57, "learning_rate": 1.992751502328926e-06, "loss": 0.0473, "step": 1743 }, { "epoch": 3.57, "learning_rate": 1.9874501568044057e-06, "loss": 0.0447, "step": 1744 }, { "epoch": 3.57, "learning_rate": 1.9821541226240304e-06, "loss": 0.0751, "step": 1745 }, { "epoch": 3.57, "learning_rate": 1.9768634091250905e-06, "loss": 0.0486, "step": 1746 }, { "epoch": 3.58, "learning_rate": 1.9715780256355014e-06, "loss": 0.0726, "step": 1747 }, { "epoch": 3.58, "learning_rate": 1.9662979814737776e-06, "loss": 0.0508, "step": 1748 }, { "epoch": 3.58, "learning_rate": 1.961023285949018e-06, "loss": 0.0701, "step": 1749 }, { "epoch": 3.58, "learning_rate": 1.955753948360897e-06, "loss": 0.0563, "step": 1750 }, { "epoch": 3.58, "learning_rate": 1.9504899779996354e-06, "loss": 0.0476, "step": 1751 }, { "epoch": 3.59, "learning_rate": 1.945231384146e-06, "loss": 0.0476, "step": 1752 }, { "epoch": 3.59, "learning_rate": 1.9399781760712716e-06, "loss": 0.0502, "step": 1753 }, { "epoch": 3.59, "learning_rate": 1.9347303630372373e-06, "loss": 0.0583, "step": 1754 }, { "epoch": 3.59, "learning_rate": 1.92948795429617e-06, "loss": 0.0571, "step": 1755 }, { "epoch": 3.59, "learning_rate": 1.924250959090822e-06, "loss": 0.0486, "step": 1756 }, { "epoch": 3.6, "learning_rate": 1.9190193866543922e-06, "loss": 0.0367, "step": 1757 }, { "epoch": 3.6, "learning_rate": 1.913793246210525e-06, "loss": 0.0535, "step": 1758 }, { "epoch": 3.6, "learning_rate": 1.9085725469732857e-06, "loss": 0.0514, "step": 1759 }, { "epoch": 3.6, "learning_rate": 1.9033572981471431e-06, "loss": 0.0638, "step": 1760 }, { "epoch": 3.6, "learning_rate": 1.8981475089269641e-06, "loss": 0.0432, "step": 1761 }, { "epoch": 3.61, "learning_rate": 1.8929431884979805e-06, "loss": 0.0599, "step": 1762 }, { "epoch": 3.61, "learning_rate": 1.8877443460357936e-06, "loss": 0.0467, "step": 1763 }, { "epoch": 3.61, "learning_rate": 1.8825509907063328e-06, "loss": 0.0636, "step": 1764 }, { "epoch": 3.61, "learning_rate": 1.8773631316658657e-06, "loss": 0.0482, "step": 1765 }, { "epoch": 3.61, "learning_rate": 1.8721807780609618e-06, "loss": 0.0556, "step": 1766 }, { "epoch": 3.62, "learning_rate": 1.86700393902849e-06, "loss": 0.0457, "step": 1767 }, { "epoch": 3.62, "learning_rate": 1.8618326236955908e-06, "loss": 0.054, "step": 1768 }, { "epoch": 3.62, "learning_rate": 1.8566668411796684e-06, "loss": 0.0487, "step": 1769 }, { "epoch": 3.62, "learning_rate": 1.8515066005883747e-06, "loss": 0.0566, "step": 1770 }, { "epoch": 3.62, "learning_rate": 1.846351911019586e-06, "loss": 0.0591, "step": 1771 }, { "epoch": 3.63, "learning_rate": 1.8412027815613976e-06, "loss": 0.059, "step": 1772 }, { "epoch": 3.63, "learning_rate": 1.8360592212920968e-06, "loss": 0.0404, "step": 1773 }, { "epoch": 3.63, "learning_rate": 1.830921239280154e-06, "loss": 0.0423, "step": 1774 }, { "epoch": 3.63, "learning_rate": 1.8257888445842026e-06, "loss": 0.0468, "step": 1775 }, { "epoch": 3.63, "learning_rate": 1.8206620462530311e-06, "loss": 0.0532, "step": 1776 }, { "epoch": 3.64, "learning_rate": 1.8155408533255553e-06, "loss": 0.0426, "step": 1777 }, { "epoch": 3.64, "learning_rate": 1.8104252748308083e-06, "loss": 0.0736, "step": 1778 }, { "epoch": 3.64, "learning_rate": 1.8053153197879298e-06, "loss": 0.0431, "step": 1779 }, { "epoch": 3.64, "learning_rate": 1.8002109972061387e-06, "loss": 0.0498, "step": 1780 }, { "epoch": 3.64, "learning_rate": 1.79511231608473e-06, "loss": 0.0475, "step": 1781 }, { "epoch": 3.65, "learning_rate": 1.7900192854130465e-06, "loss": 0.0497, "step": 1782 }, { "epoch": 3.65, "learning_rate": 1.7849319141704713e-06, "loss": 0.0473, "step": 1783 }, { "epoch": 3.65, "learning_rate": 1.7798502113264071e-06, "loss": 0.06, "step": 1784 }, { "epoch": 3.65, "learning_rate": 1.7747741858402696e-06, "loss": 0.0649, "step": 1785 }, { "epoch": 3.66, "learning_rate": 1.7697038466614562e-06, "loss": 0.051, "step": 1786 }, { "epoch": 3.66, "learning_rate": 1.7646392027293463e-06, "loss": 0.0436, "step": 1787 }, { "epoch": 3.66, "learning_rate": 1.759580262973274e-06, "loss": 0.049, "step": 1788 }, { "epoch": 3.66, "learning_rate": 1.7545270363125155e-06, "loss": 0.0647, "step": 1789 }, { "epoch": 3.66, "learning_rate": 1.7494795316562791e-06, "loss": 0.0356, "step": 1790 }, { "epoch": 3.67, "learning_rate": 1.7444377579036815e-06, "loss": 0.0544, "step": 1791 }, { "epoch": 3.67, "learning_rate": 1.7394017239437345e-06, "loss": 0.0508, "step": 1792 }, { "epoch": 3.67, "learning_rate": 1.7343714386553312e-06, "loss": 0.065, "step": 1793 }, { "epoch": 3.67, "learning_rate": 1.729346910907233e-06, "loss": 0.0654, "step": 1794 }, { "epoch": 3.67, "learning_rate": 1.7243281495580438e-06, "loss": 0.059, "step": 1795 }, { "epoch": 3.68, "learning_rate": 1.7193151634562071e-06, "loss": 0.0541, "step": 1796 }, { "epoch": 3.68, "learning_rate": 1.7143079614399805e-06, "loss": 0.0509, "step": 1797 }, { "epoch": 3.68, "learning_rate": 1.7093065523374231e-06, "loss": 0.0497, "step": 1798 }, { "epoch": 3.68, "learning_rate": 1.7043109449663852e-06, "loss": 0.0657, "step": 1799 }, { "epoch": 3.68, "learning_rate": 1.6993211481344824e-06, "loss": 0.0442, "step": 1800 }, { "epoch": 3.69, "learning_rate": 1.6943371706390937e-06, "loss": 0.0501, "step": 1801 }, { "epoch": 3.69, "learning_rate": 1.6893590212673268e-06, "loss": 0.0514, "step": 1802 }, { "epoch": 3.69, "learning_rate": 1.6843867087960252e-06, "loss": 0.0644, "step": 1803 }, { "epoch": 3.69, "learning_rate": 1.6794202419917337e-06, "loss": 0.0606, "step": 1804 }, { "epoch": 3.69, "learning_rate": 1.674459629610698e-06, "loss": 0.039, "step": 1805 }, { "epoch": 3.7, "learning_rate": 1.6695048803988356e-06, "loss": 0.0546, "step": 1806 }, { "epoch": 3.7, "learning_rate": 1.6645560030917274e-06, "loss": 0.0702, "step": 1807 }, { "epoch": 3.7, "learning_rate": 1.659613006414607e-06, "loss": 0.053, "step": 1808 }, { "epoch": 3.7, "learning_rate": 1.654675899082333e-06, "loss": 0.0435, "step": 1809 }, { "epoch": 3.7, "learning_rate": 1.6497446897993885e-06, "loss": 0.0545, "step": 1810 }, { "epoch": 3.71, "learning_rate": 1.6448193872598505e-06, "loss": 0.0474, "step": 1811 }, { "epoch": 3.71, "learning_rate": 1.6399000001473858e-06, "loss": 0.0659, "step": 1812 }, { "epoch": 3.71, "learning_rate": 1.6349865371352292e-06, "loss": 0.0506, "step": 1813 }, { "epoch": 3.71, "learning_rate": 1.630079006886176e-06, "loss": 0.0643, "step": 1814 }, { "epoch": 3.71, "learning_rate": 1.6251774180525575e-06, "loss": 0.06, "step": 1815 }, { "epoch": 3.72, "learning_rate": 1.6202817792762283e-06, "loss": 0.0576, "step": 1816 }, { "epoch": 3.72, "learning_rate": 1.6153920991885591e-06, "loss": 0.0572, "step": 1817 }, { "epoch": 3.72, "learning_rate": 1.6105083864104072e-06, "loss": 0.0513, "step": 1818 }, { "epoch": 3.72, "learning_rate": 1.6056306495521156e-06, "loss": 0.0562, "step": 1819 }, { "epoch": 3.72, "learning_rate": 1.600758897213488e-06, "loss": 0.0437, "step": 1820 }, { "epoch": 3.73, "learning_rate": 1.5958931379837771e-06, "loss": 0.0697, "step": 1821 }, { "epoch": 3.73, "learning_rate": 1.5910333804416682e-06, "loss": 0.0423, "step": 1822 }, { "epoch": 3.73, "learning_rate": 1.5861796331552703e-06, "loss": 0.0453, "step": 1823 }, { "epoch": 3.73, "learning_rate": 1.581331904682089e-06, "loss": 0.0427, "step": 1824 }, { "epoch": 3.73, "learning_rate": 1.5764902035690245e-06, "loss": 0.064, "step": 1825 }, { "epoch": 3.74, "learning_rate": 1.5716545383523458e-06, "loss": 0.0466, "step": 1826 }, { "epoch": 3.74, "learning_rate": 1.5668249175576806e-06, "loss": 0.0588, "step": 1827 }, { "epoch": 3.74, "learning_rate": 1.562001349700004e-06, "loss": 0.0477, "step": 1828 }, { "epoch": 3.74, "learning_rate": 1.557183843283614e-06, "loss": 0.0582, "step": 1829 }, { "epoch": 3.75, "learning_rate": 1.552372406802128e-06, "loss": 0.0761, "step": 1830 }, { "epoch": 3.75, "learning_rate": 1.547567048738452e-06, "loss": 0.0585, "step": 1831 }, { "epoch": 3.75, "learning_rate": 1.5427677775647865e-06, "loss": 0.0451, "step": 1832 }, { "epoch": 3.75, "learning_rate": 1.5379746017425906e-06, "loss": 0.0389, "step": 1833 }, { "epoch": 3.75, "learning_rate": 1.5331875297225862e-06, "loss": 0.0567, "step": 1834 }, { "epoch": 3.76, "learning_rate": 1.528406569944726e-06, "loss": 0.0737, "step": 1835 }, { "epoch": 3.76, "learning_rate": 1.5236317308381883e-06, "loss": 0.0381, "step": 1836 }, { "epoch": 3.76, "learning_rate": 1.518863020821364e-06, "loss": 0.0546, "step": 1837 }, { "epoch": 3.76, "learning_rate": 1.5141004483018323e-06, "loss": 0.0487, "step": 1838 }, { "epoch": 3.76, "learning_rate": 1.5093440216763577e-06, "loss": 0.0493, "step": 1839 }, { "epoch": 3.77, "learning_rate": 1.5045937493308642e-06, "loss": 0.0453, "step": 1840 }, { "epoch": 3.77, "learning_rate": 1.4998496396404266e-06, "loss": 0.0597, "step": 1841 }, { "epoch": 3.77, "learning_rate": 1.4951117009692528e-06, "loss": 0.0695, "step": 1842 }, { "epoch": 3.77, "learning_rate": 1.4903799416706772e-06, "loss": 0.0393, "step": 1843 }, { "epoch": 3.77, "learning_rate": 1.4856543700871318e-06, "loss": 0.0525, "step": 1844 }, { "epoch": 3.78, "learning_rate": 1.4809349945501422e-06, "loss": 0.037, "step": 1845 }, { "epoch": 3.78, "learning_rate": 1.4762218233803134e-06, "loss": 0.0471, "step": 1846 }, { "epoch": 3.78, "learning_rate": 1.4715148648873046e-06, "loss": 0.0479, "step": 1847 }, { "epoch": 3.78, "learning_rate": 1.4668141273698283e-06, "loss": 0.0581, "step": 1848 }, { "epoch": 3.78, "learning_rate": 1.4621196191156256e-06, "loss": 0.0324, "step": 1849 }, { "epoch": 3.79, "learning_rate": 1.4574313484014547e-06, "loss": 0.0825, "step": 1850 }, { "epoch": 3.79, "learning_rate": 1.4527493234930763e-06, "loss": 0.0594, "step": 1851 }, { "epoch": 3.79, "learning_rate": 1.4480735526452427e-06, "loss": 0.0648, "step": 1852 }, { "epoch": 3.79, "learning_rate": 1.443404044101675e-06, "loss": 0.0459, "step": 1853 }, { "epoch": 3.79, "learning_rate": 1.4387408060950586e-06, "loss": 0.0551, "step": 1854 }, { "epoch": 3.8, "learning_rate": 1.4340838468470198e-06, "loss": 0.0429, "step": 1855 }, { "epoch": 3.8, "learning_rate": 1.4294331745681133e-06, "loss": 0.0517, "step": 1856 }, { "epoch": 3.8, "learning_rate": 1.4247887974578162e-06, "loss": 0.0573, "step": 1857 }, { "epoch": 3.8, "learning_rate": 1.4201507237045004e-06, "loss": 0.0439, "step": 1858 }, { "epoch": 3.8, "learning_rate": 1.4155189614854275e-06, "loss": 0.0576, "step": 1859 }, { "epoch": 3.81, "learning_rate": 1.4108935189667283e-06, "loss": 0.0465, "step": 1860 }, { "epoch": 3.81, "learning_rate": 1.4062744043033971e-06, "loss": 0.0479, "step": 1861 }, { "epoch": 3.81, "learning_rate": 1.4016616256392656e-06, "loss": 0.0482, "step": 1862 }, { "epoch": 3.81, "learning_rate": 1.3970551911070013e-06, "loss": 0.043, "step": 1863 }, { "epoch": 3.81, "learning_rate": 1.3924551088280797e-06, "loss": 0.0559, "step": 1864 }, { "epoch": 3.82, "learning_rate": 1.3878613869127798e-06, "loss": 0.0387, "step": 1865 }, { "epoch": 3.82, "learning_rate": 1.3832740334601692e-06, "loss": 0.0687, "step": 1866 }, { "epoch": 3.82, "learning_rate": 1.3786930565580825e-06, "loss": 0.0551, "step": 1867 }, { "epoch": 3.82, "learning_rate": 1.374118464283119e-06, "loss": 0.0513, "step": 1868 }, { "epoch": 3.83, "learning_rate": 1.3695502647006114e-06, "loss": 0.0406, "step": 1869 }, { "epoch": 3.83, "learning_rate": 1.3649884658646312e-06, "loss": 0.0595, "step": 1870 }, { "epoch": 3.83, "learning_rate": 1.3604330758179585e-06, "loss": 0.0739, "step": 1871 }, { "epoch": 3.83, "learning_rate": 1.3558841025920794e-06, "loss": 0.0469, "step": 1872 }, { "epoch": 3.83, "learning_rate": 1.351341554207163e-06, "loss": 0.0563, "step": 1873 }, { "epoch": 3.84, "learning_rate": 1.3468054386720503e-06, "loss": 0.057, "step": 1874 }, { "epoch": 3.84, "learning_rate": 1.3422757639842454e-06, "loss": 0.0503, "step": 1875 }, { "epoch": 3.84, "learning_rate": 1.3377525381298905e-06, "loss": 0.0439, "step": 1876 }, { "epoch": 3.84, "learning_rate": 1.3332357690837644e-06, "loss": 0.0532, "step": 1877 }, { "epoch": 3.84, "learning_rate": 1.3287254648092573e-06, "loss": 0.0717, "step": 1878 }, { "epoch": 3.85, "learning_rate": 1.324221633258364e-06, "loss": 0.0547, "step": 1879 }, { "epoch": 3.85, "learning_rate": 1.319724282371664e-06, "loss": 0.0482, "step": 1880 }, { "epoch": 3.85, "learning_rate": 1.3152334200783167e-06, "loss": 0.0463, "step": 1881 }, { "epoch": 3.85, "learning_rate": 1.310749054296036e-06, "loss": 0.039, "step": 1882 }, { "epoch": 3.85, "learning_rate": 1.3062711929310872e-06, "loss": 0.0392, "step": 1883 }, { "epoch": 3.86, "learning_rate": 1.3017998438782636e-06, "loss": 0.0594, "step": 1884 }, { "epoch": 3.86, "learning_rate": 1.2973350150208763e-06, "loss": 0.0431, "step": 1885 }, { "epoch": 3.86, "learning_rate": 1.2928767142307463e-06, "loss": 0.0489, "step": 1886 }, { "epoch": 3.86, "learning_rate": 1.28842494936818e-06, "loss": 0.0891, "step": 1887 }, { "epoch": 3.86, "learning_rate": 1.283979728281962e-06, "loss": 0.0574, "step": 1888 }, { "epoch": 3.87, "learning_rate": 1.2795410588093394e-06, "loss": 0.0565, "step": 1889 }, { "epoch": 3.87, "learning_rate": 1.2751089487760098e-06, "loss": 0.0495, "step": 1890 }, { "epoch": 3.87, "learning_rate": 1.2706834059961042e-06, "loss": 0.0819, "step": 1891 }, { "epoch": 3.87, "learning_rate": 1.2662644382721777e-06, "loss": 0.0552, "step": 1892 }, { "epoch": 3.87, "learning_rate": 1.26185205339519e-06, "loss": 0.0547, "step": 1893 }, { "epoch": 3.88, "learning_rate": 1.257446259144494e-06, "loss": 0.0557, "step": 1894 }, { "epoch": 3.88, "learning_rate": 1.2530470632878295e-06, "loss": 0.0588, "step": 1895 }, { "epoch": 3.88, "learning_rate": 1.2486544735812939e-06, "loss": 0.0633, "step": 1896 }, { "epoch": 3.88, "learning_rate": 1.2442684977693465e-06, "loss": 0.0539, "step": 1897 }, { "epoch": 3.88, "learning_rate": 1.2398891435847753e-06, "loss": 0.0396, "step": 1898 }, { "epoch": 3.89, "learning_rate": 1.2355164187487046e-06, "loss": 0.0597, "step": 1899 }, { "epoch": 3.89, "learning_rate": 1.231150330970563e-06, "loss": 0.064, "step": 1900 }, { "epoch": 3.89, "learning_rate": 1.2267908879480822e-06, "loss": 0.0575, "step": 1901 }, { "epoch": 3.89, "learning_rate": 1.2224380973672762e-06, "loss": 0.0432, "step": 1902 }, { "epoch": 3.89, "learning_rate": 1.2180919669024294e-06, "loss": 0.0626, "step": 1903 }, { "epoch": 3.9, "learning_rate": 1.213752504216088e-06, "loss": 0.0532, "step": 1904 }, { "epoch": 3.9, "learning_rate": 1.2094197169590378e-06, "loss": 0.0635, "step": 1905 }, { "epoch": 3.9, "learning_rate": 1.205093612770299e-06, "loss": 0.0577, "step": 1906 }, { "epoch": 3.9, "learning_rate": 1.2007741992771065e-06, "loss": 0.0576, "step": 1907 }, { "epoch": 3.9, "learning_rate": 1.1964614840949002e-06, "loss": 0.067, "step": 1908 }, { "epoch": 3.91, "learning_rate": 1.192155474827309e-06, "loss": 0.0724, "step": 1909 }, { "epoch": 3.91, "learning_rate": 1.187856179066142e-06, "loss": 0.0609, "step": 1910 }, { "epoch": 3.91, "learning_rate": 1.183563604391369e-06, "loss": 0.0457, "step": 1911 }, { "epoch": 3.91, "learning_rate": 1.179277758371109e-06, "loss": 0.0515, "step": 1912 }, { "epoch": 3.92, "learning_rate": 1.1749986485616232e-06, "loss": 0.064, "step": 1913 }, { "epoch": 3.92, "learning_rate": 1.1707262825072896e-06, "loss": 0.0464, "step": 1914 }, { "epoch": 3.92, "learning_rate": 1.1664606677406025e-06, "loss": 0.0559, "step": 1915 }, { "epoch": 3.92, "learning_rate": 1.1622018117821504e-06, "loss": 0.0664, "step": 1916 }, { "epoch": 3.92, "learning_rate": 1.1579497221406038e-06, "loss": 0.0478, "step": 1917 }, { "epoch": 3.93, "learning_rate": 1.1537044063127057e-06, "loss": 0.0388, "step": 1918 }, { "epoch": 3.93, "learning_rate": 1.1494658717832574e-06, "loss": 0.046, "step": 1919 }, { "epoch": 3.93, "learning_rate": 1.145234126025102e-06, "loss": 0.0506, "step": 1920 }, { "epoch": 3.93, "learning_rate": 1.141009176499116e-06, "loss": 0.0405, "step": 1921 }, { "epoch": 3.93, "learning_rate": 1.1367910306541918e-06, "loss": 0.0478, "step": 1922 }, { "epoch": 3.94, "learning_rate": 1.1325796959272244e-06, "loss": 0.047, "step": 1923 }, { "epoch": 3.94, "learning_rate": 1.1283751797431064e-06, "loss": 0.0468, "step": 1924 }, { "epoch": 3.94, "learning_rate": 1.124177489514704e-06, "loss": 0.0647, "step": 1925 }, { "epoch": 3.94, "learning_rate": 1.1199866326428487e-06, "loss": 0.065, "step": 1926 }, { "epoch": 3.94, "learning_rate": 1.1158026165163254e-06, "loss": 0.0367, "step": 1927 }, { "epoch": 3.95, "learning_rate": 1.1116254485118606e-06, "loss": 0.0684, "step": 1928 }, { "epoch": 3.95, "learning_rate": 1.1074551359941022e-06, "loss": 0.0452, "step": 1929 }, { "epoch": 3.95, "learning_rate": 1.1032916863156161e-06, "loss": 0.0508, "step": 1930 }, { "epoch": 3.95, "learning_rate": 1.099135106816866e-06, "loss": 0.0499, "step": 1931 }, { "epoch": 3.95, "learning_rate": 1.0949854048262015e-06, "loss": 0.0395, "step": 1932 }, { "epoch": 3.96, "learning_rate": 1.0908425876598512e-06, "loss": 0.0448, "step": 1933 }, { "epoch": 3.96, "learning_rate": 1.0867066626219002e-06, "loss": 0.0588, "step": 1934 }, { "epoch": 3.96, "learning_rate": 1.082577637004289e-06, "loss": 0.0534, "step": 1935 }, { "epoch": 3.96, "learning_rate": 1.078455518086784e-06, "loss": 0.0527, "step": 1936 }, { "epoch": 3.96, "learning_rate": 1.074340313136984e-06, "loss": 0.0574, "step": 1937 }, { "epoch": 3.97, "learning_rate": 1.0702320294102913e-06, "loss": 0.0478, "step": 1938 }, { "epoch": 3.97, "learning_rate": 1.0661306741499116e-06, "loss": 0.0608, "step": 1939 }, { "epoch": 3.97, "learning_rate": 1.0620362545868307e-06, "loss": 0.0386, "step": 1940 }, { "epoch": 3.97, "learning_rate": 1.0579487779398062e-06, "loss": 0.0455, "step": 1941 }, { "epoch": 3.97, "learning_rate": 1.0538682514153586e-06, "loss": 0.0482, "step": 1942 }, { "epoch": 3.98, "learning_rate": 1.0497946822077504e-06, "loss": 0.0606, "step": 1943 }, { "epoch": 3.98, "learning_rate": 1.0457280774989825e-06, "loss": 0.0481, "step": 1944 }, { "epoch": 3.98, "learning_rate": 1.0416684444587727e-06, "loss": 0.0507, "step": 1945 }, { "epoch": 3.98, "learning_rate": 1.037615790244549e-06, "loss": 0.0512, "step": 1946 }, { "epoch": 3.98, "learning_rate": 1.0335701220014332e-06, "loss": 0.0421, "step": 1947 }, { "epoch": 3.99, "learning_rate": 1.029531446862236e-06, "loss": 0.0534, "step": 1948 }, { "epoch": 3.99, "learning_rate": 1.0254997719474314e-06, "loss": 0.0516, "step": 1949 }, { "epoch": 3.99, "learning_rate": 1.0214751043651582e-06, "loss": 0.0579, "step": 1950 }, { "epoch": 3.99, "learning_rate": 1.0174574512111961e-06, "loss": 0.0466, "step": 1951 }, { "epoch": 3.99, "learning_rate": 1.0134468195689578e-06, "loss": 0.0617, "step": 1952 }, { "epoch": 4.0, "learning_rate": 1.0094432165094813e-06, "loss": 0.041, "step": 1953 }, { "epoch": 4.0, "learning_rate": 1.0054466490914078e-06, "loss": 0.0375, "step": 1954 }, { "epoch": 4.0, "learning_rate": 1.0014571243609762e-06, "loss": 0.0352, "step": 1955 }, { "epoch": 4.0, "learning_rate": 9.974746493520066e-07, "loss": 0.0211, "step": 1956 }, { "epoch": 4.01, "learning_rate": 9.934992310858944e-07, "loss": 0.0276, "step": 1957 }, { "epoch": 4.01, "learning_rate": 9.895308765715882e-07, "loss": 0.033, "step": 1958 }, { "epoch": 4.01, "learning_rate": 9.85569592805588e-07, "loss": 0.02, "step": 1959 }, { "epoch": 4.01, "learning_rate": 9.816153867719231e-07, "loss": 0.0409, "step": 1960 }, { "epoch": 4.01, "learning_rate": 9.776682654421448e-07, "loss": 0.0187, "step": 1961 }, { "epoch": 4.02, "learning_rate": 9.73728235775317e-07, "loss": 0.0173, "step": 1962 }, { "epoch": 4.02, "learning_rate": 9.697953047179959e-07, "loss": 0.0334, "step": 1963 }, { "epoch": 4.02, "learning_rate": 9.658694792042284e-07, "loss": 0.0188, "step": 1964 }, { "epoch": 4.02, "learning_rate": 9.619507661555243e-07, "loss": 0.0178, "step": 1965 }, { "epoch": 4.02, "learning_rate": 9.580391724808636e-07, "loss": 0.0264, "step": 1966 }, { "epoch": 4.03, "learning_rate": 9.541347050766676e-07, "loss": 0.0273, "step": 1967 }, { "epoch": 4.03, "learning_rate": 9.502373708267976e-07, "loss": 0.0242, "step": 1968 }, { "epoch": 4.03, "learning_rate": 9.463471766025368e-07, "loss": 0.0202, "step": 1969 }, { "epoch": 4.03, "learning_rate": 9.424641292625774e-07, "loss": 0.0202, "step": 1970 }, { "epoch": 4.03, "learning_rate": 9.385882356530179e-07, "loss": 0.0345, "step": 1971 }, { "epoch": 4.04, "learning_rate": 9.347195026073369e-07, "loss": 0.0175, "step": 1972 }, { "epoch": 4.04, "learning_rate": 9.308579369463944e-07, "loss": 0.0335, "step": 1973 }, { "epoch": 4.04, "learning_rate": 9.270035454784098e-07, "loss": 0.0298, "step": 1974 }, { "epoch": 4.04, "learning_rate": 9.231563349989553e-07, "loss": 0.0339, "step": 1975 }, { "epoch": 4.04, "learning_rate": 9.193163122909404e-07, "loss": 0.0156, "step": 1976 }, { "epoch": 4.05, "learning_rate": 9.154834841246074e-07, "loss": 0.0225, "step": 1977 }, { "epoch": 4.05, "learning_rate": 9.116578572575091e-07, "loss": 0.0256, "step": 1978 }, { "epoch": 4.05, "learning_rate": 9.078394384345024e-07, "loss": 0.0257, "step": 1979 }, { "epoch": 4.05, "learning_rate": 9.040282343877388e-07, "loss": 0.0148, "step": 1980 }, { "epoch": 4.05, "learning_rate": 9.002242518366461e-07, "loss": 0.0348, "step": 1981 }, { "epoch": 4.06, "learning_rate": 8.964274974879239e-07, "loss": 0.0395, "step": 1982 }, { "epoch": 4.06, "learning_rate": 8.926379780355243e-07, "loss": 0.0272, "step": 1983 }, { "epoch": 4.06, "learning_rate": 8.888557001606463e-07, "loss": 0.0154, "step": 1984 }, { "epoch": 4.06, "learning_rate": 8.850806705317183e-07, "loss": 0.021, "step": 1985 }, { "epoch": 4.06, "learning_rate": 8.813128958043943e-07, "loss": 0.0313, "step": 1986 }, { "epoch": 4.07, "learning_rate": 8.775523826215327e-07, "loss": 0.0283, "step": 1987 }, { "epoch": 4.07, "learning_rate": 8.737991376131932e-07, "loss": 0.0212, "step": 1988 }, { "epoch": 4.07, "learning_rate": 8.700531673966184e-07, "loss": 0.0274, "step": 1989 }, { "epoch": 4.07, "learning_rate": 8.66314478576225e-07, "loss": 0.0179, "step": 1990 }, { "epoch": 4.07, "learning_rate": 8.625830777435945e-07, "loss": 0.026, "step": 1991 }, { "epoch": 4.08, "learning_rate": 8.58858971477457e-07, "loss": 0.0201, "step": 1992 }, { "epoch": 4.08, "learning_rate": 8.551421663436821e-07, "loss": 0.0212, "step": 1993 }, { "epoch": 4.08, "learning_rate": 8.514326688952657e-07, "loss": 0.0335, "step": 1994 }, { "epoch": 4.08, "learning_rate": 8.477304856723234e-07, "loss": 0.0263, "step": 1995 }, { "epoch": 4.08, "learning_rate": 8.440356232020719e-07, "loss": 0.0227, "step": 1996 }, { "epoch": 4.09, "learning_rate": 8.403480879988229e-07, "loss": 0.0409, "step": 1997 }, { "epoch": 4.09, "learning_rate": 8.366678865639688e-07, "loss": 0.0197, "step": 1998 }, { "epoch": 4.09, "learning_rate": 8.329950253859703e-07, "loss": 0.0202, "step": 1999 }, { "epoch": 4.09, "learning_rate": 8.293295109403504e-07, "loss": 0.0092, "step": 2000 }, { "epoch": 4.1, "learning_rate": 8.256713496896751e-07, "loss": 0.0142, "step": 2001 }, { "epoch": 4.1, "learning_rate": 8.220205480835525e-07, "loss": 0.0381, "step": 2002 }, { "epoch": 4.1, "learning_rate": 8.183771125586054e-07, "loss": 0.0342, "step": 2003 }, { "epoch": 4.1, "learning_rate": 8.147410495384789e-07, "loss": 0.0179, "step": 2004 }, { "epoch": 4.1, "learning_rate": 8.111123654338143e-07, "loss": 0.022, "step": 2005 }, { "epoch": 4.11, "learning_rate": 8.074910666422475e-07, "loss": 0.016, "step": 2006 }, { "epoch": 4.11, "learning_rate": 8.03877159548389e-07, "loss": 0.0182, "step": 2007 }, { "epoch": 4.11, "learning_rate": 8.0027065052382e-07, "loss": 0.0202, "step": 2008 }, { "epoch": 4.11, "learning_rate": 7.966715459270786e-07, "loss": 0.0245, "step": 2009 }, { "epoch": 4.11, "learning_rate": 7.930798521036459e-07, "loss": 0.0244, "step": 2010 }, { "epoch": 4.12, "learning_rate": 7.894955753859412e-07, "loss": 0.0187, "step": 2011 }, { "epoch": 4.12, "learning_rate": 7.85918722093304e-07, "loss": 0.0141, "step": 2012 }, { "epoch": 4.12, "learning_rate": 7.823492985319858e-07, "loss": 0.029, "step": 2013 }, { "epoch": 4.12, "learning_rate": 7.787873109951382e-07, "loss": 0.0192, "step": 2014 }, { "epoch": 4.12, "learning_rate": 7.752327657628067e-07, "loss": 0.02, "step": 2015 }, { "epoch": 4.13, "learning_rate": 7.716856691019098e-07, "loss": 0.0194, "step": 2016 }, { "epoch": 4.13, "learning_rate": 7.6814602726624e-07, "loss": 0.0307, "step": 2017 }, { "epoch": 4.13, "learning_rate": 7.646138464964403e-07, "loss": 0.0271, "step": 2018 }, { "epoch": 4.13, "learning_rate": 7.610891330200015e-07, "loss": 0.0221, "step": 2019 }, { "epoch": 4.13, "learning_rate": 7.575718930512516e-07, "loss": 0.0181, "step": 2020 }, { "epoch": 4.14, "learning_rate": 7.540621327913394e-07, "loss": 0.0259, "step": 2021 }, { "epoch": 4.14, "learning_rate": 7.505598584282264e-07, "loss": 0.0223, "step": 2022 }, { "epoch": 4.14, "learning_rate": 7.470650761366755e-07, "loss": 0.0223, "step": 2023 }, { "epoch": 4.14, "learning_rate": 7.435777920782444e-07, "loss": 0.0218, "step": 2024 }, { "epoch": 4.14, "learning_rate": 7.400980124012658e-07, "loss": 0.0309, "step": 2025 }, { "epoch": 4.15, "learning_rate": 7.366257432408464e-07, "loss": 0.0272, "step": 2026 }, { "epoch": 4.15, "learning_rate": 7.33160990718847e-07, "loss": 0.0253, "step": 2027 }, { "epoch": 4.15, "learning_rate": 7.297037609438779e-07, "loss": 0.0292, "step": 2028 }, { "epoch": 4.15, "learning_rate": 7.262540600112872e-07, "loss": 0.0247, "step": 2029 }, { "epoch": 4.15, "learning_rate": 7.22811894003147e-07, "loss": 0.0182, "step": 2030 }, { "epoch": 4.16, "learning_rate": 7.193772689882483e-07, "loss": 0.0177, "step": 2031 }, { "epoch": 4.16, "learning_rate": 7.159501910220811e-07, "loss": 0.0246, "step": 2032 }, { "epoch": 4.16, "learning_rate": 7.125306661468356e-07, "loss": 0.0323, "step": 2033 }, { "epoch": 4.16, "learning_rate": 7.091187003913802e-07, "loss": 0.0135, "step": 2034 }, { "epoch": 4.16, "learning_rate": 7.057142997712602e-07, "loss": 0.0205, "step": 2035 }, { "epoch": 4.17, "learning_rate": 7.023174702886804e-07, "loss": 0.0284, "step": 2036 }, { "epoch": 4.17, "learning_rate": 6.989282179324963e-07, "loss": 0.0222, "step": 2037 }, { "epoch": 4.17, "learning_rate": 6.955465486782093e-07, "loss": 0.0213, "step": 2038 }, { "epoch": 4.17, "learning_rate": 6.921724684879444e-07, "loss": 0.02, "step": 2039 }, { "epoch": 4.17, "learning_rate": 6.888059833104527e-07, "loss": 0.0202, "step": 2040 }, { "epoch": 4.18, "learning_rate": 6.854470990810907e-07, "loss": 0.0303, "step": 2041 }, { "epoch": 4.18, "learning_rate": 6.820958217218159e-07, "loss": 0.0197, "step": 2042 }, { "epoch": 4.18, "learning_rate": 6.787521571411721e-07, "loss": 0.0085, "step": 2043 }, { "epoch": 4.18, "learning_rate": 6.754161112342844e-07, "loss": 0.025, "step": 2044 }, { "epoch": 4.19, "learning_rate": 6.720876898828438e-07, "loss": 0.0265, "step": 2045 }, { "epoch": 4.19, "learning_rate": 6.687668989550971e-07, "loss": 0.0142, "step": 2046 }, { "epoch": 4.19, "learning_rate": 6.654537443058429e-07, "loss": 0.016, "step": 2047 }, { "epoch": 4.19, "learning_rate": 6.621482317764105e-07, "loss": 0.0225, "step": 2048 }, { "epoch": 4.19, "learning_rate": 6.588503671946611e-07, "loss": 0.0153, "step": 2049 }, { "epoch": 4.2, "learning_rate": 6.555601563749675e-07, "loss": 0.0285, "step": 2050 }, { "epoch": 4.2, "learning_rate": 6.522776051182117e-07, "loss": 0.0162, "step": 2051 }, { "epoch": 4.2, "learning_rate": 6.490027192117682e-07, "loss": 0.0154, "step": 2052 }, { "epoch": 4.2, "learning_rate": 6.457355044295005e-07, "loss": 0.0254, "step": 2053 }, { "epoch": 4.2, "learning_rate": 6.424759665317437e-07, "loss": 0.0163, "step": 2054 }, { "epoch": 4.21, "learning_rate": 6.392241112653031e-07, "loss": 0.0219, "step": 2055 }, { "epoch": 4.21, "learning_rate": 6.359799443634313e-07, "loss": 0.0209, "step": 2056 }, { "epoch": 4.21, "learning_rate": 6.327434715458325e-07, "loss": 0.0158, "step": 2057 }, { "epoch": 4.21, "learning_rate": 6.29514698518644e-07, "loss": 0.0374, "step": 2058 }, { "epoch": 4.21, "learning_rate": 6.262936309744261e-07, "loss": 0.0159, "step": 2059 }, { "epoch": 4.22, "learning_rate": 6.230802745921549e-07, "loss": 0.0148, "step": 2060 }, { "epoch": 4.22, "learning_rate": 6.198746350372104e-07, "loss": 0.0355, "step": 2061 }, { "epoch": 4.22, "learning_rate": 6.166767179613691e-07, "loss": 0.0252, "step": 2062 }, { "epoch": 4.22, "learning_rate": 6.134865290027903e-07, "loss": 0.0155, "step": 2063 }, { "epoch": 4.22, "learning_rate": 6.103040737860094e-07, "loss": 0.0375, "step": 2064 }, { "epoch": 4.23, "learning_rate": 6.071293579219256e-07, "loss": 0.0195, "step": 2065 }, { "epoch": 4.23, "learning_rate": 6.039623870077932e-07, "loss": 0.0205, "step": 2066 }, { "epoch": 4.23, "learning_rate": 6.008031666272129e-07, "loss": 0.0286, "step": 2067 }, { "epoch": 4.23, "learning_rate": 5.976517023501183e-07, "loss": 0.0156, "step": 2068 }, { "epoch": 4.23, "learning_rate": 5.945079997327713e-07, "loss": 0.0238, "step": 2069 }, { "epoch": 4.24, "learning_rate": 5.913720643177478e-07, "loss": 0.0147, "step": 2070 }, { "epoch": 4.24, "learning_rate": 5.882439016339286e-07, "loss": 0.0081, "step": 2071 }, { "epoch": 4.24, "learning_rate": 5.851235171964909e-07, "loss": 0.027, "step": 2072 }, { "epoch": 4.24, "learning_rate": 5.820109165069016e-07, "loss": 0.0173, "step": 2073 }, { "epoch": 4.24, "learning_rate": 5.789061050529005e-07, "loss": 0.0249, "step": 2074 }, { "epoch": 4.25, "learning_rate": 5.758090883084944e-07, "loss": 0.0183, "step": 2075 }, { "epoch": 4.25, "learning_rate": 5.727198717339511e-07, "loss": 0.0311, "step": 2076 }, { "epoch": 4.25, "learning_rate": 5.69638460775781e-07, "loss": 0.0364, "step": 2077 }, { "epoch": 4.25, "learning_rate": 5.665648608667384e-07, "loss": 0.0204, "step": 2078 }, { "epoch": 4.25, "learning_rate": 5.63499077425802e-07, "loss": 0.0228, "step": 2079 }, { "epoch": 4.26, "learning_rate": 5.604411158581702e-07, "loss": 0.0336, "step": 2080 }, { "epoch": 4.26, "learning_rate": 5.573909815552503e-07, "loss": 0.0169, "step": 2081 }, { "epoch": 4.26, "learning_rate": 5.543486798946529e-07, "loss": 0.0442, "step": 2082 }, { "epoch": 4.26, "learning_rate": 5.513142162401746e-07, "loss": 0.0232, "step": 2083 }, { "epoch": 4.27, "learning_rate": 5.482875959417983e-07, "loss": 0.0176, "step": 2084 }, { "epoch": 4.27, "learning_rate": 5.452688243356708e-07, "loss": 0.0279, "step": 2085 }, { "epoch": 4.27, "learning_rate": 5.422579067441086e-07, "loss": 0.0266, "step": 2086 }, { "epoch": 4.27, "learning_rate": 5.392548484755783e-07, "loss": 0.0184, "step": 2087 }, { "epoch": 4.27, "learning_rate": 5.362596548246895e-07, "loss": 0.0365, "step": 2088 }, { "epoch": 4.28, "learning_rate": 5.332723310721855e-07, "loss": 0.0099, "step": 2089 }, { "epoch": 4.28, "learning_rate": 5.302928824849335e-07, "loss": 0.0211, "step": 2090 }, { "epoch": 4.28, "learning_rate": 5.273213143159212e-07, "loss": 0.0397, "step": 2091 }, { "epoch": 4.28, "learning_rate": 5.243576318042359e-07, "loss": 0.0255, "step": 2092 }, { "epoch": 4.28, "learning_rate": 5.214018401750675e-07, "loss": 0.0173, "step": 2093 }, { "epoch": 4.29, "learning_rate": 5.184539446396902e-07, "loss": 0.0296, "step": 2094 }, { "epoch": 4.29, "learning_rate": 5.15513950395457e-07, "loss": 0.0142, "step": 2095 }, { "epoch": 4.29, "learning_rate": 5.125818626257928e-07, "loss": 0.0188, "step": 2096 }, { "epoch": 4.29, "learning_rate": 5.096576865001802e-07, "loss": 0.029, "step": 2097 }, { "epoch": 4.29, "learning_rate": 5.067414271741561e-07, "loss": 0.0184, "step": 2098 }, { "epoch": 4.3, "learning_rate": 5.03833089789294e-07, "loss": 0.019, "step": 2099 }, { "epoch": 4.3, "learning_rate": 5.009326794732072e-07, "loss": 0.0121, "step": 2100 }, { "epoch": 4.3, "learning_rate": 4.980402013395269e-07, "loss": 0.0273, "step": 2101 }, { "epoch": 4.3, "learning_rate": 4.951556604879049e-07, "loss": 0.0237, "step": 2102 }, { "epoch": 4.3, "learning_rate": 4.922790620039941e-07, "loss": 0.018, "step": 2103 }, { "epoch": 4.31, "learning_rate": 4.894104109594466e-07, "loss": 0.0322, "step": 2104 }, { "epoch": 4.31, "learning_rate": 4.865497124119045e-07, "loss": 0.0204, "step": 2105 }, { "epoch": 4.31, "learning_rate": 4.836969714049844e-07, "loss": 0.0339, "step": 2106 }, { "epoch": 4.31, "learning_rate": 4.808521929682785e-07, "loss": 0.0231, "step": 2107 }, { "epoch": 4.31, "learning_rate": 4.780153821173361e-07, "loss": 0.0216, "step": 2108 }, { "epoch": 4.32, "learning_rate": 4.7518654385366126e-07, "loss": 0.0182, "step": 2109 }, { "epoch": 4.32, "learning_rate": 4.723656831647e-07, "loss": 0.0141, "step": 2110 }, { "epoch": 4.32, "learning_rate": 4.695528050238368e-07, "loss": 0.0325, "step": 2111 }, { "epoch": 4.32, "learning_rate": 4.6674791439037914e-07, "loss": 0.0245, "step": 2112 }, { "epoch": 4.32, "learning_rate": 4.639510162095517e-07, "loss": 0.0218, "step": 2113 }, { "epoch": 4.33, "learning_rate": 4.611621154124912e-07, "loss": 0.0155, "step": 2114 }, { "epoch": 4.33, "learning_rate": 4.5838121691622995e-07, "loss": 0.0196, "step": 2115 }, { "epoch": 4.33, "learning_rate": 4.5560832562369593e-07, "loss": 0.0264, "step": 2116 }, { "epoch": 4.33, "learning_rate": 4.528434464236975e-07, "loss": 0.027, "step": 2117 }, { "epoch": 4.33, "learning_rate": 4.500865841909169e-07, "loss": 0.0198, "step": 2118 }, { "epoch": 4.34, "learning_rate": 4.473377437859022e-07, "loss": 0.0304, "step": 2119 }, { "epoch": 4.34, "learning_rate": 4.4459693005505956e-07, "loss": 0.0254, "step": 2120 }, { "epoch": 4.34, "learning_rate": 4.4186414783064124e-07, "loss": 0.0213, "step": 2121 }, { "epoch": 4.34, "learning_rate": 4.391394019307438e-07, "loss": 0.0183, "step": 2122 }, { "epoch": 4.34, "learning_rate": 4.3642269715928775e-07, "loss": 0.0198, "step": 2123 }, { "epoch": 4.35, "learning_rate": 4.3371403830602344e-07, "loss": 0.0148, "step": 2124 }, { "epoch": 4.35, "learning_rate": 4.3101343014651356e-07, "loss": 0.0246, "step": 2125 }, { "epoch": 4.35, "learning_rate": 4.28320877442126e-07, "loss": 0.0201, "step": 2126 }, { "epoch": 4.35, "learning_rate": 4.256363849400258e-07, "loss": 0.0149, "step": 2127 }, { "epoch": 4.36, "learning_rate": 4.2295995737316854e-07, "loss": 0.026, "step": 2128 }, { "epoch": 4.36, "learning_rate": 4.202915994602913e-07, "loss": 0.0261, "step": 2129 }, { "epoch": 4.36, "learning_rate": 4.1763131590590144e-07, "loss": 0.0198, "step": 2130 }, { "epoch": 4.36, "learning_rate": 4.1497911140027323e-07, "loss": 0.0186, "step": 2131 }, { "epoch": 4.36, "learning_rate": 4.123349906194357e-07, "loss": 0.0238, "step": 2132 }, { "epoch": 4.37, "learning_rate": 4.096989582251648e-07, "loss": 0.0233, "step": 2133 }, { "epoch": 4.37, "learning_rate": 4.0707101886497844e-07, "loss": 0.033, "step": 2134 }, { "epoch": 4.37, "learning_rate": 4.044511771721227e-07, "loss": 0.024, "step": 2135 }, { "epoch": 4.37, "learning_rate": 4.0183943776557154e-07, "loss": 0.0281, "step": 2136 }, { "epoch": 4.37, "learning_rate": 3.992358052500089e-07, "loss": 0.0233, "step": 2137 }, { "epoch": 4.38, "learning_rate": 3.9664028421582903e-07, "loss": 0.0233, "step": 2138 }, { "epoch": 4.38, "learning_rate": 3.940528792391224e-07, "loss": 0.0312, "step": 2139 }, { "epoch": 4.38, "learning_rate": 3.914735948816745e-07, "loss": 0.0205, "step": 2140 }, { "epoch": 4.38, "learning_rate": 3.8890243569094874e-07, "loss": 0.0303, "step": 2141 }, { "epoch": 4.38, "learning_rate": 3.8633940620008635e-07, "loss": 0.0135, "step": 2142 }, { "epoch": 4.39, "learning_rate": 3.837845109278948e-07, "loss": 0.0202, "step": 2143 }, { "epoch": 4.39, "learning_rate": 3.812377543788387e-07, "loss": 0.0264, "step": 2144 }, { "epoch": 4.39, "learning_rate": 3.786991410430368e-07, "loss": 0.029, "step": 2145 }, { "epoch": 4.39, "learning_rate": 3.7616867539624733e-07, "loss": 0.0148, "step": 2146 }, { "epoch": 4.39, "learning_rate": 3.7364636189986593e-07, "loss": 0.027, "step": 2147 }, { "epoch": 4.4, "learning_rate": 3.7113220500091254e-07, "loss": 0.0237, "step": 2148 }, { "epoch": 4.4, "learning_rate": 3.686262091320303e-07, "loss": 0.035, "step": 2149 }, { "epoch": 4.4, "learning_rate": 3.6612837871147057e-07, "loss": 0.0154, "step": 2150 }, { "epoch": 4.4, "learning_rate": 3.6363871814309224e-07, "loss": 0.0211, "step": 2151 }, { "epoch": 4.4, "learning_rate": 3.611572318163448e-07, "loss": 0.0269, "step": 2152 }, { "epoch": 4.41, "learning_rate": 3.586839241062695e-07, "loss": 0.0156, "step": 2153 }, { "epoch": 4.41, "learning_rate": 3.5621879937348836e-07, "loss": 0.0248, "step": 2154 }, { "epoch": 4.41, "learning_rate": 3.5376186196419527e-07, "loss": 0.0293, "step": 2155 }, { "epoch": 4.41, "learning_rate": 3.513131162101491e-07, "loss": 0.0208, "step": 2156 }, { "epoch": 4.41, "learning_rate": 3.488725664286652e-07, "loss": 0.0169, "step": 2157 }, { "epoch": 4.42, "learning_rate": 3.4644021692261165e-07, "loss": 0.0191, "step": 2158 }, { "epoch": 4.42, "learning_rate": 3.4401607198039645e-07, "loss": 0.0147, "step": 2159 }, { "epoch": 4.42, "learning_rate": 3.416001358759635e-07, "loss": 0.0219, "step": 2160 }, { "epoch": 4.42, "learning_rate": 3.3919241286878404e-07, "loss": 0.0199, "step": 2161 }, { "epoch": 4.42, "learning_rate": 3.3679290720384726e-07, "loss": 0.0251, "step": 2162 }, { "epoch": 4.43, "learning_rate": 3.3440162311165757e-07, "loss": 0.0175, "step": 2163 }, { "epoch": 4.43, "learning_rate": 3.320185648082208e-07, "loss": 0.0196, "step": 2164 }, { "epoch": 4.43, "learning_rate": 3.2964373649504447e-07, "loss": 0.0207, "step": 2165 }, { "epoch": 4.43, "learning_rate": 3.272771423591198e-07, "loss": 0.0333, "step": 2166 }, { "epoch": 4.43, "learning_rate": 3.2491878657292643e-07, "loss": 0.0152, "step": 2167 }, { "epoch": 4.44, "learning_rate": 3.2256867329441554e-07, "loss": 0.0199, "step": 2168 }, { "epoch": 4.44, "learning_rate": 3.202268066670089e-07, "loss": 0.0171, "step": 2169 }, { "epoch": 4.44, "learning_rate": 3.1789319081958704e-07, "loss": 0.0318, "step": 2170 }, { "epoch": 4.44, "learning_rate": 3.155678298664827e-07, "loss": 0.0323, "step": 2171 }, { "epoch": 4.45, "learning_rate": 3.1325072790747867e-07, "loss": 0.0273, "step": 2172 }, { "epoch": 4.45, "learning_rate": 3.1094188902779155e-07, "loss": 0.0262, "step": 2173 }, { "epoch": 4.45, "learning_rate": 3.08641317298074e-07, "loss": 0.0221, "step": 2174 }, { "epoch": 4.45, "learning_rate": 3.063490167743999e-07, "loss": 0.0322, "step": 2175 }, { "epoch": 4.45, "learning_rate": 3.040649914982613e-07, "loss": 0.0261, "step": 2176 }, { "epoch": 4.46, "learning_rate": 3.0178924549656096e-07, "loss": 0.0224, "step": 2177 }, { "epoch": 4.46, "learning_rate": 2.995217827816049e-07, "loss": 0.0194, "step": 2178 }, { "epoch": 4.46, "learning_rate": 2.972626073510937e-07, "loss": 0.0156, "step": 2179 }, { "epoch": 4.46, "learning_rate": 2.9501172318811834e-07, "loss": 0.0263, "step": 2180 }, { "epoch": 4.46, "learning_rate": 2.927691342611505e-07, "loss": 0.0205, "step": 2181 }, { "epoch": 4.47, "learning_rate": 2.905348445240369e-07, "loss": 0.0165, "step": 2182 }, { "epoch": 4.47, "learning_rate": 2.883088579159943e-07, "loss": 0.0129, "step": 2183 }, { "epoch": 4.47, "learning_rate": 2.860911783615983e-07, "loss": 0.0269, "step": 2184 }, { "epoch": 4.47, "learning_rate": 2.838818097707774e-07, "loss": 0.0124, "step": 2185 }, { "epoch": 4.47, "learning_rate": 2.8168075603881016e-07, "loss": 0.0397, "step": 2186 }, { "epoch": 4.48, "learning_rate": 2.794880210463141e-07, "loss": 0.0243, "step": 2187 }, { "epoch": 4.48, "learning_rate": 2.7730360865923954e-07, "loss": 0.0241, "step": 2188 }, { "epoch": 4.48, "learning_rate": 2.7512752272886525e-07, "loss": 0.0245, "step": 2189 }, { "epoch": 4.48, "learning_rate": 2.729597670917883e-07, "loss": 0.0181, "step": 2190 }, { "epoch": 4.48, "learning_rate": 2.708003455699182e-07, "loss": 0.0193, "step": 2191 }, { "epoch": 4.49, "learning_rate": 2.686492619704739e-07, "loss": 0.016, "step": 2192 }, { "epoch": 4.49, "learning_rate": 2.6650652008597067e-07, "loss": 0.0248, "step": 2193 }, { "epoch": 4.49, "learning_rate": 2.6437212369421873e-07, "loss": 0.0357, "step": 2194 }, { "epoch": 4.49, "learning_rate": 2.6224607655831236e-07, "loss": 0.0236, "step": 2195 }, { "epoch": 4.49, "learning_rate": 2.601283824266293e-07, "loss": 0.0199, "step": 2196 }, { "epoch": 4.5, "learning_rate": 2.580190450328163e-07, "loss": 0.0171, "step": 2197 }, { "epoch": 4.5, "learning_rate": 2.5591806809578954e-07, "loss": 0.0244, "step": 2198 }, { "epoch": 4.5, "learning_rate": 2.5382545531972394e-07, "loss": 0.0223, "step": 2199 }, { "epoch": 4.5, "learning_rate": 2.517412103940464e-07, "loss": 0.0247, "step": 2200 }, { "epoch": 4.5, "learning_rate": 2.4966533699343376e-07, "loss": 0.0251, "step": 2201 }, { "epoch": 4.51, "learning_rate": 2.475978387778e-07, "loss": 0.0172, "step": 2202 }, { "epoch": 4.51, "learning_rate": 2.4553871939229645e-07, "loss": 0.0273, "step": 2203 }, { "epoch": 4.51, "learning_rate": 2.434879824672992e-07, "loss": 0.0338, "step": 2204 }, { "epoch": 4.51, "learning_rate": 2.4144563161840606e-07, "loss": 0.0297, "step": 2205 }, { "epoch": 4.51, "learning_rate": 2.394116704464294e-07, "loss": 0.0326, "step": 2206 }, { "epoch": 4.52, "learning_rate": 2.373861025373919e-07, "loss": 0.0157, "step": 2207 }, { "epoch": 4.52, "learning_rate": 2.3536893146251626e-07, "loss": 0.0272, "step": 2208 }, { "epoch": 4.52, "learning_rate": 2.3336016077822154e-07, "loss": 0.0333, "step": 2209 }, { "epoch": 4.52, "learning_rate": 2.313597940261153e-07, "loss": 0.0179, "step": 2210 }, { "epoch": 4.52, "learning_rate": 2.2936783473299084e-07, "loss": 0.0221, "step": 2211 }, { "epoch": 4.53, "learning_rate": 2.2738428641081722e-07, "loss": 0.0247, "step": 2212 }, { "epoch": 4.53, "learning_rate": 2.2540915255673313e-07, "loss": 0.0303, "step": 2213 }, { "epoch": 4.53, "learning_rate": 2.2344243665304355e-07, "loss": 0.0223, "step": 2214 }, { "epoch": 4.53, "learning_rate": 2.2148414216721093e-07, "loss": 0.0312, "step": 2215 }, { "epoch": 4.54, "learning_rate": 2.1953427255185122e-07, "loss": 0.0255, "step": 2216 }, { "epoch": 4.54, "learning_rate": 2.1759283124472564e-07, "loss": 0.0151, "step": 2217 }, { "epoch": 4.54, "learning_rate": 2.1565982166873778e-07, "loss": 0.0196, "step": 2218 }, { "epoch": 4.54, "learning_rate": 2.137352472319215e-07, "loss": 0.0254, "step": 2219 }, { "epoch": 4.54, "learning_rate": 2.1181911132744203e-07, "loss": 0.0278, "step": 2220 }, { "epoch": 4.55, "learning_rate": 2.0991141733358755e-07, "loss": 0.0142, "step": 2221 }, { "epoch": 4.55, "learning_rate": 2.0801216861375983e-07, "loss": 0.022, "step": 2222 }, { "epoch": 4.55, "learning_rate": 2.0612136851647258e-07, "loss": 0.0192, "step": 2223 }, { "epoch": 4.55, "learning_rate": 2.0423902037534304e-07, "loss": 0.0168, "step": 2224 }, { "epoch": 4.55, "learning_rate": 2.0236512750908875e-07, "loss": 0.0207, "step": 2225 }, { "epoch": 4.56, "learning_rate": 2.0049969322151807e-07, "loss": 0.0219, "step": 2226 }, { "epoch": 4.56, "learning_rate": 1.986427208015279e-07, "loss": 0.0196, "step": 2227 }, { "epoch": 4.56, "learning_rate": 1.9679421352309548e-07, "loss": 0.0212, "step": 2228 }, { "epoch": 4.56, "learning_rate": 1.9495417464527156e-07, "loss": 0.0162, "step": 2229 }, { "epoch": 4.56, "learning_rate": 1.9312260741218114e-07, "loss": 0.0215, "step": 2230 }, { "epoch": 4.57, "learning_rate": 1.912995150530078e-07, "loss": 0.0153, "step": 2231 }, { "epoch": 4.57, "learning_rate": 1.8948490078199767e-07, "loss": 0.0246, "step": 2232 }, { "epoch": 4.57, "learning_rate": 1.8767876779844597e-07, "loss": 0.0248, "step": 2233 }, { "epoch": 4.57, "learning_rate": 1.8588111928669718e-07, "loss": 0.0167, "step": 2234 }, { "epoch": 4.57, "learning_rate": 1.8409195841613547e-07, "loss": 0.0131, "step": 2235 }, { "epoch": 4.58, "learning_rate": 1.823112883411826e-07, "loss": 0.0189, "step": 2236 }, { "epoch": 4.58, "learning_rate": 1.805391122012884e-07, "loss": 0.0351, "step": 2237 }, { "epoch": 4.58, "learning_rate": 1.7877543312092792e-07, "loss": 0.0326, "step": 2238 }, { "epoch": 4.58, "learning_rate": 1.7702025420959558e-07, "loss": 0.0149, "step": 2239 }, { "epoch": 4.58, "learning_rate": 1.7527357856179872e-07, "loss": 0.0132, "step": 2240 }, { "epoch": 4.59, "learning_rate": 1.7353540925705515e-07, "loss": 0.0191, "step": 2241 }, { "epoch": 4.59, "learning_rate": 1.7180574935988237e-07, "loss": 0.0375, "step": 2242 }, { "epoch": 4.59, "learning_rate": 1.7008460191979603e-07, "loss": 0.023, "step": 2243 }, { "epoch": 4.59, "learning_rate": 1.6837196997130434e-07, "loss": 0.0241, "step": 2244 }, { "epoch": 4.59, "learning_rate": 1.666678565339025e-07, "loss": 0.0239, "step": 2245 }, { "epoch": 4.6, "learning_rate": 1.649722646120655e-07, "loss": 0.034, "step": 2246 }, { "epoch": 4.6, "learning_rate": 1.6328519719524538e-07, "loss": 0.0206, "step": 2247 }, { "epoch": 4.6, "learning_rate": 1.6160665725786396e-07, "loss": 0.0153, "step": 2248 }, { "epoch": 4.6, "learning_rate": 1.599366477593095e-07, "loss": 0.0156, "step": 2249 }, { "epoch": 4.6, "learning_rate": 1.5827517164393013e-07, "loss": 0.015, "step": 2250 }, { "epoch": 4.61, "learning_rate": 1.5662223184102876e-07, "loss": 0.0137, "step": 2251 }, { "epoch": 4.61, "learning_rate": 1.5497783126485865e-07, "loss": 0.025, "step": 2252 }, { "epoch": 4.61, "learning_rate": 1.5334197281461626e-07, "loss": 0.0198, "step": 2253 }, { "epoch": 4.61, "learning_rate": 1.5171465937444009e-07, "loss": 0.0187, "step": 2254 }, { "epoch": 4.61, "learning_rate": 1.5009589381340117e-07, "loss": 0.0217, "step": 2255 }, { "epoch": 4.62, "learning_rate": 1.484856789855016e-07, "loss": 0.0186, "step": 2256 }, { "epoch": 4.62, "learning_rate": 1.4688401772966599e-07, "loss": 0.0157, "step": 2257 }, { "epoch": 4.62, "learning_rate": 1.4529091286973994e-07, "loss": 0.0349, "step": 2258 }, { "epoch": 4.62, "learning_rate": 1.437063672144834e-07, "loss": 0.0356, "step": 2259 }, { "epoch": 4.63, "learning_rate": 1.4213038355756436e-07, "loss": 0.0114, "step": 2260 }, { "epoch": 4.63, "learning_rate": 1.4056296467755803e-07, "loss": 0.0417, "step": 2261 }, { "epoch": 4.63, "learning_rate": 1.3900411333793605e-07, "loss": 0.0339, "step": 2262 }, { "epoch": 4.63, "learning_rate": 1.3745383228706776e-07, "loss": 0.0253, "step": 2263 }, { "epoch": 4.63, "learning_rate": 1.3591212425821009e-07, "loss": 0.0286, "step": 2264 }, { "epoch": 4.64, "learning_rate": 1.3437899196950765e-07, "loss": 0.0209, "step": 2265 }, { "epoch": 4.64, "learning_rate": 1.3285443812398324e-07, "loss": 0.0338, "step": 2266 }, { "epoch": 4.64, "learning_rate": 1.3133846540953567e-07, "loss": 0.0109, "step": 2267 }, { "epoch": 4.64, "learning_rate": 1.298310764989358e-07, "loss": 0.0202, "step": 2268 }, { "epoch": 4.64, "learning_rate": 1.283322740498183e-07, "loss": 0.0275, "step": 2269 }, { "epoch": 4.65, "learning_rate": 1.2684206070468276e-07, "loss": 0.0244, "step": 2270 }, { "epoch": 4.65, "learning_rate": 1.253604390908819e-07, "loss": 0.0267, "step": 2271 }, { "epoch": 4.65, "learning_rate": 1.2388741182062348e-07, "loss": 0.0334, "step": 2272 }, { "epoch": 4.65, "learning_rate": 1.2242298149095998e-07, "loss": 0.0228, "step": 2273 }, { "epoch": 4.65, "learning_rate": 1.209671506837895e-07, "loss": 0.0158, "step": 2274 }, { "epoch": 4.66, "learning_rate": 1.1951992196584772e-07, "loss": 0.0324, "step": 2275 }, { "epoch": 4.66, "learning_rate": 1.1808129788870304e-07, "loss": 0.0272, "step": 2276 }, { "epoch": 4.66, "learning_rate": 1.1665128098875433e-07, "loss": 0.0381, "step": 2277 }, { "epoch": 4.66, "learning_rate": 1.1522987378722595e-07, "loss": 0.0258, "step": 2278 }, { "epoch": 4.66, "learning_rate": 1.1381707879016158e-07, "loss": 0.0249, "step": 2279 }, { "epoch": 4.67, "learning_rate": 1.1241289848842151e-07, "loss": 0.0166, "step": 2280 }, { "epoch": 4.67, "learning_rate": 1.1101733535767767e-07, "loss": 0.0212, "step": 2281 }, { "epoch": 4.67, "learning_rate": 1.0963039185840852e-07, "loss": 0.0202, "step": 2282 }, { "epoch": 4.67, "learning_rate": 1.0825207043589747e-07, "loss": 0.0244, "step": 2283 }, { "epoch": 4.67, "learning_rate": 1.0688237352022346e-07, "loss": 0.0193, "step": 2284 }, { "epoch": 4.68, "learning_rate": 1.0552130352626421e-07, "loss": 0.0217, "step": 2285 }, { "epoch": 4.68, "learning_rate": 1.0416886285368188e-07, "loss": 0.0172, "step": 2286 }, { "epoch": 4.68, "learning_rate": 1.0282505388692964e-07, "loss": 0.0231, "step": 2287 }, { "epoch": 4.68, "learning_rate": 1.0148987899523954e-07, "loss": 0.0213, "step": 2288 }, { "epoch": 4.68, "learning_rate": 1.0016334053262244e-07, "loss": 0.0124, "step": 2289 }, { "epoch": 4.69, "learning_rate": 9.884544083786197e-08, "loss": 0.0231, "step": 2290 }, { "epoch": 4.69, "learning_rate": 9.753618223451e-08, "loss": 0.0239, "step": 2291 }, { "epoch": 4.69, "learning_rate": 9.623556703088566e-08, "loss": 0.0168, "step": 2292 }, { "epoch": 4.69, "learning_rate": 9.494359752006687e-08, "loss": 0.0182, "step": 2293 }, { "epoch": 4.69, "learning_rate": 9.366027597989103e-08, "loss": 0.0221, "step": 2294 }, { "epoch": 4.7, "learning_rate": 9.238560467294655e-08, "loss": 0.0129, "step": 2295 }, { "epoch": 4.7, "learning_rate": 9.111958584657133e-08, "loss": 0.0249, "step": 2296 }, { "epoch": 4.7, "learning_rate": 8.986222173284876e-08, "loss": 0.0258, "step": 2297 }, { "epoch": 4.7, "learning_rate": 8.861351454860224e-08, "loss": 0.022, "step": 2298 }, { "epoch": 4.71, "learning_rate": 8.737346649539513e-08, "loss": 0.0247, "step": 2299 }, { "epoch": 4.71, "learning_rate": 8.614207975952083e-08, "loss": 0.018, "step": 2300 }, { "epoch": 4.71, "learning_rate": 8.491935651200323e-08, "loss": 0.0198, "step": 2301 }, { "epoch": 4.71, "learning_rate": 8.370529890859291e-08, "loss": 0.0345, "step": 2302 }, { "epoch": 4.71, "learning_rate": 8.249990908976214e-08, "loss": 0.0162, "step": 2303 }, { "epoch": 4.72, "learning_rate": 8.130318918069924e-08, "loss": 0.0186, "step": 2304 }, { "epoch": 4.72, "learning_rate": 8.011514129130982e-08, "loss": 0.0167, "step": 2305 }, { "epoch": 4.72, "learning_rate": 7.893576751620724e-08, "loss": 0.0271, "step": 2306 }, { "epoch": 4.72, "learning_rate": 7.776506993471323e-08, "loss": 0.0212, "step": 2307 }, { "epoch": 4.72, "learning_rate": 7.660305061085394e-08, "loss": 0.0192, "step": 2308 }, { "epoch": 4.73, "learning_rate": 7.544971159335334e-08, "loss": 0.0185, "step": 2309 }, { "epoch": 4.73, "learning_rate": 7.430505491563101e-08, "loss": 0.0266, "step": 2310 }, { "epoch": 4.73, "learning_rate": 7.316908259580036e-08, "loss": 0.0167, "step": 2311 }, { "epoch": 4.73, "learning_rate": 7.204179663666321e-08, "loss": 0.0242, "step": 2312 }, { "epoch": 4.73, "learning_rate": 7.092319902570699e-08, "loss": 0.016, "step": 2313 }, { "epoch": 4.74, "learning_rate": 6.981329173509909e-08, "loss": 0.0163, "step": 2314 }, { "epoch": 4.74, "learning_rate": 6.871207672168755e-08, "loss": 0.0128, "step": 2315 }, { "epoch": 4.74, "learning_rate": 6.761955592699488e-08, "loss": 0.0246, "step": 2316 }, { "epoch": 4.74, "learning_rate": 6.653573127721414e-08, "loss": 0.0252, "step": 2317 }, { "epoch": 4.74, "learning_rate": 6.546060468320625e-08, "loss": 0.0243, "step": 2318 }, { "epoch": 4.75, "learning_rate": 6.439417804049885e-08, "loss": 0.0236, "step": 2319 }, { "epoch": 4.75, "learning_rate": 6.333645322927796e-08, "loss": 0.0284, "step": 2320 }, { "epoch": 4.75, "learning_rate": 6.22874321143907e-08, "loss": 0.0135, "step": 2321 }, { "epoch": 4.75, "learning_rate": 6.124711654533654e-08, "loss": 0.0221, "step": 2322 }, { "epoch": 4.75, "learning_rate": 6.021550835626777e-08, "loss": 0.0266, "step": 2323 }, { "epoch": 4.76, "learning_rate": 5.919260936598448e-08, "loss": 0.0308, "step": 2324 }, { "epoch": 4.76, "learning_rate": 5.817842137793128e-08, "loss": 0.0219, "step": 2325 }, { "epoch": 4.76, "learning_rate": 5.717294618019675e-08, "loss": 0.0223, "step": 2326 }, { "epoch": 4.76, "learning_rate": 5.617618554550508e-08, "loss": 0.0199, "step": 2327 }, { "epoch": 4.76, "learning_rate": 5.518814123121885e-08, "loss": 0.0251, "step": 2328 }, { "epoch": 4.77, "learning_rate": 5.4208814979330725e-08, "loss": 0.0242, "step": 2329 }, { "epoch": 4.77, "learning_rate": 5.323820851646511e-08, "loss": 0.0206, "step": 2330 }, { "epoch": 4.77, "learning_rate": 5.227632355387091e-08, "loss": 0.0283, "step": 2331 }, { "epoch": 4.77, "learning_rate": 5.1323161787421585e-08, "loss": 0.0211, "step": 2332 }, { "epoch": 4.77, "learning_rate": 5.037872489761064e-08, "loss": 0.0183, "step": 2333 }, { "epoch": 4.78, "learning_rate": 4.944301454954892e-08, "loss": 0.0218, "step": 2334 }, { "epoch": 4.78, "learning_rate": 4.851603239296065e-08, "loss": 0.0141, "step": 2335 }, { "epoch": 4.78, "learning_rate": 4.759778006218407e-08, "loss": 0.0348, "step": 2336 }, { "epoch": 4.78, "learning_rate": 4.668825917616415e-08, "loss": 0.0276, "step": 2337 }, { "epoch": 4.78, "learning_rate": 4.578747133845263e-08, "loss": 0.0306, "step": 2338 }, { "epoch": 4.79, "learning_rate": 4.489541813720355e-08, "loss": 0.0204, "step": 2339 }, { "epoch": 4.79, "learning_rate": 4.401210114517107e-08, "loss": 0.0261, "step": 2340 }, { "epoch": 4.79, "learning_rate": 4.313752191970777e-08, "loss": 0.0446, "step": 2341 }, { "epoch": 4.79, "learning_rate": 4.227168200276077e-08, "loss": 0.016, "step": 2342 }, { "epoch": 4.8, "learning_rate": 4.141458292086731e-08, "loss": 0.0169, "step": 2343 }, { "epoch": 4.8, "learning_rate": 4.056622618515638e-08, "loss": 0.0224, "step": 2344 }, { "epoch": 4.8, "learning_rate": 3.972661329134264e-08, "loss": 0.0162, "step": 2345 }, { "epoch": 4.8, "learning_rate": 3.889574571972365e-08, "loss": 0.0144, "step": 2346 }, { "epoch": 4.8, "learning_rate": 3.807362493518041e-08, "loss": 0.0142, "step": 2347 }, { "epoch": 4.81, "learning_rate": 3.7260252387171235e-08, "loss": 0.0262, "step": 2348 }, { "epoch": 4.81, "learning_rate": 3.645562950973014e-08, "loss": 0.0154, "step": 2349 }, { "epoch": 4.81, "learning_rate": 3.565975772146679e-08, "loss": 0.027, "step": 2350 }, { "epoch": 4.81, "learning_rate": 3.487263842555988e-08, "loss": 0.0201, "step": 2351 }, { "epoch": 4.81, "learning_rate": 3.4094273009759314e-08, "loss": 0.0187, "step": 2352 }, { "epoch": 4.82, "learning_rate": 3.3324662846378476e-08, "loss": 0.0176, "step": 2353 }, { "epoch": 4.82, "learning_rate": 3.2563809292296414e-08, "loss": 0.0273, "step": 2354 }, { "epoch": 4.82, "learning_rate": 3.181171368895342e-08, "loss": 0.0212, "step": 2355 }, { "epoch": 4.82, "learning_rate": 3.10683773623488e-08, "loss": 0.0379, "step": 2356 }, { "epoch": 4.82, "learning_rate": 3.033380162303812e-08, "loss": 0.0149, "step": 2357 }, { "epoch": 4.83, "learning_rate": 2.9607987766131496e-08, "loss": 0.0385, "step": 2358 }, { "epoch": 4.83, "learning_rate": 2.8890937071292537e-08, "loss": 0.0251, "step": 2359 }, { "epoch": 4.83, "learning_rate": 2.8182650802732194e-08, "loss": 0.0372, "step": 2360 }, { "epoch": 4.83, "learning_rate": 2.7483130209212116e-08, "loss": 0.0174, "step": 2361 }, { "epoch": 4.83, "learning_rate": 2.6792376524036878e-08, "loss": 0.0232, "step": 2362 }, { "epoch": 4.84, "learning_rate": 2.6110390965055632e-08, "loss": 0.0227, "step": 2363 }, { "epoch": 4.84, "learning_rate": 2.543717473465823e-08, "loss": 0.021, "step": 2364 }, { "epoch": 4.84, "learning_rate": 2.4772729019774122e-08, "loss": 0.0193, "step": 2365 }, { "epoch": 4.84, "learning_rate": 2.411705499186956e-08, "loss": 0.0385, "step": 2366 }, { "epoch": 4.84, "learning_rate": 2.347015380694484e-08, "loss": 0.0148, "step": 2367 }, { "epoch": 4.85, "learning_rate": 2.2832026605534296e-08, "loss": 0.0135, "step": 2368 }, { "epoch": 4.85, "learning_rate": 2.2202674512702415e-08, "loss": 0.0238, "step": 2369 }, { "epoch": 4.85, "learning_rate": 2.158209863804217e-08, "loss": 0.0198, "step": 2370 }, { "epoch": 4.85, "learning_rate": 2.097030007567502e-08, "loss": 0.0173, "step": 2371 }, { "epoch": 4.85, "learning_rate": 2.0367279904244808e-08, "loss": 0.0216, "step": 2372 }, { "epoch": 4.86, "learning_rate": 1.9773039186920528e-08, "loss": 0.0209, "step": 2373 }, { "epoch": 4.86, "learning_rate": 1.9187578971391897e-08, "loss": 0.0331, "step": 2374 }, { "epoch": 4.86, "learning_rate": 1.8610900289867673e-08, "loss": 0.0261, "step": 2375 }, { "epoch": 4.86, "learning_rate": 1.804300415907456e-08, "loss": 0.0219, "step": 2376 }, { "epoch": 4.86, "learning_rate": 1.7483891580253877e-08, "loss": 0.0241, "step": 2377 }, { "epoch": 4.87, "learning_rate": 1.693356353916209e-08, "loss": 0.0291, "step": 2378 }, { "epoch": 4.87, "learning_rate": 1.639202100606807e-08, "loss": 0.0247, "step": 2379 }, { "epoch": 4.87, "learning_rate": 1.585926493574974e-08, "loss": 0.0173, "step": 2380 }, { "epoch": 4.87, "learning_rate": 1.5335296267495747e-08, "loss": 0.014, "step": 2381 }, { "epoch": 4.87, "learning_rate": 1.4820115925099355e-08, "loss": 0.0212, "step": 2382 }, { "epoch": 4.88, "learning_rate": 1.4313724816862328e-08, "loss": 0.0144, "step": 2383 }, { "epoch": 4.88, "learning_rate": 1.3816123835588835e-08, "loss": 0.0225, "step": 2384 }, { "epoch": 4.88, "learning_rate": 1.3327313858584878e-08, "loss": 0.0361, "step": 2385 }, { "epoch": 4.88, "learning_rate": 1.2847295747658306e-08, "loss": 0.0165, "step": 2386 }, { "epoch": 4.89, "learning_rate": 1.2376070349115477e-08, "loss": 0.02, "step": 2387 }, { "epoch": 4.89, "learning_rate": 1.1913638493762369e-08, "loss": 0.0154, "step": 2388 }, { "epoch": 4.89, "learning_rate": 1.1460000996897924e-08, "loss": 0.0219, "step": 2389 }, { "epoch": 4.89, "learning_rate": 1.101515865831959e-08, "loss": 0.0245, "step": 2390 }, { "epoch": 4.89, "learning_rate": 1.0579112262316116e-08, "loss": 0.0223, "step": 2391 }, { "epoch": 4.9, "learning_rate": 1.0151862577668648e-08, "loss": 0.0255, "step": 2392 }, { "epoch": 4.9, "learning_rate": 9.733410357650186e-09, "loss": 0.0156, "step": 2393 }, { "epoch": 4.9, "learning_rate": 9.323756340021694e-09, "loss": 0.0212, "step": 2394 }, { "epoch": 4.9, "learning_rate": 8.922901247033212e-09, "loss": 0.0216, "step": 2395 }, { "epoch": 4.9, "learning_rate": 8.530845785421072e-09, "loss": 0.0289, "step": 2396 }, { "epoch": 4.91, "learning_rate": 8.147590646408465e-09, "loss": 0.0261, "step": 2397 }, { "epoch": 4.91, "learning_rate": 7.773136505700995e-09, "loss": 0.0267, "step": 2398 }, { "epoch": 4.91, "learning_rate": 7.407484023488898e-09, "loss": 0.0253, "step": 2399 }, { "epoch": 4.91, "learning_rate": 7.050633844443711e-09, "loss": 0.028, "step": 2400 }, { "epoch": 4.91, "learning_rate": 6.702586597719385e-09, "loss": 0.0204, "step": 2401 }, { "epoch": 4.92, "learning_rate": 6.363342896946734e-09, "loss": 0.0275, "step": 2402 }, { "epoch": 4.92, "learning_rate": 6.0329033402384275e-09, "loss": 0.0224, "step": 2403 }, { "epoch": 4.92, "learning_rate": 5.711268510182888e-09, "loss": 0.0432, "step": 2404 }, { "epoch": 4.92, "learning_rate": 5.398438973845954e-09, "loss": 0.0227, "step": 2405 }, { "epoch": 4.92, "learning_rate": 5.094415282768661e-09, "loss": 0.0191, "step": 2406 }, { "epoch": 4.93, "learning_rate": 4.7991979729661295e-09, "loss": 0.0289, "step": 2407 }, { "epoch": 4.93, "learning_rate": 4.512787564929233e-09, "loss": 0.0234, "step": 2408 }, { "epoch": 4.93, "learning_rate": 4.235184563619599e-09, "loss": 0.0273, "step": 2409 }, { "epoch": 4.93, "learning_rate": 3.9663894584712756e-09, "loss": 0.0104, "step": 2410 }, { "epoch": 4.93, "learning_rate": 3.7064027233896237e-09, "loss": 0.0201, "step": 2411 }, { "epoch": 4.94, "learning_rate": 3.4552248167507576e-09, "loss": 0.0187, "step": 2412 }, { "epoch": 4.94, "learning_rate": 3.2128561813987715e-09, "loss": 0.0187, "step": 2413 }, { "epoch": 4.94, "learning_rate": 2.9792972446479605e-09, "loss": 0.0328, "step": 2414 }, { "epoch": 4.94, "learning_rate": 2.754548418278935e-09, "loss": 0.0301, "step": 2415 }, { "epoch": 4.94, "learning_rate": 2.538610098540284e-09, "loss": 0.0327, "step": 2416 }, { "epoch": 4.95, "learning_rate": 2.3314826661474667e-09, "loss": 0.0151, "step": 2417 }, { "epoch": 4.95, "learning_rate": 2.1331664862811464e-09, "loss": 0.0302, "step": 2418 }, { "epoch": 4.95, "learning_rate": 1.943661908586636e-09, "loss": 0.0417, "step": 2419 }, { "epoch": 4.95, "learning_rate": 1.7629692671744526e-09, "loss": 0.0227, "step": 2420 }, { "epoch": 4.95, "learning_rate": 1.591088880619207e-09, "loss": 0.0181, "step": 2421 }, { "epoch": 4.96, "learning_rate": 1.4280210519584948e-09, "loss": 0.0281, "step": 2422 }, { "epoch": 4.96, "learning_rate": 1.2737660686917842e-09, "loss": 0.0199, "step": 2423 }, { "epoch": 4.96, "learning_rate": 1.128324202783193e-09, "loss": 0.017, "step": 2424 }, { "epoch": 4.96, "learning_rate": 9.916957106559378e-10, "loss": 0.0123, "step": 2425 }, { "epoch": 4.96, "learning_rate": 8.638808331973281e-10, "loss": 0.0234, "step": 2426 }, { "epoch": 4.97, "learning_rate": 7.448797957526621e-10, "loss": 0.0299, "step": 2427 }, { "epoch": 4.97, "learning_rate": 6.346928081313319e-10, "loss": 0.0129, "step": 2428 }, { "epoch": 4.97, "learning_rate": 5.333200645996073e-10, "loss": 0.0248, "step": 2429 }, { "epoch": 4.97, "learning_rate": 4.407617438850764e-10, "loss": 0.0209, "step": 2430 }, { "epoch": 4.98, "learning_rate": 3.5701800917553595e-10, "loss": 0.0287, "step": 2431 }, { "epoch": 4.98, "learning_rate": 2.820890081167704e-10, "loss": 0.0199, "step": 2432 }, { "epoch": 4.98, "learning_rate": 2.1597487281366236e-10, "loss": 0.0327, "step": 2433 }, { "epoch": 4.98, "learning_rate": 1.586757198313027e-10, "loss": 0.0231, "step": 2434 }, { "epoch": 4.98, "learning_rate": 1.101916501911049e-10, "loss": 0.029, "step": 2435 }, { "epoch": 4.99, "learning_rate": 7.052274937469073e-11, "loss": 0.0271, "step": 2436 }, { "epoch": 4.99, "learning_rate": 3.9669087321669854e-11, "loss": 0.0223, "step": 2437 }, { "epoch": 4.99, "learning_rate": 1.7630718427974482e-11, "loss": 0.0207, "step": 2438 }, { "epoch": 4.99, "learning_rate": 4.407681549745136e-12, "loss": 0.042, "step": 2439 }, { "epoch": 4.99, "learning_rate": 0.0, "loss": 0.0293, "step": 2440 }, { "epoch": 4.99, "step": 2440, "total_flos": 6.199193994568663e+17, "train_loss": 0.2505870845901673, "train_runtime": 23823.6854, "train_samples_per_second": 13.124, "train_steps_per_second": 0.102 } ], "max_steps": 2440, "num_train_epochs": 5, "total_flos": 6.199193994568663e+17, "trial_name": null, "trial_params": null }