diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7848 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 1303, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.000000000000001e-07, + "loss": 0.6228, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6365, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-06, + "loss": 0.6543, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.5972, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.5e-06, + "loss": 0.5361, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.5745, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 3.5e-06, + "loss": 0.5942, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 4.000000000000001e-06, + "loss": 0.4918, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 4.5e-06, + "loss": 0.4242, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 0.2893, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 5.500000000000001e-06, + "loss": 0.3571, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 6e-06, + "loss": 0.3047, + "step": 12 + }, + { + "epoch": 0.01, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.283, + "step": 13 + }, + { + "epoch": 0.01, + "learning_rate": 7e-06, + "loss": 0.3107, + "step": 14 + }, + { + "epoch": 0.01, + "learning_rate": 7.500000000000001e-06, + "loss": 0.2729, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 8.000000000000001e-06, + "loss": 0.2882, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 8.5e-06, + "loss": 0.2396, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 9e-06, + "loss": 0.292, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 9.5e-06, + "loss": 0.2642, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 0.2579, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.2875, + "step": 21 + }, + { + "epoch": 0.02, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.2772, + "step": 22 + }, + { + "epoch": 0.02, + "learning_rate": 1.15e-05, + "loss": 0.2433, + "step": 23 + }, + { + "epoch": 0.02, + "learning_rate": 1.2e-05, + "loss": 0.2805, + "step": 24 + }, + { + "epoch": 0.02, + "learning_rate": 1.25e-05, + "loss": 0.251, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.2667, + "step": 26 + }, + { + "epoch": 0.02, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.2556, + "step": 27 + }, + { + "epoch": 0.02, + "learning_rate": 1.4e-05, + "loss": 0.243, + "step": 28 + }, + { + "epoch": 0.02, + "learning_rate": 1.45e-05, + "loss": 0.2465, + "step": 29 + }, + { + "epoch": 0.02, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.2405, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 1.55e-05, + "loss": 0.2587, + "step": 31 + }, + { + "epoch": 0.02, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.2276, + "step": 32 + }, + { + "epoch": 0.03, + "learning_rate": 1.65e-05, + "loss": 0.2637, + "step": 33 + }, + { + "epoch": 0.03, + "learning_rate": 1.7e-05, + "loss": 0.2574, + "step": 34 + }, + { + "epoch": 0.03, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.2114, + "step": 35 + }, + { + "epoch": 0.03, + "learning_rate": 1.8e-05, + "loss": 0.2324, + "step": 36 + }, + { + "epoch": 0.03, + "learning_rate": 1.8500000000000002e-05, + "loss": 0.201, + "step": 37 + }, + { + "epoch": 0.03, + "learning_rate": 1.9e-05, + "loss": 0.1838, + "step": 38 + }, + { + "epoch": 0.03, + "learning_rate": 1.95e-05, + "loss": 0.2421, + "step": 39 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 0.2412, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999969064095054e-05, + "loss": 0.2747, + "step": 41 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999876256571612e-05, + "loss": 0.2628, + "step": 42 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999721578003894e-05, + "loss": 0.2518, + "step": 43 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999505029348927e-05, + "loss": 0.1815, + "step": 44 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999226611946536e-05, + "loss": 0.2665, + "step": 45 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998886327519337e-05, + "loss": 0.2011, + "step": 46 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998484178172735e-05, + "loss": 0.2118, + "step": 47 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998020166394898e-05, + "loss": 0.2552, + "step": 48 + }, + { + "epoch": 0.04, + "learning_rate": 1.999749429505675e-05, + "loss": 0.2316, + "step": 49 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996906567411957e-05, + "loss": 0.191, + "step": 50 + }, + { + "epoch": 0.04, + "learning_rate": 1.999625698709689e-05, + "loss": 0.1987, + "step": 51 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995545558130624e-05, + "loss": 0.2211, + "step": 52 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994772284914897e-05, + "loss": 0.2299, + "step": 53 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993937172234094e-05, + "loss": 0.203, + "step": 54 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993040225255205e-05, + "loss": 0.2178, + "step": 55 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992081449527804e-05, + "loss": 0.2125, + "step": 56 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991060850984007e-05, + "loss": 0.2457, + "step": 57 + }, + { + "epoch": 0.04, + "learning_rate": 1.998997843593845e-05, + "loss": 0.2397, + "step": 58 + }, + { + "epoch": 0.05, + "learning_rate": 1.998883421108822e-05, + "loss": 0.217, + "step": 59 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987628183512854e-05, + "loss": 0.2744, + "step": 60 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986360360674252e-05, + "loss": 0.1821, + "step": 61 + }, + { + "epoch": 0.05, + "learning_rate": 1.998503075041667e-05, + "loss": 0.2382, + "step": 62 + }, + { + "epoch": 0.05, + "learning_rate": 1.998363936096665e-05, + "loss": 0.2343, + "step": 63 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982186200932964e-05, + "loss": 0.2371, + "step": 64 + }, + { + "epoch": 0.05, + "learning_rate": 1.998067127930658e-05, + "loss": 0.2141, + "step": 65 + }, + { + "epoch": 0.05, + "learning_rate": 1.997909460546059e-05, + "loss": 0.2029, + "step": 66 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977456189150164e-05, + "loss": 0.1962, + "step": 67 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975756040512477e-05, + "loss": 0.2177, + "step": 68 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973994170066654e-05, + "loss": 0.2207, + "step": 69 + }, + { + "epoch": 0.05, + "learning_rate": 1.9972170588713715e-05, + "loss": 0.1895, + "step": 70 + }, + { + "epoch": 0.05, + "learning_rate": 1.997028530773648e-05, + "loss": 0.1954, + "step": 71 + }, + { + "epoch": 0.06, + "learning_rate": 1.9968338338799525e-05, + "loss": 0.2328, + "step": 72 + }, + { + "epoch": 0.06, + "learning_rate": 1.9966329693949098e-05, + "loss": 0.2108, + "step": 73 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964259385613053e-05, + "loss": 0.2124, + "step": 74 + }, + { + "epoch": 0.06, + "learning_rate": 1.996212742660076e-05, + "loss": 0.1819, + "step": 75 + }, + { + "epoch": 0.06, + "learning_rate": 1.9959933830103034e-05, + "loss": 0.2227, + "step": 76 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957678609692056e-05, + "loss": 0.196, + "step": 77 + }, + { + "epoch": 0.06, + "learning_rate": 1.995536177932128e-05, + "loss": 0.1704, + "step": 78 + }, + { + "epoch": 0.06, + "learning_rate": 1.9952983353325358e-05, + "loss": 0.1788, + "step": 79 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950543346420042e-05, + "loss": 0.2139, + "step": 80 + }, + { + "epoch": 0.06, + "learning_rate": 1.994804177370209e-05, + "loss": 0.1984, + "step": 81 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945478650649192e-05, + "loss": 0.1946, + "step": 82 + }, + { + "epoch": 0.06, + "learning_rate": 1.9942853993119852e-05, + "loss": 0.2256, + "step": 83 + }, + { + "epoch": 0.06, + "learning_rate": 1.99401678173533e-05, + "loss": 0.2323, + "step": 84 + }, + { + "epoch": 0.07, + "learning_rate": 1.9937420139969397e-05, + "loss": 0.1898, + "step": 85 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934610977968514e-05, + "loss": 0.1957, + "step": 86 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931740348731447e-05, + "loss": 0.1924, + "step": 87 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928808270019297e-05, + "loss": 0.2029, + "step": 88 + }, + { + "epoch": 0.07, + "learning_rate": 1.992581475997337e-05, + "loss": 0.2148, + "step": 89 + }, + { + "epoch": 0.07, + "learning_rate": 1.992275983711505e-05, + "loss": 0.2034, + "step": 90 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919643520345698e-05, + "loss": 0.1851, + "step": 91 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916465828946533e-05, + "loss": 0.1973, + "step": 92 + }, + { + "epoch": 0.07, + "learning_rate": 1.9913226782578506e-05, + "loss": 0.168, + "step": 93 + }, + { + "epoch": 0.07, + "learning_rate": 1.990992640128218e-05, + "loss": 0.2184, + "step": 94 + }, + { + "epoch": 0.07, + "learning_rate": 1.9906564705477616e-05, + "loss": 0.2015, + "step": 95 + }, + { + "epoch": 0.07, + "learning_rate": 1.9903141715964233e-05, + "loss": 0.1582, + "step": 96 + }, + { + "epoch": 0.07, + "learning_rate": 1.989965745392068e-05, + "loss": 0.1948, + "step": 97 + }, + { + "epoch": 0.08, + "learning_rate": 1.9896111940904724e-05, + "loss": 0.2361, + "step": 98 + }, + { + "epoch": 0.08, + "learning_rate": 1.9892505198853097e-05, + "loss": 0.2212, + "step": 99 + }, + { + "epoch": 0.08, + "learning_rate": 1.988883725008136e-05, + "loss": 0.1744, + "step": 100 + }, + { + "epoch": 0.08, + "learning_rate": 1.988510811728378e-05, + "loss": 0.1954, + "step": 101 + }, + { + "epoch": 0.08, + "learning_rate": 1.9881317823533176e-05, + "loss": 0.174, + "step": 102 + }, + { + "epoch": 0.08, + "learning_rate": 1.9877466392280773e-05, + "loss": 0.1631, + "step": 103 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873553847356087e-05, + "loss": 0.1951, + "step": 104 + }, + { + "epoch": 0.08, + "learning_rate": 1.986958021296673e-05, + "loss": 0.1931, + "step": 105 + }, + { + "epoch": 0.08, + "learning_rate": 1.9865545513698304e-05, + "loss": 0.1638, + "step": 106 + }, + { + "epoch": 0.08, + "learning_rate": 1.9861449774514225e-05, + "loss": 0.2454, + "step": 107 + }, + { + "epoch": 0.08, + "learning_rate": 1.9857293020755565e-05, + "loss": 0.1537, + "step": 108 + }, + { + "epoch": 0.08, + "learning_rate": 1.9853075278140913e-05, + "loss": 0.2051, + "step": 109 + }, + { + "epoch": 0.08, + "learning_rate": 1.9848796572766214e-05, + "loss": 0.1603, + "step": 110 + }, + { + "epoch": 0.09, + "learning_rate": 1.9844456931104588e-05, + "loss": 0.1938, + "step": 111 + }, + { + "epoch": 0.09, + "learning_rate": 1.984005638000618e-05, + "loss": 0.1907, + "step": 112 + }, + { + "epoch": 0.09, + "learning_rate": 1.9835594946698e-05, + "loss": 0.2059, + "step": 113 + }, + { + "epoch": 0.09, + "learning_rate": 1.9831072658783745e-05, + "loss": 0.2077, + "step": 114 + }, + { + "epoch": 0.09, + "learning_rate": 1.9826489544243623e-05, + "loss": 0.2125, + "step": 115 + }, + { + "epoch": 0.09, + "learning_rate": 1.9821845631434192e-05, + "loss": 0.1701, + "step": 116 + }, + { + "epoch": 0.09, + "learning_rate": 1.9817140949088193e-05, + "loss": 0.2443, + "step": 117 + }, + { + "epoch": 0.09, + "learning_rate": 1.981237552631434e-05, + "loss": 0.2032, + "step": 118 + }, + { + "epoch": 0.09, + "learning_rate": 1.980754939259716e-05, + "loss": 0.2058, + "step": 119 + }, + { + "epoch": 0.09, + "learning_rate": 1.9802662577796822e-05, + "loss": 0.2188, + "step": 120 + }, + { + "epoch": 0.09, + "learning_rate": 1.9797715112148937e-05, + "loss": 0.1956, + "step": 121 + }, + { + "epoch": 0.09, + "learning_rate": 1.9792707026264364e-05, + "loss": 0.1921, + "step": 122 + }, + { + "epoch": 0.09, + "learning_rate": 1.978763835112904e-05, + "loss": 0.1755, + "step": 123 + }, + { + "epoch": 0.1, + "learning_rate": 1.9782509118103773e-05, + "loss": 0.1808, + "step": 124 + }, + { + "epoch": 0.1, + "learning_rate": 1.977731935892406e-05, + "loss": 0.1833, + "step": 125 + }, + { + "epoch": 0.1, + "learning_rate": 1.9772069105699878e-05, + "loss": 0.203, + "step": 126 + }, + { + "epoch": 0.1, + "learning_rate": 1.9766758390915494e-05, + "loss": 0.1536, + "step": 127 + }, + { + "epoch": 0.1, + "learning_rate": 1.976138724742926e-05, + "loss": 0.1759, + "step": 128 + }, + { + "epoch": 0.1, + "learning_rate": 1.9755955708473416e-05, + "loss": 0.1912, + "step": 129 + }, + { + "epoch": 0.1, + "learning_rate": 1.9750463807653873e-05, + "loss": 0.156, + "step": 130 + }, + { + "epoch": 0.1, + "learning_rate": 1.9744911578950023e-05, + "loss": 0.2351, + "step": 131 + }, + { + "epoch": 0.1, + "learning_rate": 1.9739299056714497e-05, + "loss": 0.1415, + "step": 132 + }, + { + "epoch": 0.1, + "learning_rate": 1.9733626275673e-05, + "loss": 0.1274, + "step": 133 + }, + { + "epoch": 0.1, + "learning_rate": 1.9727893270924043e-05, + "loss": 0.1941, + "step": 134 + }, + { + "epoch": 0.1, + "learning_rate": 1.972210007793877e-05, + "loss": 0.1843, + "step": 135 + }, + { + "epoch": 0.1, + "learning_rate": 1.9716246732560715e-05, + "loss": 0.1786, + "step": 136 + }, + { + "epoch": 0.11, + "learning_rate": 1.9710333271005586e-05, + "loss": 0.2245, + "step": 137 + }, + { + "epoch": 0.11, + "learning_rate": 1.9704359729861035e-05, + "loss": 0.1808, + "step": 138 + }, + { + "epoch": 0.11, + "learning_rate": 1.9698326146086446e-05, + "loss": 0.1969, + "step": 139 + }, + { + "epoch": 0.11, + "learning_rate": 1.9692232557012693e-05, + "loss": 0.1827, + "step": 140 + }, + { + "epoch": 0.11, + "learning_rate": 1.9686079000341914e-05, + "loss": 0.1504, + "step": 141 + }, + { + "epoch": 0.11, + "learning_rate": 1.967986551414728e-05, + "loss": 0.2186, + "step": 142 + }, + { + "epoch": 0.11, + "learning_rate": 1.967359213687275e-05, + "loss": 0.1589, + "step": 143 + }, + { + "epoch": 0.11, + "learning_rate": 1.966725890733285e-05, + "loss": 0.1551, + "step": 144 + }, + { + "epoch": 0.11, + "learning_rate": 1.9660865864712413e-05, + "loss": 0.1783, + "step": 145 + }, + { + "epoch": 0.11, + "learning_rate": 1.9654413048566353e-05, + "loss": 0.2355, + "step": 146 + }, + { + "epoch": 0.11, + "learning_rate": 1.9647900498819412e-05, + "loss": 0.1827, + "step": 147 + }, + { + "epoch": 0.11, + "learning_rate": 1.9641328255765916e-05, + "loss": 0.2344, + "step": 148 + }, + { + "epoch": 0.11, + "learning_rate": 1.9634696360069517e-05, + "loss": 0.1632, + "step": 149 + }, + { + "epoch": 0.12, + "learning_rate": 1.9628004852762952e-05, + "loss": 0.1532, + "step": 150 + }, + { + "epoch": 0.12, + "learning_rate": 1.96212537752478e-05, + "loss": 0.1341, + "step": 151 + }, + { + "epoch": 0.12, + "learning_rate": 1.9614443169294186e-05, + "loss": 0.1475, + "step": 152 + }, + { + "epoch": 0.12, + "learning_rate": 1.960757307704057e-05, + "loss": 0.1717, + "step": 153 + }, + { + "epoch": 0.12, + "learning_rate": 1.9600643540993453e-05, + "loss": 0.2061, + "step": 154 + }, + { + "epoch": 0.12, + "learning_rate": 1.959365460402713e-05, + "loss": 0.1491, + "step": 155 + }, + { + "epoch": 0.12, + "learning_rate": 1.9586606309383415e-05, + "loss": 0.1643, + "step": 156 + }, + { + "epoch": 0.12, + "learning_rate": 1.9579498700671386e-05, + "loss": 0.1744, + "step": 157 + }, + { + "epoch": 0.12, + "learning_rate": 1.9572331821867108e-05, + "loss": 0.1926, + "step": 158 + }, + { + "epoch": 0.12, + "learning_rate": 1.9565105717313352e-05, + "loss": 0.1363, + "step": 159 + }, + { + "epoch": 0.12, + "learning_rate": 1.9557820431719333e-05, + "loss": 0.2054, + "step": 160 + }, + { + "epoch": 0.12, + "learning_rate": 1.955047601016044e-05, + "loss": 0.1936, + "step": 161 + }, + { + "epoch": 0.12, + "learning_rate": 1.954307249807793e-05, + "loss": 0.1923, + "step": 162 + }, + { + "epoch": 0.13, + "learning_rate": 1.9535609941278676e-05, + "loss": 0.1788, + "step": 163 + }, + { + "epoch": 0.13, + "learning_rate": 1.952808838593487e-05, + "loss": 0.1501, + "step": 164 + }, + { + "epoch": 0.13, + "learning_rate": 1.9520507878583728e-05, + "loss": 0.1892, + "step": 165 + }, + { + "epoch": 0.13, + "learning_rate": 1.9512868466127232e-05, + "loss": 0.1671, + "step": 166 + }, + { + "epoch": 0.13, + "learning_rate": 1.95051701958318e-05, + "loss": 0.1625, + "step": 167 + }, + { + "epoch": 0.13, + "learning_rate": 1.9497413115328028e-05, + "loss": 0.1973, + "step": 168 + }, + { + "epoch": 0.13, + "learning_rate": 1.9489597272610377e-05, + "loss": 0.1787, + "step": 169 + }, + { + "epoch": 0.13, + "learning_rate": 1.9481722716036883e-05, + "loss": 0.2179, + "step": 170 + }, + { + "epoch": 0.13, + "learning_rate": 1.9473789494328844e-05, + "loss": 0.168, + "step": 171 + }, + { + "epoch": 0.13, + "learning_rate": 1.9465797656570546e-05, + "loss": 0.1288, + "step": 172 + }, + { + "epoch": 0.13, + "learning_rate": 1.9457747252208936e-05, + "loss": 0.1256, + "step": 173 + }, + { + "epoch": 0.13, + "learning_rate": 1.944963833105332e-05, + "loss": 0.1658, + "step": 174 + }, + { + "epoch": 0.13, + "learning_rate": 1.944147094327506e-05, + "loss": 0.171, + "step": 175 + }, + { + "epoch": 0.14, + "learning_rate": 1.9433245139407266e-05, + "loss": 0.1676, + "step": 176 + }, + { + "epoch": 0.14, + "learning_rate": 1.942496097034447e-05, + "loss": 0.1388, + "step": 177 + }, + { + "epoch": 0.14, + "learning_rate": 1.9416618487342333e-05, + "loss": 0.2108, + "step": 178 + }, + { + "epoch": 0.14, + "learning_rate": 1.94082177420173e-05, + "loss": 0.173, + "step": 179 + }, + { + "epoch": 0.14, + "learning_rate": 1.9399758786346305e-05, + "loss": 0.1769, + "step": 180 + }, + { + "epoch": 0.14, + "learning_rate": 1.9391241672666438e-05, + "loss": 0.1971, + "step": 181 + }, + { + "epoch": 0.14, + "learning_rate": 1.9382666453674625e-05, + "loss": 0.1708, + "step": 182 + }, + { + "epoch": 0.14, + "learning_rate": 1.9374033182427297e-05, + "loss": 0.1697, + "step": 183 + }, + { + "epoch": 0.14, + "learning_rate": 1.936534191234006e-05, + "loss": 0.1718, + "step": 184 + }, + { + "epoch": 0.14, + "learning_rate": 1.9356592697187383e-05, + "loss": 0.1803, + "step": 185 + }, + { + "epoch": 0.14, + "learning_rate": 1.9347785591102244e-05, + "loss": 0.1874, + "step": 186 + }, + { + "epoch": 0.14, + "learning_rate": 1.9338920648575798e-05, + "loss": 0.1672, + "step": 187 + }, + { + "epoch": 0.14, + "learning_rate": 1.9329997924457046e-05, + "loss": 0.183, + "step": 188 + }, + { + "epoch": 0.15, + "learning_rate": 1.93210174739525e-05, + "loss": 0.1642, + "step": 189 + }, + { + "epoch": 0.15, + "learning_rate": 1.9311979352625837e-05, + "loss": 0.1808, + "step": 190 + }, + { + "epoch": 0.15, + "learning_rate": 1.9302883616397546e-05, + "loss": 0.1992, + "step": 191 + }, + { + "epoch": 0.15, + "learning_rate": 1.929373032154459e-05, + "loss": 0.1766, + "step": 192 + }, + { + "epoch": 0.15, + "learning_rate": 1.928451952470007e-05, + "loss": 0.184, + "step": 193 + }, + { + "epoch": 0.15, + "learning_rate": 1.927525128285284e-05, + "loss": 0.1745, + "step": 194 + }, + { + "epoch": 0.15, + "learning_rate": 1.9265925653347198e-05, + "loss": 0.197, + "step": 195 + }, + { + "epoch": 0.15, + "learning_rate": 1.9256542693882505e-05, + "loss": 0.2009, + "step": 196 + }, + { + "epoch": 0.15, + "learning_rate": 1.9247102462512823e-05, + "loss": 0.1737, + "step": 197 + }, + { + "epoch": 0.15, + "learning_rate": 1.9237605017646573e-05, + "loss": 0.1817, + "step": 198 + }, + { + "epoch": 0.15, + "learning_rate": 1.922805041804617e-05, + "loss": 0.1366, + "step": 199 + }, + { + "epoch": 0.15, + "learning_rate": 1.9218438722827644e-05, + "loss": 0.1855, + "step": 200 + }, + { + "epoch": 0.15, + "learning_rate": 1.9208769991460298e-05, + "loss": 0.179, + "step": 201 + }, + { + "epoch": 0.16, + "learning_rate": 1.919904428376632e-05, + "loss": 0.1719, + "step": 202 + }, + { + "epoch": 0.16, + "learning_rate": 1.9189261659920428e-05, + "loss": 0.1447, + "step": 203 + }, + { + "epoch": 0.16, + "learning_rate": 1.9179422180449478e-05, + "loss": 0.1615, + "step": 204 + }, + { + "epoch": 0.16, + "learning_rate": 1.916952590623212e-05, + "loss": 0.1361, + "step": 205 + }, + { + "epoch": 0.16, + "learning_rate": 1.9159572898498387e-05, + "loss": 0.2001, + "step": 206 + }, + { + "epoch": 0.16, + "learning_rate": 1.9149563218829345e-05, + "loss": 0.1883, + "step": 207 + }, + { + "epoch": 0.16, + "learning_rate": 1.9139496929156685e-05, + "loss": 0.1318, + "step": 208 + }, + { + "epoch": 0.16, + "learning_rate": 1.9129374091762375e-05, + "loss": 0.1536, + "step": 209 + }, + { + "epoch": 0.16, + "learning_rate": 1.911919476927823e-05, + "loss": 0.1636, + "step": 210 + }, + { + "epoch": 0.16, + "learning_rate": 1.910895902468557e-05, + "loss": 0.1565, + "step": 211 + }, + { + "epoch": 0.16, + "learning_rate": 1.9098666921314793e-05, + "loss": 0.2114, + "step": 212 + }, + { + "epoch": 0.16, + "learning_rate": 1.908831852284501e-05, + "loss": 0.1618, + "step": 213 + }, + { + "epoch": 0.16, + "learning_rate": 1.907791389330363e-05, + "loss": 0.1819, + "step": 214 + }, + { + "epoch": 0.17, + "learning_rate": 1.9067453097065986e-05, + "loss": 0.204, + "step": 215 + }, + { + "epoch": 0.17, + "learning_rate": 1.905693619885491e-05, + "loss": 0.2068, + "step": 216 + }, + { + "epoch": 0.17, + "learning_rate": 1.904636326374036e-05, + "loss": 0.1831, + "step": 217 + }, + { + "epoch": 0.17, + "learning_rate": 1.9035734357139e-05, + "loss": 0.1766, + "step": 218 + }, + { + "epoch": 0.17, + "learning_rate": 1.9025049544813793e-05, + "loss": 0.2153, + "step": 219 + }, + { + "epoch": 0.17, + "learning_rate": 1.9014308892873612e-05, + "loss": 0.1774, + "step": 220 + }, + { + "epoch": 0.17, + "learning_rate": 1.900351246777281e-05, + "loss": 0.2006, + "step": 221 + }, + { + "epoch": 0.17, + "learning_rate": 1.899266033631083e-05, + "loss": 0.1797, + "step": 222 + }, + { + "epoch": 0.17, + "learning_rate": 1.8981752565631767e-05, + "loss": 0.1843, + "step": 223 + }, + { + "epoch": 0.17, + "learning_rate": 1.8970789223223978e-05, + "loss": 0.2014, + "step": 224 + }, + { + "epoch": 0.17, + "learning_rate": 1.895977037691964e-05, + "loss": 0.183, + "step": 225 + }, + { + "epoch": 0.17, + "learning_rate": 1.8948696094894354e-05, + "loss": 0.1566, + "step": 226 + }, + { + "epoch": 0.17, + "learning_rate": 1.8937566445666707e-05, + "loss": 0.2221, + "step": 227 + }, + { + "epoch": 0.17, + "learning_rate": 1.892638149809785e-05, + "loss": 0.1717, + "step": 228 + }, + { + "epoch": 0.18, + "learning_rate": 1.8915141321391083e-05, + "loss": 0.1841, + "step": 229 + }, + { + "epoch": 0.18, + "learning_rate": 1.8903845985091406e-05, + "loss": 0.1818, + "step": 230 + }, + { + "epoch": 0.18, + "learning_rate": 1.889249555908512e-05, + "loss": 0.1988, + "step": 231 + }, + { + "epoch": 0.18, + "learning_rate": 1.8881090113599353e-05, + "loss": 0.1559, + "step": 232 + }, + { + "epoch": 0.18, + "learning_rate": 1.8869629719201668e-05, + "loss": 0.163, + "step": 233 + }, + { + "epoch": 0.18, + "learning_rate": 1.88581144467996e-05, + "loss": 0.1536, + "step": 234 + }, + { + "epoch": 0.18, + "learning_rate": 1.8846544367640218e-05, + "loss": 0.1746, + "step": 235 + }, + { + "epoch": 0.18, + "learning_rate": 1.8834919553309704e-05, + "loss": 0.1735, + "step": 236 + }, + { + "epoch": 0.18, + "learning_rate": 1.882324007573288e-05, + "loss": 0.2136, + "step": 237 + }, + { + "epoch": 0.18, + "learning_rate": 1.881150600717279e-05, + "loss": 0.1572, + "step": 238 + }, + { + "epoch": 0.18, + "learning_rate": 1.8799717420230242e-05, + "loss": 0.1514, + "step": 239 + }, + { + "epoch": 0.18, + "learning_rate": 1.8787874387843358e-05, + "loss": 0.1613, + "step": 240 + }, + { + "epoch": 0.18, + "learning_rate": 1.8775976983287117e-05, + "loss": 0.2228, + "step": 241 + }, + { + "epoch": 0.19, + "learning_rate": 1.876402528017292e-05, + "loss": 0.1721, + "step": 242 + }, + { + "epoch": 0.19, + "learning_rate": 1.8752019352448114e-05, + "loss": 0.1595, + "step": 243 + }, + { + "epoch": 0.19, + "learning_rate": 1.873995927439555e-05, + "loss": 0.2287, + "step": 244 + }, + { + "epoch": 0.19, + "learning_rate": 1.8727845120633108e-05, + "loss": 0.1858, + "step": 245 + }, + { + "epoch": 0.19, + "learning_rate": 1.8715676966113256e-05, + "loss": 0.1971, + "step": 246 + }, + { + "epoch": 0.19, + "learning_rate": 1.8703454886122568e-05, + "loss": 0.1451, + "step": 247 + }, + { + "epoch": 0.19, + "learning_rate": 1.869117895628126e-05, + "loss": 0.172, + "step": 248 + }, + { + "epoch": 0.19, + "learning_rate": 1.8678849252542733e-05, + "loss": 0.147, + "step": 249 + }, + { + "epoch": 0.19, + "learning_rate": 1.86664658511931e-05, + "loss": 0.1631, + "step": 250 + }, + { + "epoch": 0.19, + "learning_rate": 1.8654028828850705e-05, + "loss": 0.1617, + "step": 251 + }, + { + "epoch": 0.19, + "learning_rate": 1.8641538262465656e-05, + "loss": 0.158, + "step": 252 + }, + { + "epoch": 0.19, + "learning_rate": 1.862899422931934e-05, + "loss": 0.1745, + "step": 253 + }, + { + "epoch": 0.19, + "learning_rate": 1.8616396807023975e-05, + "loss": 0.1735, + "step": 254 + }, + { + "epoch": 0.2, + "learning_rate": 1.860374607352208e-05, + "loss": 0.1605, + "step": 255 + }, + { + "epoch": 0.2, + "learning_rate": 1.859104210708604e-05, + "loss": 0.2098, + "step": 256 + }, + { + "epoch": 0.2, + "learning_rate": 1.8578284986317592e-05, + "loss": 0.1746, + "step": 257 + }, + { + "epoch": 0.2, + "learning_rate": 1.8565474790147352e-05, + "loss": 0.1743, + "step": 258 + }, + { + "epoch": 0.2, + "learning_rate": 1.855261159783432e-05, + "loss": 0.1649, + "step": 259 + }, + { + "epoch": 0.2, + "learning_rate": 1.8539695488965396e-05, + "loss": 0.2058, + "step": 260 + }, + { + "epoch": 0.2, + "learning_rate": 1.8526726543454883e-05, + "loss": 0.1575, + "step": 261 + }, + { + "epoch": 0.2, + "learning_rate": 1.8513704841543997e-05, + "loss": 0.1764, + "step": 262 + }, + { + "epoch": 0.2, + "learning_rate": 1.8500630463800364e-05, + "loss": 0.1185, + "step": 263 + }, + { + "epoch": 0.2, + "learning_rate": 1.8487503491117523e-05, + "loss": 0.2048, + "step": 264 + }, + { + "epoch": 0.2, + "learning_rate": 1.847432400471443e-05, + "loss": 0.1627, + "step": 265 + }, + { + "epoch": 0.2, + "learning_rate": 1.8461092086134953e-05, + "loss": 0.1704, + "step": 266 + }, + { + "epoch": 0.2, + "learning_rate": 1.8447807817247368e-05, + "loss": 0.1682, + "step": 267 + }, + { + "epoch": 0.21, + "learning_rate": 1.8434471280243854e-05, + "loss": 0.1464, + "step": 268 + }, + { + "epoch": 0.21, + "learning_rate": 1.8421082557639973e-05, + "loss": 0.1623, + "step": 269 + }, + { + "epoch": 0.21, + "learning_rate": 1.8407641732274174e-05, + "loss": 0.1785, + "step": 270 + }, + { + "epoch": 0.21, + "learning_rate": 1.8394148887307286e-05, + "loss": 0.1913, + "step": 271 + }, + { + "epoch": 0.21, + "learning_rate": 1.8380604106221973e-05, + "loss": 0.1482, + "step": 272 + }, + { + "epoch": 0.21, + "learning_rate": 1.8367007472822253e-05, + "loss": 0.1428, + "step": 273 + }, + { + "epoch": 0.21, + "learning_rate": 1.8353359071232954e-05, + "loss": 0.14, + "step": 274 + }, + { + "epoch": 0.21, + "learning_rate": 1.833965898589921e-05, + "loss": 0.1552, + "step": 275 + }, + { + "epoch": 0.21, + "learning_rate": 1.8325907301585924e-05, + "loss": 0.1795, + "step": 276 + }, + { + "epoch": 0.21, + "learning_rate": 1.8312104103377266e-05, + "loss": 0.1943, + "step": 277 + }, + { + "epoch": 0.21, + "learning_rate": 1.8298249476676107e-05, + "loss": 0.2051, + "step": 278 + }, + { + "epoch": 0.21, + "learning_rate": 1.828434350720354e-05, + "loss": 0.1447, + "step": 279 + }, + { + "epoch": 0.21, + "learning_rate": 1.827038628099831e-05, + "loss": 0.1593, + "step": 280 + }, + { + "epoch": 0.22, + "learning_rate": 1.8256377884416307e-05, + "loss": 0.2025, + "step": 281 + }, + { + "epoch": 0.22, + "learning_rate": 1.824231840413001e-05, + "loss": 0.1594, + "step": 282 + }, + { + "epoch": 0.22, + "learning_rate": 1.822820792712797e-05, + "loss": 0.2052, + "step": 283 + }, + { + "epoch": 0.22, + "learning_rate": 1.8214046540714268e-05, + "loss": 0.1827, + "step": 284 + }, + { + "epoch": 0.22, + "learning_rate": 1.8199834332507955e-05, + "loss": 0.1561, + "step": 285 + }, + { + "epoch": 0.22, + "learning_rate": 1.8185571390442542e-05, + "loss": 0.186, + "step": 286 + }, + { + "epoch": 0.22, + "learning_rate": 1.8171257802765436e-05, + "loss": 0.1912, + "step": 287 + }, + { + "epoch": 0.22, + "learning_rate": 1.8156893658037386e-05, + "loss": 0.1793, + "step": 288 + }, + { + "epoch": 0.22, + "learning_rate": 1.8142479045131956e-05, + "loss": 0.1619, + "step": 289 + }, + { + "epoch": 0.22, + "learning_rate": 1.812801405323497e-05, + "loss": 0.1592, + "step": 290 + }, + { + "epoch": 0.22, + "learning_rate": 1.811349877184395e-05, + "loss": 0.1606, + "step": 291 + }, + { + "epoch": 0.22, + "learning_rate": 1.809893329076757e-05, + "loss": 0.1774, + "step": 292 + }, + { + "epoch": 0.22, + "learning_rate": 1.808431770012509e-05, + "loss": 0.1843, + "step": 293 + }, + { + "epoch": 0.23, + "learning_rate": 1.8069652090345825e-05, + "loss": 0.1843, + "step": 294 + }, + { + "epoch": 0.23, + "learning_rate": 1.8054936552168548e-05, + "loss": 0.2021, + "step": 295 + }, + { + "epoch": 0.23, + "learning_rate": 1.8040171176640967e-05, + "loss": 0.1893, + "step": 296 + }, + { + "epoch": 0.23, + "learning_rate": 1.8025356055119122e-05, + "loss": 0.1358, + "step": 297 + }, + { + "epoch": 0.23, + "learning_rate": 1.801049127926686e-05, + "loss": 0.1947, + "step": 298 + }, + { + "epoch": 0.23, + "learning_rate": 1.7995576941055236e-05, + "loss": 0.1525, + "step": 299 + }, + { + "epoch": 0.23, + "learning_rate": 1.7980613132761956e-05, + "loss": 0.1676, + "step": 300 + }, + { + "epoch": 0.23, + "learning_rate": 1.7965599946970814e-05, + "loss": 0.1277, + "step": 301 + }, + { + "epoch": 0.23, + "learning_rate": 1.795053747657111e-05, + "loss": 0.152, + "step": 302 + }, + { + "epoch": 0.23, + "learning_rate": 1.793542581475707e-05, + "loss": 0.1774, + "step": 303 + }, + { + "epoch": 0.23, + "learning_rate": 1.7920265055027285e-05, + "loss": 0.1532, + "step": 304 + }, + { + "epoch": 0.23, + "learning_rate": 1.790505529118412e-05, + "loss": 0.1925, + "step": 305 + }, + { + "epoch": 0.23, + "learning_rate": 1.788979661733313e-05, + "loss": 0.1935, + "step": 306 + }, + { + "epoch": 0.24, + "learning_rate": 1.7874489127882496e-05, + "loss": 0.1649, + "step": 307 + }, + { + "epoch": 0.24, + "learning_rate": 1.7859132917542427e-05, + "loss": 0.1666, + "step": 308 + }, + { + "epoch": 0.24, + "learning_rate": 1.7843728081324575e-05, + "loss": 0.1657, + "step": 309 + }, + { + "epoch": 0.24, + "learning_rate": 1.7828274714541445e-05, + "loss": 0.1985, + "step": 310 + }, + { + "epoch": 0.24, + "learning_rate": 1.781277291280582e-05, + "loss": 0.1566, + "step": 311 + }, + { + "epoch": 0.24, + "learning_rate": 1.7797222772030155e-05, + "loss": 0.1545, + "step": 312 + }, + { + "epoch": 0.24, + "learning_rate": 1.7781624388425974e-05, + "loss": 0.1806, + "step": 313 + }, + { + "epoch": 0.24, + "learning_rate": 1.7765977858503314e-05, + "loss": 0.1703, + "step": 314 + }, + { + "epoch": 0.24, + "learning_rate": 1.7750283279070077e-05, + "loss": 0.1627, + "step": 315 + }, + { + "epoch": 0.24, + "learning_rate": 1.773454074723147e-05, + "loss": 0.188, + "step": 316 + }, + { + "epoch": 0.24, + "learning_rate": 1.771875036038939e-05, + "loss": 0.1873, + "step": 317 + }, + { + "epoch": 0.24, + "learning_rate": 1.770291221624181e-05, + "loss": 0.1747, + "step": 318 + }, + { + "epoch": 0.24, + "learning_rate": 1.76870264127822e-05, + "loss": 0.1897, + "step": 319 + }, + { + "epoch": 0.25, + "learning_rate": 1.7671093048298905e-05, + "loss": 0.1751, + "step": 320 + }, + { + "epoch": 0.25, + "learning_rate": 1.765511222137453e-05, + "loss": 0.1655, + "step": 321 + }, + { + "epoch": 0.25, + "learning_rate": 1.763908403088534e-05, + "loss": 0.1514, + "step": 322 + }, + { + "epoch": 0.25, + "learning_rate": 1.762300857600066e-05, + "loss": 0.1576, + "step": 323 + }, + { + "epoch": 0.25, + "learning_rate": 1.7606885956182228e-05, + "loss": 0.1656, + "step": 324 + }, + { + "epoch": 0.25, + "learning_rate": 1.759071627118362e-05, + "loss": 0.1844, + "step": 325 + }, + { + "epoch": 0.25, + "learning_rate": 1.75744996210496e-05, + "loss": 0.1579, + "step": 326 + }, + { + "epoch": 0.25, + "learning_rate": 1.7558236106115514e-05, + "loss": 0.1735, + "step": 327 + }, + { + "epoch": 0.25, + "learning_rate": 1.754192582700668e-05, + "loss": 0.159, + "step": 328 + }, + { + "epoch": 0.25, + "learning_rate": 1.752556888463774e-05, + "loss": 0.1721, + "step": 329 + }, + { + "epoch": 0.25, + "learning_rate": 1.750916538021206e-05, + "loss": 0.1626, + "step": 330 + }, + { + "epoch": 0.25, + "learning_rate": 1.7492715415221087e-05, + "loss": 0.1454, + "step": 331 + }, + { + "epoch": 0.25, + "learning_rate": 1.7476219091443738e-05, + "loss": 0.1846, + "step": 332 + }, + { + "epoch": 0.26, + "learning_rate": 1.745967651094575e-05, + "loss": 0.17, + "step": 333 + }, + { + "epoch": 0.26, + "learning_rate": 1.7443087776079068e-05, + "loss": 0.1989, + "step": 334 + }, + { + "epoch": 0.26, + "learning_rate": 1.7426452989481187e-05, + "loss": 0.1668, + "step": 335 + }, + { + "epoch": 0.26, + "learning_rate": 1.740977225407455e-05, + "loss": 0.15, + "step": 336 + }, + { + "epoch": 0.26, + "learning_rate": 1.739304567306588e-05, + "loss": 0.186, + "step": 337 + }, + { + "epoch": 0.26, + "learning_rate": 1.7376273349945568e-05, + "loss": 0.2104, + "step": 338 + }, + { + "epoch": 0.26, + "learning_rate": 1.7359455388487008e-05, + "loss": 0.1669, + "step": 339 + }, + { + "epoch": 0.26, + "learning_rate": 1.7342591892745978e-05, + "loss": 0.1796, + "step": 340 + }, + { + "epoch": 0.26, + "learning_rate": 1.732568296705997e-05, + "loss": 0.1844, + "step": 341 + }, + { + "epoch": 0.26, + "learning_rate": 1.7308728716047568e-05, + "loss": 0.2021, + "step": 342 + }, + { + "epoch": 0.26, + "learning_rate": 1.72917292446078e-05, + "loss": 0.1658, + "step": 343 + }, + { + "epoch": 0.26, + "learning_rate": 1.727468465791946e-05, + "loss": 0.178, + "step": 344 + }, + { + "epoch": 0.26, + "learning_rate": 1.7257595061440502e-05, + "loss": 0.1884, + "step": 345 + }, + { + "epoch": 0.27, + "learning_rate": 1.7240460560907345e-05, + "loss": 0.1221, + "step": 346 + }, + { + "epoch": 0.27, + "learning_rate": 1.7223281262334252e-05, + "loss": 0.1917, + "step": 347 + }, + { + "epoch": 0.27, + "learning_rate": 1.7206057272012647e-05, + "loss": 0.2018, + "step": 348 + }, + { + "epoch": 0.27, + "learning_rate": 1.7188788696510477e-05, + "loss": 0.1758, + "step": 349 + }, + { + "epoch": 0.27, + "learning_rate": 1.7171475642671546e-05, + "loss": 0.1558, + "step": 350 + }, + { + "epoch": 0.27, + "learning_rate": 1.7154118217614844e-05, + "loss": 0.1481, + "step": 351 + }, + { + "epoch": 0.27, + "learning_rate": 1.7136716528733916e-05, + "loss": 0.1893, + "step": 352 + }, + { + "epoch": 0.27, + "learning_rate": 1.7119270683696144e-05, + "loss": 0.1838, + "step": 353 + }, + { + "epoch": 0.27, + "learning_rate": 1.710178079044214e-05, + "loss": 0.1898, + "step": 354 + }, + { + "epoch": 0.27, + "learning_rate": 1.7084246957185036e-05, + "loss": 0.1707, + "step": 355 + }, + { + "epoch": 0.27, + "learning_rate": 1.706666929240983e-05, + "loss": 0.1949, + "step": 356 + }, + { + "epoch": 0.27, + "learning_rate": 1.7049047904872715e-05, + "loss": 0.1887, + "step": 357 + }, + { + "epoch": 0.27, + "learning_rate": 1.703138290360041e-05, + "loss": 0.1765, + "step": 358 + }, + { + "epoch": 0.28, + "learning_rate": 1.701367439788947e-05, + "loss": 0.1746, + "step": 359 + }, + { + "epoch": 0.28, + "learning_rate": 1.699592249730563e-05, + "loss": 0.1614, + "step": 360 + }, + { + "epoch": 0.28, + "learning_rate": 1.6978127311683103e-05, + "loss": 0.2043, + "step": 361 + }, + { + "epoch": 0.28, + "learning_rate": 1.6960288951123933e-05, + "loss": 0.1299, + "step": 362 + }, + { + "epoch": 0.28, + "learning_rate": 1.6942407525997277e-05, + "loss": 0.1686, + "step": 363 + }, + { + "epoch": 0.28, + "learning_rate": 1.6924483146938756e-05, + "loss": 0.1363, + "step": 364 + }, + { + "epoch": 0.28, + "learning_rate": 1.6906515924849745e-05, + "loss": 0.1557, + "step": 365 + }, + { + "epoch": 0.28, + "learning_rate": 1.6888505970896696e-05, + "loss": 0.1445, + "step": 366 + }, + { + "epoch": 0.28, + "learning_rate": 1.6870453396510456e-05, + "loss": 0.1707, + "step": 367 + }, + { + "epoch": 0.28, + "learning_rate": 1.6852358313385568e-05, + "loss": 0.1439, + "step": 368 + }, + { + "epoch": 0.28, + "learning_rate": 1.683422083347959e-05, + "loss": 0.1688, + "step": 369 + }, + { + "epoch": 0.28, + "learning_rate": 1.681604106901239e-05, + "loss": 0.1727, + "step": 370 + }, + { + "epoch": 0.28, + "learning_rate": 1.6797819132465462e-05, + "loss": 0.1892, + "step": 371 + }, + { + "epoch": 0.29, + "learning_rate": 1.6779555136581228e-05, + "loss": 0.1981, + "step": 372 + }, + { + "epoch": 0.29, + "learning_rate": 1.676124919436233e-05, + "loss": 0.1728, + "step": 373 + }, + { + "epoch": 0.29, + "learning_rate": 1.674290141907095e-05, + "loss": 0.179, + "step": 374 + }, + { + "epoch": 0.29, + "learning_rate": 1.6724511924228093e-05, + "loss": 0.1432, + "step": 375 + }, + { + "epoch": 0.29, + "learning_rate": 1.6706080823612897e-05, + "loss": 0.1774, + "step": 376 + }, + { + "epoch": 0.29, + "learning_rate": 1.668760823126191e-05, + "loss": 0.1309, + "step": 377 + }, + { + "epoch": 0.29, + "learning_rate": 1.6669094261468405e-05, + "loss": 0.1511, + "step": 378 + }, + { + "epoch": 0.29, + "learning_rate": 1.665053902878167e-05, + "loss": 0.1712, + "step": 379 + }, + { + "epoch": 0.29, + "learning_rate": 1.6631942648006284e-05, + "loss": 0.1736, + "step": 380 + }, + { + "epoch": 0.29, + "learning_rate": 1.661330523420142e-05, + "loss": 0.1523, + "step": 381 + }, + { + "epoch": 0.29, + "learning_rate": 1.659462690268013e-05, + "loss": 0.1563, + "step": 382 + }, + { + "epoch": 0.29, + "learning_rate": 1.6575907769008633e-05, + "loss": 0.1881, + "step": 383 + }, + { + "epoch": 0.29, + "learning_rate": 1.6557147949005597e-05, + "loss": 0.139, + "step": 384 + }, + { + "epoch": 0.3, + "learning_rate": 1.6538347558741424e-05, + "loss": 0.1664, + "step": 385 + }, + { + "epoch": 0.3, + "learning_rate": 1.651950671453753e-05, + "loss": 0.1446, + "step": 386 + }, + { + "epoch": 0.3, + "learning_rate": 1.650062553296563e-05, + "loss": 0.1598, + "step": 387 + }, + { + "epoch": 0.3, + "learning_rate": 1.6481704130847013e-05, + "loss": 0.1639, + "step": 388 + }, + { + "epoch": 0.3, + "learning_rate": 1.646274262525181e-05, + "loss": 0.1521, + "step": 389 + }, + { + "epoch": 0.3, + "learning_rate": 1.6443741133498297e-05, + "loss": 0.1823, + "step": 390 + }, + { + "epoch": 0.3, + "learning_rate": 1.642469977315214e-05, + "loss": 0.143, + "step": 391 + }, + { + "epoch": 0.3, + "learning_rate": 1.6405618662025678e-05, + "loss": 0.1797, + "step": 392 + }, + { + "epoch": 0.3, + "learning_rate": 1.6386497918177208e-05, + "loss": 0.1504, + "step": 393 + }, + { + "epoch": 0.3, + "learning_rate": 1.6367337659910223e-05, + "loss": 0.1612, + "step": 394 + }, + { + "epoch": 0.3, + "learning_rate": 1.6348138005772717e-05, + "loss": 0.1428, + "step": 395 + }, + { + "epoch": 0.3, + "learning_rate": 1.6328899074556415e-05, + "loss": 0.1461, + "step": 396 + }, + { + "epoch": 0.3, + "learning_rate": 1.6309620985296075e-05, + "loss": 0.1414, + "step": 397 + }, + { + "epoch": 0.31, + "learning_rate": 1.6290303857268724e-05, + "loss": 0.1598, + "step": 398 + }, + { + "epoch": 0.31, + "learning_rate": 1.6270947809992924e-05, + "loss": 0.1652, + "step": 399 + }, + { + "epoch": 0.31, + "learning_rate": 1.625155296322805e-05, + "loss": 0.1763, + "step": 400 + }, + { + "epoch": 0.31, + "learning_rate": 1.6232119436973525e-05, + "loss": 0.1559, + "step": 401 + }, + { + "epoch": 0.31, + "learning_rate": 1.6212647351468088e-05, + "loss": 0.158, + "step": 402 + }, + { + "epoch": 0.31, + "learning_rate": 1.6193136827189067e-05, + "loss": 0.1583, + "step": 403 + }, + { + "epoch": 0.31, + "learning_rate": 1.6173587984851596e-05, + "loss": 0.1823, + "step": 404 + }, + { + "epoch": 0.31, + "learning_rate": 1.6154000945407914e-05, + "loss": 0.165, + "step": 405 + }, + { + "epoch": 0.31, + "learning_rate": 1.6134375830046566e-05, + "loss": 0.1606, + "step": 406 + }, + { + "epoch": 0.31, + "learning_rate": 1.6114712760191697e-05, + "loss": 0.1712, + "step": 407 + }, + { + "epoch": 0.31, + "learning_rate": 1.6095011857502282e-05, + "loss": 0.2072, + "step": 408 + }, + { + "epoch": 0.31, + "learning_rate": 1.607527324387137e-05, + "loss": 0.1614, + "step": 409 + }, + { + "epoch": 0.31, + "learning_rate": 1.6055497041425333e-05, + "loss": 0.1514, + "step": 410 + }, + { + "epoch": 0.32, + "learning_rate": 1.603568337252312e-05, + "loss": 0.1722, + "step": 411 + }, + { + "epoch": 0.32, + "learning_rate": 1.6015832359755483e-05, + "loss": 0.1444, + "step": 412 + }, + { + "epoch": 0.32, + "learning_rate": 1.5995944125944235e-05, + "loss": 0.137, + "step": 413 + }, + { + "epoch": 0.32, + "learning_rate": 1.597601879414147e-05, + "loss": 0.1322, + "step": 414 + }, + { + "epoch": 0.32, + "learning_rate": 1.5956056487628832e-05, + "loss": 0.1699, + "step": 415 + }, + { + "epoch": 0.32, + "learning_rate": 1.5936057329916718e-05, + "loss": 0.1318, + "step": 416 + }, + { + "epoch": 0.32, + "learning_rate": 1.5916021444743535e-05, + "loss": 0.1461, + "step": 417 + }, + { + "epoch": 0.32, + "learning_rate": 1.5895948956074937e-05, + "loss": 0.1544, + "step": 418 + }, + { + "epoch": 0.32, + "learning_rate": 1.587583998810304e-05, + "loss": 0.2183, + "step": 419 + }, + { + "epoch": 0.32, + "learning_rate": 1.5855694665245665e-05, + "loss": 0.1257, + "step": 420 + }, + { + "epoch": 0.32, + "learning_rate": 1.5835513112145583e-05, + "loss": 0.1715, + "step": 421 + }, + { + "epoch": 0.32, + "learning_rate": 1.5815295453669702e-05, + "loss": 0.1283, + "step": 422 + }, + { + "epoch": 0.32, + "learning_rate": 1.579504181490834e-05, + "loss": 0.1766, + "step": 423 + }, + { + "epoch": 0.33, + "learning_rate": 1.5774752321174428e-05, + "loss": 0.1359, + "step": 424 + }, + { + "epoch": 0.33, + "learning_rate": 1.5754427098002735e-05, + "loss": 0.1571, + "step": 425 + }, + { + "epoch": 0.33, + "learning_rate": 1.573406627114909e-05, + "loss": 0.16, + "step": 426 + }, + { + "epoch": 0.33, + "learning_rate": 1.571366996658962e-05, + "loss": 0.199, + "step": 427 + }, + { + "epoch": 0.33, + "learning_rate": 1.5693238310519952e-05, + "loss": 0.1494, + "step": 428 + }, + { + "epoch": 0.33, + "learning_rate": 1.5672771429354436e-05, + "loss": 0.1779, + "step": 429 + }, + { + "epoch": 0.33, + "learning_rate": 1.5652269449725375e-05, + "loss": 0.1879, + "step": 430 + }, + { + "epoch": 0.33, + "learning_rate": 1.5631732498482222e-05, + "loss": 0.1899, + "step": 431 + }, + { + "epoch": 0.33, + "learning_rate": 1.561116070269082e-05, + "loss": 0.1733, + "step": 432 + }, + { + "epoch": 0.33, + "learning_rate": 1.5590554189632585e-05, + "loss": 0.132, + "step": 433 + }, + { + "epoch": 0.33, + "learning_rate": 1.5569913086803744e-05, + "loss": 0.1451, + "step": 434 + }, + { + "epoch": 0.33, + "learning_rate": 1.5549237521914537e-05, + "loss": 0.1439, + "step": 435 + }, + { + "epoch": 0.33, + "learning_rate": 1.552852762288843e-05, + "loss": 0.1458, + "step": 436 + }, + { + "epoch": 0.34, + "learning_rate": 1.5507783517861308e-05, + "loss": 0.1711, + "step": 437 + }, + { + "epoch": 0.34, + "learning_rate": 1.548700533518071e-05, + "loss": 0.1427, + "step": 438 + }, + { + "epoch": 0.34, + "learning_rate": 1.5466193203405017e-05, + "loss": 0.1492, + "step": 439 + }, + { + "epoch": 0.34, + "learning_rate": 1.5445347251302642e-05, + "loss": 0.1391, + "step": 440 + }, + { + "epoch": 0.34, + "learning_rate": 1.5424467607851274e-05, + "loss": 0.1373, + "step": 441 + }, + { + "epoch": 0.34, + "learning_rate": 1.540355440223704e-05, + "loss": 0.1694, + "step": 442 + }, + { + "epoch": 0.34, + "learning_rate": 1.5382607763853736e-05, + "loss": 0.1696, + "step": 443 + }, + { + "epoch": 0.34, + "learning_rate": 1.5361627822301997e-05, + "loss": 0.1806, + "step": 444 + }, + { + "epoch": 0.34, + "learning_rate": 1.534061470738852e-05, + "loss": 0.1771, + "step": 445 + }, + { + "epoch": 0.34, + "learning_rate": 1.5319568549125253e-05, + "loss": 0.1438, + "step": 446 + }, + { + "epoch": 0.34, + "learning_rate": 1.5298489477728587e-05, + "loss": 0.1574, + "step": 447 + }, + { + "epoch": 0.34, + "learning_rate": 1.527737762361855e-05, + "loss": 0.1293, + "step": 448 + }, + { + "epoch": 0.34, + "learning_rate": 1.5256233117418e-05, + "loss": 0.1812, + "step": 449 + }, + { + "epoch": 0.35, + "learning_rate": 1.523505608995183e-05, + "loss": 0.1493, + "step": 450 + }, + { + "epoch": 0.35, + "learning_rate": 1.5213846672246139e-05, + "loss": 0.1402, + "step": 451 + }, + { + "epoch": 0.35, + "learning_rate": 1.5192604995527433e-05, + "loss": 0.1629, + "step": 452 + }, + { + "epoch": 0.35, + "learning_rate": 1.5171331191221814e-05, + "loss": 0.1923, + "step": 453 + }, + { + "epoch": 0.35, + "learning_rate": 1.5150025390954153e-05, + "loss": 0.1578, + "step": 454 + }, + { + "epoch": 0.35, + "learning_rate": 1.5128687726547297e-05, + "loss": 0.1849, + "step": 455 + }, + { + "epoch": 0.35, + "learning_rate": 1.5107318330021237e-05, + "loss": 0.1554, + "step": 456 + }, + { + "epoch": 0.35, + "learning_rate": 1.5085917333592299e-05, + "loss": 0.1608, + "step": 457 + }, + { + "epoch": 0.35, + "learning_rate": 1.5064484869672318e-05, + "loss": 0.1281, + "step": 458 + }, + { + "epoch": 0.35, + "learning_rate": 1.5043021070867825e-05, + "loss": 0.2036, + "step": 459 + }, + { + "epoch": 0.35, + "learning_rate": 1.5021526069979232e-05, + "loss": 0.1752, + "step": 460 + }, + { + "epoch": 0.35, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.1619, + "step": 461 + }, + { + "epoch": 0.35, + "learning_rate": 1.4978442994115821e-05, + "loss": 0.158, + "step": 462 + }, + { + "epoch": 0.36, + "learning_rate": 1.4956855185703787e-05, + "loss": 0.1711, + "step": 463 + }, + { + "epoch": 0.36, + "learning_rate": 1.4935236708331582e-05, + "loss": 0.174, + "step": 464 + }, + { + "epoch": 0.36, + "learning_rate": 1.4913587695756632e-05, + "loss": 0.1359, + "step": 465 + }, + { + "epoch": 0.36, + "learning_rate": 1.48919082819253e-05, + "loss": 0.1531, + "step": 466 + }, + { + "epoch": 0.36, + "learning_rate": 1.4870198600972043e-05, + "loss": 0.1546, + "step": 467 + }, + { + "epoch": 0.36, + "learning_rate": 1.4848458787218583e-05, + "loss": 0.1509, + "step": 468 + }, + { + "epoch": 0.36, + "learning_rate": 1.4826688975173085e-05, + "loss": 0.1888, + "step": 469 + }, + { + "epoch": 0.36, + "learning_rate": 1.480488929952932e-05, + "loss": 0.1388, + "step": 470 + }, + { + "epoch": 0.36, + "learning_rate": 1.4783059895165818e-05, + "loss": 0.1703, + "step": 471 + }, + { + "epoch": 0.36, + "learning_rate": 1.4761200897145063e-05, + "loss": 0.175, + "step": 472 + }, + { + "epoch": 0.36, + "learning_rate": 1.4739312440712627e-05, + "loss": 0.1566, + "step": 473 + }, + { + "epoch": 0.36, + "learning_rate": 1.4717394661296353e-05, + "loss": 0.1714, + "step": 474 + }, + { + "epoch": 0.36, + "learning_rate": 1.4695447694505512e-05, + "loss": 0.2009, + "step": 475 + }, + { + "epoch": 0.37, + "learning_rate": 1.467347167612995e-05, + "loss": 0.1402, + "step": 476 + }, + { + "epoch": 0.37, + "learning_rate": 1.4651466742139284e-05, + "loss": 0.1637, + "step": 477 + }, + { + "epoch": 0.37, + "learning_rate": 1.4629433028682014e-05, + "loss": 0.1657, + "step": 478 + }, + { + "epoch": 0.37, + "learning_rate": 1.4607370672084717e-05, + "loss": 0.1577, + "step": 479 + }, + { + "epoch": 0.37, + "learning_rate": 1.458527980885118e-05, + "loss": 0.1331, + "step": 480 + }, + { + "epoch": 0.37, + "learning_rate": 1.456316057566158e-05, + "loss": 0.1654, + "step": 481 + }, + { + "epoch": 0.37, + "learning_rate": 1.454101310937161e-05, + "loss": 0.1318, + "step": 482 + }, + { + "epoch": 0.37, + "learning_rate": 1.4518837547011657e-05, + "loss": 0.1051, + "step": 483 + }, + { + "epoch": 0.37, + "learning_rate": 1.4496634025785938e-05, + "loss": 0.1705, + "step": 484 + }, + { + "epoch": 0.37, + "learning_rate": 1.4474402683071659e-05, + "loss": 0.1568, + "step": 485 + }, + { + "epoch": 0.37, + "learning_rate": 1.4452143656418157e-05, + "loss": 0.1725, + "step": 486 + }, + { + "epoch": 0.37, + "learning_rate": 1.4429857083546054e-05, + "loss": 0.1793, + "step": 487 + }, + { + "epoch": 0.37, + "learning_rate": 1.4407543102346423e-05, + "loss": 0.1368, + "step": 488 + }, + { + "epoch": 0.38, + "learning_rate": 1.4385201850879895e-05, + "loss": 0.199, + "step": 489 + }, + { + "epoch": 0.38, + "learning_rate": 1.4362833467375839e-05, + "loss": 0.1596, + "step": 490 + }, + { + "epoch": 0.38, + "learning_rate": 1.4340438090231492e-05, + "loss": 0.1677, + "step": 491 + }, + { + "epoch": 0.38, + "learning_rate": 1.4318015858011108e-05, + "loss": 0.1423, + "step": 492 + }, + { + "epoch": 0.38, + "learning_rate": 1.429556690944509e-05, + "loss": 0.1628, + "step": 493 + }, + { + "epoch": 0.38, + "learning_rate": 1.427309138342915e-05, + "loss": 0.1359, + "step": 494 + }, + { + "epoch": 0.38, + "learning_rate": 1.4250589419023435e-05, + "loss": 0.1379, + "step": 495 + }, + { + "epoch": 0.38, + "learning_rate": 1.4228061155451671e-05, + "loss": 0.1494, + "step": 496 + }, + { + "epoch": 0.38, + "learning_rate": 1.4205506732100303e-05, + "loss": 0.188, + "step": 497 + }, + { + "epoch": 0.38, + "learning_rate": 1.4182926288517628e-05, + "loss": 0.1556, + "step": 498 + }, + { + "epoch": 0.38, + "learning_rate": 1.4160319964412943e-05, + "loss": 0.1462, + "step": 499 + }, + { + "epoch": 0.38, + "learning_rate": 1.4137687899655662e-05, + "loss": 0.1661, + "step": 500 + }, + { + "epoch": 0.38, + "learning_rate": 1.4115030234274461e-05, + "loss": 0.1377, + "step": 501 + }, + { + "epoch": 0.39, + "learning_rate": 1.4092347108456425e-05, + "loss": 0.1414, + "step": 502 + }, + { + "epoch": 0.39, + "learning_rate": 1.4069638662546158e-05, + "loss": 0.1763, + "step": 503 + }, + { + "epoch": 0.39, + "learning_rate": 1.404690503704492e-05, + "loss": 0.1616, + "step": 504 + }, + { + "epoch": 0.39, + "learning_rate": 1.402414637260977e-05, + "loss": 0.1792, + "step": 505 + }, + { + "epoch": 0.39, + "learning_rate": 1.4001362810052684e-05, + "loss": 0.1776, + "step": 506 + }, + { + "epoch": 0.39, + "learning_rate": 1.3978554490339687e-05, + "loss": 0.1617, + "step": 507 + }, + { + "epoch": 0.39, + "learning_rate": 1.3955721554589979e-05, + "loss": 0.1276, + "step": 508 + }, + { + "epoch": 0.39, + "learning_rate": 1.3932864144075064e-05, + "loss": 0.1528, + "step": 509 + }, + { + "epoch": 0.39, + "learning_rate": 1.3909982400217885e-05, + "loss": 0.1324, + "step": 510 + }, + { + "epoch": 0.39, + "learning_rate": 1.388707646459193e-05, + "loss": 0.1289, + "step": 511 + }, + { + "epoch": 0.39, + "learning_rate": 1.3864146478920366e-05, + "loss": 0.1123, + "step": 512 + }, + { + "epoch": 0.39, + "learning_rate": 1.3841192585075164e-05, + "loss": 0.1663, + "step": 513 + }, + { + "epoch": 0.39, + "learning_rate": 1.3818214925076226e-05, + "loss": 0.1562, + "step": 514 + }, + { + "epoch": 0.4, + "learning_rate": 1.3795213641090483e-05, + "loss": 0.1518, + "step": 515 + }, + { + "epoch": 0.4, + "learning_rate": 1.3772188875431054e-05, + "loss": 0.1689, + "step": 516 + }, + { + "epoch": 0.4, + "learning_rate": 1.3749140770556322e-05, + "loss": 0.1257, + "step": 517 + }, + { + "epoch": 0.4, + "learning_rate": 1.3726069469069083e-05, + "loss": 0.1672, + "step": 518 + }, + { + "epoch": 0.4, + "learning_rate": 1.3702975113715661e-05, + "loss": 0.1568, + "step": 519 + }, + { + "epoch": 0.4, + "learning_rate": 1.367985784738501e-05, + "loss": 0.1649, + "step": 520 + }, + { + "epoch": 0.4, + "learning_rate": 1.365671781310784e-05, + "loss": 0.1476, + "step": 521 + }, + { + "epoch": 0.4, + "learning_rate": 1.3633555154055733e-05, + "loss": 0.16, + "step": 522 + }, + { + "epoch": 0.4, + "learning_rate": 1.361037001354025e-05, + "loss": 0.1575, + "step": 523 + }, + { + "epoch": 0.4, + "learning_rate": 1.3587162535012051e-05, + "loss": 0.1755, + "step": 524 + }, + { + "epoch": 0.4, + "learning_rate": 1.356393286206001e-05, + "loss": 0.1418, + "step": 525 + }, + { + "epoch": 0.4, + "learning_rate": 1.3540681138410317e-05, + "loss": 0.1648, + "step": 526 + }, + { + "epoch": 0.4, + "learning_rate": 1.3517407507925593e-05, + "loss": 0.1678, + "step": 527 + }, + { + "epoch": 0.41, + "learning_rate": 1.3494112114604001e-05, + "loss": 0.1302, + "step": 528 + }, + { + "epoch": 0.41, + "learning_rate": 1.3470795102578358e-05, + "loss": 0.1494, + "step": 529 + }, + { + "epoch": 0.41, + "learning_rate": 1.3447456616115238e-05, + "loss": 0.15, + "step": 530 + }, + { + "epoch": 0.41, + "learning_rate": 1.342409679961408e-05, + "loss": 0.1729, + "step": 531 + }, + { + "epoch": 0.41, + "learning_rate": 1.3400715797606293e-05, + "loss": 0.1377, + "step": 532 + }, + { + "epoch": 0.41, + "learning_rate": 1.3377313754754373e-05, + "loss": 0.1398, + "step": 533 + }, + { + "epoch": 0.41, + "learning_rate": 1.3353890815850993e-05, + "loss": 0.1501, + "step": 534 + }, + { + "epoch": 0.41, + "learning_rate": 1.3330447125818115e-05, + "loss": 0.1294, + "step": 535 + }, + { + "epoch": 0.41, + "learning_rate": 1.3306982829706093e-05, + "loss": 0.1425, + "step": 536 + }, + { + "epoch": 0.41, + "learning_rate": 1.3283498072692771e-05, + "loss": 0.1606, + "step": 537 + }, + { + "epoch": 0.41, + "learning_rate": 1.3259993000082599e-05, + "loss": 0.1425, + "step": 538 + }, + { + "epoch": 0.41, + "learning_rate": 1.3236467757305704e-05, + "loss": 0.1227, + "step": 539 + }, + { + "epoch": 0.41, + "learning_rate": 1.321292248991703e-05, + "loss": 0.1601, + "step": 540 + }, + { + "epoch": 0.42, + "learning_rate": 1.3189357343595405e-05, + "loss": 0.1459, + "step": 541 + }, + { + "epoch": 0.42, + "learning_rate": 1.3165772464142654e-05, + "loss": 0.1351, + "step": 542 + }, + { + "epoch": 0.42, + "learning_rate": 1.3142167997482693e-05, + "loss": 0.1617, + "step": 543 + }, + { + "epoch": 0.42, + "learning_rate": 1.3118544089660635e-05, + "loss": 0.1509, + "step": 544 + }, + { + "epoch": 0.42, + "learning_rate": 1.3094900886841867e-05, + "loss": 0.1467, + "step": 545 + }, + { + "epoch": 0.42, + "learning_rate": 1.3071238535311164e-05, + "loss": 0.1607, + "step": 546 + }, + { + "epoch": 0.42, + "learning_rate": 1.3047557181471784e-05, + "loss": 0.1551, + "step": 547 + }, + { + "epoch": 0.42, + "learning_rate": 1.3023856971844543e-05, + "loss": 0.1519, + "step": 548 + }, + { + "epoch": 0.42, + "learning_rate": 1.3000138053066931e-05, + "loss": 0.1434, + "step": 549 + }, + { + "epoch": 0.42, + "learning_rate": 1.2976400571892189e-05, + "loss": 0.1422, + "step": 550 + }, + { + "epoch": 0.42, + "learning_rate": 1.2952644675188413e-05, + "loss": 0.1603, + "step": 551 + }, + { + "epoch": 0.42, + "learning_rate": 1.2928870509937633e-05, + "loss": 0.1939, + "step": 552 + }, + { + "epoch": 0.42, + "learning_rate": 1.2905078223234907e-05, + "loss": 0.1395, + "step": 553 + }, + { + "epoch": 0.43, + "learning_rate": 1.288126796228743e-05, + "loss": 0.1453, + "step": 554 + }, + { + "epoch": 0.43, + "learning_rate": 1.2857439874413589e-05, + "loss": 0.1609, + "step": 555 + }, + { + "epoch": 0.43, + "learning_rate": 1.2833594107042078e-05, + "loss": 0.1581, + "step": 556 + }, + { + "epoch": 0.43, + "learning_rate": 1.2809730807710973e-05, + "loss": 0.1776, + "step": 557 + }, + { + "epoch": 0.43, + "learning_rate": 1.2785850124066829e-05, + "loss": 0.1639, + "step": 558 + }, + { + "epoch": 0.43, + "learning_rate": 1.2761952203863759e-05, + "loss": 0.1727, + "step": 559 + }, + { + "epoch": 0.43, + "learning_rate": 1.2738037194962516e-05, + "loss": 0.1725, + "step": 560 + }, + { + "epoch": 0.43, + "learning_rate": 1.2714105245329591e-05, + "loss": 0.1625, + "step": 561 + }, + { + "epoch": 0.43, + "learning_rate": 1.2690156503036288e-05, + "loss": 0.1178, + "step": 562 + }, + { + "epoch": 0.43, + "learning_rate": 1.266619111625781e-05, + "loss": 0.1387, + "step": 563 + }, + { + "epoch": 0.43, + "learning_rate": 1.264220923327234e-05, + "loss": 0.1333, + "step": 564 + }, + { + "epoch": 0.43, + "learning_rate": 1.2618211002460135e-05, + "loss": 0.1608, + "step": 565 + }, + { + "epoch": 0.43, + "learning_rate": 1.2594196572302586e-05, + "loss": 0.1705, + "step": 566 + }, + { + "epoch": 0.44, + "learning_rate": 1.257016609138132e-05, + "loss": 0.1398, + "step": 567 + }, + { + "epoch": 0.44, + "learning_rate": 1.2546119708377273e-05, + "loss": 0.1556, + "step": 568 + }, + { + "epoch": 0.44, + "learning_rate": 1.2522057572069772e-05, + "loss": 0.1579, + "step": 569 + }, + { + "epoch": 0.44, + "learning_rate": 1.2497979831335604e-05, + "loss": 0.1472, + "step": 570 + }, + { + "epoch": 0.44, + "learning_rate": 1.2473886635148109e-05, + "loss": 0.1385, + "step": 571 + }, + { + "epoch": 0.44, + "learning_rate": 1.2449778132576256e-05, + "loss": 0.1447, + "step": 572 + }, + { + "epoch": 0.44, + "learning_rate": 1.2425654472783712e-05, + "loss": 0.1469, + "step": 573 + }, + { + "epoch": 0.44, + "learning_rate": 1.2401515805027924e-05, + "loss": 0.1749, + "step": 574 + }, + { + "epoch": 0.44, + "learning_rate": 1.2377362278659202e-05, + "loss": 0.1406, + "step": 575 + }, + { + "epoch": 0.44, + "learning_rate": 1.2353194043119783e-05, + "loss": 0.1743, + "step": 576 + }, + { + "epoch": 0.44, + "learning_rate": 1.2329011247942915e-05, + "loss": 0.1695, + "step": 577 + }, + { + "epoch": 0.44, + "learning_rate": 1.2304814042751927e-05, + "loss": 0.1807, + "step": 578 + }, + { + "epoch": 0.44, + "learning_rate": 1.2280602577259312e-05, + "loss": 0.1356, + "step": 579 + }, + { + "epoch": 0.45, + "learning_rate": 1.2256377001265785e-05, + "loss": 0.1706, + "step": 580 + }, + { + "epoch": 0.45, + "learning_rate": 1.2232137464659368e-05, + "loss": 0.1539, + "step": 581 + }, + { + "epoch": 0.45, + "learning_rate": 1.2207884117414465e-05, + "loss": 0.1608, + "step": 582 + }, + { + "epoch": 0.45, + "learning_rate": 1.2183617109590925e-05, + "loss": 0.1359, + "step": 583 + }, + { + "epoch": 0.45, + "learning_rate": 1.215933659133311e-05, + "loss": 0.142, + "step": 584 + }, + { + "epoch": 0.45, + "learning_rate": 1.213504271286899e-05, + "loss": 0.1407, + "step": 585 + }, + { + "epoch": 0.45, + "learning_rate": 1.2110735624509184e-05, + "loss": 0.175, + "step": 586 + }, + { + "epoch": 0.45, + "learning_rate": 1.2086415476646047e-05, + "loss": 0.1622, + "step": 587 + }, + { + "epoch": 0.45, + "learning_rate": 1.2062082419752737e-05, + "loss": 0.1487, + "step": 588 + }, + { + "epoch": 0.45, + "learning_rate": 1.2037736604382279e-05, + "loss": 0.1431, + "step": 589 + }, + { + "epoch": 0.45, + "learning_rate": 1.201337818116664e-05, + "loss": 0.1998, + "step": 590 + }, + { + "epoch": 0.45, + "learning_rate": 1.1989007300815793e-05, + "loss": 0.1755, + "step": 591 + }, + { + "epoch": 0.45, + "learning_rate": 1.1964624114116784e-05, + "loss": 0.1401, + "step": 592 + }, + { + "epoch": 0.46, + "learning_rate": 1.1940228771932808e-05, + "loss": 0.1581, + "step": 593 + }, + { + "epoch": 0.46, + "learning_rate": 1.1915821425202258e-05, + "loss": 0.1665, + "step": 594 + }, + { + "epoch": 0.46, + "learning_rate": 1.1891402224937805e-05, + "loss": 0.1521, + "step": 595 + }, + { + "epoch": 0.46, + "learning_rate": 1.1866971322225462e-05, + "loss": 0.149, + "step": 596 + }, + { + "epoch": 0.46, + "learning_rate": 1.1842528868223645e-05, + "loss": 0.1561, + "step": 597 + }, + { + "epoch": 0.46, + "learning_rate": 1.1818075014162243e-05, + "loss": 0.1748, + "step": 598 + }, + { + "epoch": 0.46, + "learning_rate": 1.1793609911341672e-05, + "loss": 0.1284, + "step": 599 + }, + { + "epoch": 0.46, + "learning_rate": 1.176913371113196e-05, + "loss": 0.16, + "step": 600 + }, + { + "epoch": 0.46, + "learning_rate": 1.1744646564971777e-05, + "loss": 0.15, + "step": 601 + }, + { + "epoch": 0.46, + "learning_rate": 1.1720148624367533e-05, + "loss": 0.1507, + "step": 602 + }, + { + "epoch": 0.46, + "learning_rate": 1.1695640040892424e-05, + "loss": 0.1599, + "step": 603 + }, + { + "epoch": 0.46, + "learning_rate": 1.1671120966185486e-05, + "loss": 0.1293, + "step": 604 + }, + { + "epoch": 0.46, + "learning_rate": 1.1646591551950677e-05, + "loss": 0.1552, + "step": 605 + }, + { + "epoch": 0.47, + "learning_rate": 1.1622051949955915e-05, + "loss": 0.1371, + "step": 606 + }, + { + "epoch": 0.47, + "learning_rate": 1.159750231203217e-05, + "loss": 0.1745, + "step": 607 + }, + { + "epoch": 0.47, + "learning_rate": 1.1572942790072486e-05, + "loss": 0.1589, + "step": 608 + }, + { + "epoch": 0.47, + "learning_rate": 1.154837353603107e-05, + "loss": 0.1422, + "step": 609 + }, + { + "epoch": 0.47, + "learning_rate": 1.1523794701922351e-05, + "loss": 0.1644, + "step": 610 + }, + { + "epoch": 0.47, + "learning_rate": 1.149920643982002e-05, + "loss": 0.1649, + "step": 611 + }, + { + "epoch": 0.47, + "learning_rate": 1.1474608901856103e-05, + "loss": 0.1237, + "step": 612 + }, + { + "epoch": 0.47, + "learning_rate": 1.145000224022002e-05, + "loss": 0.1696, + "step": 613 + }, + { + "epoch": 0.47, + "learning_rate": 1.1425386607157642e-05, + "loss": 0.1442, + "step": 614 + }, + { + "epoch": 0.47, + "learning_rate": 1.1400762154970346e-05, + "loss": 0.1315, + "step": 615 + }, + { + "epoch": 0.47, + "learning_rate": 1.1376129036014073e-05, + "loss": 0.1257, + "step": 616 + }, + { + "epoch": 0.47, + "learning_rate": 1.1351487402698388e-05, + "loss": 0.1306, + "step": 617 + }, + { + "epoch": 0.47, + "learning_rate": 1.1326837407485537e-05, + "loss": 0.1968, + "step": 618 + }, + { + "epoch": 0.48, + "learning_rate": 1.1302179202889505e-05, + "loss": 0.1537, + "step": 619 + }, + { + "epoch": 0.48, + "learning_rate": 1.1277512941475059e-05, + "loss": 0.1595, + "step": 620 + }, + { + "epoch": 0.48, + "learning_rate": 1.1252838775856829e-05, + "loss": 0.1656, + "step": 621 + }, + { + "epoch": 0.48, + "learning_rate": 1.1228156858698344e-05, + "loss": 0.1618, + "step": 622 + }, + { + "epoch": 0.48, + "learning_rate": 1.1203467342711087e-05, + "loss": 0.1572, + "step": 623 + }, + { + "epoch": 0.48, + "learning_rate": 1.1178770380653567e-05, + "loss": 0.1497, + "step": 624 + }, + { + "epoch": 0.48, + "learning_rate": 1.1154066125330358e-05, + "loss": 0.1502, + "step": 625 + }, + { + "epoch": 0.48, + "learning_rate": 1.1129354729591158e-05, + "loss": 0.1497, + "step": 626 + }, + { + "epoch": 0.48, + "learning_rate": 1.1104636346329838e-05, + "loss": 0.1498, + "step": 627 + }, + { + "epoch": 0.48, + "learning_rate": 1.107991112848352e-05, + "loss": 0.1829, + "step": 628 + }, + { + "epoch": 0.48, + "learning_rate": 1.1055179229031598e-05, + "loss": 0.1269, + "step": 629 + }, + { + "epoch": 0.48, + "learning_rate": 1.1030440800994806e-05, + "loss": 0.177, + "step": 630 + }, + { + "epoch": 0.48, + "learning_rate": 1.100569599743428e-05, + "loss": 0.1429, + "step": 631 + }, + { + "epoch": 0.49, + "learning_rate": 1.09809449714506e-05, + "loss": 0.1397, + "step": 632 + }, + { + "epoch": 0.49, + "learning_rate": 1.0956187876182837e-05, + "loss": 0.1304, + "step": 633 + }, + { + "epoch": 0.49, + "learning_rate": 1.0931424864807624e-05, + "loss": 0.1517, + "step": 634 + }, + { + "epoch": 0.49, + "learning_rate": 1.0906656090538196e-05, + "loss": 0.1763, + "step": 635 + }, + { + "epoch": 0.49, + "learning_rate": 1.0881881706623443e-05, + "loss": 0.1377, + "step": 636 + }, + { + "epoch": 0.49, + "learning_rate": 1.0857101866346953e-05, + "loss": 0.1616, + "step": 637 + }, + { + "epoch": 0.49, + "learning_rate": 1.0832316723026092e-05, + "loss": 0.1833, + "step": 638 + }, + { + "epoch": 0.49, + "learning_rate": 1.0807526430011028e-05, + "loss": 0.1581, + "step": 639 + }, + { + "epoch": 0.49, + "learning_rate": 1.0782731140683786e-05, + "loss": 0.1369, + "step": 640 + }, + { + "epoch": 0.49, + "learning_rate": 1.0757931008457308e-05, + "loss": 0.1349, + "step": 641 + }, + { + "epoch": 0.49, + "learning_rate": 1.0733126186774504e-05, + "loss": 0.1469, + "step": 642 + }, + { + "epoch": 0.49, + "learning_rate": 1.0708316829107295e-05, + "loss": 0.1632, + "step": 643 + }, + { + "epoch": 0.49, + "learning_rate": 1.0683503088955663e-05, + "loss": 0.1455, + "step": 644 + }, + { + "epoch": 0.5, + "learning_rate": 1.0658685119846713e-05, + "loss": 0.178, + "step": 645 + }, + { + "epoch": 0.5, + "learning_rate": 1.0633863075333713e-05, + "loss": 0.1727, + "step": 646 + }, + { + "epoch": 0.5, + "learning_rate": 1.0609037108995143e-05, + "loss": 0.1544, + "step": 647 + }, + { + "epoch": 0.5, + "learning_rate": 1.058420737443375e-05, + "loss": 0.1403, + "step": 648 + }, + { + "epoch": 0.5, + "learning_rate": 1.0559374025275597e-05, + "loss": 0.1364, + "step": 649 + }, + { + "epoch": 0.5, + "learning_rate": 1.0534537215169106e-05, + "loss": 0.1556, + "step": 650 + }, + { + "epoch": 0.5, + "learning_rate": 1.0509697097784122e-05, + "loss": 0.1201, + "step": 651 + }, + { + "epoch": 0.5, + "learning_rate": 1.0484853826810942e-05, + "loss": 0.1265, + "step": 652 + }, + { + "epoch": 0.5, + "learning_rate": 1.0460007555959384e-05, + "loss": 0.1558, + "step": 653 + }, + { + "epoch": 0.5, + "learning_rate": 1.043515843895782e-05, + "loss": 0.1304, + "step": 654 + }, + { + "epoch": 0.5, + "learning_rate": 1.0410306629552231e-05, + "loss": 0.1529, + "step": 655 + }, + { + "epoch": 0.5, + "learning_rate": 1.0385452281505269e-05, + "loss": 0.149, + "step": 656 + }, + { + "epoch": 0.5, + "learning_rate": 1.0360595548595274e-05, + "loss": 0.14, + "step": 657 + }, + { + "epoch": 0.5, + "learning_rate": 1.0335736584615357e-05, + "loss": 0.1418, + "step": 658 + }, + { + "epoch": 0.51, + "learning_rate": 1.0310875543372425e-05, + "loss": 0.1474, + "step": 659 + }, + { + "epoch": 0.51, + "learning_rate": 1.0286012578686244e-05, + "loss": 0.2043, + "step": 660 + }, + { + "epoch": 0.51, + "learning_rate": 1.0261147844388472e-05, + "loss": 0.1749, + "step": 661 + }, + { + "epoch": 0.51, + "learning_rate": 1.023628149432172e-05, + "loss": 0.1605, + "step": 662 + }, + { + "epoch": 0.51, + "learning_rate": 1.0211413682338603e-05, + "loss": 0.1263, + "step": 663 + }, + { + "epoch": 0.51, + "learning_rate": 1.0186544562300766e-05, + "loss": 0.129, + "step": 664 + }, + { + "epoch": 0.51, + "learning_rate": 1.0161674288077959e-05, + "loss": 0.1573, + "step": 665 + }, + { + "epoch": 0.51, + "learning_rate": 1.0136803013547071e-05, + "loss": 0.158, + "step": 666 + }, + { + "epoch": 0.51, + "learning_rate": 1.011193089259118e-05, + "loss": 0.1378, + "step": 667 + }, + { + "epoch": 0.51, + "learning_rate": 1.0087058079098595e-05, + "loss": 0.1514, + "step": 668 + }, + { + "epoch": 0.51, + "learning_rate": 1.0062184726961918e-05, + "loss": 0.1923, + "step": 669 + }, + { + "epoch": 0.51, + "learning_rate": 1.0037310990077083e-05, + "loss": 0.1526, + "step": 670 + }, + { + "epoch": 0.51, + "learning_rate": 1.0012437022342399e-05, + "loss": 0.1386, + "step": 671 + }, + { + "epoch": 0.52, + "learning_rate": 9.987562977657605e-06, + "loss": 0.1846, + "step": 672 + }, + { + "epoch": 0.52, + "learning_rate": 9.962689009922918e-06, + "loss": 0.1565, + "step": 673 + }, + { + "epoch": 0.52, + "learning_rate": 9.937815273038082e-06, + "loss": 0.1765, + "step": 674 + }, + { + "epoch": 0.52, + "learning_rate": 9.912941920901408e-06, + "loss": 0.1717, + "step": 675 + }, + { + "epoch": 0.52, + "learning_rate": 9.888069107408824e-06, + "loss": 0.1339, + "step": 676 + }, + { + "epoch": 0.52, + "learning_rate": 9.86319698645293e-06, + "loss": 0.1597, + "step": 677 + }, + { + "epoch": 0.52, + "learning_rate": 9.838325711922045e-06, + "loss": 0.1493, + "step": 678 + }, + { + "epoch": 0.52, + "learning_rate": 9.813455437699238e-06, + "loss": 0.1155, + "step": 679 + }, + { + "epoch": 0.52, + "learning_rate": 9.788586317661404e-06, + "loss": 0.1222, + "step": 680 + }, + { + "epoch": 0.52, + "learning_rate": 9.763718505678282e-06, + "loss": 0.162, + "step": 681 + }, + { + "epoch": 0.52, + "learning_rate": 9.738852155611531e-06, + "loss": 0.1505, + "step": 682 + }, + { + "epoch": 0.52, + "learning_rate": 9.71398742131376e-06, + "loss": 0.1562, + "step": 683 + }, + { + "epoch": 0.52, + "learning_rate": 9.689124456627577e-06, + "loss": 0.1824, + "step": 684 + }, + { + "epoch": 0.53, + "learning_rate": 9.664263415384644e-06, + "loss": 0.1526, + "step": 685 + }, + { + "epoch": 0.53, + "learning_rate": 9.639404451404729e-06, + "loss": 0.1288, + "step": 686 + }, + { + "epoch": 0.53, + "learning_rate": 9.614547718494736e-06, + "loss": 0.1289, + "step": 687 + }, + { + "epoch": 0.53, + "learning_rate": 9.589693370447769e-06, + "loss": 0.149, + "step": 688 + }, + { + "epoch": 0.53, + "learning_rate": 9.564841561042187e-06, + "loss": 0.1466, + "step": 689 + }, + { + "epoch": 0.53, + "learning_rate": 9.53999244404062e-06, + "loss": 0.1608, + "step": 690 + }, + { + "epoch": 0.53, + "learning_rate": 9.515146173189058e-06, + "loss": 0.1835, + "step": 691 + }, + { + "epoch": 0.53, + "learning_rate": 9.490302902215882e-06, + "loss": 0.1229, + "step": 692 + }, + { + "epoch": 0.53, + "learning_rate": 9.465462784830895e-06, + "loss": 0.1499, + "step": 693 + }, + { + "epoch": 0.53, + "learning_rate": 9.440625974724408e-06, + "loss": 0.1361, + "step": 694 + }, + { + "epoch": 0.53, + "learning_rate": 9.415792625566253e-06, + "loss": 0.1902, + "step": 695 + }, + { + "epoch": 0.53, + "learning_rate": 9.390962891004858e-06, + "loss": 0.1227, + "step": 696 + }, + { + "epoch": 0.53, + "learning_rate": 9.366136924666289e-06, + "loss": 0.1444, + "step": 697 + }, + { + "epoch": 0.54, + "learning_rate": 9.34131488015329e-06, + "loss": 0.1432, + "step": 698 + }, + { + "epoch": 0.54, + "learning_rate": 9.31649691104434e-06, + "loss": 0.1489, + "step": 699 + }, + { + "epoch": 0.54, + "learning_rate": 9.291683170892712e-06, + "loss": 0.1693, + "step": 700 + }, + { + "epoch": 0.54, + "learning_rate": 9.266873813225498e-06, + "loss": 0.17, + "step": 701 + }, + { + "epoch": 0.54, + "learning_rate": 9.242068991542694e-06, + "loss": 0.14, + "step": 702 + }, + { + "epoch": 0.54, + "learning_rate": 9.217268859316219e-06, + "loss": 0.1736, + "step": 703 + }, + { + "epoch": 0.54, + "learning_rate": 9.192473569988975e-06, + "loss": 0.1615, + "step": 704 + }, + { + "epoch": 0.54, + "learning_rate": 9.167683276973906e-06, + "loss": 0.1528, + "step": 705 + }, + { + "epoch": 0.54, + "learning_rate": 9.142898133653049e-06, + "loss": 0.1623, + "step": 706 + }, + { + "epoch": 0.54, + "learning_rate": 9.11811829337656e-06, + "loss": 0.1485, + "step": 707 + }, + { + "epoch": 0.54, + "learning_rate": 9.093343909461807e-06, + "loss": 0.1675, + "step": 708 + }, + { + "epoch": 0.54, + "learning_rate": 9.068575135192377e-06, + "loss": 0.1639, + "step": 709 + }, + { + "epoch": 0.54, + "learning_rate": 9.043812123817165e-06, + "loss": 0.1341, + "step": 710 + }, + { + "epoch": 0.55, + "learning_rate": 9.019055028549403e-06, + "loss": 0.1243, + "step": 711 + }, + { + "epoch": 0.55, + "learning_rate": 8.994304002565723e-06, + "loss": 0.1823, + "step": 712 + }, + { + "epoch": 0.55, + "learning_rate": 8.969559199005196e-06, + "loss": 0.1382, + "step": 713 + }, + { + "epoch": 0.55, + "learning_rate": 8.944820770968407e-06, + "loss": 0.151, + "step": 714 + }, + { + "epoch": 0.55, + "learning_rate": 8.920088871516482e-06, + "loss": 0.1578, + "step": 715 + }, + { + "epoch": 0.55, + "learning_rate": 8.895363653670162e-06, + "loss": 0.1764, + "step": 716 + }, + { + "epoch": 0.55, + "learning_rate": 8.870645270408849e-06, + "loss": 0.1588, + "step": 717 + }, + { + "epoch": 0.55, + "learning_rate": 8.845933874669645e-06, + "loss": 0.1785, + "step": 718 + }, + { + "epoch": 0.55, + "learning_rate": 8.821229619346433e-06, + "loss": 0.1792, + "step": 719 + }, + { + "epoch": 0.55, + "learning_rate": 8.796532657288916e-06, + "loss": 0.1313, + "step": 720 + }, + { + "epoch": 0.55, + "learning_rate": 8.771843141301659e-06, + "loss": 0.1253, + "step": 721 + }, + { + "epoch": 0.55, + "learning_rate": 8.747161224143175e-06, + "loss": 0.1489, + "step": 722 + }, + { + "epoch": 0.55, + "learning_rate": 8.722487058524945e-06, + "loss": 0.1428, + "step": 723 + }, + { + "epoch": 0.56, + "learning_rate": 8.697820797110499e-06, + "loss": 0.1637, + "step": 724 + }, + { + "epoch": 0.56, + "learning_rate": 8.673162592514466e-06, + "loss": 0.162, + "step": 725 + }, + { + "epoch": 0.56, + "learning_rate": 8.648512597301613e-06, + "loss": 0.1533, + "step": 726 + }, + { + "epoch": 0.56, + "learning_rate": 8.62387096398593e-06, + "loss": 0.0983, + "step": 727 + }, + { + "epoch": 0.56, + "learning_rate": 8.599237845029657e-06, + "loss": 0.1464, + "step": 728 + }, + { + "epoch": 0.56, + "learning_rate": 8.574613392842361e-06, + "loss": 0.1644, + "step": 729 + }, + { + "epoch": 0.56, + "learning_rate": 8.549997759779981e-06, + "loss": 0.1322, + "step": 730 + }, + { + "epoch": 0.56, + "learning_rate": 8.525391098143902e-06, + "loss": 0.1486, + "step": 731 + }, + { + "epoch": 0.56, + "learning_rate": 8.500793560179984e-06, + "loss": 0.1199, + "step": 732 + }, + { + "epoch": 0.56, + "learning_rate": 8.47620529807765e-06, + "loss": 0.157, + "step": 733 + }, + { + "epoch": 0.56, + "learning_rate": 8.451626463968932e-06, + "loss": 0.1303, + "step": 734 + }, + { + "epoch": 0.56, + "learning_rate": 8.427057209927519e-06, + "loss": 0.1253, + "step": 735 + }, + { + "epoch": 0.56, + "learning_rate": 8.402497687967837e-06, + "loss": 0.121, + "step": 736 + }, + { + "epoch": 0.57, + "learning_rate": 8.377948050044087e-06, + "loss": 0.1411, + "step": 737 + }, + { + "epoch": 0.57, + "learning_rate": 8.353408448049327e-06, + "loss": 0.1296, + "step": 738 + }, + { + "epoch": 0.57, + "learning_rate": 8.328879033814516e-06, + "loss": 0.1353, + "step": 739 + }, + { + "epoch": 0.57, + "learning_rate": 8.304359959107579e-06, + "loss": 0.1236, + "step": 740 + }, + { + "epoch": 0.57, + "learning_rate": 8.279851375632469e-06, + "loss": 0.1898, + "step": 741 + }, + { + "epoch": 0.57, + "learning_rate": 8.255353435028228e-06, + "loss": 0.1373, + "step": 742 + }, + { + "epoch": 0.57, + "learning_rate": 8.230866288868045e-06, + "loss": 0.1348, + "step": 743 + }, + { + "epoch": 0.57, + "learning_rate": 8.206390088658326e-06, + "loss": 0.1464, + "step": 744 + }, + { + "epoch": 0.57, + "learning_rate": 8.181924985837762e-06, + "loss": 0.1536, + "step": 745 + }, + { + "epoch": 0.57, + "learning_rate": 8.157471131776356e-06, + "loss": 0.1459, + "step": 746 + }, + { + "epoch": 0.57, + "learning_rate": 8.13302867777454e-06, + "loss": 0.1177, + "step": 747 + }, + { + "epoch": 0.57, + "learning_rate": 8.108597775062198e-06, + "loss": 0.1281, + "step": 748 + }, + { + "epoch": 0.57, + "learning_rate": 8.084178574797745e-06, + "loss": 0.1549, + "step": 749 + }, + { + "epoch": 0.58, + "learning_rate": 8.059771228067197e-06, + "loss": 0.1098, + "step": 750 + }, + { + "epoch": 0.58, + "learning_rate": 8.035375885883217e-06, + "loss": 0.1377, + "step": 751 + }, + { + "epoch": 0.58, + "learning_rate": 8.010992699184208e-06, + "loss": 0.136, + "step": 752 + }, + { + "epoch": 0.58, + "learning_rate": 7.986621818833364e-06, + "loss": 0.1519, + "step": 753 + }, + { + "epoch": 0.58, + "learning_rate": 7.962263395617724e-06, + "loss": 0.1469, + "step": 754 + }, + { + "epoch": 0.58, + "learning_rate": 7.937917580247265e-06, + "loss": 0.1231, + "step": 755 + }, + { + "epoch": 0.58, + "learning_rate": 7.913584523353958e-06, + "loss": 0.1518, + "step": 756 + }, + { + "epoch": 0.58, + "learning_rate": 7.88926437549082e-06, + "loss": 0.1262, + "step": 757 + }, + { + "epoch": 0.58, + "learning_rate": 7.864957287131012e-06, + "loss": 0.1462, + "step": 758 + }, + { + "epoch": 0.58, + "learning_rate": 7.840663408666893e-06, + "loss": 0.1082, + "step": 759 + }, + { + "epoch": 0.58, + "learning_rate": 7.81638289040908e-06, + "loss": 0.1541, + "step": 760 + }, + { + "epoch": 0.58, + "learning_rate": 7.792115882585536e-06, + "loss": 0.1821, + "step": 761 + }, + { + "epoch": 0.58, + "learning_rate": 7.767862535340635e-06, + "loss": 0.1318, + "step": 762 + }, + { + "epoch": 0.59, + "learning_rate": 7.743622998734217e-06, + "loss": 0.1402, + "step": 763 + }, + { + "epoch": 0.59, + "learning_rate": 7.71939742274069e-06, + "loss": 0.1638, + "step": 764 + }, + { + "epoch": 0.59, + "learning_rate": 7.695185957248074e-06, + "loss": 0.1342, + "step": 765 + }, + { + "epoch": 0.59, + "learning_rate": 7.670988752057088e-06, + "loss": 0.131, + "step": 766 + }, + { + "epoch": 0.59, + "learning_rate": 7.646805956880222e-06, + "loss": 0.1409, + "step": 767 + }, + { + "epoch": 0.59, + "learning_rate": 7.622637721340801e-06, + "loss": 0.1612, + "step": 768 + }, + { + "epoch": 0.59, + "learning_rate": 7.598484194972076e-06, + "loss": 0.1199, + "step": 769 + }, + { + "epoch": 0.59, + "learning_rate": 7.574345527216293e-06, + "loss": 0.1552, + "step": 770 + }, + { + "epoch": 0.59, + "learning_rate": 7.550221867423746e-06, + "loss": 0.1327, + "step": 771 + }, + { + "epoch": 0.59, + "learning_rate": 7.526113364851891e-06, + "loss": 0.1688, + "step": 772 + }, + { + "epoch": 0.59, + "learning_rate": 7.5020201686644e-06, + "loss": 0.138, + "step": 773 + }, + { + "epoch": 0.59, + "learning_rate": 7.47794242793023e-06, + "loss": 0.1579, + "step": 774 + }, + { + "epoch": 0.59, + "learning_rate": 7.453880291622726e-06, + "loss": 0.1556, + "step": 775 + }, + { + "epoch": 0.6, + "learning_rate": 7.429833908618682e-06, + "loss": 0.1631, + "step": 776 + }, + { + "epoch": 0.6, + "learning_rate": 7.405803427697417e-06, + "loss": 0.1072, + "step": 777 + }, + { + "epoch": 0.6, + "learning_rate": 7.381788997539869e-06, + "loss": 0.1691, + "step": 778 + }, + { + "epoch": 0.6, + "learning_rate": 7.357790766727663e-06, + "loss": 0.1448, + "step": 779 + }, + { + "epoch": 0.6, + "learning_rate": 7.333808883742192e-06, + "loss": 0.1266, + "step": 780 + }, + { + "epoch": 0.6, + "learning_rate": 7.309843496963715e-06, + "loss": 0.1395, + "step": 781 + }, + { + "epoch": 0.6, + "learning_rate": 7.285894754670413e-06, + "loss": 0.1482, + "step": 782 + }, + { + "epoch": 0.6, + "learning_rate": 7.261962805037486e-06, + "loss": 0.1088, + "step": 783 + }, + { + "epoch": 0.6, + "learning_rate": 7.238047796136247e-06, + "loss": 0.1393, + "step": 784 + }, + { + "epoch": 0.6, + "learning_rate": 7.214149875933173e-06, + "loss": 0.1528, + "step": 785 + }, + { + "epoch": 0.6, + "learning_rate": 7.190269192289028e-06, + "loss": 0.1342, + "step": 786 + }, + { + "epoch": 0.6, + "learning_rate": 7.166405892957926e-06, + "loss": 0.1461, + "step": 787 + }, + { + "epoch": 0.6, + "learning_rate": 7.142560125586413e-06, + "loss": 0.1223, + "step": 788 + }, + { + "epoch": 0.61, + "learning_rate": 7.11873203771257e-06, + "loss": 0.1385, + "step": 789 + }, + { + "epoch": 0.61, + "learning_rate": 7.094921776765095e-06, + "loss": 0.1687, + "step": 790 + }, + { + "epoch": 0.61, + "learning_rate": 7.071129490062372e-06, + "loss": 0.146, + "step": 791 + }, + { + "epoch": 0.61, + "learning_rate": 7.047355324811591e-06, + "loss": 0.1597, + "step": 792 + }, + { + "epoch": 0.61, + "learning_rate": 7.023599428107815e-06, + "loss": 0.1599, + "step": 793 + }, + { + "epoch": 0.61, + "learning_rate": 6.999861946933072e-06, + "loss": 0.1318, + "step": 794 + }, + { + "epoch": 0.61, + "learning_rate": 6.97614302815546e-06, + "loss": 0.1526, + "step": 795 + }, + { + "epoch": 0.61, + "learning_rate": 6.95244281852822e-06, + "loss": 0.1495, + "step": 796 + }, + { + "epoch": 0.61, + "learning_rate": 6.928761464688836e-06, + "loss": 0.158, + "step": 797 + }, + { + "epoch": 0.61, + "learning_rate": 6.905099113158138e-06, + "loss": 0.1516, + "step": 798 + }, + { + "epoch": 0.61, + "learning_rate": 6.881455910339369e-06, + "loss": 0.1445, + "step": 799 + }, + { + "epoch": 0.61, + "learning_rate": 6.857832002517307e-06, + "loss": 0.1675, + "step": 800 + }, + { + "epoch": 0.61, + "learning_rate": 6.834227535857349e-06, + "loss": 0.1384, + "step": 801 + }, + { + "epoch": 0.62, + "learning_rate": 6.8106426564045965e-06, + "loss": 0.1508, + "step": 802 + }, + { + "epoch": 0.62, + "learning_rate": 6.78707751008297e-06, + "loss": 0.1315, + "step": 803 + }, + { + "epoch": 0.62, + "learning_rate": 6.763532242694298e-06, + "loss": 0.152, + "step": 804 + }, + { + "epoch": 0.62, + "learning_rate": 6.740006999917406e-06, + "loss": 0.17, + "step": 805 + }, + { + "epoch": 0.62, + "learning_rate": 6.716501927307231e-06, + "loss": 0.1817, + "step": 806 + }, + { + "epoch": 0.62, + "learning_rate": 6.69301717029391e-06, + "loss": 0.1542, + "step": 807 + }, + { + "epoch": 0.62, + "learning_rate": 6.669552874181888e-06, + "loss": 0.1599, + "step": 808 + }, + { + "epoch": 0.62, + "learning_rate": 6.646109184149011e-06, + "loss": 0.1501, + "step": 809 + }, + { + "epoch": 0.62, + "learning_rate": 6.6226862452456295e-06, + "loss": 0.1547, + "step": 810 + }, + { + "epoch": 0.62, + "learning_rate": 6.599284202393709e-06, + "loss": 0.1898, + "step": 811 + }, + { + "epoch": 0.62, + "learning_rate": 6.575903200385927e-06, + "loss": 0.1584, + "step": 812 + }, + { + "epoch": 0.62, + "learning_rate": 6.552543383884766e-06, + "loss": 0.1512, + "step": 813 + }, + { + "epoch": 0.62, + "learning_rate": 6.529204897421644e-06, + "loss": 0.153, + "step": 814 + }, + { + "epoch": 0.63, + "learning_rate": 6.505887885396003e-06, + "loss": 0.1422, + "step": 815 + }, + { + "epoch": 0.63, + "learning_rate": 6.482592492074411e-06, + "loss": 0.1531, + "step": 816 + }, + { + "epoch": 0.63, + "learning_rate": 6.4593188615896855e-06, + "loss": 0.1213, + "step": 817 + }, + { + "epoch": 0.63, + "learning_rate": 6.436067137939994e-06, + "loss": 0.131, + "step": 818 + }, + { + "epoch": 0.63, + "learning_rate": 6.412837464987951e-06, + "loss": 0.1306, + "step": 819 + }, + { + "epoch": 0.63, + "learning_rate": 6.389629986459756e-06, + "loss": 0.1619, + "step": 820 + }, + { + "epoch": 0.63, + "learning_rate": 6.36644484594427e-06, + "loss": 0.1307, + "step": 821 + }, + { + "epoch": 0.63, + "learning_rate": 6.343282186892163e-06, + "loss": 0.1339, + "step": 822 + }, + { + "epoch": 0.63, + "learning_rate": 6.3201421526149945e-06, + "loss": 0.1501, + "step": 823 + }, + { + "epoch": 0.63, + "learning_rate": 6.297024886284342e-06, + "loss": 0.1694, + "step": 824 + }, + { + "epoch": 0.63, + "learning_rate": 6.273930530930919e-06, + "loss": 0.1494, + "step": 825 + }, + { + "epoch": 0.63, + "learning_rate": 6.250859229443684e-06, + "loss": 0.1433, + "step": 826 + }, + { + "epoch": 0.63, + "learning_rate": 6.2278111245689495e-06, + "loss": 0.1362, + "step": 827 + }, + { + "epoch": 0.64, + "learning_rate": 6.204786358909516e-06, + "loss": 0.136, + "step": 828 + }, + { + "epoch": 0.64, + "learning_rate": 6.181785074923778e-06, + "loss": 0.1478, + "step": 829 + }, + { + "epoch": 0.64, + "learning_rate": 6.1588074149248365e-06, + "loss": 0.1317, + "step": 830 + }, + { + "epoch": 0.64, + "learning_rate": 6.135853521079636e-06, + "loss": 0.1588, + "step": 831 + }, + { + "epoch": 0.64, + "learning_rate": 6.112923535408074e-06, + "loss": 0.1459, + "step": 832 + }, + { + "epoch": 0.64, + "learning_rate": 6.0900175997821165e-06, + "loss": 0.1692, + "step": 833 + }, + { + "epoch": 0.64, + "learning_rate": 6.067135855924937e-06, + "loss": 0.1084, + "step": 834 + }, + { + "epoch": 0.64, + "learning_rate": 6.044278445410025e-06, + "loss": 0.1339, + "step": 835 + }, + { + "epoch": 0.64, + "learning_rate": 6.021445509660315e-06, + "loss": 0.1735, + "step": 836 + }, + { + "epoch": 0.64, + "learning_rate": 5.9986371899473205e-06, + "loss": 0.1054, + "step": 837 + }, + { + "epoch": 0.64, + "learning_rate": 5.975853627390233e-06, + "loss": 0.1334, + "step": 838 + }, + { + "epoch": 0.64, + "learning_rate": 5.953094962955081e-06, + "loss": 0.1395, + "step": 839 + }, + { + "epoch": 0.64, + "learning_rate": 5.930361337453847e-06, + "loss": 0.1121, + "step": 840 + }, + { + "epoch": 0.65, + "learning_rate": 5.907652891543576e-06, + "loss": 0.153, + "step": 841 + }, + { + "epoch": 0.65, + "learning_rate": 5.88496976572554e-06, + "loss": 0.1368, + "step": 842 + }, + { + "epoch": 0.65, + "learning_rate": 5.862312100344345e-06, + "loss": 0.1368, + "step": 843 + }, + { + "epoch": 0.65, + "learning_rate": 5.839680035587061e-06, + "loss": 0.1158, + "step": 844 + }, + { + "epoch": 0.65, + "learning_rate": 5.8170737114823715e-06, + "loss": 0.1136, + "step": 845 + }, + { + "epoch": 0.65, + "learning_rate": 5.794493267899699e-06, + "loss": 0.1198, + "step": 846 + }, + { + "epoch": 0.65, + "learning_rate": 5.771938844548331e-06, + "loss": 0.1435, + "step": 847 + }, + { + "epoch": 0.65, + "learning_rate": 5.749410580976569e-06, + "loss": 0.1306, + "step": 848 + }, + { + "epoch": 0.65, + "learning_rate": 5.7269086165708546e-06, + "loss": 0.1386, + "step": 849 + }, + { + "epoch": 0.65, + "learning_rate": 5.704433090554912e-06, + "loss": 0.1523, + "step": 850 + }, + { + "epoch": 0.65, + "learning_rate": 5.6819841419889e-06, + "loss": 0.1416, + "step": 851 + }, + { + "epoch": 0.65, + "learning_rate": 5.659561909768509e-06, + "loss": 0.1427, + "step": 852 + }, + { + "epoch": 0.65, + "learning_rate": 5.6371665326241635e-06, + "loss": 0.1183, + "step": 853 + }, + { + "epoch": 0.66, + "learning_rate": 5.614798149120108e-06, + "loss": 0.128, + "step": 854 + }, + { + "epoch": 0.66, + "learning_rate": 5.5924568976535775e-06, + "loss": 0.1332, + "step": 855 + }, + { + "epoch": 0.66, + "learning_rate": 5.570142916453944e-06, + "loss": 0.1309, + "step": 856 + }, + { + "epoch": 0.66, + "learning_rate": 5.547856343581848e-06, + "loss": 0.1473, + "step": 857 + }, + { + "epoch": 0.66, + "learning_rate": 5.525597316928344e-06, + "loss": 0.1506, + "step": 858 + }, + { + "epoch": 0.66, + "learning_rate": 5.503365974214059e-06, + "loss": 0.1292, + "step": 859 + }, + { + "epoch": 0.66, + "learning_rate": 5.481162452988344e-06, + "loss": 0.1241, + "step": 860 + }, + { + "epoch": 0.66, + "learning_rate": 5.458986890628389e-06, + "loss": 0.1215, + "step": 861 + }, + { + "epoch": 0.66, + "learning_rate": 5.436839424338426e-06, + "loss": 0.1758, + "step": 862 + }, + { + "epoch": 0.66, + "learning_rate": 5.414720191148822e-06, + "loss": 0.1288, + "step": 863 + }, + { + "epoch": 0.66, + "learning_rate": 5.392629327915289e-06, + "loss": 0.107, + "step": 864 + }, + { + "epoch": 0.66, + "learning_rate": 5.37056697131799e-06, + "loss": 0.1331, + "step": 865 + }, + { + "epoch": 0.66, + "learning_rate": 5.3485332578607205e-06, + "loss": 0.1093, + "step": 866 + }, + { + "epoch": 0.67, + "learning_rate": 5.326528323870049e-06, + "loss": 0.1636, + "step": 867 + }, + { + "epoch": 0.67, + "learning_rate": 5.304552305494493e-06, + "loss": 0.1128, + "step": 868 + }, + { + "epoch": 0.67, + "learning_rate": 5.2826053387036495e-06, + "loss": 0.1729, + "step": 869 + }, + { + "epoch": 0.67, + "learning_rate": 5.260687559287376e-06, + "loss": 0.1187, + "step": 870 + }, + { + "epoch": 0.67, + "learning_rate": 5.238799102854941e-06, + "loss": 0.158, + "step": 871 + }, + { + "epoch": 0.67, + "learning_rate": 5.2169401048341815e-06, + "loss": 0.1252, + "step": 872 + }, + { + "epoch": 0.67, + "learning_rate": 5.1951107004706825e-06, + "loss": 0.1401, + "step": 873 + }, + { + "epoch": 0.67, + "learning_rate": 5.173311024826916e-06, + "loss": 0.1047, + "step": 874 + }, + { + "epoch": 0.67, + "learning_rate": 5.15154121278142e-06, + "loss": 0.1689, + "step": 875 + }, + { + "epoch": 0.67, + "learning_rate": 5.129801399027962e-06, + "loss": 0.151, + "step": 876 + }, + { + "epoch": 0.67, + "learning_rate": 5.108091718074706e-06, + "loss": 0.1289, + "step": 877 + }, + { + "epoch": 0.67, + "learning_rate": 5.08641230424337e-06, + "loss": 0.117, + "step": 878 + }, + { + "epoch": 0.67, + "learning_rate": 5.064763291668425e-06, + "loss": 0.1656, + "step": 879 + }, + { + "epoch": 0.68, + "learning_rate": 5.043144814296214e-06, + "loss": 0.1221, + "step": 880 + }, + { + "epoch": 0.68, + "learning_rate": 5.021557005884183e-06, + "loss": 0.1306, + "step": 881 + }, + { + "epoch": 0.68, + "learning_rate": 5.000000000000003e-06, + "loss": 0.1522, + "step": 882 + }, + { + "epoch": 0.68, + "learning_rate": 4.9784739300207675e-06, + "loss": 0.0971, + "step": 883 + }, + { + "epoch": 0.68, + "learning_rate": 4.956978929132177e-06, + "loss": 0.1313, + "step": 884 + }, + { + "epoch": 0.68, + "learning_rate": 4.935515130327686e-06, + "loss": 0.1356, + "step": 885 + }, + { + "epoch": 0.68, + "learning_rate": 4.914082666407705e-06, + "loss": 0.1385, + "step": 886 + }, + { + "epoch": 0.68, + "learning_rate": 4.892681669978762e-06, + "loss": 0.1486, + "step": 887 + }, + { + "epoch": 0.68, + "learning_rate": 4.871312273452706e-06, + "loss": 0.1226, + "step": 888 + }, + { + "epoch": 0.68, + "learning_rate": 4.849974609045849e-06, + "loss": 0.1167, + "step": 889 + }, + { + "epoch": 0.68, + "learning_rate": 4.828668808778194e-06, + "loss": 0.1118, + "step": 890 + }, + { + "epoch": 0.68, + "learning_rate": 4.807395004472569e-06, + "loss": 0.1064, + "step": 891 + }, + { + "epoch": 0.68, + "learning_rate": 4.786153327753865e-06, + "loss": 0.1519, + "step": 892 + }, + { + "epoch": 0.69, + "learning_rate": 4.764943910048175e-06, + "loss": 0.1515, + "step": 893 + }, + { + "epoch": 0.69, + "learning_rate": 4.743766882582006e-06, + "loss": 0.1672, + "step": 894 + }, + { + "epoch": 0.69, + "learning_rate": 4.722622376381455e-06, + "loss": 0.1354, + "step": 895 + }, + { + "epoch": 0.69, + "learning_rate": 4.701510522271416e-06, + "loss": 0.1295, + "step": 896 + }, + { + "epoch": 0.69, + "learning_rate": 4.680431450874748e-06, + "loss": 0.1541, + "step": 897 + }, + { + "epoch": 0.69, + "learning_rate": 4.659385292611479e-06, + "loss": 0.1615, + "step": 898 + }, + { + "epoch": 0.69, + "learning_rate": 4.638372177698007e-06, + "loss": 0.1323, + "step": 899 + }, + { + "epoch": 0.69, + "learning_rate": 4.617392236146267e-06, + "loss": 0.1592, + "step": 900 + }, + { + "epoch": 0.69, + "learning_rate": 4.59644559776296e-06, + "loss": 0.1747, + "step": 901 + }, + { + "epoch": 0.69, + "learning_rate": 4.575532392148729e-06, + "loss": 0.1382, + "step": 902 + }, + { + "epoch": 0.69, + "learning_rate": 4.554652748697361e-06, + "loss": 0.1252, + "step": 903 + }, + { + "epoch": 0.69, + "learning_rate": 4.533806796594989e-06, + "loss": 0.1432, + "step": 904 + }, + { + "epoch": 0.69, + "learning_rate": 4.5129946648192926e-06, + "loss": 0.1538, + "step": 905 + }, + { + "epoch": 0.7, + "learning_rate": 4.4922164821386926e-06, + "loss": 0.1437, + "step": 906 + }, + { + "epoch": 0.7, + "learning_rate": 4.471472377111574e-06, + "loss": 0.1414, + "step": 907 + }, + { + "epoch": 0.7, + "learning_rate": 4.450762478085465e-06, + "loss": 0.1168, + "step": 908 + }, + { + "epoch": 0.7, + "learning_rate": 4.43008691319626e-06, + "loss": 0.1405, + "step": 909 + }, + { + "epoch": 0.7, + "learning_rate": 4.409445810367421e-06, + "loss": 0.1372, + "step": 910 + }, + { + "epoch": 0.7, + "learning_rate": 4.388839297309182e-06, + "loss": 0.134, + "step": 911 + }, + { + "epoch": 0.7, + "learning_rate": 4.368267501517778e-06, + "loss": 0.1462, + "step": 912 + }, + { + "epoch": 0.7, + "learning_rate": 4.347730550274628e-06, + "loss": 0.1419, + "step": 913 + }, + { + "epoch": 0.7, + "learning_rate": 4.327228570645566e-06, + "loss": 0.124, + "step": 914 + }, + { + "epoch": 0.7, + "learning_rate": 4.306761689480049e-06, + "loss": 0.1397, + "step": 915 + }, + { + "epoch": 0.7, + "learning_rate": 4.286330033410384e-06, + "loss": 0.1179, + "step": 916 + }, + { + "epoch": 0.7, + "learning_rate": 4.26593372885091e-06, + "loss": 0.1735, + "step": 917 + }, + { + "epoch": 0.7, + "learning_rate": 4.2455729019972715e-06, + "loss": 0.1429, + "step": 918 + }, + { + "epoch": 0.71, + "learning_rate": 4.2252476788255735e-06, + "loss": 0.1198, + "step": 919 + }, + { + "epoch": 0.71, + "learning_rate": 4.204958185091662e-06, + "loss": 0.133, + "step": 920 + }, + { + "epoch": 0.71, + "learning_rate": 4.184704546330302e-06, + "loss": 0.1344, + "step": 921 + }, + { + "epoch": 0.71, + "learning_rate": 4.164486887854424e-06, + "loss": 0.1655, + "step": 922 + }, + { + "epoch": 0.71, + "learning_rate": 4.144305334754335e-06, + "loss": 0.1162, + "step": 923 + }, + { + "epoch": 0.71, + "learning_rate": 4.124160011896965e-06, + "loss": 0.1405, + "step": 924 + }, + { + "epoch": 0.71, + "learning_rate": 4.104051043925068e-06, + "loss": 0.1249, + "step": 925 + }, + { + "epoch": 0.71, + "learning_rate": 4.083978555256466e-06, + "loss": 0.1201, + "step": 926 + }, + { + "epoch": 0.71, + "learning_rate": 4.063942670083288e-06, + "loss": 0.1511, + "step": 927 + }, + { + "epoch": 0.71, + "learning_rate": 4.043943512371171e-06, + "loss": 0.0911, + "step": 928 + }, + { + "epoch": 0.71, + "learning_rate": 4.02398120585853e-06, + "loss": 0.1402, + "step": 929 + }, + { + "epoch": 0.71, + "learning_rate": 4.004055874055769e-06, + "loss": 0.1402, + "step": 930 + }, + { + "epoch": 0.71, + "learning_rate": 3.984167640244518e-06, + "loss": 0.1791, + "step": 931 + }, + { + "epoch": 0.72, + "learning_rate": 3.964316627476883e-06, + "loss": 0.1511, + "step": 932 + }, + { + "epoch": 0.72, + "learning_rate": 3.9445029585746695e-06, + "loss": 0.1492, + "step": 933 + }, + { + "epoch": 0.72, + "learning_rate": 3.924726756128632e-06, + "loss": 0.1592, + "step": 934 + }, + { + "epoch": 0.72, + "learning_rate": 3.90498814249772e-06, + "loss": 0.1299, + "step": 935 + }, + { + "epoch": 0.72, + "learning_rate": 3.885287239808306e-06, + "loss": 0.1415, + "step": 936 + }, + { + "epoch": 0.72, + "learning_rate": 3.8656241699534396e-06, + "loss": 0.1324, + "step": 937 + }, + { + "epoch": 0.72, + "learning_rate": 3.845999054592092e-06, + "loss": 0.1171, + "step": 938 + }, + { + "epoch": 0.72, + "learning_rate": 3.826412015148403e-06, + "loss": 0.1458, + "step": 939 + }, + { + "epoch": 0.72, + "learning_rate": 3.8068631728109364e-06, + "loss": 0.1119, + "step": 940 + }, + { + "epoch": 0.72, + "learning_rate": 3.7873526485319135e-06, + "loss": 0.1619, + "step": 941 + }, + { + "epoch": 0.72, + "learning_rate": 3.7678805630264802e-06, + "loss": 0.1647, + "step": 942 + }, + { + "epoch": 0.72, + "learning_rate": 3.7484470367719493e-06, + "loss": 0.1176, + "step": 943 + }, + { + "epoch": 0.72, + "learning_rate": 3.7290521900070774e-06, + "loss": 0.1519, + "step": 944 + }, + { + "epoch": 0.73, + "learning_rate": 3.709696142731277e-06, + "loss": 0.1189, + "step": 945 + }, + { + "epoch": 0.73, + "learning_rate": 3.6903790147039286e-06, + "loss": 0.1123, + "step": 946 + }, + { + "epoch": 0.73, + "learning_rate": 3.671100925443586e-06, + "loss": 0.1374, + "step": 947 + }, + { + "epoch": 0.73, + "learning_rate": 3.6518619942272883e-06, + "loss": 0.1244, + "step": 948 + }, + { + "epoch": 0.73, + "learning_rate": 3.6326623400897797e-06, + "loss": 0.1542, + "step": 949 + }, + { + "epoch": 0.73, + "learning_rate": 3.613502081822794e-06, + "loss": 0.1219, + "step": 950 + }, + { + "epoch": 0.73, + "learning_rate": 3.594381337974322e-06, + "loss": 0.146, + "step": 951 + }, + { + "epoch": 0.73, + "learning_rate": 3.575300226847863e-06, + "loss": 0.1529, + "step": 952 + }, + { + "epoch": 0.73, + "learning_rate": 3.5562588665017063e-06, + "loss": 0.1328, + "step": 953 + }, + { + "epoch": 0.73, + "learning_rate": 3.5372573747481907e-06, + "loss": 0.1257, + "step": 954 + }, + { + "epoch": 0.73, + "learning_rate": 3.5182958691529945e-06, + "loss": 0.1434, + "step": 955 + }, + { + "epoch": 0.73, + "learning_rate": 3.4993744670343723e-06, + "loss": 0.1478, + "step": 956 + }, + { + "epoch": 0.73, + "learning_rate": 3.4804932854624716e-06, + "loss": 0.1598, + "step": 957 + }, + { + "epoch": 0.74, + "learning_rate": 3.4616524412585797e-06, + "loss": 0.1108, + "step": 958 + }, + { + "epoch": 0.74, + "learning_rate": 3.4428520509944065e-06, + "loss": 0.1122, + "step": 959 + }, + { + "epoch": 0.74, + "learning_rate": 3.4240922309913715e-06, + "loss": 0.1046, + "step": 960 + }, + { + "epoch": 0.74, + "learning_rate": 3.405373097319875e-06, + "loss": 0.1217, + "step": 961 + }, + { + "epoch": 0.74, + "learning_rate": 3.3866947657985827e-06, + "loss": 0.1311, + "step": 962 + }, + { + "epoch": 0.74, + "learning_rate": 3.3680573519937187e-06, + "loss": 0.151, + "step": 963 + }, + { + "epoch": 0.74, + "learning_rate": 3.3494609712183323e-06, + "loss": 0.1498, + "step": 964 + }, + { + "epoch": 0.74, + "learning_rate": 3.3309057385315966e-06, + "loss": 0.1071, + "step": 965 + }, + { + "epoch": 0.74, + "learning_rate": 3.312391768738095e-06, + "loss": 0.1489, + "step": 966 + }, + { + "epoch": 0.74, + "learning_rate": 3.2939191763871047e-06, + "loss": 0.1398, + "step": 967 + }, + { + "epoch": 0.74, + "learning_rate": 3.2754880757719065e-06, + "loss": 0.148, + "step": 968 + }, + { + "epoch": 0.74, + "learning_rate": 3.2570985809290525e-06, + "loss": 0.1647, + "step": 969 + }, + { + "epoch": 0.74, + "learning_rate": 3.2387508056376726e-06, + "loss": 0.1411, + "step": 970 + }, + { + "epoch": 0.75, + "learning_rate": 3.2204448634187734e-06, + "loss": 0.1137, + "step": 971 + }, + { + "epoch": 0.75, + "learning_rate": 3.202180867534541e-06, + "loss": 0.1285, + "step": 972 + }, + { + "epoch": 0.75, + "learning_rate": 3.183958930987612e-06, + "loss": 0.122, + "step": 973 + }, + { + "epoch": 0.75, + "learning_rate": 3.165779166520415e-06, + "loss": 0.113, + "step": 974 + }, + { + "epoch": 0.75, + "learning_rate": 3.1476416866144343e-06, + "loss": 0.1422, + "step": 975 + }, + { + "epoch": 0.75, + "learning_rate": 3.1295466034895482e-06, + "loss": 0.1307, + "step": 976 + }, + { + "epoch": 0.75, + "learning_rate": 3.1114940291033092e-06, + "loss": 0.1409, + "step": 977 + }, + { + "epoch": 0.75, + "learning_rate": 3.093484075150257e-06, + "loss": 0.1454, + "step": 978 + }, + { + "epoch": 0.75, + "learning_rate": 3.0755168530612444e-06, + "loss": 0.1587, + "step": 979 + }, + { + "epoch": 0.75, + "learning_rate": 3.057592474002723e-06, + "loss": 0.1316, + "step": 980 + }, + { + "epoch": 0.75, + "learning_rate": 3.0397110488760707e-06, + "loss": 0.1523, + "step": 981 + }, + { + "epoch": 0.75, + "learning_rate": 3.021872688316896e-06, + "loss": 0.1177, + "step": 982 + }, + { + "epoch": 0.75, + "learning_rate": 3.0040775026943757e-06, + "loss": 0.1644, + "step": 983 + }, + { + "epoch": 0.76, + "learning_rate": 2.98632560211053e-06, + "loss": 0.1648, + "step": 984 + }, + { + "epoch": 0.76, + "learning_rate": 2.968617096399592e-06, + "loss": 0.1328, + "step": 985 + }, + { + "epoch": 0.76, + "learning_rate": 2.9509520951272864e-06, + "loss": 0.1494, + "step": 986 + }, + { + "epoch": 0.76, + "learning_rate": 2.933330707590174e-06, + "loss": 0.1299, + "step": 987 + }, + { + "epoch": 0.76, + "learning_rate": 2.9157530428149683e-06, + "loss": 0.1473, + "step": 988 + }, + { + "epoch": 0.76, + "learning_rate": 2.898219209557864e-06, + "loss": 0.134, + "step": 989 + }, + { + "epoch": 0.76, + "learning_rate": 2.8807293163038572e-06, + "loss": 0.1327, + "step": 990 + }, + { + "epoch": 0.76, + "learning_rate": 2.8632834712660882e-06, + "loss": 0.1027, + "step": 991 + }, + { + "epoch": 0.76, + "learning_rate": 2.845881782385156e-06, + "loss": 0.1169, + "step": 992 + }, + { + "epoch": 0.76, + "learning_rate": 2.828524357328456e-06, + "loss": 0.119, + "step": 993 + }, + { + "epoch": 0.76, + "learning_rate": 2.8112113034895273e-06, + "loss": 0.1217, + "step": 994 + }, + { + "epoch": 0.76, + "learning_rate": 2.7939427279873543e-06, + "loss": 0.1499, + "step": 995 + }, + { + "epoch": 0.76, + "learning_rate": 2.7767187376657502e-06, + "loss": 0.1459, + "step": 996 + }, + { + "epoch": 0.77, + "learning_rate": 2.7595394390926557e-06, + "loss": 0.1046, + "step": 997 + }, + { + "epoch": 0.77, + "learning_rate": 2.742404938559502e-06, + "loss": 0.1093, + "step": 998 + }, + { + "epoch": 0.77, + "learning_rate": 2.72531534208054e-06, + "loss": 0.1437, + "step": 999 + }, + { + "epoch": 0.77, + "learning_rate": 2.708270755392207e-06, + "loss": 0.1722, + "step": 1000 + }, + { + "epoch": 0.77, + "learning_rate": 2.6912712839524336e-06, + "loss": 0.153, + "step": 1001 + }, + { + "epoch": 0.77, + "learning_rate": 2.674317032940035e-06, + "loss": 0.0967, + "step": 1002 + }, + { + "epoch": 0.77, + "learning_rate": 2.657408107254027e-06, + "loss": 0.1166, + "step": 1003 + }, + { + "epoch": 0.77, + "learning_rate": 2.640544611512993e-06, + "loss": 0.1004, + "step": 1004 + }, + { + "epoch": 0.77, + "learning_rate": 2.6237266500544344e-06, + "loss": 0.1589, + "step": 1005 + }, + { + "epoch": 0.77, + "learning_rate": 2.60695432693412e-06, + "loss": 0.1299, + "step": 1006 + }, + { + "epoch": 0.77, + "learning_rate": 2.5902277459254533e-06, + "loss": 0.1389, + "step": 1007 + }, + { + "epoch": 0.77, + "learning_rate": 2.5735470105188166e-06, + "loss": 0.1682, + "step": 1008 + }, + { + "epoch": 0.77, + "learning_rate": 2.5569122239209366e-06, + "loss": 0.1118, + "step": 1009 + }, + { + "epoch": 0.78, + "learning_rate": 2.5403234890542483e-06, + "loss": 0.1663, + "step": 1010 + }, + { + "epoch": 0.78, + "learning_rate": 2.523780908556265e-06, + "loss": 0.1281, + "step": 1011 + }, + { + "epoch": 0.78, + "learning_rate": 2.507284584778913e-06, + "loss": 0.1573, + "step": 1012 + }, + { + "epoch": 0.78, + "learning_rate": 2.490834619787943e-06, + "loss": 0.1495, + "step": 1013 + }, + { + "epoch": 0.78, + "learning_rate": 2.4744311153622636e-06, + "loss": 0.1109, + "step": 1014 + }, + { + "epoch": 0.78, + "learning_rate": 2.4580741729933246e-06, + "loss": 0.1307, + "step": 1015 + }, + { + "epoch": 0.78, + "learning_rate": 2.4417638938844877e-06, + "loss": 0.1447, + "step": 1016 + }, + { + "epoch": 0.78, + "learning_rate": 2.425500378950405e-06, + "loss": 0.1495, + "step": 1017 + }, + { + "epoch": 0.78, + "learning_rate": 2.4092837288163807e-06, + "loss": 0.1116, + "step": 1018 + }, + { + "epoch": 0.78, + "learning_rate": 2.3931140438177724e-06, + "loss": 0.1251, + "step": 1019 + }, + { + "epoch": 0.78, + "learning_rate": 2.3769914239993432e-06, + "loss": 0.1301, + "step": 1020 + }, + { + "epoch": 0.78, + "learning_rate": 2.3609159691146577e-06, + "loss": 0.132, + "step": 1021 + }, + { + "epoch": 0.78, + "learning_rate": 2.3448877786254742e-06, + "loss": 0.1156, + "step": 1022 + }, + { + "epoch": 0.79, + "learning_rate": 2.3289069517010953e-06, + "loss": 0.1458, + "step": 1023 + }, + { + "epoch": 0.79, + "learning_rate": 2.312973587217798e-06, + "loss": 0.117, + "step": 1024 + }, + { + "epoch": 0.79, + "learning_rate": 2.2970877837581917e-06, + "loss": 0.1222, + "step": 1025 + }, + { + "epoch": 0.79, + "learning_rate": 2.2812496396106155e-06, + "loss": 0.1165, + "step": 1026 + }, + { + "epoch": 0.79, + "learning_rate": 2.2654592527685305e-06, + "loss": 0.1253, + "step": 1027 + }, + { + "epoch": 0.79, + "learning_rate": 2.2497167209299277e-06, + "loss": 0.1566, + "step": 1028 + }, + { + "epoch": 0.79, + "learning_rate": 2.234022141496689e-06, + "loss": 0.1167, + "step": 1029 + }, + { + "epoch": 0.79, + "learning_rate": 2.2183756115740274e-06, + "loss": 0.1401, + "step": 1030 + }, + { + "epoch": 0.79, + "learning_rate": 2.202777227969851e-06, + "loss": 0.1691, + "step": 1031 + }, + { + "epoch": 0.79, + "learning_rate": 2.1872270871941824e-06, + "loss": 0.1343, + "step": 1032 + }, + { + "epoch": 0.79, + "learning_rate": 2.171725285458559e-06, + "loss": 0.1263, + "step": 1033 + }, + { + "epoch": 0.79, + "learning_rate": 2.1562719186754287e-06, + "loss": 0.1284, + "step": 1034 + }, + { + "epoch": 0.79, + "learning_rate": 2.140867082457575e-06, + "loss": 0.1513, + "step": 1035 + }, + { + "epoch": 0.8, + "learning_rate": 2.1255108721175066e-06, + "loss": 0.1252, + "step": 1036 + }, + { + "epoch": 0.8, + "learning_rate": 2.110203382666873e-06, + "loss": 0.1099, + "step": 1037 + }, + { + "epoch": 0.8, + "learning_rate": 2.0949447088158812e-06, + "loss": 0.1269, + "step": 1038 + }, + { + "epoch": 0.8, + "learning_rate": 2.079734944972717e-06, + "loss": 0.1399, + "step": 1039 + }, + { + "epoch": 0.8, + "learning_rate": 2.064574185242929e-06, + "loss": 0.1677, + "step": 1040 + }, + { + "epoch": 0.8, + "learning_rate": 2.049462523428891e-06, + "loss": 0.1197, + "step": 1041 + }, + { + "epoch": 0.8, + "learning_rate": 2.0344000530291875e-06, + "loss": 0.1111, + "step": 1042 + }, + { + "epoch": 0.8, + "learning_rate": 2.019386867238048e-06, + "loss": 0.1371, + "step": 1043 + }, + { + "epoch": 0.8, + "learning_rate": 2.0044230589447698e-06, + "loss": 0.1318, + "step": 1044 + }, + { + "epoch": 0.8, + "learning_rate": 1.9895087207331422e-06, + "loss": 0.1212, + "step": 1045 + }, + { + "epoch": 0.8, + "learning_rate": 1.9746439448808785e-06, + "loss": 0.1719, + "step": 1046 + }, + { + "epoch": 0.8, + "learning_rate": 1.959828823359037e-06, + "loss": 0.1774, + "step": 1047 + }, + { + "epoch": 0.8, + "learning_rate": 1.945063447831452e-06, + "loss": 0.1444, + "step": 1048 + }, + { + "epoch": 0.81, + "learning_rate": 1.9303479096541767e-06, + "loss": 0.1477, + "step": 1049 + }, + { + "epoch": 0.81, + "learning_rate": 1.9156822998749126e-06, + "loss": 0.1213, + "step": 1050 + }, + { + "epoch": 0.81, + "learning_rate": 1.9010667092324342e-06, + "loss": 0.1494, + "step": 1051 + }, + { + "epoch": 0.81, + "learning_rate": 1.8865012281560523e-06, + "loss": 0.1501, + "step": 1052 + }, + { + "epoch": 0.81, + "learning_rate": 1.8719859467650314e-06, + "loss": 0.1611, + "step": 1053 + }, + { + "epoch": 0.81, + "learning_rate": 1.8575209548680472e-06, + "loss": 0.1302, + "step": 1054 + }, + { + "epoch": 0.81, + "learning_rate": 1.8431063419626172e-06, + "loss": 0.1354, + "step": 1055 + }, + { + "epoch": 0.81, + "learning_rate": 1.8287421972345697e-06, + "loss": 0.1032, + "step": 1056 + }, + { + "epoch": 0.81, + "learning_rate": 1.814428609557458e-06, + "loss": 0.1342, + "step": 1057 + }, + { + "epoch": 0.81, + "learning_rate": 1.8001656674920453e-06, + "loss": 0.1583, + "step": 1058 + }, + { + "epoch": 0.81, + "learning_rate": 1.7859534592857353e-06, + "loss": 0.1392, + "step": 1059 + }, + { + "epoch": 0.81, + "learning_rate": 1.7717920728720284e-06, + "loss": 0.1368, + "step": 1060 + }, + { + "epoch": 0.81, + "learning_rate": 1.7576815958699932e-06, + "loss": 0.1336, + "step": 1061 + }, + { + "epoch": 0.82, + "learning_rate": 1.7436221155836953e-06, + "loss": 0.1251, + "step": 1062 + }, + { + "epoch": 0.82, + "learning_rate": 1.7296137190016916e-06, + "loss": 0.1131, + "step": 1063 + }, + { + "epoch": 0.82, + "learning_rate": 1.7156564927964637e-06, + "loss": 0.1281, + "step": 1064 + }, + { + "epoch": 0.82, + "learning_rate": 1.7017505233238962e-06, + "loss": 0.1192, + "step": 1065 + }, + { + "epoch": 0.82, + "learning_rate": 1.6878958966227366e-06, + "loss": 0.1315, + "step": 1066 + }, + { + "epoch": 0.82, + "learning_rate": 1.6740926984140771e-06, + "loss": 0.1454, + "step": 1067 + }, + { + "epoch": 0.82, + "learning_rate": 1.6603410141007913e-06, + "loss": 0.1409, + "step": 1068 + }, + { + "epoch": 0.82, + "learning_rate": 1.646640928767047e-06, + "loss": 0.1392, + "step": 1069 + }, + { + "epoch": 0.82, + "learning_rate": 1.6329925271777492e-06, + "loss": 0.137, + "step": 1070 + }, + { + "epoch": 0.82, + "learning_rate": 1.6193958937780285e-06, + "loss": 0.1085, + "step": 1071 + }, + { + "epoch": 0.82, + "learning_rate": 1.6058511126927178e-06, + "loss": 0.136, + "step": 1072 + }, + { + "epoch": 0.82, + "learning_rate": 1.5923582677258254e-06, + "loss": 0.1249, + "step": 1073 + }, + { + "epoch": 0.82, + "learning_rate": 1.5789174423600307e-06, + "loss": 0.1355, + "step": 1074 + }, + { + "epoch": 0.83, + "learning_rate": 1.5655287197561497e-06, + "loss": 0.1412, + "step": 1075 + }, + { + "epoch": 0.83, + "learning_rate": 1.5521921827526332e-06, + "loss": 0.1488, + "step": 1076 + }, + { + "epoch": 0.83, + "learning_rate": 1.5389079138650464e-06, + "loss": 0.1064, + "step": 1077 + }, + { + "epoch": 0.83, + "learning_rate": 1.5256759952855737e-06, + "loss": 0.1736, + "step": 1078 + }, + { + "epoch": 0.83, + "learning_rate": 1.5124965088824795e-06, + "loss": 0.1454, + "step": 1079 + }, + { + "epoch": 0.83, + "learning_rate": 1.4993695361996374e-06, + "loss": 0.1449, + "step": 1080 + }, + { + "epoch": 0.83, + "learning_rate": 1.4862951584560037e-06, + "loss": 0.1262, + "step": 1081 + }, + { + "epoch": 0.83, + "learning_rate": 1.473273456545119e-06, + "loss": 0.1353, + "step": 1082 + }, + { + "epoch": 0.83, + "learning_rate": 1.460304511034606e-06, + "loss": 0.149, + "step": 1083 + }, + { + "epoch": 0.83, + "learning_rate": 1.447388402165686e-06, + "loss": 0.1104, + "step": 1084 + }, + { + "epoch": 0.83, + "learning_rate": 1.4345252098526518e-06, + "loss": 0.1214, + "step": 1085 + }, + { + "epoch": 0.83, + "learning_rate": 1.4217150136824121e-06, + "loss": 0.1336, + "step": 1086 + }, + { + "epoch": 0.83, + "learning_rate": 1.4089578929139635e-06, + "loss": 0.1774, + "step": 1087 + }, + { + "epoch": 0.83, + "learning_rate": 1.3962539264779206e-06, + "loss": 0.1284, + "step": 1088 + }, + { + "epoch": 0.84, + "learning_rate": 1.3836031929760296e-06, + "loss": 0.1526, + "step": 1089 + }, + { + "epoch": 0.84, + "learning_rate": 1.371005770680659e-06, + "loss": 0.146, + "step": 1090 + }, + { + "epoch": 0.84, + "learning_rate": 1.3584617375343478e-06, + "loss": 0.1228, + "step": 1091 + }, + { + "epoch": 0.84, + "learning_rate": 1.3459711711492962e-06, + "loss": 0.1277, + "step": 1092 + }, + { + "epoch": 0.84, + "learning_rate": 1.3335341488068997e-06, + "loss": 0.1296, + "step": 1093 + }, + { + "epoch": 0.84, + "learning_rate": 1.321150747457265e-06, + "loss": 0.1346, + "step": 1094 + }, + { + "epoch": 0.84, + "learning_rate": 1.308821043718743e-06, + "loss": 0.1183, + "step": 1095 + }, + { + "epoch": 0.84, + "learning_rate": 1.2965451138774343e-06, + "loss": 0.1393, + "step": 1096 + }, + { + "epoch": 0.84, + "learning_rate": 1.2843230338867452e-06, + "loss": 0.1581, + "step": 1097 + }, + { + "epoch": 0.84, + "learning_rate": 1.2721548793668936e-06, + "loss": 0.1227, + "step": 1098 + }, + { + "epoch": 0.84, + "learning_rate": 1.2600407256044544e-06, + "loss": 0.1638, + "step": 1099 + }, + { + "epoch": 0.84, + "learning_rate": 1.2479806475518896e-06, + "loss": 0.1273, + "step": 1100 + }, + { + "epoch": 0.84, + "learning_rate": 1.2359747198270832e-06, + "loss": 0.1533, + "step": 1101 + }, + { + "epoch": 0.85, + "learning_rate": 1.2240230167128863e-06, + "loss": 0.1843, + "step": 1102 + }, + { + "epoch": 0.85, + "learning_rate": 1.212125612156647e-06, + "loss": 0.1274, + "step": 1103 + }, + { + "epoch": 0.85, + "learning_rate": 1.2002825797697604e-06, + "loss": 0.1323, + "step": 1104 + }, + { + "epoch": 0.85, + "learning_rate": 1.188493992827211e-06, + "loss": 0.1273, + "step": 1105 + }, + { + "epoch": 0.85, + "learning_rate": 1.1767599242671245e-06, + "loss": 0.1308, + "step": 1106 + }, + { + "epoch": 0.85, + "learning_rate": 1.1650804466902987e-06, + "loss": 0.1276, + "step": 1107 + }, + { + "epoch": 0.85, + "learning_rate": 1.1534556323597824e-06, + "loss": 0.1516, + "step": 1108 + }, + { + "epoch": 0.85, + "learning_rate": 1.1418855532004025e-06, + "loss": 0.134, + "step": 1109 + }, + { + "epoch": 0.85, + "learning_rate": 1.130370280798334e-06, + "loss": 0.1335, + "step": 1110 + }, + { + "epoch": 0.85, + "learning_rate": 1.1189098864006488e-06, + "loss": 0.1317, + "step": 1111 + }, + { + "epoch": 0.85, + "learning_rate": 1.107504440914885e-06, + "loss": 0.1416, + "step": 1112 + }, + { + "epoch": 0.85, + "learning_rate": 1.0961540149085958e-06, + "loss": 0.1387, + "step": 1113 + }, + { + "epoch": 0.85, + "learning_rate": 1.084858678608922e-06, + "loss": 0.1221, + "step": 1114 + }, + { + "epoch": 0.86, + "learning_rate": 1.073618501902154e-06, + "loss": 0.1387, + "step": 1115 + }, + { + "epoch": 0.86, + "learning_rate": 1.0624335543332964e-06, + "loss": 0.125, + "step": 1116 + }, + { + "epoch": 0.86, + "learning_rate": 1.0513039051056507e-06, + "loss": 0.1354, + "step": 1117 + }, + { + "epoch": 0.86, + "learning_rate": 1.040229623080362e-06, + "loss": 0.1358, + "step": 1118 + }, + { + "epoch": 0.86, + "learning_rate": 1.0292107767760261e-06, + "loss": 0.1284, + "step": 1119 + }, + { + "epoch": 0.86, + "learning_rate": 1.0182474343682346e-06, + "loss": 0.1293, + "step": 1120 + }, + { + "epoch": 0.86, + "learning_rate": 1.007339663689173e-06, + "loss": 0.1234, + "step": 1121 + }, + { + "epoch": 0.86, + "learning_rate": 9.964875322271895e-07, + "loss": 0.1043, + "step": 1122 + }, + { + "epoch": 0.86, + "learning_rate": 9.85691107126392e-07, + "loss": 0.1461, + "step": 1123 + }, + { + "epoch": 0.86, + "learning_rate": 9.749504551862087e-07, + "loss": 0.1273, + "step": 1124 + }, + { + "epoch": 0.86, + "learning_rate": 9.642656428610042e-07, + "loss": 0.1233, + "step": 1125 + }, + { + "epoch": 0.86, + "learning_rate": 9.536367362596422e-07, + "loss": 0.1323, + "step": 1126 + }, + { + "epoch": 0.86, + "learning_rate": 9.430638011450932e-07, + "loss": 0.1294, + "step": 1127 + }, + { + "epoch": 0.87, + "learning_rate": 9.325469029340195e-07, + "loss": 0.1316, + "step": 1128 + }, + { + "epoch": 0.87, + "learning_rate": 9.220861066963715e-07, + "loss": 0.125, + "step": 1129 + }, + { + "epoch": 0.87, + "learning_rate": 9.116814771549943e-07, + "loss": 0.1264, + "step": 1130 + }, + { + "epoch": 0.87, + "learning_rate": 9.013330786852093e-07, + "loss": 0.1618, + "step": 1131 + }, + { + "epoch": 0.87, + "learning_rate": 8.910409753144344e-07, + "loss": 0.1174, + "step": 1132 + }, + { + "epoch": 0.87, + "learning_rate": 8.808052307217707e-07, + "loss": 0.0969, + "step": 1133 + }, + { + "epoch": 0.87, + "learning_rate": 8.7062590823763e-07, + "loss": 0.1237, + "step": 1134 + }, + { + "epoch": 0.87, + "learning_rate": 8.605030708433149e-07, + "loss": 0.1344, + "step": 1135 + }, + { + "epoch": 0.87, + "learning_rate": 8.504367811706582e-07, + "loss": 0.1058, + "step": 1136 + }, + { + "epoch": 0.87, + "learning_rate": 8.404271015016152e-07, + "loss": 0.1223, + "step": 1137 + }, + { + "epoch": 0.87, + "learning_rate": 8.304740937678835e-07, + "loss": 0.1405, + "step": 1138 + }, + { + "epoch": 0.87, + "learning_rate": 8.205778195505221e-07, + "loss": 0.142, + "step": 1139 + }, + { + "epoch": 0.87, + "learning_rate": 8.107383400795754e-07, + "loss": 0.1165, + "step": 1140 + }, + { + "epoch": 0.88, + "learning_rate": 8.009557162336823e-07, + "loss": 0.1229, + "step": 1141 + }, + { + "epoch": 0.88, + "learning_rate": 7.912300085397051e-07, + "loss": 0.1359, + "step": 1142 + }, + { + "epoch": 0.88, + "learning_rate": 7.815612771723591e-07, + "loss": 0.1432, + "step": 1143 + }, + { + "epoch": 0.88, + "learning_rate": 7.719495819538325e-07, + "loss": 0.1377, + "step": 1144 + }, + { + "epoch": 0.88, + "learning_rate": 7.623949823534282e-07, + "loss": 0.1649, + "step": 1145 + }, + { + "epoch": 0.88, + "learning_rate": 7.528975374871783e-07, + "loss": 0.1356, + "step": 1146 + }, + { + "epoch": 0.88, + "learning_rate": 7.434573061174966e-07, + "loss": 0.1296, + "step": 1147 + }, + { + "epoch": 0.88, + "learning_rate": 7.340743466528021e-07, + "loss": 0.1578, + "step": 1148 + }, + { + "epoch": 0.88, + "learning_rate": 7.247487171471624e-07, + "loss": 0.1542, + "step": 1149 + }, + { + "epoch": 0.88, + "learning_rate": 7.154804752999344e-07, + "loss": 0.1497, + "step": 1150 + }, + { + "epoch": 0.88, + "learning_rate": 7.062696784554124e-07, + "loss": 0.1151, + "step": 1151 + }, + { + "epoch": 0.88, + "learning_rate": 6.971163836024575e-07, + "loss": 0.1234, + "step": 1152 + }, + { + "epoch": 0.88, + "learning_rate": 6.880206473741646e-07, + "loss": 0.1415, + "step": 1153 + }, + { + "epoch": 0.89, + "learning_rate": 6.789825260475014e-07, + "loss": 0.1332, + "step": 1154 + }, + { + "epoch": 0.89, + "learning_rate": 6.700020755429559e-07, + "loss": 0.1279, + "step": 1155 + }, + { + "epoch": 0.89, + "learning_rate": 6.610793514242075e-07, + "loss": 0.1436, + "step": 1156 + }, + { + "epoch": 0.89, + "learning_rate": 6.522144088977578e-07, + "loss": 0.125, + "step": 1157 + }, + { + "epoch": 0.89, + "learning_rate": 6.434073028126164e-07, + "loss": 0.1414, + "step": 1158 + }, + { + "epoch": 0.89, + "learning_rate": 6.346580876599395e-07, + "loss": 0.1407, + "step": 1159 + }, + { + "epoch": 0.89, + "learning_rate": 6.259668175727063e-07, + "loss": 0.1343, + "step": 1160 + }, + { + "epoch": 0.89, + "learning_rate": 6.17333546325376e-07, + "loss": 0.1296, + "step": 1161 + }, + { + "epoch": 0.89, + "learning_rate": 6.08758327333564e-07, + "loss": 0.1446, + "step": 1162 + }, + { + "epoch": 0.89, + "learning_rate": 6.002412136536972e-07, + "loss": 0.1435, + "step": 1163 + }, + { + "epoch": 0.89, + "learning_rate": 5.917822579827037e-07, + "loss": 0.1203, + "step": 1164 + }, + { + "epoch": 0.89, + "learning_rate": 5.833815126576714e-07, + "loss": 0.1339, + "step": 1165 + }, + { + "epoch": 0.89, + "learning_rate": 5.750390296555319e-07, + "loss": 0.1219, + "step": 1166 + }, + { + "epoch": 0.9, + "learning_rate": 5.667548605927365e-07, + "loss": 0.1252, + "step": 1167 + }, + { + "epoch": 0.9, + "learning_rate": 5.585290567249424e-07, + "loss": 0.1125, + "step": 1168 + }, + { + "epoch": 0.9, + "learning_rate": 5.503616689466829e-07, + "loss": 0.1358, + "step": 1169 + }, + { + "epoch": 0.9, + "learning_rate": 5.422527477910666e-07, + "loss": 0.121, + "step": 1170 + }, + { + "epoch": 0.9, + "learning_rate": 5.342023434294552e-07, + "loss": 0.132, + "step": 1171 + }, + { + "epoch": 0.9, + "learning_rate": 5.262105056711575e-07, + "loss": 0.1108, + "step": 1172 + }, + { + "epoch": 0.9, + "learning_rate": 5.182772839631223e-07, + "loss": 0.1111, + "step": 1173 + }, + { + "epoch": 0.9, + "learning_rate": 5.104027273896239e-07, + "loss": 0.1494, + "step": 1174 + }, + { + "epoch": 0.9, + "learning_rate": 5.025868846719728e-07, + "loss": 0.1277, + "step": 1175 + }, + { + "epoch": 0.9, + "learning_rate": 4.94829804168202e-07, + "loss": 0.1166, + "step": 1176 + }, + { + "epoch": 0.9, + "learning_rate": 4.871315338727711e-07, + "loss": 0.1416, + "step": 1177 + }, + { + "epoch": 0.9, + "learning_rate": 4.794921214162717e-07, + "loss": 0.1551, + "step": 1178 + }, + { + "epoch": 0.9, + "learning_rate": 4.7191161406513363e-07, + "loss": 0.1167, + "step": 1179 + }, + { + "epoch": 0.91, + "learning_rate": 4.6439005872132457e-07, + "loss": 0.1158, + "step": 1180 + }, + { + "epoch": 0.91, + "learning_rate": 4.569275019220709e-07, + "loss": 0.1466, + "step": 1181 + }, + { + "epoch": 0.91, + "learning_rate": 4.4952398983956205e-07, + "loss": 0.1682, + "step": 1182 + }, + { + "epoch": 0.91, + "learning_rate": 4.421795682806662e-07, + "loss": 0.1123, + "step": 1183 + }, + { + "epoch": 0.91, + "learning_rate": 4.34894282686652e-07, + "loss": 0.1434, + "step": 1184 + }, + { + "epoch": 0.91, + "learning_rate": 4.27668178132894e-07, + "loss": 0.1271, + "step": 1185 + }, + { + "epoch": 0.91, + "learning_rate": 4.2050129932861394e-07, + "loss": 0.1254, + "step": 1186 + }, + { + "epoch": 0.91, + "learning_rate": 4.1339369061658696e-07, + "loss": 0.1512, + "step": 1187 + }, + { + "epoch": 0.91, + "learning_rate": 4.063453959728747e-07, + "loss": 0.1283, + "step": 1188 + }, + { + "epoch": 0.91, + "learning_rate": 3.9935645900654906e-07, + "loss": 0.1514, + "step": 1189 + }, + { + "epoch": 0.91, + "learning_rate": 3.924269229594335e-07, + "loss": 0.1357, + "step": 1190 + }, + { + "epoch": 0.91, + "learning_rate": 3.855568307058155e-07, + "loss": 0.1205, + "step": 1191 + }, + { + "epoch": 0.91, + "learning_rate": 3.7874622475220336e-07, + "loss": 0.1191, + "step": 1192 + }, + { + "epoch": 0.92, + "learning_rate": 3.7199514723704754e-07, + "loss": 0.1203, + "step": 1193 + }, + { + "epoch": 0.92, + "learning_rate": 3.6530363993048654e-07, + "loss": 0.1058, + "step": 1194 + }, + { + "epoch": 0.92, + "learning_rate": 3.58671744234087e-07, + "loss": 0.1151, + "step": 1195 + }, + { + "epoch": 0.92, + "learning_rate": 3.5209950118058835e-07, + "loss": 0.1348, + "step": 1196 + }, + { + "epoch": 0.92, + "learning_rate": 3.455869514336474e-07, + "loss": 0.1615, + "step": 1197 + }, + { + "epoch": 0.92, + "learning_rate": 3.3913413528758877e-07, + "loss": 0.1454, + "step": 1198 + }, + { + "epoch": 0.92, + "learning_rate": 3.327410926671526e-07, + "loss": 0.1103, + "step": 1199 + }, + { + "epoch": 0.92, + "learning_rate": 3.264078631272505e-07, + "loss": 0.142, + "step": 1200 + }, + { + "epoch": 0.92, + "learning_rate": 3.2013448585272333e-07, + "loss": 0.1428, + "step": 1201 + }, + { + "epoch": 0.92, + "learning_rate": 3.139209996580872e-07, + "loss": 0.1244, + "step": 1202 + }, + { + "epoch": 0.92, + "learning_rate": 3.07767442987309e-07, + "loss": 0.1048, + "step": 1203 + }, + { + "epoch": 0.92, + "learning_rate": 3.016738539135566e-07, + "loss": 0.1411, + "step": 1204 + }, + { + "epoch": 0.92, + "learning_rate": 2.9564027013896713e-07, + "loss": 0.1516, + "step": 1205 + }, + { + "epoch": 0.93, + "learning_rate": 2.896667289944166e-07, + "loss": 0.1236, + "step": 1206 + }, + { + "epoch": 0.93, + "learning_rate": 2.837532674392862e-07, + "loss": 0.0942, + "step": 1207 + }, + { + "epoch": 0.93, + "learning_rate": 2.7789992206123106e-07, + "loss": 0.1439, + "step": 1208 + }, + { + "epoch": 0.93, + "learning_rate": 2.721067290759594e-07, + "loss": 0.1613, + "step": 1209 + }, + { + "epoch": 0.93, + "learning_rate": 2.6637372432700483e-07, + "loss": 0.1216, + "step": 1210 + }, + { + "epoch": 0.93, + "learning_rate": 2.6070094328550344e-07, + "loss": 0.1573, + "step": 1211 + }, + { + "epoch": 0.93, + "learning_rate": 2.5508842104998265e-07, + "loss": 0.1479, + "step": 1212 + }, + { + "epoch": 0.93, + "learning_rate": 2.4953619234612816e-07, + "loss": 0.1694, + "step": 1213 + }, + { + "epoch": 0.93, + "learning_rate": 2.4404429152658726e-07, + "loss": 0.129, + "step": 1214 + }, + { + "epoch": 0.93, + "learning_rate": 2.386127525707427e-07, + "loss": 0.1313, + "step": 1215 + }, + { + "epoch": 0.93, + "learning_rate": 2.332416090845102e-07, + "loss": 0.1096, + "step": 1216 + }, + { + "epoch": 0.93, + "learning_rate": 2.279308943001246e-07, + "loss": 0.1484, + "step": 1217 + }, + { + "epoch": 0.93, + "learning_rate": 2.2268064107594302e-07, + "loss": 0.1172, + "step": 1218 + }, + { + "epoch": 0.94, + "learning_rate": 2.1749088189622848e-07, + "loss": 0.1484, + "step": 1219 + }, + { + "epoch": 0.94, + "learning_rate": 2.1236164887096345e-07, + "loss": 0.1427, + "step": 1220 + }, + { + "epoch": 0.94, + "learning_rate": 2.0729297373563995e-07, + "loss": 0.1105, + "step": 1221 + }, + { + "epoch": 0.94, + "learning_rate": 2.0228488785106636e-07, + "loss": 0.1247, + "step": 1222 + }, + { + "epoch": 0.94, + "learning_rate": 1.9733742220317876e-07, + "loss": 0.139, + "step": 1223 + }, + { + "epoch": 0.94, + "learning_rate": 1.9245060740284316e-07, + "loss": 0.1609, + "step": 1224 + }, + { + "epoch": 0.94, + "learning_rate": 1.8762447368566582e-07, + "loss": 0.0999, + "step": 1225 + }, + { + "epoch": 0.94, + "learning_rate": 1.828590509118089e-07, + "loss": 0.1661, + "step": 1226 + }, + { + "epoch": 0.94, + "learning_rate": 1.7815436856580714e-07, + "loss": 0.1401, + "step": 1227 + }, + { + "epoch": 0.94, + "learning_rate": 1.7351045575638047e-07, + "loss": 0.119, + "step": 1228 + }, + { + "epoch": 0.94, + "learning_rate": 1.6892734121625954e-07, + "loss": 0.1142, + "step": 1229 + }, + { + "epoch": 0.94, + "learning_rate": 1.644050533020014e-07, + "loss": 0.1285, + "step": 1230 + }, + { + "epoch": 0.94, + "learning_rate": 1.5994361999382202e-07, + "loss": 0.1219, + "step": 1231 + }, + { + "epoch": 0.95, + "learning_rate": 1.5554306889541514e-07, + "loss": 0.1527, + "step": 1232 + }, + { + "epoch": 0.95, + "learning_rate": 1.51203427233787e-07, + "loss": 0.1191, + "step": 1233 + }, + { + "epoch": 0.95, + "learning_rate": 1.4692472185908635e-07, + "loss": 0.1099, + "step": 1234 + }, + { + "epoch": 0.95, + "learning_rate": 1.42706979244438e-07, + "loss": 0.1492, + "step": 1235 + }, + { + "epoch": 0.95, + "learning_rate": 1.3855022548577845e-07, + "loss": 0.144, + "step": 1236 + }, + { + "epoch": 0.95, + "learning_rate": 1.344544863016961e-07, + "loss": 0.1532, + "step": 1237 + }, + { + "epoch": 0.95, + "learning_rate": 1.304197870332713e-07, + "loss": 0.1497, + "step": 1238 + }, + { + "epoch": 0.95, + "learning_rate": 1.2644615264391536e-07, + "loss": 0.1043, + "step": 1239 + }, + { + "epoch": 0.95, + "learning_rate": 1.225336077192274e-07, + "loss": 0.0963, + "step": 1240 + }, + { + "epoch": 0.95, + "learning_rate": 1.1868217646682889e-07, + "loss": 0.1232, + "step": 1241 + }, + { + "epoch": 0.95, + "learning_rate": 1.1489188271622154e-07, + "loss": 0.1072, + "step": 1242 + }, + { + "epoch": 0.95, + "learning_rate": 1.1116274991864073e-07, + "loss": 0.1263, + "step": 1243 + }, + { + "epoch": 0.95, + "learning_rate": 1.0749480114690458e-07, + "loss": 0.1107, + "step": 1244 + }, + { + "epoch": 0.96, + "learning_rate": 1.0388805909527621e-07, + "loss": 0.1743, + "step": 1245 + }, + { + "epoch": 0.96, + "learning_rate": 1.003425460793217e-07, + "loss": 0.1223, + "step": 1246 + }, + { + "epoch": 0.96, + "learning_rate": 9.685828403577124e-08, + "loss": 0.1193, + "step": 1247 + }, + { + "epoch": 0.96, + "learning_rate": 9.343529452238598e-08, + "loss": 0.1379, + "step": 1248 + }, + { + "epoch": 0.96, + "learning_rate": 9.00735987178214e-08, + "loss": 0.1318, + "step": 1249 + }, + { + "epoch": 0.96, + "learning_rate": 8.677321742149525e-08, + "loss": 0.1224, + "step": 1250 + }, + { + "epoch": 0.96, + "learning_rate": 8.353417105346762e-08, + "loss": 0.1554, + "step": 1251 + }, + { + "epoch": 0.96, + "learning_rate": 8.035647965430215e-08, + "loss": 0.1392, + "step": 1252 + }, + { + "epoch": 0.96, + "learning_rate": 7.724016288495173e-08, + "loss": 0.1354, + "step": 1253 + }, + { + "epoch": 0.96, + "learning_rate": 7.418524002663296e-08, + "loss": 0.1282, + "step": 1254 + }, + { + "epoch": 0.96, + "learning_rate": 7.119172998070412e-08, + "loss": 0.1335, + "step": 1255 + }, + { + "epoch": 0.96, + "learning_rate": 6.825965126855517e-08, + "loss": 0.16, + "step": 1256 + }, + { + "epoch": 0.96, + "learning_rate": 6.53890220314879e-08, + "loss": 0.1734, + "step": 1257 + }, + { + "epoch": 0.97, + "learning_rate": 6.25798600306049e-08, + "loss": 0.1393, + "step": 1258 + }, + { + "epoch": 0.97, + "learning_rate": 5.983218264669965e-08, + "loss": 0.1155, + "step": 1259 + }, + { + "epoch": 0.97, + "learning_rate": 5.714600688014882e-08, + "loss": 0.1148, + "step": 1260 + }, + { + "epoch": 0.97, + "learning_rate": 5.4521349350808996e-08, + "loss": 0.1144, + "step": 1261 + }, + { + "epoch": 0.97, + "learning_rate": 5.195822629791125e-08, + "loss": 0.1296, + "step": 1262 + }, + { + "epoch": 0.97, + "learning_rate": 4.945665357996121e-08, + "loss": 0.1425, + "step": 1263 + }, + { + "epoch": 0.97, + "learning_rate": 4.701664667464245e-08, + "loss": 0.1125, + "step": 1264 + }, + { + "epoch": 0.97, + "learning_rate": 4.463822067871993e-08, + "loss": 0.1179, + "step": 1265 + }, + { + "epoch": 0.97, + "learning_rate": 4.2321390307944464e-08, + "loss": 0.1641, + "step": 1266 + }, + { + "epoch": 0.97, + "learning_rate": 4.006616989696621e-08, + "loss": 0.1232, + "step": 1267 + }, + { + "epoch": 0.97, + "learning_rate": 3.787257339924133e-08, + "loss": 0.1457, + "step": 1268 + }, + { + "epoch": 0.97, + "learning_rate": 3.5740614386947644e-08, + "loss": 0.1624, + "step": 1269 + }, + { + "epoch": 0.97, + "learning_rate": 3.367030605090249e-08, + "loss": 0.1359, + "step": 1270 + }, + { + "epoch": 0.98, + "learning_rate": 3.166166120047831e-08, + "loss": 0.1428, + "step": 1271 + }, + { + "epoch": 0.98, + "learning_rate": 2.971469226352275e-08, + "loss": 0.1678, + "step": 1272 + }, + { + "epoch": 0.98, + "learning_rate": 2.7829411286287577e-08, + "loss": 0.1122, + "step": 1273 + }, + { + "epoch": 0.98, + "learning_rate": 2.600582993334544e-08, + "loss": 0.1486, + "step": 1274 + }, + { + "epoch": 0.98, + "learning_rate": 2.4243959487524337e-08, + "loss": 0.1507, + "step": 1275 + }, + { + "epoch": 0.98, + "learning_rate": 2.254381084983659e-08, + "loss": 0.1028, + "step": 1276 + }, + { + "epoch": 0.98, + "learning_rate": 2.0905394539409984e-08, + "loss": 0.1249, + "step": 1277 + }, + { + "epoch": 0.98, + "learning_rate": 1.9328720693420065e-08, + "loss": 0.1239, + "step": 1278 + }, + { + "epoch": 0.98, + "learning_rate": 1.781379906703573e-08, + "loss": 0.1354, + "step": 1279 + }, + { + "epoch": 0.98, + "learning_rate": 1.636063903335039e-08, + "loss": 0.1522, + "step": 1280 + }, + { + "epoch": 0.98, + "learning_rate": 1.496924958332868e-08, + "loss": 0.1352, + "step": 1281 + }, + { + "epoch": 0.98, + "learning_rate": 1.3639639325748744e-08, + "loss": 0.1457, + "step": 1282 + }, + { + "epoch": 0.98, + "learning_rate": 1.2371816487150023e-08, + "loss": 0.131, + "step": 1283 + }, + { + "epoch": 0.99, + "learning_rate": 1.1165788911781106e-08, + "loss": 0.1323, + "step": 1284 + }, + { + "epoch": 0.99, + "learning_rate": 1.0021564061554189e-08, + "loss": 0.1082, + "step": 1285 + }, + { + "epoch": 0.99, + "learning_rate": 8.93914901599291e-09, + "loss": 0.1111, + "step": 1286 + }, + { + "epoch": 0.99, + "learning_rate": 7.918550472199027e-09, + "loss": 0.1308, + "step": 1287 + }, + { + "epoch": 0.99, + "learning_rate": 6.959774744796921e-09, + "loss": 0.1291, + "step": 1288 + }, + { + "epoch": 0.99, + "learning_rate": 6.062827765906942e-09, + "loss": 0.1315, + "step": 1289 + }, + { + "epoch": 0.99, + "learning_rate": 5.227715085103224e-09, + "loss": 0.1454, + "step": 1290 + }, + { + "epoch": 0.99, + "learning_rate": 4.454441869377047e-09, + "loss": 0.131, + "step": 1291 + }, + { + "epoch": 0.99, + "learning_rate": 3.7430129031124085e-09, + "loss": 0.1419, + "step": 1292 + }, + { + "epoch": 0.99, + "learning_rate": 3.0934325880460636e-09, + "loss": 0.1497, + "step": 1293 + }, + { + "epoch": 0.99, + "learning_rate": 2.5057049432519744e-09, + "loss": 0.1219, + "step": 1294 + }, + { + "epoch": 0.99, + "learning_rate": 1.979833605104675e-09, + "loss": 0.129, + "step": 1295 + }, + { + "epoch": 0.99, + "learning_rate": 1.5158218272670611e-09, + "loss": 0.1303, + "step": 1296 + }, + { + "epoch": 1.0, + "learning_rate": 1.1136724806637411e-09, + "loss": 0.1225, + "step": 1297 + }, + { + "epoch": 1.0, + "learning_rate": 7.733880534654959e-10, + "loss": 0.1472, + "step": 1298 + }, + { + "epoch": 1.0, + "learning_rate": 4.94970651073734e-10, + "loss": 0.142, + "step": 1299 + }, + { + "epoch": 1.0, + "learning_rate": 2.784219961060597e-10, + "loss": 0.1168, + "step": 1300 + }, + { + "epoch": 1.0, + "learning_rate": 1.237434283907213e-10, + "loss": 0.1114, + "step": 1301 + }, + { + "epoch": 1.0, + "learning_rate": 3.09359049488478e-11, + "loss": 0.106, + "step": 1302 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.1397, + "step": 1303 + }, + { + "epoch": 1.0, + "step": 1303, + "total_flos": 223839164989440.0, + "train_loss": 0.15930285860004556, + "train_runtime": 5519.5021, + "train_samples_per_second": 30.197, + "train_steps_per_second": 0.236 + } + ], + "logging_steps": 1.0, + "max_steps": 1303, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 800, + "total_flos": 223839164989440.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}