{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 1757800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 9.994311070656502e-05, "loss": 1637.3108, "step": 1000 }, { "epoch": 0.11, "learning_rate": 9.988622141313006e-05, "loss": 1771.0639, "step": 2000 }, { "epoch": 0.17, "learning_rate": 9.982933211969508e-05, "loss": 1831.546, "step": 3000 }, { "epoch": 0.23, "learning_rate": 9.977244282626011e-05, "loss": 1863.4652, "step": 4000 }, { "epoch": 0.28, "learning_rate": 9.971555353282513e-05, "loss": 1892.3608, "step": 5000 }, { "epoch": 0.34, "learning_rate": 9.965866423939015e-05, "loss": 1948.3324, "step": 6000 }, { "epoch": 0.4, "learning_rate": 9.960177494595517e-05, "loss": 1953.1626, "step": 7000 }, { "epoch": 0.46, "learning_rate": 9.95448856525202e-05, "loss": 1971.6802, "step": 8000 }, { "epoch": 0.51, "learning_rate": 9.948799635908522e-05, "loss": 1989.0836, "step": 9000 }, { "epoch": 0.57, "learning_rate": 9.943110706565025e-05, "loss": 1991.3279, "step": 10000 }, { "epoch": 0.63, "learning_rate": 9.937421777221527e-05, "loss": 2039.0211, "step": 11000 }, { "epoch": 0.68, "learning_rate": 9.93173284787803e-05, "loss": 2022.0984, "step": 12000 }, { "epoch": 0.74, "learning_rate": 9.926043918534533e-05, "loss": 2031.0062, "step": 13000 }, { "epoch": 0.8, "learning_rate": 9.920354989191035e-05, "loss": 2054.1835, "step": 14000 }, { "epoch": 0.85, "learning_rate": 9.914666059847537e-05, "loss": 2045.1063, "step": 15000 }, { "epoch": 0.91, "learning_rate": 9.908977130504038e-05, "loss": 2078.9246, "step": 16000 }, { "epoch": 0.97, "learning_rate": 9.903288201160542e-05, "loss": 2075.4118, "step": 17000 }, { "epoch": 1.02, "learning_rate": 9.897599271817044e-05, "loss": 2033.5196, "step": 18000 }, { "epoch": 1.08, "learning_rate": 9.891910342473547e-05, "loss": 1964.649, "step": 19000 }, { "epoch": 1.14, "learning_rate": 9.886221413130049e-05, "loss": 1983.5348, "step": 20000 }, { "epoch": 1.19, "learning_rate": 9.880532483786552e-05, "loss": 1993.9566, "step": 21000 }, { "epoch": 1.25, "learning_rate": 9.874843554443054e-05, "loss": 2022.0739, "step": 22000 }, { "epoch": 1.31, "learning_rate": 9.869154625099558e-05, "loss": 2010.5351, "step": 23000 }, { "epoch": 1.37, "learning_rate": 9.86346569575606e-05, "loss": 2029.9906, "step": 24000 }, { "epoch": 1.42, "learning_rate": 9.857776766412561e-05, "loss": 2053.5098, "step": 25000 }, { "epoch": 1.48, "learning_rate": 9.852087837069063e-05, "loss": 2027.3185, "step": 26000 }, { "epoch": 1.54, "learning_rate": 9.846398907725567e-05, "loss": 2035.3547, "step": 27000 }, { "epoch": 1.59, "learning_rate": 9.840709978382069e-05, "loss": 2064.6104, "step": 28000 }, { "epoch": 1.65, "learning_rate": 9.835021049038572e-05, "loss": 2072.1658, "step": 29000 }, { "epoch": 1.71, "learning_rate": 9.829332119695074e-05, "loss": 2094.5132, "step": 30000 }, { "epoch": 1.76, "learning_rate": 9.823643190351576e-05, "loss": 2092.0145, "step": 31000 }, { "epoch": 1.82, "learning_rate": 9.817954261008079e-05, "loss": 2071.7852, "step": 32000 }, { "epoch": 1.88, "learning_rate": 9.812265331664581e-05, "loss": 2086.4401, "step": 33000 }, { "epoch": 1.93, "learning_rate": 9.806576402321084e-05, "loss": 2098.7477, "step": 34000 }, { "epoch": 1.99, "learning_rate": 9.800887472977585e-05, "loss": 2118.5907, "step": 35000 }, { "epoch": 2.05, "learning_rate": 9.795198543634088e-05, "loss": 1991.307, "step": 36000 }, { "epoch": 2.1, "learning_rate": 9.78950961429059e-05, "loss": 1982.902, "step": 37000 }, { "epoch": 2.16, "learning_rate": 9.783820684947094e-05, "loss": 1988.4017, "step": 38000 }, { "epoch": 2.22, "learning_rate": 9.778131755603596e-05, "loss": 2007.5981, "step": 39000 }, { "epoch": 2.28, "learning_rate": 9.772442826260099e-05, "loss": 2019.4265, "step": 40000 }, { "epoch": 2.33, "learning_rate": 9.766753896916601e-05, "loss": 2033.9654, "step": 41000 }, { "epoch": 2.39, "learning_rate": 9.761064967573104e-05, "loss": 2042.6026, "step": 42000 }, { "epoch": 2.45, "learning_rate": 9.755376038229606e-05, "loss": 2029.7671, "step": 43000 }, { "epoch": 2.5, "learning_rate": 9.749687108886108e-05, "loss": 2060.2393, "step": 44000 }, { "epoch": 2.56, "learning_rate": 9.74399817954261e-05, "loss": 2059.3415, "step": 45000 }, { "epoch": 2.62, "learning_rate": 9.738309250199112e-05, "loss": 2068.303, "step": 46000 }, { "epoch": 2.67, "learning_rate": 9.732620320855615e-05, "loss": 2076.7921, "step": 47000 }, { "epoch": 2.73, "learning_rate": 9.726931391512117e-05, "loss": 2089.6512, "step": 48000 }, { "epoch": 2.79, "learning_rate": 9.72124246216862e-05, "loss": 2096.8182, "step": 49000 }, { "epoch": 2.84, "learning_rate": 9.715553532825122e-05, "loss": 2081.0214, "step": 50000 }, { "epoch": 2.9, "learning_rate": 9.709864603481626e-05, "loss": 2100.5905, "step": 51000 }, { "epoch": 2.96, "learning_rate": 9.704175674138128e-05, "loss": 2103.213, "step": 52000 }, { "epoch": 3.02, "learning_rate": 9.698486744794631e-05, "loss": 2068.1625, "step": 53000 }, { "epoch": 3.07, "learning_rate": 9.692797815451133e-05, "loss": 1957.0281, "step": 54000 }, { "epoch": 3.13, "learning_rate": 9.687108886107635e-05, "loss": 1977.1799, "step": 55000 }, { "epoch": 3.19, "learning_rate": 9.681419956764137e-05, "loss": 2004.3457, "step": 56000 }, { "epoch": 3.24, "learning_rate": 9.67573102742064e-05, "loss": 2019.3999, "step": 57000 }, { "epoch": 3.3, "learning_rate": 9.670042098077142e-05, "loss": 2028.043, "step": 58000 }, { "epoch": 3.36, "learning_rate": 9.664353168733645e-05, "loss": 2032.5048, "step": 59000 }, { "epoch": 3.41, "learning_rate": 9.658664239390147e-05, "loss": 2041.4691, "step": 60000 }, { "epoch": 3.47, "learning_rate": 9.652975310046651e-05, "loss": 2037.748, "step": 61000 }, { "epoch": 3.53, "learning_rate": 9.647286380703153e-05, "loss": 2052.7881, "step": 62000 }, { "epoch": 3.58, "learning_rate": 9.641597451359655e-05, "loss": 2052.8214, "step": 63000 }, { "epoch": 3.64, "learning_rate": 9.635908522016157e-05, "loss": 2060.3598, "step": 64000 }, { "epoch": 3.7, "learning_rate": 9.630219592672658e-05, "loss": 2086.1793, "step": 65000 }, { "epoch": 3.75, "learning_rate": 9.624530663329162e-05, "loss": 2075.7579, "step": 66000 }, { "epoch": 3.81, "learning_rate": 9.618841733985664e-05, "loss": 2063.3329, "step": 67000 }, { "epoch": 3.87, "learning_rate": 9.613152804642167e-05, "loss": 2074.581, "step": 68000 }, { "epoch": 3.93, "learning_rate": 9.607463875298669e-05, "loss": 2083.8542, "step": 69000 }, { "epoch": 3.98, "learning_rate": 9.601774945955172e-05, "loss": 2097.5785, "step": 70000 }, { "epoch": 4.04, "learning_rate": 9.596086016611674e-05, "loss": 2010.7385, "step": 71000 }, { "epoch": 4.1, "learning_rate": 9.590397087268178e-05, "loss": 1968.284, "step": 72000 }, { "epoch": 4.15, "learning_rate": 9.58470815792468e-05, "loss": 1974.3724, "step": 73000 }, { "epoch": 4.21, "learning_rate": 9.579019228581181e-05, "loss": 2005.3251, "step": 74000 }, { "epoch": 4.27, "learning_rate": 9.573330299237683e-05, "loss": 2014.8865, "step": 75000 }, { "epoch": 4.32, "learning_rate": 9.567641369894187e-05, "loss": 2023.0207, "step": 76000 }, { "epoch": 4.38, "learning_rate": 9.561952440550689e-05, "loss": 2024.7419, "step": 77000 }, { "epoch": 4.44, "learning_rate": 9.55626351120719e-05, "loss": 2033.7381, "step": 78000 }, { "epoch": 4.49, "learning_rate": 9.550574581863694e-05, "loss": 2045.6694, "step": 79000 }, { "epoch": 4.55, "learning_rate": 9.544885652520196e-05, "loss": 2041.7299, "step": 80000 }, { "epoch": 4.61, "learning_rate": 9.539196723176699e-05, "loss": 2049.2071, "step": 81000 }, { "epoch": 4.66, "learning_rate": 9.533507793833201e-05, "loss": 2059.9716, "step": 82000 }, { "epoch": 4.72, "learning_rate": 9.527818864489703e-05, "loss": 2043.7918, "step": 83000 }, { "epoch": 4.78, "learning_rate": 9.522129935146205e-05, "loss": 2073.5507, "step": 84000 }, { "epoch": 4.84, "learning_rate": 9.516441005802708e-05, "loss": 2068.1969, "step": 85000 }, { "epoch": 4.89, "learning_rate": 9.51075207645921e-05, "loss": 2078.5601, "step": 86000 }, { "epoch": 4.95, "learning_rate": 9.505063147115714e-05, "loss": 2077.9275, "step": 87000 }, { "epoch": 5.01, "learning_rate": 9.499374217772216e-05, "loss": 2078.0383, "step": 88000 }, { "epoch": 5.06, "learning_rate": 9.493685288428719e-05, "loss": 1966.2024, "step": 89000 }, { "epoch": 5.12, "learning_rate": 9.487996359085221e-05, "loss": 1973.5184, "step": 90000 }, { "epoch": 5.18, "learning_rate": 9.482307429741724e-05, "loss": 1980.9996, "step": 91000 }, { "epoch": 5.23, "learning_rate": 9.476618500398226e-05, "loss": 1990.6835, "step": 92000 }, { "epoch": 5.29, "learning_rate": 9.470929571054728e-05, "loss": 1993.3916, "step": 93000 }, { "epoch": 5.35, "learning_rate": 9.46524064171123e-05, "loss": 2016.3351, "step": 94000 }, { "epoch": 5.4, "learning_rate": 9.459551712367732e-05, "loss": 2015.6781, "step": 95000 }, { "epoch": 5.46, "learning_rate": 9.453862783024235e-05, "loss": 2014.7127, "step": 96000 }, { "epoch": 5.52, "learning_rate": 9.448173853680737e-05, "loss": 2033.2291, "step": 97000 }, { "epoch": 5.58, "learning_rate": 9.44248492433724e-05, "loss": 2036.7319, "step": 98000 }, { "epoch": 5.63, "learning_rate": 9.436795994993742e-05, "loss": 2024.842, "step": 99000 }, { "epoch": 5.69, "learning_rate": 9.431107065650246e-05, "loss": 2032.8951, "step": 100000 }, { "epoch": 5.75, "learning_rate": 9.425418136306748e-05, "loss": 2050.2114, "step": 101000 }, { "epoch": 5.8, "learning_rate": 9.419729206963251e-05, "loss": 2052.493, "step": 102000 }, { "epoch": 5.86, "learning_rate": 9.414040277619752e-05, "loss": 2054.2525, "step": 103000 }, { "epoch": 5.92, "learning_rate": 9.408351348276255e-05, "loss": 2070.9837, "step": 104000 }, { "epoch": 5.97, "learning_rate": 9.402662418932757e-05, "loss": 2083.3815, "step": 105000 }, { "epoch": 6.03, "learning_rate": 9.39697348958926e-05, "loss": 2012.522, "step": 106000 }, { "epoch": 6.09, "learning_rate": 9.391284560245762e-05, "loss": 1943.3739, "step": 107000 }, { "epoch": 6.14, "learning_rate": 9.385595630902264e-05, "loss": 1976.78, "step": 108000 }, { "epoch": 6.2, "learning_rate": 9.379906701558767e-05, "loss": 1973.2661, "step": 109000 }, { "epoch": 6.26, "learning_rate": 9.37421777221527e-05, "loss": 1991.4656, "step": 110000 }, { "epoch": 6.31, "learning_rate": 9.368528842871773e-05, "loss": 1986.2832, "step": 111000 }, { "epoch": 6.37, "learning_rate": 9.362839913528275e-05, "loss": 1996.9543, "step": 112000 }, { "epoch": 6.43, "learning_rate": 9.357150984184777e-05, "loss": 1991.405, "step": 113000 }, { "epoch": 6.49, "learning_rate": 9.351462054841278e-05, "loss": 1993.9017, "step": 114000 }, { "epoch": 6.54, "learning_rate": 9.345773125497782e-05, "loss": 2031.8681, "step": 115000 }, { "epoch": 6.6, "learning_rate": 9.340084196154284e-05, "loss": 2002.7425, "step": 116000 }, { "epoch": 6.66, "learning_rate": 9.334395266810787e-05, "loss": 2026.0885, "step": 117000 }, { "epoch": 6.71, "learning_rate": 9.328706337467289e-05, "loss": 2026.4601, "step": 118000 }, { "epoch": 6.77, "learning_rate": 9.323017408123792e-05, "loss": 2039.4704, "step": 119000 }, { "epoch": 6.83, "learning_rate": 9.317328478780294e-05, "loss": 2029.2517, "step": 120000 }, { "epoch": 6.88, "learning_rate": 9.311639549436798e-05, "loss": 2062.0242, "step": 121000 }, { "epoch": 6.94, "learning_rate": 9.3059506200933e-05, "loss": 2051.3376, "step": 122000 }, { "epoch": 7.0, "learning_rate": 9.300261690749801e-05, "loss": 2035.9021, "step": 123000 }, { "epoch": 7.05, "learning_rate": 9.294572761406303e-05, "loss": 1919.8876, "step": 124000 }, { "epoch": 7.11, "learning_rate": 9.288883832062805e-05, "loss": 1939.9697, "step": 125000 }, { "epoch": 7.17, "learning_rate": 9.283194902719309e-05, "loss": 1953.6514, "step": 126000 }, { "epoch": 7.22, "learning_rate": 9.27750597337581e-05, "loss": 1975.6353, "step": 127000 }, { "epoch": 7.28, "learning_rate": 9.271817044032314e-05, "loss": 1992.5641, "step": 128000 }, { "epoch": 7.34, "learning_rate": 9.266128114688816e-05, "loss": 1991.482, "step": 129000 }, { "epoch": 7.4, "learning_rate": 9.260439185345319e-05, "loss": 1979.8079, "step": 130000 }, { "epoch": 7.45, "learning_rate": 9.254750256001821e-05, "loss": 1982.3815, "step": 131000 }, { "epoch": 7.51, "learning_rate": 9.249061326658323e-05, "loss": 1999.8026, "step": 132000 }, { "epoch": 7.57, "learning_rate": 9.243372397314825e-05, "loss": 2007.609, "step": 133000 }, { "epoch": 7.62, "learning_rate": 9.237683467971328e-05, "loss": 2027.0755, "step": 134000 }, { "epoch": 7.68, "learning_rate": 9.23199453862783e-05, "loss": 2043.5187, "step": 135000 }, { "epoch": 7.74, "learning_rate": 9.226305609284334e-05, "loss": 2033.0004, "step": 136000 }, { "epoch": 7.79, "learning_rate": 9.220616679940836e-05, "loss": 2025.6005, "step": 137000 }, { "epoch": 7.85, "learning_rate": 9.214927750597338e-05, "loss": 2043.1772, "step": 138000 }, { "epoch": 7.91, "learning_rate": 9.209238821253841e-05, "loss": 2019.7066, "step": 139000 }, { "epoch": 7.96, "learning_rate": 9.203549891910343e-05, "loss": 2039.6357, "step": 140000 }, { "epoch": 8.02, "learning_rate": 9.197860962566846e-05, "loss": 1984.4004, "step": 141000 }, { "epoch": 8.08, "learning_rate": 9.192172033223348e-05, "loss": 1924.1782, "step": 142000 }, { "epoch": 8.14, "learning_rate": 9.18648310387985e-05, "loss": 1912.8035, "step": 143000 }, { "epoch": 8.19, "learning_rate": 9.180794174536352e-05, "loss": 1957.2061, "step": 144000 }, { "epoch": 8.25, "learning_rate": 9.175105245192855e-05, "loss": 1951.4827, "step": 145000 }, { "epoch": 8.31, "learning_rate": 9.169416315849357e-05, "loss": 1972.4778, "step": 146000 }, { "epoch": 8.36, "learning_rate": 9.16372738650586e-05, "loss": 1967.3432, "step": 147000 }, { "epoch": 8.42, "learning_rate": 9.158038457162362e-05, "loss": 1970.3834, "step": 148000 }, { "epoch": 8.48, "learning_rate": 9.152349527818866e-05, "loss": 1988.6926, "step": 149000 }, { "epoch": 8.53, "learning_rate": 9.146660598475368e-05, "loss": 1997.6614, "step": 150000 }, { "epoch": 8.59, "learning_rate": 9.14097166913187e-05, "loss": 1996.3969, "step": 151000 }, { "epoch": 8.65, "learning_rate": 9.135282739788372e-05, "loss": 2010.5641, "step": 152000 }, { "epoch": 8.7, "learning_rate": 9.129593810444874e-05, "loss": 2014.0509, "step": 153000 }, { "epoch": 8.76, "learning_rate": 9.123904881101377e-05, "loss": 2002.7638, "step": 154000 }, { "epoch": 8.82, "learning_rate": 9.118215951757879e-05, "loss": 2021.278, "step": 155000 }, { "epoch": 8.87, "learning_rate": 9.112527022414382e-05, "loss": 2021.2417, "step": 156000 }, { "epoch": 8.93, "learning_rate": 9.106838093070884e-05, "loss": 2018.3311, "step": 157000 }, { "epoch": 8.99, "learning_rate": 9.101149163727387e-05, "loss": 2032.2806, "step": 158000 }, { "epoch": 9.05, "learning_rate": 9.09546023438389e-05, "loss": 1932.8299, "step": 159000 }, { "epoch": 9.1, "learning_rate": 9.089771305040393e-05, "loss": 1936.1312, "step": 160000 }, { "epoch": 9.16, "learning_rate": 9.084082375696895e-05, "loss": 1944.036, "step": 161000 }, { "epoch": 9.22, "learning_rate": 9.078393446353397e-05, "loss": 1938.2614, "step": 162000 }, { "epoch": 9.27, "learning_rate": 9.072704517009899e-05, "loss": 1952.0616, "step": 163000 }, { "epoch": 9.33, "learning_rate": 9.067015587666402e-05, "loss": 1953.4854, "step": 164000 }, { "epoch": 9.39, "learning_rate": 9.061326658322904e-05, "loss": 1963.2387, "step": 165000 }, { "epoch": 9.44, "learning_rate": 9.055637728979407e-05, "loss": 1969.9245, "step": 166000 }, { "epoch": 9.5, "learning_rate": 9.049948799635909e-05, "loss": 1967.8105, "step": 167000 }, { "epoch": 9.56, "learning_rate": 9.044259870292412e-05, "loss": 1973.13, "step": 168000 }, { "epoch": 9.61, "learning_rate": 9.038570940948914e-05, "loss": 1975.9359, "step": 169000 }, { "epoch": 9.67, "learning_rate": 9.032882011605416e-05, "loss": 1986.6705, "step": 170000 }, { "epoch": 9.73, "learning_rate": 9.027193082261918e-05, "loss": 1980.8743, "step": 171000 }, { "epoch": 9.78, "learning_rate": 9.02150415291842e-05, "loss": 1982.4266, "step": 172000 }, { "epoch": 9.84, "learning_rate": 9.015815223574923e-05, "loss": 1993.9405, "step": 173000 }, { "epoch": 9.9, "learning_rate": 9.010126294231425e-05, "loss": 2003.9435, "step": 174000 }, { "epoch": 9.96, "learning_rate": 9.004437364887929e-05, "loss": 2018.0354, "step": 175000 }, { "epoch": 10.01, "learning_rate": 8.99874843554443e-05, "loss": 1996.1613, "step": 176000 }, { "epoch": 10.07, "learning_rate": 8.993059506200934e-05, "loss": 1887.4395, "step": 177000 }, { "epoch": 10.13, "learning_rate": 8.987370576857436e-05, "loss": 1912.1635, "step": 178000 }, { "epoch": 10.18, "learning_rate": 8.981681647513939e-05, "loss": 1914.1128, "step": 179000 }, { "epoch": 10.24, "learning_rate": 8.975992718170441e-05, "loss": 1915.693, "step": 180000 }, { "epoch": 10.3, "learning_rate": 8.970303788826943e-05, "loss": 1944.1751, "step": 181000 }, { "epoch": 10.35, "learning_rate": 8.964614859483445e-05, "loss": 1957.652, "step": 182000 }, { "epoch": 10.41, "learning_rate": 8.958925930139948e-05, "loss": 1939.1833, "step": 183000 }, { "epoch": 10.47, "learning_rate": 8.95323700079645e-05, "loss": 1971.4809, "step": 184000 }, { "epoch": 10.52, "learning_rate": 8.947548071452952e-05, "loss": 1936.8591, "step": 185000 }, { "epoch": 10.58, "learning_rate": 8.941859142109456e-05, "loss": 1959.6852, "step": 186000 }, { "epoch": 10.64, "learning_rate": 8.936170212765958e-05, "loss": 1980.4951, "step": 187000 }, { "epoch": 10.7, "learning_rate": 8.930481283422461e-05, "loss": 1957.1274, "step": 188000 }, { "epoch": 10.75, "learning_rate": 8.924792354078963e-05, "loss": 1966.8711, "step": 189000 }, { "epoch": 10.81, "learning_rate": 8.919103424735466e-05, "loss": 1969.1336, "step": 190000 }, { "epoch": 10.87, "learning_rate": 8.913414495391968e-05, "loss": 1990.2703, "step": 191000 }, { "epoch": 10.92, "learning_rate": 8.90772556604847e-05, "loss": 1996.2948, "step": 192000 }, { "epoch": 10.98, "learning_rate": 8.902036636704972e-05, "loss": 1971.4264, "step": 193000 }, { "epoch": 11.04, "learning_rate": 8.896347707361475e-05, "loss": 1892.3119, "step": 194000 }, { "epoch": 11.09, "learning_rate": 8.890658778017977e-05, "loss": 1877.9966, "step": 195000 }, { "epoch": 11.15, "learning_rate": 8.88496984867448e-05, "loss": 1904.8198, "step": 196000 }, { "epoch": 11.21, "learning_rate": 8.879280919330982e-05, "loss": 1908.8677, "step": 197000 }, { "epoch": 11.26, "learning_rate": 8.873591989987486e-05, "loss": 1911.1944, "step": 198000 }, { "epoch": 11.32, "learning_rate": 8.867903060643988e-05, "loss": 1921.7016, "step": 199000 }, { "epoch": 11.38, "learning_rate": 8.86221413130049e-05, "loss": 1912.4709, "step": 200000 }, { "epoch": 11.43, "learning_rate": 8.856525201956992e-05, "loss": 1940.3343, "step": 201000 }, { "epoch": 11.49, "learning_rate": 8.850836272613494e-05, "loss": 1933.7241, "step": 202000 }, { "epoch": 11.55, "learning_rate": 8.845147343269997e-05, "loss": 1948.0095, "step": 203000 }, { "epoch": 11.61, "learning_rate": 8.839458413926499e-05, "loss": 1934.6449, "step": 204000 }, { "epoch": 11.66, "learning_rate": 8.833769484583002e-05, "loss": 1965.7999, "step": 205000 }, { "epoch": 11.72, "learning_rate": 8.828080555239504e-05, "loss": 1952.9249, "step": 206000 }, { "epoch": 11.78, "learning_rate": 8.822391625896007e-05, "loss": 1957.9589, "step": 207000 }, { "epoch": 11.83, "learning_rate": 8.81670269655251e-05, "loss": 1976.8232, "step": 208000 }, { "epoch": 11.89, "learning_rate": 8.811013767209013e-05, "loss": 1975.3899, "step": 209000 }, { "epoch": 11.95, "learning_rate": 8.805324837865515e-05, "loss": 1977.1743, "step": 210000 }, { "epoch": 12.0, "learning_rate": 8.799635908522017e-05, "loss": 1971.3185, "step": 211000 }, { "epoch": 12.06, "learning_rate": 8.793946979178519e-05, "loss": 1850.4376, "step": 212000 }, { "epoch": 12.12, "learning_rate": 8.788258049835022e-05, "loss": 1890.5767, "step": 213000 }, { "epoch": 12.17, "learning_rate": 8.782569120491524e-05, "loss": 1880.8981, "step": 214000 }, { "epoch": 12.23, "learning_rate": 8.776880191148026e-05, "loss": 1880.1549, "step": 215000 }, { "epoch": 12.29, "learning_rate": 8.771191261804529e-05, "loss": 1897.1077, "step": 216000 }, { "epoch": 12.34, "learning_rate": 8.765502332461031e-05, "loss": 1910.0996, "step": 217000 }, { "epoch": 12.4, "learning_rate": 8.759813403117534e-05, "loss": 1928.2378, "step": 218000 }, { "epoch": 12.46, "learning_rate": 8.754124473774036e-05, "loss": 1924.8241, "step": 219000 }, { "epoch": 12.52, "learning_rate": 8.748435544430538e-05, "loss": 1932.5865, "step": 220000 }, { "epoch": 12.57, "learning_rate": 8.74274661508704e-05, "loss": 1933.5503, "step": 221000 }, { "epoch": 12.63, "learning_rate": 8.737057685743543e-05, "loss": 1947.2204, "step": 222000 }, { "epoch": 12.69, "learning_rate": 8.731368756400045e-05, "loss": 1935.6608, "step": 223000 }, { "epoch": 12.74, "learning_rate": 8.725679827056549e-05, "loss": 1961.0318, "step": 224000 }, { "epoch": 12.8, "learning_rate": 8.71999089771305e-05, "loss": 1951.2295, "step": 225000 }, { "epoch": 12.86, "learning_rate": 8.714301968369554e-05, "loss": 1963.1482, "step": 226000 }, { "epoch": 12.91, "learning_rate": 8.708613039026056e-05, "loss": 1949.5069, "step": 227000 }, { "epoch": 12.97, "learning_rate": 8.702924109682559e-05, "loss": 1955.2881, "step": 228000 }, { "epoch": 13.03, "learning_rate": 8.697235180339061e-05, "loss": 1908.1034, "step": 229000 }, { "epoch": 13.08, "learning_rate": 8.691546250995563e-05, "loss": 1865.5404, "step": 230000 }, { "epoch": 13.14, "learning_rate": 8.685857321652065e-05, "loss": 1862.6525, "step": 231000 }, { "epoch": 13.2, "learning_rate": 8.680168392308567e-05, "loss": 1880.0992, "step": 232000 }, { "epoch": 13.26, "learning_rate": 8.67447946296507e-05, "loss": 1877.8166, "step": 233000 }, { "epoch": 13.31, "learning_rate": 8.668790533621572e-05, "loss": 1878.3721, "step": 234000 }, { "epoch": 13.37, "learning_rate": 8.663101604278076e-05, "loss": 1895.0763, "step": 235000 }, { "epoch": 13.43, "learning_rate": 8.657412674934578e-05, "loss": 1900.9017, "step": 236000 }, { "epoch": 13.48, "learning_rate": 8.651723745591081e-05, "loss": 1901.857, "step": 237000 }, { "epoch": 13.54, "learning_rate": 8.646034816247583e-05, "loss": 1906.9074, "step": 238000 }, { "epoch": 13.6, "learning_rate": 8.640345886904085e-05, "loss": 1926.1181, "step": 239000 }, { "epoch": 13.65, "learning_rate": 8.634656957560587e-05, "loss": 1928.2863, "step": 240000 }, { "epoch": 13.71, "learning_rate": 8.62896802821709e-05, "loss": 1936.9472, "step": 241000 }, { "epoch": 13.77, "learning_rate": 8.623279098873592e-05, "loss": 1929.119, "step": 242000 }, { "epoch": 13.82, "learning_rate": 8.617590169530095e-05, "loss": 1946.9085, "step": 243000 }, { "epoch": 13.88, "learning_rate": 8.611901240186597e-05, "loss": 1952.9779, "step": 244000 }, { "epoch": 13.94, "learning_rate": 8.606212310843099e-05, "loss": 1954.192, "step": 245000 }, { "epoch": 13.99, "learning_rate": 8.600523381499602e-05, "loss": 1939.1035, "step": 246000 }, { "epoch": 14.05, "learning_rate": 8.594834452156104e-05, "loss": 1842.6915, "step": 247000 }, { "epoch": 14.11, "learning_rate": 8.589145522812608e-05, "loss": 1844.9884, "step": 248000 }, { "epoch": 14.17, "learning_rate": 8.58345659346911e-05, "loss": 1831.4434, "step": 249000 }, { "epoch": 14.22, "learning_rate": 8.577767664125612e-05, "loss": 1868.0654, "step": 250000 }, { "epoch": 14.28, "learning_rate": 8.572078734782114e-05, "loss": 1866.8941, "step": 251000 }, { "epoch": 14.34, "learning_rate": 8.566389805438617e-05, "loss": 1894.5974, "step": 252000 }, { "epoch": 14.39, "learning_rate": 8.560700876095119e-05, "loss": 1878.0779, "step": 253000 }, { "epoch": 14.45, "learning_rate": 8.555011946751622e-05, "loss": 1908.2756, "step": 254000 }, { "epoch": 14.51, "learning_rate": 8.549323017408124e-05, "loss": 1910.6527, "step": 255000 }, { "epoch": 14.56, "learning_rate": 8.543634088064627e-05, "loss": 1891.4085, "step": 256000 }, { "epoch": 14.62, "learning_rate": 8.53794515872113e-05, "loss": 1909.4175, "step": 257000 }, { "epoch": 14.68, "learning_rate": 8.532256229377633e-05, "loss": 1889.477, "step": 258000 }, { "epoch": 14.73, "learning_rate": 8.526567300034135e-05, "loss": 1925.5411, "step": 259000 }, { "epoch": 14.79, "learning_rate": 8.520878370690635e-05, "loss": 1914.162, "step": 260000 }, { "epoch": 14.85, "learning_rate": 8.515189441347139e-05, "loss": 1913.5177, "step": 261000 }, { "epoch": 14.9, "learning_rate": 8.50950051200364e-05, "loss": 1920.026, "step": 262000 }, { "epoch": 14.96, "learning_rate": 8.503811582660144e-05, "loss": 1926.3234, "step": 263000 }, { "epoch": 15.02, "learning_rate": 8.498122653316646e-05, "loss": 1891.2166, "step": 264000 }, { "epoch": 15.08, "learning_rate": 8.492433723973149e-05, "loss": 1845.4495, "step": 265000 }, { "epoch": 15.13, "learning_rate": 8.486744794629651e-05, "loss": 1826.487, "step": 266000 }, { "epoch": 15.19, "learning_rate": 8.481055865286154e-05, "loss": 1853.0485, "step": 267000 }, { "epoch": 15.25, "learning_rate": 8.475366935942656e-05, "loss": 1851.555, "step": 268000 }, { "epoch": 15.3, "learning_rate": 8.469678006599158e-05, "loss": 1853.2924, "step": 269000 }, { "epoch": 15.36, "learning_rate": 8.46398907725566e-05, "loss": 1869.0499, "step": 270000 }, { "epoch": 15.42, "learning_rate": 8.458300147912163e-05, "loss": 1886.5152, "step": 271000 }, { "epoch": 15.47, "learning_rate": 8.452611218568665e-05, "loss": 1860.7915, "step": 272000 }, { "epoch": 15.53, "learning_rate": 8.446922289225169e-05, "loss": 1885.7366, "step": 273000 }, { "epoch": 15.59, "learning_rate": 8.44123335988167e-05, "loss": 1894.759, "step": 274000 }, { "epoch": 15.64, "learning_rate": 8.435544430538174e-05, "loss": 1907.0171, "step": 275000 }, { "epoch": 15.7, "learning_rate": 8.429855501194676e-05, "loss": 1905.3022, "step": 276000 }, { "epoch": 15.76, "learning_rate": 8.424166571851178e-05, "loss": 1883.3041, "step": 277000 }, { "epoch": 15.82, "learning_rate": 8.418477642507681e-05, "loss": 1918.4162, "step": 278000 }, { "epoch": 15.87, "learning_rate": 8.412788713164183e-05, "loss": 1909.8654, "step": 279000 }, { "epoch": 15.93, "learning_rate": 8.407099783820685e-05, "loss": 1915.3605, "step": 280000 }, { "epoch": 15.99, "learning_rate": 8.401410854477187e-05, "loss": 1902.4605, "step": 281000 }, { "epoch": 16.04, "learning_rate": 8.39572192513369e-05, "loss": 1836.0744, "step": 282000 }, { "epoch": 16.1, "learning_rate": 8.390032995790192e-05, "loss": 1814.5775, "step": 283000 }, { "epoch": 16.16, "learning_rate": 8.384344066446696e-05, "loss": 1819.637, "step": 284000 }, { "epoch": 16.21, "learning_rate": 8.378655137103198e-05, "loss": 1836.2818, "step": 285000 }, { "epoch": 16.27, "learning_rate": 8.372966207759701e-05, "loss": 1839.8624, "step": 286000 }, { "epoch": 16.33, "learning_rate": 8.367277278416203e-05, "loss": 1846.7935, "step": 287000 }, { "epoch": 16.38, "learning_rate": 8.361588349072705e-05, "loss": 1829.7068, "step": 288000 }, { "epoch": 16.44, "learning_rate": 8.355899419729207e-05, "loss": 1862.9713, "step": 289000 }, { "epoch": 16.5, "learning_rate": 8.35021049038571e-05, "loss": 1856.2565, "step": 290000 }, { "epoch": 16.55, "learning_rate": 8.344521561042212e-05, "loss": 1886.7356, "step": 291000 }, { "epoch": 16.61, "learning_rate": 8.338832631698714e-05, "loss": 1867.0571, "step": 292000 }, { "epoch": 16.67, "learning_rate": 8.333143702355217e-05, "loss": 1877.6426, "step": 293000 }, { "epoch": 16.73, "learning_rate": 8.327454773011719e-05, "loss": 1880.3535, "step": 294000 }, { "epoch": 16.78, "learning_rate": 8.321765843668222e-05, "loss": 1895.9644, "step": 295000 }, { "epoch": 16.84, "learning_rate": 8.316076914324724e-05, "loss": 1888.0698, "step": 296000 }, { "epoch": 16.9, "learning_rate": 8.310387984981228e-05, "loss": 1899.5197, "step": 297000 }, { "epoch": 16.95, "learning_rate": 8.30469905563773e-05, "loss": 1880.891, "step": 298000 }, { "epoch": 17.01, "learning_rate": 8.299010126294232e-05, "loss": 1897.9652, "step": 299000 }, { "epoch": 17.07, "learning_rate": 8.293321196950734e-05, "loss": 1782.3024, "step": 300000 }, { "epoch": 17.12, "learning_rate": 8.287632267607237e-05, "loss": 1801.5901, "step": 301000 }, { "epoch": 17.18, "learning_rate": 8.281943338263739e-05, "loss": 1816.9566, "step": 302000 }, { "epoch": 17.24, "learning_rate": 8.276254408920242e-05, "loss": 1828.478, "step": 303000 }, { "epoch": 17.29, "learning_rate": 8.270565479576744e-05, "loss": 1832.0951, "step": 304000 }, { "epoch": 17.35, "learning_rate": 8.264876550233247e-05, "loss": 1832.6669, "step": 305000 }, { "epoch": 17.41, "learning_rate": 8.25918762088975e-05, "loss": 1836.1121, "step": 306000 }, { "epoch": 17.47, "learning_rate": 8.253498691546251e-05, "loss": 1842.0629, "step": 307000 }, { "epoch": 17.52, "learning_rate": 8.247809762202753e-05, "loss": 1870.7997, "step": 308000 }, { "epoch": 17.58, "learning_rate": 8.242120832859255e-05, "loss": 1855.2314, "step": 309000 }, { "epoch": 17.64, "learning_rate": 8.236431903515759e-05, "loss": 1870.8179, "step": 310000 }, { "epoch": 17.69, "learning_rate": 8.23074297417226e-05, "loss": 1878.0725, "step": 311000 }, { "epoch": 17.75, "learning_rate": 8.225054044828764e-05, "loss": 1868.5885, "step": 312000 }, { "epoch": 17.81, "learning_rate": 8.219365115485266e-05, "loss": 1881.7681, "step": 313000 }, { "epoch": 17.86, "learning_rate": 8.213676186141769e-05, "loss": 1885.4194, "step": 314000 }, { "epoch": 17.92, "learning_rate": 8.207987256798271e-05, "loss": 1886.3476, "step": 315000 }, { "epoch": 17.98, "learning_rate": 8.202298327454774e-05, "loss": 1868.1915, "step": 316000 }, { "epoch": 18.03, "learning_rate": 8.196609398111276e-05, "loss": 1836.1858, "step": 317000 }, { "epoch": 18.09, "learning_rate": 8.190920468767778e-05, "loss": 1800.9355, "step": 318000 }, { "epoch": 18.15, "learning_rate": 8.18523153942428e-05, "loss": 1782.684, "step": 319000 }, { "epoch": 18.2, "learning_rate": 8.179542610080783e-05, "loss": 1791.5013, "step": 320000 }, { "epoch": 18.26, "learning_rate": 8.173853680737285e-05, "loss": 1817.3179, "step": 321000 }, { "epoch": 18.32, "learning_rate": 8.168164751393787e-05, "loss": 1803.4168, "step": 322000 }, { "epoch": 18.38, "learning_rate": 8.16247582205029e-05, "loss": 1837.1586, "step": 323000 }, { "epoch": 18.43, "learning_rate": 8.156786892706793e-05, "loss": 1822.7013, "step": 324000 }, { "epoch": 18.49, "learning_rate": 8.151097963363296e-05, "loss": 1839.543, "step": 325000 }, { "epoch": 18.55, "learning_rate": 8.145409034019798e-05, "loss": 1845.4601, "step": 326000 }, { "epoch": 18.6, "learning_rate": 8.139720104676301e-05, "loss": 1849.2046, "step": 327000 }, { "epoch": 18.66, "learning_rate": 8.134031175332802e-05, "loss": 1846.8283, "step": 328000 }, { "epoch": 18.72, "learning_rate": 8.128342245989305e-05, "loss": 1861.7088, "step": 329000 }, { "epoch": 18.77, "learning_rate": 8.122653316645807e-05, "loss": 1869.1199, "step": 330000 }, { "epoch": 18.83, "learning_rate": 8.11696438730231e-05, "loss": 1852.2466, "step": 331000 }, { "epoch": 18.89, "learning_rate": 8.111275457958812e-05, "loss": 1875.0819, "step": 332000 }, { "epoch": 18.94, "learning_rate": 8.105586528615316e-05, "loss": 1871.639, "step": 333000 }, { "epoch": 19.0, "learning_rate": 8.099897599271818e-05, "loss": 1879.6095, "step": 334000 }, { "epoch": 19.06, "learning_rate": 8.094208669928321e-05, "loss": 1772.4983, "step": 335000 }, { "epoch": 19.11, "learning_rate": 8.088519740584823e-05, "loss": 1775.247, "step": 336000 }, { "epoch": 19.17, "learning_rate": 8.082830811241325e-05, "loss": 1778.8844, "step": 337000 }, { "epoch": 19.23, "learning_rate": 8.077141881897827e-05, "loss": 1802.1153, "step": 338000 }, { "epoch": 19.29, "learning_rate": 8.071452952554329e-05, "loss": 1800.6032, "step": 339000 }, { "epoch": 19.34, "learning_rate": 8.065764023210832e-05, "loss": 1806.2944, "step": 340000 }, { "epoch": 19.4, "learning_rate": 8.060075093867334e-05, "loss": 1805.9448, "step": 341000 }, { "epoch": 19.46, "learning_rate": 8.054386164523837e-05, "loss": 1824.8719, "step": 342000 }, { "epoch": 19.51, "learning_rate": 8.048697235180339e-05, "loss": 1818.8835, "step": 343000 }, { "epoch": 19.57, "learning_rate": 8.043008305836842e-05, "loss": 1818.4666, "step": 344000 }, { "epoch": 19.63, "learning_rate": 8.037319376493344e-05, "loss": 1840.202, "step": 345000 }, { "epoch": 19.68, "learning_rate": 8.031630447149848e-05, "loss": 1834.4321, "step": 346000 }, { "epoch": 19.74, "learning_rate": 8.02594151780635e-05, "loss": 1846.0205, "step": 347000 }, { "epoch": 19.8, "learning_rate": 8.020252588462852e-05, "loss": 1841.8634, "step": 348000 }, { "epoch": 19.85, "learning_rate": 8.014563659119354e-05, "loss": 1840.3843, "step": 349000 }, { "epoch": 19.91, "learning_rate": 8.008874729775857e-05, "loss": 1854.8982, "step": 350000 }, { "epoch": 19.97, "learning_rate": 8.003185800432359e-05, "loss": 1848.7417, "step": 351000 }, { "epoch": 20.03, "learning_rate": 7.997496871088861e-05, "loss": 1821.2365, "step": 352000 }, { "epoch": 20.08, "learning_rate": 7.991807941745364e-05, "loss": 1759.8334, "step": 353000 }, { "epoch": 20.14, "learning_rate": 7.986119012401866e-05, "loss": 1763.1386, "step": 354000 }, { "epoch": 20.2, "learning_rate": 7.98043008305837e-05, "loss": 1773.7397, "step": 355000 }, { "epoch": 20.25, "learning_rate": 7.974741153714871e-05, "loss": 1787.2912, "step": 356000 }, { "epoch": 20.31, "learning_rate": 7.969052224371373e-05, "loss": 1788.4006, "step": 357000 }, { "epoch": 20.37, "learning_rate": 7.963363295027875e-05, "loss": 1794.5117, "step": 358000 }, { "epoch": 20.42, "learning_rate": 7.957674365684379e-05, "loss": 1810.196, "step": 359000 }, { "epoch": 20.48, "learning_rate": 7.95198543634088e-05, "loss": 1809.5826, "step": 360000 }, { "epoch": 20.54, "learning_rate": 7.946296506997384e-05, "loss": 1809.3866, "step": 361000 }, { "epoch": 20.59, "learning_rate": 7.940607577653886e-05, "loss": 1822.6434, "step": 362000 }, { "epoch": 20.65, "learning_rate": 7.934918648310389e-05, "loss": 1816.58, "step": 363000 }, { "epoch": 20.71, "learning_rate": 7.929229718966891e-05, "loss": 1825.1131, "step": 364000 }, { "epoch": 20.76, "learning_rate": 7.923540789623394e-05, "loss": 1826.8616, "step": 365000 }, { "epoch": 20.82, "learning_rate": 7.917851860279896e-05, "loss": 1831.6498, "step": 366000 }, { "epoch": 20.88, "learning_rate": 7.912162930936398e-05, "loss": 1812.4101, "step": 367000 }, { "epoch": 20.94, "learning_rate": 7.9064740015929e-05, "loss": 1850.2621, "step": 368000 }, { "epoch": 20.99, "learning_rate": 7.900785072249402e-05, "loss": 1867.1521, "step": 369000 }, { "epoch": 21.05, "learning_rate": 7.895096142905905e-05, "loss": 1734.5459, "step": 370000 }, { "epoch": 21.11, "learning_rate": 7.889407213562407e-05, "loss": 1754.2654, "step": 371000 }, { "epoch": 21.16, "learning_rate": 7.88371828421891e-05, "loss": 1762.1995, "step": 372000 }, { "epoch": 21.22, "learning_rate": 7.878029354875413e-05, "loss": 1768.4351, "step": 373000 }, { "epoch": 21.28, "learning_rate": 7.872340425531916e-05, "loss": 1792.8422, "step": 374000 }, { "epoch": 21.33, "learning_rate": 7.866651496188418e-05, "loss": 1766.734, "step": 375000 }, { "epoch": 21.39, "learning_rate": 7.86096256684492e-05, "loss": 1786.7906, "step": 376000 }, { "epoch": 21.45, "learning_rate": 7.855273637501422e-05, "loss": 1798.3824, "step": 377000 }, { "epoch": 21.5, "learning_rate": 7.849584708157925e-05, "loss": 1787.1071, "step": 378000 }, { "epoch": 21.56, "learning_rate": 7.843895778814427e-05, "loss": 1799.0259, "step": 379000 }, { "epoch": 21.62, "learning_rate": 7.83820684947093e-05, "loss": 1802.8057, "step": 380000 }, { "epoch": 21.67, "learning_rate": 7.832517920127432e-05, "loss": 1806.3905, "step": 381000 }, { "epoch": 21.73, "learning_rate": 7.826828990783936e-05, "loss": 1797.5199, "step": 382000 }, { "epoch": 21.79, "learning_rate": 7.821140061440438e-05, "loss": 1828.8731, "step": 383000 }, { "epoch": 21.85, "learning_rate": 7.81545113209694e-05, "loss": 1818.4944, "step": 384000 }, { "epoch": 21.9, "learning_rate": 7.809762202753443e-05, "loss": 1832.021, "step": 385000 }, { "epoch": 21.96, "learning_rate": 7.804073273409945e-05, "loss": 1827.9072, "step": 386000 }, { "epoch": 22.02, "learning_rate": 7.798384344066447e-05, "loss": 1803.8186, "step": 387000 }, { "epoch": 22.07, "learning_rate": 7.792695414722949e-05, "loss": 1738.8453, "step": 388000 }, { "epoch": 22.13, "learning_rate": 7.787006485379452e-05, "loss": 1766.0729, "step": 389000 }, { "epoch": 22.19, "learning_rate": 7.781317556035954e-05, "loss": 1749.646, "step": 390000 }, { "epoch": 22.24, "learning_rate": 7.775628626692457e-05, "loss": 1770.4327, "step": 391000 }, { "epoch": 22.3, "learning_rate": 7.769939697348959e-05, "loss": 1765.1315, "step": 392000 }, { "epoch": 22.36, "learning_rate": 7.764250768005462e-05, "loss": 1760.0144, "step": 393000 }, { "epoch": 22.41, "learning_rate": 7.758561838661964e-05, "loss": 1781.5706, "step": 394000 }, { "epoch": 22.47, "learning_rate": 7.752872909318468e-05, "loss": 1764.6782, "step": 395000 }, { "epoch": 22.53, "learning_rate": 7.74718397997497e-05, "loss": 1781.3635, "step": 396000 }, { "epoch": 22.59, "learning_rate": 7.741495050631472e-05, "loss": 1787.8784, "step": 397000 }, { "epoch": 22.64, "learning_rate": 7.735806121287974e-05, "loss": 1795.1088, "step": 398000 }, { "epoch": 22.7, "learning_rate": 7.730117191944476e-05, "loss": 1797.871, "step": 399000 }, { "epoch": 22.76, "learning_rate": 7.724428262600979e-05, "loss": 1792.7554, "step": 400000 }, { "epoch": 22.81, "learning_rate": 7.718739333257481e-05, "loss": 1795.1205, "step": 401000 }, { "epoch": 22.87, "learning_rate": 7.713050403913984e-05, "loss": 1810.7887, "step": 402000 }, { "epoch": 22.93, "learning_rate": 7.707361474570486e-05, "loss": 1805.4066, "step": 403000 }, { "epoch": 22.98, "learning_rate": 7.70167254522699e-05, "loss": 1796.9633, "step": 404000 }, { "epoch": 23.04, "learning_rate": 7.695983615883491e-05, "loss": 1746.1002, "step": 405000 }, { "epoch": 23.1, "learning_rate": 7.690294686539993e-05, "loss": 1724.3684, "step": 406000 }, { "epoch": 23.15, "learning_rate": 7.684605757196495e-05, "loss": 1726.3012, "step": 407000 }, { "epoch": 23.21, "learning_rate": 7.678916827852999e-05, "loss": 1746.337, "step": 408000 }, { "epoch": 23.27, "learning_rate": 7.6732278985095e-05, "loss": 1757.9005, "step": 409000 }, { "epoch": 23.32, "learning_rate": 7.667538969166004e-05, "loss": 1750.2483, "step": 410000 }, { "epoch": 23.38, "learning_rate": 7.661850039822506e-05, "loss": 1740.813, "step": 411000 }, { "epoch": 23.44, "learning_rate": 7.656161110479009e-05, "loss": 1744.0898, "step": 412000 }, { "epoch": 23.5, "learning_rate": 7.650472181135511e-05, "loss": 1768.0571, "step": 413000 }, { "epoch": 23.55, "learning_rate": 7.644783251792013e-05, "loss": 1764.5887, "step": 414000 }, { "epoch": 23.61, "learning_rate": 7.639094322448516e-05, "loss": 1786.1807, "step": 415000 }, { "epoch": 23.67, "learning_rate": 7.633405393105018e-05, "loss": 1779.4684, "step": 416000 }, { "epoch": 23.72, "learning_rate": 7.62771646376152e-05, "loss": 1782.9111, "step": 417000 }, { "epoch": 23.78, "learning_rate": 7.622027534418022e-05, "loss": 1795.2493, "step": 418000 }, { "epoch": 23.84, "learning_rate": 7.616338605074525e-05, "loss": 1803.2718, "step": 419000 }, { "epoch": 23.89, "learning_rate": 7.610649675731027e-05, "loss": 1798.9961, "step": 420000 }, { "epoch": 23.95, "learning_rate": 7.604960746387531e-05, "loss": 1803.2227, "step": 421000 }, { "epoch": 24.01, "learning_rate": 7.599271817044033e-05, "loss": 1790.7471, "step": 422000 }, { "epoch": 24.06, "learning_rate": 7.593582887700536e-05, "loss": 1710.2821, "step": 423000 }, { "epoch": 24.12, "learning_rate": 7.587893958357038e-05, "loss": 1717.5219, "step": 424000 }, { "epoch": 24.18, "learning_rate": 7.58220502901354e-05, "loss": 1723.6976, "step": 425000 }, { "epoch": 24.23, "learning_rate": 7.576516099670042e-05, "loss": 1731.2728, "step": 426000 }, { "epoch": 24.29, "learning_rate": 7.570827170326545e-05, "loss": 1735.1057, "step": 427000 }, { "epoch": 24.35, "learning_rate": 7.565138240983047e-05, "loss": 1745.7684, "step": 428000 }, { "epoch": 24.41, "learning_rate": 7.559449311639549e-05, "loss": 1734.6625, "step": 429000 }, { "epoch": 24.46, "learning_rate": 7.553760382296052e-05, "loss": 1755.2097, "step": 430000 }, { "epoch": 24.52, "learning_rate": 7.548071452952554e-05, "loss": 1753.0896, "step": 431000 }, { "epoch": 24.58, "learning_rate": 7.542382523609058e-05, "loss": 1761.2314, "step": 432000 }, { "epoch": 24.63, "learning_rate": 7.53669359426556e-05, "loss": 1759.1823, "step": 433000 }, { "epoch": 24.69, "learning_rate": 7.531004664922063e-05, "loss": 1754.1639, "step": 434000 }, { "epoch": 24.75, "learning_rate": 7.525315735578565e-05, "loss": 1779.1229, "step": 435000 }, { "epoch": 24.8, "learning_rate": 7.519626806235067e-05, "loss": 1776.6302, "step": 436000 }, { "epoch": 24.86, "learning_rate": 7.513937876891569e-05, "loss": 1762.9941, "step": 437000 }, { "epoch": 24.92, "learning_rate": 7.508248947548072e-05, "loss": 1765.174, "step": 438000 }, { "epoch": 24.97, "learning_rate": 7.502560018204574e-05, "loss": 1779.0483, "step": 439000 }, { "epoch": 25.03, "learning_rate": 7.496871088861077e-05, "loss": 1728.9636, "step": 440000 }, { "epoch": 25.09, "learning_rate": 7.491182159517579e-05, "loss": 1677.9141, "step": 441000 }, { "epoch": 25.15, "learning_rate": 7.485493230174083e-05, "loss": 1705.5877, "step": 442000 }, { "epoch": 25.2, "learning_rate": 7.479804300830584e-05, "loss": 1723.2534, "step": 443000 }, { "epoch": 25.26, "learning_rate": 7.474115371487086e-05, "loss": 1719.9688, "step": 444000 }, { "epoch": 25.32, "learning_rate": 7.468426442143588e-05, "loss": 1738.6949, "step": 445000 }, { "epoch": 25.37, "learning_rate": 7.46273751280009e-05, "loss": 1726.5089, "step": 446000 }, { "epoch": 25.43, "learning_rate": 7.457048583456594e-05, "loss": 1724.772, "step": 447000 }, { "epoch": 25.49, "learning_rate": 7.451359654113096e-05, "loss": 1750.3384, "step": 448000 }, { "epoch": 25.54, "learning_rate": 7.445670724769599e-05, "loss": 1744.3055, "step": 449000 }, { "epoch": 25.6, "learning_rate": 7.439981795426101e-05, "loss": 1747.2213, "step": 450000 }, { "epoch": 25.66, "learning_rate": 7.434292866082604e-05, "loss": 1760.1624, "step": 451000 }, { "epoch": 25.71, "learning_rate": 7.428603936739106e-05, "loss": 1758.302, "step": 452000 }, { "epoch": 25.77, "learning_rate": 7.42291500739561e-05, "loss": 1764.9035, "step": 453000 }, { "epoch": 25.83, "learning_rate": 7.417226078052111e-05, "loss": 1753.3045, "step": 454000 }, { "epoch": 25.88, "learning_rate": 7.411537148708613e-05, "loss": 1778.9106, "step": 455000 }, { "epoch": 25.94, "learning_rate": 7.405848219365115e-05, "loss": 1782.8129, "step": 456000 }, { "epoch": 26.0, "learning_rate": 7.400159290021619e-05, "loss": 1749.9249, "step": 457000 }, { "epoch": 26.06, "learning_rate": 7.39447036067812e-05, "loss": 1685.9009, "step": 458000 }, { "epoch": 26.11, "learning_rate": 7.388781431334622e-05, "loss": 1690.1636, "step": 459000 }, { "epoch": 26.17, "learning_rate": 7.383092501991126e-05, "loss": 1696.5267, "step": 460000 }, { "epoch": 26.23, "learning_rate": 7.377403572647628e-05, "loss": 1712.5632, "step": 461000 }, { "epoch": 26.28, "learning_rate": 7.371714643304131e-05, "loss": 1701.1225, "step": 462000 }, { "epoch": 26.34, "learning_rate": 7.366025713960633e-05, "loss": 1708.5919, "step": 463000 }, { "epoch": 26.4, "learning_rate": 7.360336784617136e-05, "loss": 1715.701, "step": 464000 }, { "epoch": 26.45, "learning_rate": 7.354647855273637e-05, "loss": 1740.4959, "step": 465000 }, { "epoch": 26.51, "learning_rate": 7.34895892593014e-05, "loss": 1754.594, "step": 466000 }, { "epoch": 26.57, "learning_rate": 7.343269996586642e-05, "loss": 1728.4143, "step": 467000 }, { "epoch": 26.62, "learning_rate": 7.337581067243145e-05, "loss": 1750.8064, "step": 468000 }, { "epoch": 26.68, "learning_rate": 7.331892137899647e-05, "loss": 1743.5628, "step": 469000 }, { "epoch": 26.74, "learning_rate": 7.326203208556151e-05, "loss": 1751.4254, "step": 470000 }, { "epoch": 26.79, "learning_rate": 7.320514279212653e-05, "loss": 1739.6772, "step": 471000 }, { "epoch": 26.85, "learning_rate": 7.314825349869156e-05, "loss": 1744.865, "step": 472000 }, { "epoch": 26.91, "learning_rate": 7.309136420525658e-05, "loss": 1737.5535, "step": 473000 }, { "epoch": 26.97, "learning_rate": 7.30344749118216e-05, "loss": 1744.8645, "step": 474000 }, { "epoch": 27.02, "learning_rate": 7.297758561838662e-05, "loss": 1712.4401, "step": 475000 }, { "epoch": 27.08, "learning_rate": 7.292069632495164e-05, "loss": 1679.6836, "step": 476000 }, { "epoch": 27.14, "learning_rate": 7.286380703151667e-05, "loss": 1682.8698, "step": 477000 }, { "epoch": 27.19, "learning_rate": 7.280691773808169e-05, "loss": 1677.9102, "step": 478000 }, { "epoch": 27.25, "learning_rate": 7.275002844464672e-05, "loss": 1683.7864, "step": 479000 }, { "epoch": 27.31, "learning_rate": 7.269313915121174e-05, "loss": 1709.8998, "step": 480000 }, { "epoch": 27.36, "learning_rate": 7.263624985777678e-05, "loss": 1719.8421, "step": 481000 }, { "epoch": 27.42, "learning_rate": 7.25793605643418e-05, "loss": 1709.9282, "step": 482000 }, { "epoch": 27.48, "learning_rate": 7.252247127090683e-05, "loss": 1708.4709, "step": 483000 }, { "epoch": 27.53, "learning_rate": 7.246558197747185e-05, "loss": 1716.2429, "step": 484000 }, { "epoch": 27.59, "learning_rate": 7.240869268403687e-05, "loss": 1722.8788, "step": 485000 }, { "epoch": 27.65, "learning_rate": 7.235180339060189e-05, "loss": 1704.2666, "step": 486000 }, { "epoch": 27.71, "learning_rate": 7.229491409716692e-05, "loss": 1729.0061, "step": 487000 }, { "epoch": 27.76, "learning_rate": 7.223802480373194e-05, "loss": 1721.9655, "step": 488000 }, { "epoch": 27.82, "learning_rate": 7.218113551029697e-05, "loss": 1732.9599, "step": 489000 }, { "epoch": 27.88, "learning_rate": 7.212424621686199e-05, "loss": 1729.6118, "step": 490000 }, { "epoch": 27.93, "learning_rate": 7.206735692342701e-05, "loss": 1739.5918, "step": 491000 }, { "epoch": 27.99, "learning_rate": 7.201046762999204e-05, "loss": 1741.7421, "step": 492000 }, { "epoch": 28.05, "learning_rate": 7.195357833655706e-05, "loss": 1681.5856, "step": 493000 }, { "epoch": 28.1, "learning_rate": 7.189668904312208e-05, "loss": 1664.9931, "step": 494000 }, { "epoch": 28.16, "learning_rate": 7.18397997496871e-05, "loss": 1669.2375, "step": 495000 }, { "epoch": 28.22, "learning_rate": 7.178291045625214e-05, "loss": 1686.1343, "step": 496000 }, { "epoch": 28.27, "learning_rate": 7.172602116281716e-05, "loss": 1685.5216, "step": 497000 }, { "epoch": 28.33, "learning_rate": 7.166913186938219e-05, "loss": 1696.418, "step": 498000 }, { "epoch": 28.39, "learning_rate": 7.161224257594721e-05, "loss": 1690.2223, "step": 499000 }, { "epoch": 28.44, "learning_rate": 7.155535328251224e-05, "loss": 1698.4006, "step": 500000 }, { "epoch": 28.5, "learning_rate": 7.149846398907726e-05, "loss": 1705.1219, "step": 501000 }, { "epoch": 28.56, "learning_rate": 7.14415746956423e-05, "loss": 1695.1662, "step": 502000 }, { "epoch": 28.62, "learning_rate": 7.138468540220731e-05, "loss": 1711.3654, "step": 503000 }, { "epoch": 28.67, "learning_rate": 7.132779610877233e-05, "loss": 1718.9269, "step": 504000 }, { "epoch": 28.73, "learning_rate": 7.127090681533735e-05, "loss": 1712.6167, "step": 505000 }, { "epoch": 28.79, "learning_rate": 7.121401752190237e-05, "loss": 1718.9559, "step": 506000 }, { "epoch": 28.84, "learning_rate": 7.11571282284674e-05, "loss": 1725.913, "step": 507000 }, { "epoch": 28.9, "learning_rate": 7.110023893503242e-05, "loss": 1722.1521, "step": 508000 }, { "epoch": 28.96, "learning_rate": 7.104334964159746e-05, "loss": 1715.5414, "step": 509000 }, { "epoch": 29.01, "learning_rate": 7.098646034816248e-05, "loss": 1710.795, "step": 510000 }, { "epoch": 29.07, "learning_rate": 7.092957105472751e-05, "loss": 1650.8457, "step": 511000 }, { "epoch": 29.13, "learning_rate": 7.087268176129253e-05, "loss": 1663.5039, "step": 512000 }, { "epoch": 29.18, "learning_rate": 7.081579246785755e-05, "loss": 1672.0442, "step": 513000 }, { "epoch": 29.24, "learning_rate": 7.075890317442257e-05, "loss": 1673.7752, "step": 514000 }, { "epoch": 29.3, "learning_rate": 7.07020138809876e-05, "loss": 1650.697, "step": 515000 }, { "epoch": 29.35, "learning_rate": 7.064512458755262e-05, "loss": 1668.9139, "step": 516000 }, { "epoch": 29.41, "learning_rate": 7.058823529411765e-05, "loss": 1678.3528, "step": 517000 }, { "epoch": 29.47, "learning_rate": 7.053134600068267e-05, "loss": 1693.3855, "step": 518000 }, { "epoch": 29.53, "learning_rate": 7.047445670724771e-05, "loss": 1687.9925, "step": 519000 }, { "epoch": 29.58, "learning_rate": 7.041756741381273e-05, "loss": 1700.8955, "step": 520000 }, { "epoch": 29.64, "learning_rate": 7.036067812037775e-05, "loss": 1692.1443, "step": 521000 }, { "epoch": 29.7, "learning_rate": 7.030378882694278e-05, "loss": 1707.2272, "step": 522000 }, { "epoch": 29.75, "learning_rate": 7.02468995335078e-05, "loss": 1703.5711, "step": 523000 }, { "epoch": 29.81, "learning_rate": 7.019001024007282e-05, "loss": 1700.2985, "step": 524000 }, { "epoch": 29.87, "learning_rate": 7.013312094663784e-05, "loss": 1714.6324, "step": 525000 }, { "epoch": 29.92, "learning_rate": 7.007623165320287e-05, "loss": 1729.6229, "step": 526000 }, { "epoch": 29.98, "learning_rate": 7.001934235976789e-05, "loss": 1717.646, "step": 527000 }, { "epoch": 30.04, "learning_rate": 6.996245306633292e-05, "loss": 1656.0448, "step": 528000 }, { "epoch": 30.09, "learning_rate": 6.990556377289794e-05, "loss": 1645.7974, "step": 529000 }, { "epoch": 30.15, "learning_rate": 6.984867447946298e-05, "loss": 1654.4923, "step": 530000 }, { "epoch": 30.21, "learning_rate": 6.9791785186028e-05, "loss": 1647.0569, "step": 531000 }, { "epoch": 30.27, "learning_rate": 6.973489589259303e-05, "loss": 1656.4875, "step": 532000 }, { "epoch": 30.32, "learning_rate": 6.967800659915803e-05, "loss": 1655.7869, "step": 533000 }, { "epoch": 30.38, "learning_rate": 6.962111730572307e-05, "loss": 1650.2406, "step": 534000 }, { "epoch": 30.44, "learning_rate": 6.956422801228809e-05, "loss": 1676.6643, "step": 535000 }, { "epoch": 30.49, "learning_rate": 6.95073387188531e-05, "loss": 1674.9825, "step": 536000 }, { "epoch": 30.55, "learning_rate": 6.945044942541814e-05, "loss": 1676.4666, "step": 537000 }, { "epoch": 30.61, "learning_rate": 6.939356013198316e-05, "loss": 1692.3589, "step": 538000 }, { "epoch": 30.66, "learning_rate": 6.933667083854819e-05, "loss": 1687.6176, "step": 539000 }, { "epoch": 30.72, "learning_rate": 6.927978154511321e-05, "loss": 1675.5787, "step": 540000 }, { "epoch": 30.78, "learning_rate": 6.922289225167824e-05, "loss": 1699.3369, "step": 541000 }, { "epoch": 30.83, "learning_rate": 6.916600295824326e-05, "loss": 1697.4086, "step": 542000 }, { "epoch": 30.89, "learning_rate": 6.910911366480828e-05, "loss": 1701.0041, "step": 543000 }, { "epoch": 30.95, "learning_rate": 6.90522243713733e-05, "loss": 1702.379, "step": 544000 }, { "epoch": 31.0, "learning_rate": 6.899533507793834e-05, "loss": 1698.63, "step": 545000 }, { "epoch": 31.06, "learning_rate": 6.893844578450336e-05, "loss": 1618.8003, "step": 546000 }, { "epoch": 31.12, "learning_rate": 6.888155649106839e-05, "loss": 1623.9575, "step": 547000 }, { "epoch": 31.18, "learning_rate": 6.882466719763341e-05, "loss": 1641.0449, "step": 548000 }, { "epoch": 31.23, "learning_rate": 6.876777790419844e-05, "loss": 1638.956, "step": 549000 }, { "epoch": 31.29, "learning_rate": 6.871088861076346e-05, "loss": 1644.2068, "step": 550000 }, { "epoch": 31.35, "learning_rate": 6.865399931732848e-05, "loss": 1653.3782, "step": 551000 }, { "epoch": 31.4, "learning_rate": 6.859711002389351e-05, "loss": 1659.3823, "step": 552000 }, { "epoch": 31.46, "learning_rate": 6.854022073045853e-05, "loss": 1669.3661, "step": 553000 }, { "epoch": 31.52, "learning_rate": 6.848333143702355e-05, "loss": 1672.9213, "step": 554000 }, { "epoch": 31.57, "learning_rate": 6.842644214358857e-05, "loss": 1667.0746, "step": 555000 }, { "epoch": 31.63, "learning_rate": 6.83695528501536e-05, "loss": 1649.5024, "step": 556000 }, { "epoch": 31.69, "learning_rate": 6.831266355671862e-05, "loss": 1689.5287, "step": 557000 }, { "epoch": 31.74, "learning_rate": 6.825577426328366e-05, "loss": 1681.2549, "step": 558000 }, { "epoch": 31.8, "learning_rate": 6.819888496984868e-05, "loss": 1665.7016, "step": 559000 }, { "epoch": 31.86, "learning_rate": 6.814199567641371e-05, "loss": 1682.5196, "step": 560000 }, { "epoch": 31.91, "learning_rate": 6.808510638297873e-05, "loss": 1706.0114, "step": 561000 }, { "epoch": 31.97, "learning_rate": 6.802821708954375e-05, "loss": 1681.9006, "step": 562000 }, { "epoch": 32.03, "learning_rate": 6.797132779610877e-05, "loss": 1654.1549, "step": 563000 }, { "epoch": 32.09, "learning_rate": 6.79144385026738e-05, "loss": 1609.992, "step": 564000 }, { "epoch": 32.14, "learning_rate": 6.785754920923882e-05, "loss": 1622.8775, "step": 565000 }, { "epoch": 32.2, "learning_rate": 6.780065991580384e-05, "loss": 1637.2477, "step": 566000 }, { "epoch": 32.26, "learning_rate": 6.774377062236887e-05, "loss": 1630.1408, "step": 567000 }, { "epoch": 32.31, "learning_rate": 6.76868813289339e-05, "loss": 1633.6798, "step": 568000 }, { "epoch": 32.37, "learning_rate": 6.762999203549893e-05, "loss": 1635.3104, "step": 569000 }, { "epoch": 32.43, "learning_rate": 6.757310274206395e-05, "loss": 1647.4196, "step": 570000 }, { "epoch": 32.48, "learning_rate": 6.751621344862898e-05, "loss": 1638.1315, "step": 571000 }, { "epoch": 32.54, "learning_rate": 6.7459324155194e-05, "loss": 1650.1771, "step": 572000 }, { "epoch": 32.6, "learning_rate": 6.740243486175902e-05, "loss": 1669.1781, "step": 573000 }, { "epoch": 32.65, "learning_rate": 6.734554556832404e-05, "loss": 1663.2475, "step": 574000 }, { "epoch": 32.71, "learning_rate": 6.728865627488907e-05, "loss": 1655.6212, "step": 575000 }, { "epoch": 32.77, "learning_rate": 6.723176698145409e-05, "loss": 1666.3836, "step": 576000 }, { "epoch": 32.83, "learning_rate": 6.717487768801912e-05, "loss": 1670.7705, "step": 577000 }, { "epoch": 32.88, "learning_rate": 6.711798839458414e-05, "loss": 1671.5865, "step": 578000 }, { "epoch": 32.94, "learning_rate": 6.706109910114918e-05, "loss": 1679.4247, "step": 579000 }, { "epoch": 33.0, "learning_rate": 6.70042098077142e-05, "loss": 1665.7174, "step": 580000 }, { "epoch": 33.05, "learning_rate": 6.694732051427921e-05, "loss": 1612.1826, "step": 581000 }, { "epoch": 33.11, "learning_rate": 6.689043122084423e-05, "loss": 1604.1491, "step": 582000 }, { "epoch": 33.17, "learning_rate": 6.683354192740925e-05, "loss": 1593.8801, "step": 583000 }, { "epoch": 33.22, "learning_rate": 6.677665263397429e-05, "loss": 1609.5726, "step": 584000 }, { "epoch": 33.28, "learning_rate": 6.67197633405393e-05, "loss": 1622.8214, "step": 585000 }, { "epoch": 33.34, "learning_rate": 6.666287404710434e-05, "loss": 1645.6988, "step": 586000 }, { "epoch": 33.39, "learning_rate": 6.660598475366936e-05, "loss": 1612.3199, "step": 587000 }, { "epoch": 33.45, "learning_rate": 6.654909546023439e-05, "loss": 1628.3038, "step": 588000 }, { "epoch": 33.51, "learning_rate": 6.649220616679941e-05, "loss": 1631.5374, "step": 589000 }, { "epoch": 33.56, "learning_rate": 6.643531687336444e-05, "loss": 1648.2081, "step": 590000 }, { "epoch": 33.62, "learning_rate": 6.637842757992946e-05, "loss": 1645.123, "step": 591000 }, { "epoch": 33.68, "learning_rate": 6.632153828649448e-05, "loss": 1657.0939, "step": 592000 }, { "epoch": 33.74, "learning_rate": 6.62646489930595e-05, "loss": 1654.806, "step": 593000 }, { "epoch": 33.79, "learning_rate": 6.620775969962454e-05, "loss": 1652.6015, "step": 594000 }, { "epoch": 33.85, "learning_rate": 6.615087040618956e-05, "loss": 1662.2655, "step": 595000 }, { "epoch": 33.91, "learning_rate": 6.609398111275459e-05, "loss": 1656.8008, "step": 596000 }, { "epoch": 33.96, "learning_rate": 6.603709181931961e-05, "loss": 1653.0994, "step": 597000 }, { "epoch": 34.02, "learning_rate": 6.598020252588463e-05, "loss": 1640.4476, "step": 598000 }, { "epoch": 34.08, "learning_rate": 6.592331323244966e-05, "loss": 1586.6324, "step": 599000 }, { "epoch": 34.13, "learning_rate": 6.586642393901468e-05, "loss": 1597.7515, "step": 600000 }, { "epoch": 34.19, "learning_rate": 6.58095346455797e-05, "loss": 1602.7075, "step": 601000 }, { "epoch": 34.25, "learning_rate": 6.575264535214472e-05, "loss": 1616.2401, "step": 602000 }, { "epoch": 34.3, "learning_rate": 6.569575605870975e-05, "loss": 1613.0471, "step": 603000 }, { "epoch": 34.36, "learning_rate": 6.563886676527477e-05, "loss": 1623.5351, "step": 604000 }, { "epoch": 34.42, "learning_rate": 6.55819774718398e-05, "loss": 1619.3749, "step": 605000 }, { "epoch": 34.47, "learning_rate": 6.552508817840482e-05, "loss": 1638.1735, "step": 606000 }, { "epoch": 34.53, "learning_rate": 6.546819888496986e-05, "loss": 1625.9651, "step": 607000 }, { "epoch": 34.59, "learning_rate": 6.541130959153488e-05, "loss": 1647.5083, "step": 608000 }, { "epoch": 34.65, "learning_rate": 6.535442029809991e-05, "loss": 1630.7306, "step": 609000 }, { "epoch": 34.7, "learning_rate": 6.529753100466493e-05, "loss": 1651.992, "step": 610000 }, { "epoch": 34.76, "learning_rate": 6.524064171122995e-05, "loss": 1647.7085, "step": 611000 }, { "epoch": 34.82, "learning_rate": 6.518375241779497e-05, "loss": 1639.407, "step": 612000 }, { "epoch": 34.87, "learning_rate": 6.512686312435999e-05, "loss": 1650.852, "step": 613000 }, { "epoch": 34.93, "learning_rate": 6.506997383092502e-05, "loss": 1634.9972, "step": 614000 }, { "epoch": 34.99, "learning_rate": 6.501308453749004e-05, "loss": 1646.3459, "step": 615000 }, { "epoch": 35.04, "learning_rate": 6.495619524405507e-05, "loss": 1594.2734, "step": 616000 }, { "epoch": 35.1, "learning_rate": 6.48993059506201e-05, "loss": 1594.5142, "step": 617000 }, { "epoch": 35.16, "learning_rate": 6.484241665718513e-05, "loss": 1602.5239, "step": 618000 }, { "epoch": 35.21, "learning_rate": 6.478552736375015e-05, "loss": 1597.584, "step": 619000 }, { "epoch": 35.27, "learning_rate": 6.472863807031518e-05, "loss": 1599.8529, "step": 620000 }, { "epoch": 35.33, "learning_rate": 6.46717487768802e-05, "loss": 1618.6385, "step": 621000 }, { "epoch": 35.39, "learning_rate": 6.461485948344522e-05, "loss": 1616.2439, "step": 622000 }, { "epoch": 35.44, "learning_rate": 6.455797019001024e-05, "loss": 1608.12, "step": 623000 }, { "epoch": 35.5, "learning_rate": 6.450108089657527e-05, "loss": 1613.9035, "step": 624000 }, { "epoch": 35.56, "learning_rate": 6.444419160314029e-05, "loss": 1631.4272, "step": 625000 }, { "epoch": 35.61, "learning_rate": 6.438730230970532e-05, "loss": 1609.869, "step": 626000 }, { "epoch": 35.67, "learning_rate": 6.433041301627034e-05, "loss": 1614.3354, "step": 627000 }, { "epoch": 35.73, "learning_rate": 6.427352372283536e-05, "loss": 1636.0874, "step": 628000 }, { "epoch": 35.78, "learning_rate": 6.42166344294004e-05, "loss": 1629.62, "step": 629000 }, { "epoch": 35.84, "learning_rate": 6.415974513596542e-05, "loss": 1642.625, "step": 630000 }, { "epoch": 35.9, "learning_rate": 6.410285584253043e-05, "loss": 1635.7355, "step": 631000 }, { "epoch": 35.95, "learning_rate": 6.404596654909545e-05, "loss": 1630.2351, "step": 632000 }, { "epoch": 36.01, "learning_rate": 6.398907725566049e-05, "loss": 1623.9576, "step": 633000 }, { "epoch": 36.07, "learning_rate": 6.39321879622255e-05, "loss": 1569.4096, "step": 634000 }, { "epoch": 36.12, "learning_rate": 6.387529866879054e-05, "loss": 1556.9552, "step": 635000 }, { "epoch": 36.18, "learning_rate": 6.381840937535556e-05, "loss": 1571.4336, "step": 636000 }, { "epoch": 36.24, "learning_rate": 6.376152008192059e-05, "loss": 1581.258, "step": 637000 }, { "epoch": 36.3, "learning_rate": 6.370463078848561e-05, "loss": 1589.5528, "step": 638000 }, { "epoch": 36.35, "learning_rate": 6.364774149505064e-05, "loss": 1599.3106, "step": 639000 }, { "epoch": 36.41, "learning_rate": 6.359085220161566e-05, "loss": 1603.8071, "step": 640000 }, { "epoch": 36.47, "learning_rate": 6.353396290818068e-05, "loss": 1603.186, "step": 641000 }, { "epoch": 36.52, "learning_rate": 6.34770736147457e-05, "loss": 1601.1141, "step": 642000 }, { "epoch": 36.58, "learning_rate": 6.342018432131072e-05, "loss": 1606.7013, "step": 643000 }, { "epoch": 36.64, "learning_rate": 6.336329502787576e-05, "loss": 1625.3829, "step": 644000 }, { "epoch": 36.69, "learning_rate": 6.330640573444078e-05, "loss": 1599.6351, "step": 645000 }, { "epoch": 36.75, "learning_rate": 6.324951644100581e-05, "loss": 1624.8943, "step": 646000 }, { "epoch": 36.81, "learning_rate": 6.319262714757083e-05, "loss": 1620.1544, "step": 647000 }, { "epoch": 36.86, "learning_rate": 6.313573785413586e-05, "loss": 1620.5376, "step": 648000 }, { "epoch": 36.92, "learning_rate": 6.307884856070088e-05, "loss": 1626.7378, "step": 649000 }, { "epoch": 36.98, "learning_rate": 6.30219592672659e-05, "loss": 1612.9253, "step": 650000 }, { "epoch": 37.03, "learning_rate": 6.296506997383092e-05, "loss": 1579.1456, "step": 651000 }, { "epoch": 37.09, "learning_rate": 6.290818068039595e-05, "loss": 1560.5031, "step": 652000 }, { "epoch": 37.15, "learning_rate": 6.285129138696097e-05, "loss": 1576.492, "step": 653000 }, { "epoch": 37.21, "learning_rate": 6.2794402093526e-05, "loss": 1582.5555, "step": 654000 }, { "epoch": 37.26, "learning_rate": 6.273751280009102e-05, "loss": 1571.104, "step": 655000 }, { "epoch": 37.32, "learning_rate": 6.268062350665606e-05, "loss": 1584.779, "step": 656000 }, { "epoch": 37.38, "learning_rate": 6.262373421322108e-05, "loss": 1606.5686, "step": 657000 }, { "epoch": 37.43, "learning_rate": 6.25668449197861e-05, "loss": 1582.1899, "step": 658000 }, { "epoch": 37.49, "learning_rate": 6.250995562635113e-05, "loss": 1580.6579, "step": 659000 }, { "epoch": 37.55, "learning_rate": 6.245306633291615e-05, "loss": 1576.8599, "step": 660000 }, { "epoch": 37.6, "learning_rate": 6.239617703948117e-05, "loss": 1607.7116, "step": 661000 }, { "epoch": 37.66, "learning_rate": 6.233928774604619e-05, "loss": 1602.7387, "step": 662000 }, { "epoch": 37.72, "learning_rate": 6.228239845261122e-05, "loss": 1619.6085, "step": 663000 }, { "epoch": 37.77, "learning_rate": 6.222550915917624e-05, "loss": 1625.4526, "step": 664000 }, { "epoch": 37.83, "learning_rate": 6.216861986574127e-05, "loss": 1621.509, "step": 665000 }, { "epoch": 37.89, "learning_rate": 6.21117305723063e-05, "loss": 1616.6885, "step": 666000 }, { "epoch": 37.95, "learning_rate": 6.205484127887133e-05, "loss": 1624.9085, "step": 667000 }, { "epoch": 38.0, "learning_rate": 6.199795198543635e-05, "loss": 1614.6681, "step": 668000 }, { "epoch": 38.06, "learning_rate": 6.194106269200138e-05, "loss": 1546.4807, "step": 669000 }, { "epoch": 38.12, "learning_rate": 6.188417339856639e-05, "loss": 1556.9278, "step": 670000 }, { "epoch": 38.17, "learning_rate": 6.182728410513142e-05, "loss": 1550.161, "step": 671000 }, { "epoch": 38.23, "learning_rate": 6.177039481169644e-05, "loss": 1570.524, "step": 672000 }, { "epoch": 38.29, "learning_rate": 6.171350551826146e-05, "loss": 1552.4574, "step": 673000 }, { "epoch": 38.34, "learning_rate": 6.165661622482649e-05, "loss": 1552.8044, "step": 674000 }, { "epoch": 38.4, "learning_rate": 6.159972693139151e-05, "loss": 1559.4571, "step": 675000 }, { "epoch": 38.46, "learning_rate": 6.154283763795654e-05, "loss": 1584.1924, "step": 676000 }, { "epoch": 38.51, "learning_rate": 6.148594834452156e-05, "loss": 1594.481, "step": 677000 }, { "epoch": 38.57, "learning_rate": 6.14290590510866e-05, "loss": 1600.3744, "step": 678000 }, { "epoch": 38.63, "learning_rate": 6.137216975765162e-05, "loss": 1573.6876, "step": 679000 }, { "epoch": 38.68, "learning_rate": 6.131528046421663e-05, "loss": 1580.4147, "step": 680000 }, { "epoch": 38.74, "learning_rate": 6.125839117078165e-05, "loss": 1600.7051, "step": 681000 }, { "epoch": 38.8, "learning_rate": 6.120150187734669e-05, "loss": 1605.3811, "step": 682000 }, { "epoch": 38.86, "learning_rate": 6.11446125839117e-05, "loss": 1594.6529, "step": 683000 }, { "epoch": 38.91, "learning_rate": 6.108772329047674e-05, "loss": 1602.4889, "step": 684000 }, { "epoch": 38.97, "learning_rate": 6.103083399704176e-05, "loss": 1606.3965, "step": 685000 }, { "epoch": 39.03, "learning_rate": 6.097394470360679e-05, "loss": 1564.6019, "step": 686000 }, { "epoch": 39.08, "learning_rate": 6.0917055410171805e-05, "loss": 1536.2535, "step": 687000 }, { "epoch": 39.14, "learning_rate": 6.0860166116736825e-05, "loss": 1532.7839, "step": 688000 }, { "epoch": 39.2, "learning_rate": 6.080327682330186e-05, "loss": 1548.0561, "step": 689000 }, { "epoch": 39.25, "learning_rate": 6.074638752986688e-05, "loss": 1543.2991, "step": 690000 }, { "epoch": 39.31, "learning_rate": 6.068949823643191e-05, "loss": 1555.8112, "step": 691000 }, { "epoch": 39.37, "learning_rate": 6.063260894299693e-05, "loss": 1565.0348, "step": 692000 }, { "epoch": 39.42, "learning_rate": 6.0575719649561956e-05, "loss": 1577.7809, "step": 693000 }, { "epoch": 39.48, "learning_rate": 6.0518830356126976e-05, "loss": 1577.4689, "step": 694000 }, { "epoch": 39.54, "learning_rate": 6.046194106269201e-05, "loss": 1566.3115, "step": 695000 }, { "epoch": 39.59, "learning_rate": 6.040505176925703e-05, "loss": 1575.2004, "step": 696000 }, { "epoch": 39.65, "learning_rate": 6.0348162475822054e-05, "loss": 1585.2444, "step": 697000 }, { "epoch": 39.71, "learning_rate": 6.0291273182387074e-05, "loss": 1584.2024, "step": 698000 }, { "epoch": 39.77, "learning_rate": 6.023438388895211e-05, "loss": 1574.2989, "step": 699000 }, { "epoch": 39.82, "learning_rate": 6.0177494595517127e-05, "loss": 1596.1741, "step": 700000 }, { "epoch": 39.88, "learning_rate": 6.012060530208215e-05, "loss": 1589.8946, "step": 701000 }, { "epoch": 39.94, "learning_rate": 6.006371600864717e-05, "loss": 1573.3869, "step": 702000 }, { "epoch": 39.99, "learning_rate": 6.0006826715212205e-05, "loss": 1600.8972, "step": 703000 }, { "epoch": 40.05, "learning_rate": 5.9949937421777225e-05, "loss": 1512.3105, "step": 704000 }, { "epoch": 40.11, "learning_rate": 5.9893048128342244e-05, "loss": 1537.8946, "step": 705000 }, { "epoch": 40.16, "learning_rate": 5.983615883490728e-05, "loss": 1531.6721, "step": 706000 }, { "epoch": 40.22, "learning_rate": 5.97792695414723e-05, "loss": 1535.2285, "step": 707000 }, { "epoch": 40.28, "learning_rate": 5.972238024803732e-05, "loss": 1541.8789, "step": 708000 }, { "epoch": 40.33, "learning_rate": 5.966549095460234e-05, "loss": 1551.8709, "step": 709000 }, { "epoch": 40.39, "learning_rate": 5.9608601661167376e-05, "loss": 1546.9776, "step": 710000 }, { "epoch": 40.45, "learning_rate": 5.9551712367732395e-05, "loss": 1565.2638, "step": 711000 }, { "epoch": 40.51, "learning_rate": 5.949482307429742e-05, "loss": 1552.4615, "step": 712000 }, { "epoch": 40.56, "learning_rate": 5.943793378086244e-05, "loss": 1566.013, "step": 713000 }, { "epoch": 40.62, "learning_rate": 5.9381044487427474e-05, "loss": 1563.5406, "step": 714000 }, { "epoch": 40.68, "learning_rate": 5.9324155193992494e-05, "loss": 1552.392, "step": 715000 }, { "epoch": 40.73, "learning_rate": 5.926726590055752e-05, "loss": 1571.383, "step": 716000 }, { "epoch": 40.79, "learning_rate": 5.921037660712254e-05, "loss": 1551.0039, "step": 717000 }, { "epoch": 40.85, "learning_rate": 5.915348731368757e-05, "loss": 1562.9479, "step": 718000 }, { "epoch": 40.9, "learning_rate": 5.909659802025259e-05, "loss": 1588.6915, "step": 719000 }, { "epoch": 40.96, "learning_rate": 5.903970872681761e-05, "loss": 1584.2642, "step": 720000 }, { "epoch": 41.02, "learning_rate": 5.898281943338264e-05, "loss": 1557.5122, "step": 721000 }, { "epoch": 41.07, "learning_rate": 5.892593013994766e-05, "loss": 1502.4213, "step": 722000 }, { "epoch": 41.13, "learning_rate": 5.886904084651269e-05, "loss": 1526.8101, "step": 723000 }, { "epoch": 41.19, "learning_rate": 5.881215155307771e-05, "loss": 1515.5841, "step": 724000 }, { "epoch": 41.24, "learning_rate": 5.875526225964274e-05, "loss": 1550.6167, "step": 725000 }, { "epoch": 41.3, "learning_rate": 5.869837296620776e-05, "loss": 1523.8431, "step": 726000 }, { "epoch": 41.36, "learning_rate": 5.864148367277279e-05, "loss": 1543.7703, "step": 727000 }, { "epoch": 41.42, "learning_rate": 5.858459437933781e-05, "loss": 1539.5497, "step": 728000 }, { "epoch": 41.47, "learning_rate": 5.852770508590284e-05, "loss": 1545.5635, "step": 729000 }, { "epoch": 41.53, "learning_rate": 5.847081579246786e-05, "loss": 1546.7719, "step": 730000 }, { "epoch": 41.59, "learning_rate": 5.841392649903289e-05, "loss": 1570.1741, "step": 731000 }, { "epoch": 41.64, "learning_rate": 5.835703720559791e-05, "loss": 1545.6298, "step": 732000 }, { "epoch": 41.7, "learning_rate": 5.830014791216294e-05, "loss": 1547.7866, "step": 733000 }, { "epoch": 41.76, "learning_rate": 5.824325861872796e-05, "loss": 1567.7844, "step": 734000 }, { "epoch": 41.81, "learning_rate": 5.818636932529298e-05, "loss": 1553.9446, "step": 735000 }, { "epoch": 41.87, "learning_rate": 5.8129480031858005e-05, "loss": 1562.8866, "step": 736000 }, { "epoch": 41.93, "learning_rate": 5.8072590738423025e-05, "loss": 1563.9419, "step": 737000 }, { "epoch": 41.98, "learning_rate": 5.801570144498806e-05, "loss": 1583.9055, "step": 738000 }, { "epoch": 42.04, "learning_rate": 5.795881215155308e-05, "loss": 1510.9761, "step": 739000 }, { "epoch": 42.1, "learning_rate": 5.790192285811811e-05, "loss": 1523.6974, "step": 740000 }, { "epoch": 42.15, "learning_rate": 5.784503356468313e-05, "loss": 1498.9704, "step": 741000 }, { "epoch": 42.21, "learning_rate": 5.7788144271248156e-05, "loss": 1521.0267, "step": 742000 }, { "epoch": 42.27, "learning_rate": 5.7731254977813176e-05, "loss": 1528.4972, "step": 743000 }, { "epoch": 42.33, "learning_rate": 5.767436568437821e-05, "loss": 1524.5999, "step": 744000 }, { "epoch": 42.38, "learning_rate": 5.761747639094323e-05, "loss": 1533.9181, "step": 745000 }, { "epoch": 42.44, "learning_rate": 5.7560587097508255e-05, "loss": 1533.992, "step": 746000 }, { "epoch": 42.5, "learning_rate": 5.7503697804073274e-05, "loss": 1536.9071, "step": 747000 }, { "epoch": 42.55, "learning_rate": 5.744680851063831e-05, "loss": 1542.6356, "step": 748000 }, { "epoch": 42.61, "learning_rate": 5.738991921720333e-05, "loss": 1542.659, "step": 749000 }, { "epoch": 42.67, "learning_rate": 5.7333029923768346e-05, "loss": 1532.437, "step": 750000 }, { "epoch": 42.72, "learning_rate": 5.727614063033337e-05, "loss": 1542.5241, "step": 751000 }, { "epoch": 42.78, "learning_rate": 5.721925133689839e-05, "loss": 1565.6498, "step": 752000 }, { "epoch": 42.84, "learning_rate": 5.7162362043463425e-05, "loss": 1556.6278, "step": 753000 }, { "epoch": 42.89, "learning_rate": 5.7105472750028445e-05, "loss": 1546.9685, "step": 754000 }, { "epoch": 42.95, "learning_rate": 5.704858345659347e-05, "loss": 1546.2209, "step": 755000 }, { "epoch": 43.01, "learning_rate": 5.699169416315849e-05, "loss": 1552.5761, "step": 756000 }, { "epoch": 43.07, "learning_rate": 5.693480486972352e-05, "loss": 1467.5445, "step": 757000 }, { "epoch": 43.12, "learning_rate": 5.687791557628854e-05, "loss": 1489.3265, "step": 758000 }, { "epoch": 43.18, "learning_rate": 5.6821026282853576e-05, "loss": 1503.7974, "step": 759000 }, { "epoch": 43.24, "learning_rate": 5.6764136989418595e-05, "loss": 1493.906, "step": 760000 }, { "epoch": 43.29, "learning_rate": 5.670724769598362e-05, "loss": 1521.7048, "step": 761000 }, { "epoch": 43.35, "learning_rate": 5.665035840254864e-05, "loss": 1531.1412, "step": 762000 }, { "epoch": 43.41, "learning_rate": 5.6593469109113674e-05, "loss": 1525.6994, "step": 763000 }, { "epoch": 43.46, "learning_rate": 5.6536579815678694e-05, "loss": 1526.8034, "step": 764000 }, { "epoch": 43.52, "learning_rate": 5.647969052224371e-05, "loss": 1521.6851, "step": 765000 }, { "epoch": 43.58, "learning_rate": 5.642280122880874e-05, "loss": 1533.6653, "step": 766000 }, { "epoch": 43.63, "learning_rate": 5.636591193537376e-05, "loss": 1531.0187, "step": 767000 }, { "epoch": 43.69, "learning_rate": 5.630902264193879e-05, "loss": 1536.7369, "step": 768000 }, { "epoch": 43.75, "learning_rate": 5.625213334850381e-05, "loss": 1545.8532, "step": 769000 }, { "epoch": 43.8, "learning_rate": 5.619524405506884e-05, "loss": 1538.3353, "step": 770000 }, { "epoch": 43.86, "learning_rate": 5.613835476163386e-05, "loss": 1529.6112, "step": 771000 }, { "epoch": 43.92, "learning_rate": 5.608146546819889e-05, "loss": 1538.066, "step": 772000 }, { "epoch": 43.98, "learning_rate": 5.602457617476391e-05, "loss": 1536.7104, "step": 773000 }, { "epoch": 44.03, "learning_rate": 5.596768688132894e-05, "loss": 1500.0411, "step": 774000 }, { "epoch": 44.09, "learning_rate": 5.591079758789396e-05, "loss": 1481.6396, "step": 775000 }, { "epoch": 44.15, "learning_rate": 5.585390829445899e-05, "loss": 1480.2336, "step": 776000 }, { "epoch": 44.2, "learning_rate": 5.579701900102401e-05, "loss": 1493.5156, "step": 777000 }, { "epoch": 44.26, "learning_rate": 5.574012970758904e-05, "loss": 1490.8755, "step": 778000 }, { "epoch": 44.32, "learning_rate": 5.568324041415406e-05, "loss": 1505.6116, "step": 779000 }, { "epoch": 44.37, "learning_rate": 5.562635112071908e-05, "loss": 1495.6506, "step": 780000 }, { "epoch": 44.43, "learning_rate": 5.556946182728411e-05, "loss": 1514.071, "step": 781000 }, { "epoch": 44.49, "learning_rate": 5.5512572533849126e-05, "loss": 1513.5575, "step": 782000 }, { "epoch": 44.54, "learning_rate": 5.545568324041416e-05, "loss": 1520.2201, "step": 783000 }, { "epoch": 44.6, "learning_rate": 5.539879394697918e-05, "loss": 1511.8255, "step": 784000 }, { "epoch": 44.66, "learning_rate": 5.5341904653544205e-05, "loss": 1522.2696, "step": 785000 }, { "epoch": 44.71, "learning_rate": 5.5285015360109225e-05, "loss": 1521.6124, "step": 786000 }, { "epoch": 44.77, "learning_rate": 5.522812606667426e-05, "loss": 1518.5965, "step": 787000 }, { "epoch": 44.83, "learning_rate": 5.517123677323928e-05, "loss": 1517.722, "step": 788000 }, { "epoch": 44.89, "learning_rate": 5.5114347479804304e-05, "loss": 1533.6879, "step": 789000 }, { "epoch": 44.94, "learning_rate": 5.505745818636932e-05, "loss": 1555.3634, "step": 790000 }, { "epoch": 45.0, "learning_rate": 5.5000568892934356e-05, "loss": 1540.248, "step": 791000 }, { "epoch": 45.06, "learning_rate": 5.4943679599499376e-05, "loss": 1473.8466, "step": 792000 }, { "epoch": 45.11, "learning_rate": 5.488679030606441e-05, "loss": 1493.7744, "step": 793000 }, { "epoch": 45.17, "learning_rate": 5.482990101262943e-05, "loss": 1473.4984, "step": 794000 }, { "epoch": 45.23, "learning_rate": 5.477301171919445e-05, "loss": 1478.2452, "step": 795000 }, { "epoch": 45.28, "learning_rate": 5.4716122425759474e-05, "loss": 1482.2044, "step": 796000 }, { "epoch": 45.34, "learning_rate": 5.4659233132324494e-05, "loss": 1492.7139, "step": 797000 }, { "epoch": 45.4, "learning_rate": 5.460234383888953e-05, "loss": 1483.7593, "step": 798000 }, { "epoch": 45.45, "learning_rate": 5.4545454545454546e-05, "loss": 1505.8969, "step": 799000 }, { "epoch": 45.51, "learning_rate": 5.448856525201957e-05, "loss": 1504.6569, "step": 800000 }, { "epoch": 45.57, "learning_rate": 5.443167595858459e-05, "loss": 1501.435, "step": 801000 }, { "epoch": 45.63, "learning_rate": 5.4374786665149625e-05, "loss": 1514.303, "step": 802000 }, { "epoch": 45.68, "learning_rate": 5.4317897371714645e-05, "loss": 1513.4491, "step": 803000 }, { "epoch": 45.74, "learning_rate": 5.426100807827967e-05, "loss": 1502.6437, "step": 804000 }, { "epoch": 45.8, "learning_rate": 5.420411878484469e-05, "loss": 1526.0077, "step": 805000 }, { "epoch": 45.85, "learning_rate": 5.4147229491409723e-05, "loss": 1515.0332, "step": 806000 }, { "epoch": 45.91, "learning_rate": 5.409034019797474e-05, "loss": 1530.0286, "step": 807000 }, { "epoch": 45.97, "learning_rate": 5.4033450904539776e-05, "loss": 1526.8981, "step": 808000 }, { "epoch": 46.02, "learning_rate": 5.3976561611104796e-05, "loss": 1485.1602, "step": 809000 }, { "epoch": 46.08, "learning_rate": 5.391967231766982e-05, "loss": 1461.5816, "step": 810000 }, { "epoch": 46.14, "learning_rate": 5.386278302423484e-05, "loss": 1469.5287, "step": 811000 }, { "epoch": 46.19, "learning_rate": 5.380589373079986e-05, "loss": 1458.5487, "step": 812000 }, { "epoch": 46.25, "learning_rate": 5.3749004437364894e-05, "loss": 1467.3724, "step": 813000 }, { "epoch": 46.31, "learning_rate": 5.3692115143929913e-05, "loss": 1469.522, "step": 814000 }, { "epoch": 46.36, "learning_rate": 5.363522585049494e-05, "loss": 1480.8531, "step": 815000 }, { "epoch": 46.42, "learning_rate": 5.357833655705996e-05, "loss": 1480.0546, "step": 816000 }, { "epoch": 46.48, "learning_rate": 5.352144726362499e-05, "loss": 1490.1096, "step": 817000 }, { "epoch": 46.54, "learning_rate": 5.346455797019001e-05, "loss": 1485.2406, "step": 818000 }, { "epoch": 46.59, "learning_rate": 5.340766867675504e-05, "loss": 1495.7885, "step": 819000 }, { "epoch": 46.65, "learning_rate": 5.335077938332006e-05, "loss": 1508.2609, "step": 820000 }, { "epoch": 46.71, "learning_rate": 5.329389008988509e-05, "loss": 1494.4761, "step": 821000 }, { "epoch": 46.76, "learning_rate": 5.323700079645011e-05, "loss": 1513.8735, "step": 822000 }, { "epoch": 46.82, "learning_rate": 5.3180111503015137e-05, "loss": 1529.6516, "step": 823000 }, { "epoch": 46.88, "learning_rate": 5.3123222209580156e-05, "loss": 1513.2785, "step": 824000 }, { "epoch": 46.93, "learning_rate": 5.306633291614519e-05, "loss": 1494.7416, "step": 825000 }, { "epoch": 46.99, "learning_rate": 5.300944362271021e-05, "loss": 1514.5505, "step": 826000 }, { "epoch": 47.05, "learning_rate": 5.295255432927523e-05, "loss": 1457.3135, "step": 827000 }, { "epoch": 47.1, "learning_rate": 5.289566503584026e-05, "loss": 1444.0734, "step": 828000 }, { "epoch": 47.16, "learning_rate": 5.283877574240528e-05, "loss": 1459.7475, "step": 829000 }, { "epoch": 47.22, "learning_rate": 5.278188644897031e-05, "loss": 1459.4211, "step": 830000 }, { "epoch": 47.28, "learning_rate": 5.2724997155535326e-05, "loss": 1482.1501, "step": 831000 }, { "epoch": 47.33, "learning_rate": 5.266810786210036e-05, "loss": 1472.1243, "step": 832000 }, { "epoch": 47.39, "learning_rate": 5.261121856866538e-05, "loss": 1474.7287, "step": 833000 }, { "epoch": 47.45, "learning_rate": 5.2554329275230405e-05, "loss": 1482.9671, "step": 834000 }, { "epoch": 47.5, "learning_rate": 5.2497439981795425e-05, "loss": 1485.8347, "step": 835000 }, { "epoch": 47.56, "learning_rate": 5.244055068836046e-05, "loss": 1491.3694, "step": 836000 }, { "epoch": 47.62, "learning_rate": 5.238366139492548e-05, "loss": 1482.9026, "step": 837000 }, { "epoch": 47.67, "learning_rate": 5.2326772101490504e-05, "loss": 1502.524, "step": 838000 }, { "epoch": 47.73, "learning_rate": 5.226988280805552e-05, "loss": 1492.4124, "step": 839000 }, { "epoch": 47.79, "learning_rate": 5.2212993514620556e-05, "loss": 1498.4682, "step": 840000 }, { "epoch": 47.84, "learning_rate": 5.2156104221185576e-05, "loss": 1508.4499, "step": 841000 }, { "epoch": 47.9, "learning_rate": 5.2099214927750595e-05, "loss": 1503.8618, "step": 842000 }, { "epoch": 47.96, "learning_rate": 5.204232563431563e-05, "loss": 1495.3409, "step": 843000 }, { "epoch": 48.01, "learning_rate": 5.198543634088064e-05, "loss": 1485.3988, "step": 844000 }, { "epoch": 48.07, "learning_rate": 5.1928547047445674e-05, "loss": 1436.3326, "step": 845000 }, { "epoch": 48.13, "learning_rate": 5.1871657754010694e-05, "loss": 1453.9624, "step": 846000 }, { "epoch": 48.19, "learning_rate": 5.181476846057573e-05, "loss": 1453.2616, "step": 847000 }, { "epoch": 48.24, "learning_rate": 5.1757879167140746e-05, "loss": 1450.347, "step": 848000 }, { "epoch": 48.3, "learning_rate": 5.170098987370577e-05, "loss": 1454.6437, "step": 849000 }, { "epoch": 48.36, "learning_rate": 5.164410058027079e-05, "loss": 1453.4771, "step": 850000 }, { "epoch": 48.41, "learning_rate": 5.1587211286835825e-05, "loss": 1474.8485, "step": 851000 }, { "epoch": 48.47, "learning_rate": 5.1530321993400845e-05, "loss": 1485.1433, "step": 852000 }, { "epoch": 48.53, "learning_rate": 5.147343269996587e-05, "loss": 1474.8222, "step": 853000 }, { "epoch": 48.58, "learning_rate": 5.141654340653089e-05, "loss": 1471.7504, "step": 854000 }, { "epoch": 48.64, "learning_rate": 5.1359654113095924e-05, "loss": 1469.6772, "step": 855000 }, { "epoch": 48.7, "learning_rate": 5.130276481966094e-05, "loss": 1480.4671, "step": 856000 }, { "epoch": 48.75, "learning_rate": 5.124587552622596e-05, "loss": 1477.0144, "step": 857000 }, { "epoch": 48.81, "learning_rate": 5.118898623279099e-05, "loss": 1485.199, "step": 858000 }, { "epoch": 48.87, "learning_rate": 5.113209693935601e-05, "loss": 1471.6212, "step": 859000 }, { "epoch": 48.92, "learning_rate": 5.107520764592104e-05, "loss": 1491.6975, "step": 860000 }, { "epoch": 48.98, "learning_rate": 5.101831835248606e-05, "loss": 1479.9576, "step": 861000 }, { "epoch": 49.04, "learning_rate": 5.0961429059051094e-05, "loss": 1442.6555, "step": 862000 }, { "epoch": 49.1, "learning_rate": 5.0904539765616114e-05, "loss": 1444.1623, "step": 863000 }, { "epoch": 49.15, "learning_rate": 5.084765047218114e-05, "loss": 1441.0291, "step": 864000 }, { "epoch": 49.21, "learning_rate": 5.079076117874616e-05, "loss": 1452.7593, "step": 865000 }, { "epoch": 49.27, "learning_rate": 5.073387188531119e-05, "loss": 1446.3483, "step": 866000 }, { "epoch": 49.32, "learning_rate": 5.067698259187621e-05, "loss": 1450.2395, "step": 867000 }, { "epoch": 49.38, "learning_rate": 5.062009329844124e-05, "loss": 1441.2845, "step": 868000 }, { "epoch": 49.44, "learning_rate": 5.056320400500626e-05, "loss": 1451.4144, "step": 869000 }, { "epoch": 49.49, "learning_rate": 5.050631471157129e-05, "loss": 1463.5928, "step": 870000 }, { "epoch": 49.55, "learning_rate": 5.044942541813631e-05, "loss": 1468.1669, "step": 871000 }, { "epoch": 49.61, "learning_rate": 5.039253612470133e-05, "loss": 1466.8811, "step": 872000 }, { "epoch": 49.66, "learning_rate": 5.0335646831266356e-05, "loss": 1457.2025, "step": 873000 }, { "epoch": 49.72, "learning_rate": 5.0278757537831376e-05, "loss": 1479.2574, "step": 874000 }, { "epoch": 49.78, "learning_rate": 5.022186824439641e-05, "loss": 1482.1643, "step": 875000 }, { "epoch": 49.84, "learning_rate": 5.016497895096143e-05, "loss": 1475.1512, "step": 876000 }, { "epoch": 49.89, "learning_rate": 5.010808965752646e-05, "loss": 1485.5673, "step": 877000 }, { "epoch": 49.95, "learning_rate": 5.0051200364091474e-05, "loss": 1477.8049, "step": 878000 }, { "epoch": 50.01, "learning_rate": 4.99943110706565e-05, "loss": 1457.2552, "step": 879000 }, { "epoch": 50.06, "learning_rate": 4.9937421777221527e-05, "loss": 1410.3422, "step": 880000 }, { "epoch": 50.12, "learning_rate": 4.988053248378655e-05, "loss": 1420.7601, "step": 881000 }, { "epoch": 50.18, "learning_rate": 4.982364319035158e-05, "loss": 1422.0394, "step": 882000 }, { "epoch": 50.23, "learning_rate": 4.97667538969166e-05, "loss": 1428.53, "step": 883000 }, { "epoch": 50.29, "learning_rate": 4.9709864603481625e-05, "loss": 1436.9594, "step": 884000 }, { "epoch": 50.35, "learning_rate": 4.965297531004665e-05, "loss": 1449.9593, "step": 885000 }, { "epoch": 50.4, "learning_rate": 4.959608601661168e-05, "loss": 1444.1273, "step": 886000 }, { "epoch": 50.46, "learning_rate": 4.9539196723176704e-05, "loss": 1451.4593, "step": 887000 }, { "epoch": 50.52, "learning_rate": 4.948230742974172e-05, "loss": 1460.7906, "step": 888000 }, { "epoch": 50.57, "learning_rate": 4.942541813630675e-05, "loss": 1460.6111, "step": 889000 }, { "epoch": 50.63, "learning_rate": 4.9368528842871776e-05, "loss": 1448.3555, "step": 890000 }, { "epoch": 50.69, "learning_rate": 4.93116395494368e-05, "loss": 1458.398, "step": 891000 }, { "epoch": 50.75, "learning_rate": 4.925475025600182e-05, "loss": 1450.1935, "step": 892000 }, { "epoch": 50.8, "learning_rate": 4.919786096256685e-05, "loss": 1462.2466, "step": 893000 }, { "epoch": 50.86, "learning_rate": 4.9140971669131874e-05, "loss": 1473.1418, "step": 894000 }, { "epoch": 50.92, "learning_rate": 4.9084082375696894e-05, "loss": 1460.9494, "step": 895000 }, { "epoch": 50.97, "learning_rate": 4.902719308226192e-05, "loss": 1486.814, "step": 896000 }, { "epoch": 51.03, "learning_rate": 4.8970303788826946e-05, "loss": 1431.3066, "step": 897000 }, { "epoch": 51.09, "learning_rate": 4.8913414495391966e-05, "loss": 1420.6391, "step": 898000 }, { "epoch": 51.14, "learning_rate": 4.885652520195699e-05, "loss": 1420.4321, "step": 899000 }, { "epoch": 51.2, "learning_rate": 4.879963590852202e-05, "loss": 1409.5859, "step": 900000 }, { "epoch": 51.26, "learning_rate": 4.8742746615087045e-05, "loss": 1425.5093, "step": 901000 }, { "epoch": 51.31, "learning_rate": 4.8685857321652064e-05, "loss": 1434.9921, "step": 902000 }, { "epoch": 51.37, "learning_rate": 4.862896802821709e-05, "loss": 1423.3864, "step": 903000 }, { "epoch": 51.43, "learning_rate": 4.857207873478212e-05, "loss": 1439.098, "step": 904000 }, { "epoch": 51.48, "learning_rate": 4.851518944134714e-05, "loss": 1440.4952, "step": 905000 }, { "epoch": 51.54, "learning_rate": 4.845830014791217e-05, "loss": 1438.2052, "step": 906000 }, { "epoch": 51.6, "learning_rate": 4.840141085447719e-05, "loss": 1457.4209, "step": 907000 }, { "epoch": 51.66, "learning_rate": 4.8344521561042215e-05, "loss": 1431.5081, "step": 908000 }, { "epoch": 51.71, "learning_rate": 4.828763226760724e-05, "loss": 1443.8136, "step": 909000 }, { "epoch": 51.77, "learning_rate": 4.823074297417226e-05, "loss": 1449.907, "step": 910000 }, { "epoch": 51.83, "learning_rate": 4.817385368073729e-05, "loss": 1447.7141, "step": 911000 }, { "epoch": 51.88, "learning_rate": 4.811696438730231e-05, "loss": 1464.1871, "step": 912000 }, { "epoch": 51.94, "learning_rate": 4.806007509386733e-05, "loss": 1460.0676, "step": 913000 }, { "epoch": 52.0, "learning_rate": 4.800318580043236e-05, "loss": 1462.8633, "step": 914000 }, { "epoch": 52.05, "learning_rate": 4.7946296506997386e-05, "loss": 1411.9011, "step": 915000 }, { "epoch": 52.11, "learning_rate": 4.788940721356241e-05, "loss": 1412.8915, "step": 916000 }, { "epoch": 52.17, "learning_rate": 4.783251792012743e-05, "loss": 1422.405, "step": 917000 }, { "epoch": 52.22, "learning_rate": 4.777562862669246e-05, "loss": 1408.673, "step": 918000 }, { "epoch": 52.28, "learning_rate": 4.7718739333257484e-05, "loss": 1420.5905, "step": 919000 }, { "epoch": 52.34, "learning_rate": 4.766185003982251e-05, "loss": 1423.8896, "step": 920000 }, { "epoch": 52.4, "learning_rate": 4.760496074638754e-05, "loss": 1419.6969, "step": 921000 }, { "epoch": 52.45, "learning_rate": 4.7548071452952556e-05, "loss": 1439.9423, "step": 922000 }, { "epoch": 52.51, "learning_rate": 4.749118215951758e-05, "loss": 1416.4144, "step": 923000 }, { "epoch": 52.57, "learning_rate": 4.743429286608261e-05, "loss": 1433.3589, "step": 924000 }, { "epoch": 52.62, "learning_rate": 4.737740357264763e-05, "loss": 1448.1575, "step": 925000 }, { "epoch": 52.68, "learning_rate": 4.7320514279212655e-05, "loss": 1434.969, "step": 926000 }, { "epoch": 52.74, "learning_rate": 4.7263624985777674e-05, "loss": 1435.409, "step": 927000 }, { "epoch": 52.79, "learning_rate": 4.72067356923427e-05, "loss": 1442.1536, "step": 928000 }, { "epoch": 52.85, "learning_rate": 4.714984639890773e-05, "loss": 1432.7261, "step": 929000 }, { "epoch": 52.91, "learning_rate": 4.709295710547275e-05, "loss": 1439.7834, "step": 930000 }, { "epoch": 52.96, "learning_rate": 4.703606781203778e-05, "loss": 1446.026, "step": 931000 }, { "epoch": 53.02, "learning_rate": 4.69791785186028e-05, "loss": 1419.8316, "step": 932000 }, { "epoch": 53.08, "learning_rate": 4.6922289225167825e-05, "loss": 1393.9454, "step": 933000 }, { "epoch": 53.13, "learning_rate": 4.686539993173285e-05, "loss": 1402.8833, "step": 934000 }, { "epoch": 53.19, "learning_rate": 4.680851063829788e-05, "loss": 1397.6079, "step": 935000 }, { "epoch": 53.25, "learning_rate": 4.67516213448629e-05, "loss": 1392.49, "step": 936000 }, { "epoch": 53.31, "learning_rate": 4.6694732051427923e-05, "loss": 1424.2674, "step": 937000 }, { "epoch": 53.36, "learning_rate": 4.663784275799295e-05, "loss": 1407.8266, "step": 938000 }, { "epoch": 53.42, "learning_rate": 4.6580953464557976e-05, "loss": 1414.4605, "step": 939000 }, { "epoch": 53.48, "learning_rate": 4.6524064171123e-05, "loss": 1417.0438, "step": 940000 }, { "epoch": 53.53, "learning_rate": 4.646717487768802e-05, "loss": 1414.4977, "step": 941000 }, { "epoch": 53.59, "learning_rate": 4.641028558425304e-05, "loss": 1425.8153, "step": 942000 }, { "epoch": 53.65, "learning_rate": 4.635339629081807e-05, "loss": 1419.5114, "step": 943000 }, { "epoch": 53.7, "learning_rate": 4.6296506997383094e-05, "loss": 1422.2062, "step": 944000 }, { "epoch": 53.76, "learning_rate": 4.623961770394812e-05, "loss": 1435.841, "step": 945000 }, { "epoch": 53.82, "learning_rate": 4.618272841051314e-05, "loss": 1429.7986, "step": 946000 }, { "epoch": 53.87, "learning_rate": 4.6125839117078166e-05, "loss": 1428.0028, "step": 947000 }, { "epoch": 53.93, "learning_rate": 4.606894982364319e-05, "loss": 1415.7939, "step": 948000 }, { "epoch": 53.99, "learning_rate": 4.601206053020822e-05, "loss": 1434.6161, "step": 949000 }, { "epoch": 54.04, "learning_rate": 4.5955171236773245e-05, "loss": 1406.5694, "step": 950000 }, { "epoch": 54.1, "learning_rate": 4.5898281943338264e-05, "loss": 1382.8934, "step": 951000 }, { "epoch": 54.16, "learning_rate": 4.584139264990329e-05, "loss": 1394.2203, "step": 952000 }, { "epoch": 54.22, "learning_rate": 4.578450335646832e-05, "loss": 1394.0786, "step": 953000 }, { "epoch": 54.27, "learning_rate": 4.572761406303334e-05, "loss": 1399.649, "step": 954000 }, { "epoch": 54.33, "learning_rate": 4.567072476959837e-05, "loss": 1396.0142, "step": 955000 }, { "epoch": 54.39, "learning_rate": 4.561383547616339e-05, "loss": 1406.9284, "step": 956000 }, { "epoch": 54.44, "learning_rate": 4.555694618272841e-05, "loss": 1396.1073, "step": 957000 }, { "epoch": 54.5, "learning_rate": 4.5500056889293435e-05, "loss": 1412.4086, "step": 958000 }, { "epoch": 54.56, "learning_rate": 4.544316759585846e-05, "loss": 1424.7387, "step": 959000 }, { "epoch": 54.61, "learning_rate": 4.538627830242349e-05, "loss": 1398.1517, "step": 960000 }, { "epoch": 54.67, "learning_rate": 4.532938900898851e-05, "loss": 1419.0332, "step": 961000 }, { "epoch": 54.73, "learning_rate": 4.527249971555353e-05, "loss": 1414.7771, "step": 962000 }, { "epoch": 54.78, "learning_rate": 4.521561042211856e-05, "loss": 1416.8347, "step": 963000 }, { "epoch": 54.84, "learning_rate": 4.5158721128683586e-05, "loss": 1418.8846, "step": 964000 }, { "epoch": 54.9, "learning_rate": 4.510183183524861e-05, "loss": 1410.0279, "step": 965000 }, { "epoch": 54.96, "learning_rate": 4.504494254181363e-05, "loss": 1407.854, "step": 966000 }, { "epoch": 55.01, "learning_rate": 4.498805324837866e-05, "loss": 1401.7722, "step": 967000 }, { "epoch": 55.07, "learning_rate": 4.4931163954943684e-05, "loss": 1381.5277, "step": 968000 }, { "epoch": 55.13, "learning_rate": 4.487427466150871e-05, "loss": 1384.3396, "step": 969000 }, { "epoch": 55.18, "learning_rate": 4.481738536807373e-05, "loss": 1384.875, "step": 970000 }, { "epoch": 55.24, "learning_rate": 4.476049607463875e-05, "loss": 1379.5448, "step": 971000 }, { "epoch": 55.3, "learning_rate": 4.4703606781203776e-05, "loss": 1391.3579, "step": 972000 }, { "epoch": 55.35, "learning_rate": 4.46467174877688e-05, "loss": 1377.1931, "step": 973000 }, { "epoch": 55.41, "learning_rate": 4.458982819433383e-05, "loss": 1395.6391, "step": 974000 }, { "epoch": 55.47, "learning_rate": 4.4532938900898855e-05, "loss": 1404.9155, "step": 975000 }, { "epoch": 55.52, "learning_rate": 4.4476049607463874e-05, "loss": 1395.3968, "step": 976000 }, { "epoch": 55.58, "learning_rate": 4.44191603140289e-05, "loss": 1403.308, "step": 977000 }, { "epoch": 55.64, "learning_rate": 4.436227102059393e-05, "loss": 1383.9726, "step": 978000 }, { "epoch": 55.69, "learning_rate": 4.430538172715895e-05, "loss": 1399.5193, "step": 979000 }, { "epoch": 55.75, "learning_rate": 4.424849243372397e-05, "loss": 1399.5634, "step": 980000 }, { "epoch": 55.81, "learning_rate": 4.4191603140289e-05, "loss": 1405.5265, "step": 981000 }, { "epoch": 55.87, "learning_rate": 4.4134713846854025e-05, "loss": 1411.227, "step": 982000 }, { "epoch": 55.92, "learning_rate": 4.407782455341905e-05, "loss": 1433.6602, "step": 983000 }, { "epoch": 55.98, "learning_rate": 4.402093525998408e-05, "loss": 1429.4426, "step": 984000 }, { "epoch": 56.04, "learning_rate": 4.39640459665491e-05, "loss": 1378.7529, "step": 985000 }, { "epoch": 56.09, "learning_rate": 4.390715667311412e-05, "loss": 1373.8057, "step": 986000 }, { "epoch": 56.15, "learning_rate": 4.385026737967914e-05, "loss": 1376.4955, "step": 987000 }, { "epoch": 56.21, "learning_rate": 4.379337808624417e-05, "loss": 1368.9189, "step": 988000 }, { "epoch": 56.26, "learning_rate": 4.3736488792809196e-05, "loss": 1369.0187, "step": 989000 }, { "epoch": 56.32, "learning_rate": 4.367959949937422e-05, "loss": 1367.7469, "step": 990000 }, { "epoch": 56.38, "learning_rate": 4.362271020593924e-05, "loss": 1385.1408, "step": 991000 }, { "epoch": 56.43, "learning_rate": 4.356582091250427e-05, "loss": 1392.7776, "step": 992000 }, { "epoch": 56.49, "learning_rate": 4.3508931619069294e-05, "loss": 1385.3786, "step": 993000 }, { "epoch": 56.55, "learning_rate": 4.345204232563432e-05, "loss": 1384.116, "step": 994000 }, { "epoch": 56.6, "learning_rate": 4.339515303219934e-05, "loss": 1390.9349, "step": 995000 }, { "epoch": 56.66, "learning_rate": 4.3338263738764366e-05, "loss": 1384.5113, "step": 996000 }, { "epoch": 56.72, "learning_rate": 4.328137444532939e-05, "loss": 1409.7151, "step": 997000 }, { "epoch": 56.78, "learning_rate": 4.322448515189442e-05, "loss": 1392.9444, "step": 998000 }, { "epoch": 56.83, "learning_rate": 4.3167595858459445e-05, "loss": 1409.5379, "step": 999000 }, { "epoch": 56.89, "learning_rate": 4.3110706565024464e-05, "loss": 1411.7393, "step": 1000000 }, { "epoch": 56.95, "learning_rate": 4.305381727158949e-05, "loss": 1410.5654, "step": 1001000 }, { "epoch": 57.0, "learning_rate": 4.299692797815451e-05, "loss": 1403.6015, "step": 1002000 }, { "epoch": 57.06, "learning_rate": 4.2940038684719537e-05, "loss": 1340.4456, "step": 1003000 }, { "epoch": 57.12, "learning_rate": 4.288314939128456e-05, "loss": 1355.5106, "step": 1004000 }, { "epoch": 57.17, "learning_rate": 4.282626009784958e-05, "loss": 1349.326, "step": 1005000 }, { "epoch": 57.23, "learning_rate": 4.276937080441461e-05, "loss": 1367.2814, "step": 1006000 }, { "epoch": 57.29, "learning_rate": 4.2712481510979635e-05, "loss": 1368.0407, "step": 1007000 }, { "epoch": 57.34, "learning_rate": 4.265559221754466e-05, "loss": 1371.5805, "step": 1008000 }, { "epoch": 57.4, "learning_rate": 4.259870292410969e-05, "loss": 1391.3825, "step": 1009000 }, { "epoch": 57.46, "learning_rate": 4.254181363067471e-05, "loss": 1386.9325, "step": 1010000 }, { "epoch": 57.52, "learning_rate": 4.248492433723973e-05, "loss": 1384.6933, "step": 1011000 }, { "epoch": 57.57, "learning_rate": 4.242803504380476e-05, "loss": 1379.2609, "step": 1012000 }, { "epoch": 57.63, "learning_rate": 4.2371145750369786e-05, "loss": 1391.7539, "step": 1013000 }, { "epoch": 57.69, "learning_rate": 4.231425645693481e-05, "loss": 1390.128, "step": 1014000 }, { "epoch": 57.74, "learning_rate": 4.225736716349983e-05, "loss": 1395.4465, "step": 1015000 }, { "epoch": 57.8, "learning_rate": 4.220047787006486e-05, "loss": 1398.9347, "step": 1016000 }, { "epoch": 57.86, "learning_rate": 4.214358857662988e-05, "loss": 1386.4813, "step": 1017000 }, { "epoch": 57.91, "learning_rate": 4.2086699283194904e-05, "loss": 1376.5611, "step": 1018000 }, { "epoch": 57.97, "learning_rate": 4.202980998975993e-05, "loss": 1395.9813, "step": 1019000 }, { "epoch": 58.03, "learning_rate": 4.197292069632495e-05, "loss": 1372.5131, "step": 1020000 }, { "epoch": 58.08, "learning_rate": 4.1916031402889976e-05, "loss": 1340.152, "step": 1021000 }, { "epoch": 58.14, "learning_rate": 4.1859142109455e-05, "loss": 1356.3731, "step": 1022000 }, { "epoch": 58.2, "learning_rate": 4.180225281602003e-05, "loss": 1350.6039, "step": 1023000 }, { "epoch": 58.25, "learning_rate": 4.1745363522585055e-05, "loss": 1342.336, "step": 1024000 }, { "epoch": 58.31, "learning_rate": 4.1688474229150074e-05, "loss": 1367.6961, "step": 1025000 }, { "epoch": 58.37, "learning_rate": 4.16315849357151e-05, "loss": 1378.291, "step": 1026000 }, { "epoch": 58.43, "learning_rate": 4.157469564228013e-05, "loss": 1368.3586, "step": 1027000 }, { "epoch": 58.48, "learning_rate": 4.151780634884515e-05, "loss": 1380.9392, "step": 1028000 }, { "epoch": 58.54, "learning_rate": 4.146091705541017e-05, "loss": 1353.5531, "step": 1029000 }, { "epoch": 58.6, "learning_rate": 4.14040277619752e-05, "loss": 1359.1265, "step": 1030000 }, { "epoch": 58.65, "learning_rate": 4.1347138468540225e-05, "loss": 1371.6984, "step": 1031000 }, { "epoch": 58.71, "learning_rate": 4.1290249175105245e-05, "loss": 1383.8718, "step": 1032000 }, { "epoch": 58.77, "learning_rate": 4.123335988167027e-05, "loss": 1376.6243, "step": 1033000 }, { "epoch": 58.82, "learning_rate": 4.11764705882353e-05, "loss": 1368.1823, "step": 1034000 }, { "epoch": 58.88, "learning_rate": 4.111958129480032e-05, "loss": 1378.275, "step": 1035000 }, { "epoch": 58.94, "learning_rate": 4.106269200136534e-05, "loss": 1394.8345, "step": 1036000 }, { "epoch": 58.99, "learning_rate": 4.100580270793037e-05, "loss": 1374.6516, "step": 1037000 }, { "epoch": 59.05, "learning_rate": 4.0948913414495396e-05, "loss": 1358.6837, "step": 1038000 }, { "epoch": 59.11, "learning_rate": 4.0892024121060415e-05, "loss": 1331.7564, "step": 1039000 }, { "epoch": 59.16, "learning_rate": 4.083513482762544e-05, "loss": 1339.3138, "step": 1040000 }, { "epoch": 59.22, "learning_rate": 4.077824553419047e-05, "loss": 1354.2134, "step": 1041000 }, { "epoch": 59.28, "learning_rate": 4.0721356240755494e-05, "loss": 1355.7516, "step": 1042000 }, { "epoch": 59.34, "learning_rate": 4.066446694732052e-05, "loss": 1348.532, "step": 1043000 }, { "epoch": 59.39, "learning_rate": 4.060757765388554e-05, "loss": 1359.355, "step": 1044000 }, { "epoch": 59.45, "learning_rate": 4.0550688360450566e-05, "loss": 1367.0232, "step": 1045000 }, { "epoch": 59.51, "learning_rate": 4.049379906701559e-05, "loss": 1362.8901, "step": 1046000 }, { "epoch": 59.56, "learning_rate": 4.043690977358062e-05, "loss": 1365.8005, "step": 1047000 }, { "epoch": 59.62, "learning_rate": 4.038002048014564e-05, "loss": 1367.3115, "step": 1048000 }, { "epoch": 59.68, "learning_rate": 4.032313118671066e-05, "loss": 1358.7971, "step": 1049000 }, { "epoch": 59.73, "learning_rate": 4.0266241893275684e-05, "loss": 1369.9351, "step": 1050000 }, { "epoch": 59.79, "learning_rate": 4.020935259984071e-05, "loss": 1381.4084, "step": 1051000 }, { "epoch": 59.85, "learning_rate": 4.0152463306405737e-05, "loss": 1364.0674, "step": 1052000 }, { "epoch": 59.9, "learning_rate": 4.009557401297076e-05, "loss": 1360.6619, "step": 1053000 }, { "epoch": 59.96, "learning_rate": 4.003868471953578e-05, "loss": 1360.3134, "step": 1054000 }, { "epoch": 60.02, "learning_rate": 3.998179542610081e-05, "loss": 1353.1028, "step": 1055000 }, { "epoch": 60.08, "learning_rate": 3.9924906132665835e-05, "loss": 1330.2395, "step": 1056000 }, { "epoch": 60.13, "learning_rate": 3.986801683923086e-05, "loss": 1335.7539, "step": 1057000 }, { "epoch": 60.19, "learning_rate": 3.981112754579589e-05, "loss": 1343.1043, "step": 1058000 }, { "epoch": 60.25, "learning_rate": 3.975423825236091e-05, "loss": 1356.7641, "step": 1059000 }, { "epoch": 60.3, "learning_rate": 3.969734895892593e-05, "loss": 1344.7671, "step": 1060000 }, { "epoch": 60.36, "learning_rate": 3.964045966549096e-05, "loss": 1352.5415, "step": 1061000 }, { "epoch": 60.42, "learning_rate": 3.9583570372055986e-05, "loss": 1342.5885, "step": 1062000 }, { "epoch": 60.47, "learning_rate": 3.9526681078621005e-05, "loss": 1342.65, "step": 1063000 }, { "epoch": 60.53, "learning_rate": 3.9469791785186025e-05, "loss": 1334.8052, "step": 1064000 }, { "epoch": 60.59, "learning_rate": 3.941290249175105e-05, "loss": 1345.5519, "step": 1065000 }, { "epoch": 60.64, "learning_rate": 3.935601319831608e-05, "loss": 1364.7105, "step": 1066000 }, { "epoch": 60.7, "learning_rate": 3.9299123904881104e-05, "loss": 1347.3367, "step": 1067000 }, { "epoch": 60.76, "learning_rate": 3.924223461144613e-05, "loss": 1364.7013, "step": 1068000 }, { "epoch": 60.81, "learning_rate": 3.918534531801115e-05, "loss": 1361.9801, "step": 1069000 }, { "epoch": 60.87, "learning_rate": 3.9128456024576176e-05, "loss": 1362.5226, "step": 1070000 }, { "epoch": 60.93, "learning_rate": 3.90715667311412e-05, "loss": 1362.4665, "step": 1071000 }, { "epoch": 60.99, "learning_rate": 3.901467743770623e-05, "loss": 1362.1824, "step": 1072000 }, { "epoch": 61.04, "learning_rate": 3.895778814427125e-05, "loss": 1348.9848, "step": 1073000 }, { "epoch": 61.1, "learning_rate": 3.8900898850836274e-05, "loss": 1322.9893, "step": 1074000 }, { "epoch": 61.16, "learning_rate": 3.88440095574013e-05, "loss": 1320.145, "step": 1075000 }, { "epoch": 61.21, "learning_rate": 3.878712026396633e-05, "loss": 1327.8551, "step": 1076000 }, { "epoch": 61.27, "learning_rate": 3.873023097053135e-05, "loss": 1327.2493, "step": 1077000 }, { "epoch": 61.33, "learning_rate": 3.867334167709637e-05, "loss": 1340.0026, "step": 1078000 }, { "epoch": 61.38, "learning_rate": 3.861645238366139e-05, "loss": 1322.1997, "step": 1079000 }, { "epoch": 61.44, "learning_rate": 3.855956309022642e-05, "loss": 1338.1894, "step": 1080000 }, { "epoch": 61.5, "learning_rate": 3.8502673796791445e-05, "loss": 1336.8322, "step": 1081000 }, { "epoch": 61.55, "learning_rate": 3.844578450335647e-05, "loss": 1347.628, "step": 1082000 }, { "epoch": 61.61, "learning_rate": 3.838889520992149e-05, "loss": 1349.1724, "step": 1083000 }, { "epoch": 61.67, "learning_rate": 3.833200591648652e-05, "loss": 1349.1363, "step": 1084000 }, { "epoch": 61.72, "learning_rate": 3.827511662305154e-05, "loss": 1354.0266, "step": 1085000 }, { "epoch": 61.78, "learning_rate": 3.821822732961657e-05, "loss": 1350.4012, "step": 1086000 }, { "epoch": 61.84, "learning_rate": 3.8161338036181596e-05, "loss": 1335.4919, "step": 1087000 }, { "epoch": 61.9, "learning_rate": 3.8104448742746615e-05, "loss": 1349.753, "step": 1088000 }, { "epoch": 61.95, "learning_rate": 3.804755944931164e-05, "loss": 1356.8377, "step": 1089000 }, { "epoch": 62.01, "learning_rate": 3.799067015587667e-05, "loss": 1350.5734, "step": 1090000 }, { "epoch": 62.07, "learning_rate": 3.7933780862441694e-05, "loss": 1309.3285, "step": 1091000 }, { "epoch": 62.12, "learning_rate": 3.787689156900672e-05, "loss": 1318.706, "step": 1092000 }, { "epoch": 62.18, "learning_rate": 3.782000227557173e-05, "loss": 1326.3741, "step": 1093000 }, { "epoch": 62.24, "learning_rate": 3.776311298213676e-05, "loss": 1318.908, "step": 1094000 }, { "epoch": 62.29, "learning_rate": 3.7706223688701786e-05, "loss": 1331.3244, "step": 1095000 }, { "epoch": 62.35, "learning_rate": 3.764933439526681e-05, "loss": 1325.7211, "step": 1096000 }, { "epoch": 62.41, "learning_rate": 3.759244510183184e-05, "loss": 1328.6514, "step": 1097000 }, { "epoch": 62.46, "learning_rate": 3.753555580839686e-05, "loss": 1338.5616, "step": 1098000 }, { "epoch": 62.52, "learning_rate": 3.7478666514961884e-05, "loss": 1319.0344, "step": 1099000 }, { "epoch": 62.58, "learning_rate": 3.742177722152691e-05, "loss": 1337.191, "step": 1100000 }, { "epoch": 62.64, "learning_rate": 3.736488792809194e-05, "loss": 1327.8355, "step": 1101000 }, { "epoch": 62.69, "learning_rate": 3.730799863465696e-05, "loss": 1325.3075, "step": 1102000 }, { "epoch": 62.75, "learning_rate": 3.725110934122198e-05, "loss": 1336.0187, "step": 1103000 }, { "epoch": 62.81, "learning_rate": 3.719422004778701e-05, "loss": 1346.2621, "step": 1104000 }, { "epoch": 62.86, "learning_rate": 3.7137330754352035e-05, "loss": 1334.7985, "step": 1105000 }, { "epoch": 62.92, "learning_rate": 3.708044146091706e-05, "loss": 1343.435, "step": 1106000 }, { "epoch": 62.98, "learning_rate": 3.702355216748208e-05, "loss": 1341.8235, "step": 1107000 }, { "epoch": 63.03, "learning_rate": 3.696666287404711e-05, "loss": 1306.4944, "step": 1108000 }, { "epoch": 63.09, "learning_rate": 3.690977358061213e-05, "loss": 1305.8802, "step": 1109000 }, { "epoch": 63.15, "learning_rate": 3.685288428717715e-05, "loss": 1311.6237, "step": 1110000 }, { "epoch": 63.2, "learning_rate": 3.679599499374218e-05, "loss": 1309.0452, "step": 1111000 }, { "epoch": 63.26, "learning_rate": 3.6739105700307206e-05, "loss": 1319.2765, "step": 1112000 }, { "epoch": 63.32, "learning_rate": 3.6682216406872225e-05, "loss": 1307.698, "step": 1113000 }, { "epoch": 63.37, "learning_rate": 3.662532711343725e-05, "loss": 1321.2332, "step": 1114000 }, { "epoch": 63.43, "learning_rate": 3.656843782000228e-05, "loss": 1325.2036, "step": 1115000 }, { "epoch": 63.49, "learning_rate": 3.6511548526567304e-05, "loss": 1309.7259, "step": 1116000 }, { "epoch": 63.55, "learning_rate": 3.6454659233132323e-05, "loss": 1309.4072, "step": 1117000 }, { "epoch": 63.6, "learning_rate": 3.639776993969735e-05, "loss": 1324.8522, "step": 1118000 }, { "epoch": 63.66, "learning_rate": 3.6340880646262376e-05, "loss": 1333.5303, "step": 1119000 }, { "epoch": 63.72, "learning_rate": 3.62839913528274e-05, "loss": 1339.0274, "step": 1120000 }, { "epoch": 63.77, "learning_rate": 3.622710205939243e-05, "loss": 1330.9017, "step": 1121000 }, { "epoch": 63.83, "learning_rate": 3.617021276595745e-05, "loss": 1339.8504, "step": 1122000 }, { "epoch": 63.89, "learning_rate": 3.6113323472522474e-05, "loss": 1340.6674, "step": 1123000 }, { "epoch": 63.94, "learning_rate": 3.6056434179087494e-05, "loss": 1323.5593, "step": 1124000 }, { "epoch": 64.0, "learning_rate": 3.599954488565252e-05, "loss": 1326.9607, "step": 1125000 }, { "epoch": 64.06, "learning_rate": 3.5942655592217546e-05, "loss": 1294.2585, "step": 1126000 }, { "epoch": 64.11, "learning_rate": 3.5885766298782566e-05, "loss": 1306.5483, "step": 1127000 }, { "epoch": 64.17, "learning_rate": 3.582887700534759e-05, "loss": 1312.2281, "step": 1128000 }, { "epoch": 64.23, "learning_rate": 3.577198771191262e-05, "loss": 1295.1591, "step": 1129000 }, { "epoch": 64.28, "learning_rate": 3.5715098418477645e-05, "loss": 1313.7513, "step": 1130000 }, { "epoch": 64.34, "learning_rate": 3.565820912504267e-05, "loss": 1301.2609, "step": 1131000 }, { "epoch": 64.4, "learning_rate": 3.560131983160769e-05, "loss": 1310.5661, "step": 1132000 }, { "epoch": 64.46, "learning_rate": 3.554443053817272e-05, "loss": 1311.9871, "step": 1133000 }, { "epoch": 64.51, "learning_rate": 3.548754124473774e-05, "loss": 1311.7386, "step": 1134000 }, { "epoch": 64.57, "learning_rate": 3.543065195130277e-05, "loss": 1305.1498, "step": 1135000 }, { "epoch": 64.63, "learning_rate": 3.5373762657867796e-05, "loss": 1308.403, "step": 1136000 }, { "epoch": 64.68, "learning_rate": 3.5316873364432815e-05, "loss": 1321.9365, "step": 1137000 }, { "epoch": 64.74, "learning_rate": 3.525998407099784e-05, "loss": 1312.642, "step": 1138000 }, { "epoch": 64.8, "learning_rate": 3.520309477756286e-05, "loss": 1313.0409, "step": 1139000 }, { "epoch": 64.85, "learning_rate": 3.514620548412789e-05, "loss": 1326.3134, "step": 1140000 }, { "epoch": 64.91, "learning_rate": 3.5089316190692914e-05, "loss": 1309.1301, "step": 1141000 }, { "epoch": 64.97, "learning_rate": 3.503242689725793e-05, "loss": 1325.8679, "step": 1142000 }, { "epoch": 65.02, "learning_rate": 3.497553760382296e-05, "loss": 1300.7217, "step": 1143000 }, { "epoch": 65.08, "learning_rate": 3.4918648310387986e-05, "loss": 1287.5244, "step": 1144000 }, { "epoch": 65.14, "learning_rate": 3.486175901695301e-05, "loss": 1299.4445, "step": 1145000 }, { "epoch": 65.2, "learning_rate": 3.480486972351804e-05, "loss": 1295.0219, "step": 1146000 }, { "epoch": 65.25, "learning_rate": 3.474798043008306e-05, "loss": 1302.362, "step": 1147000 }, { "epoch": 65.31, "learning_rate": 3.4691091136648084e-05, "loss": 1286.3095, "step": 1148000 }, { "epoch": 65.37, "learning_rate": 3.463420184321311e-05, "loss": 1296.6343, "step": 1149000 }, { "epoch": 65.42, "learning_rate": 3.457731254977814e-05, "loss": 1311.9636, "step": 1150000 }, { "epoch": 65.48, "learning_rate": 3.4520423256343156e-05, "loss": 1297.0695, "step": 1151000 }, { "epoch": 65.54, "learning_rate": 3.446353396290818e-05, "loss": 1316.2928, "step": 1152000 }, { "epoch": 65.59, "learning_rate": 3.440664466947321e-05, "loss": 1302.785, "step": 1153000 }, { "epoch": 65.65, "learning_rate": 3.4349755376038235e-05, "loss": 1317.1405, "step": 1154000 }, { "epoch": 65.71, "learning_rate": 3.4292866082603255e-05, "loss": 1312.062, "step": 1155000 }, { "epoch": 65.76, "learning_rate": 3.423597678916828e-05, "loss": 1322.3881, "step": 1156000 }, { "epoch": 65.82, "learning_rate": 3.41790874957333e-05, "loss": 1303.9627, "step": 1157000 }, { "epoch": 65.88, "learning_rate": 3.412219820229833e-05, "loss": 1300.6221, "step": 1158000 }, { "epoch": 65.93, "learning_rate": 3.406530890886335e-05, "loss": 1297.754, "step": 1159000 }, { "epoch": 65.99, "learning_rate": 3.400841961542838e-05, "loss": 1325.5806, "step": 1160000 }, { "epoch": 66.05, "learning_rate": 3.39515303219934e-05, "loss": 1291.2976, "step": 1161000 }, { "epoch": 66.11, "learning_rate": 3.3894641028558425e-05, "loss": 1272.9427, "step": 1162000 }, { "epoch": 66.16, "learning_rate": 3.383775173512345e-05, "loss": 1269.1419, "step": 1163000 }, { "epoch": 66.22, "learning_rate": 3.378086244168848e-05, "loss": 1283.7979, "step": 1164000 }, { "epoch": 66.28, "learning_rate": 3.3723973148253504e-05, "loss": 1279.9143, "step": 1165000 }, { "epoch": 66.33, "learning_rate": 3.3667083854818524e-05, "loss": 1287.2928, "step": 1166000 }, { "epoch": 66.39, "learning_rate": 3.361019456138355e-05, "loss": 1286.7836, "step": 1167000 }, { "epoch": 66.45, "learning_rate": 3.3553305267948576e-05, "loss": 1315.1875, "step": 1168000 }, { "epoch": 66.5, "learning_rate": 3.34964159745136e-05, "loss": 1289.2281, "step": 1169000 }, { "epoch": 66.56, "learning_rate": 3.343952668107862e-05, "loss": 1294.26, "step": 1170000 }, { "epoch": 66.62, "learning_rate": 3.338263738764365e-05, "loss": 1309.1308, "step": 1171000 }, { "epoch": 66.67, "learning_rate": 3.332574809420867e-05, "loss": 1301.9129, "step": 1172000 }, { "epoch": 66.73, "learning_rate": 3.3268858800773694e-05, "loss": 1293.2513, "step": 1173000 }, { "epoch": 66.79, "learning_rate": 3.321196950733872e-05, "loss": 1289.4241, "step": 1174000 }, { "epoch": 66.84, "learning_rate": 3.3155080213903747e-05, "loss": 1290.7513, "step": 1175000 }, { "epoch": 66.9, "learning_rate": 3.3098190920468766e-05, "loss": 1299.1079, "step": 1176000 }, { "epoch": 66.96, "learning_rate": 3.304130162703379e-05, "loss": 1302.9189, "step": 1177000 }, { "epoch": 67.02, "learning_rate": 3.298441233359882e-05, "loss": 1291.1761, "step": 1178000 }, { "epoch": 67.07, "learning_rate": 3.2927523040163845e-05, "loss": 1274.8294, "step": 1179000 }, { "epoch": 67.13, "learning_rate": 3.287063374672887e-05, "loss": 1268.2645, "step": 1180000 }, { "epoch": 67.19, "learning_rate": 3.281374445329389e-05, "loss": 1275.567, "step": 1181000 }, { "epoch": 67.24, "learning_rate": 3.275685515985892e-05, "loss": 1278.2656, "step": 1182000 }, { "epoch": 67.3, "learning_rate": 3.269996586642394e-05, "loss": 1280.1092, "step": 1183000 }, { "epoch": 67.36, "learning_rate": 3.264307657298897e-05, "loss": 1265.5366, "step": 1184000 }, { "epoch": 67.41, "learning_rate": 3.258618727955399e-05, "loss": 1277.3436, "step": 1185000 }, { "epoch": 67.47, "learning_rate": 3.252929798611901e-05, "loss": 1290.6669, "step": 1186000 }, { "epoch": 67.53, "learning_rate": 3.2472408692684035e-05, "loss": 1294.7142, "step": 1187000 }, { "epoch": 67.58, "learning_rate": 3.241551939924906e-05, "loss": 1288.9499, "step": 1188000 }, { "epoch": 67.64, "learning_rate": 3.235863010581409e-05, "loss": 1282.4047, "step": 1189000 }, { "epoch": 67.7, "learning_rate": 3.2301740812379114e-05, "loss": 1283.248, "step": 1190000 }, { "epoch": 67.76, "learning_rate": 3.224485151894413e-05, "loss": 1292.0804, "step": 1191000 }, { "epoch": 67.81, "learning_rate": 3.218796222550916e-05, "loss": 1287.3724, "step": 1192000 }, { "epoch": 67.87, "learning_rate": 3.2131072932074186e-05, "loss": 1299.4706, "step": 1193000 }, { "epoch": 67.93, "learning_rate": 3.207418363863921e-05, "loss": 1289.2519, "step": 1194000 }, { "epoch": 67.98, "learning_rate": 3.201729434520424e-05, "loss": 1286.8601, "step": 1195000 }, { "epoch": 68.04, "learning_rate": 3.196040505176926e-05, "loss": 1266.1305, "step": 1196000 }, { "epoch": 68.1, "learning_rate": 3.1903515758334284e-05, "loss": 1254.9997, "step": 1197000 }, { "epoch": 68.15, "learning_rate": 3.184662646489931e-05, "loss": 1268.4014, "step": 1198000 }, { "epoch": 68.21, "learning_rate": 3.178973717146434e-05, "loss": 1261.417, "step": 1199000 }, { "epoch": 68.27, "learning_rate": 3.1732847878029356e-05, "loss": 1269.7135, "step": 1200000 }, { "epoch": 68.32, "learning_rate": 3.1675958584594376e-05, "loss": 1275.4537, "step": 1201000 }, { "epoch": 68.38, "learning_rate": 3.16190692911594e-05, "loss": 1264.121, "step": 1202000 }, { "epoch": 68.44, "learning_rate": 3.156217999772443e-05, "loss": 1263.4568, "step": 1203000 }, { "epoch": 68.49, "learning_rate": 3.1505290704289455e-05, "loss": 1283.5248, "step": 1204000 }, { "epoch": 68.55, "learning_rate": 3.144840141085448e-05, "loss": 1268.8824, "step": 1205000 }, { "epoch": 68.61, "learning_rate": 3.13915121174195e-05, "loss": 1266.0994, "step": 1206000 }, { "epoch": 68.67, "learning_rate": 3.133462282398453e-05, "loss": 1273.2693, "step": 1207000 }, { "epoch": 68.72, "learning_rate": 3.127773353054955e-05, "loss": 1271.4641, "step": 1208000 }, { "epoch": 68.78, "learning_rate": 3.122084423711458e-05, "loss": 1282.3871, "step": 1209000 }, { "epoch": 68.84, "learning_rate": 3.11639549436796e-05, "loss": 1293.3351, "step": 1210000 }, { "epoch": 68.89, "learning_rate": 3.1107065650244625e-05, "loss": 1283.2284, "step": 1211000 }, { "epoch": 68.95, "learning_rate": 3.105017635680965e-05, "loss": 1285.3965, "step": 1212000 }, { "epoch": 69.01, "learning_rate": 3.099328706337468e-05, "loss": 1290.7256, "step": 1213000 }, { "epoch": 69.06, "learning_rate": 3.0936397769939704e-05, "loss": 1248.5135, "step": 1214000 }, { "epoch": 69.12, "learning_rate": 3.0879508476504724e-05, "loss": 1250.5239, "step": 1215000 }, { "epoch": 69.18, "learning_rate": 3.082261918306974e-05, "loss": 1258.1981, "step": 1216000 }, { "epoch": 69.23, "learning_rate": 3.076572988963477e-05, "loss": 1261.2136, "step": 1217000 }, { "epoch": 69.29, "learning_rate": 3.0708840596199796e-05, "loss": 1268.2262, "step": 1218000 }, { "epoch": 69.35, "learning_rate": 3.065195130276482e-05, "loss": 1266.8876, "step": 1219000 }, { "epoch": 69.4, "learning_rate": 3.059506200932984e-05, "loss": 1259.7765, "step": 1220000 }, { "epoch": 69.46, "learning_rate": 3.053817271589487e-05, "loss": 1261.7441, "step": 1221000 }, { "epoch": 69.52, "learning_rate": 3.0481283422459894e-05, "loss": 1281.3345, "step": 1222000 }, { "epoch": 69.58, "learning_rate": 3.042439412902492e-05, "loss": 1274.8876, "step": 1223000 }, { "epoch": 69.63, "learning_rate": 3.0367504835589943e-05, "loss": 1266.9106, "step": 1224000 }, { "epoch": 69.69, "learning_rate": 3.031061554215497e-05, "loss": 1260.1679, "step": 1225000 }, { "epoch": 69.75, "learning_rate": 3.0253726248719992e-05, "loss": 1266.3253, "step": 1226000 }, { "epoch": 69.8, "learning_rate": 3.019683695528502e-05, "loss": 1278.5824, "step": 1227000 }, { "epoch": 69.86, "learning_rate": 3.013994766185004e-05, "loss": 1278.2207, "step": 1228000 }, { "epoch": 69.92, "learning_rate": 3.0083058368415068e-05, "loss": 1260.7024, "step": 1229000 }, { "epoch": 69.97, "learning_rate": 3.0026169074980094e-05, "loss": 1278.0767, "step": 1230000 }, { "epoch": 70.03, "learning_rate": 2.9969279781545114e-05, "loss": 1246.9709, "step": 1231000 }, { "epoch": 70.09, "learning_rate": 2.9912390488110137e-05, "loss": 1235.8134, "step": 1232000 }, { "epoch": 70.14, "learning_rate": 2.9855501194675163e-05, "loss": 1228.1921, "step": 1233000 }, { "epoch": 70.2, "learning_rate": 2.9798611901240186e-05, "loss": 1262.2297, "step": 1234000 }, { "epoch": 70.26, "learning_rate": 2.9741722607805212e-05, "loss": 1244.0374, "step": 1235000 }, { "epoch": 70.32, "learning_rate": 2.9684833314370235e-05, "loss": 1258.978, "step": 1236000 }, { "epoch": 70.37, "learning_rate": 2.962794402093526e-05, "loss": 1257.7517, "step": 1237000 }, { "epoch": 70.43, "learning_rate": 2.9571054727500284e-05, "loss": 1252.3501, "step": 1238000 }, { "epoch": 70.49, "learning_rate": 2.951416543406531e-05, "loss": 1269.8844, "step": 1239000 }, { "epoch": 70.54, "learning_rate": 2.9457276140630337e-05, "loss": 1259.7821, "step": 1240000 }, { "epoch": 70.6, "learning_rate": 2.940038684719536e-05, "loss": 1255.481, "step": 1241000 }, { "epoch": 70.66, "learning_rate": 2.9343497553760386e-05, "loss": 1268.071, "step": 1242000 }, { "epoch": 70.71, "learning_rate": 2.928660826032541e-05, "loss": 1259.476, "step": 1243000 }, { "epoch": 70.77, "learning_rate": 2.9229718966890435e-05, "loss": 1240.7596, "step": 1244000 }, { "epoch": 70.83, "learning_rate": 2.9172829673455458e-05, "loss": 1264.1351, "step": 1245000 }, { "epoch": 70.88, "learning_rate": 2.9115940380020478e-05, "loss": 1272.9305, "step": 1246000 }, { "epoch": 70.94, "learning_rate": 2.9059051086585504e-05, "loss": 1262.9494, "step": 1247000 }, { "epoch": 71.0, "learning_rate": 2.900216179315053e-05, "loss": 1278.493, "step": 1248000 }, { "epoch": 71.05, "learning_rate": 2.8945272499715553e-05, "loss": 1237.0874, "step": 1249000 }, { "epoch": 71.11, "learning_rate": 2.888838320628058e-05, "loss": 1230.4429, "step": 1250000 }, { "epoch": 71.17, "learning_rate": 2.8831493912845602e-05, "loss": 1239.493, "step": 1251000 }, { "epoch": 71.23, "learning_rate": 2.877460461941063e-05, "loss": 1246.7569, "step": 1252000 }, { "epoch": 71.28, "learning_rate": 2.871771532597565e-05, "loss": 1239.9172, "step": 1253000 }, { "epoch": 71.34, "learning_rate": 2.8660826032540678e-05, "loss": 1235.984, "step": 1254000 }, { "epoch": 71.4, "learning_rate": 2.86039367391057e-05, "loss": 1245.2676, "step": 1255000 }, { "epoch": 71.45, "learning_rate": 2.8547047445670727e-05, "loss": 1249.7554, "step": 1256000 }, { "epoch": 71.51, "learning_rate": 2.8490158152235753e-05, "loss": 1258.4275, "step": 1257000 }, { "epoch": 71.57, "learning_rate": 2.8433268858800776e-05, "loss": 1249.4285, "step": 1258000 }, { "epoch": 71.62, "learning_rate": 2.8376379565365802e-05, "loss": 1263.9046, "step": 1259000 }, { "epoch": 71.68, "learning_rate": 2.8319490271930825e-05, "loss": 1264.1874, "step": 1260000 }, { "epoch": 71.74, "learning_rate": 2.826260097849585e-05, "loss": 1259.2052, "step": 1261000 }, { "epoch": 71.79, "learning_rate": 2.820571168506087e-05, "loss": 1250.6136, "step": 1262000 }, { "epoch": 71.85, "learning_rate": 2.8148822391625894e-05, "loss": 1264.1386, "step": 1263000 }, { "epoch": 71.91, "learning_rate": 2.809193309819092e-05, "loss": 1249.8709, "step": 1264000 }, { "epoch": 71.96, "learning_rate": 2.8035043804755947e-05, "loss": 1255.1571, "step": 1265000 }, { "epoch": 72.02, "learning_rate": 2.797815451132097e-05, "loss": 1250.2874, "step": 1266000 }, { "epoch": 72.08, "learning_rate": 2.7921265217885996e-05, "loss": 1222.2571, "step": 1267000 }, { "epoch": 72.14, "learning_rate": 2.786437592445102e-05, "loss": 1220.4179, "step": 1268000 }, { "epoch": 72.19, "learning_rate": 2.7807486631016045e-05, "loss": 1240.9492, "step": 1269000 }, { "epoch": 72.25, "learning_rate": 2.7750597337581068e-05, "loss": 1221.8525, "step": 1270000 }, { "epoch": 72.31, "learning_rate": 2.7693708044146094e-05, "loss": 1241.5765, "step": 1271000 }, { "epoch": 72.36, "learning_rate": 2.7636818750711117e-05, "loss": 1238.417, "step": 1272000 }, { "epoch": 72.42, "learning_rate": 2.7579929457276143e-05, "loss": 1242.3884, "step": 1273000 }, { "epoch": 72.48, "learning_rate": 2.752304016384117e-05, "loss": 1232.1858, "step": 1274000 }, { "epoch": 72.53, "learning_rate": 2.7466150870406193e-05, "loss": 1248.4278, "step": 1275000 }, { "epoch": 72.59, "learning_rate": 2.740926157697122e-05, "loss": 1240.8836, "step": 1276000 }, { "epoch": 72.65, "learning_rate": 2.735237228353624e-05, "loss": 1245.345, "step": 1277000 }, { "epoch": 72.7, "learning_rate": 2.729548299010126e-05, "loss": 1239.759, "step": 1278000 }, { "epoch": 72.76, "learning_rate": 2.7238593696666288e-05, "loss": 1237.604, "step": 1279000 }, { "epoch": 72.82, "learning_rate": 2.718170440323131e-05, "loss": 1252.7389, "step": 1280000 }, { "epoch": 72.88, "learning_rate": 2.7124815109796337e-05, "loss": 1247.1384, "step": 1281000 }, { "epoch": 72.93, "learning_rate": 2.7067925816361363e-05, "loss": 1256.847, "step": 1282000 }, { "epoch": 72.99, "learning_rate": 2.7011036522926386e-05, "loss": 1245.975, "step": 1283000 }, { "epoch": 73.05, "learning_rate": 2.6954147229491412e-05, "loss": 1231.1399, "step": 1284000 }, { "epoch": 73.1, "learning_rate": 2.6897257936056435e-05, "loss": 1205.7425, "step": 1285000 }, { "epoch": 73.16, "learning_rate": 2.684036864262146e-05, "loss": 1215.8488, "step": 1286000 }, { "epoch": 73.22, "learning_rate": 2.6783479349186484e-05, "loss": 1222.3592, "step": 1287000 }, { "epoch": 73.27, "learning_rate": 2.672659005575151e-05, "loss": 1207.4959, "step": 1288000 }, { "epoch": 73.33, "learning_rate": 2.6669700762316533e-05, "loss": 1213.6765, "step": 1289000 }, { "epoch": 73.39, "learning_rate": 2.661281146888156e-05, "loss": 1231.6557, "step": 1290000 }, { "epoch": 73.44, "learning_rate": 2.6555922175446586e-05, "loss": 1225.8214, "step": 1291000 }, { "epoch": 73.5, "learning_rate": 2.6499032882011606e-05, "loss": 1226.3789, "step": 1292000 }, { "epoch": 73.56, "learning_rate": 2.644214358857663e-05, "loss": 1231.7249, "step": 1293000 }, { "epoch": 73.61, "learning_rate": 2.6385254295141655e-05, "loss": 1229.5675, "step": 1294000 }, { "epoch": 73.67, "learning_rate": 2.6328365001706678e-05, "loss": 1234.4497, "step": 1295000 }, { "epoch": 73.73, "learning_rate": 2.6271475708271704e-05, "loss": 1217.283, "step": 1296000 }, { "epoch": 73.79, "learning_rate": 2.6214586414836727e-05, "loss": 1250.2821, "step": 1297000 }, { "epoch": 73.84, "learning_rate": 2.6157697121401753e-05, "loss": 1243.1043, "step": 1298000 }, { "epoch": 73.9, "learning_rate": 2.610080782796678e-05, "loss": 1240.0269, "step": 1299000 }, { "epoch": 73.96, "learning_rate": 2.6043918534531802e-05, "loss": 1239.3193, "step": 1300000 }, { "epoch": 74.01, "learning_rate": 2.598702924109683e-05, "loss": 1230.6859, "step": 1301000 }, { "epoch": 74.07, "learning_rate": 2.593013994766185e-05, "loss": 1203.8056, "step": 1302000 }, { "epoch": 74.13, "learning_rate": 2.5873250654226878e-05, "loss": 1215.1579, "step": 1303000 }, { "epoch": 74.18, "learning_rate": 2.58163613607919e-05, "loss": 1214.4923, "step": 1304000 }, { "epoch": 74.24, "learning_rate": 2.5759472067356927e-05, "loss": 1204.5544, "step": 1305000 }, { "epoch": 74.3, "learning_rate": 2.570258277392195e-05, "loss": 1209.7384, "step": 1306000 }, { "epoch": 74.35, "learning_rate": 2.564569348048697e-05, "loss": 1209.6011, "step": 1307000 }, { "epoch": 74.41, "learning_rate": 2.5588804187051996e-05, "loss": 1211.1865, "step": 1308000 }, { "epoch": 74.47, "learning_rate": 2.5531914893617022e-05, "loss": 1218.4329, "step": 1309000 }, { "epoch": 74.52, "learning_rate": 2.5475025600182045e-05, "loss": 1227.9114, "step": 1310000 }, { "epoch": 74.58, "learning_rate": 2.541813630674707e-05, "loss": 1230.4482, "step": 1311000 }, { "epoch": 74.64, "learning_rate": 2.5361247013312094e-05, "loss": 1222.4231, "step": 1312000 }, { "epoch": 74.7, "learning_rate": 2.530435771987712e-05, "loss": 1227.7088, "step": 1313000 }, { "epoch": 74.75, "learning_rate": 2.5247468426442143e-05, "loss": 1234.4689, "step": 1314000 }, { "epoch": 74.81, "learning_rate": 2.519057913300717e-05, "loss": 1230.2299, "step": 1315000 }, { "epoch": 74.87, "learning_rate": 2.5133689839572196e-05, "loss": 1232.3656, "step": 1316000 }, { "epoch": 74.92, "learning_rate": 2.507680054613722e-05, "loss": 1229.7561, "step": 1317000 }, { "epoch": 74.98, "learning_rate": 2.5019911252702245e-05, "loss": 1231.2514, "step": 1318000 }, { "epoch": 75.04, "learning_rate": 2.4963021959267265e-05, "loss": 1201.1504, "step": 1319000 }, { "epoch": 75.09, "learning_rate": 2.490613266583229e-05, "loss": 1201.5943, "step": 1320000 }, { "epoch": 75.15, "learning_rate": 2.4849243372397317e-05, "loss": 1187.7491, "step": 1321000 }, { "epoch": 75.21, "learning_rate": 2.479235407896234e-05, "loss": 1200.7007, "step": 1322000 }, { "epoch": 75.26, "learning_rate": 2.4735464785527366e-05, "loss": 1226.9382, "step": 1323000 }, { "epoch": 75.32, "learning_rate": 2.467857549209239e-05, "loss": 1207.0174, "step": 1324000 }, { "epoch": 75.38, "learning_rate": 2.4621686198657416e-05, "loss": 1201.2749, "step": 1325000 }, { "epoch": 75.44, "learning_rate": 2.456479690522244e-05, "loss": 1215.8894, "step": 1326000 }, { "epoch": 75.49, "learning_rate": 2.450790761178746e-05, "loss": 1212.1351, "step": 1327000 }, { "epoch": 75.55, "learning_rate": 2.4451018318352488e-05, "loss": 1219.4041, "step": 1328000 }, { "epoch": 75.61, "learning_rate": 2.439412902491751e-05, "loss": 1219.2811, "step": 1329000 }, { "epoch": 75.66, "learning_rate": 2.4337239731482537e-05, "loss": 1215.1675, "step": 1330000 }, { "epoch": 75.72, "learning_rate": 2.428035043804756e-05, "loss": 1228.5261, "step": 1331000 }, { "epoch": 75.78, "learning_rate": 2.4223461144612586e-05, "loss": 1218.07, "step": 1332000 }, { "epoch": 75.83, "learning_rate": 2.4166571851177612e-05, "loss": 1216.0176, "step": 1333000 }, { "epoch": 75.89, "learning_rate": 2.4109682557742632e-05, "loss": 1218.35, "step": 1334000 }, { "epoch": 75.95, "learning_rate": 2.4052793264307658e-05, "loss": 1231.3149, "step": 1335000 }, { "epoch": 76.0, "learning_rate": 2.399590397087268e-05, "loss": 1224.585, "step": 1336000 }, { "epoch": 76.06, "learning_rate": 2.3939014677437707e-05, "loss": 1188.4584, "step": 1337000 }, { "epoch": 76.12, "learning_rate": 2.3882125384002734e-05, "loss": 1185.666, "step": 1338000 }, { "epoch": 76.17, "learning_rate": 2.3825236090567756e-05, "loss": 1197.9449, "step": 1339000 }, { "epoch": 76.23, "learning_rate": 2.3768346797132783e-05, "loss": 1196.5395, "step": 1340000 }, { "epoch": 76.29, "learning_rate": 2.3711457503697806e-05, "loss": 1186.724, "step": 1341000 }, { "epoch": 76.35, "learning_rate": 2.365456821026283e-05, "loss": 1191.945, "step": 1342000 }, { "epoch": 76.4, "learning_rate": 2.3597678916827855e-05, "loss": 1196.3835, "step": 1343000 }, { "epoch": 76.46, "learning_rate": 2.3540789623392878e-05, "loss": 1207.0567, "step": 1344000 }, { "epoch": 76.52, "learning_rate": 2.3483900329957904e-05, "loss": 1205.2645, "step": 1345000 }, { "epoch": 76.57, "learning_rate": 2.3427011036522927e-05, "loss": 1204.7142, "step": 1346000 }, { "epoch": 76.63, "learning_rate": 2.3370121743087953e-05, "loss": 1211.8616, "step": 1347000 }, { "epoch": 76.69, "learning_rate": 2.3313232449652976e-05, "loss": 1208.3547, "step": 1348000 }, { "epoch": 76.74, "learning_rate": 2.3256343156218e-05, "loss": 1209.8834, "step": 1349000 }, { "epoch": 76.8, "learning_rate": 2.3199453862783025e-05, "loss": 1212.192, "step": 1350000 }, { "epoch": 76.86, "learning_rate": 2.3142564569348048e-05, "loss": 1221.4207, "step": 1351000 }, { "epoch": 76.91, "learning_rate": 2.3085675275913075e-05, "loss": 1202.5544, "step": 1352000 }, { "epoch": 76.97, "learning_rate": 2.3028785982478097e-05, "loss": 1208.2573, "step": 1353000 }, { "epoch": 77.03, "learning_rate": 2.2971896689043124e-05, "loss": 1200.6246, "step": 1354000 }, { "epoch": 77.08, "learning_rate": 2.291500739560815e-05, "loss": 1183.6086, "step": 1355000 }, { "epoch": 77.14, "learning_rate": 2.2858118102173173e-05, "loss": 1179.3908, "step": 1356000 }, { "epoch": 77.2, "learning_rate": 2.2801228808738196e-05, "loss": 1184.4676, "step": 1357000 }, { "epoch": 77.26, "learning_rate": 2.274433951530322e-05, "loss": 1196.7686, "step": 1358000 }, { "epoch": 77.31, "learning_rate": 2.2687450221868245e-05, "loss": 1185.06, "step": 1359000 }, { "epoch": 77.37, "learning_rate": 2.263056092843327e-05, "loss": 1202.369, "step": 1360000 }, { "epoch": 77.43, "learning_rate": 2.2573671634998294e-05, "loss": 1184.7162, "step": 1361000 }, { "epoch": 77.48, "learning_rate": 2.251678234156332e-05, "loss": 1192.9868, "step": 1362000 }, { "epoch": 77.54, "learning_rate": 2.2459893048128343e-05, "loss": 1212.2264, "step": 1363000 }, { "epoch": 77.6, "learning_rate": 2.240300375469337e-05, "loss": 1180.367, "step": 1364000 }, { "epoch": 77.65, "learning_rate": 2.2346114461258393e-05, "loss": 1199.1958, "step": 1365000 }, { "epoch": 77.71, "learning_rate": 2.2289225167823415e-05, "loss": 1203.7797, "step": 1366000 }, { "epoch": 77.77, "learning_rate": 2.2232335874388442e-05, "loss": 1204.3984, "step": 1367000 }, { "epoch": 77.82, "learning_rate": 2.2175446580953465e-05, "loss": 1209.3718, "step": 1368000 }, { "epoch": 77.88, "learning_rate": 2.211855728751849e-05, "loss": 1202.4209, "step": 1369000 }, { "epoch": 77.94, "learning_rate": 2.2061667994083514e-05, "loss": 1213.2796, "step": 1370000 }, { "epoch": 78.0, "learning_rate": 2.200477870064854e-05, "loss": 1186.3549, "step": 1371000 }, { "epoch": 78.05, "learning_rate": 2.1947889407213563e-05, "loss": 1182.4227, "step": 1372000 }, { "epoch": 78.11, "learning_rate": 2.1891000113778586e-05, "loss": 1180.1546, "step": 1373000 }, { "epoch": 78.17, "learning_rate": 2.1834110820343612e-05, "loss": 1169.9831, "step": 1374000 }, { "epoch": 78.22, "learning_rate": 2.1777221526908635e-05, "loss": 1184.1939, "step": 1375000 }, { "epoch": 78.28, "learning_rate": 2.172033223347366e-05, "loss": 1176.0429, "step": 1376000 }, { "epoch": 78.34, "learning_rate": 2.1663442940038688e-05, "loss": 1184.305, "step": 1377000 }, { "epoch": 78.39, "learning_rate": 2.160655364660371e-05, "loss": 1183.903, "step": 1378000 }, { "epoch": 78.45, "learning_rate": 2.1549664353168737e-05, "loss": 1180.1133, "step": 1379000 }, { "epoch": 78.51, "learning_rate": 2.1492775059733756e-05, "loss": 1184.0736, "step": 1380000 }, { "epoch": 78.56, "learning_rate": 2.1435885766298783e-05, "loss": 1185.9709, "step": 1381000 }, { "epoch": 78.62, "learning_rate": 2.137899647286381e-05, "loss": 1183.0906, "step": 1382000 }, { "epoch": 78.68, "learning_rate": 2.1322107179428832e-05, "loss": 1192.5355, "step": 1383000 }, { "epoch": 78.73, "learning_rate": 2.1265217885993858e-05, "loss": 1192.9458, "step": 1384000 }, { "epoch": 78.79, "learning_rate": 2.120832859255888e-05, "loss": 1182.993, "step": 1385000 }, { "epoch": 78.85, "learning_rate": 2.1151439299123907e-05, "loss": 1193.7806, "step": 1386000 }, { "epoch": 78.91, "learning_rate": 2.109455000568893e-05, "loss": 1185.4926, "step": 1387000 }, { "epoch": 78.96, "learning_rate": 2.1037660712253953e-05, "loss": 1195.9494, "step": 1388000 }, { "epoch": 79.02, "learning_rate": 2.098077141881898e-05, "loss": 1182.2955, "step": 1389000 }, { "epoch": 79.08, "learning_rate": 2.0923882125384002e-05, "loss": 1169.7652, "step": 1390000 }, { "epoch": 79.13, "learning_rate": 2.086699283194903e-05, "loss": 1177.7477, "step": 1391000 }, { "epoch": 79.19, "learning_rate": 2.081010353851405e-05, "loss": 1172.0405, "step": 1392000 }, { "epoch": 79.25, "learning_rate": 2.0753214245079078e-05, "loss": 1170.7849, "step": 1393000 }, { "epoch": 79.3, "learning_rate": 2.0696324951644104e-05, "loss": 1175.2803, "step": 1394000 }, { "epoch": 79.36, "learning_rate": 2.0639435658209124e-05, "loss": 1170.3756, "step": 1395000 }, { "epoch": 79.42, "learning_rate": 2.058254636477415e-05, "loss": 1161.9664, "step": 1396000 }, { "epoch": 79.47, "learning_rate": 2.0525657071339173e-05, "loss": 1176.8119, "step": 1397000 }, { "epoch": 79.53, "learning_rate": 2.04687677779042e-05, "loss": 1177.215, "step": 1398000 }, { "epoch": 79.59, "learning_rate": 2.0411878484469225e-05, "loss": 1200.9484, "step": 1399000 }, { "epoch": 79.65, "learning_rate": 2.0354989191034248e-05, "loss": 1181.1481, "step": 1400000 }, { "epoch": 79.7, "learning_rate": 2.0298099897599275e-05, "loss": 1180.7864, "step": 1401000 }, { "epoch": 79.76, "learning_rate": 2.0241210604164297e-05, "loss": 1178.7059, "step": 1402000 }, { "epoch": 79.82, "learning_rate": 2.018432131072932e-05, "loss": 1185.7616, "step": 1403000 }, { "epoch": 79.87, "learning_rate": 2.0127432017294347e-05, "loss": 1180.373, "step": 1404000 }, { "epoch": 79.93, "learning_rate": 2.007054272385937e-05, "loss": 1184.3008, "step": 1405000 }, { "epoch": 79.99, "learning_rate": 2.0013653430424396e-05, "loss": 1185.7079, "step": 1406000 }, { "epoch": 80.04, "learning_rate": 1.995676413698942e-05, "loss": 1165.0614, "step": 1407000 }, { "epoch": 80.1, "learning_rate": 1.9899874843554445e-05, "loss": 1162.3374, "step": 1408000 }, { "epoch": 80.16, "learning_rate": 1.9842985550119468e-05, "loss": 1164.4613, "step": 1409000 }, { "epoch": 80.21, "learning_rate": 1.9786096256684494e-05, "loss": 1164.8291, "step": 1410000 }, { "epoch": 80.27, "learning_rate": 1.9729206963249517e-05, "loss": 1162.4905, "step": 1411000 }, { "epoch": 80.33, "learning_rate": 1.967231766981454e-05, "loss": 1150.664, "step": 1412000 }, { "epoch": 80.38, "learning_rate": 1.9615428376379566e-05, "loss": 1178.7514, "step": 1413000 }, { "epoch": 80.44, "learning_rate": 1.955853908294459e-05, "loss": 1171.9224, "step": 1414000 }, { "epoch": 80.5, "learning_rate": 1.9501649789509616e-05, "loss": 1172.6478, "step": 1415000 }, { "epoch": 80.56, "learning_rate": 1.9444760496074642e-05, "loss": 1166.4991, "step": 1416000 }, { "epoch": 80.61, "learning_rate": 1.9387871202639665e-05, "loss": 1172.113, "step": 1417000 }, { "epoch": 80.67, "learning_rate": 1.9330981909204688e-05, "loss": 1159.9136, "step": 1418000 }, { "epoch": 80.73, "learning_rate": 1.927409261576971e-05, "loss": 1175.692, "step": 1419000 }, { "epoch": 80.78, "learning_rate": 1.9217203322334737e-05, "loss": 1169.887, "step": 1420000 }, { "epoch": 80.84, "learning_rate": 1.9160314028899763e-05, "loss": 1185.6144, "step": 1421000 }, { "epoch": 80.9, "learning_rate": 1.9103424735464786e-05, "loss": 1175.742, "step": 1422000 }, { "epoch": 80.95, "learning_rate": 1.9046535442029812e-05, "loss": 1172.6076, "step": 1423000 }, { "epoch": 81.01, "learning_rate": 1.8989646148594835e-05, "loss": 1173.311, "step": 1424000 }, { "epoch": 81.07, "learning_rate": 1.893275685515986e-05, "loss": 1147.8195, "step": 1425000 }, { "epoch": 81.12, "learning_rate": 1.8875867561724884e-05, "loss": 1162.9901, "step": 1426000 }, { "epoch": 81.18, "learning_rate": 1.8818978268289907e-05, "loss": 1158.6924, "step": 1427000 }, { "epoch": 81.24, "learning_rate": 1.8762088974854934e-05, "loss": 1147.9249, "step": 1428000 }, { "epoch": 81.29, "learning_rate": 1.8705199681419956e-05, "loss": 1150.0205, "step": 1429000 }, { "epoch": 81.35, "learning_rate": 1.8648310387984983e-05, "loss": 1161.5246, "step": 1430000 }, { "epoch": 81.41, "learning_rate": 1.8591421094550006e-05, "loss": 1164.8296, "step": 1431000 }, { "epoch": 81.47, "learning_rate": 1.8534531801115032e-05, "loss": 1165.4834, "step": 1432000 }, { "epoch": 81.52, "learning_rate": 1.8477642507680055e-05, "loss": 1165.3402, "step": 1433000 }, { "epoch": 81.58, "learning_rate": 1.8420753214245078e-05, "loss": 1162.1901, "step": 1434000 }, { "epoch": 81.64, "learning_rate": 1.8363863920810104e-05, "loss": 1180.1814, "step": 1435000 }, { "epoch": 81.69, "learning_rate": 1.8306974627375127e-05, "loss": 1155.7976, "step": 1436000 }, { "epoch": 81.75, "learning_rate": 1.8250085333940153e-05, "loss": 1161.4264, "step": 1437000 }, { "epoch": 81.81, "learning_rate": 1.819319604050518e-05, "loss": 1164.9099, "step": 1438000 }, { "epoch": 81.86, "learning_rate": 1.8136306747070202e-05, "loss": 1178.0811, "step": 1439000 }, { "epoch": 81.92, "learning_rate": 1.807941745363523e-05, "loss": 1159.1011, "step": 1440000 }, { "epoch": 81.98, "learning_rate": 1.8022528160200248e-05, "loss": 1167.5847, "step": 1441000 }, { "epoch": 82.03, "learning_rate": 1.7965638866765275e-05, "loss": 1148.5929, "step": 1442000 }, { "epoch": 82.09, "learning_rate": 1.79087495733303e-05, "loss": 1147.208, "step": 1443000 }, { "epoch": 82.15, "learning_rate": 1.7851860279895324e-05, "loss": 1138.3263, "step": 1444000 }, { "epoch": 82.21, "learning_rate": 1.779497098646035e-05, "loss": 1150.9631, "step": 1445000 }, { "epoch": 82.26, "learning_rate": 1.7738081693025373e-05, "loss": 1144.256, "step": 1446000 }, { "epoch": 82.32, "learning_rate": 1.76811923995904e-05, "loss": 1141.4891, "step": 1447000 }, { "epoch": 82.38, "learning_rate": 1.7624303106155422e-05, "loss": 1156.2427, "step": 1448000 }, { "epoch": 82.43, "learning_rate": 1.7567413812720445e-05, "loss": 1154.8924, "step": 1449000 }, { "epoch": 82.49, "learning_rate": 1.751052451928547e-05, "loss": 1148.3214, "step": 1450000 }, { "epoch": 82.55, "learning_rate": 1.7453635225850494e-05, "loss": 1153.4801, "step": 1451000 }, { "epoch": 82.6, "learning_rate": 1.739674593241552e-05, "loss": 1153.7668, "step": 1452000 }, { "epoch": 82.66, "learning_rate": 1.7339856638980543e-05, "loss": 1157.5399, "step": 1453000 }, { "epoch": 82.72, "learning_rate": 1.728296734554557e-05, "loss": 1157.2121, "step": 1454000 }, { "epoch": 82.77, "learning_rate": 1.7226078052110596e-05, "loss": 1153.1155, "step": 1455000 }, { "epoch": 82.83, "learning_rate": 1.7169188758675615e-05, "loss": 1152.2974, "step": 1456000 }, { "epoch": 82.89, "learning_rate": 1.7112299465240642e-05, "loss": 1159.7889, "step": 1457000 }, { "epoch": 82.94, "learning_rate": 1.7055410171805668e-05, "loss": 1159.2494, "step": 1458000 }, { "epoch": 83.0, "learning_rate": 1.699852087837069e-05, "loss": 1158.0434, "step": 1459000 }, { "epoch": 83.06, "learning_rate": 1.6941631584935717e-05, "loss": 1136.6234, "step": 1460000 }, { "epoch": 83.12, "learning_rate": 1.688474229150074e-05, "loss": 1136.6204, "step": 1461000 }, { "epoch": 83.17, "learning_rate": 1.6827852998065766e-05, "loss": 1136.0679, "step": 1462000 }, { "epoch": 83.23, "learning_rate": 1.677096370463079e-05, "loss": 1145.5604, "step": 1463000 }, { "epoch": 83.29, "learning_rate": 1.6714074411195812e-05, "loss": 1149.4044, "step": 1464000 }, { "epoch": 83.34, "learning_rate": 1.665718511776084e-05, "loss": 1143.5284, "step": 1465000 }, { "epoch": 83.4, "learning_rate": 1.660029582432586e-05, "loss": 1137.6674, "step": 1466000 }, { "epoch": 83.46, "learning_rate": 1.6543406530890888e-05, "loss": 1147.9658, "step": 1467000 }, { "epoch": 83.51, "learning_rate": 1.648651723745591e-05, "loss": 1155.7677, "step": 1468000 }, { "epoch": 83.57, "learning_rate": 1.6429627944020937e-05, "loss": 1141.7793, "step": 1469000 }, { "epoch": 83.63, "learning_rate": 1.637273865058596e-05, "loss": 1145.7056, "step": 1470000 }, { "epoch": 83.68, "learning_rate": 1.6315849357150986e-05, "loss": 1141.1419, "step": 1471000 }, { "epoch": 83.74, "learning_rate": 1.625896006371601e-05, "loss": 1141.7394, "step": 1472000 }, { "epoch": 83.8, "learning_rate": 1.6202070770281032e-05, "loss": 1140.3324, "step": 1473000 }, { "epoch": 83.85, "learning_rate": 1.6145181476846058e-05, "loss": 1135.1596, "step": 1474000 }, { "epoch": 83.91, "learning_rate": 1.6088292183411084e-05, "loss": 1148.8506, "step": 1475000 }, { "epoch": 83.97, "learning_rate": 1.6031402889976107e-05, "loss": 1147.414, "step": 1476000 }, { "epoch": 84.03, "learning_rate": 1.5974513596541134e-05, "loss": 1141.6139, "step": 1477000 }, { "epoch": 84.08, "learning_rate": 1.5917624303106157e-05, "loss": 1127.0494, "step": 1478000 }, { "epoch": 84.14, "learning_rate": 1.586073500967118e-05, "loss": 1139.2555, "step": 1479000 }, { "epoch": 84.2, "learning_rate": 1.5803845716236206e-05, "loss": 1139.8898, "step": 1480000 }, { "epoch": 84.25, "learning_rate": 1.574695642280123e-05, "loss": 1128.3751, "step": 1481000 }, { "epoch": 84.31, "learning_rate": 1.5690067129366255e-05, "loss": 1132.8457, "step": 1482000 }, { "epoch": 84.37, "learning_rate": 1.5633177835931278e-05, "loss": 1142.597, "step": 1483000 }, { "epoch": 84.42, "learning_rate": 1.5576288542496304e-05, "loss": 1130.3128, "step": 1484000 }, { "epoch": 84.48, "learning_rate": 1.5519399249061327e-05, "loss": 1133.946, "step": 1485000 }, { "epoch": 84.54, "learning_rate": 1.5462509955626353e-05, "loss": 1135.3319, "step": 1486000 }, { "epoch": 84.59, "learning_rate": 1.5405620662191376e-05, "loss": 1130.9378, "step": 1487000 }, { "epoch": 84.65, "learning_rate": 1.53487313687564e-05, "loss": 1135.2162, "step": 1488000 }, { "epoch": 84.71, "learning_rate": 1.5291842075321425e-05, "loss": 1144.2474, "step": 1489000 }, { "epoch": 84.77, "learning_rate": 1.523495278188645e-05, "loss": 1140.287, "step": 1490000 }, { "epoch": 84.82, "learning_rate": 1.5178063488451475e-05, "loss": 1149.3139, "step": 1491000 }, { "epoch": 84.88, "learning_rate": 1.51211741950165e-05, "loss": 1144.6466, "step": 1492000 }, { "epoch": 84.94, "learning_rate": 1.5064284901581524e-05, "loss": 1136.9894, "step": 1493000 }, { "epoch": 84.99, "learning_rate": 1.5007395608146548e-05, "loss": 1149.1305, "step": 1494000 }, { "epoch": 85.05, "learning_rate": 1.4950506314711571e-05, "loss": 1127.2636, "step": 1495000 }, { "epoch": 85.11, "learning_rate": 1.4893617021276596e-05, "loss": 1117.3661, "step": 1496000 }, { "epoch": 85.16, "learning_rate": 1.483672772784162e-05, "loss": 1127.2085, "step": 1497000 }, { "epoch": 85.22, "learning_rate": 1.4779838434406645e-05, "loss": 1126.4929, "step": 1498000 }, { "epoch": 85.28, "learning_rate": 1.472294914097167e-05, "loss": 1124.6271, "step": 1499000 }, { "epoch": 85.33, "learning_rate": 1.4666059847536694e-05, "loss": 1128.6382, "step": 1500000 }, { "epoch": 85.39, "learning_rate": 1.460917055410172e-05, "loss": 1131.2007, "step": 1501000 }, { "epoch": 85.45, "learning_rate": 1.4552281260666742e-05, "loss": 1125.954, "step": 1502000 }, { "epoch": 85.5, "learning_rate": 1.4495391967231766e-05, "loss": 1144.0759, "step": 1503000 }, { "epoch": 85.56, "learning_rate": 1.4438502673796791e-05, "loss": 1132.6319, "step": 1504000 }, { "epoch": 85.62, "learning_rate": 1.4381613380361816e-05, "loss": 1132.7251, "step": 1505000 }, { "epoch": 85.68, "learning_rate": 1.4324724086926842e-05, "loss": 1146.3869, "step": 1506000 }, { "epoch": 85.73, "learning_rate": 1.4267834793491866e-05, "loss": 1126.8225, "step": 1507000 }, { "epoch": 85.79, "learning_rate": 1.4210945500056891e-05, "loss": 1124.6022, "step": 1508000 }, { "epoch": 85.85, "learning_rate": 1.4154056206621916e-05, "loss": 1122.0214, "step": 1509000 }, { "epoch": 85.9, "learning_rate": 1.4097166913186939e-05, "loss": 1137.1519, "step": 1510000 }, { "epoch": 85.96, "learning_rate": 1.4040277619751963e-05, "loss": 1135.3529, "step": 1511000 }, { "epoch": 86.02, "learning_rate": 1.3983388326316988e-05, "loss": 1129.2861, "step": 1512000 }, { "epoch": 86.07, "learning_rate": 1.3926499032882012e-05, "loss": 1121.5106, "step": 1513000 }, { "epoch": 86.13, "learning_rate": 1.3869609739447037e-05, "loss": 1113.2622, "step": 1514000 }, { "epoch": 86.19, "learning_rate": 1.3812720446012061e-05, "loss": 1123.8396, "step": 1515000 }, { "epoch": 86.24, "learning_rate": 1.3755831152577086e-05, "loss": 1127.5199, "step": 1516000 }, { "epoch": 86.3, "learning_rate": 1.369894185914211e-05, "loss": 1118.2024, "step": 1517000 }, { "epoch": 86.36, "learning_rate": 1.3642052565707134e-05, "loss": 1119.0596, "step": 1518000 }, { "epoch": 86.41, "learning_rate": 1.3585163272272158e-05, "loss": 1126.4019, "step": 1519000 }, { "epoch": 86.47, "learning_rate": 1.3528273978837183e-05, "loss": 1121.0984, "step": 1520000 }, { "epoch": 86.53, "learning_rate": 1.3471384685402207e-05, "loss": 1122.5064, "step": 1521000 }, { "epoch": 86.59, "learning_rate": 1.3414495391967234e-05, "loss": 1134.6251, "step": 1522000 }, { "epoch": 86.64, "learning_rate": 1.3357606098532258e-05, "loss": 1125.4545, "step": 1523000 }, { "epoch": 86.7, "learning_rate": 1.3300716805097283e-05, "loss": 1117.6469, "step": 1524000 }, { "epoch": 86.76, "learning_rate": 1.3243827511662304e-05, "loss": 1124.5699, "step": 1525000 }, { "epoch": 86.81, "learning_rate": 1.3186938218227329e-05, "loss": 1134.0654, "step": 1526000 }, { "epoch": 86.87, "learning_rate": 1.3130048924792355e-05, "loss": 1127.0761, "step": 1527000 }, { "epoch": 86.93, "learning_rate": 1.307315963135738e-05, "loss": 1121.0956, "step": 1528000 }, { "epoch": 86.98, "learning_rate": 1.3016270337922404e-05, "loss": 1131.5735, "step": 1529000 }, { "epoch": 87.04, "learning_rate": 1.2959381044487429e-05, "loss": 1109.5545, "step": 1530000 }, { "epoch": 87.1, "learning_rate": 1.2902491751052453e-05, "loss": 1114.4014, "step": 1531000 }, { "epoch": 87.15, "learning_rate": 1.2845602457617478e-05, "loss": 1108.4309, "step": 1532000 }, { "epoch": 87.21, "learning_rate": 1.27887131641825e-05, "loss": 1112.9382, "step": 1533000 }, { "epoch": 87.27, "learning_rate": 1.2731823870747525e-05, "loss": 1118.8911, "step": 1534000 }, { "epoch": 87.33, "learning_rate": 1.267493457731255e-05, "loss": 1113.2851, "step": 1535000 }, { "epoch": 87.38, "learning_rate": 1.2618045283877575e-05, "loss": 1112.5121, "step": 1536000 }, { "epoch": 87.44, "learning_rate": 1.25611559904426e-05, "loss": 1115.0399, "step": 1537000 }, { "epoch": 87.5, "learning_rate": 1.2504266697007624e-05, "loss": 1118.6761, "step": 1538000 }, { "epoch": 87.55, "learning_rate": 1.2447377403572648e-05, "loss": 1112.6969, "step": 1539000 }, { "epoch": 87.61, "learning_rate": 1.2390488110137673e-05, "loss": 1118.2444, "step": 1540000 }, { "epoch": 87.67, "learning_rate": 1.2333598816702698e-05, "loss": 1113.6517, "step": 1541000 }, { "epoch": 87.72, "learning_rate": 1.227670952326772e-05, "loss": 1126.9607, "step": 1542000 }, { "epoch": 87.78, "learning_rate": 1.2219820229832745e-05, "loss": 1110.1945, "step": 1543000 }, { "epoch": 87.84, "learning_rate": 1.2162930936397771e-05, "loss": 1113.2117, "step": 1544000 }, { "epoch": 87.89, "learning_rate": 1.2106041642962796e-05, "loss": 1122.9605, "step": 1545000 }, { "epoch": 87.95, "learning_rate": 1.2049152349527819e-05, "loss": 1111.6869, "step": 1546000 }, { "epoch": 88.01, "learning_rate": 1.1992263056092843e-05, "loss": 1119.3374, "step": 1547000 }, { "epoch": 88.06, "learning_rate": 1.1935373762657868e-05, "loss": 1101.6833, "step": 1548000 }, { "epoch": 88.12, "learning_rate": 1.1878484469222893e-05, "loss": 1107.5354, "step": 1549000 }, { "epoch": 88.18, "learning_rate": 1.1821595175787917e-05, "loss": 1121.0419, "step": 1550000 }, { "epoch": 88.24, "learning_rate": 1.1764705882352942e-05, "loss": 1098.1549, "step": 1551000 }, { "epoch": 88.29, "learning_rate": 1.1707816588917966e-05, "loss": 1095.4716, "step": 1552000 }, { "epoch": 88.35, "learning_rate": 1.1650927295482991e-05, "loss": 1109.7774, "step": 1553000 }, { "epoch": 88.41, "learning_rate": 1.1594038002048014e-05, "loss": 1113.6749, "step": 1554000 }, { "epoch": 88.46, "learning_rate": 1.153714870861304e-05, "loss": 1086.1384, "step": 1555000 }, { "epoch": 88.52, "learning_rate": 1.1480259415178065e-05, "loss": 1108.1874, "step": 1556000 }, { "epoch": 88.58, "learning_rate": 1.142337012174309e-05, "loss": 1113.2659, "step": 1557000 }, { "epoch": 88.63, "learning_rate": 1.1366480828308112e-05, "loss": 1115.2307, "step": 1558000 }, { "epoch": 88.69, "learning_rate": 1.1309591534873137e-05, "loss": 1106.7597, "step": 1559000 }, { "epoch": 88.75, "learning_rate": 1.1252702241438162e-05, "loss": 1113.5464, "step": 1560000 }, { "epoch": 88.8, "learning_rate": 1.1195812948003188e-05, "loss": 1113.1184, "step": 1561000 }, { "epoch": 88.86, "learning_rate": 1.113892365456821e-05, "loss": 1104.449, "step": 1562000 }, { "epoch": 88.92, "learning_rate": 1.1082034361133235e-05, "loss": 1114.8906, "step": 1563000 }, { "epoch": 88.97, "learning_rate": 1.102514506769826e-05, "loss": 1107.803, "step": 1564000 }, { "epoch": 89.03, "learning_rate": 1.0968255774263283e-05, "loss": 1117.175, "step": 1565000 }, { "epoch": 89.09, "learning_rate": 1.0911366480828309e-05, "loss": 1097.3404, "step": 1566000 }, { "epoch": 89.15, "learning_rate": 1.0854477187393334e-05, "loss": 1090.3605, "step": 1567000 }, { "epoch": 89.2, "learning_rate": 1.0797587893958358e-05, "loss": 1087.2985, "step": 1568000 }, { "epoch": 89.26, "learning_rate": 1.0740698600523381e-05, "loss": 1093.1449, "step": 1569000 }, { "epoch": 89.32, "learning_rate": 1.0683809307088406e-05, "loss": 1096.0732, "step": 1570000 }, { "epoch": 89.37, "learning_rate": 1.062692001365343e-05, "loss": 1095.5364, "step": 1571000 }, { "epoch": 89.43, "learning_rate": 1.0570030720218457e-05, "loss": 1090.9849, "step": 1572000 }, { "epoch": 89.49, "learning_rate": 1.051314142678348e-05, "loss": 1107.299, "step": 1573000 }, { "epoch": 89.54, "learning_rate": 1.0456252133348504e-05, "loss": 1098.4516, "step": 1574000 }, { "epoch": 89.6, "learning_rate": 1.0399362839913529e-05, "loss": 1096.7204, "step": 1575000 }, { "epoch": 89.66, "learning_rate": 1.0342473546478553e-05, "loss": 1093.3604, "step": 1576000 }, { "epoch": 89.71, "learning_rate": 1.0285584253043578e-05, "loss": 1106.8506, "step": 1577000 }, { "epoch": 89.77, "learning_rate": 1.0228694959608603e-05, "loss": 1098.2148, "step": 1578000 }, { "epoch": 89.83, "learning_rate": 1.0171805666173627e-05, "loss": 1100.2651, "step": 1579000 }, { "epoch": 89.89, "learning_rate": 1.0114916372738652e-05, "loss": 1109.0013, "step": 1580000 }, { "epoch": 89.94, "learning_rate": 1.0058027079303675e-05, "loss": 1109.1815, "step": 1581000 }, { "epoch": 90.0, "learning_rate": 1.00011377858687e-05, "loss": 1094.9883, "step": 1582000 }, { "epoch": 90.06, "learning_rate": 9.944248492433725e-06, "loss": 1077.7589, "step": 1583000 }, { "epoch": 90.11, "learning_rate": 9.887359198998748e-06, "loss": 1094.59, "step": 1584000 }, { "epoch": 90.17, "learning_rate": 9.830469905563773e-06, "loss": 1091.2107, "step": 1585000 }, { "epoch": 90.23, "learning_rate": 9.773580612128798e-06, "loss": 1088.9885, "step": 1586000 }, { "epoch": 90.28, "learning_rate": 9.716691318693822e-06, "loss": 1083.6439, "step": 1587000 }, { "epoch": 90.34, "learning_rate": 9.659802025258847e-06, "loss": 1093.6745, "step": 1588000 }, { "epoch": 90.4, "learning_rate": 9.602912731823871e-06, "loss": 1093.6715, "step": 1589000 }, { "epoch": 90.45, "learning_rate": 9.546023438388896e-06, "loss": 1101.8837, "step": 1590000 }, { "epoch": 90.51, "learning_rate": 9.48913414495392e-06, "loss": 1091.5484, "step": 1591000 }, { "epoch": 90.57, "learning_rate": 9.432244851518943e-06, "loss": 1085.3718, "step": 1592000 }, { "epoch": 90.62, "learning_rate": 9.375355558083968e-06, "loss": 1104.2806, "step": 1593000 }, { "epoch": 90.68, "learning_rate": 9.318466264648994e-06, "loss": 1084.4771, "step": 1594000 }, { "epoch": 90.74, "learning_rate": 9.261576971214019e-06, "loss": 1095.9153, "step": 1595000 }, { "epoch": 90.8, "learning_rate": 9.204687677779042e-06, "loss": 1097.0506, "step": 1596000 }, { "epoch": 90.85, "learning_rate": 9.147798384344066e-06, "loss": 1097.5512, "step": 1597000 }, { "epoch": 90.91, "learning_rate": 9.090909090909091e-06, "loss": 1091.6741, "step": 1598000 }, { "epoch": 90.97, "learning_rate": 9.034019797474116e-06, "loss": 1093.6969, "step": 1599000 }, { "epoch": 91.02, "learning_rate": 8.97713050403914e-06, "loss": 1091.6543, "step": 1600000 }, { "epoch": 91.08, "learning_rate": 8.920241210604165e-06, "loss": 1090.8894, "step": 1601000 }, { "epoch": 91.14, "learning_rate": 8.86335191716919e-06, "loss": 1082.1671, "step": 1602000 }, { "epoch": 91.19, "learning_rate": 8.806462623734214e-06, "loss": 1073.4534, "step": 1603000 }, { "epoch": 91.25, "learning_rate": 8.749573330299237e-06, "loss": 1085.4431, "step": 1604000 }, { "epoch": 91.31, "learning_rate": 8.692684036864263e-06, "loss": 1076.5241, "step": 1605000 }, { "epoch": 91.36, "learning_rate": 8.635794743429288e-06, "loss": 1088.213, "step": 1606000 }, { "epoch": 91.42, "learning_rate": 8.57890544999431e-06, "loss": 1083.7541, "step": 1607000 }, { "epoch": 91.48, "learning_rate": 8.522016156559335e-06, "loss": 1097.26, "step": 1608000 }, { "epoch": 91.53, "learning_rate": 8.46512686312436e-06, "loss": 1081.5745, "step": 1609000 }, { "epoch": 91.59, "learning_rate": 8.408237569689384e-06, "loss": 1084.276, "step": 1610000 }, { "epoch": 91.65, "learning_rate": 8.351348276254409e-06, "loss": 1096.3106, "step": 1611000 }, { "epoch": 91.71, "learning_rate": 8.294458982819434e-06, "loss": 1083.8941, "step": 1612000 }, { "epoch": 91.76, "learning_rate": 8.237569689384458e-06, "loss": 1088.2236, "step": 1613000 }, { "epoch": 91.82, "learning_rate": 8.180680395949483e-06, "loss": 1078.3374, "step": 1614000 }, { "epoch": 91.88, "learning_rate": 8.123791102514507e-06, "loss": 1091.2979, "step": 1615000 }, { "epoch": 91.93, "learning_rate": 8.066901809079532e-06, "loss": 1078.4552, "step": 1616000 }, { "epoch": 91.99, "learning_rate": 8.010012515644557e-06, "loss": 1097.2995, "step": 1617000 }, { "epoch": 92.05, "learning_rate": 7.953123222209581e-06, "loss": 1075.6526, "step": 1618000 }, { "epoch": 92.1, "learning_rate": 7.896233928774604e-06, "loss": 1081.5936, "step": 1619000 }, { "epoch": 92.16, "learning_rate": 7.839344635339629e-06, "loss": 1075.9354, "step": 1620000 }, { "epoch": 92.22, "learning_rate": 7.782455341904655e-06, "loss": 1080.505, "step": 1621000 }, { "epoch": 92.27, "learning_rate": 7.72556604846968e-06, "loss": 1070.3919, "step": 1622000 }, { "epoch": 92.33, "learning_rate": 7.668676755034703e-06, "loss": 1076.8446, "step": 1623000 }, { "epoch": 92.39, "learning_rate": 7.611787461599727e-06, "loss": 1068.621, "step": 1624000 }, { "epoch": 92.45, "learning_rate": 7.554898168164752e-06, "loss": 1085.1759, "step": 1625000 }, { "epoch": 92.5, "learning_rate": 7.4980088747297755e-06, "loss": 1080.7703, "step": 1626000 }, { "epoch": 92.56, "learning_rate": 7.4411195812948e-06, "loss": 1081.9409, "step": 1627000 }, { "epoch": 92.62, "learning_rate": 7.3842302878598255e-06, "loss": 1075.6502, "step": 1628000 }, { "epoch": 92.67, "learning_rate": 7.32734099442485e-06, "loss": 1086.1202, "step": 1629000 }, { "epoch": 92.73, "learning_rate": 7.270451700989874e-06, "loss": 1071.0248, "step": 1630000 }, { "epoch": 92.79, "learning_rate": 7.2135624075548984e-06, "loss": 1085.1731, "step": 1631000 }, { "epoch": 92.84, "learning_rate": 7.156673114119923e-06, "loss": 1076.1421, "step": 1632000 }, { "epoch": 92.9, "learning_rate": 7.099783820684948e-06, "loss": 1066.8614, "step": 1633000 }, { "epoch": 92.96, "learning_rate": 7.042894527249971e-06, "loss": 1096.7254, "step": 1634000 }, { "epoch": 93.01, "learning_rate": 6.986005233814996e-06, "loss": 1074.3671, "step": 1635000 }, { "epoch": 93.07, "learning_rate": 6.9291159403800206e-06, "loss": 1068.0549, "step": 1636000 }, { "epoch": 93.13, "learning_rate": 6.872226646945046e-06, "loss": 1068.2271, "step": 1637000 }, { "epoch": 93.18, "learning_rate": 6.815337353510069e-06, "loss": 1067.7906, "step": 1638000 }, { "epoch": 93.24, "learning_rate": 6.758448060075094e-06, "loss": 1080.0105, "step": 1639000 }, { "epoch": 93.3, "learning_rate": 6.701558766640119e-06, "loss": 1075.7644, "step": 1640000 }, { "epoch": 93.36, "learning_rate": 6.6446694732051435e-06, "loss": 1082.0277, "step": 1641000 }, { "epoch": 93.41, "learning_rate": 6.587780179770167e-06, "loss": 1067.4605, "step": 1642000 }, { "epoch": 93.47, "learning_rate": 6.530890886335192e-06, "loss": 1065.3825, "step": 1643000 }, { "epoch": 93.53, "learning_rate": 6.4740015929002165e-06, "loss": 1072.5138, "step": 1644000 }, { "epoch": 93.58, "learning_rate": 6.417112299465242e-06, "loss": 1074.6221, "step": 1645000 }, { "epoch": 93.64, "learning_rate": 6.360223006030265e-06, "loss": 1085.9053, "step": 1646000 }, { "epoch": 93.7, "learning_rate": 6.30333371259529e-06, "loss": 1070.7372, "step": 1647000 }, { "epoch": 93.75, "learning_rate": 6.246444419160314e-06, "loss": 1067.4809, "step": 1648000 }, { "epoch": 93.81, "learning_rate": 6.189555125725339e-06, "loss": 1063.8144, "step": 1649000 }, { "epoch": 93.87, "learning_rate": 6.132665832290363e-06, "loss": 1063.1395, "step": 1650000 }, { "epoch": 93.92, "learning_rate": 6.075776538855388e-06, "loss": 1066.9593, "step": 1651000 }, { "epoch": 93.98, "learning_rate": 6.0188872454204116e-06, "loss": 1077.2679, "step": 1652000 }, { "epoch": 94.04, "learning_rate": 5.961997951985437e-06, "loss": 1072.2746, "step": 1653000 }, { "epoch": 94.09, "learning_rate": 5.905108658550461e-06, "loss": 1057.2509, "step": 1654000 }, { "epoch": 94.15, "learning_rate": 5.848219365115485e-06, "loss": 1058.5381, "step": 1655000 }, { "epoch": 94.21, "learning_rate": 5.79133007168051e-06, "loss": 1050.2731, "step": 1656000 }, { "epoch": 94.27, "learning_rate": 5.7344407782455345e-06, "loss": 1063.0222, "step": 1657000 }, { "epoch": 94.32, "learning_rate": 5.677551484810559e-06, "loss": 1056.5561, "step": 1658000 }, { "epoch": 94.38, "learning_rate": 5.620662191375584e-06, "loss": 1073.6494, "step": 1659000 }, { "epoch": 94.44, "learning_rate": 5.5637728979406075e-06, "loss": 1075.7826, "step": 1660000 }, { "epoch": 94.49, "learning_rate": 5.506883604505633e-06, "loss": 1073.5648, "step": 1661000 }, { "epoch": 94.55, "learning_rate": 5.449994311070657e-06, "loss": 1072.3453, "step": 1662000 }, { "epoch": 94.61, "learning_rate": 5.393105017635681e-06, "loss": 1070.7346, "step": 1663000 }, { "epoch": 94.66, "learning_rate": 5.336215724200706e-06, "loss": 1062.1372, "step": 1664000 }, { "epoch": 94.72, "learning_rate": 5.2793264307657304e-06, "loss": 1061.1122, "step": 1665000 }, { "epoch": 94.78, "learning_rate": 5.222437137330754e-06, "loss": 1068.318, "step": 1666000 }, { "epoch": 94.83, "learning_rate": 5.16554784389578e-06, "loss": 1061.6929, "step": 1667000 }, { "epoch": 94.89, "learning_rate": 5.108658550460803e-06, "loss": 1058.8579, "step": 1668000 }, { "epoch": 94.95, "learning_rate": 5.051769257025828e-06, "loss": 1079.8582, "step": 1669000 }, { "epoch": 95.01, "learning_rate": 4.9948799635908526e-06, "loss": 1067.4991, "step": 1670000 }, { "epoch": 95.06, "learning_rate": 4.937990670155876e-06, "loss": 1051.402, "step": 1671000 }, { "epoch": 95.12, "learning_rate": 4.881101376720902e-06, "loss": 1051.1781, "step": 1672000 }, { "epoch": 95.18, "learning_rate": 4.8242120832859255e-06, "loss": 1056.3135, "step": 1673000 }, { "epoch": 95.23, "learning_rate": 4.76732278985095e-06, "loss": 1044.9904, "step": 1674000 }, { "epoch": 95.29, "learning_rate": 4.710433496415975e-06, "loss": 1061.8119, "step": 1675000 }, { "epoch": 95.35, "learning_rate": 4.653544202980999e-06, "loss": 1058.4149, "step": 1676000 }, { "epoch": 95.4, "learning_rate": 4.596654909546023e-06, "loss": 1064.3592, "step": 1677000 }, { "epoch": 95.46, "learning_rate": 4.5397656161110485e-06, "loss": 1052.3924, "step": 1678000 }, { "epoch": 95.52, "learning_rate": 4.482876322676072e-06, "loss": 1057.8231, "step": 1679000 }, { "epoch": 95.57, "learning_rate": 4.425987029241097e-06, "loss": 1068.8194, "step": 1680000 }, { "epoch": 95.63, "learning_rate": 4.3690977358061214e-06, "loss": 1056.8919, "step": 1681000 }, { "epoch": 95.69, "learning_rate": 4.312208442371146e-06, "loss": 1058.4952, "step": 1682000 }, { "epoch": 95.74, "learning_rate": 4.255319148936171e-06, "loss": 1060.3724, "step": 1683000 }, { "epoch": 95.8, "learning_rate": 4.198429855501195e-06, "loss": 1058.1774, "step": 1684000 }, { "epoch": 95.86, "learning_rate": 4.141540562066219e-06, "loss": 1069.2876, "step": 1685000 }, { "epoch": 95.92, "learning_rate": 4.084651268631244e-06, "loss": 1071.9934, "step": 1686000 }, { "epoch": 95.97, "learning_rate": 4.027761975196268e-06, "loss": 1070.8982, "step": 1687000 }, { "epoch": 96.03, "learning_rate": 3.970872681761293e-06, "loss": 1055.7587, "step": 1688000 }, { "epoch": 96.09, "learning_rate": 3.913983388326317e-06, "loss": 1059.1715, "step": 1689000 }, { "epoch": 96.14, "learning_rate": 3.857094094891341e-06, "loss": 1063.2, "step": 1690000 }, { "epoch": 96.2, "learning_rate": 3.800204801456366e-06, "loss": 1054.6501, "step": 1691000 }, { "epoch": 96.26, "learning_rate": 3.7433155080213903e-06, "loss": 1055.0367, "step": 1692000 }, { "epoch": 96.31, "learning_rate": 3.686426214586415e-06, "loss": 1064.0915, "step": 1693000 }, { "epoch": 96.37, "learning_rate": 3.629536921151439e-06, "loss": 1047.4447, "step": 1694000 }, { "epoch": 96.43, "learning_rate": 3.572647627716464e-06, "loss": 1046.4394, "step": 1695000 }, { "epoch": 96.48, "learning_rate": 3.5157583342814882e-06, "loss": 1058.1025, "step": 1696000 }, { "epoch": 96.54, "learning_rate": 3.458869040846513e-06, "loss": 1068.0409, "step": 1697000 }, { "epoch": 96.6, "learning_rate": 3.401979747411537e-06, "loss": 1049.7309, "step": 1698000 }, { "epoch": 96.65, "learning_rate": 3.345090453976562e-06, "loss": 1052.8636, "step": 1699000 }, { "epoch": 96.71, "learning_rate": 3.288201160541586e-06, "loss": 1053.6625, "step": 1700000 }, { "epoch": 96.77, "learning_rate": 3.2313118671066108e-06, "loss": 1052.6025, "step": 1701000 }, { "epoch": 96.83, "learning_rate": 3.174422573671635e-06, "loss": 1054.5915, "step": 1702000 }, { "epoch": 96.88, "learning_rate": 3.1175332802366596e-06, "loss": 1066.4962, "step": 1703000 }, { "epoch": 96.94, "learning_rate": 3.060643986801684e-06, "loss": 1044.0403, "step": 1704000 }, { "epoch": 97.0, "learning_rate": 3.0037546933667083e-06, "loss": 1057.349, "step": 1705000 }, { "epoch": 97.05, "learning_rate": 2.946865399931733e-06, "loss": 1052.9554, "step": 1706000 }, { "epoch": 97.11, "learning_rate": 2.8899761064967575e-06, "loss": 1059.2834, "step": 1707000 }, { "epoch": 97.17, "learning_rate": 2.8330868130617817e-06, "loss": 1046.4251, "step": 1708000 }, { "epoch": 97.22, "learning_rate": 2.7761975196268063e-06, "loss": 1055.8756, "step": 1709000 }, { "epoch": 97.28, "learning_rate": 2.719308226191831e-06, "loss": 1052.8722, "step": 1710000 }, { "epoch": 97.34, "learning_rate": 2.6624189327568555e-06, "loss": 1051.8356, "step": 1711000 }, { "epoch": 97.39, "learning_rate": 2.6055296393218796e-06, "loss": 1044.8456, "step": 1712000 }, { "epoch": 97.45, "learning_rate": 2.5486403458869042e-06, "loss": 1037.8506, "step": 1713000 }, { "epoch": 97.51, "learning_rate": 2.491751052451929e-06, "loss": 1049.2854, "step": 1714000 }, { "epoch": 97.57, "learning_rate": 2.434861759016953e-06, "loss": 1042.9379, "step": 1715000 }, { "epoch": 97.62, "learning_rate": 2.3779724655819776e-06, "loss": 1048.6644, "step": 1716000 }, { "epoch": 97.68, "learning_rate": 2.321083172147002e-06, "loss": 1044.0989, "step": 1717000 }, { "epoch": 97.74, "learning_rate": 2.2641938787120268e-06, "loss": 1051.0176, "step": 1718000 }, { "epoch": 97.79, "learning_rate": 2.207304585277051e-06, "loss": 1050.4999, "step": 1719000 }, { "epoch": 97.85, "learning_rate": 2.1504152918420756e-06, "loss": 1062.4196, "step": 1720000 }, { "epoch": 97.91, "learning_rate": 2.0935259984071e-06, "loss": 1045.7354, "step": 1721000 }, { "epoch": 97.96, "learning_rate": 2.0366367049721243e-06, "loss": 1048.9475, "step": 1722000 }, { "epoch": 98.02, "learning_rate": 1.9797474115371485e-06, "loss": 1040.5112, "step": 1723000 }, { "epoch": 98.08, "learning_rate": 1.922858118102173e-06, "loss": 1041.0399, "step": 1724000 }, { "epoch": 98.13, "learning_rate": 1.8659688246671977e-06, "loss": 1049.1707, "step": 1725000 }, { "epoch": 98.19, "learning_rate": 1.809079531232222e-06, "loss": 1043.6125, "step": 1726000 }, { "epoch": 98.25, "learning_rate": 1.7521902377972467e-06, "loss": 1043.5132, "step": 1727000 }, { "epoch": 98.3, "learning_rate": 1.695300944362271e-06, "loss": 1044.9226, "step": 1728000 }, { "epoch": 98.36, "learning_rate": 1.6384116509272954e-06, "loss": 1046.5654, "step": 1729000 }, { "epoch": 98.42, "learning_rate": 1.58152235749232e-06, "loss": 1032.5776, "step": 1730000 }, { "epoch": 98.48, "learning_rate": 1.5246330640573444e-06, "loss": 1042.1588, "step": 1731000 }, { "epoch": 98.53, "learning_rate": 1.467743770622369e-06, "loss": 1028.2497, "step": 1732000 }, { "epoch": 98.59, "learning_rate": 1.4108544771873934e-06, "loss": 1038.3866, "step": 1733000 }, { "epoch": 98.65, "learning_rate": 1.353965183752418e-06, "loss": 1038.5657, "step": 1734000 }, { "epoch": 98.7, "learning_rate": 1.2970758903174424e-06, "loss": 1044.7608, "step": 1735000 }, { "epoch": 98.76, "learning_rate": 1.2401865968824667e-06, "loss": 1046.2042, "step": 1736000 }, { "epoch": 98.82, "learning_rate": 1.1832973034474913e-06, "loss": 1049.8215, "step": 1737000 }, { "epoch": 98.87, "learning_rate": 1.1264080100125155e-06, "loss": 1035.5336, "step": 1738000 }, { "epoch": 98.93, "learning_rate": 1.0695187165775401e-06, "loss": 1035.831, "step": 1739000 }, { "epoch": 98.99, "learning_rate": 1.0126294231425645e-06, "loss": 1035.2835, "step": 1740000 }, { "epoch": 99.04, "learning_rate": 9.55740129707589e-07, "loss": 1032.7643, "step": 1741000 }, { "epoch": 99.1, "learning_rate": 8.988508362726135e-07, "loss": 1046.4654, "step": 1742000 }, { "epoch": 99.16, "learning_rate": 8.41961542837638e-07, "loss": 1039.5064, "step": 1743000 }, { "epoch": 99.21, "learning_rate": 7.850722494026625e-07, "loss": 1039.8524, "step": 1744000 }, { "epoch": 99.27, "learning_rate": 7.281829559676869e-07, "loss": 1038.3202, "step": 1745000 }, { "epoch": 99.33, "learning_rate": 6.712936625327113e-07, "loss": 1046.5058, "step": 1746000 }, { "epoch": 99.39, "learning_rate": 6.144043690977358e-07, "loss": 1049.6586, "step": 1747000 }, { "epoch": 99.44, "learning_rate": 5.575150756627603e-07, "loss": 1041.2756, "step": 1748000 }, { "epoch": 99.5, "learning_rate": 5.006257822277848e-07, "loss": 1027.9045, "step": 1749000 }, { "epoch": 99.56, "learning_rate": 4.4373648879280923e-07, "loss": 1041.6981, "step": 1750000 }, { "epoch": 99.61, "learning_rate": 3.8684719535783366e-07, "loss": 1031.1304, "step": 1751000 }, { "epoch": 99.67, "learning_rate": 3.2995790192285815e-07, "loss": 1035.2171, "step": 1752000 }, { "epoch": 99.73, "learning_rate": 2.730686084878826e-07, "loss": 1041.2465, "step": 1753000 }, { "epoch": 99.78, "learning_rate": 2.1617931505290705e-07, "loss": 1044.1264, "step": 1754000 }, { "epoch": 99.84, "learning_rate": 1.5929002161793151e-07, "loss": 1033.9724, "step": 1755000 }, { "epoch": 99.9, "learning_rate": 1.0240072818295596e-07, "loss": 1042.4498, "step": 1756000 }, { "epoch": 99.95, "learning_rate": 4.5511434747980434e-08, "loss": 1036.0821, "step": 1757000 } ], "max_steps": 1757800, "num_train_epochs": 100, "total_flos": 4.126140729603308e+21, "trial_name": null, "trial_params": null }