{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 812310, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.996922357228152e-05, "loss": 1.5563, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.9938447144563037e-05, "loss": 1.4798, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.9907670716844554e-05, "loss": 1.4636, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.987689428912608e-05, "loss": 1.4411, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.9846117861407595e-05, "loss": 1.4334, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.981534143368911e-05, "loss": 1.4234, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.978456500597063e-05, "loss": 1.4075, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.9753788578252146e-05, "loss": 1.4069, "step": 4000 }, { "epoch": 0.02, "learning_rate": 4.972301215053366e-05, "loss": 1.4132, "step": 4500 }, { "epoch": 0.02, "learning_rate": 4.969223572281519e-05, "loss": 1.4005, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.9661459295096704e-05, "loss": 1.3938, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.963068286737822e-05, "loss": 1.3949, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.959990643965974e-05, "loss": 1.3849, "step": 6500 }, { "epoch": 0.03, "learning_rate": 4.9569130011941255e-05, "loss": 1.3828, "step": 7000 }, { "epoch": 0.03, "learning_rate": 4.953835358422277e-05, "loss": 1.3794, "step": 7500 }, { "epoch": 0.03, "learning_rate": 4.950757715650429e-05, "loss": 1.3727, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.947680072878581e-05, "loss": 1.3797, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.944602430106733e-05, "loss": 1.364, "step": 9000 }, { "epoch": 0.04, "learning_rate": 4.941524787334885e-05, "loss": 1.3602, "step": 9500 }, { "epoch": 0.04, "learning_rate": 4.9384471445630365e-05, "loss": 1.3704, "step": 10000 }, { "epoch": 0.04, "learning_rate": 4.935369501791188e-05, "loss": 1.3619, "step": 10500 }, { "epoch": 0.04, "learning_rate": 4.93229185901934e-05, "loss": 1.369, "step": 11000 }, { "epoch": 0.04, "learning_rate": 4.9292142162474916e-05, "loss": 1.3687, "step": 11500 }, { "epoch": 0.04, "learning_rate": 4.926136573475644e-05, "loss": 1.3481, "step": 12000 }, { "epoch": 0.05, "learning_rate": 4.923058930703796e-05, "loss": 1.3553, "step": 12500 }, { "epoch": 0.05, "learning_rate": 4.9199812879319474e-05, "loss": 1.3532, "step": 13000 }, { "epoch": 0.05, "learning_rate": 4.916903645160099e-05, "loss": 1.3519, "step": 13500 }, { "epoch": 0.05, "learning_rate": 4.913826002388251e-05, "loss": 1.3524, "step": 14000 }, { "epoch": 0.05, "learning_rate": 4.9107483596164025e-05, "loss": 1.3427, "step": 14500 }, { "epoch": 0.06, "learning_rate": 4.907670716844555e-05, "loss": 1.3436, "step": 15000 }, { "epoch": 0.06, "learning_rate": 4.9045930740727066e-05, "loss": 1.3342, "step": 15500 }, { "epoch": 0.06, "learning_rate": 4.901515431300858e-05, "loss": 1.332, "step": 16000 }, { "epoch": 0.06, "learning_rate": 4.89843778852901e-05, "loss": 1.3384, "step": 16500 }, { "epoch": 0.06, "learning_rate": 4.895360145757162e-05, "loss": 1.3252, "step": 17000 }, { "epoch": 0.06, "learning_rate": 4.8922825029853134e-05, "loss": 1.3405, "step": 17500 }, { "epoch": 0.07, "learning_rate": 4.889204860213465e-05, "loss": 1.3363, "step": 18000 }, { "epoch": 0.07, "learning_rate": 4.8861272174416175e-05, "loss": 1.3297, "step": 18500 }, { "epoch": 0.07, "learning_rate": 4.883049574669769e-05, "loss": 1.3268, "step": 19000 }, { "epoch": 0.07, "learning_rate": 4.879971931897921e-05, "loss": 1.3136, "step": 19500 }, { "epoch": 0.07, "learning_rate": 4.876894289126073e-05, "loss": 1.3182, "step": 20000 }, { "epoch": 0.08, "learning_rate": 4.8738166463542244e-05, "loss": 1.3226, "step": 20500 }, { "epoch": 0.08, "learning_rate": 4.870739003582376e-05, "loss": 1.3186, "step": 21000 }, { "epoch": 0.08, "learning_rate": 4.8676613608105285e-05, "loss": 1.3234, "step": 21500 }, { "epoch": 0.08, "learning_rate": 4.86458371803868e-05, "loss": 1.3214, "step": 22000 }, { "epoch": 0.08, "learning_rate": 4.861506075266832e-05, "loss": 1.3184, "step": 22500 }, { "epoch": 0.08, "learning_rate": 4.8584284324949836e-05, "loss": 1.3074, "step": 23000 }, { "epoch": 0.09, "learning_rate": 4.855350789723135e-05, "loss": 1.3137, "step": 23500 }, { "epoch": 0.09, "learning_rate": 4.852273146951287e-05, "loss": 1.3114, "step": 24000 }, { "epoch": 0.09, "learning_rate": 4.849195504179439e-05, "loss": 1.3174, "step": 24500 }, { "epoch": 0.09, "learning_rate": 4.846117861407591e-05, "loss": 1.3094, "step": 25000 }, { "epoch": 0.09, "learning_rate": 4.843040218635743e-05, "loss": 1.3133, "step": 25500 }, { "epoch": 0.1, "learning_rate": 4.8399625758638945e-05, "loss": 1.3095, "step": 26000 }, { "epoch": 0.1, "learning_rate": 4.836884933092046e-05, "loss": 1.3052, "step": 26500 }, { "epoch": 0.1, "learning_rate": 4.833807290320198e-05, "loss": 1.3048, "step": 27000 }, { "epoch": 0.1, "learning_rate": 4.8307296475483497e-05, "loss": 1.3093, "step": 27500 }, { "epoch": 0.1, "learning_rate": 4.8276520047765014e-05, "loss": 1.307, "step": 28000 }, { "epoch": 0.11, "learning_rate": 4.824574362004654e-05, "loss": 1.312, "step": 28500 }, { "epoch": 0.11, "learning_rate": 4.8214967192328055e-05, "loss": 1.3006, "step": 29000 }, { "epoch": 0.11, "learning_rate": 4.818419076460957e-05, "loss": 1.3069, "step": 29500 }, { "epoch": 0.11, "learning_rate": 4.815341433689109e-05, "loss": 1.2969, "step": 30000 }, { "epoch": 0.11, "learning_rate": 4.8122637909172606e-05, "loss": 1.3042, "step": 30500 }, { "epoch": 0.11, "learning_rate": 4.809186148145412e-05, "loss": 1.2999, "step": 31000 }, { "epoch": 0.12, "learning_rate": 4.806108505373565e-05, "loss": 1.3, "step": 31500 }, { "epoch": 0.12, "learning_rate": 4.8030308626017164e-05, "loss": 1.2973, "step": 32000 }, { "epoch": 0.12, "learning_rate": 4.799953219829868e-05, "loss": 1.2949, "step": 32500 }, { "epoch": 0.12, "learning_rate": 4.79687557705802e-05, "loss": 1.293, "step": 33000 }, { "epoch": 0.12, "learning_rate": 4.7937979342861715e-05, "loss": 1.2834, "step": 33500 }, { "epoch": 0.13, "learning_rate": 4.790720291514323e-05, "loss": 1.3032, "step": 34000 }, { "epoch": 0.13, "learning_rate": 4.787642648742475e-05, "loss": 1.2926, "step": 34500 }, { "epoch": 0.13, "learning_rate": 4.784565005970627e-05, "loss": 1.291, "step": 35000 }, { "epoch": 0.13, "learning_rate": 4.781487363198779e-05, "loss": 1.2958, "step": 35500 }, { "epoch": 0.13, "learning_rate": 4.778409720426931e-05, "loss": 1.2883, "step": 36000 }, { "epoch": 0.13, "learning_rate": 4.7753320776550824e-05, "loss": 1.2998, "step": 36500 }, { "epoch": 0.14, "learning_rate": 4.772254434883234e-05, "loss": 1.2781, "step": 37000 }, { "epoch": 0.14, "learning_rate": 4.769176792111386e-05, "loss": 1.2933, "step": 37500 }, { "epoch": 0.14, "learning_rate": 4.766099149339538e-05, "loss": 1.2969, "step": 38000 }, { "epoch": 0.14, "learning_rate": 4.76302150656769e-05, "loss": 1.279, "step": 38500 }, { "epoch": 0.14, "learning_rate": 4.759943863795842e-05, "loss": 1.2929, "step": 39000 }, { "epoch": 0.15, "learning_rate": 4.7568662210239934e-05, "loss": 1.2873, "step": 39500 }, { "epoch": 0.15, "learning_rate": 4.753788578252145e-05, "loss": 1.2923, "step": 40000 }, { "epoch": 0.15, "learning_rate": 4.750710935480297e-05, "loss": 1.2974, "step": 40500 }, { "epoch": 0.15, "learning_rate": 4.7476332927084485e-05, "loss": 1.2789, "step": 41000 }, { "epoch": 0.15, "learning_rate": 4.744555649936601e-05, "loss": 1.2783, "step": 41500 }, { "epoch": 0.16, "learning_rate": 4.7414780071647526e-05, "loss": 1.2781, "step": 42000 }, { "epoch": 0.16, "learning_rate": 4.738400364392904e-05, "loss": 1.2925, "step": 42500 }, { "epoch": 0.16, "learning_rate": 4.735322721621056e-05, "loss": 1.2818, "step": 43000 }, { "epoch": 0.16, "learning_rate": 4.732245078849208e-05, "loss": 1.2788, "step": 43500 }, { "epoch": 0.16, "learning_rate": 4.7291674360773594e-05, "loss": 1.2732, "step": 44000 }, { "epoch": 0.16, "learning_rate": 4.726089793305512e-05, "loss": 1.278, "step": 44500 }, { "epoch": 0.17, "learning_rate": 4.7230121505336635e-05, "loss": 1.2772, "step": 45000 }, { "epoch": 0.17, "learning_rate": 4.719934507761815e-05, "loss": 1.2752, "step": 45500 }, { "epoch": 0.17, "learning_rate": 4.716856864989967e-05, "loss": 1.2623, "step": 46000 }, { "epoch": 0.17, "learning_rate": 4.713779222218119e-05, "loss": 1.2704, "step": 46500 }, { "epoch": 0.17, "learning_rate": 4.7107015794462704e-05, "loss": 1.2778, "step": 47000 }, { "epoch": 0.18, "learning_rate": 4.707623936674422e-05, "loss": 1.2775, "step": 47500 }, { "epoch": 0.18, "learning_rate": 4.7045462939025745e-05, "loss": 1.2728, "step": 48000 }, { "epoch": 0.18, "learning_rate": 4.701468651130726e-05, "loss": 1.2722, "step": 48500 }, { "epoch": 0.18, "learning_rate": 4.698391008358878e-05, "loss": 1.2701, "step": 49000 }, { "epoch": 0.18, "learning_rate": 4.6953133655870296e-05, "loss": 1.2808, "step": 49500 }, { "epoch": 0.18, "learning_rate": 4.692235722815181e-05, "loss": 1.2653, "step": 50000 }, { "epoch": 0.19, "learning_rate": 4.689158080043333e-05, "loss": 1.2769, "step": 50500 }, { "epoch": 0.19, "learning_rate": 4.686080437271485e-05, "loss": 1.2752, "step": 51000 }, { "epoch": 0.19, "learning_rate": 4.683002794499637e-05, "loss": 1.2725, "step": 51500 }, { "epoch": 0.19, "learning_rate": 4.679925151727789e-05, "loss": 1.26, "step": 52000 }, { "epoch": 0.19, "learning_rate": 4.6768475089559405e-05, "loss": 1.2682, "step": 52500 }, { "epoch": 0.2, "learning_rate": 4.673769866184092e-05, "loss": 1.2759, "step": 53000 }, { "epoch": 0.2, "learning_rate": 4.670692223412244e-05, "loss": 1.2719, "step": 53500 }, { "epoch": 0.2, "learning_rate": 4.6676145806403957e-05, "loss": 1.268, "step": 54000 }, { "epoch": 0.2, "learning_rate": 4.664536937868548e-05, "loss": 1.2631, "step": 54500 }, { "epoch": 0.2, "learning_rate": 4.6614592950967e-05, "loss": 1.2692, "step": 55000 }, { "epoch": 0.2, "learning_rate": 4.6583816523248515e-05, "loss": 1.269, "step": 55500 }, { "epoch": 0.21, "learning_rate": 4.655304009553003e-05, "loss": 1.2673, "step": 56000 }, { "epoch": 0.21, "learning_rate": 4.652226366781155e-05, "loss": 1.2633, "step": 56500 }, { "epoch": 0.21, "learning_rate": 4.6491487240093066e-05, "loss": 1.2589, "step": 57000 }, { "epoch": 0.21, "learning_rate": 4.646071081237458e-05, "loss": 1.2639, "step": 57500 }, { "epoch": 0.21, "learning_rate": 4.642993438465611e-05, "loss": 1.2571, "step": 58000 }, { "epoch": 0.22, "learning_rate": 4.6399157956937624e-05, "loss": 1.2647, "step": 58500 }, { "epoch": 0.22, "learning_rate": 4.636838152921914e-05, "loss": 1.2582, "step": 59000 }, { "epoch": 0.22, "learning_rate": 4.633760510150066e-05, "loss": 1.259, "step": 59500 }, { "epoch": 0.22, "learning_rate": 4.6306828673782175e-05, "loss": 1.2657, "step": 60000 }, { "epoch": 0.22, "learning_rate": 4.627605224606369e-05, "loss": 1.2602, "step": 60500 }, { "epoch": 0.23, "learning_rate": 4.6245275818345216e-05, "loss": 1.2594, "step": 61000 }, { "epoch": 0.23, "learning_rate": 4.621449939062673e-05, "loss": 1.2608, "step": 61500 }, { "epoch": 0.23, "learning_rate": 4.618372296290825e-05, "loss": 1.259, "step": 62000 }, { "epoch": 0.23, "learning_rate": 4.615294653518977e-05, "loss": 1.2659, "step": 62500 }, { "epoch": 0.23, "learning_rate": 4.6122170107471284e-05, "loss": 1.2597, "step": 63000 }, { "epoch": 0.23, "learning_rate": 4.60913936797528e-05, "loss": 1.2524, "step": 63500 }, { "epoch": 0.24, "learning_rate": 4.606061725203432e-05, "loss": 1.2521, "step": 64000 }, { "epoch": 0.24, "learning_rate": 4.602984082431584e-05, "loss": 1.2588, "step": 64500 }, { "epoch": 0.24, "learning_rate": 4.599906439659736e-05, "loss": 1.2554, "step": 65000 }, { "epoch": 0.24, "learning_rate": 4.596828796887888e-05, "loss": 1.25, "step": 65500 }, { "epoch": 0.24, "learning_rate": 4.5937511541160394e-05, "loss": 1.2467, "step": 66000 }, { "epoch": 0.25, "learning_rate": 4.590673511344191e-05, "loss": 1.2472, "step": 66500 }, { "epoch": 0.25, "learning_rate": 4.587595868572343e-05, "loss": 1.2652, "step": 67000 }, { "epoch": 0.25, "learning_rate": 4.5845182258004945e-05, "loss": 1.2506, "step": 67500 }, { "epoch": 0.25, "learning_rate": 4.581440583028647e-05, "loss": 1.247, "step": 68000 }, { "epoch": 0.25, "learning_rate": 4.5783629402567986e-05, "loss": 1.2519, "step": 68500 }, { "epoch": 0.25, "learning_rate": 4.57528529748495e-05, "loss": 1.2489, "step": 69000 }, { "epoch": 0.26, "learning_rate": 4.572207654713102e-05, "loss": 1.2508, "step": 69500 }, { "epoch": 0.26, "learning_rate": 4.569130011941254e-05, "loss": 1.2556, "step": 70000 }, { "epoch": 0.26, "learning_rate": 4.566052369169406e-05, "loss": 1.2555, "step": 70500 }, { "epoch": 0.26, "learning_rate": 4.562974726397558e-05, "loss": 1.2407, "step": 71000 }, { "epoch": 0.26, "learning_rate": 4.55989708362571e-05, "loss": 1.2478, "step": 71500 }, { "epoch": 0.27, "learning_rate": 4.556819440853862e-05, "loss": 1.2537, "step": 72000 }, { "epoch": 0.27, "learning_rate": 4.5537417980820136e-05, "loss": 1.2487, "step": 72500 }, { "epoch": 0.27, "learning_rate": 4.550664155310165e-05, "loss": 1.2418, "step": 73000 }, { "epoch": 0.27, "learning_rate": 4.547586512538317e-05, "loss": 1.2568, "step": 73500 }, { "epoch": 0.27, "learning_rate": 4.544508869766469e-05, "loss": 1.2428, "step": 74000 }, { "epoch": 0.28, "learning_rate": 4.5414312269946205e-05, "loss": 1.2588, "step": 74500 }, { "epoch": 0.28, "learning_rate": 4.538353584222773e-05, "loss": 1.2455, "step": 75000 }, { "epoch": 0.28, "learning_rate": 4.5352759414509246e-05, "loss": 1.2459, "step": 75500 }, { "epoch": 0.28, "learning_rate": 4.532198298679076e-05, "loss": 1.2502, "step": 76000 }, { "epoch": 0.28, "learning_rate": 4.529120655907228e-05, "loss": 1.2396, "step": 76500 }, { "epoch": 0.28, "learning_rate": 4.52604301313538e-05, "loss": 1.2437, "step": 77000 }, { "epoch": 0.29, "learning_rate": 4.5229653703635314e-05, "loss": 1.2482, "step": 77500 }, { "epoch": 0.29, "learning_rate": 4.519887727591683e-05, "loss": 1.2368, "step": 78000 }, { "epoch": 0.29, "learning_rate": 4.5168100848198355e-05, "loss": 1.2435, "step": 78500 }, { "epoch": 0.29, "learning_rate": 4.513732442047987e-05, "loss": 1.2463, "step": 79000 }, { "epoch": 0.29, "learning_rate": 4.510654799276139e-05, "loss": 1.239, "step": 79500 }, { "epoch": 0.3, "learning_rate": 4.5075771565042906e-05, "loss": 1.2439, "step": 80000 }, { "epoch": 0.3, "learning_rate": 4.504499513732442e-05, "loss": 1.2313, "step": 80500 }, { "epoch": 0.3, "learning_rate": 4.501421870960594e-05, "loss": 1.2419, "step": 81000 }, { "epoch": 0.3, "learning_rate": 4.4983442281887464e-05, "loss": 1.2356, "step": 81500 }, { "epoch": 0.3, "learning_rate": 4.495266585416898e-05, "loss": 1.2308, "step": 82000 }, { "epoch": 0.3, "learning_rate": 4.49218894264505e-05, "loss": 1.2382, "step": 82500 }, { "epoch": 0.31, "learning_rate": 4.4891112998732015e-05, "loss": 1.2555, "step": 83000 }, { "epoch": 0.31, "learning_rate": 4.486033657101353e-05, "loss": 1.245, "step": 83500 }, { "epoch": 0.31, "learning_rate": 4.482956014329505e-05, "loss": 1.2384, "step": 84000 }, { "epoch": 0.31, "learning_rate": 4.479878371557657e-05, "loss": 1.2379, "step": 84500 }, { "epoch": 0.31, "learning_rate": 4.476800728785809e-05, "loss": 1.241, "step": 85000 }, { "epoch": 0.32, "learning_rate": 4.473723086013961e-05, "loss": 1.2262, "step": 85500 }, { "epoch": 0.32, "learning_rate": 4.4706454432421125e-05, "loss": 1.2283, "step": 86000 }, { "epoch": 0.32, "learning_rate": 4.467567800470264e-05, "loss": 1.2331, "step": 86500 }, { "epoch": 0.32, "learning_rate": 4.464490157698416e-05, "loss": 1.2439, "step": 87000 }, { "epoch": 0.32, "learning_rate": 4.4614125149265676e-05, "loss": 1.2369, "step": 87500 }, { "epoch": 0.32, "learning_rate": 4.45833487215472e-05, "loss": 1.2335, "step": 88000 }, { "epoch": 0.33, "learning_rate": 4.455257229382872e-05, "loss": 1.236, "step": 88500 }, { "epoch": 0.33, "learning_rate": 4.4521795866110234e-05, "loss": 1.2333, "step": 89000 }, { "epoch": 0.33, "learning_rate": 4.449101943839175e-05, "loss": 1.2288, "step": 89500 }, { "epoch": 0.33, "learning_rate": 4.446024301067327e-05, "loss": 1.2315, "step": 90000 }, { "epoch": 0.33, "learning_rate": 4.4429466582954785e-05, "loss": 1.2349, "step": 90500 }, { "epoch": 0.34, "learning_rate": 4.43986901552363e-05, "loss": 1.2227, "step": 91000 }, { "epoch": 0.34, "learning_rate": 4.4367913727517826e-05, "loss": 1.2261, "step": 91500 }, { "epoch": 0.34, "learning_rate": 4.4337137299799343e-05, "loss": 1.2303, "step": 92000 }, { "epoch": 0.34, "learning_rate": 4.430636087208086e-05, "loss": 1.2235, "step": 92500 }, { "epoch": 0.34, "learning_rate": 4.427558444436238e-05, "loss": 1.2336, "step": 93000 }, { "epoch": 0.35, "learning_rate": 4.4244808016643895e-05, "loss": 1.2345, "step": 93500 }, { "epoch": 0.35, "learning_rate": 4.421403158892541e-05, "loss": 1.2257, "step": 94000 }, { "epoch": 0.35, "learning_rate": 4.4183255161206936e-05, "loss": 1.2212, "step": 94500 }, { "epoch": 0.35, "learning_rate": 4.415247873348845e-05, "loss": 1.2221, "step": 95000 }, { "epoch": 0.35, "learning_rate": 4.412170230576997e-05, "loss": 1.2238, "step": 95500 }, { "epoch": 0.35, "learning_rate": 4.409092587805149e-05, "loss": 1.2256, "step": 96000 }, { "epoch": 0.36, "learning_rate": 4.4060149450333004e-05, "loss": 1.2271, "step": 96500 }, { "epoch": 0.36, "learning_rate": 4.402937302261452e-05, "loss": 1.2193, "step": 97000 }, { "epoch": 0.36, "learning_rate": 4.399859659489604e-05, "loss": 1.223, "step": 97500 }, { "epoch": 0.36, "learning_rate": 4.396782016717756e-05, "loss": 1.2272, "step": 98000 }, { "epoch": 0.36, "learning_rate": 4.393704373945908e-05, "loss": 1.2184, "step": 98500 }, { "epoch": 0.37, "learning_rate": 4.3906267311740596e-05, "loss": 1.2223, "step": 99000 }, { "epoch": 0.37, "learning_rate": 4.387549088402211e-05, "loss": 1.2296, "step": 99500 }, { "epoch": 0.37, "learning_rate": 4.384471445630363e-05, "loss": 1.2229, "step": 100000 }, { "epoch": 0.37, "learning_rate": 4.381393802858515e-05, "loss": 1.2206, "step": 100500 }, { "epoch": 0.37, "learning_rate": 4.3783161600866665e-05, "loss": 1.2293, "step": 101000 }, { "epoch": 0.37, "learning_rate": 4.375238517314819e-05, "loss": 1.2245, "step": 101500 }, { "epoch": 0.38, "learning_rate": 4.3721608745429706e-05, "loss": 1.2254, "step": 102000 }, { "epoch": 0.38, "learning_rate": 4.369083231771122e-05, "loss": 1.2136, "step": 102500 }, { "epoch": 0.38, "learning_rate": 4.366005588999274e-05, "loss": 1.2289, "step": 103000 }, { "epoch": 0.38, "learning_rate": 4.362927946227426e-05, "loss": 1.2234, "step": 103500 }, { "epoch": 0.38, "learning_rate": 4.3598503034555774e-05, "loss": 1.2157, "step": 104000 }, { "epoch": 0.39, "learning_rate": 4.35677266068373e-05, "loss": 1.2285, "step": 104500 }, { "epoch": 0.39, "learning_rate": 4.3536950179118815e-05, "loss": 1.2198, "step": 105000 }, { "epoch": 0.39, "learning_rate": 4.350617375140033e-05, "loss": 1.2256, "step": 105500 }, { "epoch": 0.39, "learning_rate": 4.347539732368185e-05, "loss": 1.2267, "step": 106000 }, { "epoch": 0.39, "learning_rate": 4.3444620895963366e-05, "loss": 1.2103, "step": 106500 }, { "epoch": 0.4, "learning_rate": 4.341384446824488e-05, "loss": 1.2176, "step": 107000 }, { "epoch": 0.4, "learning_rate": 4.33830680405264e-05, "loss": 1.2279, "step": 107500 }, { "epoch": 0.4, "learning_rate": 4.3352291612807924e-05, "loss": 1.2208, "step": 108000 }, { "epoch": 0.4, "learning_rate": 4.332151518508944e-05, "loss": 1.221, "step": 108500 }, { "epoch": 0.4, "learning_rate": 4.329073875737096e-05, "loss": 1.2268, "step": 109000 }, { "epoch": 0.4, "learning_rate": 4.3259962329652475e-05, "loss": 1.2279, "step": 109500 }, { "epoch": 0.41, "learning_rate": 4.322918590193399e-05, "loss": 1.2205, "step": 110000 }, { "epoch": 0.41, "learning_rate": 4.319840947421551e-05, "loss": 1.2212, "step": 110500 }, { "epoch": 0.41, "learning_rate": 4.3167633046497034e-05, "loss": 1.2086, "step": 111000 }, { "epoch": 0.41, "learning_rate": 4.313685661877855e-05, "loss": 1.2144, "step": 111500 }, { "epoch": 0.41, "learning_rate": 4.310608019106007e-05, "loss": 1.2346, "step": 112000 }, { "epoch": 0.42, "learning_rate": 4.3075303763341585e-05, "loss": 1.2149, "step": 112500 }, { "epoch": 0.42, "learning_rate": 4.30445273356231e-05, "loss": 1.2156, "step": 113000 }, { "epoch": 0.42, "learning_rate": 4.301375090790462e-05, "loss": 1.2136, "step": 113500 }, { "epoch": 0.42, "learning_rate": 4.2982974480186136e-05, "loss": 1.2101, "step": 114000 }, { "epoch": 0.42, "learning_rate": 4.295219805246766e-05, "loss": 1.2077, "step": 114500 }, { "epoch": 0.42, "learning_rate": 4.292142162474918e-05, "loss": 1.2137, "step": 115000 }, { "epoch": 0.43, "learning_rate": 4.2890645197030694e-05, "loss": 1.2186, "step": 115500 }, { "epoch": 0.43, "learning_rate": 4.285986876931221e-05, "loss": 1.2302, "step": 116000 }, { "epoch": 0.43, "learning_rate": 4.282909234159373e-05, "loss": 1.2185, "step": 116500 }, { "epoch": 0.43, "learning_rate": 4.2798315913875245e-05, "loss": 1.21, "step": 117000 }, { "epoch": 0.43, "learning_rate": 4.276753948615676e-05, "loss": 1.2065, "step": 117500 }, { "epoch": 0.44, "learning_rate": 4.2736763058438286e-05, "loss": 1.2174, "step": 118000 }, { "epoch": 0.44, "learning_rate": 4.2705986630719803e-05, "loss": 1.2078, "step": 118500 }, { "epoch": 0.44, "learning_rate": 4.267521020300132e-05, "loss": 1.2036, "step": 119000 }, { "epoch": 0.44, "learning_rate": 4.264443377528284e-05, "loss": 1.2098, "step": 119500 }, { "epoch": 0.44, "learning_rate": 4.2613657347564355e-05, "loss": 1.209, "step": 120000 }, { "epoch": 0.45, "learning_rate": 4.258288091984587e-05, "loss": 1.2026, "step": 120500 }, { "epoch": 0.45, "learning_rate": 4.2552104492127396e-05, "loss": 1.2192, "step": 121000 }, { "epoch": 0.45, "learning_rate": 4.252132806440891e-05, "loss": 1.219, "step": 121500 }, { "epoch": 0.45, "learning_rate": 4.249055163669043e-05, "loss": 1.2026, "step": 122000 }, { "epoch": 0.45, "learning_rate": 4.245977520897195e-05, "loss": 1.2077, "step": 122500 }, { "epoch": 0.45, "learning_rate": 4.2428998781253464e-05, "loss": 1.2012, "step": 123000 }, { "epoch": 0.46, "learning_rate": 4.239822235353498e-05, "loss": 1.213, "step": 123500 }, { "epoch": 0.46, "learning_rate": 4.23674459258165e-05, "loss": 1.2107, "step": 124000 }, { "epoch": 0.46, "learning_rate": 4.233666949809802e-05, "loss": 1.2144, "step": 124500 }, { "epoch": 0.46, "learning_rate": 4.230589307037954e-05, "loss": 1.2044, "step": 125000 }, { "epoch": 0.46, "learning_rate": 4.2275116642661056e-05, "loss": 1.2031, "step": 125500 }, { "epoch": 0.47, "learning_rate": 4.224434021494257e-05, "loss": 1.2116, "step": 126000 }, { "epoch": 0.47, "learning_rate": 4.221356378722409e-05, "loss": 1.2063, "step": 126500 }, { "epoch": 0.47, "learning_rate": 4.218278735950561e-05, "loss": 1.211, "step": 127000 }, { "epoch": 0.47, "learning_rate": 4.215201093178713e-05, "loss": 1.2049, "step": 127500 }, { "epoch": 0.47, "learning_rate": 4.212123450406865e-05, "loss": 1.2059, "step": 128000 }, { "epoch": 0.47, "learning_rate": 4.2090458076350166e-05, "loss": 1.2088, "step": 128500 }, { "epoch": 0.48, "learning_rate": 4.205968164863168e-05, "loss": 1.2126, "step": 129000 }, { "epoch": 0.48, "learning_rate": 4.20289052209132e-05, "loss": 1.1968, "step": 129500 }, { "epoch": 0.48, "learning_rate": 4.199812879319472e-05, "loss": 1.1977, "step": 130000 }, { "epoch": 0.48, "learning_rate": 4.1967352365476234e-05, "loss": 1.2032, "step": 130500 }, { "epoch": 0.48, "learning_rate": 4.193657593775776e-05, "loss": 1.2078, "step": 131000 }, { "epoch": 0.49, "learning_rate": 4.1905799510039275e-05, "loss": 1.1938, "step": 131500 }, { "epoch": 0.49, "learning_rate": 4.187502308232079e-05, "loss": 1.2054, "step": 132000 }, { "epoch": 0.49, "learning_rate": 4.184424665460231e-05, "loss": 1.208, "step": 132500 }, { "epoch": 0.49, "learning_rate": 4.1813470226883826e-05, "loss": 1.1988, "step": 133000 }, { "epoch": 0.49, "learning_rate": 4.178269379916534e-05, "loss": 1.2053, "step": 133500 }, { "epoch": 0.49, "learning_rate": 4.175191737144686e-05, "loss": 1.1997, "step": 134000 }, { "epoch": 0.5, "learning_rate": 4.1721140943728384e-05, "loss": 1.2018, "step": 134500 }, { "epoch": 0.5, "learning_rate": 4.16903645160099e-05, "loss": 1.2037, "step": 135000 }, { "epoch": 0.5, "learning_rate": 4.165958808829142e-05, "loss": 1.2033, "step": 135500 }, { "epoch": 0.5, "learning_rate": 4.1628811660572935e-05, "loss": 1.1989, "step": 136000 }, { "epoch": 0.5, "learning_rate": 4.159803523285445e-05, "loss": 1.2092, "step": 136500 }, { "epoch": 0.51, "learning_rate": 4.156725880513597e-05, "loss": 1.2002, "step": 137000 }, { "epoch": 0.51, "learning_rate": 4.1536482377417493e-05, "loss": 1.2061, "step": 137500 }, { "epoch": 0.51, "learning_rate": 4.150570594969901e-05, "loss": 1.2063, "step": 138000 }, { "epoch": 0.51, "learning_rate": 4.147492952198053e-05, "loss": 1.2015, "step": 138500 }, { "epoch": 0.51, "learning_rate": 4.1444153094262045e-05, "loss": 1.1987, "step": 139000 }, { "epoch": 0.52, "learning_rate": 4.141337666654356e-05, "loss": 1.1982, "step": 139500 }, { "epoch": 0.52, "learning_rate": 4.138260023882508e-05, "loss": 1.2023, "step": 140000 }, { "epoch": 0.52, "learning_rate": 4.1351823811106596e-05, "loss": 1.1953, "step": 140500 }, { "epoch": 0.52, "learning_rate": 4.132104738338812e-05, "loss": 1.1943, "step": 141000 }, { "epoch": 0.52, "learning_rate": 4.129027095566964e-05, "loss": 1.2085, "step": 141500 }, { "epoch": 0.52, "learning_rate": 4.1259494527951154e-05, "loss": 1.1943, "step": 142000 }, { "epoch": 0.53, "learning_rate": 4.122871810023267e-05, "loss": 1.1994, "step": 142500 }, { "epoch": 0.53, "learning_rate": 4.119794167251419e-05, "loss": 1.2037, "step": 143000 }, { "epoch": 0.53, "learning_rate": 4.1167165244795705e-05, "loss": 1.1968, "step": 143500 }, { "epoch": 0.53, "learning_rate": 4.113638881707723e-05, "loss": 1.2031, "step": 144000 }, { "epoch": 0.53, "learning_rate": 4.1105612389358746e-05, "loss": 1.2052, "step": 144500 }, { "epoch": 0.54, "learning_rate": 4.107483596164026e-05, "loss": 1.2006, "step": 145000 }, { "epoch": 0.54, "learning_rate": 4.104405953392178e-05, "loss": 1.1961, "step": 145500 }, { "epoch": 0.54, "learning_rate": 4.10132831062033e-05, "loss": 1.1909, "step": 146000 }, { "epoch": 0.54, "learning_rate": 4.0982506678484815e-05, "loss": 1.1995, "step": 146500 }, { "epoch": 0.54, "learning_rate": 4.095173025076633e-05, "loss": 1.1978, "step": 147000 }, { "epoch": 0.54, "learning_rate": 4.0920953823047856e-05, "loss": 1.1952, "step": 147500 }, { "epoch": 0.55, "learning_rate": 4.089017739532937e-05, "loss": 1.1901, "step": 148000 }, { "epoch": 0.55, "learning_rate": 4.085940096761089e-05, "loss": 1.1974, "step": 148500 }, { "epoch": 0.55, "learning_rate": 4.082862453989241e-05, "loss": 1.1914, "step": 149000 }, { "epoch": 0.55, "learning_rate": 4.0797848112173924e-05, "loss": 1.193, "step": 149500 }, { "epoch": 0.55, "learning_rate": 4.076707168445544e-05, "loss": 1.1915, "step": 150000 }, { "epoch": 0.56, "learning_rate": 4.0736295256736965e-05, "loss": 1.1945, "step": 150500 }, { "epoch": 0.56, "learning_rate": 4.070551882901848e-05, "loss": 1.1986, "step": 151000 }, { "epoch": 0.56, "learning_rate": 4.06747424013e-05, "loss": 1.1928, "step": 151500 }, { "epoch": 0.56, "learning_rate": 4.0643965973581516e-05, "loss": 1.1946, "step": 152000 }, { "epoch": 0.56, "learning_rate": 4.061318954586303e-05, "loss": 1.1959, "step": 152500 }, { "epoch": 0.57, "learning_rate": 4.058241311814455e-05, "loss": 1.1911, "step": 153000 }, { "epoch": 0.57, "learning_rate": 4.055163669042607e-05, "loss": 1.1901, "step": 153500 }, { "epoch": 0.57, "learning_rate": 4.052086026270759e-05, "loss": 1.195, "step": 154000 }, { "epoch": 0.57, "learning_rate": 4.049008383498911e-05, "loss": 1.1938, "step": 154500 }, { "epoch": 0.57, "learning_rate": 4.0459307407270625e-05, "loss": 1.1994, "step": 155000 }, { "epoch": 0.57, "learning_rate": 4.042853097955214e-05, "loss": 1.1911, "step": 155500 }, { "epoch": 0.58, "learning_rate": 4.039775455183366e-05, "loss": 1.1942, "step": 156000 }, { "epoch": 0.58, "learning_rate": 4.036697812411518e-05, "loss": 1.1889, "step": 156500 }, { "epoch": 0.58, "learning_rate": 4.0336201696396694e-05, "loss": 1.185, "step": 157000 }, { "epoch": 0.58, "learning_rate": 4.030542526867822e-05, "loss": 1.1938, "step": 157500 }, { "epoch": 0.58, "learning_rate": 4.0274648840959735e-05, "loss": 1.1941, "step": 158000 }, { "epoch": 0.59, "learning_rate": 4.024387241324125e-05, "loss": 1.1771, "step": 158500 }, { "epoch": 0.59, "learning_rate": 4.021309598552277e-05, "loss": 1.1895, "step": 159000 }, { "epoch": 0.59, "learning_rate": 4.0182319557804286e-05, "loss": 1.1915, "step": 159500 }, { "epoch": 0.59, "learning_rate": 4.01515431300858e-05, "loss": 1.1935, "step": 160000 }, { "epoch": 0.59, "learning_rate": 4.012076670236733e-05, "loss": 1.194, "step": 160500 }, { "epoch": 0.59, "learning_rate": 4.0089990274648844e-05, "loss": 1.189, "step": 161000 }, { "epoch": 0.6, "learning_rate": 4.005921384693036e-05, "loss": 1.1821, "step": 161500 }, { "epoch": 0.6, "learning_rate": 4.002843741921188e-05, "loss": 1.1875, "step": 162000 }, { "epoch": 0.6, "learning_rate": 3.9997660991493395e-05, "loss": 1.1758, "step": 162500 }, { "epoch": 0.6, "learning_rate": 3.996688456377491e-05, "loss": 1.1868, "step": 163000 }, { "epoch": 0.6, "learning_rate": 3.993610813605643e-05, "loss": 1.1873, "step": 163500 }, { "epoch": 0.61, "learning_rate": 3.9905331708337953e-05, "loss": 1.1875, "step": 164000 }, { "epoch": 0.61, "learning_rate": 3.987455528061947e-05, "loss": 1.1864, "step": 164500 }, { "epoch": 0.61, "learning_rate": 3.984377885290099e-05, "loss": 1.1872, "step": 165000 }, { "epoch": 0.61, "learning_rate": 3.9813002425182505e-05, "loss": 1.1796, "step": 165500 }, { "epoch": 0.61, "learning_rate": 3.978222599746402e-05, "loss": 1.1841, "step": 166000 }, { "epoch": 0.61, "learning_rate": 3.975144956974554e-05, "loss": 1.1899, "step": 166500 }, { "epoch": 0.62, "learning_rate": 3.972067314202706e-05, "loss": 1.1836, "step": 167000 }, { "epoch": 0.62, "learning_rate": 3.968989671430858e-05, "loss": 1.1829, "step": 167500 }, { "epoch": 0.62, "learning_rate": 3.96591202865901e-05, "loss": 1.1899, "step": 168000 }, { "epoch": 0.62, "learning_rate": 3.9628343858871614e-05, "loss": 1.1746, "step": 168500 }, { "epoch": 0.62, "learning_rate": 3.959756743115313e-05, "loss": 1.1882, "step": 169000 }, { "epoch": 0.63, "learning_rate": 3.956679100343465e-05, "loss": 1.1935, "step": 169500 }, { "epoch": 0.63, "learning_rate": 3.9536014575716165e-05, "loss": 1.1812, "step": 170000 }, { "epoch": 0.63, "learning_rate": 3.950523814799769e-05, "loss": 1.187, "step": 170500 }, { "epoch": 0.63, "learning_rate": 3.9474461720279206e-05, "loss": 1.1849, "step": 171000 }, { "epoch": 0.63, "learning_rate": 3.944368529256072e-05, "loss": 1.1839, "step": 171500 }, { "epoch": 0.64, "learning_rate": 3.941290886484224e-05, "loss": 1.1804, "step": 172000 }, { "epoch": 0.64, "learning_rate": 3.938213243712376e-05, "loss": 1.1821, "step": 172500 }, { "epoch": 0.64, "learning_rate": 3.9351356009405275e-05, "loss": 1.1805, "step": 173000 }, { "epoch": 0.64, "learning_rate": 3.932057958168679e-05, "loss": 1.1808, "step": 173500 }, { "epoch": 0.64, "learning_rate": 3.9289803153968316e-05, "loss": 1.1842, "step": 174000 }, { "epoch": 0.64, "learning_rate": 3.925902672624983e-05, "loss": 1.1811, "step": 174500 }, { "epoch": 0.65, "learning_rate": 3.922825029853135e-05, "loss": 1.1841, "step": 175000 }, { "epoch": 0.65, "learning_rate": 3.919747387081287e-05, "loss": 1.1749, "step": 175500 }, { "epoch": 0.65, "learning_rate": 3.9166697443094384e-05, "loss": 1.1738, "step": 176000 }, { "epoch": 0.65, "learning_rate": 3.91359210153759e-05, "loss": 1.1821, "step": 176500 }, { "epoch": 0.65, "learning_rate": 3.9105144587657425e-05, "loss": 1.1717, "step": 177000 }, { "epoch": 0.66, "learning_rate": 3.907436815993894e-05, "loss": 1.183, "step": 177500 }, { "epoch": 0.66, "learning_rate": 3.904359173222046e-05, "loss": 1.1745, "step": 178000 }, { "epoch": 0.66, "learning_rate": 3.9012815304501976e-05, "loss": 1.1769, "step": 178500 }, { "epoch": 0.66, "learning_rate": 3.898203887678349e-05, "loss": 1.181, "step": 179000 }, { "epoch": 0.66, "learning_rate": 3.895126244906501e-05, "loss": 1.1838, "step": 179500 }, { "epoch": 0.66, "learning_rate": 3.892048602134653e-05, "loss": 1.1808, "step": 180000 }, { "epoch": 0.67, "learning_rate": 3.888970959362805e-05, "loss": 1.1802, "step": 180500 }, { "epoch": 0.67, "learning_rate": 3.885893316590957e-05, "loss": 1.1713, "step": 181000 }, { "epoch": 0.67, "learning_rate": 3.8828156738191085e-05, "loss": 1.1832, "step": 181500 }, { "epoch": 0.67, "learning_rate": 3.87973803104726e-05, "loss": 1.1727, "step": 182000 }, { "epoch": 0.67, "learning_rate": 3.876660388275412e-05, "loss": 1.1835, "step": 182500 }, { "epoch": 0.68, "learning_rate": 3.873582745503564e-05, "loss": 1.1777, "step": 183000 }, { "epoch": 0.68, "learning_rate": 3.870505102731716e-05, "loss": 1.1784, "step": 183500 }, { "epoch": 0.68, "learning_rate": 3.867427459959868e-05, "loss": 1.173, "step": 184000 }, { "epoch": 0.68, "learning_rate": 3.8643498171880195e-05, "loss": 1.1789, "step": 184500 }, { "epoch": 0.68, "learning_rate": 3.861272174416171e-05, "loss": 1.1719, "step": 185000 }, { "epoch": 0.69, "learning_rate": 3.858194531644323e-05, "loss": 1.1741, "step": 185500 }, { "epoch": 0.69, "learning_rate": 3.8551168888724746e-05, "loss": 1.1817, "step": 186000 }, { "epoch": 0.69, "learning_rate": 3.852039246100626e-05, "loss": 1.1754, "step": 186500 }, { "epoch": 0.69, "learning_rate": 3.848961603328779e-05, "loss": 1.1789, "step": 187000 }, { "epoch": 0.69, "learning_rate": 3.8458839605569304e-05, "loss": 1.1693, "step": 187500 }, { "epoch": 0.69, "learning_rate": 3.842806317785082e-05, "loss": 1.1775, "step": 188000 }, { "epoch": 0.7, "learning_rate": 3.839728675013234e-05, "loss": 1.1698, "step": 188500 }, { "epoch": 0.7, "learning_rate": 3.8366510322413855e-05, "loss": 1.1773, "step": 189000 }, { "epoch": 0.7, "learning_rate": 3.833573389469537e-05, "loss": 1.1791, "step": 189500 }, { "epoch": 0.7, "learning_rate": 3.830495746697689e-05, "loss": 1.1708, "step": 190000 }, { "epoch": 0.7, "learning_rate": 3.8274181039258413e-05, "loss": 1.1702, "step": 190500 }, { "epoch": 0.71, "learning_rate": 3.824340461153993e-05, "loss": 1.1818, "step": 191000 }, { "epoch": 0.71, "learning_rate": 3.821262818382145e-05, "loss": 1.1785, "step": 191500 }, { "epoch": 0.71, "learning_rate": 3.8181851756102965e-05, "loss": 1.1732, "step": 192000 }, { "epoch": 0.71, "learning_rate": 3.815107532838448e-05, "loss": 1.1719, "step": 192500 }, { "epoch": 0.71, "learning_rate": 3.8120298900666e-05, "loss": 1.176, "step": 193000 }, { "epoch": 0.71, "learning_rate": 3.808952247294752e-05, "loss": 1.1743, "step": 193500 }, { "epoch": 0.72, "learning_rate": 3.805874604522904e-05, "loss": 1.1712, "step": 194000 }, { "epoch": 0.72, "learning_rate": 3.802796961751056e-05, "loss": 1.1693, "step": 194500 }, { "epoch": 0.72, "learning_rate": 3.7997193189792074e-05, "loss": 1.1695, "step": 195000 }, { "epoch": 0.72, "learning_rate": 3.796641676207359e-05, "loss": 1.1749, "step": 195500 }, { "epoch": 0.72, "learning_rate": 3.793564033435511e-05, "loss": 1.1658, "step": 196000 }, { "epoch": 0.73, "learning_rate": 3.7904863906636625e-05, "loss": 1.1661, "step": 196500 }, { "epoch": 0.73, "learning_rate": 3.787408747891815e-05, "loss": 1.179, "step": 197000 }, { "epoch": 0.73, "learning_rate": 3.7843311051199666e-05, "loss": 1.1703, "step": 197500 }, { "epoch": 0.73, "learning_rate": 3.781253462348118e-05, "loss": 1.1705, "step": 198000 }, { "epoch": 0.73, "learning_rate": 3.77817581957627e-05, "loss": 1.1607, "step": 198500 }, { "epoch": 0.73, "learning_rate": 3.775098176804422e-05, "loss": 1.1698, "step": 199000 }, { "epoch": 0.74, "learning_rate": 3.7720205340325735e-05, "loss": 1.1671, "step": 199500 }, { "epoch": 0.74, "learning_rate": 3.768942891260726e-05, "loss": 1.1753, "step": 200000 }, { "epoch": 0.74, "learning_rate": 3.7658652484888776e-05, "loss": 1.1637, "step": 200500 }, { "epoch": 0.74, "learning_rate": 3.762787605717029e-05, "loss": 1.169, "step": 201000 }, { "epoch": 0.74, "learning_rate": 3.759709962945181e-05, "loss": 1.1707, "step": 201500 }, { "epoch": 0.75, "learning_rate": 3.756632320173333e-05, "loss": 1.1625, "step": 202000 }, { "epoch": 0.75, "learning_rate": 3.7535546774014844e-05, "loss": 1.1568, "step": 202500 }, { "epoch": 0.75, "learning_rate": 3.750477034629636e-05, "loss": 1.1629, "step": 203000 }, { "epoch": 0.75, "learning_rate": 3.7473993918577885e-05, "loss": 1.1645, "step": 203500 }, { "epoch": 0.75, "learning_rate": 3.74432174908594e-05, "loss": 1.1689, "step": 204000 }, { "epoch": 0.76, "learning_rate": 3.741244106314092e-05, "loss": 1.1638, "step": 204500 }, { "epoch": 0.76, "learning_rate": 3.7381664635422436e-05, "loss": 1.1676, "step": 205000 }, { "epoch": 0.76, "learning_rate": 3.735088820770395e-05, "loss": 1.1614, "step": 205500 }, { "epoch": 0.76, "learning_rate": 3.732011177998547e-05, "loss": 1.1637, "step": 206000 }, { "epoch": 0.76, "learning_rate": 3.7289335352266994e-05, "loss": 1.161, "step": 206500 }, { "epoch": 0.76, "learning_rate": 3.725855892454851e-05, "loss": 1.1651, "step": 207000 }, { "epoch": 0.77, "learning_rate": 3.722778249683003e-05, "loss": 1.1691, "step": 207500 }, { "epoch": 0.77, "learning_rate": 3.7197006069111545e-05, "loss": 1.1666, "step": 208000 }, { "epoch": 0.77, "learning_rate": 3.716622964139306e-05, "loss": 1.1594, "step": 208500 }, { "epoch": 0.77, "learning_rate": 3.713545321367458e-05, "loss": 1.1636, "step": 209000 }, { "epoch": 0.77, "learning_rate": 3.71046767859561e-05, "loss": 1.1646, "step": 209500 }, { "epoch": 0.78, "learning_rate": 3.707390035823762e-05, "loss": 1.1693, "step": 210000 }, { "epoch": 0.78, "learning_rate": 3.7043123930519144e-05, "loss": 1.1628, "step": 210500 }, { "epoch": 0.78, "learning_rate": 3.701234750280066e-05, "loss": 1.1708, "step": 211000 }, { "epoch": 0.78, "learning_rate": 3.698157107508218e-05, "loss": 1.1617, "step": 211500 }, { "epoch": 0.78, "learning_rate": 3.6950794647363696e-05, "loss": 1.1649, "step": 212000 }, { "epoch": 0.78, "learning_rate": 3.692001821964521e-05, "loss": 1.1599, "step": 212500 }, { "epoch": 0.79, "learning_rate": 3.688924179192673e-05, "loss": 1.1562, "step": 213000 }, { "epoch": 0.79, "learning_rate": 3.685846536420825e-05, "loss": 1.1553, "step": 213500 }, { "epoch": 0.79, "learning_rate": 3.682768893648977e-05, "loss": 1.1671, "step": 214000 }, { "epoch": 0.79, "learning_rate": 3.679691250877129e-05, "loss": 1.1593, "step": 214500 }, { "epoch": 0.79, "learning_rate": 3.6766136081052805e-05, "loss": 1.1561, "step": 215000 }, { "epoch": 0.8, "learning_rate": 3.673535965333432e-05, "loss": 1.1652, "step": 215500 }, { "epoch": 0.8, "learning_rate": 3.670458322561584e-05, "loss": 1.1615, "step": 216000 }, { "epoch": 0.8, "learning_rate": 3.6673806797897356e-05, "loss": 1.1635, "step": 216500 }, { "epoch": 0.8, "learning_rate": 3.664303037017888e-05, "loss": 1.1586, "step": 217000 }, { "epoch": 0.8, "learning_rate": 3.66122539424604e-05, "loss": 1.1531, "step": 217500 }, { "epoch": 0.81, "learning_rate": 3.6581477514741914e-05, "loss": 1.1629, "step": 218000 }, { "epoch": 0.81, "learning_rate": 3.655070108702343e-05, "loss": 1.1649, "step": 218500 }, { "epoch": 0.81, "learning_rate": 3.651992465930495e-05, "loss": 1.1541, "step": 219000 }, { "epoch": 0.81, "learning_rate": 3.6489148231586466e-05, "loss": 1.1532, "step": 219500 }, { "epoch": 0.81, "learning_rate": 3.645837180386798e-05, "loss": 1.1637, "step": 220000 }, { "epoch": 0.81, "learning_rate": 3.6427595376149507e-05, "loss": 1.1622, "step": 220500 }, { "epoch": 0.82, "learning_rate": 3.6396818948431024e-05, "loss": 1.1599, "step": 221000 }, { "epoch": 0.82, "learning_rate": 3.636604252071254e-05, "loss": 1.1606, "step": 221500 }, { "epoch": 0.82, "learning_rate": 3.633526609299406e-05, "loss": 1.16, "step": 222000 }, { "epoch": 0.82, "learning_rate": 3.6304489665275575e-05, "loss": 1.1663, "step": 222500 }, { "epoch": 0.82, "learning_rate": 3.627371323755709e-05, "loss": 1.1549, "step": 223000 }, { "epoch": 0.83, "learning_rate": 3.624293680983861e-05, "loss": 1.1636, "step": 223500 }, { "epoch": 0.83, "learning_rate": 3.621216038212013e-05, "loss": 1.1565, "step": 224000 }, { "epoch": 0.83, "learning_rate": 3.618138395440165e-05, "loss": 1.1622, "step": 224500 }, { "epoch": 0.83, "learning_rate": 3.615060752668317e-05, "loss": 1.1639, "step": 225000 }, { "epoch": 0.83, "learning_rate": 3.6119831098964684e-05, "loss": 1.1588, "step": 225500 }, { "epoch": 0.83, "learning_rate": 3.60890546712462e-05, "loss": 1.155, "step": 226000 }, { "epoch": 0.84, "learning_rate": 3.605827824352772e-05, "loss": 1.152, "step": 226500 }, { "epoch": 0.84, "learning_rate": 3.602750181580924e-05, "loss": 1.1504, "step": 227000 }, { "epoch": 0.84, "learning_rate": 3.599672538809076e-05, "loss": 1.1579, "step": 227500 }, { "epoch": 0.84, "learning_rate": 3.5965948960372276e-05, "loss": 1.1603, "step": 228000 }, { "epoch": 0.84, "learning_rate": 3.5935172532653794e-05, "loss": 1.1619, "step": 228500 }, { "epoch": 0.85, "learning_rate": 3.590439610493531e-05, "loss": 1.1486, "step": 229000 }, { "epoch": 0.85, "learning_rate": 3.587361967721683e-05, "loss": 1.1568, "step": 229500 }, { "epoch": 0.85, "learning_rate": 3.5842843249498345e-05, "loss": 1.1534, "step": 230000 }, { "epoch": 0.85, "learning_rate": 3.581206682177987e-05, "loss": 1.1532, "step": 230500 }, { "epoch": 0.85, "learning_rate": 3.5781290394061386e-05, "loss": 1.1587, "step": 231000 }, { "epoch": 0.85, "learning_rate": 3.57505139663429e-05, "loss": 1.1572, "step": 231500 }, { "epoch": 0.86, "learning_rate": 3.571973753862442e-05, "loss": 1.1539, "step": 232000 }, { "epoch": 0.86, "learning_rate": 3.568896111090594e-05, "loss": 1.1548, "step": 232500 }, { "epoch": 0.86, "learning_rate": 3.5658184683187454e-05, "loss": 1.1496, "step": 233000 }, { "epoch": 0.86, "learning_rate": 3.562740825546898e-05, "loss": 1.1589, "step": 233500 }, { "epoch": 0.86, "learning_rate": 3.5596631827750495e-05, "loss": 1.1562, "step": 234000 }, { "epoch": 0.87, "learning_rate": 3.556585540003201e-05, "loss": 1.1447, "step": 234500 }, { "epoch": 0.87, "learning_rate": 3.553507897231353e-05, "loss": 1.1534, "step": 235000 }, { "epoch": 0.87, "learning_rate": 3.5504302544595046e-05, "loss": 1.1518, "step": 235500 }, { "epoch": 0.87, "learning_rate": 3.5473526116876563e-05, "loss": 1.1538, "step": 236000 }, { "epoch": 0.87, "learning_rate": 3.544274968915808e-05, "loss": 1.1462, "step": 236500 }, { "epoch": 0.88, "learning_rate": 3.5411973261439604e-05, "loss": 1.1496, "step": 237000 }, { "epoch": 0.88, "learning_rate": 3.538119683372112e-05, "loss": 1.1523, "step": 237500 }, { "epoch": 0.88, "learning_rate": 3.535042040600264e-05, "loss": 1.1456, "step": 238000 }, { "epoch": 0.88, "learning_rate": 3.5319643978284156e-05, "loss": 1.1486, "step": 238500 }, { "epoch": 0.88, "learning_rate": 3.528886755056567e-05, "loss": 1.1433, "step": 239000 }, { "epoch": 0.88, "learning_rate": 3.525809112284719e-05, "loss": 1.1533, "step": 239500 }, { "epoch": 0.89, "learning_rate": 3.522731469512871e-05, "loss": 1.1488, "step": 240000 }, { "epoch": 0.89, "learning_rate": 3.519653826741023e-05, "loss": 1.1501, "step": 240500 }, { "epoch": 0.89, "learning_rate": 3.516576183969175e-05, "loss": 1.1464, "step": 241000 }, { "epoch": 0.89, "learning_rate": 3.5134985411973265e-05, "loss": 1.1515, "step": 241500 }, { "epoch": 0.89, "learning_rate": 3.510420898425478e-05, "loss": 1.1446, "step": 242000 }, { "epoch": 0.9, "learning_rate": 3.50734325565363e-05, "loss": 1.1515, "step": 242500 }, { "epoch": 0.9, "learning_rate": 3.5042656128817816e-05, "loss": 1.1478, "step": 243000 }, { "epoch": 0.9, "learning_rate": 3.501187970109934e-05, "loss": 1.1478, "step": 243500 }, { "epoch": 0.9, "learning_rate": 3.498110327338086e-05, "loss": 1.1541, "step": 244000 }, { "epoch": 0.9, "learning_rate": 3.4950326845662374e-05, "loss": 1.1524, "step": 244500 }, { "epoch": 0.9, "learning_rate": 3.491955041794389e-05, "loss": 1.1595, "step": 245000 }, { "epoch": 0.91, "learning_rate": 3.488877399022541e-05, "loss": 1.1546, "step": 245500 }, { "epoch": 0.91, "learning_rate": 3.4857997562506926e-05, "loss": 1.1554, "step": 246000 }, { "epoch": 0.91, "learning_rate": 3.482722113478844e-05, "loss": 1.1479, "step": 246500 }, { "epoch": 0.91, "learning_rate": 3.4796444707069967e-05, "loss": 1.1498, "step": 247000 }, { "epoch": 0.91, "learning_rate": 3.4765668279351484e-05, "loss": 1.1502, "step": 247500 }, { "epoch": 0.92, "learning_rate": 3.4734891851633e-05, "loss": 1.1477, "step": 248000 }, { "epoch": 0.92, "learning_rate": 3.470411542391452e-05, "loss": 1.1501, "step": 248500 }, { "epoch": 0.92, "learning_rate": 3.4673338996196035e-05, "loss": 1.1536, "step": 249000 }, { "epoch": 0.92, "learning_rate": 3.464256256847755e-05, "loss": 1.149, "step": 249500 }, { "epoch": 0.92, "learning_rate": 3.4611786140759076e-05, "loss": 1.1513, "step": 250000 }, { "epoch": 0.93, "learning_rate": 3.458100971304059e-05, "loss": 1.1459, "step": 250500 }, { "epoch": 0.93, "learning_rate": 3.455023328532211e-05, "loss": 1.1386, "step": 251000 }, { "epoch": 0.93, "learning_rate": 3.451945685760363e-05, "loss": 1.1449, "step": 251500 }, { "epoch": 0.93, "learning_rate": 3.4488680429885144e-05, "loss": 1.1577, "step": 252000 }, { "epoch": 0.93, "learning_rate": 3.445790400216666e-05, "loss": 1.1557, "step": 252500 }, { "epoch": 0.93, "learning_rate": 3.442712757444818e-05, "loss": 1.1495, "step": 253000 }, { "epoch": 0.94, "learning_rate": 3.43963511467297e-05, "loss": 1.1445, "step": 253500 }, { "epoch": 0.94, "learning_rate": 3.436557471901122e-05, "loss": 1.1511, "step": 254000 }, { "epoch": 0.94, "learning_rate": 3.4334798291292736e-05, "loss": 1.1509, "step": 254500 }, { "epoch": 0.94, "learning_rate": 3.4304021863574254e-05, "loss": 1.1537, "step": 255000 }, { "epoch": 0.94, "learning_rate": 3.427324543585577e-05, "loss": 1.1451, "step": 255500 }, { "epoch": 0.95, "learning_rate": 3.424246900813729e-05, "loss": 1.1454, "step": 256000 }, { "epoch": 0.95, "learning_rate": 3.421169258041881e-05, "loss": 1.15, "step": 256500 }, { "epoch": 0.95, "learning_rate": 3.418091615270033e-05, "loss": 1.1492, "step": 257000 }, { "epoch": 0.95, "learning_rate": 3.4150139724981846e-05, "loss": 1.1415, "step": 257500 }, { "epoch": 0.95, "learning_rate": 3.411936329726336e-05, "loss": 1.1405, "step": 258000 }, { "epoch": 0.95, "learning_rate": 3.408858686954488e-05, "loss": 1.1472, "step": 258500 }, { "epoch": 0.96, "learning_rate": 3.40578104418264e-05, "loss": 1.1429, "step": 259000 }, { "epoch": 0.96, "learning_rate": 3.4027034014107914e-05, "loss": 1.1368, "step": 259500 }, { "epoch": 0.96, "learning_rate": 3.399625758638944e-05, "loss": 1.1467, "step": 260000 }, { "epoch": 0.96, "learning_rate": 3.3965481158670955e-05, "loss": 1.1562, "step": 260500 }, { "epoch": 0.96, "learning_rate": 3.393470473095247e-05, "loss": 1.1372, "step": 261000 }, { "epoch": 0.97, "learning_rate": 3.390392830323399e-05, "loss": 1.1459, "step": 261500 }, { "epoch": 0.97, "learning_rate": 3.3873151875515506e-05, "loss": 1.1473, "step": 262000 }, { "epoch": 0.97, "learning_rate": 3.3842375447797023e-05, "loss": 1.1462, "step": 262500 }, { "epoch": 0.97, "learning_rate": 3.381159902007854e-05, "loss": 1.1363, "step": 263000 }, { "epoch": 0.97, "learning_rate": 3.3780822592360064e-05, "loss": 1.1392, "step": 263500 }, { "epoch": 0.97, "learning_rate": 3.375004616464158e-05, "loss": 1.1459, "step": 264000 }, { "epoch": 0.98, "learning_rate": 3.37192697369231e-05, "loss": 1.144, "step": 264500 }, { "epoch": 0.98, "learning_rate": 3.3688493309204616e-05, "loss": 1.1426, "step": 265000 }, { "epoch": 0.98, "learning_rate": 3.365771688148613e-05, "loss": 1.1369, "step": 265500 }, { "epoch": 0.98, "learning_rate": 3.362694045376765e-05, "loss": 1.1415, "step": 266000 }, { "epoch": 0.98, "learning_rate": 3.3596164026049174e-05, "loss": 1.1548, "step": 266500 }, { "epoch": 0.99, "learning_rate": 3.356538759833069e-05, "loss": 1.1399, "step": 267000 }, { "epoch": 0.99, "learning_rate": 3.353461117061221e-05, "loss": 1.1362, "step": 267500 }, { "epoch": 0.99, "learning_rate": 3.3503834742893725e-05, "loss": 1.1469, "step": 268000 }, { "epoch": 0.99, "learning_rate": 3.347305831517524e-05, "loss": 1.139, "step": 268500 }, { "epoch": 0.99, "learning_rate": 3.344228188745676e-05, "loss": 1.1356, "step": 269000 }, { "epoch": 1.0, "learning_rate": 3.3411505459738276e-05, "loss": 1.1382, "step": 269500 }, { "epoch": 1.0, "learning_rate": 3.33807290320198e-05, "loss": 1.1351, "step": 270000 }, { "epoch": 1.0, "learning_rate": 3.334995260430132e-05, "loss": 1.1331, "step": 270500 }, { "epoch": 1.0, "learning_rate": 3.3319176176582834e-05, "loss": 1.1403, "step": 271000 }, { "epoch": 1.0, "learning_rate": 3.328839974886435e-05, "loss": 1.1388, "step": 271500 }, { "epoch": 1.0, "learning_rate": 3.325762332114587e-05, "loss": 1.1388, "step": 272000 }, { "epoch": 1.01, "learning_rate": 3.3226846893427386e-05, "loss": 1.1347, "step": 272500 }, { "epoch": 1.01, "learning_rate": 3.319607046570891e-05, "loss": 1.1375, "step": 273000 }, { "epoch": 1.01, "learning_rate": 3.3165294037990427e-05, "loss": 1.1324, "step": 273500 }, { "epoch": 1.01, "learning_rate": 3.3134517610271944e-05, "loss": 1.1322, "step": 274000 }, { "epoch": 1.01, "learning_rate": 3.310374118255346e-05, "loss": 1.1442, "step": 274500 }, { "epoch": 1.02, "learning_rate": 3.307296475483498e-05, "loss": 1.1326, "step": 275000 }, { "epoch": 1.02, "learning_rate": 3.3042188327116495e-05, "loss": 1.1346, "step": 275500 }, { "epoch": 1.02, "learning_rate": 3.301141189939801e-05, "loss": 1.1319, "step": 276000 }, { "epoch": 1.02, "learning_rate": 3.2980635471679536e-05, "loss": 1.132, "step": 276500 }, { "epoch": 1.02, "learning_rate": 3.294985904396105e-05, "loss": 1.1366, "step": 277000 }, { "epoch": 1.02, "learning_rate": 3.291908261624257e-05, "loss": 1.136, "step": 277500 }, { "epoch": 1.03, "learning_rate": 3.288830618852409e-05, "loss": 1.1282, "step": 278000 }, { "epoch": 1.03, "learning_rate": 3.2857529760805604e-05, "loss": 1.1345, "step": 278500 }, { "epoch": 1.03, "learning_rate": 3.282675333308712e-05, "loss": 1.1328, "step": 279000 }, { "epoch": 1.03, "learning_rate": 3.279597690536864e-05, "loss": 1.1454, "step": 279500 }, { "epoch": 1.03, "learning_rate": 3.276520047765016e-05, "loss": 1.1363, "step": 280000 }, { "epoch": 1.04, "learning_rate": 3.273442404993168e-05, "loss": 1.1317, "step": 280500 }, { "epoch": 1.04, "learning_rate": 3.2703647622213196e-05, "loss": 1.1365, "step": 281000 }, { "epoch": 1.04, "learning_rate": 3.2672871194494713e-05, "loss": 1.1274, "step": 281500 }, { "epoch": 1.04, "learning_rate": 3.264209476677623e-05, "loss": 1.1183, "step": 282000 }, { "epoch": 1.04, "learning_rate": 3.261131833905775e-05, "loss": 1.1323, "step": 282500 }, { "epoch": 1.05, "learning_rate": 3.258054191133927e-05, "loss": 1.1345, "step": 283000 }, { "epoch": 1.05, "learning_rate": 3.254976548362079e-05, "loss": 1.1327, "step": 283500 }, { "epoch": 1.05, "learning_rate": 3.2518989055902306e-05, "loss": 1.1311, "step": 284000 }, { "epoch": 1.05, "learning_rate": 3.248821262818382e-05, "loss": 1.1279, "step": 284500 }, { "epoch": 1.05, "learning_rate": 3.245743620046534e-05, "loss": 1.1349, "step": 285000 }, { "epoch": 1.05, "learning_rate": 3.242665977274686e-05, "loss": 1.1286, "step": 285500 }, { "epoch": 1.06, "learning_rate": 3.2395883345028374e-05, "loss": 1.1328, "step": 286000 }, { "epoch": 1.06, "learning_rate": 3.23651069173099e-05, "loss": 1.1296, "step": 286500 }, { "epoch": 1.06, "learning_rate": 3.2334330489591415e-05, "loss": 1.1322, "step": 287000 }, { "epoch": 1.06, "learning_rate": 3.230355406187293e-05, "loss": 1.1292, "step": 287500 }, { "epoch": 1.06, "learning_rate": 3.227277763415445e-05, "loss": 1.1322, "step": 288000 }, { "epoch": 1.07, "learning_rate": 3.2242001206435966e-05, "loss": 1.1352, "step": 288500 }, { "epoch": 1.07, "learning_rate": 3.2211224778717483e-05, "loss": 1.1268, "step": 289000 }, { "epoch": 1.07, "learning_rate": 3.218044835099901e-05, "loss": 1.1239, "step": 289500 }, { "epoch": 1.07, "learning_rate": 3.2149671923280524e-05, "loss": 1.129, "step": 290000 }, { "epoch": 1.07, "learning_rate": 3.211889549556204e-05, "loss": 1.1321, "step": 290500 }, { "epoch": 1.07, "learning_rate": 3.208811906784356e-05, "loss": 1.1374, "step": 291000 }, { "epoch": 1.08, "learning_rate": 3.2057342640125076e-05, "loss": 1.1297, "step": 291500 }, { "epoch": 1.08, "learning_rate": 3.202656621240659e-05, "loss": 1.1311, "step": 292000 }, { "epoch": 1.08, "learning_rate": 3.199578978468811e-05, "loss": 1.1225, "step": 292500 }, { "epoch": 1.08, "learning_rate": 3.1965013356969634e-05, "loss": 1.1225, "step": 293000 }, { "epoch": 1.08, "learning_rate": 3.193423692925115e-05, "loss": 1.1295, "step": 293500 }, { "epoch": 1.09, "learning_rate": 3.190346050153267e-05, "loss": 1.1262, "step": 294000 }, { "epoch": 1.09, "learning_rate": 3.1872684073814185e-05, "loss": 1.1258, "step": 294500 }, { "epoch": 1.09, "learning_rate": 3.18419076460957e-05, "loss": 1.1154, "step": 295000 }, { "epoch": 1.09, "learning_rate": 3.181113121837722e-05, "loss": 1.1327, "step": 295500 }, { "epoch": 1.09, "learning_rate": 3.1780354790658736e-05, "loss": 1.1235, "step": 296000 }, { "epoch": 1.1, "learning_rate": 3.174957836294026e-05, "loss": 1.1287, "step": 296500 }, { "epoch": 1.1, "learning_rate": 3.171880193522178e-05, "loss": 1.128, "step": 297000 }, { "epoch": 1.1, "learning_rate": 3.1688025507503294e-05, "loss": 1.1221, "step": 297500 }, { "epoch": 1.1, "learning_rate": 3.165724907978481e-05, "loss": 1.1259, "step": 298000 }, { "epoch": 1.1, "learning_rate": 3.162647265206633e-05, "loss": 1.1263, "step": 298500 }, { "epoch": 1.1, "learning_rate": 3.1595696224347846e-05, "loss": 1.1298, "step": 299000 }, { "epoch": 1.11, "learning_rate": 3.156491979662937e-05, "loss": 1.1263, "step": 299500 }, { "epoch": 1.11, "learning_rate": 3.1534143368910886e-05, "loss": 1.1299, "step": 300000 }, { "epoch": 1.11, "learning_rate": 3.1503366941192404e-05, "loss": 1.1265, "step": 300500 }, { "epoch": 1.11, "learning_rate": 3.147259051347392e-05, "loss": 1.1249, "step": 301000 }, { "epoch": 1.11, "learning_rate": 3.144181408575544e-05, "loss": 1.1206, "step": 301500 }, { "epoch": 1.12, "learning_rate": 3.1411037658036955e-05, "loss": 1.1186, "step": 302000 }, { "epoch": 1.12, "learning_rate": 3.138026123031847e-05, "loss": 1.1181, "step": 302500 }, { "epoch": 1.12, "learning_rate": 3.1349484802599996e-05, "loss": 1.1291, "step": 303000 }, { "epoch": 1.12, "learning_rate": 3.131870837488151e-05, "loss": 1.1262, "step": 303500 }, { "epoch": 1.12, "learning_rate": 3.128793194716303e-05, "loss": 1.1227, "step": 304000 }, { "epoch": 1.12, "learning_rate": 3.125715551944455e-05, "loss": 1.1233, "step": 304500 }, { "epoch": 1.13, "learning_rate": 3.1226379091726064e-05, "loss": 1.1156, "step": 305000 }, { "epoch": 1.13, "learning_rate": 3.119560266400758e-05, "loss": 1.1147, "step": 305500 }, { "epoch": 1.13, "learning_rate": 3.1164826236289105e-05, "loss": 1.1226, "step": 306000 }, { "epoch": 1.13, "learning_rate": 3.113404980857062e-05, "loss": 1.1175, "step": 306500 }, { "epoch": 1.13, "learning_rate": 3.110327338085214e-05, "loss": 1.124, "step": 307000 }, { "epoch": 1.14, "learning_rate": 3.1072496953133656e-05, "loss": 1.1166, "step": 307500 }, { "epoch": 1.14, "learning_rate": 3.1041720525415173e-05, "loss": 1.1222, "step": 308000 }, { "epoch": 1.14, "learning_rate": 3.101094409769669e-05, "loss": 1.1214, "step": 308500 }, { "epoch": 1.14, "learning_rate": 3.098016766997821e-05, "loss": 1.1231, "step": 309000 }, { "epoch": 1.14, "learning_rate": 3.094939124225973e-05, "loss": 1.1208, "step": 309500 }, { "epoch": 1.14, "learning_rate": 3.091861481454125e-05, "loss": 1.1167, "step": 310000 }, { "epoch": 1.15, "learning_rate": 3.0887838386822766e-05, "loss": 1.1238, "step": 310500 }, { "epoch": 1.15, "learning_rate": 3.085706195910428e-05, "loss": 1.1203, "step": 311000 }, { "epoch": 1.15, "learning_rate": 3.08262855313858e-05, "loss": 1.1276, "step": 311500 }, { "epoch": 1.15, "learning_rate": 3.079550910366732e-05, "loss": 1.1169, "step": 312000 }, { "epoch": 1.15, "learning_rate": 3.076473267594884e-05, "loss": 1.1197, "step": 312500 }, { "epoch": 1.16, "learning_rate": 3.073395624823036e-05, "loss": 1.1207, "step": 313000 }, { "epoch": 1.16, "learning_rate": 3.0703179820511875e-05, "loss": 1.1227, "step": 313500 }, { "epoch": 1.16, "learning_rate": 3.067240339279339e-05, "loss": 1.1221, "step": 314000 }, { "epoch": 1.16, "learning_rate": 3.064162696507491e-05, "loss": 1.1189, "step": 314500 }, { "epoch": 1.16, "learning_rate": 3.0610850537356426e-05, "loss": 1.1168, "step": 315000 }, { "epoch": 1.17, "learning_rate": 3.058007410963794e-05, "loss": 1.1153, "step": 315500 }, { "epoch": 1.17, "learning_rate": 3.054929768191947e-05, "loss": 1.1149, "step": 316000 }, { "epoch": 1.17, "learning_rate": 3.0518521254200984e-05, "loss": 1.1202, "step": 316500 }, { "epoch": 1.17, "learning_rate": 3.04877448264825e-05, "loss": 1.1233, "step": 317000 }, { "epoch": 1.17, "learning_rate": 3.045696839876402e-05, "loss": 1.1165, "step": 317500 }, { "epoch": 1.17, "learning_rate": 3.0426191971045536e-05, "loss": 1.1274, "step": 318000 }, { "epoch": 1.18, "learning_rate": 3.0395415543327056e-05, "loss": 1.1161, "step": 318500 }, { "epoch": 1.18, "learning_rate": 3.0364639115608573e-05, "loss": 1.1149, "step": 319000 }, { "epoch": 1.18, "learning_rate": 3.033386268789009e-05, "loss": 1.1194, "step": 319500 }, { "epoch": 1.18, "learning_rate": 3.030308626017161e-05, "loss": 1.1261, "step": 320000 }, { "epoch": 1.18, "learning_rate": 3.0272309832453128e-05, "loss": 1.1206, "step": 320500 }, { "epoch": 1.19, "learning_rate": 3.0241533404734645e-05, "loss": 1.1144, "step": 321000 }, { "epoch": 1.19, "learning_rate": 3.0210756977016162e-05, "loss": 1.1186, "step": 321500 }, { "epoch": 1.19, "learning_rate": 3.0179980549297682e-05, "loss": 1.129, "step": 322000 }, { "epoch": 1.19, "learning_rate": 3.01492041215792e-05, "loss": 1.1249, "step": 322500 }, { "epoch": 1.19, "learning_rate": 3.0118427693860717e-05, "loss": 1.121, "step": 323000 }, { "epoch": 1.19, "learning_rate": 3.0087651266142237e-05, "loss": 1.1155, "step": 323500 }, { "epoch": 1.2, "learning_rate": 3.0056874838423754e-05, "loss": 1.1212, "step": 324000 }, { "epoch": 1.2, "learning_rate": 3.002609841070527e-05, "loss": 1.1076, "step": 324500 }, { "epoch": 1.2, "learning_rate": 2.9995321982986792e-05, "loss": 1.1176, "step": 325000 }, { "epoch": 1.2, "learning_rate": 2.996454555526831e-05, "loss": 1.1167, "step": 325500 }, { "epoch": 1.2, "learning_rate": 2.9933769127549826e-05, "loss": 1.1186, "step": 326000 }, { "epoch": 1.21, "learning_rate": 2.9902992699831346e-05, "loss": 1.1161, "step": 326500 }, { "epoch": 1.21, "learning_rate": 2.9872216272112864e-05, "loss": 1.1168, "step": 327000 }, { "epoch": 1.21, "learning_rate": 2.984143984439438e-05, "loss": 1.1132, "step": 327500 }, { "epoch": 1.21, "learning_rate": 2.9810663416675898e-05, "loss": 1.1185, "step": 328000 }, { "epoch": 1.21, "learning_rate": 2.9779886988957418e-05, "loss": 1.1088, "step": 328500 }, { "epoch": 1.22, "learning_rate": 2.9749110561238935e-05, "loss": 1.1146, "step": 329000 }, { "epoch": 1.22, "learning_rate": 2.9718334133520452e-05, "loss": 1.1189, "step": 329500 }, { "epoch": 1.22, "learning_rate": 2.9687557705801973e-05, "loss": 1.1159, "step": 330000 }, { "epoch": 1.22, "learning_rate": 2.965678127808349e-05, "loss": 1.1174, "step": 330500 }, { "epoch": 1.22, "learning_rate": 2.9626004850365007e-05, "loss": 1.1194, "step": 331000 }, { "epoch": 1.22, "learning_rate": 2.9595228422646528e-05, "loss": 1.1091, "step": 331500 }, { "epoch": 1.23, "learning_rate": 2.9564451994928045e-05, "loss": 1.112, "step": 332000 }, { "epoch": 1.23, "learning_rate": 2.953367556720956e-05, "loss": 1.112, "step": 332500 }, { "epoch": 1.23, "learning_rate": 2.950289913949108e-05, "loss": 1.1104, "step": 333000 }, { "epoch": 1.23, "learning_rate": 2.94721227117726e-05, "loss": 1.1053, "step": 333500 }, { "epoch": 1.23, "learning_rate": 2.9441346284054116e-05, "loss": 1.1106, "step": 334000 }, { "epoch": 1.24, "learning_rate": 2.9410569856335633e-05, "loss": 1.1185, "step": 334500 }, { "epoch": 1.24, "learning_rate": 2.9379793428617154e-05, "loss": 1.1113, "step": 335000 }, { "epoch": 1.24, "learning_rate": 2.934901700089867e-05, "loss": 1.1096, "step": 335500 }, { "epoch": 1.24, "learning_rate": 2.9318240573180188e-05, "loss": 1.1085, "step": 336000 }, { "epoch": 1.24, "learning_rate": 2.928746414546171e-05, "loss": 1.1137, "step": 336500 }, { "epoch": 1.24, "learning_rate": 2.9256687717743226e-05, "loss": 1.1075, "step": 337000 }, { "epoch": 1.25, "learning_rate": 2.9225911290024743e-05, "loss": 1.1093, "step": 337500 }, { "epoch": 1.25, "learning_rate": 2.919513486230626e-05, "loss": 1.1159, "step": 338000 }, { "epoch": 1.25, "learning_rate": 2.916435843458778e-05, "loss": 1.1063, "step": 338500 }, { "epoch": 1.25, "learning_rate": 2.9133582006869297e-05, "loss": 1.1135, "step": 339000 }, { "epoch": 1.25, "learning_rate": 2.9102805579150815e-05, "loss": 1.1186, "step": 339500 }, { "epoch": 1.26, "learning_rate": 2.9072029151432335e-05, "loss": 1.1128, "step": 340000 }, { "epoch": 1.26, "learning_rate": 2.9041252723713852e-05, "loss": 1.1148, "step": 340500 }, { "epoch": 1.26, "learning_rate": 2.901047629599537e-05, "loss": 1.1056, "step": 341000 }, { "epoch": 1.26, "learning_rate": 2.897969986827689e-05, "loss": 1.1153, "step": 341500 }, { "epoch": 1.26, "learning_rate": 2.8948923440558407e-05, "loss": 1.1147, "step": 342000 }, { "epoch": 1.26, "learning_rate": 2.8918147012839924e-05, "loss": 1.1155, "step": 342500 }, { "epoch": 1.27, "learning_rate": 2.8887370585121444e-05, "loss": 1.1132, "step": 343000 }, { "epoch": 1.27, "learning_rate": 2.885659415740296e-05, "loss": 1.1072, "step": 343500 }, { "epoch": 1.27, "learning_rate": 2.882581772968448e-05, "loss": 1.1175, "step": 344000 }, { "epoch": 1.27, "learning_rate": 2.8795041301965996e-05, "loss": 1.1048, "step": 344500 }, { "epoch": 1.27, "learning_rate": 2.8764264874247516e-05, "loss": 1.1126, "step": 345000 }, { "epoch": 1.28, "learning_rate": 2.8733488446529033e-05, "loss": 1.1107, "step": 345500 }, { "epoch": 1.28, "learning_rate": 2.870271201881055e-05, "loss": 1.1108, "step": 346000 }, { "epoch": 1.28, "learning_rate": 2.867193559109207e-05, "loss": 1.1062, "step": 346500 }, { "epoch": 1.28, "learning_rate": 2.8641159163373588e-05, "loss": 1.1083, "step": 347000 }, { "epoch": 1.28, "learning_rate": 2.8610382735655105e-05, "loss": 1.0995, "step": 347500 }, { "epoch": 1.29, "learning_rate": 2.8579606307936625e-05, "loss": 1.1143, "step": 348000 }, { "epoch": 1.29, "learning_rate": 2.8548829880218142e-05, "loss": 1.1146, "step": 348500 }, { "epoch": 1.29, "learning_rate": 2.851805345249966e-05, "loss": 1.1144, "step": 349000 }, { "epoch": 1.29, "learning_rate": 2.8487277024781177e-05, "loss": 1.1108, "step": 349500 }, { "epoch": 1.29, "learning_rate": 2.8456500597062697e-05, "loss": 1.1105, "step": 350000 }, { "epoch": 1.29, "learning_rate": 2.842572416934422e-05, "loss": 1.1059, "step": 350500 }, { "epoch": 1.3, "learning_rate": 2.8394947741625738e-05, "loss": 1.1037, "step": 351000 }, { "epoch": 1.3, "learning_rate": 2.8364171313907255e-05, "loss": 1.1111, "step": 351500 }, { "epoch": 1.3, "learning_rate": 2.8333394886188776e-05, "loss": 1.1103, "step": 352000 }, { "epoch": 1.3, "learning_rate": 2.8302618458470293e-05, "loss": 1.1081, "step": 352500 }, { "epoch": 1.3, "learning_rate": 2.827184203075181e-05, "loss": 1.1095, "step": 353000 }, { "epoch": 1.31, "learning_rate": 2.824106560303333e-05, "loss": 1.1069, "step": 353500 }, { "epoch": 1.31, "learning_rate": 2.8210289175314847e-05, "loss": 1.1134, "step": 354000 }, { "epoch": 1.31, "learning_rate": 2.8179512747596364e-05, "loss": 1.108, "step": 354500 }, { "epoch": 1.31, "learning_rate": 2.814873631987788e-05, "loss": 1.1124, "step": 355000 }, { "epoch": 1.31, "learning_rate": 2.8117959892159402e-05, "loss": 1.1144, "step": 355500 }, { "epoch": 1.31, "learning_rate": 2.808718346444092e-05, "loss": 1.1154, "step": 356000 }, { "epoch": 1.32, "learning_rate": 2.8056407036722436e-05, "loss": 1.1116, "step": 356500 }, { "epoch": 1.32, "learning_rate": 2.8025630609003957e-05, "loss": 1.1119, "step": 357000 }, { "epoch": 1.32, "learning_rate": 2.7994854181285474e-05, "loss": 1.1138, "step": 357500 }, { "epoch": 1.32, "learning_rate": 2.796407775356699e-05, "loss": 1.1047, "step": 358000 }, { "epoch": 1.32, "learning_rate": 2.793330132584851e-05, "loss": 1.1068, "step": 358500 }, { "epoch": 1.33, "learning_rate": 2.790252489813003e-05, "loss": 1.1063, "step": 359000 }, { "epoch": 1.33, "learning_rate": 2.7871748470411546e-05, "loss": 1.0996, "step": 359500 }, { "epoch": 1.33, "learning_rate": 2.7840972042693063e-05, "loss": 1.1076, "step": 360000 }, { "epoch": 1.33, "learning_rate": 2.7810195614974583e-05, "loss": 1.0979, "step": 360500 }, { "epoch": 1.33, "learning_rate": 2.77794191872561e-05, "loss": 1.1051, "step": 361000 }, { "epoch": 1.34, "learning_rate": 2.7748642759537617e-05, "loss": 1.1009, "step": 361500 }, { "epoch": 1.34, "learning_rate": 2.7717866331819138e-05, "loss": 1.104, "step": 362000 }, { "epoch": 1.34, "learning_rate": 2.7687089904100655e-05, "loss": 1.108, "step": 362500 }, { "epoch": 1.34, "learning_rate": 2.7656313476382172e-05, "loss": 1.0968, "step": 363000 }, { "epoch": 1.34, "learning_rate": 2.7625537048663692e-05, "loss": 1.1071, "step": 363500 }, { "epoch": 1.34, "learning_rate": 2.759476062094521e-05, "loss": 1.0957, "step": 364000 }, { "epoch": 1.35, "learning_rate": 2.7563984193226727e-05, "loss": 1.1063, "step": 364500 }, { "epoch": 1.35, "learning_rate": 2.7533207765508247e-05, "loss": 1.1008, "step": 365000 }, { "epoch": 1.35, "learning_rate": 2.7502431337789764e-05, "loss": 1.1065, "step": 365500 }, { "epoch": 1.35, "learning_rate": 2.747165491007128e-05, "loss": 1.1019, "step": 366000 }, { "epoch": 1.35, "learning_rate": 2.74408784823528e-05, "loss": 1.1028, "step": 366500 }, { "epoch": 1.36, "learning_rate": 2.741010205463432e-05, "loss": 1.0995, "step": 367000 }, { "epoch": 1.36, "learning_rate": 2.7379325626915836e-05, "loss": 1.1066, "step": 367500 }, { "epoch": 1.36, "learning_rate": 2.7348549199197353e-05, "loss": 1.0997, "step": 368000 }, { "epoch": 1.36, "learning_rate": 2.7317772771478873e-05, "loss": 1.1093, "step": 368500 }, { "epoch": 1.36, "learning_rate": 2.728699634376039e-05, "loss": 1.1062, "step": 369000 }, { "epoch": 1.36, "learning_rate": 2.7256219916041908e-05, "loss": 1.103, "step": 369500 }, { "epoch": 1.37, "learning_rate": 2.7225443488323428e-05, "loss": 1.115, "step": 370000 }, { "epoch": 1.37, "learning_rate": 2.7194667060604945e-05, "loss": 1.0995, "step": 370500 }, { "epoch": 1.37, "learning_rate": 2.7163890632886462e-05, "loss": 1.0975, "step": 371000 }, { "epoch": 1.37, "learning_rate": 2.713311420516798e-05, "loss": 1.0929, "step": 371500 }, { "epoch": 1.37, "learning_rate": 2.71023377774495e-05, "loss": 1.1008, "step": 372000 }, { "epoch": 1.38, "learning_rate": 2.7071561349731017e-05, "loss": 1.0988, "step": 372500 }, { "epoch": 1.38, "learning_rate": 2.7040784922012534e-05, "loss": 1.0953, "step": 373000 }, { "epoch": 1.38, "learning_rate": 2.7010008494294055e-05, "loss": 1.1023, "step": 373500 }, { "epoch": 1.38, "learning_rate": 2.697923206657557e-05, "loss": 1.1068, "step": 374000 }, { "epoch": 1.38, "learning_rate": 2.694845563885709e-05, "loss": 1.1052, "step": 374500 }, { "epoch": 1.38, "learning_rate": 2.691767921113861e-05, "loss": 1.0968, "step": 375000 }, { "epoch": 1.39, "learning_rate": 2.6886902783420126e-05, "loss": 1.1074, "step": 375500 }, { "epoch": 1.39, "learning_rate": 2.6856126355701643e-05, "loss": 1.0953, "step": 376000 }, { "epoch": 1.39, "learning_rate": 2.682534992798316e-05, "loss": 1.1027, "step": 376500 }, { "epoch": 1.39, "learning_rate": 2.679457350026468e-05, "loss": 1.0974, "step": 377000 }, { "epoch": 1.39, "learning_rate": 2.6763797072546198e-05, "loss": 1.0923, "step": 377500 }, { "epoch": 1.4, "learning_rate": 2.6733020644827715e-05, "loss": 1.0968, "step": 378000 }, { "epoch": 1.4, "learning_rate": 2.6702244217109236e-05, "loss": 1.1017, "step": 378500 }, { "epoch": 1.4, "learning_rate": 2.6671467789390753e-05, "loss": 1.1037, "step": 379000 }, { "epoch": 1.4, "learning_rate": 2.664069136167227e-05, "loss": 1.1003, "step": 379500 }, { "epoch": 1.4, "learning_rate": 2.660991493395379e-05, "loss": 1.102, "step": 380000 }, { "epoch": 1.41, "learning_rate": 2.6579138506235307e-05, "loss": 1.0969, "step": 380500 }, { "epoch": 1.41, "learning_rate": 2.6548362078516824e-05, "loss": 1.1022, "step": 381000 }, { "epoch": 1.41, "learning_rate": 2.6517585650798345e-05, "loss": 1.0969, "step": 381500 }, { "epoch": 1.41, "learning_rate": 2.6486809223079862e-05, "loss": 1.0876, "step": 382000 }, { "epoch": 1.41, "learning_rate": 2.645603279536138e-05, "loss": 1.1026, "step": 382500 }, { "epoch": 1.41, "learning_rate": 2.6425256367642896e-05, "loss": 1.0973, "step": 383000 }, { "epoch": 1.42, "learning_rate": 2.6394479939924417e-05, "loss": 1.1026, "step": 383500 }, { "epoch": 1.42, "learning_rate": 2.6363703512205934e-05, "loss": 1.0965, "step": 384000 }, { "epoch": 1.42, "learning_rate": 2.633292708448745e-05, "loss": 1.0984, "step": 384500 }, { "epoch": 1.42, "learning_rate": 2.630215065676897e-05, "loss": 1.101, "step": 385000 }, { "epoch": 1.42, "learning_rate": 2.627137422905049e-05, "loss": 1.0917, "step": 385500 }, { "epoch": 1.43, "learning_rate": 2.6240597801332005e-05, "loss": 1.0949, "step": 386000 }, { "epoch": 1.43, "learning_rate": 2.6209821373613526e-05, "loss": 1.101, "step": 386500 }, { "epoch": 1.43, "learning_rate": 2.6179044945895043e-05, "loss": 1.0962, "step": 387000 }, { "epoch": 1.43, "learning_rate": 2.614826851817656e-05, "loss": 1.1, "step": 387500 }, { "epoch": 1.43, "learning_rate": 2.6117492090458077e-05, "loss": 1.0923, "step": 388000 }, { "epoch": 1.43, "learning_rate": 2.6086715662739598e-05, "loss": 1.0999, "step": 388500 }, { "epoch": 1.44, "learning_rate": 2.6055939235021115e-05, "loss": 1.0905, "step": 389000 }, { "epoch": 1.44, "learning_rate": 2.6025162807302632e-05, "loss": 1.1041, "step": 389500 }, { "epoch": 1.44, "learning_rate": 2.5994386379584152e-05, "loss": 1.1014, "step": 390000 }, { "epoch": 1.44, "learning_rate": 2.596360995186567e-05, "loss": 1.0937, "step": 390500 }, { "epoch": 1.44, "learning_rate": 2.5932833524147187e-05, "loss": 1.1003, "step": 391000 }, { "epoch": 1.45, "learning_rate": 2.5902057096428707e-05, "loss": 1.1029, "step": 391500 }, { "epoch": 1.45, "learning_rate": 2.5871280668710224e-05, "loss": 1.0963, "step": 392000 }, { "epoch": 1.45, "learning_rate": 2.584050424099174e-05, "loss": 1.0957, "step": 392500 }, { "epoch": 1.45, "learning_rate": 2.5809727813273262e-05, "loss": 1.1007, "step": 393000 }, { "epoch": 1.45, "learning_rate": 2.577895138555478e-05, "loss": 1.1006, "step": 393500 }, { "epoch": 1.46, "learning_rate": 2.5748174957836296e-05, "loss": 1.0938, "step": 394000 }, { "epoch": 1.46, "learning_rate": 2.5717398530117813e-05, "loss": 1.0928, "step": 394500 }, { "epoch": 1.46, "learning_rate": 2.5686622102399333e-05, "loss": 1.0849, "step": 395000 }, { "epoch": 1.46, "learning_rate": 2.565584567468085e-05, "loss": 1.0985, "step": 395500 }, { "epoch": 1.46, "learning_rate": 2.5625069246962368e-05, "loss": 1.0899, "step": 396000 }, { "epoch": 1.46, "learning_rate": 2.5594292819243888e-05, "loss": 1.0957, "step": 396500 }, { "epoch": 1.47, "learning_rate": 2.5563516391525405e-05, "loss": 1.0867, "step": 397000 }, { "epoch": 1.47, "learning_rate": 2.5532739963806922e-05, "loss": 1.1027, "step": 397500 }, { "epoch": 1.47, "learning_rate": 2.5501963536088443e-05, "loss": 1.0899, "step": 398000 }, { "epoch": 1.47, "learning_rate": 2.547118710836996e-05, "loss": 1.0836, "step": 398500 }, { "epoch": 1.47, "learning_rate": 2.5440410680651477e-05, "loss": 1.0939, "step": 399000 }, { "epoch": 1.48, "learning_rate": 2.5409634252932994e-05, "loss": 1.0925, "step": 399500 }, { "epoch": 1.48, "learning_rate": 2.5378857825214515e-05, "loss": 1.1019, "step": 400000 }, { "epoch": 1.48, "learning_rate": 2.534808139749603e-05, "loss": 1.1007, "step": 400500 }, { "epoch": 1.48, "learning_rate": 2.531730496977755e-05, "loss": 1.0905, "step": 401000 }, { "epoch": 1.48, "learning_rate": 2.528652854205907e-05, "loss": 1.0936, "step": 401500 }, { "epoch": 1.48, "learning_rate": 2.5255752114340586e-05, "loss": 1.0953, "step": 402000 }, { "epoch": 1.49, "learning_rate": 2.5224975686622103e-05, "loss": 1.0967, "step": 402500 }, { "epoch": 1.49, "learning_rate": 2.5194199258903624e-05, "loss": 1.0934, "step": 403000 }, { "epoch": 1.49, "learning_rate": 2.516342283118514e-05, "loss": 1.1003, "step": 403500 }, { "epoch": 1.49, "learning_rate": 2.5132646403466658e-05, "loss": 1.0893, "step": 404000 }, { "epoch": 1.49, "learning_rate": 2.5101869975748175e-05, "loss": 1.0878, "step": 404500 }, { "epoch": 1.5, "learning_rate": 2.5071093548029696e-05, "loss": 1.0921, "step": 405000 }, { "epoch": 1.5, "learning_rate": 2.5040317120311213e-05, "loss": 1.0976, "step": 405500 }, { "epoch": 1.5, "learning_rate": 2.500954069259273e-05, "loss": 1.0978, "step": 406000 }, { "epoch": 1.5, "learning_rate": 2.497876426487425e-05, "loss": 1.0948, "step": 406500 }, { "epoch": 1.5, "learning_rate": 2.4947987837155767e-05, "loss": 1.0894, "step": 407000 }, { "epoch": 1.5, "learning_rate": 2.4917211409437284e-05, "loss": 1.0933, "step": 407500 }, { "epoch": 1.51, "learning_rate": 2.4886434981718805e-05, "loss": 1.0835, "step": 408000 }, { "epoch": 1.51, "learning_rate": 2.4855658554000322e-05, "loss": 1.0971, "step": 408500 }, { "epoch": 1.51, "learning_rate": 2.482488212628184e-05, "loss": 1.0921, "step": 409000 }, { "epoch": 1.51, "learning_rate": 2.479410569856336e-05, "loss": 1.085, "step": 409500 }, { "epoch": 1.51, "learning_rate": 2.4763329270844877e-05, "loss": 1.0838, "step": 410000 }, { "epoch": 1.52, "learning_rate": 2.4732552843126394e-05, "loss": 1.0863, "step": 410500 }, { "epoch": 1.52, "learning_rate": 2.470177641540791e-05, "loss": 1.0882, "step": 411000 }, { "epoch": 1.52, "learning_rate": 2.467099998768943e-05, "loss": 1.0895, "step": 411500 }, { "epoch": 1.52, "learning_rate": 2.464022355997095e-05, "loss": 1.0949, "step": 412000 }, { "epoch": 1.52, "learning_rate": 2.4609447132252465e-05, "loss": 1.0864, "step": 412500 }, { "epoch": 1.53, "learning_rate": 2.4578670704533986e-05, "loss": 1.0848, "step": 413000 }, { "epoch": 1.53, "learning_rate": 2.4547894276815503e-05, "loss": 1.0895, "step": 413500 }, { "epoch": 1.53, "learning_rate": 2.451711784909702e-05, "loss": 1.0822, "step": 414000 }, { "epoch": 1.53, "learning_rate": 2.448634142137854e-05, "loss": 1.0859, "step": 414500 }, { "epoch": 1.53, "learning_rate": 2.4455564993660058e-05, "loss": 1.0809, "step": 415000 }, { "epoch": 1.53, "learning_rate": 2.4424788565941575e-05, "loss": 1.0877, "step": 415500 }, { "epoch": 1.54, "learning_rate": 2.4394012138223092e-05, "loss": 1.0822, "step": 416000 }, { "epoch": 1.54, "learning_rate": 2.4363235710504612e-05, "loss": 1.0981, "step": 416500 }, { "epoch": 1.54, "learning_rate": 2.433245928278613e-05, "loss": 1.0834, "step": 417000 }, { "epoch": 1.54, "learning_rate": 2.4301682855067647e-05, "loss": 1.0893, "step": 417500 }, { "epoch": 1.54, "learning_rate": 2.4270906427349167e-05, "loss": 1.0872, "step": 418000 }, { "epoch": 1.55, "learning_rate": 2.4240129999630684e-05, "loss": 1.0805, "step": 418500 }, { "epoch": 1.55, "learning_rate": 2.42093535719122e-05, "loss": 1.0848, "step": 419000 }, { "epoch": 1.55, "learning_rate": 2.417857714419372e-05, "loss": 1.0869, "step": 419500 }, { "epoch": 1.55, "learning_rate": 2.414780071647524e-05, "loss": 1.0846, "step": 420000 }, { "epoch": 1.55, "learning_rate": 2.4117024288756756e-05, "loss": 1.0872, "step": 420500 }, { "epoch": 1.55, "learning_rate": 2.4086247861038276e-05, "loss": 1.0835, "step": 421000 }, { "epoch": 1.56, "learning_rate": 2.4055471433319793e-05, "loss": 1.0825, "step": 421500 }, { "epoch": 1.56, "learning_rate": 2.402469500560131e-05, "loss": 1.0898, "step": 422000 }, { "epoch": 1.56, "learning_rate": 2.3993918577882828e-05, "loss": 1.0779, "step": 422500 }, { "epoch": 1.56, "learning_rate": 2.3963142150164348e-05, "loss": 1.0823, "step": 423000 }, { "epoch": 1.56, "learning_rate": 2.3932365722445865e-05, "loss": 1.084, "step": 423500 }, { "epoch": 1.57, "learning_rate": 2.3901589294727382e-05, "loss": 1.0851, "step": 424000 }, { "epoch": 1.57, "learning_rate": 2.3870812867008903e-05, "loss": 1.086, "step": 424500 }, { "epoch": 1.57, "learning_rate": 2.384003643929042e-05, "loss": 1.0877, "step": 425000 }, { "epoch": 1.57, "learning_rate": 2.3809260011571937e-05, "loss": 1.0923, "step": 425500 }, { "epoch": 1.57, "learning_rate": 2.3778483583853457e-05, "loss": 1.0843, "step": 426000 }, { "epoch": 1.58, "learning_rate": 2.3747707156134974e-05, "loss": 1.0748, "step": 426500 }, { "epoch": 1.58, "learning_rate": 2.371693072841649e-05, "loss": 1.0833, "step": 427000 }, { "epoch": 1.58, "learning_rate": 2.368615430069801e-05, "loss": 1.0839, "step": 427500 }, { "epoch": 1.58, "learning_rate": 2.365537787297953e-05, "loss": 1.0942, "step": 428000 }, { "epoch": 1.58, "learning_rate": 2.3624601445261046e-05, "loss": 1.0913, "step": 428500 }, { "epoch": 1.58, "learning_rate": 2.3593825017542563e-05, "loss": 1.093, "step": 429000 }, { "epoch": 1.59, "learning_rate": 2.3563048589824084e-05, "loss": 1.0757, "step": 429500 }, { "epoch": 1.59, "learning_rate": 2.35322721621056e-05, "loss": 1.0821, "step": 430000 }, { "epoch": 1.59, "learning_rate": 2.3501495734387118e-05, "loss": 1.0827, "step": 430500 }, { "epoch": 1.59, "learning_rate": 2.347071930666864e-05, "loss": 1.0845, "step": 431000 }, { "epoch": 1.59, "learning_rate": 2.3439942878950156e-05, "loss": 1.0876, "step": 431500 }, { "epoch": 1.6, "learning_rate": 2.3409166451231673e-05, "loss": 1.0776, "step": 432000 }, { "epoch": 1.6, "learning_rate": 2.337839002351319e-05, "loss": 1.0837, "step": 432500 }, { "epoch": 1.6, "learning_rate": 2.334761359579471e-05, "loss": 1.075, "step": 433000 }, { "epoch": 1.6, "learning_rate": 2.3316837168076227e-05, "loss": 1.0931, "step": 433500 }, { "epoch": 1.6, "learning_rate": 2.3286060740357744e-05, "loss": 1.0905, "step": 434000 }, { "epoch": 1.6, "learning_rate": 2.3255284312639265e-05, "loss": 1.0864, "step": 434500 }, { "epoch": 1.61, "learning_rate": 2.3224507884920782e-05, "loss": 1.0845, "step": 435000 }, { "epoch": 1.61, "learning_rate": 2.31937314572023e-05, "loss": 1.0933, "step": 435500 }, { "epoch": 1.61, "learning_rate": 2.316295502948382e-05, "loss": 1.076, "step": 436000 }, { "epoch": 1.61, "learning_rate": 2.3132178601765337e-05, "loss": 1.0816, "step": 436500 }, { "epoch": 1.61, "learning_rate": 2.3101402174046854e-05, "loss": 1.0791, "step": 437000 }, { "epoch": 1.62, "learning_rate": 2.3070625746328374e-05, "loss": 1.0818, "step": 437500 }, { "epoch": 1.62, "learning_rate": 2.303984931860989e-05, "loss": 1.0844, "step": 438000 }, { "epoch": 1.62, "learning_rate": 2.300907289089141e-05, "loss": 1.0735, "step": 438500 }, { "epoch": 1.62, "learning_rate": 2.2978296463172925e-05, "loss": 1.0766, "step": 439000 }, { "epoch": 1.62, "learning_rate": 2.2947520035454446e-05, "loss": 1.0827, "step": 439500 }, { "epoch": 1.62, "learning_rate": 2.2916743607735963e-05, "loss": 1.0855, "step": 440000 }, { "epoch": 1.63, "learning_rate": 2.288596718001748e-05, "loss": 1.074, "step": 440500 }, { "epoch": 1.63, "learning_rate": 2.2855190752299e-05, "loss": 1.0849, "step": 441000 }, { "epoch": 1.63, "learning_rate": 2.2824414324580518e-05, "loss": 1.0771, "step": 441500 }, { "epoch": 1.63, "learning_rate": 2.2793637896862035e-05, "loss": 1.0858, "step": 442000 }, { "epoch": 1.63, "learning_rate": 2.2762861469143555e-05, "loss": 1.0815, "step": 442500 }, { "epoch": 1.64, "learning_rate": 2.2732085041425072e-05, "loss": 1.0783, "step": 443000 }, { "epoch": 1.64, "learning_rate": 2.270130861370659e-05, "loss": 1.0707, "step": 443500 }, { "epoch": 1.64, "learning_rate": 2.2670532185988107e-05, "loss": 1.0763, "step": 444000 }, { "epoch": 1.64, "learning_rate": 2.2639755758269627e-05, "loss": 1.0802, "step": 444500 }, { "epoch": 1.64, "learning_rate": 2.2608979330551144e-05, "loss": 1.0757, "step": 445000 }, { "epoch": 1.65, "learning_rate": 2.257820290283266e-05, "loss": 1.0804, "step": 445500 }, { "epoch": 1.65, "learning_rate": 2.254742647511418e-05, "loss": 1.0786, "step": 446000 }, { "epoch": 1.65, "learning_rate": 2.25166500473957e-05, "loss": 1.0785, "step": 446500 }, { "epoch": 1.65, "learning_rate": 2.2485873619677216e-05, "loss": 1.0696, "step": 447000 }, { "epoch": 1.65, "learning_rate": 2.2455097191958736e-05, "loss": 1.072, "step": 447500 }, { "epoch": 1.65, "learning_rate": 2.2424320764240253e-05, "loss": 1.0787, "step": 448000 }, { "epoch": 1.66, "learning_rate": 2.239354433652177e-05, "loss": 1.0763, "step": 448500 }, { "epoch": 1.66, "learning_rate": 2.236276790880329e-05, "loss": 1.0783, "step": 449000 }, { "epoch": 1.66, "learning_rate": 2.2331991481084808e-05, "loss": 1.0721, "step": 449500 }, { "epoch": 1.66, "learning_rate": 2.2301215053366325e-05, "loss": 1.0833, "step": 450000 }, { "epoch": 1.66, "learning_rate": 2.2270438625647842e-05, "loss": 1.0705, "step": 450500 }, { "epoch": 1.67, "learning_rate": 2.2239662197929363e-05, "loss": 1.0791, "step": 451000 }, { "epoch": 1.67, "learning_rate": 2.220888577021088e-05, "loss": 1.0793, "step": 451500 }, { "epoch": 1.67, "learning_rate": 2.2178109342492397e-05, "loss": 1.0814, "step": 452000 }, { "epoch": 1.67, "learning_rate": 2.2147332914773917e-05, "loss": 1.0752, "step": 452500 }, { "epoch": 1.67, "learning_rate": 2.2116556487055434e-05, "loss": 1.0876, "step": 453000 }, { "epoch": 1.67, "learning_rate": 2.208578005933695e-05, "loss": 1.072, "step": 453500 }, { "epoch": 1.68, "learning_rate": 2.2055003631618472e-05, "loss": 1.0739, "step": 454000 }, { "epoch": 1.68, "learning_rate": 2.202422720389999e-05, "loss": 1.0814, "step": 454500 }, { "epoch": 1.68, "learning_rate": 2.1993450776181506e-05, "loss": 1.0705, "step": 455000 }, { "epoch": 1.68, "learning_rate": 2.1962674348463027e-05, "loss": 1.0753, "step": 455500 }, { "epoch": 1.68, "learning_rate": 2.1931897920744547e-05, "loss": 1.0834, "step": 456000 }, { "epoch": 1.69, "learning_rate": 2.1901121493026064e-05, "loss": 1.0849, "step": 456500 }, { "epoch": 1.69, "learning_rate": 2.187034506530758e-05, "loss": 1.0623, "step": 457000 }, { "epoch": 1.69, "learning_rate": 2.18395686375891e-05, "loss": 1.0664, "step": 457500 }, { "epoch": 1.69, "learning_rate": 2.180879220987062e-05, "loss": 1.0791, "step": 458000 }, { "epoch": 1.69, "learning_rate": 2.1778015782152136e-05, "loss": 1.0707, "step": 458500 }, { "epoch": 1.7, "learning_rate": 2.1747239354433653e-05, "loss": 1.0701, "step": 459000 }, { "epoch": 1.7, "learning_rate": 2.1716462926715174e-05, "loss": 1.0753, "step": 459500 }, { "epoch": 1.7, "learning_rate": 2.168568649899669e-05, "loss": 1.0739, "step": 460000 }, { "epoch": 1.7, "learning_rate": 2.1654910071278208e-05, "loss": 1.0732, "step": 460500 }, { "epoch": 1.7, "learning_rate": 2.1624133643559728e-05, "loss": 1.068, "step": 461000 }, { "epoch": 1.7, "learning_rate": 2.1593357215841245e-05, "loss": 1.0816, "step": 461500 }, { "epoch": 1.71, "learning_rate": 2.1562580788122762e-05, "loss": 1.0682, "step": 462000 }, { "epoch": 1.71, "learning_rate": 2.1531804360404283e-05, "loss": 1.0639, "step": 462500 }, { "epoch": 1.71, "learning_rate": 2.15010279326858e-05, "loss": 1.0723, "step": 463000 }, { "epoch": 1.71, "learning_rate": 2.1470251504967317e-05, "loss": 1.074, "step": 463500 }, { "epoch": 1.71, "learning_rate": 2.1439475077248834e-05, "loss": 1.0734, "step": 464000 }, { "epoch": 1.72, "learning_rate": 2.1408698649530355e-05, "loss": 1.0698, "step": 464500 }, { "epoch": 1.72, "learning_rate": 2.1377922221811872e-05, "loss": 1.0669, "step": 465000 }, { "epoch": 1.72, "learning_rate": 2.134714579409339e-05, "loss": 1.0744, "step": 465500 }, { "epoch": 1.72, "learning_rate": 2.131636936637491e-05, "loss": 1.0741, "step": 466000 }, { "epoch": 1.72, "learning_rate": 2.1285592938656426e-05, "loss": 1.068, "step": 466500 }, { "epoch": 1.72, "learning_rate": 2.1254816510937943e-05, "loss": 1.0707, "step": 467000 }, { "epoch": 1.73, "learning_rate": 2.1224040083219464e-05, "loss": 1.0806, "step": 467500 }, { "epoch": 1.73, "learning_rate": 2.119326365550098e-05, "loss": 1.0701, "step": 468000 }, { "epoch": 1.73, "learning_rate": 2.1162487227782498e-05, "loss": 1.08, "step": 468500 }, { "epoch": 1.73, "learning_rate": 2.1131710800064015e-05, "loss": 1.0767, "step": 469000 }, { "epoch": 1.73, "learning_rate": 2.1100934372345536e-05, "loss": 1.0618, "step": 469500 }, { "epoch": 1.74, "learning_rate": 2.1070157944627053e-05, "loss": 1.0704, "step": 470000 }, { "epoch": 1.74, "learning_rate": 2.103938151690857e-05, "loss": 1.0647, "step": 470500 }, { "epoch": 1.74, "learning_rate": 2.100860508919009e-05, "loss": 1.0657, "step": 471000 }, { "epoch": 1.74, "learning_rate": 2.0977828661471607e-05, "loss": 1.064, "step": 471500 }, { "epoch": 1.74, "learning_rate": 2.0947052233753125e-05, "loss": 1.0709, "step": 472000 }, { "epoch": 1.75, "learning_rate": 2.0916275806034645e-05, "loss": 1.0733, "step": 472500 }, { "epoch": 1.75, "learning_rate": 2.0885499378316162e-05, "loss": 1.0783, "step": 473000 }, { "epoch": 1.75, "learning_rate": 2.085472295059768e-05, "loss": 1.0638, "step": 473500 }, { "epoch": 1.75, "learning_rate": 2.08239465228792e-05, "loss": 1.0615, "step": 474000 }, { "epoch": 1.75, "learning_rate": 2.0793170095160717e-05, "loss": 1.0698, "step": 474500 }, { "epoch": 1.75, "learning_rate": 2.0762393667442234e-05, "loss": 1.0687, "step": 475000 }, { "epoch": 1.76, "learning_rate": 2.073161723972375e-05, "loss": 1.0697, "step": 475500 }, { "epoch": 1.76, "learning_rate": 2.070084081200527e-05, "loss": 1.0668, "step": 476000 }, { "epoch": 1.76, "learning_rate": 2.067006438428679e-05, "loss": 1.0686, "step": 476500 }, { "epoch": 1.76, "learning_rate": 2.0639287956568306e-05, "loss": 1.0624, "step": 477000 }, { "epoch": 1.76, "learning_rate": 2.0608511528849826e-05, "loss": 1.0668, "step": 477500 }, { "epoch": 1.77, "learning_rate": 2.0577735101131343e-05, "loss": 1.0646, "step": 478000 }, { "epoch": 1.77, "learning_rate": 2.054695867341286e-05, "loss": 1.0695, "step": 478500 }, { "epoch": 1.77, "learning_rate": 2.051618224569438e-05, "loss": 1.0713, "step": 479000 }, { "epoch": 1.77, "learning_rate": 2.0485405817975898e-05, "loss": 1.0712, "step": 479500 }, { "epoch": 1.77, "learning_rate": 2.0454629390257415e-05, "loss": 1.065, "step": 480000 }, { "epoch": 1.77, "learning_rate": 2.0423852962538932e-05, "loss": 1.0627, "step": 480500 }, { "epoch": 1.78, "learning_rate": 2.0393076534820452e-05, "loss": 1.0649, "step": 481000 }, { "epoch": 1.78, "learning_rate": 2.036230010710197e-05, "loss": 1.0738, "step": 481500 }, { "epoch": 1.78, "learning_rate": 2.0331523679383487e-05, "loss": 1.0716, "step": 482000 }, { "epoch": 1.78, "learning_rate": 2.0300747251665007e-05, "loss": 1.0641, "step": 482500 }, { "epoch": 1.78, "learning_rate": 2.0269970823946524e-05, "loss": 1.0614, "step": 483000 }, { "epoch": 1.79, "learning_rate": 2.023919439622804e-05, "loss": 1.0647, "step": 483500 }, { "epoch": 1.79, "learning_rate": 2.0208417968509562e-05, "loss": 1.0593, "step": 484000 }, { "epoch": 1.79, "learning_rate": 2.017764154079108e-05, "loss": 1.0743, "step": 484500 }, { "epoch": 1.79, "learning_rate": 2.0146865113072596e-05, "loss": 1.0641, "step": 485000 }, { "epoch": 1.79, "learning_rate": 2.0116088685354113e-05, "loss": 1.0732, "step": 485500 }, { "epoch": 1.79, "learning_rate": 2.0085312257635634e-05, "loss": 1.0735, "step": 486000 }, { "epoch": 1.8, "learning_rate": 2.005453582991715e-05, "loss": 1.0573, "step": 486500 }, { "epoch": 1.8, "learning_rate": 2.0023759402198668e-05, "loss": 1.0641, "step": 487000 }, { "epoch": 1.8, "learning_rate": 1.9992982974480188e-05, "loss": 1.0585, "step": 487500 }, { "epoch": 1.8, "learning_rate": 1.9962206546761705e-05, "loss": 1.068, "step": 488000 }, { "epoch": 1.8, "learning_rate": 1.9931430119043222e-05, "loss": 1.0679, "step": 488500 }, { "epoch": 1.81, "learning_rate": 1.9900653691324743e-05, "loss": 1.0714, "step": 489000 }, { "epoch": 1.81, "learning_rate": 1.986987726360626e-05, "loss": 1.0592, "step": 489500 }, { "epoch": 1.81, "learning_rate": 1.9839100835887777e-05, "loss": 1.0718, "step": 490000 }, { "epoch": 1.81, "learning_rate": 1.9808324408169297e-05, "loss": 1.0697, "step": 490500 }, { "epoch": 1.81, "learning_rate": 1.9777547980450815e-05, "loss": 1.0602, "step": 491000 }, { "epoch": 1.82, "learning_rate": 1.974677155273233e-05, "loss": 1.0675, "step": 491500 }, { "epoch": 1.82, "learning_rate": 1.971599512501385e-05, "loss": 1.0659, "step": 492000 }, { "epoch": 1.82, "learning_rate": 1.968521869729537e-05, "loss": 1.0687, "step": 492500 }, { "epoch": 1.82, "learning_rate": 1.9654442269576886e-05, "loss": 1.0665, "step": 493000 }, { "epoch": 1.82, "learning_rate": 1.9623665841858403e-05, "loss": 1.0683, "step": 493500 }, { "epoch": 1.82, "learning_rate": 1.9592889414139924e-05, "loss": 1.0626, "step": 494000 }, { "epoch": 1.83, "learning_rate": 1.956211298642144e-05, "loss": 1.0596, "step": 494500 }, { "epoch": 1.83, "learning_rate": 1.9531336558702958e-05, "loss": 1.065, "step": 495000 }, { "epoch": 1.83, "learning_rate": 1.950056013098448e-05, "loss": 1.0709, "step": 495500 }, { "epoch": 1.83, "learning_rate": 1.9469783703265996e-05, "loss": 1.0693, "step": 496000 }, { "epoch": 1.83, "learning_rate": 1.9439007275547513e-05, "loss": 1.0558, "step": 496500 }, { "epoch": 1.84, "learning_rate": 1.940823084782903e-05, "loss": 1.0681, "step": 497000 }, { "epoch": 1.84, "learning_rate": 1.937745442011055e-05, "loss": 1.0686, "step": 497500 }, { "epoch": 1.84, "learning_rate": 1.9346677992392067e-05, "loss": 1.0641, "step": 498000 }, { "epoch": 1.84, "learning_rate": 1.9315901564673584e-05, "loss": 1.0601, "step": 498500 }, { "epoch": 1.84, "learning_rate": 1.9285125136955105e-05, "loss": 1.0638, "step": 499000 }, { "epoch": 1.84, "learning_rate": 1.9254348709236622e-05, "loss": 1.0572, "step": 499500 }, { "epoch": 1.85, "learning_rate": 1.922357228151814e-05, "loss": 1.063, "step": 500000 }, { "epoch": 1.85, "learning_rate": 1.919279585379966e-05, "loss": 1.0585, "step": 500500 }, { "epoch": 1.85, "learning_rate": 1.9162019426081177e-05, "loss": 1.0664, "step": 501000 }, { "epoch": 1.85, "learning_rate": 1.9131242998362694e-05, "loss": 1.0591, "step": 501500 }, { "epoch": 1.85, "learning_rate": 1.9100466570644214e-05, "loss": 1.0509, "step": 502000 }, { "epoch": 1.86, "learning_rate": 1.906969014292573e-05, "loss": 1.0532, "step": 502500 }, { "epoch": 1.86, "learning_rate": 1.903891371520725e-05, "loss": 1.0721, "step": 503000 }, { "epoch": 1.86, "learning_rate": 1.9008137287488766e-05, "loss": 1.0554, "step": 503500 }, { "epoch": 1.86, "learning_rate": 1.8977360859770286e-05, "loss": 1.0689, "step": 504000 }, { "epoch": 1.86, "learning_rate": 1.8946584432051803e-05, "loss": 1.0692, "step": 504500 }, { "epoch": 1.87, "learning_rate": 1.891580800433332e-05, "loss": 1.0658, "step": 505000 }, { "epoch": 1.87, "learning_rate": 1.888503157661484e-05, "loss": 1.0602, "step": 505500 }, { "epoch": 1.87, "learning_rate": 1.8854255148896358e-05, "loss": 1.0624, "step": 506000 }, { "epoch": 1.87, "learning_rate": 1.8823478721177875e-05, "loss": 1.0545, "step": 506500 }, { "epoch": 1.87, "learning_rate": 1.8792702293459395e-05, "loss": 1.0551, "step": 507000 }, { "epoch": 1.87, "learning_rate": 1.8761925865740912e-05, "loss": 1.0572, "step": 507500 }, { "epoch": 1.88, "learning_rate": 1.873114943802243e-05, "loss": 1.0695, "step": 508000 }, { "epoch": 1.88, "learning_rate": 1.8700373010303947e-05, "loss": 1.0517, "step": 508500 }, { "epoch": 1.88, "learning_rate": 1.8669596582585467e-05, "loss": 1.0665, "step": 509000 }, { "epoch": 1.88, "learning_rate": 1.8638820154866984e-05, "loss": 1.0576, "step": 509500 }, { "epoch": 1.88, "learning_rate": 1.86080437271485e-05, "loss": 1.0605, "step": 510000 }, { "epoch": 1.89, "learning_rate": 1.8577267299430022e-05, "loss": 1.0606, "step": 510500 }, { "epoch": 1.89, "learning_rate": 1.854649087171154e-05, "loss": 1.0629, "step": 511000 }, { "epoch": 1.89, "learning_rate": 1.8515714443993056e-05, "loss": 1.06, "step": 511500 }, { "epoch": 1.89, "learning_rate": 1.8484938016274576e-05, "loss": 1.0572, "step": 512000 }, { "epoch": 1.89, "learning_rate": 1.8454161588556093e-05, "loss": 1.059, "step": 512500 }, { "epoch": 1.89, "learning_rate": 1.842338516083761e-05, "loss": 1.0534, "step": 513000 }, { "epoch": 1.9, "learning_rate": 1.8392608733119128e-05, "loss": 1.0593, "step": 513500 }, { "epoch": 1.9, "learning_rate": 1.8361832305400648e-05, "loss": 1.065, "step": 514000 }, { "epoch": 1.9, "learning_rate": 1.8331055877682165e-05, "loss": 1.0593, "step": 514500 }, { "epoch": 1.9, "learning_rate": 1.8300279449963682e-05, "loss": 1.0554, "step": 515000 }, { "epoch": 1.9, "learning_rate": 1.8269503022245203e-05, "loss": 1.0605, "step": 515500 }, { "epoch": 1.91, "learning_rate": 1.823872659452672e-05, "loss": 1.0586, "step": 516000 }, { "epoch": 1.91, "learning_rate": 1.8207950166808237e-05, "loss": 1.0597, "step": 516500 }, { "epoch": 1.91, "learning_rate": 1.8177173739089757e-05, "loss": 1.0561, "step": 517000 }, { "epoch": 1.91, "learning_rate": 1.8146397311371275e-05, "loss": 1.0484, "step": 517500 }, { "epoch": 1.91, "learning_rate": 1.811562088365279e-05, "loss": 1.0564, "step": 518000 }, { "epoch": 1.91, "learning_rate": 1.8084844455934312e-05, "loss": 1.0528, "step": 518500 }, { "epoch": 1.92, "learning_rate": 1.805406802821583e-05, "loss": 1.0546, "step": 519000 }, { "epoch": 1.92, "learning_rate": 1.8023291600497346e-05, "loss": 1.0573, "step": 519500 }, { "epoch": 1.92, "learning_rate": 1.7992515172778863e-05, "loss": 1.0481, "step": 520000 }, { "epoch": 1.92, "learning_rate": 1.7961738745060384e-05, "loss": 1.0636, "step": 520500 }, { "epoch": 1.92, "learning_rate": 1.79309623173419e-05, "loss": 1.0585, "step": 521000 }, { "epoch": 1.93, "learning_rate": 1.7900185889623418e-05, "loss": 1.0595, "step": 521500 }, { "epoch": 1.93, "learning_rate": 1.786940946190494e-05, "loss": 1.0621, "step": 522000 }, { "epoch": 1.93, "learning_rate": 1.7838633034186456e-05, "loss": 1.0544, "step": 522500 }, { "epoch": 1.93, "learning_rate": 1.7807856606467973e-05, "loss": 1.0543, "step": 523000 }, { "epoch": 1.93, "learning_rate": 1.7777080178749493e-05, "loss": 1.0646, "step": 523500 }, { "epoch": 1.94, "learning_rate": 1.774630375103101e-05, "loss": 1.0572, "step": 524000 }, { "epoch": 1.94, "learning_rate": 1.7715527323312527e-05, "loss": 1.0555, "step": 524500 }, { "epoch": 1.94, "learning_rate": 1.7684750895594044e-05, "loss": 1.06, "step": 525000 }, { "epoch": 1.94, "learning_rate": 1.765397446787557e-05, "loss": 1.0527, "step": 525500 }, { "epoch": 1.94, "learning_rate": 1.7623198040157085e-05, "loss": 1.0552, "step": 526000 }, { "epoch": 1.94, "learning_rate": 1.7592421612438603e-05, "loss": 1.0586, "step": 526500 }, { "epoch": 1.95, "learning_rate": 1.7561645184720123e-05, "loss": 1.0532, "step": 527000 }, { "epoch": 1.95, "learning_rate": 1.753086875700164e-05, "loss": 1.0546, "step": 527500 }, { "epoch": 1.95, "learning_rate": 1.7500092329283157e-05, "loss": 1.0597, "step": 528000 }, { "epoch": 1.95, "learning_rate": 1.7469315901564674e-05, "loss": 1.0543, "step": 528500 }, { "epoch": 1.95, "learning_rate": 1.7438539473846195e-05, "loss": 1.0525, "step": 529000 }, { "epoch": 1.96, "learning_rate": 1.7407763046127712e-05, "loss": 1.0597, "step": 529500 }, { "epoch": 1.96, "learning_rate": 1.737698661840923e-05, "loss": 1.0478, "step": 530000 }, { "epoch": 1.96, "learning_rate": 1.734621019069075e-05, "loss": 1.0532, "step": 530500 }, { "epoch": 1.96, "learning_rate": 1.7315433762972266e-05, "loss": 1.0545, "step": 531000 }, { "epoch": 1.96, "learning_rate": 1.7284657335253784e-05, "loss": 1.0438, "step": 531500 }, { "epoch": 1.96, "learning_rate": 1.7253880907535304e-05, "loss": 1.0544, "step": 532000 }, { "epoch": 1.97, "learning_rate": 1.722310447981682e-05, "loss": 1.0539, "step": 532500 }, { "epoch": 1.97, "learning_rate": 1.7192328052098338e-05, "loss": 1.0544, "step": 533000 }, { "epoch": 1.97, "learning_rate": 1.7161551624379855e-05, "loss": 1.0535, "step": 533500 }, { "epoch": 1.97, "learning_rate": 1.7130775196661376e-05, "loss": 1.0501, "step": 534000 }, { "epoch": 1.97, "learning_rate": 1.7099998768942893e-05, "loss": 1.0558, "step": 534500 }, { "epoch": 1.98, "learning_rate": 1.706922234122441e-05, "loss": 1.0466, "step": 535000 }, { "epoch": 1.98, "learning_rate": 1.703844591350593e-05, "loss": 1.0561, "step": 535500 }, { "epoch": 1.98, "learning_rate": 1.7007669485787448e-05, "loss": 1.0504, "step": 536000 }, { "epoch": 1.98, "learning_rate": 1.6976893058068965e-05, "loss": 1.0419, "step": 536500 }, { "epoch": 1.98, "learning_rate": 1.6946116630350485e-05, "loss": 1.0501, "step": 537000 }, { "epoch": 1.99, "learning_rate": 1.6915340202632002e-05, "loss": 1.0584, "step": 537500 }, { "epoch": 1.99, "learning_rate": 1.688456377491352e-05, "loss": 1.0499, "step": 538000 }, { "epoch": 1.99, "learning_rate": 1.6853787347195036e-05, "loss": 1.0525, "step": 538500 }, { "epoch": 1.99, "learning_rate": 1.6823010919476557e-05, "loss": 1.0508, "step": 539000 }, { "epoch": 1.99, "learning_rate": 1.6792234491758074e-05, "loss": 1.0516, "step": 539500 }, { "epoch": 1.99, "learning_rate": 1.676145806403959e-05, "loss": 1.0469, "step": 540000 }, { "epoch": 2.0, "learning_rate": 1.673068163632111e-05, "loss": 1.0507, "step": 540500 }, { "epoch": 2.0, "learning_rate": 1.669990520860263e-05, "loss": 1.053, "step": 541000 }, { "epoch": 2.0, "learning_rate": 1.6669128780884146e-05, "loss": 1.0577, "step": 541500 }, { "epoch": 2.0, "learning_rate": 1.6638352353165666e-05, "loss": 1.0475, "step": 542000 }, { "epoch": 2.0, "learning_rate": 1.6607575925447183e-05, "loss": 1.0467, "step": 542500 }, { "epoch": 2.01, "learning_rate": 1.65767994977287e-05, "loss": 1.0514, "step": 543000 }, { "epoch": 2.01, "learning_rate": 1.654602307001022e-05, "loss": 1.0497, "step": 543500 }, { "epoch": 2.01, "learning_rate": 1.6515246642291738e-05, "loss": 1.0483, "step": 544000 }, { "epoch": 2.01, "learning_rate": 1.6484470214573255e-05, "loss": 1.0512, "step": 544500 }, { "epoch": 2.01, "learning_rate": 1.6453693786854772e-05, "loss": 1.0497, "step": 545000 }, { "epoch": 2.01, "learning_rate": 1.6422917359136293e-05, "loss": 1.0509, "step": 545500 }, { "epoch": 2.02, "learning_rate": 1.639214093141781e-05, "loss": 1.0495, "step": 546000 }, { "epoch": 2.02, "learning_rate": 1.6361364503699327e-05, "loss": 1.0474, "step": 546500 }, { "epoch": 2.02, "learning_rate": 1.6330588075980847e-05, "loss": 1.0404, "step": 547000 }, { "epoch": 2.02, "learning_rate": 1.6299811648262364e-05, "loss": 1.0409, "step": 547500 }, { "epoch": 2.02, "learning_rate": 1.626903522054388e-05, "loss": 1.0383, "step": 548000 }, { "epoch": 2.03, "learning_rate": 1.6238258792825402e-05, "loss": 1.0425, "step": 548500 }, { "epoch": 2.03, "learning_rate": 1.620748236510692e-05, "loss": 1.0421, "step": 549000 }, { "epoch": 2.03, "learning_rate": 1.6176705937388436e-05, "loss": 1.0518, "step": 549500 }, { "epoch": 2.03, "learning_rate": 1.6145929509669953e-05, "loss": 1.0409, "step": 550000 }, { "epoch": 2.03, "learning_rate": 1.6115153081951474e-05, "loss": 1.0464, "step": 550500 }, { "epoch": 2.03, "learning_rate": 1.608437665423299e-05, "loss": 1.038, "step": 551000 }, { "epoch": 2.04, "learning_rate": 1.6053600226514508e-05, "loss": 1.0489, "step": 551500 }, { "epoch": 2.04, "learning_rate": 1.6022823798796028e-05, "loss": 1.0533, "step": 552000 }, { "epoch": 2.04, "learning_rate": 1.5992047371077545e-05, "loss": 1.0447, "step": 552500 }, { "epoch": 2.04, "learning_rate": 1.5961270943359062e-05, "loss": 1.0416, "step": 553000 }, { "epoch": 2.04, "learning_rate": 1.5930494515640583e-05, "loss": 1.0463, "step": 553500 }, { "epoch": 2.05, "learning_rate": 1.58997180879221e-05, "loss": 1.0365, "step": 554000 }, { "epoch": 2.05, "learning_rate": 1.5868941660203617e-05, "loss": 1.0371, "step": 554500 }, { "epoch": 2.05, "learning_rate": 1.5838165232485138e-05, "loss": 1.0506, "step": 555000 }, { "epoch": 2.05, "learning_rate": 1.5807388804766655e-05, "loss": 1.0464, "step": 555500 }, { "epoch": 2.05, "learning_rate": 1.5776612377048172e-05, "loss": 1.0452, "step": 556000 }, { "epoch": 2.06, "learning_rate": 1.574583594932969e-05, "loss": 1.0499, "step": 556500 }, { "epoch": 2.06, "learning_rate": 1.571505952161121e-05, "loss": 1.0477, "step": 557000 }, { "epoch": 2.06, "learning_rate": 1.5684283093892726e-05, "loss": 1.0486, "step": 557500 }, { "epoch": 2.06, "learning_rate": 1.5653506666174244e-05, "loss": 1.0367, "step": 558000 }, { "epoch": 2.06, "learning_rate": 1.5622730238455764e-05, "loss": 1.0467, "step": 558500 }, { "epoch": 2.06, "learning_rate": 1.559195381073728e-05, "loss": 1.0455, "step": 559000 }, { "epoch": 2.07, "learning_rate": 1.5561177383018798e-05, "loss": 1.0463, "step": 559500 }, { "epoch": 2.07, "learning_rate": 1.553040095530032e-05, "loss": 1.0446, "step": 560000 }, { "epoch": 2.07, "learning_rate": 1.5499624527581836e-05, "loss": 1.0366, "step": 560500 }, { "epoch": 2.07, "learning_rate": 1.5468848099863353e-05, "loss": 1.0396, "step": 561000 }, { "epoch": 2.07, "learning_rate": 1.543807167214487e-05, "loss": 1.041, "step": 561500 }, { "epoch": 2.08, "learning_rate": 1.540729524442639e-05, "loss": 1.0454, "step": 562000 }, { "epoch": 2.08, "learning_rate": 1.5376518816707908e-05, "loss": 1.0422, "step": 562500 }, { "epoch": 2.08, "learning_rate": 1.5345742388989425e-05, "loss": 1.042, "step": 563000 }, { "epoch": 2.08, "learning_rate": 1.5314965961270945e-05, "loss": 1.0425, "step": 563500 }, { "epoch": 2.08, "learning_rate": 1.5284189533552462e-05, "loss": 1.0461, "step": 564000 }, { "epoch": 2.08, "learning_rate": 1.5253413105833981e-05, "loss": 1.0457, "step": 564500 }, { "epoch": 2.09, "learning_rate": 1.5222636678115498e-05, "loss": 1.0344, "step": 565000 }, { "epoch": 2.09, "learning_rate": 1.5191860250397017e-05, "loss": 1.0468, "step": 565500 }, { "epoch": 2.09, "learning_rate": 1.5161083822678534e-05, "loss": 1.0469, "step": 566000 }, { "epoch": 2.09, "learning_rate": 1.5130307394960053e-05, "loss": 1.0401, "step": 566500 }, { "epoch": 2.09, "learning_rate": 1.5099530967241571e-05, "loss": 1.0404, "step": 567000 }, { "epoch": 2.1, "learning_rate": 1.5068754539523089e-05, "loss": 1.0428, "step": 567500 }, { "epoch": 2.1, "learning_rate": 1.5037978111804607e-05, "loss": 1.0439, "step": 568000 }, { "epoch": 2.1, "learning_rate": 1.5007201684086124e-05, "loss": 1.0397, "step": 568500 }, { "epoch": 2.1, "learning_rate": 1.4976425256367643e-05, "loss": 1.0346, "step": 569000 }, { "epoch": 2.1, "learning_rate": 1.4945648828649162e-05, "loss": 1.0366, "step": 569500 }, { "epoch": 2.11, "learning_rate": 1.4914872400930679e-05, "loss": 1.0415, "step": 570000 }, { "epoch": 2.11, "learning_rate": 1.4884095973212198e-05, "loss": 1.0388, "step": 570500 }, { "epoch": 2.11, "learning_rate": 1.4853319545493715e-05, "loss": 1.0404, "step": 571000 }, { "epoch": 2.11, "learning_rate": 1.4822543117775234e-05, "loss": 1.0359, "step": 571500 }, { "epoch": 2.11, "learning_rate": 1.4791766690056753e-05, "loss": 1.0466, "step": 572000 }, { "epoch": 2.11, "learning_rate": 1.476099026233827e-05, "loss": 1.0335, "step": 572500 }, { "epoch": 2.12, "learning_rate": 1.4730213834619788e-05, "loss": 1.0328, "step": 573000 }, { "epoch": 2.12, "learning_rate": 1.4699437406901306e-05, "loss": 1.0387, "step": 573500 }, { "epoch": 2.12, "learning_rate": 1.4668660979182824e-05, "loss": 1.0415, "step": 574000 }, { "epoch": 2.12, "learning_rate": 1.4637884551464343e-05, "loss": 1.0438, "step": 574500 }, { "epoch": 2.12, "learning_rate": 1.460710812374586e-05, "loss": 1.0379, "step": 575000 }, { "epoch": 2.13, "learning_rate": 1.4576331696027379e-05, "loss": 1.0424, "step": 575500 }, { "epoch": 2.13, "learning_rate": 1.4545555268308898e-05, "loss": 1.0308, "step": 576000 }, { "epoch": 2.13, "learning_rate": 1.4514778840590415e-05, "loss": 1.0334, "step": 576500 }, { "epoch": 2.13, "learning_rate": 1.4484002412871934e-05, "loss": 1.0349, "step": 577000 }, { "epoch": 2.13, "learning_rate": 1.445322598515345e-05, "loss": 1.0468, "step": 577500 }, { "epoch": 2.13, "learning_rate": 1.442244955743497e-05, "loss": 1.0394, "step": 578000 }, { "epoch": 2.14, "learning_rate": 1.4391673129716488e-05, "loss": 1.0469, "step": 578500 }, { "epoch": 2.14, "learning_rate": 1.4360896701998005e-05, "loss": 1.0423, "step": 579000 }, { "epoch": 2.14, "learning_rate": 1.4330120274279524e-05, "loss": 1.0297, "step": 579500 }, { "epoch": 2.14, "learning_rate": 1.4299343846561041e-05, "loss": 1.0411, "step": 580000 }, { "epoch": 2.14, "learning_rate": 1.426856741884256e-05, "loss": 1.0392, "step": 580500 }, { "epoch": 2.15, "learning_rate": 1.4237790991124079e-05, "loss": 1.0259, "step": 581000 }, { "epoch": 2.15, "learning_rate": 1.4207014563405596e-05, "loss": 1.035, "step": 581500 }, { "epoch": 2.15, "learning_rate": 1.4176238135687115e-05, "loss": 1.0395, "step": 582000 }, { "epoch": 2.15, "learning_rate": 1.4145461707968632e-05, "loss": 1.0358, "step": 582500 }, { "epoch": 2.15, "learning_rate": 1.411468528025015e-05, "loss": 1.0413, "step": 583000 }, { "epoch": 2.15, "learning_rate": 1.408390885253167e-05, "loss": 1.0329, "step": 583500 }, { "epoch": 2.16, "learning_rate": 1.4053132424813186e-05, "loss": 1.039, "step": 584000 }, { "epoch": 2.16, "learning_rate": 1.4022355997094705e-05, "loss": 1.0357, "step": 584500 }, { "epoch": 2.16, "learning_rate": 1.3991579569376222e-05, "loss": 1.0297, "step": 585000 }, { "epoch": 2.16, "learning_rate": 1.3960803141657741e-05, "loss": 1.0311, "step": 585500 }, { "epoch": 2.16, "learning_rate": 1.393002671393926e-05, "loss": 1.0412, "step": 586000 }, { "epoch": 2.17, "learning_rate": 1.3899250286220777e-05, "loss": 1.0364, "step": 586500 }, { "epoch": 2.17, "learning_rate": 1.3868473858502296e-05, "loss": 1.0375, "step": 587000 }, { "epoch": 2.17, "learning_rate": 1.3837697430783813e-05, "loss": 1.0295, "step": 587500 }, { "epoch": 2.17, "learning_rate": 1.3806921003065332e-05, "loss": 1.0396, "step": 588000 }, { "epoch": 2.17, "learning_rate": 1.377614457534685e-05, "loss": 1.0308, "step": 588500 }, { "epoch": 2.18, "learning_rate": 1.3745368147628367e-05, "loss": 1.0396, "step": 589000 }, { "epoch": 2.18, "learning_rate": 1.3714591719909886e-05, "loss": 1.0346, "step": 589500 }, { "epoch": 2.18, "learning_rate": 1.3683815292191405e-05, "loss": 1.0344, "step": 590000 }, { "epoch": 2.18, "learning_rate": 1.3653038864472922e-05, "loss": 1.0344, "step": 590500 }, { "epoch": 2.18, "learning_rate": 1.3622262436754441e-05, "loss": 1.0258, "step": 591000 }, { "epoch": 2.18, "learning_rate": 1.3591486009035958e-05, "loss": 1.0383, "step": 591500 }, { "epoch": 2.19, "learning_rate": 1.3560709581317477e-05, "loss": 1.0299, "step": 592000 }, { "epoch": 2.19, "learning_rate": 1.3529933153598996e-05, "loss": 1.0326, "step": 592500 }, { "epoch": 2.19, "learning_rate": 1.3499156725880513e-05, "loss": 1.0393, "step": 593000 }, { "epoch": 2.19, "learning_rate": 1.3468380298162031e-05, "loss": 1.0327, "step": 593500 }, { "epoch": 2.19, "learning_rate": 1.3437603870443549e-05, "loss": 1.0373, "step": 594000 }, { "epoch": 2.2, "learning_rate": 1.3406827442725067e-05, "loss": 1.038, "step": 594500 }, { "epoch": 2.2, "learning_rate": 1.3376051015006586e-05, "loss": 1.0346, "step": 595000 }, { "epoch": 2.2, "learning_rate": 1.3345274587288107e-05, "loss": 1.025, "step": 595500 }, { "epoch": 2.2, "learning_rate": 1.3314498159569624e-05, "loss": 1.0369, "step": 596000 }, { "epoch": 2.2, "learning_rate": 1.3283721731851142e-05, "loss": 1.027, "step": 596500 }, { "epoch": 2.2, "learning_rate": 1.3252945304132661e-05, "loss": 1.037, "step": 597000 }, { "epoch": 2.21, "learning_rate": 1.3222168876414178e-05, "loss": 1.0388, "step": 597500 }, { "epoch": 2.21, "learning_rate": 1.3191392448695697e-05, "loss": 1.0262, "step": 598000 }, { "epoch": 2.21, "learning_rate": 1.3160616020977214e-05, "loss": 1.0306, "step": 598500 }, { "epoch": 2.21, "learning_rate": 1.3129839593258733e-05, "loss": 1.0307, "step": 599000 }, { "epoch": 2.21, "learning_rate": 1.3099063165540252e-05, "loss": 1.0347, "step": 599500 }, { "epoch": 2.22, "learning_rate": 1.3068286737821769e-05, "loss": 1.0303, "step": 600000 }, { "epoch": 2.22, "learning_rate": 1.3037510310103288e-05, "loss": 1.0367, "step": 600500 }, { "epoch": 2.22, "learning_rate": 1.3006733882384806e-05, "loss": 1.0267, "step": 601000 }, { "epoch": 2.22, "learning_rate": 1.2975957454666324e-05, "loss": 1.0302, "step": 601500 }, { "epoch": 2.22, "learning_rate": 1.2945181026947842e-05, "loss": 1.033, "step": 602000 }, { "epoch": 2.23, "learning_rate": 1.291440459922936e-05, "loss": 1.0207, "step": 602500 }, { "epoch": 2.23, "learning_rate": 1.2883628171510878e-05, "loss": 1.0293, "step": 603000 }, { "epoch": 2.23, "learning_rate": 1.2852851743792397e-05, "loss": 1.0309, "step": 603500 }, { "epoch": 2.23, "learning_rate": 1.2822075316073914e-05, "loss": 1.04, "step": 604000 }, { "epoch": 2.23, "learning_rate": 1.2791298888355433e-05, "loss": 1.029, "step": 604500 }, { "epoch": 2.23, "learning_rate": 1.276052246063695e-05, "loss": 1.0284, "step": 605000 }, { "epoch": 2.24, "learning_rate": 1.2729746032918469e-05, "loss": 1.0186, "step": 605500 }, { "epoch": 2.24, "learning_rate": 1.2698969605199987e-05, "loss": 1.0257, "step": 606000 }, { "epoch": 2.24, "learning_rate": 1.2668193177481505e-05, "loss": 1.0252, "step": 606500 }, { "epoch": 2.24, "learning_rate": 1.2637416749763023e-05, "loss": 1.0336, "step": 607000 }, { "epoch": 2.24, "learning_rate": 1.260664032204454e-05, "loss": 1.0255, "step": 607500 }, { "epoch": 2.25, "learning_rate": 1.257586389432606e-05, "loss": 1.0264, "step": 608000 }, { "epoch": 2.25, "learning_rate": 1.2545087466607578e-05, "loss": 1.0306, "step": 608500 }, { "epoch": 2.25, "learning_rate": 1.2514311038889095e-05, "loss": 1.0351, "step": 609000 }, { "epoch": 2.25, "learning_rate": 1.2483534611170612e-05, "loss": 1.0356, "step": 609500 }, { "epoch": 2.25, "learning_rate": 1.2452758183452131e-05, "loss": 1.024, "step": 610000 }, { "epoch": 2.25, "learning_rate": 1.2421981755733648e-05, "loss": 1.028, "step": 610500 }, { "epoch": 2.26, "learning_rate": 1.2391205328015167e-05, "loss": 1.0263, "step": 611000 }, { "epoch": 2.26, "learning_rate": 1.2360428900296684e-05, "loss": 1.0299, "step": 611500 }, { "epoch": 2.26, "learning_rate": 1.2329652472578203e-05, "loss": 1.0258, "step": 612000 }, { "epoch": 2.26, "learning_rate": 1.2298876044859722e-05, "loss": 1.0265, "step": 612500 }, { "epoch": 2.26, "learning_rate": 1.226809961714124e-05, "loss": 1.0224, "step": 613000 }, { "epoch": 2.27, "learning_rate": 1.2237323189422759e-05, "loss": 1.0276, "step": 613500 }, { "epoch": 2.27, "learning_rate": 1.2206546761704276e-05, "loss": 1.0318, "step": 614000 }, { "epoch": 2.27, "learning_rate": 1.2175770333985795e-05, "loss": 1.0265, "step": 614500 }, { "epoch": 2.27, "learning_rate": 1.2144993906267314e-05, "loss": 1.0305, "step": 615000 }, { "epoch": 2.27, "learning_rate": 1.211421747854883e-05, "loss": 1.0289, "step": 615500 }, { "epoch": 2.27, "learning_rate": 1.208344105083035e-05, "loss": 1.0209, "step": 616000 }, { "epoch": 2.28, "learning_rate": 1.2052664623111867e-05, "loss": 1.0253, "step": 616500 }, { "epoch": 2.28, "learning_rate": 1.2021888195393385e-05, "loss": 1.0313, "step": 617000 }, { "epoch": 2.28, "learning_rate": 1.1991111767674904e-05, "loss": 1.025, "step": 617500 }, { "epoch": 2.28, "learning_rate": 1.1960335339956421e-05, "loss": 1.0258, "step": 618000 }, { "epoch": 2.28, "learning_rate": 1.192955891223794e-05, "loss": 1.016, "step": 618500 }, { "epoch": 2.29, "learning_rate": 1.1898782484519457e-05, "loss": 1.0291, "step": 619000 }, { "epoch": 2.29, "learning_rate": 1.1868006056800976e-05, "loss": 1.0263, "step": 619500 }, { "epoch": 2.29, "learning_rate": 1.1837229629082495e-05, "loss": 1.0274, "step": 620000 }, { "epoch": 2.29, "learning_rate": 1.1806453201364012e-05, "loss": 1.0261, "step": 620500 }, { "epoch": 2.29, "learning_rate": 1.177567677364553e-05, "loss": 1.0293, "step": 621000 }, { "epoch": 2.3, "learning_rate": 1.1744900345927048e-05, "loss": 1.0311, "step": 621500 }, { "epoch": 2.3, "learning_rate": 1.1714123918208567e-05, "loss": 1.0236, "step": 622000 }, { "epoch": 2.3, "learning_rate": 1.1683347490490085e-05, "loss": 1.0212, "step": 622500 }, { "epoch": 2.3, "learning_rate": 1.1652571062771602e-05, "loss": 1.0271, "step": 623000 }, { "epoch": 2.3, "learning_rate": 1.1621794635053121e-05, "loss": 1.018, "step": 623500 }, { "epoch": 2.3, "learning_rate": 1.1591018207334638e-05, "loss": 1.032, "step": 624000 }, { "epoch": 2.31, "learning_rate": 1.1560241779616157e-05, "loss": 1.0238, "step": 624500 }, { "epoch": 2.31, "learning_rate": 1.1529465351897676e-05, "loss": 1.0294, "step": 625000 }, { "epoch": 2.31, "learning_rate": 1.1498688924179193e-05, "loss": 1.0274, "step": 625500 }, { "epoch": 2.31, "learning_rate": 1.1467912496460712e-05, "loss": 1.0283, "step": 626000 }, { "epoch": 2.31, "learning_rate": 1.1437136068742229e-05, "loss": 1.0293, "step": 626500 }, { "epoch": 2.32, "learning_rate": 1.1406359641023748e-05, "loss": 1.023, "step": 627000 }, { "epoch": 2.32, "learning_rate": 1.1375583213305266e-05, "loss": 1.0227, "step": 627500 }, { "epoch": 2.32, "learning_rate": 1.1344806785586783e-05, "loss": 1.0293, "step": 628000 }, { "epoch": 2.32, "learning_rate": 1.1314030357868302e-05, "loss": 1.022, "step": 628500 }, { "epoch": 2.32, "learning_rate": 1.1283253930149821e-05, "loss": 1.0209, "step": 629000 }, { "epoch": 2.32, "learning_rate": 1.1252477502431338e-05, "loss": 1.0243, "step": 629500 }, { "epoch": 2.33, "learning_rate": 1.1221701074712857e-05, "loss": 1.0291, "step": 630000 }, { "epoch": 2.33, "learning_rate": 1.1190924646994374e-05, "loss": 1.0269, "step": 630500 }, { "epoch": 2.33, "learning_rate": 1.1160148219275893e-05, "loss": 1.0256, "step": 631000 }, { "epoch": 2.33, "learning_rate": 1.1129371791557412e-05, "loss": 1.0227, "step": 631500 }, { "epoch": 2.33, "learning_rate": 1.1098595363838929e-05, "loss": 1.0253, "step": 632000 }, { "epoch": 2.34, "learning_rate": 1.1067818936120447e-05, "loss": 1.0284, "step": 632500 }, { "epoch": 2.34, "learning_rate": 1.1037042508401965e-05, "loss": 1.0226, "step": 633000 }, { "epoch": 2.34, "learning_rate": 1.1006266080683483e-05, "loss": 1.0222, "step": 633500 }, { "epoch": 2.34, "learning_rate": 1.0975489652965002e-05, "loss": 1.0258, "step": 634000 }, { "epoch": 2.34, "learning_rate": 1.094471322524652e-05, "loss": 1.0206, "step": 634500 }, { "epoch": 2.35, "learning_rate": 1.0913936797528038e-05, "loss": 1.0256, "step": 635000 }, { "epoch": 2.35, "learning_rate": 1.0883160369809555e-05, "loss": 1.0255, "step": 635500 }, { "epoch": 2.35, "learning_rate": 1.0852383942091074e-05, "loss": 1.0245, "step": 636000 }, { "epoch": 2.35, "learning_rate": 1.0821607514372593e-05, "loss": 1.0236, "step": 636500 }, { "epoch": 2.35, "learning_rate": 1.079083108665411e-05, "loss": 1.0197, "step": 637000 }, { "epoch": 2.35, "learning_rate": 1.0760054658935629e-05, "loss": 1.0218, "step": 637500 }, { "epoch": 2.36, "learning_rate": 1.0729278231217146e-05, "loss": 1.0248, "step": 638000 }, { "epoch": 2.36, "learning_rate": 1.0698501803498664e-05, "loss": 1.0275, "step": 638500 }, { "epoch": 2.36, "learning_rate": 1.0667725375780183e-05, "loss": 1.0304, "step": 639000 }, { "epoch": 2.36, "learning_rate": 1.06369489480617e-05, "loss": 1.0205, "step": 639500 }, { "epoch": 2.36, "learning_rate": 1.0606172520343219e-05, "loss": 1.038, "step": 640000 }, { "epoch": 2.37, "learning_rate": 1.0575396092624736e-05, "loss": 1.0189, "step": 640500 }, { "epoch": 2.37, "learning_rate": 1.0544619664906255e-05, "loss": 1.0216, "step": 641000 }, { "epoch": 2.37, "learning_rate": 1.0513843237187774e-05, "loss": 1.0206, "step": 641500 }, { "epoch": 2.37, "learning_rate": 1.048306680946929e-05, "loss": 1.0216, "step": 642000 }, { "epoch": 2.37, "learning_rate": 1.045229038175081e-05, "loss": 1.0243, "step": 642500 }, { "epoch": 2.37, "learning_rate": 1.0421513954032328e-05, "loss": 1.015, "step": 643000 }, { "epoch": 2.38, "learning_rate": 1.0390737526313845e-05, "loss": 1.0225, "step": 643500 }, { "epoch": 2.38, "learning_rate": 1.0359961098595364e-05, "loss": 1.0217, "step": 644000 }, { "epoch": 2.38, "learning_rate": 1.0329184670876881e-05, "loss": 1.0163, "step": 644500 }, { "epoch": 2.38, "learning_rate": 1.02984082431584e-05, "loss": 1.0086, "step": 645000 }, { "epoch": 2.38, "learning_rate": 1.0267631815439919e-05, "loss": 1.0167, "step": 645500 }, { "epoch": 2.39, "learning_rate": 1.0236855387721436e-05, "loss": 1.0152, "step": 646000 }, { "epoch": 2.39, "learning_rate": 1.0206078960002955e-05, "loss": 1.0267, "step": 646500 }, { "epoch": 2.39, "learning_rate": 1.0175302532284472e-05, "loss": 1.0146, "step": 647000 }, { "epoch": 2.39, "learning_rate": 1.014452610456599e-05, "loss": 1.0155, "step": 647500 }, { "epoch": 2.39, "learning_rate": 1.011374967684751e-05, "loss": 1.0239, "step": 648000 }, { "epoch": 2.4, "learning_rate": 1.0082973249129028e-05, "loss": 1.0219, "step": 648500 }, { "epoch": 2.4, "learning_rate": 1.0052196821410547e-05, "loss": 1.0199, "step": 649000 }, { "epoch": 2.4, "learning_rate": 1.0021420393692064e-05, "loss": 1.0163, "step": 649500 }, { "epoch": 2.4, "learning_rate": 9.990643965973583e-06, "loss": 1.0213, "step": 650000 }, { "epoch": 2.4, "learning_rate": 9.9598675382551e-06, "loss": 1.0201, "step": 650500 }, { "epoch": 2.4, "learning_rate": 9.929091110536619e-06, "loss": 1.0217, "step": 651000 }, { "epoch": 2.41, "learning_rate": 9.898314682818138e-06, "loss": 1.0217, "step": 651500 }, { "epoch": 2.41, "learning_rate": 9.867538255099655e-06, "loss": 1.0152, "step": 652000 }, { "epoch": 2.41, "learning_rate": 9.836761827381173e-06, "loss": 1.0236, "step": 652500 }, { "epoch": 2.41, "learning_rate": 9.80598539966269e-06, "loss": 1.0183, "step": 653000 }, { "epoch": 2.41, "learning_rate": 9.77520897194421e-06, "loss": 1.0152, "step": 653500 }, { "epoch": 2.42, "learning_rate": 9.744432544225728e-06, "loss": 1.0205, "step": 654000 }, { "epoch": 2.42, "learning_rate": 9.713656116507245e-06, "loss": 1.0268, "step": 654500 }, { "epoch": 2.42, "learning_rate": 9.682879688788764e-06, "loss": 1.03, "step": 655000 }, { "epoch": 2.42, "learning_rate": 9.652103261070283e-06, "loss": 1.018, "step": 655500 }, { "epoch": 2.42, "learning_rate": 9.6213268333518e-06, "loss": 1.02, "step": 656000 }, { "epoch": 2.42, "learning_rate": 9.590550405633319e-06, "loss": 1.0198, "step": 656500 }, { "epoch": 2.43, "learning_rate": 9.559773977914836e-06, "loss": 1.0178, "step": 657000 }, { "epoch": 2.43, "learning_rate": 9.528997550196354e-06, "loss": 1.016, "step": 657500 }, { "epoch": 2.43, "learning_rate": 9.498221122477873e-06, "loss": 1.0213, "step": 658000 }, { "epoch": 2.43, "learning_rate": 9.46744469475939e-06, "loss": 1.0184, "step": 658500 }, { "epoch": 2.43, "learning_rate": 9.436668267040909e-06, "loss": 1.0244, "step": 659000 }, { "epoch": 2.44, "learning_rate": 9.405891839322426e-06, "loss": 1.0191, "step": 659500 }, { "epoch": 2.44, "learning_rate": 9.375115411603945e-06, "loss": 1.0232, "step": 660000 }, { "epoch": 2.44, "learning_rate": 9.344338983885464e-06, "loss": 1.0143, "step": 660500 }, { "epoch": 2.44, "learning_rate": 9.313562556166981e-06, "loss": 1.0198, "step": 661000 }, { "epoch": 2.44, "learning_rate": 9.2827861284485e-06, "loss": 1.0201, "step": 661500 }, { "epoch": 2.44, "learning_rate": 9.252009700730017e-06, "loss": 1.0174, "step": 662000 }, { "epoch": 2.45, "learning_rate": 9.221233273011536e-06, "loss": 1.0231, "step": 662500 }, { "epoch": 2.45, "learning_rate": 9.190456845293054e-06, "loss": 1.0119, "step": 663000 }, { "epoch": 2.45, "learning_rate": 9.159680417574571e-06, "loss": 1.0127, "step": 663500 }, { "epoch": 2.45, "learning_rate": 9.12890398985609e-06, "loss": 1.0168, "step": 664000 }, { "epoch": 2.45, "learning_rate": 9.098127562137607e-06, "loss": 1.0155, "step": 664500 }, { "epoch": 2.46, "learning_rate": 9.067351134419126e-06, "loss": 1.0175, "step": 665000 }, { "epoch": 2.46, "learning_rate": 9.036574706700645e-06, "loss": 1.0174, "step": 665500 }, { "epoch": 2.46, "learning_rate": 9.005798278982162e-06, "loss": 1.0173, "step": 666000 }, { "epoch": 2.46, "learning_rate": 8.97502185126368e-06, "loss": 1.0178, "step": 666500 }, { "epoch": 2.46, "learning_rate": 8.944245423545198e-06, "loss": 1.0124, "step": 667000 }, { "epoch": 2.47, "learning_rate": 8.913468995826717e-06, "loss": 1.0189, "step": 667500 }, { "epoch": 2.47, "learning_rate": 8.882692568108235e-06, "loss": 1.0231, "step": 668000 }, { "epoch": 2.47, "learning_rate": 8.851916140389752e-06, "loss": 1.0143, "step": 668500 }, { "epoch": 2.47, "learning_rate": 8.821139712671271e-06, "loss": 1.0153, "step": 669000 }, { "epoch": 2.47, "learning_rate": 8.79036328495279e-06, "loss": 1.0174, "step": 669500 }, { "epoch": 2.47, "learning_rate": 8.759586857234307e-06, "loss": 1.0158, "step": 670000 }, { "epoch": 2.48, "learning_rate": 8.728810429515826e-06, "loss": 1.0116, "step": 670500 }, { "epoch": 2.48, "learning_rate": 8.698034001797343e-06, "loss": 1.02, "step": 671000 }, { "epoch": 2.48, "learning_rate": 8.667257574078862e-06, "loss": 1.014, "step": 671500 }, { "epoch": 2.48, "learning_rate": 8.63648114636038e-06, "loss": 1.016, "step": 672000 }, { "epoch": 2.48, "learning_rate": 8.605704718641898e-06, "loss": 1.0173, "step": 672500 }, { "epoch": 2.49, "learning_rate": 8.574928290923416e-06, "loss": 1.0045, "step": 673000 }, { "epoch": 2.49, "learning_rate": 8.544151863204934e-06, "loss": 1.0185, "step": 673500 }, { "epoch": 2.49, "learning_rate": 8.513375435486452e-06, "loss": 1.0156, "step": 674000 }, { "epoch": 2.49, "learning_rate": 8.482599007767971e-06, "loss": 1.0106, "step": 674500 }, { "epoch": 2.49, "learning_rate": 8.451822580049488e-06, "loss": 1.0153, "step": 675000 }, { "epoch": 2.49, "learning_rate": 8.421046152331007e-06, "loss": 1.01, "step": 675500 }, { "epoch": 2.5, "learning_rate": 8.390269724612524e-06, "loss": 1.0161, "step": 676000 }, { "epoch": 2.5, "learning_rate": 8.359493296894043e-06, "loss": 1.0146, "step": 676500 }, { "epoch": 2.5, "learning_rate": 8.328716869175562e-06, "loss": 1.0152, "step": 677000 }, { "epoch": 2.5, "learning_rate": 8.297940441457079e-06, "loss": 1.0089, "step": 677500 }, { "epoch": 2.5, "learning_rate": 8.267164013738598e-06, "loss": 1.0139, "step": 678000 }, { "epoch": 2.51, "learning_rate": 8.236387586020115e-06, "loss": 1.0202, "step": 678500 }, { "epoch": 2.51, "learning_rate": 8.205611158301633e-06, "loss": 1.0082, "step": 679000 }, { "epoch": 2.51, "learning_rate": 8.174834730583152e-06, "loss": 1.0067, "step": 679500 }, { "epoch": 2.51, "learning_rate": 8.14405830286467e-06, "loss": 1.0156, "step": 680000 }, { "epoch": 2.51, "learning_rate": 8.113281875146188e-06, "loss": 1.0075, "step": 680500 }, { "epoch": 2.52, "learning_rate": 8.082505447427705e-06, "loss": 1.0108, "step": 681000 }, { "epoch": 2.52, "learning_rate": 8.051729019709224e-06, "loss": 1.0115, "step": 681500 }, { "epoch": 2.52, "learning_rate": 8.020952591990743e-06, "loss": 1.014, "step": 682000 }, { "epoch": 2.52, "learning_rate": 7.99017616427226e-06, "loss": 1.0147, "step": 682500 }, { "epoch": 2.52, "learning_rate": 7.95939973655378e-06, "loss": 1.006, "step": 683000 }, { "epoch": 2.52, "learning_rate": 7.928623308835297e-06, "loss": 1.016, "step": 683500 }, { "epoch": 2.53, "learning_rate": 7.897846881116816e-06, "loss": 1.025, "step": 684000 }, { "epoch": 2.53, "learning_rate": 7.867070453398335e-06, "loss": 1.0174, "step": 684500 }, { "epoch": 2.53, "learning_rate": 7.836294025679852e-06, "loss": 1.0158, "step": 685000 }, { "epoch": 2.53, "learning_rate": 7.80551759796137e-06, "loss": 1.0128, "step": 685500 }, { "epoch": 2.53, "learning_rate": 7.774741170242888e-06, "loss": 1.0142, "step": 686000 }, { "epoch": 2.54, "learning_rate": 7.743964742524407e-06, "loss": 1.0199, "step": 686500 }, { "epoch": 2.54, "learning_rate": 7.713188314805925e-06, "loss": 1.0173, "step": 687000 }, { "epoch": 2.54, "learning_rate": 7.682411887087443e-06, "loss": 1.0143, "step": 687500 }, { "epoch": 2.54, "learning_rate": 7.651635459368961e-06, "loss": 1.0135, "step": 688000 }, { "epoch": 2.54, "learning_rate": 7.620859031650479e-06, "loss": 1.0042, "step": 688500 }, { "epoch": 2.54, "learning_rate": 7.590082603931997e-06, "loss": 1.0161, "step": 689000 }, { "epoch": 2.55, "learning_rate": 7.559306176213515e-06, "loss": 1.01, "step": 689500 }, { "epoch": 2.55, "learning_rate": 7.528529748495033e-06, "loss": 1.0041, "step": 690000 }, { "epoch": 2.55, "learning_rate": 7.497753320776552e-06, "loss": 1.0185, "step": 690500 }, { "epoch": 2.55, "learning_rate": 7.46697689305807e-06, "loss": 1.0042, "step": 691000 }, { "epoch": 2.55, "learning_rate": 7.436200465339588e-06, "loss": 0.9985, "step": 691500 }, { "epoch": 2.56, "learning_rate": 7.405424037621106e-06, "loss": 1.0094, "step": 692000 }, { "epoch": 2.56, "learning_rate": 7.3746476099026245e-06, "loss": 1.0084, "step": 692500 }, { "epoch": 2.56, "learning_rate": 7.343871182184142e-06, "loss": 1.0106, "step": 693000 }, { "epoch": 2.56, "learning_rate": 7.31309475446566e-06, "loss": 1.0174, "step": 693500 }, { "epoch": 2.56, "learning_rate": 7.282318326747178e-06, "loss": 1.0137, "step": 694000 }, { "epoch": 2.56, "learning_rate": 7.251541899028696e-06, "loss": 1.0066, "step": 694500 }, { "epoch": 2.57, "learning_rate": 7.220765471310215e-06, "loss": 1.0081, "step": 695000 }, { "epoch": 2.57, "learning_rate": 7.189989043591733e-06, "loss": 1.0042, "step": 695500 }, { "epoch": 2.57, "learning_rate": 7.159212615873251e-06, "loss": 1.0139, "step": 696000 }, { "epoch": 2.57, "learning_rate": 7.128436188154769e-06, "loss": 1.0137, "step": 696500 }, { "epoch": 2.57, "learning_rate": 7.097659760436287e-06, "loss": 1.0135, "step": 697000 }, { "epoch": 2.58, "learning_rate": 7.0668833327178055e-06, "loss": 1.01, "step": 697500 }, { "epoch": 2.58, "learning_rate": 7.0361069049993235e-06, "loss": 1.0041, "step": 698000 }, { "epoch": 2.58, "learning_rate": 7.005330477280841e-06, "loss": 1.0142, "step": 698500 }, { "epoch": 2.58, "learning_rate": 6.974554049562359e-06, "loss": 1.0058, "step": 699000 }, { "epoch": 2.58, "learning_rate": 6.943777621843878e-06, "loss": 1.0086, "step": 699500 }, { "epoch": 2.59, "learning_rate": 6.913001194125396e-06, "loss": 1.0055, "step": 700000 }, { "epoch": 2.59, "learning_rate": 6.882224766406914e-06, "loss": 1.0148, "step": 700500 }, { "epoch": 2.59, "learning_rate": 6.851448338688432e-06, "loss": 1.004, "step": 701000 }, { "epoch": 2.59, "learning_rate": 6.82067191096995e-06, "loss": 1.0096, "step": 701500 }, { "epoch": 2.59, "learning_rate": 6.789895483251469e-06, "loss": 1.0043, "step": 702000 }, { "epoch": 2.59, "learning_rate": 6.759119055532987e-06, "loss": 1.0115, "step": 702500 }, { "epoch": 2.6, "learning_rate": 6.7283426278145045e-06, "loss": 1.0067, "step": 703000 }, { "epoch": 2.6, "learning_rate": 6.6975662000960225e-06, "loss": 1.0118, "step": 703500 }, { "epoch": 2.6, "learning_rate": 6.66678977237754e-06, "loss": 1.0056, "step": 704000 }, { "epoch": 2.6, "learning_rate": 6.636013344659059e-06, "loss": 1.0146, "step": 704500 }, { "epoch": 2.6, "learning_rate": 6.605236916940577e-06, "loss": 1.0111, "step": 705000 }, { "epoch": 2.61, "learning_rate": 6.574460489222095e-06, "loss": 1.0074, "step": 705500 }, { "epoch": 2.61, "learning_rate": 6.543684061503613e-06, "loss": 1.0116, "step": 706000 }, { "epoch": 2.61, "learning_rate": 6.512907633785132e-06, "loss": 1.0076, "step": 706500 }, { "epoch": 2.61, "learning_rate": 6.48213120606665e-06, "loss": 1.008, "step": 707000 }, { "epoch": 2.61, "learning_rate": 6.451354778348168e-06, "loss": 1.0123, "step": 707500 }, { "epoch": 2.61, "learning_rate": 6.420578350629686e-06, "loss": 1.0056, "step": 708000 }, { "epoch": 2.62, "learning_rate": 6.3898019229112035e-06, "loss": 1.0049, "step": 708500 }, { "epoch": 2.62, "learning_rate": 6.359025495192722e-06, "loss": 1.0028, "step": 709000 }, { "epoch": 2.62, "learning_rate": 6.32824906747424e-06, "loss": 1.0069, "step": 709500 }, { "epoch": 2.62, "learning_rate": 6.297472639755758e-06, "loss": 1.0091, "step": 710000 }, { "epoch": 2.62, "learning_rate": 6.266696212037276e-06, "loss": 1.0159, "step": 710500 }, { "epoch": 2.63, "learning_rate": 6.235919784318795e-06, "loss": 1.0079, "step": 711000 }, { "epoch": 2.63, "learning_rate": 6.205143356600313e-06, "loss": 1.0096, "step": 711500 }, { "epoch": 2.63, "learning_rate": 6.174366928881832e-06, "loss": 1.0024, "step": 712000 }, { "epoch": 2.63, "learning_rate": 6.1435905011633496e-06, "loss": 0.9978, "step": 712500 }, { "epoch": 2.63, "learning_rate": 6.1128140734448675e-06, "loss": 1.0059, "step": 713000 }, { "epoch": 2.64, "learning_rate": 6.0820376457263854e-06, "loss": 1.0076, "step": 713500 }, { "epoch": 2.64, "learning_rate": 6.051261218007904e-06, "loss": 1.0115, "step": 714000 }, { "epoch": 2.64, "learning_rate": 6.020484790289422e-06, "loss": 1.0101, "step": 714500 }, { "epoch": 2.64, "learning_rate": 5.98970836257094e-06, "loss": 1.011, "step": 715000 }, { "epoch": 2.64, "learning_rate": 5.958931934852458e-06, "loss": 1.0119, "step": 715500 }, { "epoch": 2.64, "learning_rate": 5.928155507133976e-06, "loss": 1.0122, "step": 716000 }, { "epoch": 2.65, "learning_rate": 5.897379079415495e-06, "loss": 1.0039, "step": 716500 }, { "epoch": 2.65, "learning_rate": 5.866602651697013e-06, "loss": 1.0031, "step": 717000 }, { "epoch": 2.65, "learning_rate": 5.835826223978531e-06, "loss": 1.0059, "step": 717500 }, { "epoch": 2.65, "learning_rate": 5.8050497962600486e-06, "loss": 1.0082, "step": 718000 }, { "epoch": 2.65, "learning_rate": 5.7742733685415665e-06, "loss": 1.0027, "step": 718500 }, { "epoch": 2.66, "learning_rate": 5.743496940823085e-06, "loss": 0.9952, "step": 719000 }, { "epoch": 2.66, "learning_rate": 5.712720513104603e-06, "loss": 1.0025, "step": 719500 }, { "epoch": 2.66, "learning_rate": 5.681944085386121e-06, "loss": 1.0005, "step": 720000 }, { "epoch": 2.66, "learning_rate": 5.651167657667639e-06, "loss": 1.0066, "step": 720500 }, { "epoch": 2.66, "learning_rate": 5.620391229949158e-06, "loss": 1.0004, "step": 721000 }, { "epoch": 2.66, "learning_rate": 5.589614802230676e-06, "loss": 1.0066, "step": 721500 }, { "epoch": 2.67, "learning_rate": 5.558838374512194e-06, "loss": 1.0082, "step": 722000 }, { "epoch": 2.67, "learning_rate": 5.528061946793712e-06, "loss": 1.0082, "step": 722500 }, { "epoch": 2.67, "learning_rate": 5.49728551907523e-06, "loss": 1.0108, "step": 723000 }, { "epoch": 2.67, "learning_rate": 5.466509091356748e-06, "loss": 1.0073, "step": 723500 }, { "epoch": 2.67, "learning_rate": 5.435732663638266e-06, "loss": 1.0071, "step": 724000 }, { "epoch": 2.68, "learning_rate": 5.404956235919784e-06, "loss": 1.0023, "step": 724500 }, { "epoch": 2.68, "learning_rate": 5.374179808201302e-06, "loss": 0.9992, "step": 725000 }, { "epoch": 2.68, "learning_rate": 5.34340338048282e-06, "loss": 1.0023, "step": 725500 }, { "epoch": 2.68, "learning_rate": 5.312626952764339e-06, "loss": 0.9983, "step": 726000 }, { "epoch": 2.68, "learning_rate": 5.281850525045857e-06, "loss": 1.0009, "step": 726500 }, { "epoch": 2.68, "learning_rate": 5.251074097327376e-06, "loss": 1.0027, "step": 727000 }, { "epoch": 2.69, "learning_rate": 5.220297669608894e-06, "loss": 1.0083, "step": 727500 }, { "epoch": 2.69, "learning_rate": 5.1895212418904115e-06, "loss": 1.0076, "step": 728000 }, { "epoch": 2.69, "learning_rate": 5.15874481417193e-06, "loss": 0.9977, "step": 728500 }, { "epoch": 2.69, "learning_rate": 5.127968386453448e-06, "loss": 1.0028, "step": 729000 }, { "epoch": 2.69, "learning_rate": 5.097191958734966e-06, "loss": 1.0072, "step": 729500 }, { "epoch": 2.7, "learning_rate": 5.066415531016484e-06, "loss": 1.0031, "step": 730000 }, { "epoch": 2.7, "learning_rate": 5.035639103298002e-06, "loss": 1.0051, "step": 730500 }, { "epoch": 2.7, "learning_rate": 5.004862675579521e-06, "loss": 1.0055, "step": 731000 }, { "epoch": 2.7, "learning_rate": 4.974086247861039e-06, "loss": 1.0034, "step": 731500 }, { "epoch": 2.7, "learning_rate": 4.943309820142557e-06, "loss": 1.0042, "step": 732000 }, { "epoch": 2.71, "learning_rate": 4.912533392424075e-06, "loss": 1.0039, "step": 732500 }, { "epoch": 2.71, "learning_rate": 4.8817569647055934e-06, "loss": 1.0033, "step": 733000 }, { "epoch": 2.71, "learning_rate": 4.850980536987111e-06, "loss": 0.9988, "step": 733500 }, { "epoch": 2.71, "learning_rate": 4.820204109268629e-06, "loss": 1.0052, "step": 734000 }, { "epoch": 2.71, "learning_rate": 4.789427681550147e-06, "loss": 0.9979, "step": 734500 }, { "epoch": 2.71, "learning_rate": 4.758651253831665e-06, "loss": 0.9971, "step": 735000 }, { "epoch": 2.72, "learning_rate": 4.727874826113184e-06, "loss": 1.0054, "step": 735500 }, { "epoch": 2.72, "learning_rate": 4.697098398394702e-06, "loss": 0.9994, "step": 736000 }, { "epoch": 2.72, "learning_rate": 4.66632197067622e-06, "loss": 0.9963, "step": 736500 }, { "epoch": 2.72, "learning_rate": 4.635545542957738e-06, "loss": 0.9992, "step": 737000 }, { "epoch": 2.72, "learning_rate": 4.604769115239256e-06, "loss": 1.0022, "step": 737500 }, { "epoch": 2.73, "learning_rate": 4.5739926875207745e-06, "loss": 1.0108, "step": 738000 }, { "epoch": 2.73, "learning_rate": 4.5432162598022924e-06, "loss": 0.9971, "step": 738500 }, { "epoch": 2.73, "learning_rate": 4.51243983208381e-06, "loss": 0.9978, "step": 739000 }, { "epoch": 2.73, "learning_rate": 4.481663404365328e-06, "loss": 1.0005, "step": 739500 }, { "epoch": 2.73, "learning_rate": 4.450886976646847e-06, "loss": 1.0058, "step": 740000 }, { "epoch": 2.73, "learning_rate": 4.420110548928365e-06, "loss": 0.9967, "step": 740500 }, { "epoch": 2.74, "learning_rate": 4.389334121209883e-06, "loss": 0.9919, "step": 741000 }, { "epoch": 2.74, "learning_rate": 4.358557693491401e-06, "loss": 1.0065, "step": 741500 }, { "epoch": 2.74, "learning_rate": 4.327781265772919e-06, "loss": 1.0024, "step": 742000 }, { "epoch": 2.74, "learning_rate": 4.297004838054438e-06, "loss": 0.9953, "step": 742500 }, { "epoch": 2.74, "learning_rate": 4.2662284103359556e-06, "loss": 0.9981, "step": 743000 }, { "epoch": 2.75, "learning_rate": 4.2354519826174735e-06, "loss": 0.9962, "step": 743500 }, { "epoch": 2.75, "learning_rate": 4.2046755548989914e-06, "loss": 1.0003, "step": 744000 }, { "epoch": 2.75, "learning_rate": 4.17389912718051e-06, "loss": 0.995, "step": 744500 }, { "epoch": 2.75, "learning_rate": 4.143122699462028e-06, "loss": 1.0064, "step": 745000 }, { "epoch": 2.75, "learning_rate": 4.112346271743547e-06, "loss": 1.001, "step": 745500 }, { "epoch": 2.76, "learning_rate": 4.081569844025065e-06, "loss": 0.9993, "step": 746000 }, { "epoch": 2.76, "learning_rate": 4.050793416306583e-06, "loss": 0.9941, "step": 746500 }, { "epoch": 2.76, "learning_rate": 4.020016988588101e-06, "loss": 1.0024, "step": 747000 }, { "epoch": 2.76, "learning_rate": 3.9892405608696195e-06, "loss": 1.0016, "step": 747500 }, { "epoch": 2.76, "learning_rate": 3.9584641331511375e-06, "loss": 1.004, "step": 748000 }, { "epoch": 2.76, "learning_rate": 3.927687705432655e-06, "loss": 1.0006, "step": 748500 }, { "epoch": 2.77, "learning_rate": 3.896911277714173e-06, "loss": 1.0003, "step": 749000 }, { "epoch": 2.77, "learning_rate": 3.866134849995691e-06, "loss": 0.9968, "step": 749500 }, { "epoch": 2.77, "learning_rate": 3.83535842227721e-06, "loss": 0.9979, "step": 750000 }, { "epoch": 2.77, "learning_rate": 3.804581994558728e-06, "loss": 1.0025, "step": 750500 }, { "epoch": 2.77, "learning_rate": 3.773805566840246e-06, "loss": 0.9947, "step": 751000 }, { "epoch": 2.78, "learning_rate": 3.7430291391217643e-06, "loss": 0.9975, "step": 751500 }, { "epoch": 2.78, "learning_rate": 3.7122527114032822e-06, "loss": 1.0006, "step": 752000 }, { "epoch": 2.78, "learning_rate": 3.6814762836848e-06, "loss": 0.9894, "step": 752500 }, { "epoch": 2.78, "learning_rate": 3.6506998559663185e-06, "loss": 1.0005, "step": 753000 }, { "epoch": 2.78, "learning_rate": 3.6199234282478365e-06, "loss": 1.0106, "step": 753500 }, { "epoch": 2.78, "learning_rate": 3.589147000529355e-06, "loss": 1.0023, "step": 754000 }, { "epoch": 2.79, "learning_rate": 3.5583705728108728e-06, "loss": 0.9934, "step": 754500 }, { "epoch": 2.79, "learning_rate": 3.527594145092391e-06, "loss": 0.9958, "step": 755000 }, { "epoch": 2.79, "learning_rate": 3.496817717373909e-06, "loss": 0.9978, "step": 755500 }, { "epoch": 2.79, "learning_rate": 3.466041289655427e-06, "loss": 0.9994, "step": 756000 }, { "epoch": 2.79, "learning_rate": 3.4352648619369454e-06, "loss": 1.0067, "step": 756500 }, { "epoch": 2.8, "learning_rate": 3.4044884342184633e-06, "loss": 0.9952, "step": 757000 }, { "epoch": 2.8, "learning_rate": 3.3737120064999817e-06, "loss": 1.0057, "step": 757500 }, { "epoch": 2.8, "learning_rate": 3.3429355787814996e-06, "loss": 0.9982, "step": 758000 }, { "epoch": 2.8, "learning_rate": 3.312159151063018e-06, "loss": 0.9944, "step": 758500 }, { "epoch": 2.8, "learning_rate": 3.281382723344536e-06, "loss": 0.9935, "step": 759000 }, { "epoch": 2.8, "learning_rate": 3.250606295626054e-06, "loss": 0.9997, "step": 759500 }, { "epoch": 2.81, "learning_rate": 3.219829867907572e-06, "loss": 0.9982, "step": 760000 }, { "epoch": 2.81, "learning_rate": 3.18905344018909e-06, "loss": 0.9938, "step": 760500 }, { "epoch": 2.81, "learning_rate": 3.1582770124706085e-06, "loss": 0.9998, "step": 761000 }, { "epoch": 2.81, "learning_rate": 3.1275005847521264e-06, "loss": 0.9983, "step": 761500 }, { "epoch": 2.81, "learning_rate": 3.0967241570336448e-06, "loss": 1.0039, "step": 762000 }, { "epoch": 2.82, "learning_rate": 3.065947729315163e-06, "loss": 0.9966, "step": 762500 }, { "epoch": 2.82, "learning_rate": 3.035171301596681e-06, "loss": 0.9901, "step": 763000 }, { "epoch": 2.82, "learning_rate": 3.0043948738781994e-06, "loss": 0.9968, "step": 763500 }, { "epoch": 2.82, "learning_rate": 2.9736184461597174e-06, "loss": 0.9932, "step": 764000 }, { "epoch": 2.82, "learning_rate": 2.9428420184412353e-06, "loss": 0.9969, "step": 764500 }, { "epoch": 2.83, "learning_rate": 2.9120655907227537e-06, "loss": 0.9881, "step": 765000 }, { "epoch": 2.83, "learning_rate": 2.8812891630042716e-06, "loss": 0.9991, "step": 765500 }, { "epoch": 2.83, "learning_rate": 2.8505127352857904e-06, "loss": 0.994, "step": 766000 }, { "epoch": 2.83, "learning_rate": 2.8197363075673083e-06, "loss": 0.9993, "step": 766500 }, { "epoch": 2.83, "learning_rate": 2.7889598798488263e-06, "loss": 0.987, "step": 767000 }, { "epoch": 2.83, "learning_rate": 2.7581834521303446e-06, "loss": 0.9931, "step": 767500 }, { "epoch": 2.84, "learning_rate": 2.7274070244118626e-06, "loss": 0.9928, "step": 768000 }, { "epoch": 2.84, "learning_rate": 2.696630596693381e-06, "loss": 0.9971, "step": 768500 }, { "epoch": 2.84, "learning_rate": 2.665854168974899e-06, "loss": 0.9889, "step": 769000 }, { "epoch": 2.84, "learning_rate": 2.6350777412564172e-06, "loss": 0.9953, "step": 769500 }, { "epoch": 2.84, "learning_rate": 2.604301313537935e-06, "loss": 0.9981, "step": 770000 }, { "epoch": 2.85, "learning_rate": 2.573524885819453e-06, "loss": 0.9927, "step": 770500 }, { "epoch": 2.85, "learning_rate": 2.5427484581009715e-06, "loss": 0.9945, "step": 771000 }, { "epoch": 2.85, "learning_rate": 2.5119720303824894e-06, "loss": 1.0024, "step": 771500 }, { "epoch": 2.85, "learning_rate": 2.4811956026640078e-06, "loss": 0.9978, "step": 772000 }, { "epoch": 2.85, "learning_rate": 2.4504191749455257e-06, "loss": 0.991, "step": 772500 }, { "epoch": 2.85, "learning_rate": 2.419642747227044e-06, "loss": 0.9995, "step": 773000 }, { "epoch": 2.86, "learning_rate": 2.388866319508562e-06, "loss": 1.0027, "step": 773500 }, { "epoch": 2.86, "learning_rate": 2.35808989179008e-06, "loss": 0.9949, "step": 774000 }, { "epoch": 2.86, "learning_rate": 2.3273134640715983e-06, "loss": 0.9938, "step": 774500 }, { "epoch": 2.86, "learning_rate": 2.2965370363531166e-06, "loss": 1.0029, "step": 775000 }, { "epoch": 2.86, "learning_rate": 2.265760608634635e-06, "loss": 0.9957, "step": 775500 }, { "epoch": 2.87, "learning_rate": 2.234984180916153e-06, "loss": 0.9978, "step": 776000 }, { "epoch": 2.87, "learning_rate": 2.204207753197671e-06, "loss": 0.9916, "step": 776500 }, { "epoch": 2.87, "learning_rate": 2.1734313254791892e-06, "loss": 0.9881, "step": 777000 }, { "epoch": 2.87, "learning_rate": 2.142654897760707e-06, "loss": 0.9957, "step": 777500 }, { "epoch": 2.87, "learning_rate": 2.1118784700422255e-06, "loss": 0.9952, "step": 778000 }, { "epoch": 2.88, "learning_rate": 2.0811020423237435e-06, "loss": 0.9965, "step": 778500 }, { "epoch": 2.88, "learning_rate": 2.050325614605262e-06, "loss": 0.9938, "step": 779000 }, { "epoch": 2.88, "learning_rate": 2.0195491868867798e-06, "loss": 0.9886, "step": 779500 }, { "epoch": 2.88, "learning_rate": 1.9887727591682977e-06, "loss": 0.9973, "step": 780000 }, { "epoch": 2.88, "learning_rate": 1.957996331449816e-06, "loss": 0.9928, "step": 780500 }, { "epoch": 2.88, "learning_rate": 1.927219903731334e-06, "loss": 0.9938, "step": 781000 }, { "epoch": 2.89, "learning_rate": 1.8964434760128522e-06, "loss": 1.0034, "step": 781500 }, { "epoch": 2.89, "learning_rate": 1.8656670482943703e-06, "loss": 0.9944, "step": 782000 }, { "epoch": 2.89, "learning_rate": 1.8348906205758885e-06, "loss": 0.995, "step": 782500 }, { "epoch": 2.89, "learning_rate": 1.8041141928574066e-06, "loss": 0.9901, "step": 783000 }, { "epoch": 2.89, "learning_rate": 1.773337765138925e-06, "loss": 0.9913, "step": 783500 }, { "epoch": 2.9, "learning_rate": 1.7425613374204431e-06, "loss": 1.0005, "step": 784000 }, { "epoch": 2.9, "learning_rate": 1.7117849097019613e-06, "loss": 1.003, "step": 784500 }, { "epoch": 2.9, "learning_rate": 1.6810084819834794e-06, "loss": 0.9944, "step": 785000 }, { "epoch": 2.9, "learning_rate": 1.6502320542649976e-06, "loss": 1.0032, "step": 785500 }, { "epoch": 2.9, "learning_rate": 1.6194556265465157e-06, "loss": 0.9896, "step": 786000 }, { "epoch": 2.9, "learning_rate": 1.5886791988280339e-06, "loss": 0.9901, "step": 786500 }, { "epoch": 2.91, "learning_rate": 1.5579027711095518e-06, "loss": 0.9902, "step": 787000 }, { "epoch": 2.91, "learning_rate": 1.52712634339107e-06, "loss": 0.9962, "step": 787500 }, { "epoch": 2.91, "learning_rate": 1.496349915672588e-06, "loss": 0.9987, "step": 788000 }, { "epoch": 2.91, "learning_rate": 1.4655734879541062e-06, "loss": 0.9993, "step": 788500 }, { "epoch": 2.91, "learning_rate": 1.4347970602356244e-06, "loss": 0.9912, "step": 789000 }, { "epoch": 2.92, "learning_rate": 1.4040206325171425e-06, "loss": 0.9937, "step": 789500 }, { "epoch": 2.92, "learning_rate": 1.3732442047986607e-06, "loss": 0.9925, "step": 790000 }, { "epoch": 2.92, "learning_rate": 1.3424677770801788e-06, "loss": 0.991, "step": 790500 }, { "epoch": 2.92, "learning_rate": 1.311691349361697e-06, "loss": 0.9939, "step": 791000 }, { "epoch": 2.92, "learning_rate": 1.2809149216432151e-06, "loss": 0.9964, "step": 791500 }, { "epoch": 2.92, "learning_rate": 1.2501384939247333e-06, "loss": 0.9929, "step": 792000 }, { "epoch": 2.93, "learning_rate": 1.2193620662062514e-06, "loss": 0.9886, "step": 792500 }, { "epoch": 2.93, "learning_rate": 1.1885856384877696e-06, "loss": 0.9947, "step": 793000 }, { "epoch": 2.93, "learning_rate": 1.1578092107692875e-06, "loss": 1.0, "step": 793500 }, { "epoch": 2.93, "learning_rate": 1.1270327830508057e-06, "loss": 0.9931, "step": 794000 }, { "epoch": 2.93, "learning_rate": 1.096256355332324e-06, "loss": 0.9967, "step": 794500 }, { "epoch": 2.94, "learning_rate": 1.0654799276138422e-06, "loss": 0.9944, "step": 795000 }, { "epoch": 2.94, "learning_rate": 1.0347034998953603e-06, "loss": 0.9877, "step": 795500 }, { "epoch": 2.94, "learning_rate": 1.0039270721768783e-06, "loss": 0.9924, "step": 796000 }, { "epoch": 2.94, "learning_rate": 9.731506444583964e-07, "loss": 0.9876, "step": 796500 }, { "epoch": 2.94, "learning_rate": 9.423742167399145e-07, "loss": 0.9878, "step": 797000 }, { "epoch": 2.95, "learning_rate": 9.115977890214327e-07, "loss": 0.9909, "step": 797500 }, { "epoch": 2.95, "learning_rate": 8.808213613029508e-07, "loss": 0.9948, "step": 798000 }, { "epoch": 2.95, "learning_rate": 8.50044933584469e-07, "loss": 0.9887, "step": 798500 }, { "epoch": 2.95, "learning_rate": 8.192685058659872e-07, "loss": 0.9965, "step": 799000 }, { "epoch": 2.95, "learning_rate": 7.884920781475054e-07, "loss": 0.9906, "step": 799500 }, { "epoch": 2.95, "learning_rate": 7.577156504290234e-07, "loss": 0.9947, "step": 800000 }, { "epoch": 2.96, "learning_rate": 7.269392227105416e-07, "loss": 0.9913, "step": 800500 }, { "epoch": 2.96, "learning_rate": 6.961627949920597e-07, "loss": 0.9915, "step": 801000 }, { "epoch": 2.96, "learning_rate": 6.653863672735778e-07, "loss": 0.9846, "step": 801500 }, { "epoch": 2.96, "learning_rate": 6.346099395550959e-07, "loss": 1.0002, "step": 802000 }, { "epoch": 2.96, "learning_rate": 6.038335118366142e-07, "loss": 0.99, "step": 802500 }, { "epoch": 2.97, "learning_rate": 5.730570841181322e-07, "loss": 1.0009, "step": 803000 }, { "epoch": 2.97, "learning_rate": 5.422806563996504e-07, "loss": 0.9913, "step": 803500 }, { "epoch": 2.97, "learning_rate": 5.115042286811685e-07, "loss": 0.9849, "step": 804000 }, { "epoch": 2.97, "learning_rate": 4.807278009626867e-07, "loss": 0.9922, "step": 804500 }, { "epoch": 2.97, "learning_rate": 4.499513732442048e-07, "loss": 0.9885, "step": 805000 }, { "epoch": 2.97, "learning_rate": 4.1917494552572297e-07, "loss": 0.9909, "step": 805500 }, { "epoch": 2.98, "learning_rate": 3.883985178072411e-07, "loss": 0.99, "step": 806000 }, { "epoch": 2.98, "learning_rate": 3.576220900887592e-07, "loss": 0.9906, "step": 806500 }, { "epoch": 2.98, "learning_rate": 3.268456623702774e-07, "loss": 0.9865, "step": 807000 }, { "epoch": 2.98, "learning_rate": 2.960692346517955e-07, "loss": 0.9871, "step": 807500 }, { "epoch": 2.98, "learning_rate": 2.652928069333136e-07, "loss": 0.9933, "step": 808000 }, { "epoch": 2.99, "learning_rate": 2.3451637921483178e-07, "loss": 0.9954, "step": 808500 }, { "epoch": 2.99, "learning_rate": 2.037399514963499e-07, "loss": 0.9935, "step": 809000 }, { "epoch": 2.99, "learning_rate": 1.7296352377786808e-07, "loss": 0.9843, "step": 809500 }, { "epoch": 2.99, "learning_rate": 1.421870960593862e-07, "loss": 0.9897, "step": 810000 }, { "epoch": 2.99, "learning_rate": 1.1141066834090434e-07, "loss": 0.9914, "step": 810500 }, { "epoch": 3.0, "learning_rate": 8.063424062242248e-08, "loss": 0.9921, "step": 811000 }, { "epoch": 3.0, "learning_rate": 4.985781290394062e-08, "loss": 0.9981, "step": 811500 }, { "epoch": 3.0, "learning_rate": 1.9081385185458755e-08, "loss": 1.0003, "step": 812000 }, { "epoch": 3.0, "step": 812310, "total_flos": 3.420875271615529e+18, "train_loss": 1.1082924517030088, "train_runtime": 412795.0235, "train_samples_per_second": 31.485, "train_steps_per_second": 1.968 } ], "max_steps": 812310, "num_train_epochs": 3, "total_flos": 3.420875271615529e+18, "trial_name": null, "trial_params": null }