diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,9769 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "global_step": 812310, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.996922357228152e-05, + "loss": 1.5563, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9938447144563037e-05, + "loss": 1.4798, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9907670716844554e-05, + "loss": 1.4636, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 4.987689428912608e-05, + "loss": 1.4411, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9846117861407595e-05, + "loss": 1.4334, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 4.981534143368911e-05, + "loss": 1.4234, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 4.978456500597063e-05, + "loss": 1.4075, + "step": 3500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9753788578252146e-05, + "loss": 1.4069, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 4.972301215053366e-05, + "loss": 1.4132, + "step": 4500 + }, + { + "epoch": 0.02, + "learning_rate": 4.969223572281519e-05, + "loss": 1.4005, + "step": 5000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9661459295096704e-05, + "loss": 1.3938, + "step": 5500 + }, + { + "epoch": 0.02, + "learning_rate": 4.963068286737822e-05, + "loss": 1.3949, + "step": 6000 + }, + { + "epoch": 0.02, + "learning_rate": 4.959990643965974e-05, + "loss": 1.3849, + "step": 6500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9569130011941255e-05, + "loss": 1.3828, + "step": 7000 + }, + { + "epoch": 0.03, + "learning_rate": 4.953835358422277e-05, + "loss": 1.3794, + "step": 7500 + }, + { + "epoch": 0.03, + "learning_rate": 4.950757715650429e-05, + "loss": 1.3727, + "step": 8000 + }, + { + "epoch": 0.03, + "learning_rate": 4.947680072878581e-05, + "loss": 1.3797, + "step": 8500 + }, + { + "epoch": 0.03, + "learning_rate": 4.944602430106733e-05, + "loss": 1.364, + "step": 9000 + }, + { + "epoch": 0.04, + "learning_rate": 4.941524787334885e-05, + "loss": 1.3602, + "step": 9500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9384471445630365e-05, + "loss": 1.3704, + "step": 10000 + }, + { + "epoch": 0.04, + "learning_rate": 4.935369501791188e-05, + "loss": 1.3619, + "step": 10500 + }, + { + "epoch": 0.04, + "learning_rate": 4.93229185901934e-05, + "loss": 1.369, + "step": 11000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9292142162474916e-05, + "loss": 1.3687, + "step": 11500 + }, + { + "epoch": 0.04, + "learning_rate": 4.926136573475644e-05, + "loss": 1.3481, + "step": 12000 + }, + { + "epoch": 0.05, + "learning_rate": 4.923058930703796e-05, + "loss": 1.3553, + "step": 12500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9199812879319474e-05, + "loss": 1.3532, + "step": 13000 + }, + { + "epoch": 0.05, + "learning_rate": 4.916903645160099e-05, + "loss": 1.3519, + "step": 13500 + }, + { + "epoch": 0.05, + "learning_rate": 4.913826002388251e-05, + "loss": 1.3524, + "step": 14000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9107483596164025e-05, + "loss": 1.3427, + "step": 14500 + }, + { + "epoch": 0.06, + "learning_rate": 4.907670716844555e-05, + "loss": 1.3436, + "step": 15000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9045930740727066e-05, + "loss": 1.3342, + "step": 15500 + }, + { + "epoch": 0.06, + "learning_rate": 4.901515431300858e-05, + "loss": 1.332, + "step": 16000 + }, + { + "epoch": 0.06, + "learning_rate": 4.89843778852901e-05, + "loss": 1.3384, + "step": 16500 + }, + { + "epoch": 0.06, + "learning_rate": 4.895360145757162e-05, + "loss": 1.3252, + "step": 17000 + }, + { + "epoch": 0.06, + "learning_rate": 4.8922825029853134e-05, + "loss": 1.3405, + "step": 17500 + }, + { + "epoch": 0.07, + "learning_rate": 4.889204860213465e-05, + "loss": 1.3363, + "step": 18000 + }, + { + "epoch": 0.07, + "learning_rate": 4.8861272174416175e-05, + "loss": 1.3297, + "step": 18500 + }, + { + "epoch": 0.07, + "learning_rate": 4.883049574669769e-05, + "loss": 1.3268, + "step": 19000 + }, + { + "epoch": 0.07, + "learning_rate": 4.879971931897921e-05, + "loss": 1.3136, + "step": 19500 + }, + { + "epoch": 0.07, + "learning_rate": 4.876894289126073e-05, + "loss": 1.3182, + "step": 20000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8738166463542244e-05, + "loss": 1.3226, + "step": 20500 + }, + { + "epoch": 0.08, + "learning_rate": 4.870739003582376e-05, + "loss": 1.3186, + "step": 21000 + }, + { + "epoch": 0.08, + "learning_rate": 4.8676613608105285e-05, + "loss": 1.3234, + "step": 21500 + }, + { + "epoch": 0.08, + "learning_rate": 4.86458371803868e-05, + "loss": 1.3214, + "step": 22000 + }, + { + "epoch": 0.08, + "learning_rate": 4.861506075266832e-05, + "loss": 1.3184, + "step": 22500 + }, + { + "epoch": 0.08, + "learning_rate": 4.8584284324949836e-05, + "loss": 1.3074, + "step": 23000 + }, + { + "epoch": 0.09, + "learning_rate": 4.855350789723135e-05, + "loss": 1.3137, + "step": 23500 + }, + { + "epoch": 0.09, + "learning_rate": 4.852273146951287e-05, + "loss": 1.3114, + "step": 24000 + }, + { + "epoch": 0.09, + "learning_rate": 4.849195504179439e-05, + "loss": 1.3174, + "step": 24500 + }, + { + "epoch": 0.09, + "learning_rate": 4.846117861407591e-05, + "loss": 1.3094, + "step": 25000 + }, + { + "epoch": 0.09, + "learning_rate": 4.843040218635743e-05, + "loss": 1.3133, + "step": 25500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8399625758638945e-05, + "loss": 1.3095, + "step": 26000 + }, + { + "epoch": 0.1, + "learning_rate": 4.836884933092046e-05, + "loss": 1.3052, + "step": 26500 + }, + { + "epoch": 0.1, + "learning_rate": 4.833807290320198e-05, + "loss": 1.3048, + "step": 27000 + }, + { + "epoch": 0.1, + "learning_rate": 4.8307296475483497e-05, + "loss": 1.3093, + "step": 27500 + }, + { + "epoch": 0.1, + "learning_rate": 4.8276520047765014e-05, + "loss": 1.307, + "step": 28000 + }, + { + "epoch": 0.11, + "learning_rate": 4.824574362004654e-05, + "loss": 1.312, + "step": 28500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8214967192328055e-05, + "loss": 1.3006, + "step": 29000 + }, + { + "epoch": 0.11, + "learning_rate": 4.818419076460957e-05, + "loss": 1.3069, + "step": 29500 + }, + { + "epoch": 0.11, + "learning_rate": 4.815341433689109e-05, + "loss": 1.2969, + "step": 30000 + }, + { + "epoch": 0.11, + "learning_rate": 4.8122637909172606e-05, + "loss": 1.3042, + "step": 30500 + }, + { + "epoch": 0.11, + "learning_rate": 4.809186148145412e-05, + "loss": 1.2999, + "step": 31000 + }, + { + "epoch": 0.12, + "learning_rate": 4.806108505373565e-05, + "loss": 1.3, + "step": 31500 + }, + { + "epoch": 0.12, + "learning_rate": 4.8030308626017164e-05, + "loss": 1.2973, + "step": 32000 + }, + { + "epoch": 0.12, + "learning_rate": 4.799953219829868e-05, + "loss": 1.2949, + "step": 32500 + }, + { + "epoch": 0.12, + "learning_rate": 4.79687557705802e-05, + "loss": 1.293, + "step": 33000 + }, + { + "epoch": 0.12, + "learning_rate": 4.7937979342861715e-05, + "loss": 1.2834, + "step": 33500 + }, + { + "epoch": 0.13, + "learning_rate": 4.790720291514323e-05, + "loss": 1.3032, + "step": 34000 + }, + { + "epoch": 0.13, + "learning_rate": 4.787642648742475e-05, + "loss": 1.2926, + "step": 34500 + }, + { + "epoch": 0.13, + "learning_rate": 4.784565005970627e-05, + "loss": 1.291, + "step": 35000 + }, + { + "epoch": 0.13, + "learning_rate": 4.781487363198779e-05, + "loss": 1.2958, + "step": 35500 + }, + { + "epoch": 0.13, + "learning_rate": 4.778409720426931e-05, + "loss": 1.2883, + "step": 36000 + }, + { + "epoch": 0.13, + "learning_rate": 4.7753320776550824e-05, + "loss": 1.2998, + "step": 36500 + }, + { + "epoch": 0.14, + "learning_rate": 4.772254434883234e-05, + "loss": 1.2781, + "step": 37000 + }, + { + "epoch": 0.14, + "learning_rate": 4.769176792111386e-05, + "loss": 1.2933, + "step": 37500 + }, + { + "epoch": 0.14, + "learning_rate": 4.766099149339538e-05, + "loss": 1.2969, + "step": 38000 + }, + { + "epoch": 0.14, + "learning_rate": 4.76302150656769e-05, + "loss": 1.279, + "step": 38500 + }, + { + "epoch": 0.14, + "learning_rate": 4.759943863795842e-05, + "loss": 1.2929, + "step": 39000 + }, + { + "epoch": 0.15, + "learning_rate": 4.7568662210239934e-05, + "loss": 1.2873, + "step": 39500 + }, + { + "epoch": 0.15, + "learning_rate": 4.753788578252145e-05, + "loss": 1.2923, + "step": 40000 + }, + { + "epoch": 0.15, + "learning_rate": 4.750710935480297e-05, + "loss": 1.2974, + "step": 40500 + }, + { + "epoch": 0.15, + "learning_rate": 4.7476332927084485e-05, + "loss": 1.2789, + "step": 41000 + }, + { + "epoch": 0.15, + "learning_rate": 4.744555649936601e-05, + "loss": 1.2783, + "step": 41500 + }, + { + "epoch": 0.16, + "learning_rate": 4.7414780071647526e-05, + "loss": 1.2781, + "step": 42000 + }, + { + "epoch": 0.16, + "learning_rate": 4.738400364392904e-05, + "loss": 1.2925, + "step": 42500 + }, + { + "epoch": 0.16, + "learning_rate": 4.735322721621056e-05, + "loss": 1.2818, + "step": 43000 + }, + { + "epoch": 0.16, + "learning_rate": 4.732245078849208e-05, + "loss": 1.2788, + "step": 43500 + }, + { + "epoch": 0.16, + "learning_rate": 4.7291674360773594e-05, + "loss": 1.2732, + "step": 44000 + }, + { + "epoch": 0.16, + "learning_rate": 4.726089793305512e-05, + "loss": 1.278, + "step": 44500 + }, + { + "epoch": 0.17, + "learning_rate": 4.7230121505336635e-05, + "loss": 1.2772, + "step": 45000 + }, + { + "epoch": 0.17, + "learning_rate": 4.719934507761815e-05, + "loss": 1.2752, + "step": 45500 + }, + { + "epoch": 0.17, + "learning_rate": 4.716856864989967e-05, + "loss": 1.2623, + "step": 46000 + }, + { + "epoch": 0.17, + "learning_rate": 4.713779222218119e-05, + "loss": 1.2704, + "step": 46500 + }, + { + "epoch": 0.17, + "learning_rate": 4.7107015794462704e-05, + "loss": 1.2778, + "step": 47000 + }, + { + "epoch": 0.18, + "learning_rate": 4.707623936674422e-05, + "loss": 1.2775, + "step": 47500 + }, + { + "epoch": 0.18, + "learning_rate": 4.7045462939025745e-05, + "loss": 1.2728, + "step": 48000 + }, + { + "epoch": 0.18, + "learning_rate": 4.701468651130726e-05, + "loss": 1.2722, + "step": 48500 + }, + { + "epoch": 0.18, + "learning_rate": 4.698391008358878e-05, + "loss": 1.2701, + "step": 49000 + }, + { + "epoch": 0.18, + "learning_rate": 4.6953133655870296e-05, + "loss": 1.2808, + "step": 49500 + }, + { + "epoch": 0.18, + "learning_rate": 4.692235722815181e-05, + "loss": 1.2653, + "step": 50000 + }, + { + "epoch": 0.19, + "learning_rate": 4.689158080043333e-05, + "loss": 1.2769, + "step": 50500 + }, + { + "epoch": 0.19, + "learning_rate": 4.686080437271485e-05, + "loss": 1.2752, + "step": 51000 + }, + { + "epoch": 0.19, + "learning_rate": 4.683002794499637e-05, + "loss": 1.2725, + "step": 51500 + }, + { + "epoch": 0.19, + "learning_rate": 4.679925151727789e-05, + "loss": 1.26, + "step": 52000 + }, + { + "epoch": 0.19, + "learning_rate": 4.6768475089559405e-05, + "loss": 1.2682, + "step": 52500 + }, + { + "epoch": 0.2, + "learning_rate": 4.673769866184092e-05, + "loss": 1.2759, + "step": 53000 + }, + { + "epoch": 0.2, + "learning_rate": 4.670692223412244e-05, + "loss": 1.2719, + "step": 53500 + }, + { + "epoch": 0.2, + "learning_rate": 4.6676145806403957e-05, + "loss": 1.268, + "step": 54000 + }, + { + "epoch": 0.2, + "learning_rate": 4.664536937868548e-05, + "loss": 1.2631, + "step": 54500 + }, + { + "epoch": 0.2, + "learning_rate": 4.6614592950967e-05, + "loss": 1.2692, + "step": 55000 + }, + { + "epoch": 0.2, + "learning_rate": 4.6583816523248515e-05, + "loss": 1.269, + "step": 55500 + }, + { + "epoch": 0.21, + "learning_rate": 4.655304009553003e-05, + "loss": 1.2673, + "step": 56000 + }, + { + "epoch": 0.21, + "learning_rate": 4.652226366781155e-05, + "loss": 1.2633, + "step": 56500 + }, + { + "epoch": 0.21, + "learning_rate": 4.6491487240093066e-05, + "loss": 1.2589, + "step": 57000 + }, + { + "epoch": 0.21, + "learning_rate": 4.646071081237458e-05, + "loss": 1.2639, + "step": 57500 + }, + { + "epoch": 0.21, + "learning_rate": 4.642993438465611e-05, + "loss": 1.2571, + "step": 58000 + }, + { + "epoch": 0.22, + "learning_rate": 4.6399157956937624e-05, + "loss": 1.2647, + "step": 58500 + }, + { + "epoch": 0.22, + "learning_rate": 4.636838152921914e-05, + "loss": 1.2582, + "step": 59000 + }, + { + "epoch": 0.22, + "learning_rate": 4.633760510150066e-05, + "loss": 1.259, + "step": 59500 + }, + { + "epoch": 0.22, + "learning_rate": 4.6306828673782175e-05, + "loss": 1.2657, + "step": 60000 + }, + { + "epoch": 0.22, + "learning_rate": 4.627605224606369e-05, + "loss": 1.2602, + "step": 60500 + }, + { + "epoch": 0.23, + "learning_rate": 4.6245275818345216e-05, + "loss": 1.2594, + "step": 61000 + }, + { + "epoch": 0.23, + "learning_rate": 4.621449939062673e-05, + "loss": 1.2608, + "step": 61500 + }, + { + "epoch": 0.23, + "learning_rate": 4.618372296290825e-05, + "loss": 1.259, + "step": 62000 + }, + { + "epoch": 0.23, + "learning_rate": 4.615294653518977e-05, + "loss": 1.2659, + "step": 62500 + }, + { + "epoch": 0.23, + "learning_rate": 4.6122170107471284e-05, + "loss": 1.2597, + "step": 63000 + }, + { + "epoch": 0.23, + "learning_rate": 4.60913936797528e-05, + "loss": 1.2524, + "step": 63500 + }, + { + "epoch": 0.24, + "learning_rate": 4.606061725203432e-05, + "loss": 1.2521, + "step": 64000 + }, + { + "epoch": 0.24, + "learning_rate": 4.602984082431584e-05, + "loss": 1.2588, + "step": 64500 + }, + { + "epoch": 0.24, + "learning_rate": 4.599906439659736e-05, + "loss": 1.2554, + "step": 65000 + }, + { + "epoch": 0.24, + "learning_rate": 4.596828796887888e-05, + "loss": 1.25, + "step": 65500 + }, + { + "epoch": 0.24, + "learning_rate": 4.5937511541160394e-05, + "loss": 1.2467, + "step": 66000 + }, + { + "epoch": 0.25, + "learning_rate": 4.590673511344191e-05, + "loss": 1.2472, + "step": 66500 + }, + { + "epoch": 0.25, + "learning_rate": 4.587595868572343e-05, + "loss": 1.2652, + "step": 67000 + }, + { + "epoch": 0.25, + "learning_rate": 4.5845182258004945e-05, + "loss": 1.2506, + "step": 67500 + }, + { + "epoch": 0.25, + "learning_rate": 4.581440583028647e-05, + "loss": 1.247, + "step": 68000 + }, + { + "epoch": 0.25, + "learning_rate": 4.5783629402567986e-05, + "loss": 1.2519, + "step": 68500 + }, + { + "epoch": 0.25, + "learning_rate": 4.57528529748495e-05, + "loss": 1.2489, + "step": 69000 + }, + { + "epoch": 0.26, + "learning_rate": 4.572207654713102e-05, + "loss": 1.2508, + "step": 69500 + }, + { + "epoch": 0.26, + "learning_rate": 4.569130011941254e-05, + "loss": 1.2556, + "step": 70000 + }, + { + "epoch": 0.26, + "learning_rate": 4.566052369169406e-05, + "loss": 1.2555, + "step": 70500 + }, + { + "epoch": 0.26, + "learning_rate": 4.562974726397558e-05, + "loss": 1.2407, + "step": 71000 + }, + { + "epoch": 0.26, + "learning_rate": 4.55989708362571e-05, + "loss": 1.2478, + "step": 71500 + }, + { + "epoch": 0.27, + "learning_rate": 4.556819440853862e-05, + "loss": 1.2537, + "step": 72000 + }, + { + "epoch": 0.27, + "learning_rate": 4.5537417980820136e-05, + "loss": 1.2487, + "step": 72500 + }, + { + "epoch": 0.27, + "learning_rate": 4.550664155310165e-05, + "loss": 1.2418, + "step": 73000 + }, + { + "epoch": 0.27, + "learning_rate": 4.547586512538317e-05, + "loss": 1.2568, + "step": 73500 + }, + { + "epoch": 0.27, + "learning_rate": 4.544508869766469e-05, + "loss": 1.2428, + "step": 74000 + }, + { + "epoch": 0.28, + "learning_rate": 4.5414312269946205e-05, + "loss": 1.2588, + "step": 74500 + }, + { + "epoch": 0.28, + "learning_rate": 4.538353584222773e-05, + "loss": 1.2455, + "step": 75000 + }, + { + "epoch": 0.28, + "learning_rate": 4.5352759414509246e-05, + "loss": 1.2459, + "step": 75500 + }, + { + "epoch": 0.28, + "learning_rate": 4.532198298679076e-05, + "loss": 1.2502, + "step": 76000 + }, + { + "epoch": 0.28, + "learning_rate": 4.529120655907228e-05, + "loss": 1.2396, + "step": 76500 + }, + { + "epoch": 0.28, + "learning_rate": 4.52604301313538e-05, + "loss": 1.2437, + "step": 77000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5229653703635314e-05, + "loss": 1.2482, + "step": 77500 + }, + { + "epoch": 0.29, + "learning_rate": 4.519887727591683e-05, + "loss": 1.2368, + "step": 78000 + }, + { + "epoch": 0.29, + "learning_rate": 4.5168100848198355e-05, + "loss": 1.2435, + "step": 78500 + }, + { + "epoch": 0.29, + "learning_rate": 4.513732442047987e-05, + "loss": 1.2463, + "step": 79000 + }, + { + "epoch": 0.29, + "learning_rate": 4.510654799276139e-05, + "loss": 1.239, + "step": 79500 + }, + { + "epoch": 0.3, + "learning_rate": 4.5075771565042906e-05, + "loss": 1.2439, + "step": 80000 + }, + { + "epoch": 0.3, + "learning_rate": 4.504499513732442e-05, + "loss": 1.2313, + "step": 80500 + }, + { + "epoch": 0.3, + "learning_rate": 4.501421870960594e-05, + "loss": 1.2419, + "step": 81000 + }, + { + "epoch": 0.3, + "learning_rate": 4.4983442281887464e-05, + "loss": 1.2356, + "step": 81500 + }, + { + "epoch": 0.3, + "learning_rate": 4.495266585416898e-05, + "loss": 1.2308, + "step": 82000 + }, + { + "epoch": 0.3, + "learning_rate": 4.49218894264505e-05, + "loss": 1.2382, + "step": 82500 + }, + { + "epoch": 0.31, + "learning_rate": 4.4891112998732015e-05, + "loss": 1.2555, + "step": 83000 + }, + { + "epoch": 0.31, + "learning_rate": 4.486033657101353e-05, + "loss": 1.245, + "step": 83500 + }, + { + "epoch": 0.31, + "learning_rate": 4.482956014329505e-05, + "loss": 1.2384, + "step": 84000 + }, + { + "epoch": 0.31, + "learning_rate": 4.479878371557657e-05, + "loss": 1.2379, + "step": 84500 + }, + { + "epoch": 0.31, + "learning_rate": 4.476800728785809e-05, + "loss": 1.241, + "step": 85000 + }, + { + "epoch": 0.32, + "learning_rate": 4.473723086013961e-05, + "loss": 1.2262, + "step": 85500 + }, + { + "epoch": 0.32, + "learning_rate": 4.4706454432421125e-05, + "loss": 1.2283, + "step": 86000 + }, + { + "epoch": 0.32, + "learning_rate": 4.467567800470264e-05, + "loss": 1.2331, + "step": 86500 + }, + { + "epoch": 0.32, + "learning_rate": 4.464490157698416e-05, + "loss": 1.2439, + "step": 87000 + }, + { + "epoch": 0.32, + "learning_rate": 4.4614125149265676e-05, + "loss": 1.2369, + "step": 87500 + }, + { + "epoch": 0.32, + "learning_rate": 4.45833487215472e-05, + "loss": 1.2335, + "step": 88000 + }, + { + "epoch": 0.33, + "learning_rate": 4.455257229382872e-05, + "loss": 1.236, + "step": 88500 + }, + { + "epoch": 0.33, + "learning_rate": 4.4521795866110234e-05, + "loss": 1.2333, + "step": 89000 + }, + { + "epoch": 0.33, + "learning_rate": 4.449101943839175e-05, + "loss": 1.2288, + "step": 89500 + }, + { + "epoch": 0.33, + "learning_rate": 4.446024301067327e-05, + "loss": 1.2315, + "step": 90000 + }, + { + "epoch": 0.33, + "learning_rate": 4.4429466582954785e-05, + "loss": 1.2349, + "step": 90500 + }, + { + "epoch": 0.34, + "learning_rate": 4.43986901552363e-05, + "loss": 1.2227, + "step": 91000 + }, + { + "epoch": 0.34, + "learning_rate": 4.4367913727517826e-05, + "loss": 1.2261, + "step": 91500 + }, + { + "epoch": 0.34, + "learning_rate": 4.4337137299799343e-05, + "loss": 1.2303, + "step": 92000 + }, + { + "epoch": 0.34, + "learning_rate": 4.430636087208086e-05, + "loss": 1.2235, + "step": 92500 + }, + { + "epoch": 0.34, + "learning_rate": 4.427558444436238e-05, + "loss": 1.2336, + "step": 93000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4244808016643895e-05, + "loss": 1.2345, + "step": 93500 + }, + { + "epoch": 0.35, + "learning_rate": 4.421403158892541e-05, + "loss": 1.2257, + "step": 94000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4183255161206936e-05, + "loss": 1.2212, + "step": 94500 + }, + { + "epoch": 0.35, + "learning_rate": 4.415247873348845e-05, + "loss": 1.2221, + "step": 95000 + }, + { + "epoch": 0.35, + "learning_rate": 4.412170230576997e-05, + "loss": 1.2238, + "step": 95500 + }, + { + "epoch": 0.35, + "learning_rate": 4.409092587805149e-05, + "loss": 1.2256, + "step": 96000 + }, + { + "epoch": 0.36, + "learning_rate": 4.4060149450333004e-05, + "loss": 1.2271, + "step": 96500 + }, + { + "epoch": 0.36, + "learning_rate": 4.402937302261452e-05, + "loss": 1.2193, + "step": 97000 + }, + { + "epoch": 0.36, + "learning_rate": 4.399859659489604e-05, + "loss": 1.223, + "step": 97500 + }, + { + "epoch": 0.36, + "learning_rate": 4.396782016717756e-05, + "loss": 1.2272, + "step": 98000 + }, + { + "epoch": 0.36, + "learning_rate": 4.393704373945908e-05, + "loss": 1.2184, + "step": 98500 + }, + { + "epoch": 0.37, + "learning_rate": 4.3906267311740596e-05, + "loss": 1.2223, + "step": 99000 + }, + { + "epoch": 0.37, + "learning_rate": 4.387549088402211e-05, + "loss": 1.2296, + "step": 99500 + }, + { + "epoch": 0.37, + "learning_rate": 4.384471445630363e-05, + "loss": 1.2229, + "step": 100000 + }, + { + "epoch": 0.37, + "learning_rate": 4.381393802858515e-05, + "loss": 1.2206, + "step": 100500 + }, + { + "epoch": 0.37, + "learning_rate": 4.3783161600866665e-05, + "loss": 1.2293, + "step": 101000 + }, + { + "epoch": 0.37, + "learning_rate": 4.375238517314819e-05, + "loss": 1.2245, + "step": 101500 + }, + { + "epoch": 0.38, + "learning_rate": 4.3721608745429706e-05, + "loss": 1.2254, + "step": 102000 + }, + { + "epoch": 0.38, + "learning_rate": 4.369083231771122e-05, + "loss": 1.2136, + "step": 102500 + }, + { + "epoch": 0.38, + "learning_rate": 4.366005588999274e-05, + "loss": 1.2289, + "step": 103000 + }, + { + "epoch": 0.38, + "learning_rate": 4.362927946227426e-05, + "loss": 1.2234, + "step": 103500 + }, + { + "epoch": 0.38, + "learning_rate": 4.3598503034555774e-05, + "loss": 1.2157, + "step": 104000 + }, + { + "epoch": 0.39, + "learning_rate": 4.35677266068373e-05, + "loss": 1.2285, + "step": 104500 + }, + { + "epoch": 0.39, + "learning_rate": 4.3536950179118815e-05, + "loss": 1.2198, + "step": 105000 + }, + { + "epoch": 0.39, + "learning_rate": 4.350617375140033e-05, + "loss": 1.2256, + "step": 105500 + }, + { + "epoch": 0.39, + "learning_rate": 4.347539732368185e-05, + "loss": 1.2267, + "step": 106000 + }, + { + "epoch": 0.39, + "learning_rate": 4.3444620895963366e-05, + "loss": 1.2103, + "step": 106500 + }, + { + "epoch": 0.4, + "learning_rate": 4.341384446824488e-05, + "loss": 1.2176, + "step": 107000 + }, + { + "epoch": 0.4, + "learning_rate": 4.33830680405264e-05, + "loss": 1.2279, + "step": 107500 + }, + { + "epoch": 0.4, + "learning_rate": 4.3352291612807924e-05, + "loss": 1.2208, + "step": 108000 + }, + { + "epoch": 0.4, + "learning_rate": 4.332151518508944e-05, + "loss": 1.221, + "step": 108500 + }, + { + "epoch": 0.4, + "learning_rate": 4.329073875737096e-05, + "loss": 1.2268, + "step": 109000 + }, + { + "epoch": 0.4, + "learning_rate": 4.3259962329652475e-05, + "loss": 1.2279, + "step": 109500 + }, + { + "epoch": 0.41, + "learning_rate": 4.322918590193399e-05, + "loss": 1.2205, + "step": 110000 + }, + { + "epoch": 0.41, + "learning_rate": 4.319840947421551e-05, + "loss": 1.2212, + "step": 110500 + }, + { + "epoch": 0.41, + "learning_rate": 4.3167633046497034e-05, + "loss": 1.2086, + "step": 111000 + }, + { + "epoch": 0.41, + "learning_rate": 4.313685661877855e-05, + "loss": 1.2144, + "step": 111500 + }, + { + "epoch": 0.41, + "learning_rate": 4.310608019106007e-05, + "loss": 1.2346, + "step": 112000 + }, + { + "epoch": 0.42, + "learning_rate": 4.3075303763341585e-05, + "loss": 1.2149, + "step": 112500 + }, + { + "epoch": 0.42, + "learning_rate": 4.30445273356231e-05, + "loss": 1.2156, + "step": 113000 + }, + { + "epoch": 0.42, + "learning_rate": 4.301375090790462e-05, + "loss": 1.2136, + "step": 113500 + }, + { + "epoch": 0.42, + "learning_rate": 4.2982974480186136e-05, + "loss": 1.2101, + "step": 114000 + }, + { + "epoch": 0.42, + "learning_rate": 4.295219805246766e-05, + "loss": 1.2077, + "step": 114500 + }, + { + "epoch": 0.42, + "learning_rate": 4.292142162474918e-05, + "loss": 1.2137, + "step": 115000 + }, + { + "epoch": 0.43, + "learning_rate": 4.2890645197030694e-05, + "loss": 1.2186, + "step": 115500 + }, + { + "epoch": 0.43, + "learning_rate": 4.285986876931221e-05, + "loss": 1.2302, + "step": 116000 + }, + { + "epoch": 0.43, + "learning_rate": 4.282909234159373e-05, + "loss": 1.2185, + "step": 116500 + }, + { + "epoch": 0.43, + "learning_rate": 4.2798315913875245e-05, + "loss": 1.21, + "step": 117000 + }, + { + "epoch": 0.43, + "learning_rate": 4.276753948615676e-05, + "loss": 1.2065, + "step": 117500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2736763058438286e-05, + "loss": 1.2174, + "step": 118000 + }, + { + "epoch": 0.44, + "learning_rate": 4.2705986630719803e-05, + "loss": 1.2078, + "step": 118500 + }, + { + "epoch": 0.44, + "learning_rate": 4.267521020300132e-05, + "loss": 1.2036, + "step": 119000 + }, + { + "epoch": 0.44, + "learning_rate": 4.264443377528284e-05, + "loss": 1.2098, + "step": 119500 + }, + { + "epoch": 0.44, + "learning_rate": 4.2613657347564355e-05, + "loss": 1.209, + "step": 120000 + }, + { + "epoch": 0.45, + "learning_rate": 4.258288091984587e-05, + "loss": 1.2026, + "step": 120500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2552104492127396e-05, + "loss": 1.2192, + "step": 121000 + }, + { + "epoch": 0.45, + "learning_rate": 4.252132806440891e-05, + "loss": 1.219, + "step": 121500 + }, + { + "epoch": 0.45, + "learning_rate": 4.249055163669043e-05, + "loss": 1.2026, + "step": 122000 + }, + { + "epoch": 0.45, + "learning_rate": 4.245977520897195e-05, + "loss": 1.2077, + "step": 122500 + }, + { + "epoch": 0.45, + "learning_rate": 4.2428998781253464e-05, + "loss": 1.2012, + "step": 123000 + }, + { + "epoch": 0.46, + "learning_rate": 4.239822235353498e-05, + "loss": 1.213, + "step": 123500 + }, + { + "epoch": 0.46, + "learning_rate": 4.23674459258165e-05, + "loss": 1.2107, + "step": 124000 + }, + { + "epoch": 0.46, + "learning_rate": 4.233666949809802e-05, + "loss": 1.2144, + "step": 124500 + }, + { + "epoch": 0.46, + "learning_rate": 4.230589307037954e-05, + "loss": 1.2044, + "step": 125000 + }, + { + "epoch": 0.46, + "learning_rate": 4.2275116642661056e-05, + "loss": 1.2031, + "step": 125500 + }, + { + "epoch": 0.47, + "learning_rate": 4.224434021494257e-05, + "loss": 1.2116, + "step": 126000 + }, + { + "epoch": 0.47, + "learning_rate": 4.221356378722409e-05, + "loss": 1.2063, + "step": 126500 + }, + { + "epoch": 0.47, + "learning_rate": 4.218278735950561e-05, + "loss": 1.211, + "step": 127000 + }, + { + "epoch": 0.47, + "learning_rate": 4.215201093178713e-05, + "loss": 1.2049, + "step": 127500 + }, + { + "epoch": 0.47, + "learning_rate": 4.212123450406865e-05, + "loss": 1.2059, + "step": 128000 + }, + { + "epoch": 0.47, + "learning_rate": 4.2090458076350166e-05, + "loss": 1.2088, + "step": 128500 + }, + { + "epoch": 0.48, + "learning_rate": 4.205968164863168e-05, + "loss": 1.2126, + "step": 129000 + }, + { + "epoch": 0.48, + "learning_rate": 4.20289052209132e-05, + "loss": 1.1968, + "step": 129500 + }, + { + "epoch": 0.48, + "learning_rate": 4.199812879319472e-05, + "loss": 1.1977, + "step": 130000 + }, + { + "epoch": 0.48, + "learning_rate": 4.1967352365476234e-05, + "loss": 1.2032, + "step": 130500 + }, + { + "epoch": 0.48, + "learning_rate": 4.193657593775776e-05, + "loss": 1.2078, + "step": 131000 + }, + { + "epoch": 0.49, + "learning_rate": 4.1905799510039275e-05, + "loss": 1.1938, + "step": 131500 + }, + { + "epoch": 0.49, + "learning_rate": 4.187502308232079e-05, + "loss": 1.2054, + "step": 132000 + }, + { + "epoch": 0.49, + "learning_rate": 4.184424665460231e-05, + "loss": 1.208, + "step": 132500 + }, + { + "epoch": 0.49, + "learning_rate": 4.1813470226883826e-05, + "loss": 1.1988, + "step": 133000 + }, + { + "epoch": 0.49, + "learning_rate": 4.178269379916534e-05, + "loss": 1.2053, + "step": 133500 + }, + { + "epoch": 0.49, + "learning_rate": 4.175191737144686e-05, + "loss": 1.1997, + "step": 134000 + }, + { + "epoch": 0.5, + "learning_rate": 4.1721140943728384e-05, + "loss": 1.2018, + "step": 134500 + }, + { + "epoch": 0.5, + "learning_rate": 4.16903645160099e-05, + "loss": 1.2037, + "step": 135000 + }, + { + "epoch": 0.5, + "learning_rate": 4.165958808829142e-05, + "loss": 1.2033, + "step": 135500 + }, + { + "epoch": 0.5, + "learning_rate": 4.1628811660572935e-05, + "loss": 1.1989, + "step": 136000 + }, + { + "epoch": 0.5, + "learning_rate": 4.159803523285445e-05, + "loss": 1.2092, + "step": 136500 + }, + { + "epoch": 0.51, + "learning_rate": 4.156725880513597e-05, + "loss": 1.2002, + "step": 137000 + }, + { + "epoch": 0.51, + "learning_rate": 4.1536482377417493e-05, + "loss": 1.2061, + "step": 137500 + }, + { + "epoch": 0.51, + "learning_rate": 4.150570594969901e-05, + "loss": 1.2063, + "step": 138000 + }, + { + "epoch": 0.51, + "learning_rate": 4.147492952198053e-05, + "loss": 1.2015, + "step": 138500 + }, + { + "epoch": 0.51, + "learning_rate": 4.1444153094262045e-05, + "loss": 1.1987, + "step": 139000 + }, + { + "epoch": 0.52, + "learning_rate": 4.141337666654356e-05, + "loss": 1.1982, + "step": 139500 + }, + { + "epoch": 0.52, + "learning_rate": 4.138260023882508e-05, + "loss": 1.2023, + "step": 140000 + }, + { + "epoch": 0.52, + "learning_rate": 4.1351823811106596e-05, + "loss": 1.1953, + "step": 140500 + }, + { + "epoch": 0.52, + "learning_rate": 4.132104738338812e-05, + "loss": 1.1943, + "step": 141000 + }, + { + "epoch": 0.52, + "learning_rate": 4.129027095566964e-05, + "loss": 1.2085, + "step": 141500 + }, + { + "epoch": 0.52, + "learning_rate": 4.1259494527951154e-05, + "loss": 1.1943, + "step": 142000 + }, + { + "epoch": 0.53, + "learning_rate": 4.122871810023267e-05, + "loss": 1.1994, + "step": 142500 + }, + { + "epoch": 0.53, + "learning_rate": 4.119794167251419e-05, + "loss": 1.2037, + "step": 143000 + }, + { + "epoch": 0.53, + "learning_rate": 4.1167165244795705e-05, + "loss": 1.1968, + "step": 143500 + }, + { + "epoch": 0.53, + "learning_rate": 4.113638881707723e-05, + "loss": 1.2031, + "step": 144000 + }, + { + "epoch": 0.53, + "learning_rate": 4.1105612389358746e-05, + "loss": 1.2052, + "step": 144500 + }, + { + "epoch": 0.54, + "learning_rate": 4.107483596164026e-05, + "loss": 1.2006, + "step": 145000 + }, + { + "epoch": 0.54, + "learning_rate": 4.104405953392178e-05, + "loss": 1.1961, + "step": 145500 + }, + { + "epoch": 0.54, + "learning_rate": 4.10132831062033e-05, + "loss": 1.1909, + "step": 146000 + }, + { + "epoch": 0.54, + "learning_rate": 4.0982506678484815e-05, + "loss": 1.1995, + "step": 146500 + }, + { + "epoch": 0.54, + "learning_rate": 4.095173025076633e-05, + "loss": 1.1978, + "step": 147000 + }, + { + "epoch": 0.54, + "learning_rate": 4.0920953823047856e-05, + "loss": 1.1952, + "step": 147500 + }, + { + "epoch": 0.55, + "learning_rate": 4.089017739532937e-05, + "loss": 1.1901, + "step": 148000 + }, + { + "epoch": 0.55, + "learning_rate": 4.085940096761089e-05, + "loss": 1.1974, + "step": 148500 + }, + { + "epoch": 0.55, + "learning_rate": 4.082862453989241e-05, + "loss": 1.1914, + "step": 149000 + }, + { + "epoch": 0.55, + "learning_rate": 4.0797848112173924e-05, + "loss": 1.193, + "step": 149500 + }, + { + "epoch": 0.55, + "learning_rate": 4.076707168445544e-05, + "loss": 1.1915, + "step": 150000 + }, + { + "epoch": 0.56, + "learning_rate": 4.0736295256736965e-05, + "loss": 1.1945, + "step": 150500 + }, + { + "epoch": 0.56, + "learning_rate": 4.070551882901848e-05, + "loss": 1.1986, + "step": 151000 + }, + { + "epoch": 0.56, + "learning_rate": 4.06747424013e-05, + "loss": 1.1928, + "step": 151500 + }, + { + "epoch": 0.56, + "learning_rate": 4.0643965973581516e-05, + "loss": 1.1946, + "step": 152000 + }, + { + "epoch": 0.56, + "learning_rate": 4.061318954586303e-05, + "loss": 1.1959, + "step": 152500 + }, + { + "epoch": 0.57, + "learning_rate": 4.058241311814455e-05, + "loss": 1.1911, + "step": 153000 + }, + { + "epoch": 0.57, + "learning_rate": 4.055163669042607e-05, + "loss": 1.1901, + "step": 153500 + }, + { + "epoch": 0.57, + "learning_rate": 4.052086026270759e-05, + "loss": 1.195, + "step": 154000 + }, + { + "epoch": 0.57, + "learning_rate": 4.049008383498911e-05, + "loss": 1.1938, + "step": 154500 + }, + { + "epoch": 0.57, + "learning_rate": 4.0459307407270625e-05, + "loss": 1.1994, + "step": 155000 + }, + { + "epoch": 0.57, + "learning_rate": 4.042853097955214e-05, + "loss": 1.1911, + "step": 155500 + }, + { + "epoch": 0.58, + "learning_rate": 4.039775455183366e-05, + "loss": 1.1942, + "step": 156000 + }, + { + "epoch": 0.58, + "learning_rate": 4.036697812411518e-05, + "loss": 1.1889, + "step": 156500 + }, + { + "epoch": 0.58, + "learning_rate": 4.0336201696396694e-05, + "loss": 1.185, + "step": 157000 + }, + { + "epoch": 0.58, + "learning_rate": 4.030542526867822e-05, + "loss": 1.1938, + "step": 157500 + }, + { + "epoch": 0.58, + "learning_rate": 4.0274648840959735e-05, + "loss": 1.1941, + "step": 158000 + }, + { + "epoch": 0.59, + "learning_rate": 4.024387241324125e-05, + "loss": 1.1771, + "step": 158500 + }, + { + "epoch": 0.59, + "learning_rate": 4.021309598552277e-05, + "loss": 1.1895, + "step": 159000 + }, + { + "epoch": 0.59, + "learning_rate": 4.0182319557804286e-05, + "loss": 1.1915, + "step": 159500 + }, + { + "epoch": 0.59, + "learning_rate": 4.01515431300858e-05, + "loss": 1.1935, + "step": 160000 + }, + { + "epoch": 0.59, + "learning_rate": 4.012076670236733e-05, + "loss": 1.194, + "step": 160500 + }, + { + "epoch": 0.59, + "learning_rate": 4.0089990274648844e-05, + "loss": 1.189, + "step": 161000 + }, + { + "epoch": 0.6, + "learning_rate": 4.005921384693036e-05, + "loss": 1.1821, + "step": 161500 + }, + { + "epoch": 0.6, + "learning_rate": 4.002843741921188e-05, + "loss": 1.1875, + "step": 162000 + }, + { + "epoch": 0.6, + "learning_rate": 3.9997660991493395e-05, + "loss": 1.1758, + "step": 162500 + }, + { + "epoch": 0.6, + "learning_rate": 3.996688456377491e-05, + "loss": 1.1868, + "step": 163000 + }, + { + "epoch": 0.6, + "learning_rate": 3.993610813605643e-05, + "loss": 1.1873, + "step": 163500 + }, + { + "epoch": 0.61, + "learning_rate": 3.9905331708337953e-05, + "loss": 1.1875, + "step": 164000 + }, + { + "epoch": 0.61, + "learning_rate": 3.987455528061947e-05, + "loss": 1.1864, + "step": 164500 + }, + { + "epoch": 0.61, + "learning_rate": 3.984377885290099e-05, + "loss": 1.1872, + "step": 165000 + }, + { + "epoch": 0.61, + "learning_rate": 3.9813002425182505e-05, + "loss": 1.1796, + "step": 165500 + }, + { + "epoch": 0.61, + "learning_rate": 3.978222599746402e-05, + "loss": 1.1841, + "step": 166000 + }, + { + "epoch": 0.61, + "learning_rate": 3.975144956974554e-05, + "loss": 1.1899, + "step": 166500 + }, + { + "epoch": 0.62, + "learning_rate": 3.972067314202706e-05, + "loss": 1.1836, + "step": 167000 + }, + { + "epoch": 0.62, + "learning_rate": 3.968989671430858e-05, + "loss": 1.1829, + "step": 167500 + }, + { + "epoch": 0.62, + "learning_rate": 3.96591202865901e-05, + "loss": 1.1899, + "step": 168000 + }, + { + "epoch": 0.62, + "learning_rate": 3.9628343858871614e-05, + "loss": 1.1746, + "step": 168500 + }, + { + "epoch": 0.62, + "learning_rate": 3.959756743115313e-05, + "loss": 1.1882, + "step": 169000 + }, + { + "epoch": 0.63, + "learning_rate": 3.956679100343465e-05, + "loss": 1.1935, + "step": 169500 + }, + { + "epoch": 0.63, + "learning_rate": 3.9536014575716165e-05, + "loss": 1.1812, + "step": 170000 + }, + { + "epoch": 0.63, + "learning_rate": 3.950523814799769e-05, + "loss": 1.187, + "step": 170500 + }, + { + "epoch": 0.63, + "learning_rate": 3.9474461720279206e-05, + "loss": 1.1849, + "step": 171000 + }, + { + "epoch": 0.63, + "learning_rate": 3.944368529256072e-05, + "loss": 1.1839, + "step": 171500 + }, + { + "epoch": 0.64, + "learning_rate": 3.941290886484224e-05, + "loss": 1.1804, + "step": 172000 + }, + { + "epoch": 0.64, + "learning_rate": 3.938213243712376e-05, + "loss": 1.1821, + "step": 172500 + }, + { + "epoch": 0.64, + "learning_rate": 3.9351356009405275e-05, + "loss": 1.1805, + "step": 173000 + }, + { + "epoch": 0.64, + "learning_rate": 3.932057958168679e-05, + "loss": 1.1808, + "step": 173500 + }, + { + "epoch": 0.64, + "learning_rate": 3.9289803153968316e-05, + "loss": 1.1842, + "step": 174000 + }, + { + "epoch": 0.64, + "learning_rate": 3.925902672624983e-05, + "loss": 1.1811, + "step": 174500 + }, + { + "epoch": 0.65, + "learning_rate": 3.922825029853135e-05, + "loss": 1.1841, + "step": 175000 + }, + { + "epoch": 0.65, + "learning_rate": 3.919747387081287e-05, + "loss": 1.1749, + "step": 175500 + }, + { + "epoch": 0.65, + "learning_rate": 3.9166697443094384e-05, + "loss": 1.1738, + "step": 176000 + }, + { + "epoch": 0.65, + "learning_rate": 3.91359210153759e-05, + "loss": 1.1821, + "step": 176500 + }, + { + "epoch": 0.65, + "learning_rate": 3.9105144587657425e-05, + "loss": 1.1717, + "step": 177000 + }, + { + "epoch": 0.66, + "learning_rate": 3.907436815993894e-05, + "loss": 1.183, + "step": 177500 + }, + { + "epoch": 0.66, + "learning_rate": 3.904359173222046e-05, + "loss": 1.1745, + "step": 178000 + }, + { + "epoch": 0.66, + "learning_rate": 3.9012815304501976e-05, + "loss": 1.1769, + "step": 178500 + }, + { + "epoch": 0.66, + "learning_rate": 3.898203887678349e-05, + "loss": 1.181, + "step": 179000 + }, + { + "epoch": 0.66, + "learning_rate": 3.895126244906501e-05, + "loss": 1.1838, + "step": 179500 + }, + { + "epoch": 0.66, + "learning_rate": 3.892048602134653e-05, + "loss": 1.1808, + "step": 180000 + }, + { + "epoch": 0.67, + "learning_rate": 3.888970959362805e-05, + "loss": 1.1802, + "step": 180500 + }, + { + "epoch": 0.67, + "learning_rate": 3.885893316590957e-05, + "loss": 1.1713, + "step": 181000 + }, + { + "epoch": 0.67, + "learning_rate": 3.8828156738191085e-05, + "loss": 1.1832, + "step": 181500 + }, + { + "epoch": 0.67, + "learning_rate": 3.87973803104726e-05, + "loss": 1.1727, + "step": 182000 + }, + { + "epoch": 0.67, + "learning_rate": 3.876660388275412e-05, + "loss": 1.1835, + "step": 182500 + }, + { + "epoch": 0.68, + "learning_rate": 3.873582745503564e-05, + "loss": 1.1777, + "step": 183000 + }, + { + "epoch": 0.68, + "learning_rate": 3.870505102731716e-05, + "loss": 1.1784, + "step": 183500 + }, + { + "epoch": 0.68, + "learning_rate": 3.867427459959868e-05, + "loss": 1.173, + "step": 184000 + }, + { + "epoch": 0.68, + "learning_rate": 3.8643498171880195e-05, + "loss": 1.1789, + "step": 184500 + }, + { + "epoch": 0.68, + "learning_rate": 3.861272174416171e-05, + "loss": 1.1719, + "step": 185000 + }, + { + "epoch": 0.69, + "learning_rate": 3.858194531644323e-05, + "loss": 1.1741, + "step": 185500 + }, + { + "epoch": 0.69, + "learning_rate": 3.8551168888724746e-05, + "loss": 1.1817, + "step": 186000 + }, + { + "epoch": 0.69, + "learning_rate": 3.852039246100626e-05, + "loss": 1.1754, + "step": 186500 + }, + { + "epoch": 0.69, + "learning_rate": 3.848961603328779e-05, + "loss": 1.1789, + "step": 187000 + }, + { + "epoch": 0.69, + "learning_rate": 3.8458839605569304e-05, + "loss": 1.1693, + "step": 187500 + }, + { + "epoch": 0.69, + "learning_rate": 3.842806317785082e-05, + "loss": 1.1775, + "step": 188000 + }, + { + "epoch": 0.7, + "learning_rate": 3.839728675013234e-05, + "loss": 1.1698, + "step": 188500 + }, + { + "epoch": 0.7, + "learning_rate": 3.8366510322413855e-05, + "loss": 1.1773, + "step": 189000 + }, + { + "epoch": 0.7, + "learning_rate": 3.833573389469537e-05, + "loss": 1.1791, + "step": 189500 + }, + { + "epoch": 0.7, + "learning_rate": 3.830495746697689e-05, + "loss": 1.1708, + "step": 190000 + }, + { + "epoch": 0.7, + "learning_rate": 3.8274181039258413e-05, + "loss": 1.1702, + "step": 190500 + }, + { + "epoch": 0.71, + "learning_rate": 3.824340461153993e-05, + "loss": 1.1818, + "step": 191000 + }, + { + "epoch": 0.71, + "learning_rate": 3.821262818382145e-05, + "loss": 1.1785, + "step": 191500 + }, + { + "epoch": 0.71, + "learning_rate": 3.8181851756102965e-05, + "loss": 1.1732, + "step": 192000 + }, + { + "epoch": 0.71, + "learning_rate": 3.815107532838448e-05, + "loss": 1.1719, + "step": 192500 + }, + { + "epoch": 0.71, + "learning_rate": 3.8120298900666e-05, + "loss": 1.176, + "step": 193000 + }, + { + "epoch": 0.71, + "learning_rate": 3.808952247294752e-05, + "loss": 1.1743, + "step": 193500 + }, + { + "epoch": 0.72, + "learning_rate": 3.805874604522904e-05, + "loss": 1.1712, + "step": 194000 + }, + { + "epoch": 0.72, + "learning_rate": 3.802796961751056e-05, + "loss": 1.1693, + "step": 194500 + }, + { + "epoch": 0.72, + "learning_rate": 3.7997193189792074e-05, + "loss": 1.1695, + "step": 195000 + }, + { + "epoch": 0.72, + "learning_rate": 3.796641676207359e-05, + "loss": 1.1749, + "step": 195500 + }, + { + "epoch": 0.72, + "learning_rate": 3.793564033435511e-05, + "loss": 1.1658, + "step": 196000 + }, + { + "epoch": 0.73, + "learning_rate": 3.7904863906636625e-05, + "loss": 1.1661, + "step": 196500 + }, + { + "epoch": 0.73, + "learning_rate": 3.787408747891815e-05, + "loss": 1.179, + "step": 197000 + }, + { + "epoch": 0.73, + "learning_rate": 3.7843311051199666e-05, + "loss": 1.1703, + "step": 197500 + }, + { + "epoch": 0.73, + "learning_rate": 3.781253462348118e-05, + "loss": 1.1705, + "step": 198000 + }, + { + "epoch": 0.73, + "learning_rate": 3.77817581957627e-05, + "loss": 1.1607, + "step": 198500 + }, + { + "epoch": 0.73, + "learning_rate": 3.775098176804422e-05, + "loss": 1.1698, + "step": 199000 + }, + { + "epoch": 0.74, + "learning_rate": 3.7720205340325735e-05, + "loss": 1.1671, + "step": 199500 + }, + { + "epoch": 0.74, + "learning_rate": 3.768942891260726e-05, + "loss": 1.1753, + "step": 200000 + }, + { + "epoch": 0.74, + "learning_rate": 3.7658652484888776e-05, + "loss": 1.1637, + "step": 200500 + }, + { + "epoch": 0.74, + "learning_rate": 3.762787605717029e-05, + "loss": 1.169, + "step": 201000 + }, + { + "epoch": 0.74, + "learning_rate": 3.759709962945181e-05, + "loss": 1.1707, + "step": 201500 + }, + { + "epoch": 0.75, + "learning_rate": 3.756632320173333e-05, + "loss": 1.1625, + "step": 202000 + }, + { + "epoch": 0.75, + "learning_rate": 3.7535546774014844e-05, + "loss": 1.1568, + "step": 202500 + }, + { + "epoch": 0.75, + "learning_rate": 3.750477034629636e-05, + "loss": 1.1629, + "step": 203000 + }, + { + "epoch": 0.75, + "learning_rate": 3.7473993918577885e-05, + "loss": 1.1645, + "step": 203500 + }, + { + "epoch": 0.75, + "learning_rate": 3.74432174908594e-05, + "loss": 1.1689, + "step": 204000 + }, + { + "epoch": 0.76, + "learning_rate": 3.741244106314092e-05, + "loss": 1.1638, + "step": 204500 + }, + { + "epoch": 0.76, + "learning_rate": 3.7381664635422436e-05, + "loss": 1.1676, + "step": 205000 + }, + { + "epoch": 0.76, + "learning_rate": 3.735088820770395e-05, + "loss": 1.1614, + "step": 205500 + }, + { + "epoch": 0.76, + "learning_rate": 3.732011177998547e-05, + "loss": 1.1637, + "step": 206000 + }, + { + "epoch": 0.76, + "learning_rate": 3.7289335352266994e-05, + "loss": 1.161, + "step": 206500 + }, + { + "epoch": 0.76, + "learning_rate": 3.725855892454851e-05, + "loss": 1.1651, + "step": 207000 + }, + { + "epoch": 0.77, + "learning_rate": 3.722778249683003e-05, + "loss": 1.1691, + "step": 207500 + }, + { + "epoch": 0.77, + "learning_rate": 3.7197006069111545e-05, + "loss": 1.1666, + "step": 208000 + }, + { + "epoch": 0.77, + "learning_rate": 3.716622964139306e-05, + "loss": 1.1594, + "step": 208500 + }, + { + "epoch": 0.77, + "learning_rate": 3.713545321367458e-05, + "loss": 1.1636, + "step": 209000 + }, + { + "epoch": 0.77, + "learning_rate": 3.71046767859561e-05, + "loss": 1.1646, + "step": 209500 + }, + { + "epoch": 0.78, + "learning_rate": 3.707390035823762e-05, + "loss": 1.1693, + "step": 210000 + }, + { + "epoch": 0.78, + "learning_rate": 3.7043123930519144e-05, + "loss": 1.1628, + "step": 210500 + }, + { + "epoch": 0.78, + "learning_rate": 3.701234750280066e-05, + "loss": 1.1708, + "step": 211000 + }, + { + "epoch": 0.78, + "learning_rate": 3.698157107508218e-05, + "loss": 1.1617, + "step": 211500 + }, + { + "epoch": 0.78, + "learning_rate": 3.6950794647363696e-05, + "loss": 1.1649, + "step": 212000 + }, + { + "epoch": 0.78, + "learning_rate": 3.692001821964521e-05, + "loss": 1.1599, + "step": 212500 + }, + { + "epoch": 0.79, + "learning_rate": 3.688924179192673e-05, + "loss": 1.1562, + "step": 213000 + }, + { + "epoch": 0.79, + "learning_rate": 3.685846536420825e-05, + "loss": 1.1553, + "step": 213500 + }, + { + "epoch": 0.79, + "learning_rate": 3.682768893648977e-05, + "loss": 1.1671, + "step": 214000 + }, + { + "epoch": 0.79, + "learning_rate": 3.679691250877129e-05, + "loss": 1.1593, + "step": 214500 + }, + { + "epoch": 0.79, + "learning_rate": 3.6766136081052805e-05, + "loss": 1.1561, + "step": 215000 + }, + { + "epoch": 0.8, + "learning_rate": 3.673535965333432e-05, + "loss": 1.1652, + "step": 215500 + }, + { + "epoch": 0.8, + "learning_rate": 3.670458322561584e-05, + "loss": 1.1615, + "step": 216000 + }, + { + "epoch": 0.8, + "learning_rate": 3.6673806797897356e-05, + "loss": 1.1635, + "step": 216500 + }, + { + "epoch": 0.8, + "learning_rate": 3.664303037017888e-05, + "loss": 1.1586, + "step": 217000 + }, + { + "epoch": 0.8, + "learning_rate": 3.66122539424604e-05, + "loss": 1.1531, + "step": 217500 + }, + { + "epoch": 0.81, + "learning_rate": 3.6581477514741914e-05, + "loss": 1.1629, + "step": 218000 + }, + { + "epoch": 0.81, + "learning_rate": 3.655070108702343e-05, + "loss": 1.1649, + "step": 218500 + }, + { + "epoch": 0.81, + "learning_rate": 3.651992465930495e-05, + "loss": 1.1541, + "step": 219000 + }, + { + "epoch": 0.81, + "learning_rate": 3.6489148231586466e-05, + "loss": 1.1532, + "step": 219500 + }, + { + "epoch": 0.81, + "learning_rate": 3.645837180386798e-05, + "loss": 1.1637, + "step": 220000 + }, + { + "epoch": 0.81, + "learning_rate": 3.6427595376149507e-05, + "loss": 1.1622, + "step": 220500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6396818948431024e-05, + "loss": 1.1599, + "step": 221000 + }, + { + "epoch": 0.82, + "learning_rate": 3.636604252071254e-05, + "loss": 1.1606, + "step": 221500 + }, + { + "epoch": 0.82, + "learning_rate": 3.633526609299406e-05, + "loss": 1.16, + "step": 222000 + }, + { + "epoch": 0.82, + "learning_rate": 3.6304489665275575e-05, + "loss": 1.1663, + "step": 222500 + }, + { + "epoch": 0.82, + "learning_rate": 3.627371323755709e-05, + "loss": 1.1549, + "step": 223000 + }, + { + "epoch": 0.83, + "learning_rate": 3.624293680983861e-05, + "loss": 1.1636, + "step": 223500 + }, + { + "epoch": 0.83, + "learning_rate": 3.621216038212013e-05, + "loss": 1.1565, + "step": 224000 + }, + { + "epoch": 0.83, + "learning_rate": 3.618138395440165e-05, + "loss": 1.1622, + "step": 224500 + }, + { + "epoch": 0.83, + "learning_rate": 3.615060752668317e-05, + "loss": 1.1639, + "step": 225000 + }, + { + "epoch": 0.83, + "learning_rate": 3.6119831098964684e-05, + "loss": 1.1588, + "step": 225500 + }, + { + "epoch": 0.83, + "learning_rate": 3.60890546712462e-05, + "loss": 1.155, + "step": 226000 + }, + { + "epoch": 0.84, + "learning_rate": 3.605827824352772e-05, + "loss": 1.152, + "step": 226500 + }, + { + "epoch": 0.84, + "learning_rate": 3.602750181580924e-05, + "loss": 1.1504, + "step": 227000 + }, + { + "epoch": 0.84, + "learning_rate": 3.599672538809076e-05, + "loss": 1.1579, + "step": 227500 + }, + { + "epoch": 0.84, + "learning_rate": 3.5965948960372276e-05, + "loss": 1.1603, + "step": 228000 + }, + { + "epoch": 0.84, + "learning_rate": 3.5935172532653794e-05, + "loss": 1.1619, + "step": 228500 + }, + { + "epoch": 0.85, + "learning_rate": 3.590439610493531e-05, + "loss": 1.1486, + "step": 229000 + }, + { + "epoch": 0.85, + "learning_rate": 3.587361967721683e-05, + "loss": 1.1568, + "step": 229500 + }, + { + "epoch": 0.85, + "learning_rate": 3.5842843249498345e-05, + "loss": 1.1534, + "step": 230000 + }, + { + "epoch": 0.85, + "learning_rate": 3.581206682177987e-05, + "loss": 1.1532, + "step": 230500 + }, + { + "epoch": 0.85, + "learning_rate": 3.5781290394061386e-05, + "loss": 1.1587, + "step": 231000 + }, + { + "epoch": 0.85, + "learning_rate": 3.57505139663429e-05, + "loss": 1.1572, + "step": 231500 + }, + { + "epoch": 0.86, + "learning_rate": 3.571973753862442e-05, + "loss": 1.1539, + "step": 232000 + }, + { + "epoch": 0.86, + "learning_rate": 3.568896111090594e-05, + "loss": 1.1548, + "step": 232500 + }, + { + "epoch": 0.86, + "learning_rate": 3.5658184683187454e-05, + "loss": 1.1496, + "step": 233000 + }, + { + "epoch": 0.86, + "learning_rate": 3.562740825546898e-05, + "loss": 1.1589, + "step": 233500 + }, + { + "epoch": 0.86, + "learning_rate": 3.5596631827750495e-05, + "loss": 1.1562, + "step": 234000 + }, + { + "epoch": 0.87, + "learning_rate": 3.556585540003201e-05, + "loss": 1.1447, + "step": 234500 + }, + { + "epoch": 0.87, + "learning_rate": 3.553507897231353e-05, + "loss": 1.1534, + "step": 235000 + }, + { + "epoch": 0.87, + "learning_rate": 3.5504302544595046e-05, + "loss": 1.1518, + "step": 235500 + }, + { + "epoch": 0.87, + "learning_rate": 3.5473526116876563e-05, + "loss": 1.1538, + "step": 236000 + }, + { + "epoch": 0.87, + "learning_rate": 3.544274968915808e-05, + "loss": 1.1462, + "step": 236500 + }, + { + "epoch": 0.88, + "learning_rate": 3.5411973261439604e-05, + "loss": 1.1496, + "step": 237000 + }, + { + "epoch": 0.88, + "learning_rate": 3.538119683372112e-05, + "loss": 1.1523, + "step": 237500 + }, + { + "epoch": 0.88, + "learning_rate": 3.535042040600264e-05, + "loss": 1.1456, + "step": 238000 + }, + { + "epoch": 0.88, + "learning_rate": 3.5319643978284156e-05, + "loss": 1.1486, + "step": 238500 + }, + { + "epoch": 0.88, + "learning_rate": 3.528886755056567e-05, + "loss": 1.1433, + "step": 239000 + }, + { + "epoch": 0.88, + "learning_rate": 3.525809112284719e-05, + "loss": 1.1533, + "step": 239500 + }, + { + "epoch": 0.89, + "learning_rate": 3.522731469512871e-05, + "loss": 1.1488, + "step": 240000 + }, + { + "epoch": 0.89, + "learning_rate": 3.519653826741023e-05, + "loss": 1.1501, + "step": 240500 + }, + { + "epoch": 0.89, + "learning_rate": 3.516576183969175e-05, + "loss": 1.1464, + "step": 241000 + }, + { + "epoch": 0.89, + "learning_rate": 3.5134985411973265e-05, + "loss": 1.1515, + "step": 241500 + }, + { + "epoch": 0.89, + "learning_rate": 3.510420898425478e-05, + "loss": 1.1446, + "step": 242000 + }, + { + "epoch": 0.9, + "learning_rate": 3.50734325565363e-05, + "loss": 1.1515, + "step": 242500 + }, + { + "epoch": 0.9, + "learning_rate": 3.5042656128817816e-05, + "loss": 1.1478, + "step": 243000 + }, + { + "epoch": 0.9, + "learning_rate": 3.501187970109934e-05, + "loss": 1.1478, + "step": 243500 + }, + { + "epoch": 0.9, + "learning_rate": 3.498110327338086e-05, + "loss": 1.1541, + "step": 244000 + }, + { + "epoch": 0.9, + "learning_rate": 3.4950326845662374e-05, + "loss": 1.1524, + "step": 244500 + }, + { + "epoch": 0.9, + "learning_rate": 3.491955041794389e-05, + "loss": 1.1595, + "step": 245000 + }, + { + "epoch": 0.91, + "learning_rate": 3.488877399022541e-05, + "loss": 1.1546, + "step": 245500 + }, + { + "epoch": 0.91, + "learning_rate": 3.4857997562506926e-05, + "loss": 1.1554, + "step": 246000 + }, + { + "epoch": 0.91, + "learning_rate": 3.482722113478844e-05, + "loss": 1.1479, + "step": 246500 + }, + { + "epoch": 0.91, + "learning_rate": 3.4796444707069967e-05, + "loss": 1.1498, + "step": 247000 + }, + { + "epoch": 0.91, + "learning_rate": 3.4765668279351484e-05, + "loss": 1.1502, + "step": 247500 + }, + { + "epoch": 0.92, + "learning_rate": 3.4734891851633e-05, + "loss": 1.1477, + "step": 248000 + }, + { + "epoch": 0.92, + "learning_rate": 3.470411542391452e-05, + "loss": 1.1501, + "step": 248500 + }, + { + "epoch": 0.92, + "learning_rate": 3.4673338996196035e-05, + "loss": 1.1536, + "step": 249000 + }, + { + "epoch": 0.92, + "learning_rate": 3.464256256847755e-05, + "loss": 1.149, + "step": 249500 + }, + { + "epoch": 0.92, + "learning_rate": 3.4611786140759076e-05, + "loss": 1.1513, + "step": 250000 + }, + { + "epoch": 0.93, + "learning_rate": 3.458100971304059e-05, + "loss": 1.1459, + "step": 250500 + }, + { + "epoch": 0.93, + "learning_rate": 3.455023328532211e-05, + "loss": 1.1386, + "step": 251000 + }, + { + "epoch": 0.93, + "learning_rate": 3.451945685760363e-05, + "loss": 1.1449, + "step": 251500 + }, + { + "epoch": 0.93, + "learning_rate": 3.4488680429885144e-05, + "loss": 1.1577, + "step": 252000 + }, + { + "epoch": 0.93, + "learning_rate": 3.445790400216666e-05, + "loss": 1.1557, + "step": 252500 + }, + { + "epoch": 0.93, + "learning_rate": 3.442712757444818e-05, + "loss": 1.1495, + "step": 253000 + }, + { + "epoch": 0.94, + "learning_rate": 3.43963511467297e-05, + "loss": 1.1445, + "step": 253500 + }, + { + "epoch": 0.94, + "learning_rate": 3.436557471901122e-05, + "loss": 1.1511, + "step": 254000 + }, + { + "epoch": 0.94, + "learning_rate": 3.4334798291292736e-05, + "loss": 1.1509, + "step": 254500 + }, + { + "epoch": 0.94, + "learning_rate": 3.4304021863574254e-05, + "loss": 1.1537, + "step": 255000 + }, + { + "epoch": 0.94, + "learning_rate": 3.427324543585577e-05, + "loss": 1.1451, + "step": 255500 + }, + { + "epoch": 0.95, + "learning_rate": 3.424246900813729e-05, + "loss": 1.1454, + "step": 256000 + }, + { + "epoch": 0.95, + "learning_rate": 3.421169258041881e-05, + "loss": 1.15, + "step": 256500 + }, + { + "epoch": 0.95, + "learning_rate": 3.418091615270033e-05, + "loss": 1.1492, + "step": 257000 + }, + { + "epoch": 0.95, + "learning_rate": 3.4150139724981846e-05, + "loss": 1.1415, + "step": 257500 + }, + { + "epoch": 0.95, + "learning_rate": 3.411936329726336e-05, + "loss": 1.1405, + "step": 258000 + }, + { + "epoch": 0.95, + "learning_rate": 3.408858686954488e-05, + "loss": 1.1472, + "step": 258500 + }, + { + "epoch": 0.96, + "learning_rate": 3.40578104418264e-05, + "loss": 1.1429, + "step": 259000 + }, + { + "epoch": 0.96, + "learning_rate": 3.4027034014107914e-05, + "loss": 1.1368, + "step": 259500 + }, + { + "epoch": 0.96, + "learning_rate": 3.399625758638944e-05, + "loss": 1.1467, + "step": 260000 + }, + { + "epoch": 0.96, + "learning_rate": 3.3965481158670955e-05, + "loss": 1.1562, + "step": 260500 + }, + { + "epoch": 0.96, + "learning_rate": 3.393470473095247e-05, + "loss": 1.1372, + "step": 261000 + }, + { + "epoch": 0.97, + "learning_rate": 3.390392830323399e-05, + "loss": 1.1459, + "step": 261500 + }, + { + "epoch": 0.97, + "learning_rate": 3.3873151875515506e-05, + "loss": 1.1473, + "step": 262000 + }, + { + "epoch": 0.97, + "learning_rate": 3.3842375447797023e-05, + "loss": 1.1462, + "step": 262500 + }, + { + "epoch": 0.97, + "learning_rate": 3.381159902007854e-05, + "loss": 1.1363, + "step": 263000 + }, + { + "epoch": 0.97, + "learning_rate": 3.3780822592360064e-05, + "loss": 1.1392, + "step": 263500 + }, + { + "epoch": 0.97, + "learning_rate": 3.375004616464158e-05, + "loss": 1.1459, + "step": 264000 + }, + { + "epoch": 0.98, + "learning_rate": 3.37192697369231e-05, + "loss": 1.144, + "step": 264500 + }, + { + "epoch": 0.98, + "learning_rate": 3.3688493309204616e-05, + "loss": 1.1426, + "step": 265000 + }, + { + "epoch": 0.98, + "learning_rate": 3.365771688148613e-05, + "loss": 1.1369, + "step": 265500 + }, + { + "epoch": 0.98, + "learning_rate": 3.362694045376765e-05, + "loss": 1.1415, + "step": 266000 + }, + { + "epoch": 0.98, + "learning_rate": 3.3596164026049174e-05, + "loss": 1.1548, + "step": 266500 + }, + { + "epoch": 0.99, + "learning_rate": 3.356538759833069e-05, + "loss": 1.1399, + "step": 267000 + }, + { + "epoch": 0.99, + "learning_rate": 3.353461117061221e-05, + "loss": 1.1362, + "step": 267500 + }, + { + "epoch": 0.99, + "learning_rate": 3.3503834742893725e-05, + "loss": 1.1469, + "step": 268000 + }, + { + "epoch": 0.99, + "learning_rate": 3.347305831517524e-05, + "loss": 1.139, + "step": 268500 + }, + { + "epoch": 0.99, + "learning_rate": 3.344228188745676e-05, + "loss": 1.1356, + "step": 269000 + }, + { + "epoch": 1.0, + "learning_rate": 3.3411505459738276e-05, + "loss": 1.1382, + "step": 269500 + }, + { + "epoch": 1.0, + "learning_rate": 3.33807290320198e-05, + "loss": 1.1351, + "step": 270000 + }, + { + "epoch": 1.0, + "learning_rate": 3.334995260430132e-05, + "loss": 1.1331, + "step": 270500 + }, + { + "epoch": 1.0, + "learning_rate": 3.3319176176582834e-05, + "loss": 1.1403, + "step": 271000 + }, + { + "epoch": 1.0, + "learning_rate": 3.328839974886435e-05, + "loss": 1.1388, + "step": 271500 + }, + { + "epoch": 1.0, + "learning_rate": 3.325762332114587e-05, + "loss": 1.1388, + "step": 272000 + }, + { + "epoch": 1.01, + "learning_rate": 3.3226846893427386e-05, + "loss": 1.1347, + "step": 272500 + }, + { + "epoch": 1.01, + "learning_rate": 3.319607046570891e-05, + "loss": 1.1375, + "step": 273000 + }, + { + "epoch": 1.01, + "learning_rate": 3.3165294037990427e-05, + "loss": 1.1324, + "step": 273500 + }, + { + "epoch": 1.01, + "learning_rate": 3.3134517610271944e-05, + "loss": 1.1322, + "step": 274000 + }, + { + "epoch": 1.01, + "learning_rate": 3.310374118255346e-05, + "loss": 1.1442, + "step": 274500 + }, + { + "epoch": 1.02, + "learning_rate": 3.307296475483498e-05, + "loss": 1.1326, + "step": 275000 + }, + { + "epoch": 1.02, + "learning_rate": 3.3042188327116495e-05, + "loss": 1.1346, + "step": 275500 + }, + { + "epoch": 1.02, + "learning_rate": 3.301141189939801e-05, + "loss": 1.1319, + "step": 276000 + }, + { + "epoch": 1.02, + "learning_rate": 3.2980635471679536e-05, + "loss": 1.132, + "step": 276500 + }, + { + "epoch": 1.02, + "learning_rate": 3.294985904396105e-05, + "loss": 1.1366, + "step": 277000 + }, + { + "epoch": 1.02, + "learning_rate": 3.291908261624257e-05, + "loss": 1.136, + "step": 277500 + }, + { + "epoch": 1.03, + "learning_rate": 3.288830618852409e-05, + "loss": 1.1282, + "step": 278000 + }, + { + "epoch": 1.03, + "learning_rate": 3.2857529760805604e-05, + "loss": 1.1345, + "step": 278500 + }, + { + "epoch": 1.03, + "learning_rate": 3.282675333308712e-05, + "loss": 1.1328, + "step": 279000 + }, + { + "epoch": 1.03, + "learning_rate": 3.279597690536864e-05, + "loss": 1.1454, + "step": 279500 + }, + { + "epoch": 1.03, + "learning_rate": 3.276520047765016e-05, + "loss": 1.1363, + "step": 280000 + }, + { + "epoch": 1.04, + "learning_rate": 3.273442404993168e-05, + "loss": 1.1317, + "step": 280500 + }, + { + "epoch": 1.04, + "learning_rate": 3.2703647622213196e-05, + "loss": 1.1365, + "step": 281000 + }, + { + "epoch": 1.04, + "learning_rate": 3.2672871194494713e-05, + "loss": 1.1274, + "step": 281500 + }, + { + "epoch": 1.04, + "learning_rate": 3.264209476677623e-05, + "loss": 1.1183, + "step": 282000 + }, + { + "epoch": 1.04, + "learning_rate": 3.261131833905775e-05, + "loss": 1.1323, + "step": 282500 + }, + { + "epoch": 1.05, + "learning_rate": 3.258054191133927e-05, + "loss": 1.1345, + "step": 283000 + }, + { + "epoch": 1.05, + "learning_rate": 3.254976548362079e-05, + "loss": 1.1327, + "step": 283500 + }, + { + "epoch": 1.05, + "learning_rate": 3.2518989055902306e-05, + "loss": 1.1311, + "step": 284000 + }, + { + "epoch": 1.05, + "learning_rate": 3.248821262818382e-05, + "loss": 1.1279, + "step": 284500 + }, + { + "epoch": 1.05, + "learning_rate": 3.245743620046534e-05, + "loss": 1.1349, + "step": 285000 + }, + { + "epoch": 1.05, + "learning_rate": 3.242665977274686e-05, + "loss": 1.1286, + "step": 285500 + }, + { + "epoch": 1.06, + "learning_rate": 3.2395883345028374e-05, + "loss": 1.1328, + "step": 286000 + }, + { + "epoch": 1.06, + "learning_rate": 3.23651069173099e-05, + "loss": 1.1296, + "step": 286500 + }, + { + "epoch": 1.06, + "learning_rate": 3.2334330489591415e-05, + "loss": 1.1322, + "step": 287000 + }, + { + "epoch": 1.06, + "learning_rate": 3.230355406187293e-05, + "loss": 1.1292, + "step": 287500 + }, + { + "epoch": 1.06, + "learning_rate": 3.227277763415445e-05, + "loss": 1.1322, + "step": 288000 + }, + { + "epoch": 1.07, + "learning_rate": 3.2242001206435966e-05, + "loss": 1.1352, + "step": 288500 + }, + { + "epoch": 1.07, + "learning_rate": 3.2211224778717483e-05, + "loss": 1.1268, + "step": 289000 + }, + { + "epoch": 1.07, + "learning_rate": 3.218044835099901e-05, + "loss": 1.1239, + "step": 289500 + }, + { + "epoch": 1.07, + "learning_rate": 3.2149671923280524e-05, + "loss": 1.129, + "step": 290000 + }, + { + "epoch": 1.07, + "learning_rate": 3.211889549556204e-05, + "loss": 1.1321, + "step": 290500 + }, + { + "epoch": 1.07, + "learning_rate": 3.208811906784356e-05, + "loss": 1.1374, + "step": 291000 + }, + { + "epoch": 1.08, + "learning_rate": 3.2057342640125076e-05, + "loss": 1.1297, + "step": 291500 + }, + { + "epoch": 1.08, + "learning_rate": 3.202656621240659e-05, + "loss": 1.1311, + "step": 292000 + }, + { + "epoch": 1.08, + "learning_rate": 3.199578978468811e-05, + "loss": 1.1225, + "step": 292500 + }, + { + "epoch": 1.08, + "learning_rate": 3.1965013356969634e-05, + "loss": 1.1225, + "step": 293000 + }, + { + "epoch": 1.08, + "learning_rate": 3.193423692925115e-05, + "loss": 1.1295, + "step": 293500 + }, + { + "epoch": 1.09, + "learning_rate": 3.190346050153267e-05, + "loss": 1.1262, + "step": 294000 + }, + { + "epoch": 1.09, + "learning_rate": 3.1872684073814185e-05, + "loss": 1.1258, + "step": 294500 + }, + { + "epoch": 1.09, + "learning_rate": 3.18419076460957e-05, + "loss": 1.1154, + "step": 295000 + }, + { + "epoch": 1.09, + "learning_rate": 3.181113121837722e-05, + "loss": 1.1327, + "step": 295500 + }, + { + "epoch": 1.09, + "learning_rate": 3.1780354790658736e-05, + "loss": 1.1235, + "step": 296000 + }, + { + "epoch": 1.1, + "learning_rate": 3.174957836294026e-05, + "loss": 1.1287, + "step": 296500 + }, + { + "epoch": 1.1, + "learning_rate": 3.171880193522178e-05, + "loss": 1.128, + "step": 297000 + }, + { + "epoch": 1.1, + "learning_rate": 3.1688025507503294e-05, + "loss": 1.1221, + "step": 297500 + }, + { + "epoch": 1.1, + "learning_rate": 3.165724907978481e-05, + "loss": 1.1259, + "step": 298000 + }, + { + "epoch": 1.1, + "learning_rate": 3.162647265206633e-05, + "loss": 1.1263, + "step": 298500 + }, + { + "epoch": 1.1, + "learning_rate": 3.1595696224347846e-05, + "loss": 1.1298, + "step": 299000 + }, + { + "epoch": 1.11, + "learning_rate": 3.156491979662937e-05, + "loss": 1.1263, + "step": 299500 + }, + { + "epoch": 1.11, + "learning_rate": 3.1534143368910886e-05, + "loss": 1.1299, + "step": 300000 + }, + { + "epoch": 1.11, + "learning_rate": 3.1503366941192404e-05, + "loss": 1.1265, + "step": 300500 + }, + { + "epoch": 1.11, + "learning_rate": 3.147259051347392e-05, + "loss": 1.1249, + "step": 301000 + }, + { + "epoch": 1.11, + "learning_rate": 3.144181408575544e-05, + "loss": 1.1206, + "step": 301500 + }, + { + "epoch": 1.12, + "learning_rate": 3.1411037658036955e-05, + "loss": 1.1186, + "step": 302000 + }, + { + "epoch": 1.12, + "learning_rate": 3.138026123031847e-05, + "loss": 1.1181, + "step": 302500 + }, + { + "epoch": 1.12, + "learning_rate": 3.1349484802599996e-05, + "loss": 1.1291, + "step": 303000 + }, + { + "epoch": 1.12, + "learning_rate": 3.131870837488151e-05, + "loss": 1.1262, + "step": 303500 + }, + { + "epoch": 1.12, + "learning_rate": 3.128793194716303e-05, + "loss": 1.1227, + "step": 304000 + }, + { + "epoch": 1.12, + "learning_rate": 3.125715551944455e-05, + "loss": 1.1233, + "step": 304500 + }, + { + "epoch": 1.13, + "learning_rate": 3.1226379091726064e-05, + "loss": 1.1156, + "step": 305000 + }, + { + "epoch": 1.13, + "learning_rate": 3.119560266400758e-05, + "loss": 1.1147, + "step": 305500 + }, + { + "epoch": 1.13, + "learning_rate": 3.1164826236289105e-05, + "loss": 1.1226, + "step": 306000 + }, + { + "epoch": 1.13, + "learning_rate": 3.113404980857062e-05, + "loss": 1.1175, + "step": 306500 + }, + { + "epoch": 1.13, + "learning_rate": 3.110327338085214e-05, + "loss": 1.124, + "step": 307000 + }, + { + "epoch": 1.14, + "learning_rate": 3.1072496953133656e-05, + "loss": 1.1166, + "step": 307500 + }, + { + "epoch": 1.14, + "learning_rate": 3.1041720525415173e-05, + "loss": 1.1222, + "step": 308000 + }, + { + "epoch": 1.14, + "learning_rate": 3.101094409769669e-05, + "loss": 1.1214, + "step": 308500 + }, + { + "epoch": 1.14, + "learning_rate": 3.098016766997821e-05, + "loss": 1.1231, + "step": 309000 + }, + { + "epoch": 1.14, + "learning_rate": 3.094939124225973e-05, + "loss": 1.1208, + "step": 309500 + }, + { + "epoch": 1.14, + "learning_rate": 3.091861481454125e-05, + "loss": 1.1167, + "step": 310000 + }, + { + "epoch": 1.15, + "learning_rate": 3.0887838386822766e-05, + "loss": 1.1238, + "step": 310500 + }, + { + "epoch": 1.15, + "learning_rate": 3.085706195910428e-05, + "loss": 1.1203, + "step": 311000 + }, + { + "epoch": 1.15, + "learning_rate": 3.08262855313858e-05, + "loss": 1.1276, + "step": 311500 + }, + { + "epoch": 1.15, + "learning_rate": 3.079550910366732e-05, + "loss": 1.1169, + "step": 312000 + }, + { + "epoch": 1.15, + "learning_rate": 3.076473267594884e-05, + "loss": 1.1197, + "step": 312500 + }, + { + "epoch": 1.16, + "learning_rate": 3.073395624823036e-05, + "loss": 1.1207, + "step": 313000 + }, + { + "epoch": 1.16, + "learning_rate": 3.0703179820511875e-05, + "loss": 1.1227, + "step": 313500 + }, + { + "epoch": 1.16, + "learning_rate": 3.067240339279339e-05, + "loss": 1.1221, + "step": 314000 + }, + { + "epoch": 1.16, + "learning_rate": 3.064162696507491e-05, + "loss": 1.1189, + "step": 314500 + }, + { + "epoch": 1.16, + "learning_rate": 3.0610850537356426e-05, + "loss": 1.1168, + "step": 315000 + }, + { + "epoch": 1.17, + "learning_rate": 3.058007410963794e-05, + "loss": 1.1153, + "step": 315500 + }, + { + "epoch": 1.17, + "learning_rate": 3.054929768191947e-05, + "loss": 1.1149, + "step": 316000 + }, + { + "epoch": 1.17, + "learning_rate": 3.0518521254200984e-05, + "loss": 1.1202, + "step": 316500 + }, + { + "epoch": 1.17, + "learning_rate": 3.04877448264825e-05, + "loss": 1.1233, + "step": 317000 + }, + { + "epoch": 1.17, + "learning_rate": 3.045696839876402e-05, + "loss": 1.1165, + "step": 317500 + }, + { + "epoch": 1.17, + "learning_rate": 3.0426191971045536e-05, + "loss": 1.1274, + "step": 318000 + }, + { + "epoch": 1.18, + "learning_rate": 3.0395415543327056e-05, + "loss": 1.1161, + "step": 318500 + }, + { + "epoch": 1.18, + "learning_rate": 3.0364639115608573e-05, + "loss": 1.1149, + "step": 319000 + }, + { + "epoch": 1.18, + "learning_rate": 3.033386268789009e-05, + "loss": 1.1194, + "step": 319500 + }, + { + "epoch": 1.18, + "learning_rate": 3.030308626017161e-05, + "loss": 1.1261, + "step": 320000 + }, + { + "epoch": 1.18, + "learning_rate": 3.0272309832453128e-05, + "loss": 1.1206, + "step": 320500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0241533404734645e-05, + "loss": 1.1144, + "step": 321000 + }, + { + "epoch": 1.19, + "learning_rate": 3.0210756977016162e-05, + "loss": 1.1186, + "step": 321500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0179980549297682e-05, + "loss": 1.129, + "step": 322000 + }, + { + "epoch": 1.19, + "learning_rate": 3.01492041215792e-05, + "loss": 1.1249, + "step": 322500 + }, + { + "epoch": 1.19, + "learning_rate": 3.0118427693860717e-05, + "loss": 1.121, + "step": 323000 + }, + { + "epoch": 1.19, + "learning_rate": 3.0087651266142237e-05, + "loss": 1.1155, + "step": 323500 + }, + { + "epoch": 1.2, + "learning_rate": 3.0056874838423754e-05, + "loss": 1.1212, + "step": 324000 + }, + { + "epoch": 1.2, + "learning_rate": 3.002609841070527e-05, + "loss": 1.1076, + "step": 324500 + }, + { + "epoch": 1.2, + "learning_rate": 2.9995321982986792e-05, + "loss": 1.1176, + "step": 325000 + }, + { + "epoch": 1.2, + "learning_rate": 2.996454555526831e-05, + "loss": 1.1167, + "step": 325500 + }, + { + "epoch": 1.2, + "learning_rate": 2.9933769127549826e-05, + "loss": 1.1186, + "step": 326000 + }, + { + "epoch": 1.21, + "learning_rate": 2.9902992699831346e-05, + "loss": 1.1161, + "step": 326500 + }, + { + "epoch": 1.21, + "learning_rate": 2.9872216272112864e-05, + "loss": 1.1168, + "step": 327000 + }, + { + "epoch": 1.21, + "learning_rate": 2.984143984439438e-05, + "loss": 1.1132, + "step": 327500 + }, + { + "epoch": 1.21, + "learning_rate": 2.9810663416675898e-05, + "loss": 1.1185, + "step": 328000 + }, + { + "epoch": 1.21, + "learning_rate": 2.9779886988957418e-05, + "loss": 1.1088, + "step": 328500 + }, + { + "epoch": 1.22, + "learning_rate": 2.9749110561238935e-05, + "loss": 1.1146, + "step": 329000 + }, + { + "epoch": 1.22, + "learning_rate": 2.9718334133520452e-05, + "loss": 1.1189, + "step": 329500 + }, + { + "epoch": 1.22, + "learning_rate": 2.9687557705801973e-05, + "loss": 1.1159, + "step": 330000 + }, + { + "epoch": 1.22, + "learning_rate": 2.965678127808349e-05, + "loss": 1.1174, + "step": 330500 + }, + { + "epoch": 1.22, + "learning_rate": 2.9626004850365007e-05, + "loss": 1.1194, + "step": 331000 + }, + { + "epoch": 1.22, + "learning_rate": 2.9595228422646528e-05, + "loss": 1.1091, + "step": 331500 + }, + { + "epoch": 1.23, + "learning_rate": 2.9564451994928045e-05, + "loss": 1.112, + "step": 332000 + }, + { + "epoch": 1.23, + "learning_rate": 2.953367556720956e-05, + "loss": 1.112, + "step": 332500 + }, + { + "epoch": 1.23, + "learning_rate": 2.950289913949108e-05, + "loss": 1.1104, + "step": 333000 + }, + { + "epoch": 1.23, + "learning_rate": 2.94721227117726e-05, + "loss": 1.1053, + "step": 333500 + }, + { + "epoch": 1.23, + "learning_rate": 2.9441346284054116e-05, + "loss": 1.1106, + "step": 334000 + }, + { + "epoch": 1.24, + "learning_rate": 2.9410569856335633e-05, + "loss": 1.1185, + "step": 334500 + }, + { + "epoch": 1.24, + "learning_rate": 2.9379793428617154e-05, + "loss": 1.1113, + "step": 335000 + }, + { + "epoch": 1.24, + "learning_rate": 2.934901700089867e-05, + "loss": 1.1096, + "step": 335500 + }, + { + "epoch": 1.24, + "learning_rate": 2.9318240573180188e-05, + "loss": 1.1085, + "step": 336000 + }, + { + "epoch": 1.24, + "learning_rate": 2.928746414546171e-05, + "loss": 1.1137, + "step": 336500 + }, + { + "epoch": 1.24, + "learning_rate": 2.9256687717743226e-05, + "loss": 1.1075, + "step": 337000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9225911290024743e-05, + "loss": 1.1093, + "step": 337500 + }, + { + "epoch": 1.25, + "learning_rate": 2.919513486230626e-05, + "loss": 1.1159, + "step": 338000 + }, + { + "epoch": 1.25, + "learning_rate": 2.916435843458778e-05, + "loss": 1.1063, + "step": 338500 + }, + { + "epoch": 1.25, + "learning_rate": 2.9133582006869297e-05, + "loss": 1.1135, + "step": 339000 + }, + { + "epoch": 1.25, + "learning_rate": 2.9102805579150815e-05, + "loss": 1.1186, + "step": 339500 + }, + { + "epoch": 1.26, + "learning_rate": 2.9072029151432335e-05, + "loss": 1.1128, + "step": 340000 + }, + { + "epoch": 1.26, + "learning_rate": 2.9041252723713852e-05, + "loss": 1.1148, + "step": 340500 + }, + { + "epoch": 1.26, + "learning_rate": 2.901047629599537e-05, + "loss": 1.1056, + "step": 341000 + }, + { + "epoch": 1.26, + "learning_rate": 2.897969986827689e-05, + "loss": 1.1153, + "step": 341500 + }, + { + "epoch": 1.26, + "learning_rate": 2.8948923440558407e-05, + "loss": 1.1147, + "step": 342000 + }, + { + "epoch": 1.26, + "learning_rate": 2.8918147012839924e-05, + "loss": 1.1155, + "step": 342500 + }, + { + "epoch": 1.27, + "learning_rate": 2.8887370585121444e-05, + "loss": 1.1132, + "step": 343000 + }, + { + "epoch": 1.27, + "learning_rate": 2.885659415740296e-05, + "loss": 1.1072, + "step": 343500 + }, + { + "epoch": 1.27, + "learning_rate": 2.882581772968448e-05, + "loss": 1.1175, + "step": 344000 + }, + { + "epoch": 1.27, + "learning_rate": 2.8795041301965996e-05, + "loss": 1.1048, + "step": 344500 + }, + { + "epoch": 1.27, + "learning_rate": 2.8764264874247516e-05, + "loss": 1.1126, + "step": 345000 + }, + { + "epoch": 1.28, + "learning_rate": 2.8733488446529033e-05, + "loss": 1.1107, + "step": 345500 + }, + { + "epoch": 1.28, + "learning_rate": 2.870271201881055e-05, + "loss": 1.1108, + "step": 346000 + }, + { + "epoch": 1.28, + "learning_rate": 2.867193559109207e-05, + "loss": 1.1062, + "step": 346500 + }, + { + "epoch": 1.28, + "learning_rate": 2.8641159163373588e-05, + "loss": 1.1083, + "step": 347000 + }, + { + "epoch": 1.28, + "learning_rate": 2.8610382735655105e-05, + "loss": 1.0995, + "step": 347500 + }, + { + "epoch": 1.29, + "learning_rate": 2.8579606307936625e-05, + "loss": 1.1143, + "step": 348000 + }, + { + "epoch": 1.29, + "learning_rate": 2.8548829880218142e-05, + "loss": 1.1146, + "step": 348500 + }, + { + "epoch": 1.29, + "learning_rate": 2.851805345249966e-05, + "loss": 1.1144, + "step": 349000 + }, + { + "epoch": 1.29, + "learning_rate": 2.8487277024781177e-05, + "loss": 1.1108, + "step": 349500 + }, + { + "epoch": 1.29, + "learning_rate": 2.8456500597062697e-05, + "loss": 1.1105, + "step": 350000 + }, + { + "epoch": 1.29, + "learning_rate": 2.842572416934422e-05, + "loss": 1.1059, + "step": 350500 + }, + { + "epoch": 1.3, + "learning_rate": 2.8394947741625738e-05, + "loss": 1.1037, + "step": 351000 + }, + { + "epoch": 1.3, + "learning_rate": 2.8364171313907255e-05, + "loss": 1.1111, + "step": 351500 + }, + { + "epoch": 1.3, + "learning_rate": 2.8333394886188776e-05, + "loss": 1.1103, + "step": 352000 + }, + { + "epoch": 1.3, + "learning_rate": 2.8302618458470293e-05, + "loss": 1.1081, + "step": 352500 + }, + { + "epoch": 1.3, + "learning_rate": 2.827184203075181e-05, + "loss": 1.1095, + "step": 353000 + }, + { + "epoch": 1.31, + "learning_rate": 2.824106560303333e-05, + "loss": 1.1069, + "step": 353500 + }, + { + "epoch": 1.31, + "learning_rate": 2.8210289175314847e-05, + "loss": 1.1134, + "step": 354000 + }, + { + "epoch": 1.31, + "learning_rate": 2.8179512747596364e-05, + "loss": 1.108, + "step": 354500 + }, + { + "epoch": 1.31, + "learning_rate": 2.814873631987788e-05, + "loss": 1.1124, + "step": 355000 + }, + { + "epoch": 1.31, + "learning_rate": 2.8117959892159402e-05, + "loss": 1.1144, + "step": 355500 + }, + { + "epoch": 1.31, + "learning_rate": 2.808718346444092e-05, + "loss": 1.1154, + "step": 356000 + }, + { + "epoch": 1.32, + "learning_rate": 2.8056407036722436e-05, + "loss": 1.1116, + "step": 356500 + }, + { + "epoch": 1.32, + "learning_rate": 2.8025630609003957e-05, + "loss": 1.1119, + "step": 357000 + }, + { + "epoch": 1.32, + "learning_rate": 2.7994854181285474e-05, + "loss": 1.1138, + "step": 357500 + }, + { + "epoch": 1.32, + "learning_rate": 2.796407775356699e-05, + "loss": 1.1047, + "step": 358000 + }, + { + "epoch": 1.32, + "learning_rate": 2.793330132584851e-05, + "loss": 1.1068, + "step": 358500 + }, + { + "epoch": 1.33, + "learning_rate": 2.790252489813003e-05, + "loss": 1.1063, + "step": 359000 + }, + { + "epoch": 1.33, + "learning_rate": 2.7871748470411546e-05, + "loss": 1.0996, + "step": 359500 + }, + { + "epoch": 1.33, + "learning_rate": 2.7840972042693063e-05, + "loss": 1.1076, + "step": 360000 + }, + { + "epoch": 1.33, + "learning_rate": 2.7810195614974583e-05, + "loss": 1.0979, + "step": 360500 + }, + { + "epoch": 1.33, + "learning_rate": 2.77794191872561e-05, + "loss": 1.1051, + "step": 361000 + }, + { + "epoch": 1.34, + "learning_rate": 2.7748642759537617e-05, + "loss": 1.1009, + "step": 361500 + }, + { + "epoch": 1.34, + "learning_rate": 2.7717866331819138e-05, + "loss": 1.104, + "step": 362000 + }, + { + "epoch": 1.34, + "learning_rate": 2.7687089904100655e-05, + "loss": 1.108, + "step": 362500 + }, + { + "epoch": 1.34, + "learning_rate": 2.7656313476382172e-05, + "loss": 1.0968, + "step": 363000 + }, + { + "epoch": 1.34, + "learning_rate": 2.7625537048663692e-05, + "loss": 1.1071, + "step": 363500 + }, + { + "epoch": 1.34, + "learning_rate": 2.759476062094521e-05, + "loss": 1.0957, + "step": 364000 + }, + { + "epoch": 1.35, + "learning_rate": 2.7563984193226727e-05, + "loss": 1.1063, + "step": 364500 + }, + { + "epoch": 1.35, + "learning_rate": 2.7533207765508247e-05, + "loss": 1.1008, + "step": 365000 + }, + { + "epoch": 1.35, + "learning_rate": 2.7502431337789764e-05, + "loss": 1.1065, + "step": 365500 + }, + { + "epoch": 1.35, + "learning_rate": 2.747165491007128e-05, + "loss": 1.1019, + "step": 366000 + }, + { + "epoch": 1.35, + "learning_rate": 2.74408784823528e-05, + "loss": 1.1028, + "step": 366500 + }, + { + "epoch": 1.36, + "learning_rate": 2.741010205463432e-05, + "loss": 1.0995, + "step": 367000 + }, + { + "epoch": 1.36, + "learning_rate": 2.7379325626915836e-05, + "loss": 1.1066, + "step": 367500 + }, + { + "epoch": 1.36, + "learning_rate": 2.7348549199197353e-05, + "loss": 1.0997, + "step": 368000 + }, + { + "epoch": 1.36, + "learning_rate": 2.7317772771478873e-05, + "loss": 1.1093, + "step": 368500 + }, + { + "epoch": 1.36, + "learning_rate": 2.728699634376039e-05, + "loss": 1.1062, + "step": 369000 + }, + { + "epoch": 1.36, + "learning_rate": 2.7256219916041908e-05, + "loss": 1.103, + "step": 369500 + }, + { + "epoch": 1.37, + "learning_rate": 2.7225443488323428e-05, + "loss": 1.115, + "step": 370000 + }, + { + "epoch": 1.37, + "learning_rate": 2.7194667060604945e-05, + "loss": 1.0995, + "step": 370500 + }, + { + "epoch": 1.37, + "learning_rate": 2.7163890632886462e-05, + "loss": 1.0975, + "step": 371000 + }, + { + "epoch": 1.37, + "learning_rate": 2.713311420516798e-05, + "loss": 1.0929, + "step": 371500 + }, + { + "epoch": 1.37, + "learning_rate": 2.71023377774495e-05, + "loss": 1.1008, + "step": 372000 + }, + { + "epoch": 1.38, + "learning_rate": 2.7071561349731017e-05, + "loss": 1.0988, + "step": 372500 + }, + { + "epoch": 1.38, + "learning_rate": 2.7040784922012534e-05, + "loss": 1.0953, + "step": 373000 + }, + { + "epoch": 1.38, + "learning_rate": 2.7010008494294055e-05, + "loss": 1.1023, + "step": 373500 + }, + { + "epoch": 1.38, + "learning_rate": 2.697923206657557e-05, + "loss": 1.1068, + "step": 374000 + }, + { + "epoch": 1.38, + "learning_rate": 2.694845563885709e-05, + "loss": 1.1052, + "step": 374500 + }, + { + "epoch": 1.38, + "learning_rate": 2.691767921113861e-05, + "loss": 1.0968, + "step": 375000 + }, + { + "epoch": 1.39, + "learning_rate": 2.6886902783420126e-05, + "loss": 1.1074, + "step": 375500 + }, + { + "epoch": 1.39, + "learning_rate": 2.6856126355701643e-05, + "loss": 1.0953, + "step": 376000 + }, + { + "epoch": 1.39, + "learning_rate": 2.682534992798316e-05, + "loss": 1.1027, + "step": 376500 + }, + { + "epoch": 1.39, + "learning_rate": 2.679457350026468e-05, + "loss": 1.0974, + "step": 377000 + }, + { + "epoch": 1.39, + "learning_rate": 2.6763797072546198e-05, + "loss": 1.0923, + "step": 377500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6733020644827715e-05, + "loss": 1.0968, + "step": 378000 + }, + { + "epoch": 1.4, + "learning_rate": 2.6702244217109236e-05, + "loss": 1.1017, + "step": 378500 + }, + { + "epoch": 1.4, + "learning_rate": 2.6671467789390753e-05, + "loss": 1.1037, + "step": 379000 + }, + { + "epoch": 1.4, + "learning_rate": 2.664069136167227e-05, + "loss": 1.1003, + "step": 379500 + }, + { + "epoch": 1.4, + "learning_rate": 2.660991493395379e-05, + "loss": 1.102, + "step": 380000 + }, + { + "epoch": 1.41, + "learning_rate": 2.6579138506235307e-05, + "loss": 1.0969, + "step": 380500 + }, + { + "epoch": 1.41, + "learning_rate": 2.6548362078516824e-05, + "loss": 1.1022, + "step": 381000 + }, + { + "epoch": 1.41, + "learning_rate": 2.6517585650798345e-05, + "loss": 1.0969, + "step": 381500 + }, + { + "epoch": 1.41, + "learning_rate": 2.6486809223079862e-05, + "loss": 1.0876, + "step": 382000 + }, + { + "epoch": 1.41, + "learning_rate": 2.645603279536138e-05, + "loss": 1.1026, + "step": 382500 + }, + { + "epoch": 1.41, + "learning_rate": 2.6425256367642896e-05, + "loss": 1.0973, + "step": 383000 + }, + { + "epoch": 1.42, + "learning_rate": 2.6394479939924417e-05, + "loss": 1.1026, + "step": 383500 + }, + { + "epoch": 1.42, + "learning_rate": 2.6363703512205934e-05, + "loss": 1.0965, + "step": 384000 + }, + { + "epoch": 1.42, + "learning_rate": 2.633292708448745e-05, + "loss": 1.0984, + "step": 384500 + }, + { + "epoch": 1.42, + "learning_rate": 2.630215065676897e-05, + "loss": 1.101, + "step": 385000 + }, + { + "epoch": 1.42, + "learning_rate": 2.627137422905049e-05, + "loss": 1.0917, + "step": 385500 + }, + { + "epoch": 1.43, + "learning_rate": 2.6240597801332005e-05, + "loss": 1.0949, + "step": 386000 + }, + { + "epoch": 1.43, + "learning_rate": 2.6209821373613526e-05, + "loss": 1.101, + "step": 386500 + }, + { + "epoch": 1.43, + "learning_rate": 2.6179044945895043e-05, + "loss": 1.0962, + "step": 387000 + }, + { + "epoch": 1.43, + "learning_rate": 2.614826851817656e-05, + "loss": 1.1, + "step": 387500 + }, + { + "epoch": 1.43, + "learning_rate": 2.6117492090458077e-05, + "loss": 1.0923, + "step": 388000 + }, + { + "epoch": 1.43, + "learning_rate": 2.6086715662739598e-05, + "loss": 1.0999, + "step": 388500 + }, + { + "epoch": 1.44, + "learning_rate": 2.6055939235021115e-05, + "loss": 1.0905, + "step": 389000 + }, + { + "epoch": 1.44, + "learning_rate": 2.6025162807302632e-05, + "loss": 1.1041, + "step": 389500 + }, + { + "epoch": 1.44, + "learning_rate": 2.5994386379584152e-05, + "loss": 1.1014, + "step": 390000 + }, + { + "epoch": 1.44, + "learning_rate": 2.596360995186567e-05, + "loss": 1.0937, + "step": 390500 + }, + { + "epoch": 1.44, + "learning_rate": 2.5932833524147187e-05, + "loss": 1.1003, + "step": 391000 + }, + { + "epoch": 1.45, + "learning_rate": 2.5902057096428707e-05, + "loss": 1.1029, + "step": 391500 + }, + { + "epoch": 1.45, + "learning_rate": 2.5871280668710224e-05, + "loss": 1.0963, + "step": 392000 + }, + { + "epoch": 1.45, + "learning_rate": 2.584050424099174e-05, + "loss": 1.0957, + "step": 392500 + }, + { + "epoch": 1.45, + "learning_rate": 2.5809727813273262e-05, + "loss": 1.1007, + "step": 393000 + }, + { + "epoch": 1.45, + "learning_rate": 2.577895138555478e-05, + "loss": 1.1006, + "step": 393500 + }, + { + "epoch": 1.46, + "learning_rate": 2.5748174957836296e-05, + "loss": 1.0938, + "step": 394000 + }, + { + "epoch": 1.46, + "learning_rate": 2.5717398530117813e-05, + "loss": 1.0928, + "step": 394500 + }, + { + "epoch": 1.46, + "learning_rate": 2.5686622102399333e-05, + "loss": 1.0849, + "step": 395000 + }, + { + "epoch": 1.46, + "learning_rate": 2.565584567468085e-05, + "loss": 1.0985, + "step": 395500 + }, + { + "epoch": 1.46, + "learning_rate": 2.5625069246962368e-05, + "loss": 1.0899, + "step": 396000 + }, + { + "epoch": 1.46, + "learning_rate": 2.5594292819243888e-05, + "loss": 1.0957, + "step": 396500 + }, + { + "epoch": 1.47, + "learning_rate": 2.5563516391525405e-05, + "loss": 1.0867, + "step": 397000 + }, + { + "epoch": 1.47, + "learning_rate": 2.5532739963806922e-05, + "loss": 1.1027, + "step": 397500 + }, + { + "epoch": 1.47, + "learning_rate": 2.5501963536088443e-05, + "loss": 1.0899, + "step": 398000 + }, + { + "epoch": 1.47, + "learning_rate": 2.547118710836996e-05, + "loss": 1.0836, + "step": 398500 + }, + { + "epoch": 1.47, + "learning_rate": 2.5440410680651477e-05, + "loss": 1.0939, + "step": 399000 + }, + { + "epoch": 1.48, + "learning_rate": 2.5409634252932994e-05, + "loss": 1.0925, + "step": 399500 + }, + { + "epoch": 1.48, + "learning_rate": 2.5378857825214515e-05, + "loss": 1.1019, + "step": 400000 + }, + { + "epoch": 1.48, + "learning_rate": 2.534808139749603e-05, + "loss": 1.1007, + "step": 400500 + }, + { + "epoch": 1.48, + "learning_rate": 2.531730496977755e-05, + "loss": 1.0905, + "step": 401000 + }, + { + "epoch": 1.48, + "learning_rate": 2.528652854205907e-05, + "loss": 1.0936, + "step": 401500 + }, + { + "epoch": 1.48, + "learning_rate": 2.5255752114340586e-05, + "loss": 1.0953, + "step": 402000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5224975686622103e-05, + "loss": 1.0967, + "step": 402500 + }, + { + "epoch": 1.49, + "learning_rate": 2.5194199258903624e-05, + "loss": 1.0934, + "step": 403000 + }, + { + "epoch": 1.49, + "learning_rate": 2.516342283118514e-05, + "loss": 1.1003, + "step": 403500 + }, + { + "epoch": 1.49, + "learning_rate": 2.5132646403466658e-05, + "loss": 1.0893, + "step": 404000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5101869975748175e-05, + "loss": 1.0878, + "step": 404500 + }, + { + "epoch": 1.5, + "learning_rate": 2.5071093548029696e-05, + "loss": 1.0921, + "step": 405000 + }, + { + "epoch": 1.5, + "learning_rate": 2.5040317120311213e-05, + "loss": 1.0976, + "step": 405500 + }, + { + "epoch": 1.5, + "learning_rate": 2.500954069259273e-05, + "loss": 1.0978, + "step": 406000 + }, + { + "epoch": 1.5, + "learning_rate": 2.497876426487425e-05, + "loss": 1.0948, + "step": 406500 + }, + { + "epoch": 1.5, + "learning_rate": 2.4947987837155767e-05, + "loss": 1.0894, + "step": 407000 + }, + { + "epoch": 1.5, + "learning_rate": 2.4917211409437284e-05, + "loss": 1.0933, + "step": 407500 + }, + { + "epoch": 1.51, + "learning_rate": 2.4886434981718805e-05, + "loss": 1.0835, + "step": 408000 + }, + { + "epoch": 1.51, + "learning_rate": 2.4855658554000322e-05, + "loss": 1.0971, + "step": 408500 + }, + { + "epoch": 1.51, + "learning_rate": 2.482488212628184e-05, + "loss": 1.0921, + "step": 409000 + }, + { + "epoch": 1.51, + "learning_rate": 2.479410569856336e-05, + "loss": 1.085, + "step": 409500 + }, + { + "epoch": 1.51, + "learning_rate": 2.4763329270844877e-05, + "loss": 1.0838, + "step": 410000 + }, + { + "epoch": 1.52, + "learning_rate": 2.4732552843126394e-05, + "loss": 1.0863, + "step": 410500 + }, + { + "epoch": 1.52, + "learning_rate": 2.470177641540791e-05, + "loss": 1.0882, + "step": 411000 + }, + { + "epoch": 1.52, + "learning_rate": 2.467099998768943e-05, + "loss": 1.0895, + "step": 411500 + }, + { + "epoch": 1.52, + "learning_rate": 2.464022355997095e-05, + "loss": 1.0949, + "step": 412000 + }, + { + "epoch": 1.52, + "learning_rate": 2.4609447132252465e-05, + "loss": 1.0864, + "step": 412500 + }, + { + "epoch": 1.53, + "learning_rate": 2.4578670704533986e-05, + "loss": 1.0848, + "step": 413000 + }, + { + "epoch": 1.53, + "learning_rate": 2.4547894276815503e-05, + "loss": 1.0895, + "step": 413500 + }, + { + "epoch": 1.53, + "learning_rate": 2.451711784909702e-05, + "loss": 1.0822, + "step": 414000 + }, + { + "epoch": 1.53, + "learning_rate": 2.448634142137854e-05, + "loss": 1.0859, + "step": 414500 + }, + { + "epoch": 1.53, + "learning_rate": 2.4455564993660058e-05, + "loss": 1.0809, + "step": 415000 + }, + { + "epoch": 1.53, + "learning_rate": 2.4424788565941575e-05, + "loss": 1.0877, + "step": 415500 + }, + { + "epoch": 1.54, + "learning_rate": 2.4394012138223092e-05, + "loss": 1.0822, + "step": 416000 + }, + { + "epoch": 1.54, + "learning_rate": 2.4363235710504612e-05, + "loss": 1.0981, + "step": 416500 + }, + { + "epoch": 1.54, + "learning_rate": 2.433245928278613e-05, + "loss": 1.0834, + "step": 417000 + }, + { + "epoch": 1.54, + "learning_rate": 2.4301682855067647e-05, + "loss": 1.0893, + "step": 417500 + }, + { + "epoch": 1.54, + "learning_rate": 2.4270906427349167e-05, + "loss": 1.0872, + "step": 418000 + }, + { + "epoch": 1.55, + "learning_rate": 2.4240129999630684e-05, + "loss": 1.0805, + "step": 418500 + }, + { + "epoch": 1.55, + "learning_rate": 2.42093535719122e-05, + "loss": 1.0848, + "step": 419000 + }, + { + "epoch": 1.55, + "learning_rate": 2.417857714419372e-05, + "loss": 1.0869, + "step": 419500 + }, + { + "epoch": 1.55, + "learning_rate": 2.414780071647524e-05, + "loss": 1.0846, + "step": 420000 + }, + { + "epoch": 1.55, + "learning_rate": 2.4117024288756756e-05, + "loss": 1.0872, + "step": 420500 + }, + { + "epoch": 1.55, + "learning_rate": 2.4086247861038276e-05, + "loss": 1.0835, + "step": 421000 + }, + { + "epoch": 1.56, + "learning_rate": 2.4055471433319793e-05, + "loss": 1.0825, + "step": 421500 + }, + { + "epoch": 1.56, + "learning_rate": 2.402469500560131e-05, + "loss": 1.0898, + "step": 422000 + }, + { + "epoch": 1.56, + "learning_rate": 2.3993918577882828e-05, + "loss": 1.0779, + "step": 422500 + }, + { + "epoch": 1.56, + "learning_rate": 2.3963142150164348e-05, + "loss": 1.0823, + "step": 423000 + }, + { + "epoch": 1.56, + "learning_rate": 2.3932365722445865e-05, + "loss": 1.084, + "step": 423500 + }, + { + "epoch": 1.57, + "learning_rate": 2.3901589294727382e-05, + "loss": 1.0851, + "step": 424000 + }, + { + "epoch": 1.57, + "learning_rate": 2.3870812867008903e-05, + "loss": 1.086, + "step": 424500 + }, + { + "epoch": 1.57, + "learning_rate": 2.384003643929042e-05, + "loss": 1.0877, + "step": 425000 + }, + { + "epoch": 1.57, + "learning_rate": 2.3809260011571937e-05, + "loss": 1.0923, + "step": 425500 + }, + { + "epoch": 1.57, + "learning_rate": 2.3778483583853457e-05, + "loss": 1.0843, + "step": 426000 + }, + { + "epoch": 1.58, + "learning_rate": 2.3747707156134974e-05, + "loss": 1.0748, + "step": 426500 + }, + { + "epoch": 1.58, + "learning_rate": 2.371693072841649e-05, + "loss": 1.0833, + "step": 427000 + }, + { + "epoch": 1.58, + "learning_rate": 2.368615430069801e-05, + "loss": 1.0839, + "step": 427500 + }, + { + "epoch": 1.58, + "learning_rate": 2.365537787297953e-05, + "loss": 1.0942, + "step": 428000 + }, + { + "epoch": 1.58, + "learning_rate": 2.3624601445261046e-05, + "loss": 1.0913, + "step": 428500 + }, + { + "epoch": 1.58, + "learning_rate": 2.3593825017542563e-05, + "loss": 1.093, + "step": 429000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3563048589824084e-05, + "loss": 1.0757, + "step": 429500 + }, + { + "epoch": 1.59, + "learning_rate": 2.35322721621056e-05, + "loss": 1.0821, + "step": 430000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3501495734387118e-05, + "loss": 1.0827, + "step": 430500 + }, + { + "epoch": 1.59, + "learning_rate": 2.347071930666864e-05, + "loss": 1.0845, + "step": 431000 + }, + { + "epoch": 1.59, + "learning_rate": 2.3439942878950156e-05, + "loss": 1.0876, + "step": 431500 + }, + { + "epoch": 1.6, + "learning_rate": 2.3409166451231673e-05, + "loss": 1.0776, + "step": 432000 + }, + { + "epoch": 1.6, + "learning_rate": 2.337839002351319e-05, + "loss": 1.0837, + "step": 432500 + }, + { + "epoch": 1.6, + "learning_rate": 2.334761359579471e-05, + "loss": 1.075, + "step": 433000 + }, + { + "epoch": 1.6, + "learning_rate": 2.3316837168076227e-05, + "loss": 1.0931, + "step": 433500 + }, + { + "epoch": 1.6, + "learning_rate": 2.3286060740357744e-05, + "loss": 1.0905, + "step": 434000 + }, + { + "epoch": 1.6, + "learning_rate": 2.3255284312639265e-05, + "loss": 1.0864, + "step": 434500 + }, + { + "epoch": 1.61, + "learning_rate": 2.3224507884920782e-05, + "loss": 1.0845, + "step": 435000 + }, + { + "epoch": 1.61, + "learning_rate": 2.31937314572023e-05, + "loss": 1.0933, + "step": 435500 + }, + { + "epoch": 1.61, + "learning_rate": 2.316295502948382e-05, + "loss": 1.076, + "step": 436000 + }, + { + "epoch": 1.61, + "learning_rate": 2.3132178601765337e-05, + "loss": 1.0816, + "step": 436500 + }, + { + "epoch": 1.61, + "learning_rate": 2.3101402174046854e-05, + "loss": 1.0791, + "step": 437000 + }, + { + "epoch": 1.62, + "learning_rate": 2.3070625746328374e-05, + "loss": 1.0818, + "step": 437500 + }, + { + "epoch": 1.62, + "learning_rate": 2.303984931860989e-05, + "loss": 1.0844, + "step": 438000 + }, + { + "epoch": 1.62, + "learning_rate": 2.300907289089141e-05, + "loss": 1.0735, + "step": 438500 + }, + { + "epoch": 1.62, + "learning_rate": 2.2978296463172925e-05, + "loss": 1.0766, + "step": 439000 + }, + { + "epoch": 1.62, + "learning_rate": 2.2947520035454446e-05, + "loss": 1.0827, + "step": 439500 + }, + { + "epoch": 1.62, + "learning_rate": 2.2916743607735963e-05, + "loss": 1.0855, + "step": 440000 + }, + { + "epoch": 1.63, + "learning_rate": 2.288596718001748e-05, + "loss": 1.074, + "step": 440500 + }, + { + "epoch": 1.63, + "learning_rate": 2.2855190752299e-05, + "loss": 1.0849, + "step": 441000 + }, + { + "epoch": 1.63, + "learning_rate": 2.2824414324580518e-05, + "loss": 1.0771, + "step": 441500 + }, + { + "epoch": 1.63, + "learning_rate": 2.2793637896862035e-05, + "loss": 1.0858, + "step": 442000 + }, + { + "epoch": 1.63, + "learning_rate": 2.2762861469143555e-05, + "loss": 1.0815, + "step": 442500 + }, + { + "epoch": 1.64, + "learning_rate": 2.2732085041425072e-05, + "loss": 1.0783, + "step": 443000 + }, + { + "epoch": 1.64, + "learning_rate": 2.270130861370659e-05, + "loss": 1.0707, + "step": 443500 + }, + { + "epoch": 1.64, + "learning_rate": 2.2670532185988107e-05, + "loss": 1.0763, + "step": 444000 + }, + { + "epoch": 1.64, + "learning_rate": 2.2639755758269627e-05, + "loss": 1.0802, + "step": 444500 + }, + { + "epoch": 1.64, + "learning_rate": 2.2608979330551144e-05, + "loss": 1.0757, + "step": 445000 + }, + { + "epoch": 1.65, + "learning_rate": 2.257820290283266e-05, + "loss": 1.0804, + "step": 445500 + }, + { + "epoch": 1.65, + "learning_rate": 2.254742647511418e-05, + "loss": 1.0786, + "step": 446000 + }, + { + "epoch": 1.65, + "learning_rate": 2.25166500473957e-05, + "loss": 1.0785, + "step": 446500 + }, + { + "epoch": 1.65, + "learning_rate": 2.2485873619677216e-05, + "loss": 1.0696, + "step": 447000 + }, + { + "epoch": 1.65, + "learning_rate": 2.2455097191958736e-05, + "loss": 1.072, + "step": 447500 + }, + { + "epoch": 1.65, + "learning_rate": 2.2424320764240253e-05, + "loss": 1.0787, + "step": 448000 + }, + { + "epoch": 1.66, + "learning_rate": 2.239354433652177e-05, + "loss": 1.0763, + "step": 448500 + }, + { + "epoch": 1.66, + "learning_rate": 2.236276790880329e-05, + "loss": 1.0783, + "step": 449000 + }, + { + "epoch": 1.66, + "learning_rate": 2.2331991481084808e-05, + "loss": 1.0721, + "step": 449500 + }, + { + "epoch": 1.66, + "learning_rate": 2.2301215053366325e-05, + "loss": 1.0833, + "step": 450000 + }, + { + "epoch": 1.66, + "learning_rate": 2.2270438625647842e-05, + "loss": 1.0705, + "step": 450500 + }, + { + "epoch": 1.67, + "learning_rate": 2.2239662197929363e-05, + "loss": 1.0791, + "step": 451000 + }, + { + "epoch": 1.67, + "learning_rate": 2.220888577021088e-05, + "loss": 1.0793, + "step": 451500 + }, + { + "epoch": 1.67, + "learning_rate": 2.2178109342492397e-05, + "loss": 1.0814, + "step": 452000 + }, + { + "epoch": 1.67, + "learning_rate": 2.2147332914773917e-05, + "loss": 1.0752, + "step": 452500 + }, + { + "epoch": 1.67, + "learning_rate": 2.2116556487055434e-05, + "loss": 1.0876, + "step": 453000 + }, + { + "epoch": 1.67, + "learning_rate": 2.208578005933695e-05, + "loss": 1.072, + "step": 453500 + }, + { + "epoch": 1.68, + "learning_rate": 2.2055003631618472e-05, + "loss": 1.0739, + "step": 454000 + }, + { + "epoch": 1.68, + "learning_rate": 2.202422720389999e-05, + "loss": 1.0814, + "step": 454500 + }, + { + "epoch": 1.68, + "learning_rate": 2.1993450776181506e-05, + "loss": 1.0705, + "step": 455000 + }, + { + "epoch": 1.68, + "learning_rate": 2.1962674348463027e-05, + "loss": 1.0753, + "step": 455500 + }, + { + "epoch": 1.68, + "learning_rate": 2.1931897920744547e-05, + "loss": 1.0834, + "step": 456000 + }, + { + "epoch": 1.69, + "learning_rate": 2.1901121493026064e-05, + "loss": 1.0849, + "step": 456500 + }, + { + "epoch": 1.69, + "learning_rate": 2.187034506530758e-05, + "loss": 1.0623, + "step": 457000 + }, + { + "epoch": 1.69, + "learning_rate": 2.18395686375891e-05, + "loss": 1.0664, + "step": 457500 + }, + { + "epoch": 1.69, + "learning_rate": 2.180879220987062e-05, + "loss": 1.0791, + "step": 458000 + }, + { + "epoch": 1.69, + "learning_rate": 2.1778015782152136e-05, + "loss": 1.0707, + "step": 458500 + }, + { + "epoch": 1.7, + "learning_rate": 2.1747239354433653e-05, + "loss": 1.0701, + "step": 459000 + }, + { + "epoch": 1.7, + "learning_rate": 2.1716462926715174e-05, + "loss": 1.0753, + "step": 459500 + }, + { + "epoch": 1.7, + "learning_rate": 2.168568649899669e-05, + "loss": 1.0739, + "step": 460000 + }, + { + "epoch": 1.7, + "learning_rate": 2.1654910071278208e-05, + "loss": 1.0732, + "step": 460500 + }, + { + "epoch": 1.7, + "learning_rate": 2.1624133643559728e-05, + "loss": 1.068, + "step": 461000 + }, + { + "epoch": 1.7, + "learning_rate": 2.1593357215841245e-05, + "loss": 1.0816, + "step": 461500 + }, + { + "epoch": 1.71, + "learning_rate": 2.1562580788122762e-05, + "loss": 1.0682, + "step": 462000 + }, + { + "epoch": 1.71, + "learning_rate": 2.1531804360404283e-05, + "loss": 1.0639, + "step": 462500 + }, + { + "epoch": 1.71, + "learning_rate": 2.15010279326858e-05, + "loss": 1.0723, + "step": 463000 + }, + { + "epoch": 1.71, + "learning_rate": 2.1470251504967317e-05, + "loss": 1.074, + "step": 463500 + }, + { + "epoch": 1.71, + "learning_rate": 2.1439475077248834e-05, + "loss": 1.0734, + "step": 464000 + }, + { + "epoch": 1.72, + "learning_rate": 2.1408698649530355e-05, + "loss": 1.0698, + "step": 464500 + }, + { + "epoch": 1.72, + "learning_rate": 2.1377922221811872e-05, + "loss": 1.0669, + "step": 465000 + }, + { + "epoch": 1.72, + "learning_rate": 2.134714579409339e-05, + "loss": 1.0744, + "step": 465500 + }, + { + "epoch": 1.72, + "learning_rate": 2.131636936637491e-05, + "loss": 1.0741, + "step": 466000 + }, + { + "epoch": 1.72, + "learning_rate": 2.1285592938656426e-05, + "loss": 1.068, + "step": 466500 + }, + { + "epoch": 1.72, + "learning_rate": 2.1254816510937943e-05, + "loss": 1.0707, + "step": 467000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1224040083219464e-05, + "loss": 1.0806, + "step": 467500 + }, + { + "epoch": 1.73, + "learning_rate": 2.119326365550098e-05, + "loss": 1.0701, + "step": 468000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1162487227782498e-05, + "loss": 1.08, + "step": 468500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1131710800064015e-05, + "loss": 1.0767, + "step": 469000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1100934372345536e-05, + "loss": 1.0618, + "step": 469500 + }, + { + "epoch": 1.74, + "learning_rate": 2.1070157944627053e-05, + "loss": 1.0704, + "step": 470000 + }, + { + "epoch": 1.74, + "learning_rate": 2.103938151690857e-05, + "loss": 1.0647, + "step": 470500 + }, + { + "epoch": 1.74, + "learning_rate": 2.100860508919009e-05, + "loss": 1.0657, + "step": 471000 + }, + { + "epoch": 1.74, + "learning_rate": 2.0977828661471607e-05, + "loss": 1.064, + "step": 471500 + }, + { + "epoch": 1.74, + "learning_rate": 2.0947052233753125e-05, + "loss": 1.0709, + "step": 472000 + }, + { + "epoch": 1.75, + "learning_rate": 2.0916275806034645e-05, + "loss": 1.0733, + "step": 472500 + }, + { + "epoch": 1.75, + "learning_rate": 2.0885499378316162e-05, + "loss": 1.0783, + "step": 473000 + }, + { + "epoch": 1.75, + "learning_rate": 2.085472295059768e-05, + "loss": 1.0638, + "step": 473500 + }, + { + "epoch": 1.75, + "learning_rate": 2.08239465228792e-05, + "loss": 1.0615, + "step": 474000 + }, + { + "epoch": 1.75, + "learning_rate": 2.0793170095160717e-05, + "loss": 1.0698, + "step": 474500 + }, + { + "epoch": 1.75, + "learning_rate": 2.0762393667442234e-05, + "loss": 1.0687, + "step": 475000 + }, + { + "epoch": 1.76, + "learning_rate": 2.073161723972375e-05, + "loss": 1.0697, + "step": 475500 + }, + { + "epoch": 1.76, + "learning_rate": 2.070084081200527e-05, + "loss": 1.0668, + "step": 476000 + }, + { + "epoch": 1.76, + "learning_rate": 2.067006438428679e-05, + "loss": 1.0686, + "step": 476500 + }, + { + "epoch": 1.76, + "learning_rate": 2.0639287956568306e-05, + "loss": 1.0624, + "step": 477000 + }, + { + "epoch": 1.76, + "learning_rate": 2.0608511528849826e-05, + "loss": 1.0668, + "step": 477500 + }, + { + "epoch": 1.77, + "learning_rate": 2.0577735101131343e-05, + "loss": 1.0646, + "step": 478000 + }, + { + "epoch": 1.77, + "learning_rate": 2.054695867341286e-05, + "loss": 1.0695, + "step": 478500 + }, + { + "epoch": 1.77, + "learning_rate": 2.051618224569438e-05, + "loss": 1.0713, + "step": 479000 + }, + { + "epoch": 1.77, + "learning_rate": 2.0485405817975898e-05, + "loss": 1.0712, + "step": 479500 + }, + { + "epoch": 1.77, + "learning_rate": 2.0454629390257415e-05, + "loss": 1.065, + "step": 480000 + }, + { + "epoch": 1.77, + "learning_rate": 2.0423852962538932e-05, + "loss": 1.0627, + "step": 480500 + }, + { + "epoch": 1.78, + "learning_rate": 2.0393076534820452e-05, + "loss": 1.0649, + "step": 481000 + }, + { + "epoch": 1.78, + "learning_rate": 2.036230010710197e-05, + "loss": 1.0738, + "step": 481500 + }, + { + "epoch": 1.78, + "learning_rate": 2.0331523679383487e-05, + "loss": 1.0716, + "step": 482000 + }, + { + "epoch": 1.78, + "learning_rate": 2.0300747251665007e-05, + "loss": 1.0641, + "step": 482500 + }, + { + "epoch": 1.78, + "learning_rate": 2.0269970823946524e-05, + "loss": 1.0614, + "step": 483000 + }, + { + "epoch": 1.79, + "learning_rate": 2.023919439622804e-05, + "loss": 1.0647, + "step": 483500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0208417968509562e-05, + "loss": 1.0593, + "step": 484000 + }, + { + "epoch": 1.79, + "learning_rate": 2.017764154079108e-05, + "loss": 1.0743, + "step": 484500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0146865113072596e-05, + "loss": 1.0641, + "step": 485000 + }, + { + "epoch": 1.79, + "learning_rate": 2.0116088685354113e-05, + "loss": 1.0732, + "step": 485500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0085312257635634e-05, + "loss": 1.0735, + "step": 486000 + }, + { + "epoch": 1.8, + "learning_rate": 2.005453582991715e-05, + "loss": 1.0573, + "step": 486500 + }, + { + "epoch": 1.8, + "learning_rate": 2.0023759402198668e-05, + "loss": 1.0641, + "step": 487000 + }, + { + "epoch": 1.8, + "learning_rate": 1.9992982974480188e-05, + "loss": 1.0585, + "step": 487500 + }, + { + "epoch": 1.8, + "learning_rate": 1.9962206546761705e-05, + "loss": 1.068, + "step": 488000 + }, + { + "epoch": 1.8, + "learning_rate": 1.9931430119043222e-05, + "loss": 1.0679, + "step": 488500 + }, + { + "epoch": 1.81, + "learning_rate": 1.9900653691324743e-05, + "loss": 1.0714, + "step": 489000 + }, + { + "epoch": 1.81, + "learning_rate": 1.986987726360626e-05, + "loss": 1.0592, + "step": 489500 + }, + { + "epoch": 1.81, + "learning_rate": 1.9839100835887777e-05, + "loss": 1.0718, + "step": 490000 + }, + { + "epoch": 1.81, + "learning_rate": 1.9808324408169297e-05, + "loss": 1.0697, + "step": 490500 + }, + { + "epoch": 1.81, + "learning_rate": 1.9777547980450815e-05, + "loss": 1.0602, + "step": 491000 + }, + { + "epoch": 1.82, + "learning_rate": 1.974677155273233e-05, + "loss": 1.0675, + "step": 491500 + }, + { + "epoch": 1.82, + "learning_rate": 1.971599512501385e-05, + "loss": 1.0659, + "step": 492000 + }, + { + "epoch": 1.82, + "learning_rate": 1.968521869729537e-05, + "loss": 1.0687, + "step": 492500 + }, + { + "epoch": 1.82, + "learning_rate": 1.9654442269576886e-05, + "loss": 1.0665, + "step": 493000 + }, + { + "epoch": 1.82, + "learning_rate": 1.9623665841858403e-05, + "loss": 1.0683, + "step": 493500 + }, + { + "epoch": 1.82, + "learning_rate": 1.9592889414139924e-05, + "loss": 1.0626, + "step": 494000 + }, + { + "epoch": 1.83, + "learning_rate": 1.956211298642144e-05, + "loss": 1.0596, + "step": 494500 + }, + { + "epoch": 1.83, + "learning_rate": 1.9531336558702958e-05, + "loss": 1.065, + "step": 495000 + }, + { + "epoch": 1.83, + "learning_rate": 1.950056013098448e-05, + "loss": 1.0709, + "step": 495500 + }, + { + "epoch": 1.83, + "learning_rate": 1.9469783703265996e-05, + "loss": 1.0693, + "step": 496000 + }, + { + "epoch": 1.83, + "learning_rate": 1.9439007275547513e-05, + "loss": 1.0558, + "step": 496500 + }, + { + "epoch": 1.84, + "learning_rate": 1.940823084782903e-05, + "loss": 1.0681, + "step": 497000 + }, + { + "epoch": 1.84, + "learning_rate": 1.937745442011055e-05, + "loss": 1.0686, + "step": 497500 + }, + { + "epoch": 1.84, + "learning_rate": 1.9346677992392067e-05, + "loss": 1.0641, + "step": 498000 + }, + { + "epoch": 1.84, + "learning_rate": 1.9315901564673584e-05, + "loss": 1.0601, + "step": 498500 + }, + { + "epoch": 1.84, + "learning_rate": 1.9285125136955105e-05, + "loss": 1.0638, + "step": 499000 + }, + { + "epoch": 1.84, + "learning_rate": 1.9254348709236622e-05, + "loss": 1.0572, + "step": 499500 + }, + { + "epoch": 1.85, + "learning_rate": 1.922357228151814e-05, + "loss": 1.063, + "step": 500000 + }, + { + "epoch": 1.85, + "learning_rate": 1.919279585379966e-05, + "loss": 1.0585, + "step": 500500 + }, + { + "epoch": 1.85, + "learning_rate": 1.9162019426081177e-05, + "loss": 1.0664, + "step": 501000 + }, + { + "epoch": 1.85, + "learning_rate": 1.9131242998362694e-05, + "loss": 1.0591, + "step": 501500 + }, + { + "epoch": 1.85, + "learning_rate": 1.9100466570644214e-05, + "loss": 1.0509, + "step": 502000 + }, + { + "epoch": 1.86, + "learning_rate": 1.906969014292573e-05, + "loss": 1.0532, + "step": 502500 + }, + { + "epoch": 1.86, + "learning_rate": 1.903891371520725e-05, + "loss": 1.0721, + "step": 503000 + }, + { + "epoch": 1.86, + "learning_rate": 1.9008137287488766e-05, + "loss": 1.0554, + "step": 503500 + }, + { + "epoch": 1.86, + "learning_rate": 1.8977360859770286e-05, + "loss": 1.0689, + "step": 504000 + }, + { + "epoch": 1.86, + "learning_rate": 1.8946584432051803e-05, + "loss": 1.0692, + "step": 504500 + }, + { + "epoch": 1.87, + "learning_rate": 1.891580800433332e-05, + "loss": 1.0658, + "step": 505000 + }, + { + "epoch": 1.87, + "learning_rate": 1.888503157661484e-05, + "loss": 1.0602, + "step": 505500 + }, + { + "epoch": 1.87, + "learning_rate": 1.8854255148896358e-05, + "loss": 1.0624, + "step": 506000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8823478721177875e-05, + "loss": 1.0545, + "step": 506500 + }, + { + "epoch": 1.87, + "learning_rate": 1.8792702293459395e-05, + "loss": 1.0551, + "step": 507000 + }, + { + "epoch": 1.87, + "learning_rate": 1.8761925865740912e-05, + "loss": 1.0572, + "step": 507500 + }, + { + "epoch": 1.88, + "learning_rate": 1.873114943802243e-05, + "loss": 1.0695, + "step": 508000 + }, + { + "epoch": 1.88, + "learning_rate": 1.8700373010303947e-05, + "loss": 1.0517, + "step": 508500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8669596582585467e-05, + "loss": 1.0665, + "step": 509000 + }, + { + "epoch": 1.88, + "learning_rate": 1.8638820154866984e-05, + "loss": 1.0576, + "step": 509500 + }, + { + "epoch": 1.88, + "learning_rate": 1.86080437271485e-05, + "loss": 1.0605, + "step": 510000 + }, + { + "epoch": 1.89, + "learning_rate": 1.8577267299430022e-05, + "loss": 1.0606, + "step": 510500 + }, + { + "epoch": 1.89, + "learning_rate": 1.854649087171154e-05, + "loss": 1.0629, + "step": 511000 + }, + { + "epoch": 1.89, + "learning_rate": 1.8515714443993056e-05, + "loss": 1.06, + "step": 511500 + }, + { + "epoch": 1.89, + "learning_rate": 1.8484938016274576e-05, + "loss": 1.0572, + "step": 512000 + }, + { + "epoch": 1.89, + "learning_rate": 1.8454161588556093e-05, + "loss": 1.059, + "step": 512500 + }, + { + "epoch": 1.89, + "learning_rate": 1.842338516083761e-05, + "loss": 1.0534, + "step": 513000 + }, + { + "epoch": 1.9, + "learning_rate": 1.8392608733119128e-05, + "loss": 1.0593, + "step": 513500 + }, + { + "epoch": 1.9, + "learning_rate": 1.8361832305400648e-05, + "loss": 1.065, + "step": 514000 + }, + { + "epoch": 1.9, + "learning_rate": 1.8331055877682165e-05, + "loss": 1.0593, + "step": 514500 + }, + { + "epoch": 1.9, + "learning_rate": 1.8300279449963682e-05, + "loss": 1.0554, + "step": 515000 + }, + { + "epoch": 1.9, + "learning_rate": 1.8269503022245203e-05, + "loss": 1.0605, + "step": 515500 + }, + { + "epoch": 1.91, + "learning_rate": 1.823872659452672e-05, + "loss": 1.0586, + "step": 516000 + }, + { + "epoch": 1.91, + "learning_rate": 1.8207950166808237e-05, + "loss": 1.0597, + "step": 516500 + }, + { + "epoch": 1.91, + "learning_rate": 1.8177173739089757e-05, + "loss": 1.0561, + "step": 517000 + }, + { + "epoch": 1.91, + "learning_rate": 1.8146397311371275e-05, + "loss": 1.0484, + "step": 517500 + }, + { + "epoch": 1.91, + "learning_rate": 1.811562088365279e-05, + "loss": 1.0564, + "step": 518000 + }, + { + "epoch": 1.91, + "learning_rate": 1.8084844455934312e-05, + "loss": 1.0528, + "step": 518500 + }, + { + "epoch": 1.92, + "learning_rate": 1.805406802821583e-05, + "loss": 1.0546, + "step": 519000 + }, + { + "epoch": 1.92, + "learning_rate": 1.8023291600497346e-05, + "loss": 1.0573, + "step": 519500 + }, + { + "epoch": 1.92, + "learning_rate": 1.7992515172778863e-05, + "loss": 1.0481, + "step": 520000 + }, + { + "epoch": 1.92, + "learning_rate": 1.7961738745060384e-05, + "loss": 1.0636, + "step": 520500 + }, + { + "epoch": 1.92, + "learning_rate": 1.79309623173419e-05, + "loss": 1.0585, + "step": 521000 + }, + { + "epoch": 1.93, + "learning_rate": 1.7900185889623418e-05, + "loss": 1.0595, + "step": 521500 + }, + { + "epoch": 1.93, + "learning_rate": 1.786940946190494e-05, + "loss": 1.0621, + "step": 522000 + }, + { + "epoch": 1.93, + "learning_rate": 1.7838633034186456e-05, + "loss": 1.0544, + "step": 522500 + }, + { + "epoch": 1.93, + "learning_rate": 1.7807856606467973e-05, + "loss": 1.0543, + "step": 523000 + }, + { + "epoch": 1.93, + "learning_rate": 1.7777080178749493e-05, + "loss": 1.0646, + "step": 523500 + }, + { + "epoch": 1.94, + "learning_rate": 1.774630375103101e-05, + "loss": 1.0572, + "step": 524000 + }, + { + "epoch": 1.94, + "learning_rate": 1.7715527323312527e-05, + "loss": 1.0555, + "step": 524500 + }, + { + "epoch": 1.94, + "learning_rate": 1.7684750895594044e-05, + "loss": 1.06, + "step": 525000 + }, + { + "epoch": 1.94, + "learning_rate": 1.765397446787557e-05, + "loss": 1.0527, + "step": 525500 + }, + { + "epoch": 1.94, + "learning_rate": 1.7623198040157085e-05, + "loss": 1.0552, + "step": 526000 + }, + { + "epoch": 1.94, + "learning_rate": 1.7592421612438603e-05, + "loss": 1.0586, + "step": 526500 + }, + { + "epoch": 1.95, + "learning_rate": 1.7561645184720123e-05, + "loss": 1.0532, + "step": 527000 + }, + { + "epoch": 1.95, + "learning_rate": 1.753086875700164e-05, + "loss": 1.0546, + "step": 527500 + }, + { + "epoch": 1.95, + "learning_rate": 1.7500092329283157e-05, + "loss": 1.0597, + "step": 528000 + }, + { + "epoch": 1.95, + "learning_rate": 1.7469315901564674e-05, + "loss": 1.0543, + "step": 528500 + }, + { + "epoch": 1.95, + "learning_rate": 1.7438539473846195e-05, + "loss": 1.0525, + "step": 529000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7407763046127712e-05, + "loss": 1.0597, + "step": 529500 + }, + { + "epoch": 1.96, + "learning_rate": 1.737698661840923e-05, + "loss": 1.0478, + "step": 530000 + }, + { + "epoch": 1.96, + "learning_rate": 1.734621019069075e-05, + "loss": 1.0532, + "step": 530500 + }, + { + "epoch": 1.96, + "learning_rate": 1.7315433762972266e-05, + "loss": 1.0545, + "step": 531000 + }, + { + "epoch": 1.96, + "learning_rate": 1.7284657335253784e-05, + "loss": 1.0438, + "step": 531500 + }, + { + "epoch": 1.96, + "learning_rate": 1.7253880907535304e-05, + "loss": 1.0544, + "step": 532000 + }, + { + "epoch": 1.97, + "learning_rate": 1.722310447981682e-05, + "loss": 1.0539, + "step": 532500 + }, + { + "epoch": 1.97, + "learning_rate": 1.7192328052098338e-05, + "loss": 1.0544, + "step": 533000 + }, + { + "epoch": 1.97, + "learning_rate": 1.7161551624379855e-05, + "loss": 1.0535, + "step": 533500 + }, + { + "epoch": 1.97, + "learning_rate": 1.7130775196661376e-05, + "loss": 1.0501, + "step": 534000 + }, + { + "epoch": 1.97, + "learning_rate": 1.7099998768942893e-05, + "loss": 1.0558, + "step": 534500 + }, + { + "epoch": 1.98, + "learning_rate": 1.706922234122441e-05, + "loss": 1.0466, + "step": 535000 + }, + { + "epoch": 1.98, + "learning_rate": 1.703844591350593e-05, + "loss": 1.0561, + "step": 535500 + }, + { + "epoch": 1.98, + "learning_rate": 1.7007669485787448e-05, + "loss": 1.0504, + "step": 536000 + }, + { + "epoch": 1.98, + "learning_rate": 1.6976893058068965e-05, + "loss": 1.0419, + "step": 536500 + }, + { + "epoch": 1.98, + "learning_rate": 1.6946116630350485e-05, + "loss": 1.0501, + "step": 537000 + }, + { + "epoch": 1.99, + "learning_rate": 1.6915340202632002e-05, + "loss": 1.0584, + "step": 537500 + }, + { + "epoch": 1.99, + "learning_rate": 1.688456377491352e-05, + "loss": 1.0499, + "step": 538000 + }, + { + "epoch": 1.99, + "learning_rate": 1.6853787347195036e-05, + "loss": 1.0525, + "step": 538500 + }, + { + "epoch": 1.99, + "learning_rate": 1.6823010919476557e-05, + "loss": 1.0508, + "step": 539000 + }, + { + "epoch": 1.99, + "learning_rate": 1.6792234491758074e-05, + "loss": 1.0516, + "step": 539500 + }, + { + "epoch": 1.99, + "learning_rate": 1.676145806403959e-05, + "loss": 1.0469, + "step": 540000 + }, + { + "epoch": 2.0, + "learning_rate": 1.673068163632111e-05, + "loss": 1.0507, + "step": 540500 + }, + { + "epoch": 2.0, + "learning_rate": 1.669990520860263e-05, + "loss": 1.053, + "step": 541000 + }, + { + "epoch": 2.0, + "learning_rate": 1.6669128780884146e-05, + "loss": 1.0577, + "step": 541500 + }, + { + "epoch": 2.0, + "learning_rate": 1.6638352353165666e-05, + "loss": 1.0475, + "step": 542000 + }, + { + "epoch": 2.0, + "learning_rate": 1.6607575925447183e-05, + "loss": 1.0467, + "step": 542500 + }, + { + "epoch": 2.01, + "learning_rate": 1.65767994977287e-05, + "loss": 1.0514, + "step": 543000 + }, + { + "epoch": 2.01, + "learning_rate": 1.654602307001022e-05, + "loss": 1.0497, + "step": 543500 + }, + { + "epoch": 2.01, + "learning_rate": 1.6515246642291738e-05, + "loss": 1.0483, + "step": 544000 + }, + { + "epoch": 2.01, + "learning_rate": 1.6484470214573255e-05, + "loss": 1.0512, + "step": 544500 + }, + { + "epoch": 2.01, + "learning_rate": 1.6453693786854772e-05, + "loss": 1.0497, + "step": 545000 + }, + { + "epoch": 2.01, + "learning_rate": 1.6422917359136293e-05, + "loss": 1.0509, + "step": 545500 + }, + { + "epoch": 2.02, + "learning_rate": 1.639214093141781e-05, + "loss": 1.0495, + "step": 546000 + }, + { + "epoch": 2.02, + "learning_rate": 1.6361364503699327e-05, + "loss": 1.0474, + "step": 546500 + }, + { + "epoch": 2.02, + "learning_rate": 1.6330588075980847e-05, + "loss": 1.0404, + "step": 547000 + }, + { + "epoch": 2.02, + "learning_rate": 1.6299811648262364e-05, + "loss": 1.0409, + "step": 547500 + }, + { + "epoch": 2.02, + "learning_rate": 1.626903522054388e-05, + "loss": 1.0383, + "step": 548000 + }, + { + "epoch": 2.03, + "learning_rate": 1.6238258792825402e-05, + "loss": 1.0425, + "step": 548500 + }, + { + "epoch": 2.03, + "learning_rate": 1.620748236510692e-05, + "loss": 1.0421, + "step": 549000 + }, + { + "epoch": 2.03, + "learning_rate": 1.6176705937388436e-05, + "loss": 1.0518, + "step": 549500 + }, + { + "epoch": 2.03, + "learning_rate": 1.6145929509669953e-05, + "loss": 1.0409, + "step": 550000 + }, + { + "epoch": 2.03, + "learning_rate": 1.6115153081951474e-05, + "loss": 1.0464, + "step": 550500 + }, + { + "epoch": 2.03, + "learning_rate": 1.608437665423299e-05, + "loss": 1.038, + "step": 551000 + }, + { + "epoch": 2.04, + "learning_rate": 1.6053600226514508e-05, + "loss": 1.0489, + "step": 551500 + }, + { + "epoch": 2.04, + "learning_rate": 1.6022823798796028e-05, + "loss": 1.0533, + "step": 552000 + }, + { + "epoch": 2.04, + "learning_rate": 1.5992047371077545e-05, + "loss": 1.0447, + "step": 552500 + }, + { + "epoch": 2.04, + "learning_rate": 1.5961270943359062e-05, + "loss": 1.0416, + "step": 553000 + }, + { + "epoch": 2.04, + "learning_rate": 1.5930494515640583e-05, + "loss": 1.0463, + "step": 553500 + }, + { + "epoch": 2.05, + "learning_rate": 1.58997180879221e-05, + "loss": 1.0365, + "step": 554000 + }, + { + "epoch": 2.05, + "learning_rate": 1.5868941660203617e-05, + "loss": 1.0371, + "step": 554500 + }, + { + "epoch": 2.05, + "learning_rate": 1.5838165232485138e-05, + "loss": 1.0506, + "step": 555000 + }, + { + "epoch": 2.05, + "learning_rate": 1.5807388804766655e-05, + "loss": 1.0464, + "step": 555500 + }, + { + "epoch": 2.05, + "learning_rate": 1.5776612377048172e-05, + "loss": 1.0452, + "step": 556000 + }, + { + "epoch": 2.06, + "learning_rate": 1.574583594932969e-05, + "loss": 1.0499, + "step": 556500 + }, + { + "epoch": 2.06, + "learning_rate": 1.571505952161121e-05, + "loss": 1.0477, + "step": 557000 + }, + { + "epoch": 2.06, + "learning_rate": 1.5684283093892726e-05, + "loss": 1.0486, + "step": 557500 + }, + { + "epoch": 2.06, + "learning_rate": 1.5653506666174244e-05, + "loss": 1.0367, + "step": 558000 + }, + { + "epoch": 2.06, + "learning_rate": 1.5622730238455764e-05, + "loss": 1.0467, + "step": 558500 + }, + { + "epoch": 2.06, + "learning_rate": 1.559195381073728e-05, + "loss": 1.0455, + "step": 559000 + }, + { + "epoch": 2.07, + "learning_rate": 1.5561177383018798e-05, + "loss": 1.0463, + "step": 559500 + }, + { + "epoch": 2.07, + "learning_rate": 1.553040095530032e-05, + "loss": 1.0446, + "step": 560000 + }, + { + "epoch": 2.07, + "learning_rate": 1.5499624527581836e-05, + "loss": 1.0366, + "step": 560500 + }, + { + "epoch": 2.07, + "learning_rate": 1.5468848099863353e-05, + "loss": 1.0396, + "step": 561000 + }, + { + "epoch": 2.07, + "learning_rate": 1.543807167214487e-05, + "loss": 1.041, + "step": 561500 + }, + { + "epoch": 2.08, + "learning_rate": 1.540729524442639e-05, + "loss": 1.0454, + "step": 562000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5376518816707908e-05, + "loss": 1.0422, + "step": 562500 + }, + { + "epoch": 2.08, + "learning_rate": 1.5345742388989425e-05, + "loss": 1.042, + "step": 563000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5314965961270945e-05, + "loss": 1.0425, + "step": 563500 + }, + { + "epoch": 2.08, + "learning_rate": 1.5284189533552462e-05, + "loss": 1.0461, + "step": 564000 + }, + { + "epoch": 2.08, + "learning_rate": 1.5253413105833981e-05, + "loss": 1.0457, + "step": 564500 + }, + { + "epoch": 2.09, + "learning_rate": 1.5222636678115498e-05, + "loss": 1.0344, + "step": 565000 + }, + { + "epoch": 2.09, + "learning_rate": 1.5191860250397017e-05, + "loss": 1.0468, + "step": 565500 + }, + { + "epoch": 2.09, + "learning_rate": 1.5161083822678534e-05, + "loss": 1.0469, + "step": 566000 + }, + { + "epoch": 2.09, + "learning_rate": 1.5130307394960053e-05, + "loss": 1.0401, + "step": 566500 + }, + { + "epoch": 2.09, + "learning_rate": 1.5099530967241571e-05, + "loss": 1.0404, + "step": 567000 + }, + { + "epoch": 2.1, + "learning_rate": 1.5068754539523089e-05, + "loss": 1.0428, + "step": 567500 + }, + { + "epoch": 2.1, + "learning_rate": 1.5037978111804607e-05, + "loss": 1.0439, + "step": 568000 + }, + { + "epoch": 2.1, + "learning_rate": 1.5007201684086124e-05, + "loss": 1.0397, + "step": 568500 + }, + { + "epoch": 2.1, + "learning_rate": 1.4976425256367643e-05, + "loss": 1.0346, + "step": 569000 + }, + { + "epoch": 2.1, + "learning_rate": 1.4945648828649162e-05, + "loss": 1.0366, + "step": 569500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4914872400930679e-05, + "loss": 1.0415, + "step": 570000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4884095973212198e-05, + "loss": 1.0388, + "step": 570500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4853319545493715e-05, + "loss": 1.0404, + "step": 571000 + }, + { + "epoch": 2.11, + "learning_rate": 1.4822543117775234e-05, + "loss": 1.0359, + "step": 571500 + }, + { + "epoch": 2.11, + "learning_rate": 1.4791766690056753e-05, + "loss": 1.0466, + "step": 572000 + }, + { + "epoch": 2.11, + "learning_rate": 1.476099026233827e-05, + "loss": 1.0335, + "step": 572500 + }, + { + "epoch": 2.12, + "learning_rate": 1.4730213834619788e-05, + "loss": 1.0328, + "step": 573000 + }, + { + "epoch": 2.12, + "learning_rate": 1.4699437406901306e-05, + "loss": 1.0387, + "step": 573500 + }, + { + "epoch": 2.12, + "learning_rate": 1.4668660979182824e-05, + "loss": 1.0415, + "step": 574000 + }, + { + "epoch": 2.12, + "learning_rate": 1.4637884551464343e-05, + "loss": 1.0438, + "step": 574500 + }, + { + "epoch": 2.12, + "learning_rate": 1.460710812374586e-05, + "loss": 1.0379, + "step": 575000 + }, + { + "epoch": 2.13, + "learning_rate": 1.4576331696027379e-05, + "loss": 1.0424, + "step": 575500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4545555268308898e-05, + "loss": 1.0308, + "step": 576000 + }, + { + "epoch": 2.13, + "learning_rate": 1.4514778840590415e-05, + "loss": 1.0334, + "step": 576500 + }, + { + "epoch": 2.13, + "learning_rate": 1.4484002412871934e-05, + "loss": 1.0349, + "step": 577000 + }, + { + "epoch": 2.13, + "learning_rate": 1.445322598515345e-05, + "loss": 1.0468, + "step": 577500 + }, + { + "epoch": 2.13, + "learning_rate": 1.442244955743497e-05, + "loss": 1.0394, + "step": 578000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4391673129716488e-05, + "loss": 1.0469, + "step": 578500 + }, + { + "epoch": 2.14, + "learning_rate": 1.4360896701998005e-05, + "loss": 1.0423, + "step": 579000 + }, + { + "epoch": 2.14, + "learning_rate": 1.4330120274279524e-05, + "loss": 1.0297, + "step": 579500 + }, + { + "epoch": 2.14, + "learning_rate": 1.4299343846561041e-05, + "loss": 1.0411, + "step": 580000 + }, + { + "epoch": 2.14, + "learning_rate": 1.426856741884256e-05, + "loss": 1.0392, + "step": 580500 + }, + { + "epoch": 2.15, + "learning_rate": 1.4237790991124079e-05, + "loss": 1.0259, + "step": 581000 + }, + { + "epoch": 2.15, + "learning_rate": 1.4207014563405596e-05, + "loss": 1.035, + "step": 581500 + }, + { + "epoch": 2.15, + "learning_rate": 1.4176238135687115e-05, + "loss": 1.0395, + "step": 582000 + }, + { + "epoch": 2.15, + "learning_rate": 1.4145461707968632e-05, + "loss": 1.0358, + "step": 582500 + }, + { + "epoch": 2.15, + "learning_rate": 1.411468528025015e-05, + "loss": 1.0413, + "step": 583000 + }, + { + "epoch": 2.15, + "learning_rate": 1.408390885253167e-05, + "loss": 1.0329, + "step": 583500 + }, + { + "epoch": 2.16, + "learning_rate": 1.4053132424813186e-05, + "loss": 1.039, + "step": 584000 + }, + { + "epoch": 2.16, + "learning_rate": 1.4022355997094705e-05, + "loss": 1.0357, + "step": 584500 + }, + { + "epoch": 2.16, + "learning_rate": 1.3991579569376222e-05, + "loss": 1.0297, + "step": 585000 + }, + { + "epoch": 2.16, + "learning_rate": 1.3960803141657741e-05, + "loss": 1.0311, + "step": 585500 + }, + { + "epoch": 2.16, + "learning_rate": 1.393002671393926e-05, + "loss": 1.0412, + "step": 586000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3899250286220777e-05, + "loss": 1.0364, + "step": 586500 + }, + { + "epoch": 2.17, + "learning_rate": 1.3868473858502296e-05, + "loss": 1.0375, + "step": 587000 + }, + { + "epoch": 2.17, + "learning_rate": 1.3837697430783813e-05, + "loss": 1.0295, + "step": 587500 + }, + { + "epoch": 2.17, + "learning_rate": 1.3806921003065332e-05, + "loss": 1.0396, + "step": 588000 + }, + { + "epoch": 2.17, + "learning_rate": 1.377614457534685e-05, + "loss": 1.0308, + "step": 588500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3745368147628367e-05, + "loss": 1.0396, + "step": 589000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3714591719909886e-05, + "loss": 1.0346, + "step": 589500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3683815292191405e-05, + "loss": 1.0344, + "step": 590000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3653038864472922e-05, + "loss": 1.0344, + "step": 590500 + }, + { + "epoch": 2.18, + "learning_rate": 1.3622262436754441e-05, + "loss": 1.0258, + "step": 591000 + }, + { + "epoch": 2.18, + "learning_rate": 1.3591486009035958e-05, + "loss": 1.0383, + "step": 591500 + }, + { + "epoch": 2.19, + "learning_rate": 1.3560709581317477e-05, + "loss": 1.0299, + "step": 592000 + }, + { + "epoch": 2.19, + "learning_rate": 1.3529933153598996e-05, + "loss": 1.0326, + "step": 592500 + }, + { + "epoch": 2.19, + "learning_rate": 1.3499156725880513e-05, + "loss": 1.0393, + "step": 593000 + }, + { + "epoch": 2.19, + "learning_rate": 1.3468380298162031e-05, + "loss": 1.0327, + "step": 593500 + }, + { + "epoch": 2.19, + "learning_rate": 1.3437603870443549e-05, + "loss": 1.0373, + "step": 594000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3406827442725067e-05, + "loss": 1.038, + "step": 594500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3376051015006586e-05, + "loss": 1.0346, + "step": 595000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3345274587288107e-05, + "loss": 1.025, + "step": 595500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3314498159569624e-05, + "loss": 1.0369, + "step": 596000 + }, + { + "epoch": 2.2, + "learning_rate": 1.3283721731851142e-05, + "loss": 1.027, + "step": 596500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3252945304132661e-05, + "loss": 1.037, + "step": 597000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3222168876414178e-05, + "loss": 1.0388, + "step": 597500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3191392448695697e-05, + "loss": 1.0262, + "step": 598000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3160616020977214e-05, + "loss": 1.0306, + "step": 598500 + }, + { + "epoch": 2.21, + "learning_rate": 1.3129839593258733e-05, + "loss": 1.0307, + "step": 599000 + }, + { + "epoch": 2.21, + "learning_rate": 1.3099063165540252e-05, + "loss": 1.0347, + "step": 599500 + }, + { + "epoch": 2.22, + "learning_rate": 1.3068286737821769e-05, + "loss": 1.0303, + "step": 600000 + }, + { + "epoch": 2.22, + "learning_rate": 1.3037510310103288e-05, + "loss": 1.0367, + "step": 600500 + }, + { + "epoch": 2.22, + "learning_rate": 1.3006733882384806e-05, + "loss": 1.0267, + "step": 601000 + }, + { + "epoch": 2.22, + "learning_rate": 1.2975957454666324e-05, + "loss": 1.0302, + "step": 601500 + }, + { + "epoch": 2.22, + "learning_rate": 1.2945181026947842e-05, + "loss": 1.033, + "step": 602000 + }, + { + "epoch": 2.23, + "learning_rate": 1.291440459922936e-05, + "loss": 1.0207, + "step": 602500 + }, + { + "epoch": 2.23, + "learning_rate": 1.2883628171510878e-05, + "loss": 1.0293, + "step": 603000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2852851743792397e-05, + "loss": 1.0309, + "step": 603500 + }, + { + "epoch": 2.23, + "learning_rate": 1.2822075316073914e-05, + "loss": 1.04, + "step": 604000 + }, + { + "epoch": 2.23, + "learning_rate": 1.2791298888355433e-05, + "loss": 1.029, + "step": 604500 + }, + { + "epoch": 2.23, + "learning_rate": 1.276052246063695e-05, + "loss": 1.0284, + "step": 605000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2729746032918469e-05, + "loss": 1.0186, + "step": 605500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2698969605199987e-05, + "loss": 1.0257, + "step": 606000 + }, + { + "epoch": 2.24, + "learning_rate": 1.2668193177481505e-05, + "loss": 1.0252, + "step": 606500 + }, + { + "epoch": 2.24, + "learning_rate": 1.2637416749763023e-05, + "loss": 1.0336, + "step": 607000 + }, + { + "epoch": 2.24, + "learning_rate": 1.260664032204454e-05, + "loss": 1.0255, + "step": 607500 + }, + { + "epoch": 2.25, + "learning_rate": 1.257586389432606e-05, + "loss": 1.0264, + "step": 608000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2545087466607578e-05, + "loss": 1.0306, + "step": 608500 + }, + { + "epoch": 2.25, + "learning_rate": 1.2514311038889095e-05, + "loss": 1.0351, + "step": 609000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2483534611170612e-05, + "loss": 1.0356, + "step": 609500 + }, + { + "epoch": 2.25, + "learning_rate": 1.2452758183452131e-05, + "loss": 1.024, + "step": 610000 + }, + { + "epoch": 2.25, + "learning_rate": 1.2421981755733648e-05, + "loss": 1.028, + "step": 610500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2391205328015167e-05, + "loss": 1.0263, + "step": 611000 + }, + { + "epoch": 2.26, + "learning_rate": 1.2360428900296684e-05, + "loss": 1.0299, + "step": 611500 + }, + { + "epoch": 2.26, + "learning_rate": 1.2329652472578203e-05, + "loss": 1.0258, + "step": 612000 + }, + { + "epoch": 2.26, + "learning_rate": 1.2298876044859722e-05, + "loss": 1.0265, + "step": 612500 + }, + { + "epoch": 2.26, + "learning_rate": 1.226809961714124e-05, + "loss": 1.0224, + "step": 613000 + }, + { + "epoch": 2.27, + "learning_rate": 1.2237323189422759e-05, + "loss": 1.0276, + "step": 613500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2206546761704276e-05, + "loss": 1.0318, + "step": 614000 + }, + { + "epoch": 2.27, + "learning_rate": 1.2175770333985795e-05, + "loss": 1.0265, + "step": 614500 + }, + { + "epoch": 2.27, + "learning_rate": 1.2144993906267314e-05, + "loss": 1.0305, + "step": 615000 + }, + { + "epoch": 2.27, + "learning_rate": 1.211421747854883e-05, + "loss": 1.0289, + "step": 615500 + }, + { + "epoch": 2.27, + "learning_rate": 1.208344105083035e-05, + "loss": 1.0209, + "step": 616000 + }, + { + "epoch": 2.28, + "learning_rate": 1.2052664623111867e-05, + "loss": 1.0253, + "step": 616500 + }, + { + "epoch": 2.28, + "learning_rate": 1.2021888195393385e-05, + "loss": 1.0313, + "step": 617000 + }, + { + "epoch": 2.28, + "learning_rate": 1.1991111767674904e-05, + "loss": 1.025, + "step": 617500 + }, + { + "epoch": 2.28, + "learning_rate": 1.1960335339956421e-05, + "loss": 1.0258, + "step": 618000 + }, + { + "epoch": 2.28, + "learning_rate": 1.192955891223794e-05, + "loss": 1.016, + "step": 618500 + }, + { + "epoch": 2.29, + "learning_rate": 1.1898782484519457e-05, + "loss": 1.0291, + "step": 619000 + }, + { + "epoch": 2.29, + "learning_rate": 1.1868006056800976e-05, + "loss": 1.0263, + "step": 619500 + }, + { + "epoch": 2.29, + "learning_rate": 1.1837229629082495e-05, + "loss": 1.0274, + "step": 620000 + }, + { + "epoch": 2.29, + "learning_rate": 1.1806453201364012e-05, + "loss": 1.0261, + "step": 620500 + }, + { + "epoch": 2.29, + "learning_rate": 1.177567677364553e-05, + "loss": 1.0293, + "step": 621000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1744900345927048e-05, + "loss": 1.0311, + "step": 621500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1714123918208567e-05, + "loss": 1.0236, + "step": 622000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1683347490490085e-05, + "loss": 1.0212, + "step": 622500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1652571062771602e-05, + "loss": 1.0271, + "step": 623000 + }, + { + "epoch": 2.3, + "learning_rate": 1.1621794635053121e-05, + "loss": 1.018, + "step": 623500 + }, + { + "epoch": 2.3, + "learning_rate": 1.1591018207334638e-05, + "loss": 1.032, + "step": 624000 + }, + { + "epoch": 2.31, + "learning_rate": 1.1560241779616157e-05, + "loss": 1.0238, + "step": 624500 + }, + { + "epoch": 2.31, + "learning_rate": 1.1529465351897676e-05, + "loss": 1.0294, + "step": 625000 + }, + { + "epoch": 2.31, + "learning_rate": 1.1498688924179193e-05, + "loss": 1.0274, + "step": 625500 + }, + { + "epoch": 2.31, + "learning_rate": 1.1467912496460712e-05, + "loss": 1.0283, + "step": 626000 + }, + { + "epoch": 2.31, + "learning_rate": 1.1437136068742229e-05, + "loss": 1.0293, + "step": 626500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1406359641023748e-05, + "loss": 1.023, + "step": 627000 + }, + { + "epoch": 2.32, + "learning_rate": 1.1375583213305266e-05, + "loss": 1.0227, + "step": 627500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1344806785586783e-05, + "loss": 1.0293, + "step": 628000 + }, + { + "epoch": 2.32, + "learning_rate": 1.1314030357868302e-05, + "loss": 1.022, + "step": 628500 + }, + { + "epoch": 2.32, + "learning_rate": 1.1283253930149821e-05, + "loss": 1.0209, + "step": 629000 + }, + { + "epoch": 2.32, + "learning_rate": 1.1252477502431338e-05, + "loss": 1.0243, + "step": 629500 + }, + { + "epoch": 2.33, + "learning_rate": 1.1221701074712857e-05, + "loss": 1.0291, + "step": 630000 + }, + { + "epoch": 2.33, + "learning_rate": 1.1190924646994374e-05, + "loss": 1.0269, + "step": 630500 + }, + { + "epoch": 2.33, + "learning_rate": 1.1160148219275893e-05, + "loss": 1.0256, + "step": 631000 + }, + { + "epoch": 2.33, + "learning_rate": 1.1129371791557412e-05, + "loss": 1.0227, + "step": 631500 + }, + { + "epoch": 2.33, + "learning_rate": 1.1098595363838929e-05, + "loss": 1.0253, + "step": 632000 + }, + { + "epoch": 2.34, + "learning_rate": 1.1067818936120447e-05, + "loss": 1.0284, + "step": 632500 + }, + { + "epoch": 2.34, + "learning_rate": 1.1037042508401965e-05, + "loss": 1.0226, + "step": 633000 + }, + { + "epoch": 2.34, + "learning_rate": 1.1006266080683483e-05, + "loss": 1.0222, + "step": 633500 + }, + { + "epoch": 2.34, + "learning_rate": 1.0975489652965002e-05, + "loss": 1.0258, + "step": 634000 + }, + { + "epoch": 2.34, + "learning_rate": 1.094471322524652e-05, + "loss": 1.0206, + "step": 634500 + }, + { + "epoch": 2.35, + "learning_rate": 1.0913936797528038e-05, + "loss": 1.0256, + "step": 635000 + }, + { + "epoch": 2.35, + "learning_rate": 1.0883160369809555e-05, + "loss": 1.0255, + "step": 635500 + }, + { + "epoch": 2.35, + "learning_rate": 1.0852383942091074e-05, + "loss": 1.0245, + "step": 636000 + }, + { + "epoch": 2.35, + "learning_rate": 1.0821607514372593e-05, + "loss": 1.0236, + "step": 636500 + }, + { + "epoch": 2.35, + "learning_rate": 1.079083108665411e-05, + "loss": 1.0197, + "step": 637000 + }, + { + "epoch": 2.35, + "learning_rate": 1.0760054658935629e-05, + "loss": 1.0218, + "step": 637500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0729278231217146e-05, + "loss": 1.0248, + "step": 638000 + }, + { + "epoch": 2.36, + "learning_rate": 1.0698501803498664e-05, + "loss": 1.0275, + "step": 638500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0667725375780183e-05, + "loss": 1.0304, + "step": 639000 + }, + { + "epoch": 2.36, + "learning_rate": 1.06369489480617e-05, + "loss": 1.0205, + "step": 639500 + }, + { + "epoch": 2.36, + "learning_rate": 1.0606172520343219e-05, + "loss": 1.038, + "step": 640000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0575396092624736e-05, + "loss": 1.0189, + "step": 640500 + }, + { + "epoch": 2.37, + "learning_rate": 1.0544619664906255e-05, + "loss": 1.0216, + "step": 641000 + }, + { + "epoch": 2.37, + "learning_rate": 1.0513843237187774e-05, + "loss": 1.0206, + "step": 641500 + }, + { + "epoch": 2.37, + "learning_rate": 1.048306680946929e-05, + "loss": 1.0216, + "step": 642000 + }, + { + "epoch": 2.37, + "learning_rate": 1.045229038175081e-05, + "loss": 1.0243, + "step": 642500 + }, + { + "epoch": 2.37, + "learning_rate": 1.0421513954032328e-05, + "loss": 1.015, + "step": 643000 + }, + { + "epoch": 2.38, + "learning_rate": 1.0390737526313845e-05, + "loss": 1.0225, + "step": 643500 + }, + { + "epoch": 2.38, + "learning_rate": 1.0359961098595364e-05, + "loss": 1.0217, + "step": 644000 + }, + { + "epoch": 2.38, + "learning_rate": 1.0329184670876881e-05, + "loss": 1.0163, + "step": 644500 + }, + { + "epoch": 2.38, + "learning_rate": 1.02984082431584e-05, + "loss": 1.0086, + "step": 645000 + }, + { + "epoch": 2.38, + "learning_rate": 1.0267631815439919e-05, + "loss": 1.0167, + "step": 645500 + }, + { + "epoch": 2.39, + "learning_rate": 1.0236855387721436e-05, + "loss": 1.0152, + "step": 646000 + }, + { + "epoch": 2.39, + "learning_rate": 1.0206078960002955e-05, + "loss": 1.0267, + "step": 646500 + }, + { + "epoch": 2.39, + "learning_rate": 1.0175302532284472e-05, + "loss": 1.0146, + "step": 647000 + }, + { + "epoch": 2.39, + "learning_rate": 1.014452610456599e-05, + "loss": 1.0155, + "step": 647500 + }, + { + "epoch": 2.39, + "learning_rate": 1.011374967684751e-05, + "loss": 1.0239, + "step": 648000 + }, + { + "epoch": 2.4, + "learning_rate": 1.0082973249129028e-05, + "loss": 1.0219, + "step": 648500 + }, + { + "epoch": 2.4, + "learning_rate": 1.0052196821410547e-05, + "loss": 1.0199, + "step": 649000 + }, + { + "epoch": 2.4, + "learning_rate": 1.0021420393692064e-05, + "loss": 1.0163, + "step": 649500 + }, + { + "epoch": 2.4, + "learning_rate": 9.990643965973583e-06, + "loss": 1.0213, + "step": 650000 + }, + { + "epoch": 2.4, + "learning_rate": 9.9598675382551e-06, + "loss": 1.0201, + "step": 650500 + }, + { + "epoch": 2.4, + "learning_rate": 9.929091110536619e-06, + "loss": 1.0217, + "step": 651000 + }, + { + "epoch": 2.41, + "learning_rate": 9.898314682818138e-06, + "loss": 1.0217, + "step": 651500 + }, + { + "epoch": 2.41, + "learning_rate": 9.867538255099655e-06, + "loss": 1.0152, + "step": 652000 + }, + { + "epoch": 2.41, + "learning_rate": 9.836761827381173e-06, + "loss": 1.0236, + "step": 652500 + }, + { + "epoch": 2.41, + "learning_rate": 9.80598539966269e-06, + "loss": 1.0183, + "step": 653000 + }, + { + "epoch": 2.41, + "learning_rate": 9.77520897194421e-06, + "loss": 1.0152, + "step": 653500 + }, + { + "epoch": 2.42, + "learning_rate": 9.744432544225728e-06, + "loss": 1.0205, + "step": 654000 + }, + { + "epoch": 2.42, + "learning_rate": 9.713656116507245e-06, + "loss": 1.0268, + "step": 654500 + }, + { + "epoch": 2.42, + "learning_rate": 9.682879688788764e-06, + "loss": 1.03, + "step": 655000 + }, + { + "epoch": 2.42, + "learning_rate": 9.652103261070283e-06, + "loss": 1.018, + "step": 655500 + }, + { + "epoch": 2.42, + "learning_rate": 9.6213268333518e-06, + "loss": 1.02, + "step": 656000 + }, + { + "epoch": 2.42, + "learning_rate": 9.590550405633319e-06, + "loss": 1.0198, + "step": 656500 + }, + { + "epoch": 2.43, + "learning_rate": 9.559773977914836e-06, + "loss": 1.0178, + "step": 657000 + }, + { + "epoch": 2.43, + "learning_rate": 9.528997550196354e-06, + "loss": 1.016, + "step": 657500 + }, + { + "epoch": 2.43, + "learning_rate": 9.498221122477873e-06, + "loss": 1.0213, + "step": 658000 + }, + { + "epoch": 2.43, + "learning_rate": 9.46744469475939e-06, + "loss": 1.0184, + "step": 658500 + }, + { + "epoch": 2.43, + "learning_rate": 9.436668267040909e-06, + "loss": 1.0244, + "step": 659000 + }, + { + "epoch": 2.44, + "learning_rate": 9.405891839322426e-06, + "loss": 1.0191, + "step": 659500 + }, + { + "epoch": 2.44, + "learning_rate": 9.375115411603945e-06, + "loss": 1.0232, + "step": 660000 + }, + { + "epoch": 2.44, + "learning_rate": 9.344338983885464e-06, + "loss": 1.0143, + "step": 660500 + }, + { + "epoch": 2.44, + "learning_rate": 9.313562556166981e-06, + "loss": 1.0198, + "step": 661000 + }, + { + "epoch": 2.44, + "learning_rate": 9.2827861284485e-06, + "loss": 1.0201, + "step": 661500 + }, + { + "epoch": 2.44, + "learning_rate": 9.252009700730017e-06, + "loss": 1.0174, + "step": 662000 + }, + { + "epoch": 2.45, + "learning_rate": 9.221233273011536e-06, + "loss": 1.0231, + "step": 662500 + }, + { + "epoch": 2.45, + "learning_rate": 9.190456845293054e-06, + "loss": 1.0119, + "step": 663000 + }, + { + "epoch": 2.45, + "learning_rate": 9.159680417574571e-06, + "loss": 1.0127, + "step": 663500 + }, + { + "epoch": 2.45, + "learning_rate": 9.12890398985609e-06, + "loss": 1.0168, + "step": 664000 + }, + { + "epoch": 2.45, + "learning_rate": 9.098127562137607e-06, + "loss": 1.0155, + "step": 664500 + }, + { + "epoch": 2.46, + "learning_rate": 9.067351134419126e-06, + "loss": 1.0175, + "step": 665000 + }, + { + "epoch": 2.46, + "learning_rate": 9.036574706700645e-06, + "loss": 1.0174, + "step": 665500 + }, + { + "epoch": 2.46, + "learning_rate": 9.005798278982162e-06, + "loss": 1.0173, + "step": 666000 + }, + { + "epoch": 2.46, + "learning_rate": 8.97502185126368e-06, + "loss": 1.0178, + "step": 666500 + }, + { + "epoch": 2.46, + "learning_rate": 8.944245423545198e-06, + "loss": 1.0124, + "step": 667000 + }, + { + "epoch": 2.47, + "learning_rate": 8.913468995826717e-06, + "loss": 1.0189, + "step": 667500 + }, + { + "epoch": 2.47, + "learning_rate": 8.882692568108235e-06, + "loss": 1.0231, + "step": 668000 + }, + { + "epoch": 2.47, + "learning_rate": 8.851916140389752e-06, + "loss": 1.0143, + "step": 668500 + }, + { + "epoch": 2.47, + "learning_rate": 8.821139712671271e-06, + "loss": 1.0153, + "step": 669000 + }, + { + "epoch": 2.47, + "learning_rate": 8.79036328495279e-06, + "loss": 1.0174, + "step": 669500 + }, + { + "epoch": 2.47, + "learning_rate": 8.759586857234307e-06, + "loss": 1.0158, + "step": 670000 + }, + { + "epoch": 2.48, + "learning_rate": 8.728810429515826e-06, + "loss": 1.0116, + "step": 670500 + }, + { + "epoch": 2.48, + "learning_rate": 8.698034001797343e-06, + "loss": 1.02, + "step": 671000 + }, + { + "epoch": 2.48, + "learning_rate": 8.667257574078862e-06, + "loss": 1.014, + "step": 671500 + }, + { + "epoch": 2.48, + "learning_rate": 8.63648114636038e-06, + "loss": 1.016, + "step": 672000 + }, + { + "epoch": 2.48, + "learning_rate": 8.605704718641898e-06, + "loss": 1.0173, + "step": 672500 + }, + { + "epoch": 2.49, + "learning_rate": 8.574928290923416e-06, + "loss": 1.0045, + "step": 673000 + }, + { + "epoch": 2.49, + "learning_rate": 8.544151863204934e-06, + "loss": 1.0185, + "step": 673500 + }, + { + "epoch": 2.49, + "learning_rate": 8.513375435486452e-06, + "loss": 1.0156, + "step": 674000 + }, + { + "epoch": 2.49, + "learning_rate": 8.482599007767971e-06, + "loss": 1.0106, + "step": 674500 + }, + { + "epoch": 2.49, + "learning_rate": 8.451822580049488e-06, + "loss": 1.0153, + "step": 675000 + }, + { + "epoch": 2.49, + "learning_rate": 8.421046152331007e-06, + "loss": 1.01, + "step": 675500 + }, + { + "epoch": 2.5, + "learning_rate": 8.390269724612524e-06, + "loss": 1.0161, + "step": 676000 + }, + { + "epoch": 2.5, + "learning_rate": 8.359493296894043e-06, + "loss": 1.0146, + "step": 676500 + }, + { + "epoch": 2.5, + "learning_rate": 8.328716869175562e-06, + "loss": 1.0152, + "step": 677000 + }, + { + "epoch": 2.5, + "learning_rate": 8.297940441457079e-06, + "loss": 1.0089, + "step": 677500 + }, + { + "epoch": 2.5, + "learning_rate": 8.267164013738598e-06, + "loss": 1.0139, + "step": 678000 + }, + { + "epoch": 2.51, + "learning_rate": 8.236387586020115e-06, + "loss": 1.0202, + "step": 678500 + }, + { + "epoch": 2.51, + "learning_rate": 8.205611158301633e-06, + "loss": 1.0082, + "step": 679000 + }, + { + "epoch": 2.51, + "learning_rate": 8.174834730583152e-06, + "loss": 1.0067, + "step": 679500 + }, + { + "epoch": 2.51, + "learning_rate": 8.14405830286467e-06, + "loss": 1.0156, + "step": 680000 + }, + { + "epoch": 2.51, + "learning_rate": 8.113281875146188e-06, + "loss": 1.0075, + "step": 680500 + }, + { + "epoch": 2.52, + "learning_rate": 8.082505447427705e-06, + "loss": 1.0108, + "step": 681000 + }, + { + "epoch": 2.52, + "learning_rate": 8.051729019709224e-06, + "loss": 1.0115, + "step": 681500 + }, + { + "epoch": 2.52, + "learning_rate": 8.020952591990743e-06, + "loss": 1.014, + "step": 682000 + }, + { + "epoch": 2.52, + "learning_rate": 7.99017616427226e-06, + "loss": 1.0147, + "step": 682500 + }, + { + "epoch": 2.52, + "learning_rate": 7.95939973655378e-06, + "loss": 1.006, + "step": 683000 + }, + { + "epoch": 2.52, + "learning_rate": 7.928623308835297e-06, + "loss": 1.016, + "step": 683500 + }, + { + "epoch": 2.53, + "learning_rate": 7.897846881116816e-06, + "loss": 1.025, + "step": 684000 + }, + { + "epoch": 2.53, + "learning_rate": 7.867070453398335e-06, + "loss": 1.0174, + "step": 684500 + }, + { + "epoch": 2.53, + "learning_rate": 7.836294025679852e-06, + "loss": 1.0158, + "step": 685000 + }, + { + "epoch": 2.53, + "learning_rate": 7.80551759796137e-06, + "loss": 1.0128, + "step": 685500 + }, + { + "epoch": 2.53, + "learning_rate": 7.774741170242888e-06, + "loss": 1.0142, + "step": 686000 + }, + { + "epoch": 2.54, + "learning_rate": 7.743964742524407e-06, + "loss": 1.0199, + "step": 686500 + }, + { + "epoch": 2.54, + "learning_rate": 7.713188314805925e-06, + "loss": 1.0173, + "step": 687000 + }, + { + "epoch": 2.54, + "learning_rate": 7.682411887087443e-06, + "loss": 1.0143, + "step": 687500 + }, + { + "epoch": 2.54, + "learning_rate": 7.651635459368961e-06, + "loss": 1.0135, + "step": 688000 + }, + { + "epoch": 2.54, + "learning_rate": 7.620859031650479e-06, + "loss": 1.0042, + "step": 688500 + }, + { + "epoch": 2.54, + "learning_rate": 7.590082603931997e-06, + "loss": 1.0161, + "step": 689000 + }, + { + "epoch": 2.55, + "learning_rate": 7.559306176213515e-06, + "loss": 1.01, + "step": 689500 + }, + { + "epoch": 2.55, + "learning_rate": 7.528529748495033e-06, + "loss": 1.0041, + "step": 690000 + }, + { + "epoch": 2.55, + "learning_rate": 7.497753320776552e-06, + "loss": 1.0185, + "step": 690500 + }, + { + "epoch": 2.55, + "learning_rate": 7.46697689305807e-06, + "loss": 1.0042, + "step": 691000 + }, + { + "epoch": 2.55, + "learning_rate": 7.436200465339588e-06, + "loss": 0.9985, + "step": 691500 + }, + { + "epoch": 2.56, + "learning_rate": 7.405424037621106e-06, + "loss": 1.0094, + "step": 692000 + }, + { + "epoch": 2.56, + "learning_rate": 7.3746476099026245e-06, + "loss": 1.0084, + "step": 692500 + }, + { + "epoch": 2.56, + "learning_rate": 7.343871182184142e-06, + "loss": 1.0106, + "step": 693000 + }, + { + "epoch": 2.56, + "learning_rate": 7.31309475446566e-06, + "loss": 1.0174, + "step": 693500 + }, + { + "epoch": 2.56, + "learning_rate": 7.282318326747178e-06, + "loss": 1.0137, + "step": 694000 + }, + { + "epoch": 2.56, + "learning_rate": 7.251541899028696e-06, + "loss": 1.0066, + "step": 694500 + }, + { + "epoch": 2.57, + "learning_rate": 7.220765471310215e-06, + "loss": 1.0081, + "step": 695000 + }, + { + "epoch": 2.57, + "learning_rate": 7.189989043591733e-06, + "loss": 1.0042, + "step": 695500 + }, + { + "epoch": 2.57, + "learning_rate": 7.159212615873251e-06, + "loss": 1.0139, + "step": 696000 + }, + { + "epoch": 2.57, + "learning_rate": 7.128436188154769e-06, + "loss": 1.0137, + "step": 696500 + }, + { + "epoch": 2.57, + "learning_rate": 7.097659760436287e-06, + "loss": 1.0135, + "step": 697000 + }, + { + "epoch": 2.58, + "learning_rate": 7.0668833327178055e-06, + "loss": 1.01, + "step": 697500 + }, + { + "epoch": 2.58, + "learning_rate": 7.0361069049993235e-06, + "loss": 1.0041, + "step": 698000 + }, + { + "epoch": 2.58, + "learning_rate": 7.005330477280841e-06, + "loss": 1.0142, + "step": 698500 + }, + { + "epoch": 2.58, + "learning_rate": 6.974554049562359e-06, + "loss": 1.0058, + "step": 699000 + }, + { + "epoch": 2.58, + "learning_rate": 6.943777621843878e-06, + "loss": 1.0086, + "step": 699500 + }, + { + "epoch": 2.59, + "learning_rate": 6.913001194125396e-06, + "loss": 1.0055, + "step": 700000 + }, + { + "epoch": 2.59, + "learning_rate": 6.882224766406914e-06, + "loss": 1.0148, + "step": 700500 + }, + { + "epoch": 2.59, + "learning_rate": 6.851448338688432e-06, + "loss": 1.004, + "step": 701000 + }, + { + "epoch": 2.59, + "learning_rate": 6.82067191096995e-06, + "loss": 1.0096, + "step": 701500 + }, + { + "epoch": 2.59, + "learning_rate": 6.789895483251469e-06, + "loss": 1.0043, + "step": 702000 + }, + { + "epoch": 2.59, + "learning_rate": 6.759119055532987e-06, + "loss": 1.0115, + "step": 702500 + }, + { + "epoch": 2.6, + "learning_rate": 6.7283426278145045e-06, + "loss": 1.0067, + "step": 703000 + }, + { + "epoch": 2.6, + "learning_rate": 6.6975662000960225e-06, + "loss": 1.0118, + "step": 703500 + }, + { + "epoch": 2.6, + "learning_rate": 6.66678977237754e-06, + "loss": 1.0056, + "step": 704000 + }, + { + "epoch": 2.6, + "learning_rate": 6.636013344659059e-06, + "loss": 1.0146, + "step": 704500 + }, + { + "epoch": 2.6, + "learning_rate": 6.605236916940577e-06, + "loss": 1.0111, + "step": 705000 + }, + { + "epoch": 2.61, + "learning_rate": 6.574460489222095e-06, + "loss": 1.0074, + "step": 705500 + }, + { + "epoch": 2.61, + "learning_rate": 6.543684061503613e-06, + "loss": 1.0116, + "step": 706000 + }, + { + "epoch": 2.61, + "learning_rate": 6.512907633785132e-06, + "loss": 1.0076, + "step": 706500 + }, + { + "epoch": 2.61, + "learning_rate": 6.48213120606665e-06, + "loss": 1.008, + "step": 707000 + }, + { + "epoch": 2.61, + "learning_rate": 6.451354778348168e-06, + "loss": 1.0123, + "step": 707500 + }, + { + "epoch": 2.61, + "learning_rate": 6.420578350629686e-06, + "loss": 1.0056, + "step": 708000 + }, + { + "epoch": 2.62, + "learning_rate": 6.3898019229112035e-06, + "loss": 1.0049, + "step": 708500 + }, + { + "epoch": 2.62, + "learning_rate": 6.359025495192722e-06, + "loss": 1.0028, + "step": 709000 + }, + { + "epoch": 2.62, + "learning_rate": 6.32824906747424e-06, + "loss": 1.0069, + "step": 709500 + }, + { + "epoch": 2.62, + "learning_rate": 6.297472639755758e-06, + "loss": 1.0091, + "step": 710000 + }, + { + "epoch": 2.62, + "learning_rate": 6.266696212037276e-06, + "loss": 1.0159, + "step": 710500 + }, + { + "epoch": 2.63, + "learning_rate": 6.235919784318795e-06, + "loss": 1.0079, + "step": 711000 + }, + { + "epoch": 2.63, + "learning_rate": 6.205143356600313e-06, + "loss": 1.0096, + "step": 711500 + }, + { + "epoch": 2.63, + "learning_rate": 6.174366928881832e-06, + "loss": 1.0024, + "step": 712000 + }, + { + "epoch": 2.63, + "learning_rate": 6.1435905011633496e-06, + "loss": 0.9978, + "step": 712500 + }, + { + "epoch": 2.63, + "learning_rate": 6.1128140734448675e-06, + "loss": 1.0059, + "step": 713000 + }, + { + "epoch": 2.64, + "learning_rate": 6.0820376457263854e-06, + "loss": 1.0076, + "step": 713500 + }, + { + "epoch": 2.64, + "learning_rate": 6.051261218007904e-06, + "loss": 1.0115, + "step": 714000 + }, + { + "epoch": 2.64, + "learning_rate": 6.020484790289422e-06, + "loss": 1.0101, + "step": 714500 + }, + { + "epoch": 2.64, + "learning_rate": 5.98970836257094e-06, + "loss": 1.011, + "step": 715000 + }, + { + "epoch": 2.64, + "learning_rate": 5.958931934852458e-06, + "loss": 1.0119, + "step": 715500 + }, + { + "epoch": 2.64, + "learning_rate": 5.928155507133976e-06, + "loss": 1.0122, + "step": 716000 + }, + { + "epoch": 2.65, + "learning_rate": 5.897379079415495e-06, + "loss": 1.0039, + "step": 716500 + }, + { + "epoch": 2.65, + "learning_rate": 5.866602651697013e-06, + "loss": 1.0031, + "step": 717000 + }, + { + "epoch": 2.65, + "learning_rate": 5.835826223978531e-06, + "loss": 1.0059, + "step": 717500 + }, + { + "epoch": 2.65, + "learning_rate": 5.8050497962600486e-06, + "loss": 1.0082, + "step": 718000 + }, + { + "epoch": 2.65, + "learning_rate": 5.7742733685415665e-06, + "loss": 1.0027, + "step": 718500 + }, + { + "epoch": 2.66, + "learning_rate": 5.743496940823085e-06, + "loss": 0.9952, + "step": 719000 + }, + { + "epoch": 2.66, + "learning_rate": 5.712720513104603e-06, + "loss": 1.0025, + "step": 719500 + }, + { + "epoch": 2.66, + "learning_rate": 5.681944085386121e-06, + "loss": 1.0005, + "step": 720000 + }, + { + "epoch": 2.66, + "learning_rate": 5.651167657667639e-06, + "loss": 1.0066, + "step": 720500 + }, + { + "epoch": 2.66, + "learning_rate": 5.620391229949158e-06, + "loss": 1.0004, + "step": 721000 + }, + { + "epoch": 2.66, + "learning_rate": 5.589614802230676e-06, + "loss": 1.0066, + "step": 721500 + }, + { + "epoch": 2.67, + "learning_rate": 5.558838374512194e-06, + "loss": 1.0082, + "step": 722000 + }, + { + "epoch": 2.67, + "learning_rate": 5.528061946793712e-06, + "loss": 1.0082, + "step": 722500 + }, + { + "epoch": 2.67, + "learning_rate": 5.49728551907523e-06, + "loss": 1.0108, + "step": 723000 + }, + { + "epoch": 2.67, + "learning_rate": 5.466509091356748e-06, + "loss": 1.0073, + "step": 723500 + }, + { + "epoch": 2.67, + "learning_rate": 5.435732663638266e-06, + "loss": 1.0071, + "step": 724000 + }, + { + "epoch": 2.68, + "learning_rate": 5.404956235919784e-06, + "loss": 1.0023, + "step": 724500 + }, + { + "epoch": 2.68, + "learning_rate": 5.374179808201302e-06, + "loss": 0.9992, + "step": 725000 + }, + { + "epoch": 2.68, + "learning_rate": 5.34340338048282e-06, + "loss": 1.0023, + "step": 725500 + }, + { + "epoch": 2.68, + "learning_rate": 5.312626952764339e-06, + "loss": 0.9983, + "step": 726000 + }, + { + "epoch": 2.68, + "learning_rate": 5.281850525045857e-06, + "loss": 1.0009, + "step": 726500 + }, + { + "epoch": 2.68, + "learning_rate": 5.251074097327376e-06, + "loss": 1.0027, + "step": 727000 + }, + { + "epoch": 2.69, + "learning_rate": 5.220297669608894e-06, + "loss": 1.0083, + "step": 727500 + }, + { + "epoch": 2.69, + "learning_rate": 5.1895212418904115e-06, + "loss": 1.0076, + "step": 728000 + }, + { + "epoch": 2.69, + "learning_rate": 5.15874481417193e-06, + "loss": 0.9977, + "step": 728500 + }, + { + "epoch": 2.69, + "learning_rate": 5.127968386453448e-06, + "loss": 1.0028, + "step": 729000 + }, + { + "epoch": 2.69, + "learning_rate": 5.097191958734966e-06, + "loss": 1.0072, + "step": 729500 + }, + { + "epoch": 2.7, + "learning_rate": 5.066415531016484e-06, + "loss": 1.0031, + "step": 730000 + }, + { + "epoch": 2.7, + "learning_rate": 5.035639103298002e-06, + "loss": 1.0051, + "step": 730500 + }, + { + "epoch": 2.7, + "learning_rate": 5.004862675579521e-06, + "loss": 1.0055, + "step": 731000 + }, + { + "epoch": 2.7, + "learning_rate": 4.974086247861039e-06, + "loss": 1.0034, + "step": 731500 + }, + { + "epoch": 2.7, + "learning_rate": 4.943309820142557e-06, + "loss": 1.0042, + "step": 732000 + }, + { + "epoch": 2.71, + "learning_rate": 4.912533392424075e-06, + "loss": 1.0039, + "step": 732500 + }, + { + "epoch": 2.71, + "learning_rate": 4.8817569647055934e-06, + "loss": 1.0033, + "step": 733000 + }, + { + "epoch": 2.71, + "learning_rate": 4.850980536987111e-06, + "loss": 0.9988, + "step": 733500 + }, + { + "epoch": 2.71, + "learning_rate": 4.820204109268629e-06, + "loss": 1.0052, + "step": 734000 + }, + { + "epoch": 2.71, + "learning_rate": 4.789427681550147e-06, + "loss": 0.9979, + "step": 734500 + }, + { + "epoch": 2.71, + "learning_rate": 4.758651253831665e-06, + "loss": 0.9971, + "step": 735000 + }, + { + "epoch": 2.72, + "learning_rate": 4.727874826113184e-06, + "loss": 1.0054, + "step": 735500 + }, + { + "epoch": 2.72, + "learning_rate": 4.697098398394702e-06, + "loss": 0.9994, + "step": 736000 + }, + { + "epoch": 2.72, + "learning_rate": 4.66632197067622e-06, + "loss": 0.9963, + "step": 736500 + }, + { + "epoch": 2.72, + "learning_rate": 4.635545542957738e-06, + "loss": 0.9992, + "step": 737000 + }, + { + "epoch": 2.72, + "learning_rate": 4.604769115239256e-06, + "loss": 1.0022, + "step": 737500 + }, + { + "epoch": 2.73, + "learning_rate": 4.5739926875207745e-06, + "loss": 1.0108, + "step": 738000 + }, + { + "epoch": 2.73, + "learning_rate": 4.5432162598022924e-06, + "loss": 0.9971, + "step": 738500 + }, + { + "epoch": 2.73, + "learning_rate": 4.51243983208381e-06, + "loss": 0.9978, + "step": 739000 + }, + { + "epoch": 2.73, + "learning_rate": 4.481663404365328e-06, + "loss": 1.0005, + "step": 739500 + }, + { + "epoch": 2.73, + "learning_rate": 4.450886976646847e-06, + "loss": 1.0058, + "step": 740000 + }, + { + "epoch": 2.73, + "learning_rate": 4.420110548928365e-06, + "loss": 0.9967, + "step": 740500 + }, + { + "epoch": 2.74, + "learning_rate": 4.389334121209883e-06, + "loss": 0.9919, + "step": 741000 + }, + { + "epoch": 2.74, + "learning_rate": 4.358557693491401e-06, + "loss": 1.0065, + "step": 741500 + }, + { + "epoch": 2.74, + "learning_rate": 4.327781265772919e-06, + "loss": 1.0024, + "step": 742000 + }, + { + "epoch": 2.74, + "learning_rate": 4.297004838054438e-06, + "loss": 0.9953, + "step": 742500 + }, + { + "epoch": 2.74, + "learning_rate": 4.2662284103359556e-06, + "loss": 0.9981, + "step": 743000 + }, + { + "epoch": 2.75, + "learning_rate": 4.2354519826174735e-06, + "loss": 0.9962, + "step": 743500 + }, + { + "epoch": 2.75, + "learning_rate": 4.2046755548989914e-06, + "loss": 1.0003, + "step": 744000 + }, + { + "epoch": 2.75, + "learning_rate": 4.17389912718051e-06, + "loss": 0.995, + "step": 744500 + }, + { + "epoch": 2.75, + "learning_rate": 4.143122699462028e-06, + "loss": 1.0064, + "step": 745000 + }, + { + "epoch": 2.75, + "learning_rate": 4.112346271743547e-06, + "loss": 1.001, + "step": 745500 + }, + { + "epoch": 2.76, + "learning_rate": 4.081569844025065e-06, + "loss": 0.9993, + "step": 746000 + }, + { + "epoch": 2.76, + "learning_rate": 4.050793416306583e-06, + "loss": 0.9941, + "step": 746500 + }, + { + "epoch": 2.76, + "learning_rate": 4.020016988588101e-06, + "loss": 1.0024, + "step": 747000 + }, + { + "epoch": 2.76, + "learning_rate": 3.9892405608696195e-06, + "loss": 1.0016, + "step": 747500 + }, + { + "epoch": 2.76, + "learning_rate": 3.9584641331511375e-06, + "loss": 1.004, + "step": 748000 + }, + { + "epoch": 2.76, + "learning_rate": 3.927687705432655e-06, + "loss": 1.0006, + "step": 748500 + }, + { + "epoch": 2.77, + "learning_rate": 3.896911277714173e-06, + "loss": 1.0003, + "step": 749000 + }, + { + "epoch": 2.77, + "learning_rate": 3.866134849995691e-06, + "loss": 0.9968, + "step": 749500 + }, + { + "epoch": 2.77, + "learning_rate": 3.83535842227721e-06, + "loss": 0.9979, + "step": 750000 + }, + { + "epoch": 2.77, + "learning_rate": 3.804581994558728e-06, + "loss": 1.0025, + "step": 750500 + }, + { + "epoch": 2.77, + "learning_rate": 3.773805566840246e-06, + "loss": 0.9947, + "step": 751000 + }, + { + "epoch": 2.78, + "learning_rate": 3.7430291391217643e-06, + "loss": 0.9975, + "step": 751500 + }, + { + "epoch": 2.78, + "learning_rate": 3.7122527114032822e-06, + "loss": 1.0006, + "step": 752000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6814762836848e-06, + "loss": 0.9894, + "step": 752500 + }, + { + "epoch": 2.78, + "learning_rate": 3.6506998559663185e-06, + "loss": 1.0005, + "step": 753000 + }, + { + "epoch": 2.78, + "learning_rate": 3.6199234282478365e-06, + "loss": 1.0106, + "step": 753500 + }, + { + "epoch": 2.78, + "learning_rate": 3.589147000529355e-06, + "loss": 1.0023, + "step": 754000 + }, + { + "epoch": 2.79, + "learning_rate": 3.5583705728108728e-06, + "loss": 0.9934, + "step": 754500 + }, + { + "epoch": 2.79, + "learning_rate": 3.527594145092391e-06, + "loss": 0.9958, + "step": 755000 + }, + { + "epoch": 2.79, + "learning_rate": 3.496817717373909e-06, + "loss": 0.9978, + "step": 755500 + }, + { + "epoch": 2.79, + "learning_rate": 3.466041289655427e-06, + "loss": 0.9994, + "step": 756000 + }, + { + "epoch": 2.79, + "learning_rate": 3.4352648619369454e-06, + "loss": 1.0067, + "step": 756500 + }, + { + "epoch": 2.8, + "learning_rate": 3.4044884342184633e-06, + "loss": 0.9952, + "step": 757000 + }, + { + "epoch": 2.8, + "learning_rate": 3.3737120064999817e-06, + "loss": 1.0057, + "step": 757500 + }, + { + "epoch": 2.8, + "learning_rate": 3.3429355787814996e-06, + "loss": 0.9982, + "step": 758000 + }, + { + "epoch": 2.8, + "learning_rate": 3.312159151063018e-06, + "loss": 0.9944, + "step": 758500 + }, + { + "epoch": 2.8, + "learning_rate": 3.281382723344536e-06, + "loss": 0.9935, + "step": 759000 + }, + { + "epoch": 2.8, + "learning_rate": 3.250606295626054e-06, + "loss": 0.9997, + "step": 759500 + }, + { + "epoch": 2.81, + "learning_rate": 3.219829867907572e-06, + "loss": 0.9982, + "step": 760000 + }, + { + "epoch": 2.81, + "learning_rate": 3.18905344018909e-06, + "loss": 0.9938, + "step": 760500 + }, + { + "epoch": 2.81, + "learning_rate": 3.1582770124706085e-06, + "loss": 0.9998, + "step": 761000 + }, + { + "epoch": 2.81, + "learning_rate": 3.1275005847521264e-06, + "loss": 0.9983, + "step": 761500 + }, + { + "epoch": 2.81, + "learning_rate": 3.0967241570336448e-06, + "loss": 1.0039, + "step": 762000 + }, + { + "epoch": 2.82, + "learning_rate": 3.065947729315163e-06, + "loss": 0.9966, + "step": 762500 + }, + { + "epoch": 2.82, + "learning_rate": 3.035171301596681e-06, + "loss": 0.9901, + "step": 763000 + }, + { + "epoch": 2.82, + "learning_rate": 3.0043948738781994e-06, + "loss": 0.9968, + "step": 763500 + }, + { + "epoch": 2.82, + "learning_rate": 2.9736184461597174e-06, + "loss": 0.9932, + "step": 764000 + }, + { + "epoch": 2.82, + "learning_rate": 2.9428420184412353e-06, + "loss": 0.9969, + "step": 764500 + }, + { + "epoch": 2.83, + "learning_rate": 2.9120655907227537e-06, + "loss": 0.9881, + "step": 765000 + }, + { + "epoch": 2.83, + "learning_rate": 2.8812891630042716e-06, + "loss": 0.9991, + "step": 765500 + }, + { + "epoch": 2.83, + "learning_rate": 2.8505127352857904e-06, + "loss": 0.994, + "step": 766000 + }, + { + "epoch": 2.83, + "learning_rate": 2.8197363075673083e-06, + "loss": 0.9993, + "step": 766500 + }, + { + "epoch": 2.83, + "learning_rate": 2.7889598798488263e-06, + "loss": 0.987, + "step": 767000 + }, + { + "epoch": 2.83, + "learning_rate": 2.7581834521303446e-06, + "loss": 0.9931, + "step": 767500 + }, + { + "epoch": 2.84, + "learning_rate": 2.7274070244118626e-06, + "loss": 0.9928, + "step": 768000 + }, + { + "epoch": 2.84, + "learning_rate": 2.696630596693381e-06, + "loss": 0.9971, + "step": 768500 + }, + { + "epoch": 2.84, + "learning_rate": 2.665854168974899e-06, + "loss": 0.9889, + "step": 769000 + }, + { + "epoch": 2.84, + "learning_rate": 2.6350777412564172e-06, + "loss": 0.9953, + "step": 769500 + }, + { + "epoch": 2.84, + "learning_rate": 2.604301313537935e-06, + "loss": 0.9981, + "step": 770000 + }, + { + "epoch": 2.85, + "learning_rate": 2.573524885819453e-06, + "loss": 0.9927, + "step": 770500 + }, + { + "epoch": 2.85, + "learning_rate": 2.5427484581009715e-06, + "loss": 0.9945, + "step": 771000 + }, + { + "epoch": 2.85, + "learning_rate": 2.5119720303824894e-06, + "loss": 1.0024, + "step": 771500 + }, + { + "epoch": 2.85, + "learning_rate": 2.4811956026640078e-06, + "loss": 0.9978, + "step": 772000 + }, + { + "epoch": 2.85, + "learning_rate": 2.4504191749455257e-06, + "loss": 0.991, + "step": 772500 + }, + { + "epoch": 2.85, + "learning_rate": 2.419642747227044e-06, + "loss": 0.9995, + "step": 773000 + }, + { + "epoch": 2.86, + "learning_rate": 2.388866319508562e-06, + "loss": 1.0027, + "step": 773500 + }, + { + "epoch": 2.86, + "learning_rate": 2.35808989179008e-06, + "loss": 0.9949, + "step": 774000 + }, + { + "epoch": 2.86, + "learning_rate": 2.3273134640715983e-06, + "loss": 0.9938, + "step": 774500 + }, + { + "epoch": 2.86, + "learning_rate": 2.2965370363531166e-06, + "loss": 1.0029, + "step": 775000 + }, + { + "epoch": 2.86, + "learning_rate": 2.265760608634635e-06, + "loss": 0.9957, + "step": 775500 + }, + { + "epoch": 2.87, + "learning_rate": 2.234984180916153e-06, + "loss": 0.9978, + "step": 776000 + }, + { + "epoch": 2.87, + "learning_rate": 2.204207753197671e-06, + "loss": 0.9916, + "step": 776500 + }, + { + "epoch": 2.87, + "learning_rate": 2.1734313254791892e-06, + "loss": 0.9881, + "step": 777000 + }, + { + "epoch": 2.87, + "learning_rate": 2.142654897760707e-06, + "loss": 0.9957, + "step": 777500 + }, + { + "epoch": 2.87, + "learning_rate": 2.1118784700422255e-06, + "loss": 0.9952, + "step": 778000 + }, + { + "epoch": 2.88, + "learning_rate": 2.0811020423237435e-06, + "loss": 0.9965, + "step": 778500 + }, + { + "epoch": 2.88, + "learning_rate": 2.050325614605262e-06, + "loss": 0.9938, + "step": 779000 + }, + { + "epoch": 2.88, + "learning_rate": 2.0195491868867798e-06, + "loss": 0.9886, + "step": 779500 + }, + { + "epoch": 2.88, + "learning_rate": 1.9887727591682977e-06, + "loss": 0.9973, + "step": 780000 + }, + { + "epoch": 2.88, + "learning_rate": 1.957996331449816e-06, + "loss": 0.9928, + "step": 780500 + }, + { + "epoch": 2.88, + "learning_rate": 1.927219903731334e-06, + "loss": 0.9938, + "step": 781000 + }, + { + "epoch": 2.89, + "learning_rate": 1.8964434760128522e-06, + "loss": 1.0034, + "step": 781500 + }, + { + "epoch": 2.89, + "learning_rate": 1.8656670482943703e-06, + "loss": 0.9944, + "step": 782000 + }, + { + "epoch": 2.89, + "learning_rate": 1.8348906205758885e-06, + "loss": 0.995, + "step": 782500 + }, + { + "epoch": 2.89, + "learning_rate": 1.8041141928574066e-06, + "loss": 0.9901, + "step": 783000 + }, + { + "epoch": 2.89, + "learning_rate": 1.773337765138925e-06, + "loss": 0.9913, + "step": 783500 + }, + { + "epoch": 2.9, + "learning_rate": 1.7425613374204431e-06, + "loss": 1.0005, + "step": 784000 + }, + { + "epoch": 2.9, + "learning_rate": 1.7117849097019613e-06, + "loss": 1.003, + "step": 784500 + }, + { + "epoch": 2.9, + "learning_rate": 1.6810084819834794e-06, + "loss": 0.9944, + "step": 785000 + }, + { + "epoch": 2.9, + "learning_rate": 1.6502320542649976e-06, + "loss": 1.0032, + "step": 785500 + }, + { + "epoch": 2.9, + "learning_rate": 1.6194556265465157e-06, + "loss": 0.9896, + "step": 786000 + }, + { + "epoch": 2.9, + "learning_rate": 1.5886791988280339e-06, + "loss": 0.9901, + "step": 786500 + }, + { + "epoch": 2.91, + "learning_rate": 1.5579027711095518e-06, + "loss": 0.9902, + "step": 787000 + }, + { + "epoch": 2.91, + "learning_rate": 1.52712634339107e-06, + "loss": 0.9962, + "step": 787500 + }, + { + "epoch": 2.91, + "learning_rate": 1.496349915672588e-06, + "loss": 0.9987, + "step": 788000 + }, + { + "epoch": 2.91, + "learning_rate": 1.4655734879541062e-06, + "loss": 0.9993, + "step": 788500 + }, + { + "epoch": 2.91, + "learning_rate": 1.4347970602356244e-06, + "loss": 0.9912, + "step": 789000 + }, + { + "epoch": 2.92, + "learning_rate": 1.4040206325171425e-06, + "loss": 0.9937, + "step": 789500 + }, + { + "epoch": 2.92, + "learning_rate": 1.3732442047986607e-06, + "loss": 0.9925, + "step": 790000 + }, + { + "epoch": 2.92, + "learning_rate": 1.3424677770801788e-06, + "loss": 0.991, + "step": 790500 + }, + { + "epoch": 2.92, + "learning_rate": 1.311691349361697e-06, + "loss": 0.9939, + "step": 791000 + }, + { + "epoch": 2.92, + "learning_rate": 1.2809149216432151e-06, + "loss": 0.9964, + "step": 791500 + }, + { + "epoch": 2.92, + "learning_rate": 1.2501384939247333e-06, + "loss": 0.9929, + "step": 792000 + }, + { + "epoch": 2.93, + "learning_rate": 1.2193620662062514e-06, + "loss": 0.9886, + "step": 792500 + }, + { + "epoch": 2.93, + "learning_rate": 1.1885856384877696e-06, + "loss": 0.9947, + "step": 793000 + }, + { + "epoch": 2.93, + "learning_rate": 1.1578092107692875e-06, + "loss": 1.0, + "step": 793500 + }, + { + "epoch": 2.93, + "learning_rate": 1.1270327830508057e-06, + "loss": 0.9931, + "step": 794000 + }, + { + "epoch": 2.93, + "learning_rate": 1.096256355332324e-06, + "loss": 0.9967, + "step": 794500 + }, + { + "epoch": 2.94, + "learning_rate": 1.0654799276138422e-06, + "loss": 0.9944, + "step": 795000 + }, + { + "epoch": 2.94, + "learning_rate": 1.0347034998953603e-06, + "loss": 0.9877, + "step": 795500 + }, + { + "epoch": 2.94, + "learning_rate": 1.0039270721768783e-06, + "loss": 0.9924, + "step": 796000 + }, + { + "epoch": 2.94, + "learning_rate": 9.731506444583964e-07, + "loss": 0.9876, + "step": 796500 + }, + { + "epoch": 2.94, + "learning_rate": 9.423742167399145e-07, + "loss": 0.9878, + "step": 797000 + }, + { + "epoch": 2.95, + "learning_rate": 9.115977890214327e-07, + "loss": 0.9909, + "step": 797500 + }, + { + "epoch": 2.95, + "learning_rate": 8.808213613029508e-07, + "loss": 0.9948, + "step": 798000 + }, + { + "epoch": 2.95, + "learning_rate": 8.50044933584469e-07, + "loss": 0.9887, + "step": 798500 + }, + { + "epoch": 2.95, + "learning_rate": 8.192685058659872e-07, + "loss": 0.9965, + "step": 799000 + }, + { + "epoch": 2.95, + "learning_rate": 7.884920781475054e-07, + "loss": 0.9906, + "step": 799500 + }, + { + "epoch": 2.95, + "learning_rate": 7.577156504290234e-07, + "loss": 0.9947, + "step": 800000 + }, + { + "epoch": 2.96, + "learning_rate": 7.269392227105416e-07, + "loss": 0.9913, + "step": 800500 + }, + { + "epoch": 2.96, + "learning_rate": 6.961627949920597e-07, + "loss": 0.9915, + "step": 801000 + }, + { + "epoch": 2.96, + "learning_rate": 6.653863672735778e-07, + "loss": 0.9846, + "step": 801500 + }, + { + "epoch": 2.96, + "learning_rate": 6.346099395550959e-07, + "loss": 1.0002, + "step": 802000 + }, + { + "epoch": 2.96, + "learning_rate": 6.038335118366142e-07, + "loss": 0.99, + "step": 802500 + }, + { + "epoch": 2.97, + "learning_rate": 5.730570841181322e-07, + "loss": 1.0009, + "step": 803000 + }, + { + "epoch": 2.97, + "learning_rate": 5.422806563996504e-07, + "loss": 0.9913, + "step": 803500 + }, + { + "epoch": 2.97, + "learning_rate": 5.115042286811685e-07, + "loss": 0.9849, + "step": 804000 + }, + { + "epoch": 2.97, + "learning_rate": 4.807278009626867e-07, + "loss": 0.9922, + "step": 804500 + }, + { + "epoch": 2.97, + "learning_rate": 4.499513732442048e-07, + "loss": 0.9885, + "step": 805000 + }, + { + "epoch": 2.97, + "learning_rate": 4.1917494552572297e-07, + "loss": 0.9909, + "step": 805500 + }, + { + "epoch": 2.98, + "learning_rate": 3.883985178072411e-07, + "loss": 0.99, + "step": 806000 + }, + { + "epoch": 2.98, + "learning_rate": 3.576220900887592e-07, + "loss": 0.9906, + "step": 806500 + }, + { + "epoch": 2.98, + "learning_rate": 3.268456623702774e-07, + "loss": 0.9865, + "step": 807000 + }, + { + "epoch": 2.98, + "learning_rate": 2.960692346517955e-07, + "loss": 0.9871, + "step": 807500 + }, + { + "epoch": 2.98, + "learning_rate": 2.652928069333136e-07, + "loss": 0.9933, + "step": 808000 + }, + { + "epoch": 2.99, + "learning_rate": 2.3451637921483178e-07, + "loss": 0.9954, + "step": 808500 + }, + { + "epoch": 2.99, + "learning_rate": 2.037399514963499e-07, + "loss": 0.9935, + "step": 809000 + }, + { + "epoch": 2.99, + "learning_rate": 1.7296352377786808e-07, + "loss": 0.9843, + "step": 809500 + }, + { + "epoch": 2.99, + "learning_rate": 1.421870960593862e-07, + "loss": 0.9897, + "step": 810000 + }, + { + "epoch": 2.99, + "learning_rate": 1.1141066834090434e-07, + "loss": 0.9914, + "step": 810500 + }, + { + "epoch": 3.0, + "learning_rate": 8.063424062242248e-08, + "loss": 0.9921, + "step": 811000 + }, + { + "epoch": 3.0, + "learning_rate": 4.985781290394062e-08, + "loss": 0.9981, + "step": 811500 + }, + { + "epoch": 3.0, + "learning_rate": 1.9081385185458755e-08, + "loss": 1.0003, + "step": 812000 + }, + { + "epoch": 3.0, + "step": 812310, + "total_flos": 3.420875271615529e+18, + "train_loss": 1.1082924517030088, + "train_runtime": 412795.0235, + "train_samples_per_second": 31.485, + "train_steps_per_second": 1.968 + } + ], + "max_steps": 812310, + "num_train_epochs": 3, + "total_flos": 3.420875271615529e+18, + "trial_name": null, + "trial_params": null +}