ConfliBERT-Spanish-Beto-Cased-v1 / trainer_state.json
salsarra's picture
Upload 13 files
9380076 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 812310,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.996922357228152e-05,
"loss": 1.5563,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.9938447144563037e-05,
"loss": 1.4798,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.9907670716844554e-05,
"loss": 1.4636,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.987689428912608e-05,
"loss": 1.4411,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.9846117861407595e-05,
"loss": 1.4334,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.981534143368911e-05,
"loss": 1.4234,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.978456500597063e-05,
"loss": 1.4075,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 4.9753788578252146e-05,
"loss": 1.4069,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 4.972301215053366e-05,
"loss": 1.4132,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.969223572281519e-05,
"loss": 1.4005,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.9661459295096704e-05,
"loss": 1.3938,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.963068286737822e-05,
"loss": 1.3949,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 4.959990643965974e-05,
"loss": 1.3849,
"step": 6500
},
{
"epoch": 0.03,
"learning_rate": 4.9569130011941255e-05,
"loss": 1.3828,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 4.953835358422277e-05,
"loss": 1.3794,
"step": 7500
},
{
"epoch": 0.03,
"learning_rate": 4.950757715650429e-05,
"loss": 1.3727,
"step": 8000
},
{
"epoch": 0.03,
"learning_rate": 4.947680072878581e-05,
"loss": 1.3797,
"step": 8500
},
{
"epoch": 0.03,
"learning_rate": 4.944602430106733e-05,
"loss": 1.364,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 4.941524787334885e-05,
"loss": 1.3602,
"step": 9500
},
{
"epoch": 0.04,
"learning_rate": 4.9384471445630365e-05,
"loss": 1.3704,
"step": 10000
},
{
"epoch": 0.04,
"learning_rate": 4.935369501791188e-05,
"loss": 1.3619,
"step": 10500
},
{
"epoch": 0.04,
"learning_rate": 4.93229185901934e-05,
"loss": 1.369,
"step": 11000
},
{
"epoch": 0.04,
"learning_rate": 4.9292142162474916e-05,
"loss": 1.3687,
"step": 11500
},
{
"epoch": 0.04,
"learning_rate": 4.926136573475644e-05,
"loss": 1.3481,
"step": 12000
},
{
"epoch": 0.05,
"learning_rate": 4.923058930703796e-05,
"loss": 1.3553,
"step": 12500
},
{
"epoch": 0.05,
"learning_rate": 4.9199812879319474e-05,
"loss": 1.3532,
"step": 13000
},
{
"epoch": 0.05,
"learning_rate": 4.916903645160099e-05,
"loss": 1.3519,
"step": 13500
},
{
"epoch": 0.05,
"learning_rate": 4.913826002388251e-05,
"loss": 1.3524,
"step": 14000
},
{
"epoch": 0.05,
"learning_rate": 4.9107483596164025e-05,
"loss": 1.3427,
"step": 14500
},
{
"epoch": 0.06,
"learning_rate": 4.907670716844555e-05,
"loss": 1.3436,
"step": 15000
},
{
"epoch": 0.06,
"learning_rate": 4.9045930740727066e-05,
"loss": 1.3342,
"step": 15500
},
{
"epoch": 0.06,
"learning_rate": 4.901515431300858e-05,
"loss": 1.332,
"step": 16000
},
{
"epoch": 0.06,
"learning_rate": 4.89843778852901e-05,
"loss": 1.3384,
"step": 16500
},
{
"epoch": 0.06,
"learning_rate": 4.895360145757162e-05,
"loss": 1.3252,
"step": 17000
},
{
"epoch": 0.06,
"learning_rate": 4.8922825029853134e-05,
"loss": 1.3405,
"step": 17500
},
{
"epoch": 0.07,
"learning_rate": 4.889204860213465e-05,
"loss": 1.3363,
"step": 18000
},
{
"epoch": 0.07,
"learning_rate": 4.8861272174416175e-05,
"loss": 1.3297,
"step": 18500
},
{
"epoch": 0.07,
"learning_rate": 4.883049574669769e-05,
"loss": 1.3268,
"step": 19000
},
{
"epoch": 0.07,
"learning_rate": 4.879971931897921e-05,
"loss": 1.3136,
"step": 19500
},
{
"epoch": 0.07,
"learning_rate": 4.876894289126073e-05,
"loss": 1.3182,
"step": 20000
},
{
"epoch": 0.08,
"learning_rate": 4.8738166463542244e-05,
"loss": 1.3226,
"step": 20500
},
{
"epoch": 0.08,
"learning_rate": 4.870739003582376e-05,
"loss": 1.3186,
"step": 21000
},
{
"epoch": 0.08,
"learning_rate": 4.8676613608105285e-05,
"loss": 1.3234,
"step": 21500
},
{
"epoch": 0.08,
"learning_rate": 4.86458371803868e-05,
"loss": 1.3214,
"step": 22000
},
{
"epoch": 0.08,
"learning_rate": 4.861506075266832e-05,
"loss": 1.3184,
"step": 22500
},
{
"epoch": 0.08,
"learning_rate": 4.8584284324949836e-05,
"loss": 1.3074,
"step": 23000
},
{
"epoch": 0.09,
"learning_rate": 4.855350789723135e-05,
"loss": 1.3137,
"step": 23500
},
{
"epoch": 0.09,
"learning_rate": 4.852273146951287e-05,
"loss": 1.3114,
"step": 24000
},
{
"epoch": 0.09,
"learning_rate": 4.849195504179439e-05,
"loss": 1.3174,
"step": 24500
},
{
"epoch": 0.09,
"learning_rate": 4.846117861407591e-05,
"loss": 1.3094,
"step": 25000
},
{
"epoch": 0.09,
"learning_rate": 4.843040218635743e-05,
"loss": 1.3133,
"step": 25500
},
{
"epoch": 0.1,
"learning_rate": 4.8399625758638945e-05,
"loss": 1.3095,
"step": 26000
},
{
"epoch": 0.1,
"learning_rate": 4.836884933092046e-05,
"loss": 1.3052,
"step": 26500
},
{
"epoch": 0.1,
"learning_rate": 4.833807290320198e-05,
"loss": 1.3048,
"step": 27000
},
{
"epoch": 0.1,
"learning_rate": 4.8307296475483497e-05,
"loss": 1.3093,
"step": 27500
},
{
"epoch": 0.1,
"learning_rate": 4.8276520047765014e-05,
"loss": 1.307,
"step": 28000
},
{
"epoch": 0.11,
"learning_rate": 4.824574362004654e-05,
"loss": 1.312,
"step": 28500
},
{
"epoch": 0.11,
"learning_rate": 4.8214967192328055e-05,
"loss": 1.3006,
"step": 29000
},
{
"epoch": 0.11,
"learning_rate": 4.818419076460957e-05,
"loss": 1.3069,
"step": 29500
},
{
"epoch": 0.11,
"learning_rate": 4.815341433689109e-05,
"loss": 1.2969,
"step": 30000
},
{
"epoch": 0.11,
"learning_rate": 4.8122637909172606e-05,
"loss": 1.3042,
"step": 30500
},
{
"epoch": 0.11,
"learning_rate": 4.809186148145412e-05,
"loss": 1.2999,
"step": 31000
},
{
"epoch": 0.12,
"learning_rate": 4.806108505373565e-05,
"loss": 1.3,
"step": 31500
},
{
"epoch": 0.12,
"learning_rate": 4.8030308626017164e-05,
"loss": 1.2973,
"step": 32000
},
{
"epoch": 0.12,
"learning_rate": 4.799953219829868e-05,
"loss": 1.2949,
"step": 32500
},
{
"epoch": 0.12,
"learning_rate": 4.79687557705802e-05,
"loss": 1.293,
"step": 33000
},
{
"epoch": 0.12,
"learning_rate": 4.7937979342861715e-05,
"loss": 1.2834,
"step": 33500
},
{
"epoch": 0.13,
"learning_rate": 4.790720291514323e-05,
"loss": 1.3032,
"step": 34000
},
{
"epoch": 0.13,
"learning_rate": 4.787642648742475e-05,
"loss": 1.2926,
"step": 34500
},
{
"epoch": 0.13,
"learning_rate": 4.784565005970627e-05,
"loss": 1.291,
"step": 35000
},
{
"epoch": 0.13,
"learning_rate": 4.781487363198779e-05,
"loss": 1.2958,
"step": 35500
},
{
"epoch": 0.13,
"learning_rate": 4.778409720426931e-05,
"loss": 1.2883,
"step": 36000
},
{
"epoch": 0.13,
"learning_rate": 4.7753320776550824e-05,
"loss": 1.2998,
"step": 36500
},
{
"epoch": 0.14,
"learning_rate": 4.772254434883234e-05,
"loss": 1.2781,
"step": 37000
},
{
"epoch": 0.14,
"learning_rate": 4.769176792111386e-05,
"loss": 1.2933,
"step": 37500
},
{
"epoch": 0.14,
"learning_rate": 4.766099149339538e-05,
"loss": 1.2969,
"step": 38000
},
{
"epoch": 0.14,
"learning_rate": 4.76302150656769e-05,
"loss": 1.279,
"step": 38500
},
{
"epoch": 0.14,
"learning_rate": 4.759943863795842e-05,
"loss": 1.2929,
"step": 39000
},
{
"epoch": 0.15,
"learning_rate": 4.7568662210239934e-05,
"loss": 1.2873,
"step": 39500
},
{
"epoch": 0.15,
"learning_rate": 4.753788578252145e-05,
"loss": 1.2923,
"step": 40000
},
{
"epoch": 0.15,
"learning_rate": 4.750710935480297e-05,
"loss": 1.2974,
"step": 40500
},
{
"epoch": 0.15,
"learning_rate": 4.7476332927084485e-05,
"loss": 1.2789,
"step": 41000
},
{
"epoch": 0.15,
"learning_rate": 4.744555649936601e-05,
"loss": 1.2783,
"step": 41500
},
{
"epoch": 0.16,
"learning_rate": 4.7414780071647526e-05,
"loss": 1.2781,
"step": 42000
},
{
"epoch": 0.16,
"learning_rate": 4.738400364392904e-05,
"loss": 1.2925,
"step": 42500
},
{
"epoch": 0.16,
"learning_rate": 4.735322721621056e-05,
"loss": 1.2818,
"step": 43000
},
{
"epoch": 0.16,
"learning_rate": 4.732245078849208e-05,
"loss": 1.2788,
"step": 43500
},
{
"epoch": 0.16,
"learning_rate": 4.7291674360773594e-05,
"loss": 1.2732,
"step": 44000
},
{
"epoch": 0.16,
"learning_rate": 4.726089793305512e-05,
"loss": 1.278,
"step": 44500
},
{
"epoch": 0.17,
"learning_rate": 4.7230121505336635e-05,
"loss": 1.2772,
"step": 45000
},
{
"epoch": 0.17,
"learning_rate": 4.719934507761815e-05,
"loss": 1.2752,
"step": 45500
},
{
"epoch": 0.17,
"learning_rate": 4.716856864989967e-05,
"loss": 1.2623,
"step": 46000
},
{
"epoch": 0.17,
"learning_rate": 4.713779222218119e-05,
"loss": 1.2704,
"step": 46500
},
{
"epoch": 0.17,
"learning_rate": 4.7107015794462704e-05,
"loss": 1.2778,
"step": 47000
},
{
"epoch": 0.18,
"learning_rate": 4.707623936674422e-05,
"loss": 1.2775,
"step": 47500
},
{
"epoch": 0.18,
"learning_rate": 4.7045462939025745e-05,
"loss": 1.2728,
"step": 48000
},
{
"epoch": 0.18,
"learning_rate": 4.701468651130726e-05,
"loss": 1.2722,
"step": 48500
},
{
"epoch": 0.18,
"learning_rate": 4.698391008358878e-05,
"loss": 1.2701,
"step": 49000
},
{
"epoch": 0.18,
"learning_rate": 4.6953133655870296e-05,
"loss": 1.2808,
"step": 49500
},
{
"epoch": 0.18,
"learning_rate": 4.692235722815181e-05,
"loss": 1.2653,
"step": 50000
},
{
"epoch": 0.19,
"learning_rate": 4.689158080043333e-05,
"loss": 1.2769,
"step": 50500
},
{
"epoch": 0.19,
"learning_rate": 4.686080437271485e-05,
"loss": 1.2752,
"step": 51000
},
{
"epoch": 0.19,
"learning_rate": 4.683002794499637e-05,
"loss": 1.2725,
"step": 51500
},
{
"epoch": 0.19,
"learning_rate": 4.679925151727789e-05,
"loss": 1.26,
"step": 52000
},
{
"epoch": 0.19,
"learning_rate": 4.6768475089559405e-05,
"loss": 1.2682,
"step": 52500
},
{
"epoch": 0.2,
"learning_rate": 4.673769866184092e-05,
"loss": 1.2759,
"step": 53000
},
{
"epoch": 0.2,
"learning_rate": 4.670692223412244e-05,
"loss": 1.2719,
"step": 53500
},
{
"epoch": 0.2,
"learning_rate": 4.6676145806403957e-05,
"loss": 1.268,
"step": 54000
},
{
"epoch": 0.2,
"learning_rate": 4.664536937868548e-05,
"loss": 1.2631,
"step": 54500
},
{
"epoch": 0.2,
"learning_rate": 4.6614592950967e-05,
"loss": 1.2692,
"step": 55000
},
{
"epoch": 0.2,
"learning_rate": 4.6583816523248515e-05,
"loss": 1.269,
"step": 55500
},
{
"epoch": 0.21,
"learning_rate": 4.655304009553003e-05,
"loss": 1.2673,
"step": 56000
},
{
"epoch": 0.21,
"learning_rate": 4.652226366781155e-05,
"loss": 1.2633,
"step": 56500
},
{
"epoch": 0.21,
"learning_rate": 4.6491487240093066e-05,
"loss": 1.2589,
"step": 57000
},
{
"epoch": 0.21,
"learning_rate": 4.646071081237458e-05,
"loss": 1.2639,
"step": 57500
},
{
"epoch": 0.21,
"learning_rate": 4.642993438465611e-05,
"loss": 1.2571,
"step": 58000
},
{
"epoch": 0.22,
"learning_rate": 4.6399157956937624e-05,
"loss": 1.2647,
"step": 58500
},
{
"epoch": 0.22,
"learning_rate": 4.636838152921914e-05,
"loss": 1.2582,
"step": 59000
},
{
"epoch": 0.22,
"learning_rate": 4.633760510150066e-05,
"loss": 1.259,
"step": 59500
},
{
"epoch": 0.22,
"learning_rate": 4.6306828673782175e-05,
"loss": 1.2657,
"step": 60000
},
{
"epoch": 0.22,
"learning_rate": 4.627605224606369e-05,
"loss": 1.2602,
"step": 60500
},
{
"epoch": 0.23,
"learning_rate": 4.6245275818345216e-05,
"loss": 1.2594,
"step": 61000
},
{
"epoch": 0.23,
"learning_rate": 4.621449939062673e-05,
"loss": 1.2608,
"step": 61500
},
{
"epoch": 0.23,
"learning_rate": 4.618372296290825e-05,
"loss": 1.259,
"step": 62000
},
{
"epoch": 0.23,
"learning_rate": 4.615294653518977e-05,
"loss": 1.2659,
"step": 62500
},
{
"epoch": 0.23,
"learning_rate": 4.6122170107471284e-05,
"loss": 1.2597,
"step": 63000
},
{
"epoch": 0.23,
"learning_rate": 4.60913936797528e-05,
"loss": 1.2524,
"step": 63500
},
{
"epoch": 0.24,
"learning_rate": 4.606061725203432e-05,
"loss": 1.2521,
"step": 64000
},
{
"epoch": 0.24,
"learning_rate": 4.602984082431584e-05,
"loss": 1.2588,
"step": 64500
},
{
"epoch": 0.24,
"learning_rate": 4.599906439659736e-05,
"loss": 1.2554,
"step": 65000
},
{
"epoch": 0.24,
"learning_rate": 4.596828796887888e-05,
"loss": 1.25,
"step": 65500
},
{
"epoch": 0.24,
"learning_rate": 4.5937511541160394e-05,
"loss": 1.2467,
"step": 66000
},
{
"epoch": 0.25,
"learning_rate": 4.590673511344191e-05,
"loss": 1.2472,
"step": 66500
},
{
"epoch": 0.25,
"learning_rate": 4.587595868572343e-05,
"loss": 1.2652,
"step": 67000
},
{
"epoch": 0.25,
"learning_rate": 4.5845182258004945e-05,
"loss": 1.2506,
"step": 67500
},
{
"epoch": 0.25,
"learning_rate": 4.581440583028647e-05,
"loss": 1.247,
"step": 68000
},
{
"epoch": 0.25,
"learning_rate": 4.5783629402567986e-05,
"loss": 1.2519,
"step": 68500
},
{
"epoch": 0.25,
"learning_rate": 4.57528529748495e-05,
"loss": 1.2489,
"step": 69000
},
{
"epoch": 0.26,
"learning_rate": 4.572207654713102e-05,
"loss": 1.2508,
"step": 69500
},
{
"epoch": 0.26,
"learning_rate": 4.569130011941254e-05,
"loss": 1.2556,
"step": 70000
},
{
"epoch": 0.26,
"learning_rate": 4.566052369169406e-05,
"loss": 1.2555,
"step": 70500
},
{
"epoch": 0.26,
"learning_rate": 4.562974726397558e-05,
"loss": 1.2407,
"step": 71000
},
{
"epoch": 0.26,
"learning_rate": 4.55989708362571e-05,
"loss": 1.2478,
"step": 71500
},
{
"epoch": 0.27,
"learning_rate": 4.556819440853862e-05,
"loss": 1.2537,
"step": 72000
},
{
"epoch": 0.27,
"learning_rate": 4.5537417980820136e-05,
"loss": 1.2487,
"step": 72500
},
{
"epoch": 0.27,
"learning_rate": 4.550664155310165e-05,
"loss": 1.2418,
"step": 73000
},
{
"epoch": 0.27,
"learning_rate": 4.547586512538317e-05,
"loss": 1.2568,
"step": 73500
},
{
"epoch": 0.27,
"learning_rate": 4.544508869766469e-05,
"loss": 1.2428,
"step": 74000
},
{
"epoch": 0.28,
"learning_rate": 4.5414312269946205e-05,
"loss": 1.2588,
"step": 74500
},
{
"epoch": 0.28,
"learning_rate": 4.538353584222773e-05,
"loss": 1.2455,
"step": 75000
},
{
"epoch": 0.28,
"learning_rate": 4.5352759414509246e-05,
"loss": 1.2459,
"step": 75500
},
{
"epoch": 0.28,
"learning_rate": 4.532198298679076e-05,
"loss": 1.2502,
"step": 76000
},
{
"epoch": 0.28,
"learning_rate": 4.529120655907228e-05,
"loss": 1.2396,
"step": 76500
},
{
"epoch": 0.28,
"learning_rate": 4.52604301313538e-05,
"loss": 1.2437,
"step": 77000
},
{
"epoch": 0.29,
"learning_rate": 4.5229653703635314e-05,
"loss": 1.2482,
"step": 77500
},
{
"epoch": 0.29,
"learning_rate": 4.519887727591683e-05,
"loss": 1.2368,
"step": 78000
},
{
"epoch": 0.29,
"learning_rate": 4.5168100848198355e-05,
"loss": 1.2435,
"step": 78500
},
{
"epoch": 0.29,
"learning_rate": 4.513732442047987e-05,
"loss": 1.2463,
"step": 79000
},
{
"epoch": 0.29,
"learning_rate": 4.510654799276139e-05,
"loss": 1.239,
"step": 79500
},
{
"epoch": 0.3,
"learning_rate": 4.5075771565042906e-05,
"loss": 1.2439,
"step": 80000
},
{
"epoch": 0.3,
"learning_rate": 4.504499513732442e-05,
"loss": 1.2313,
"step": 80500
},
{
"epoch": 0.3,
"learning_rate": 4.501421870960594e-05,
"loss": 1.2419,
"step": 81000
},
{
"epoch": 0.3,
"learning_rate": 4.4983442281887464e-05,
"loss": 1.2356,
"step": 81500
},
{
"epoch": 0.3,
"learning_rate": 4.495266585416898e-05,
"loss": 1.2308,
"step": 82000
},
{
"epoch": 0.3,
"learning_rate": 4.49218894264505e-05,
"loss": 1.2382,
"step": 82500
},
{
"epoch": 0.31,
"learning_rate": 4.4891112998732015e-05,
"loss": 1.2555,
"step": 83000
},
{
"epoch": 0.31,
"learning_rate": 4.486033657101353e-05,
"loss": 1.245,
"step": 83500
},
{
"epoch": 0.31,
"learning_rate": 4.482956014329505e-05,
"loss": 1.2384,
"step": 84000
},
{
"epoch": 0.31,
"learning_rate": 4.479878371557657e-05,
"loss": 1.2379,
"step": 84500
},
{
"epoch": 0.31,
"learning_rate": 4.476800728785809e-05,
"loss": 1.241,
"step": 85000
},
{
"epoch": 0.32,
"learning_rate": 4.473723086013961e-05,
"loss": 1.2262,
"step": 85500
},
{
"epoch": 0.32,
"learning_rate": 4.4706454432421125e-05,
"loss": 1.2283,
"step": 86000
},
{
"epoch": 0.32,
"learning_rate": 4.467567800470264e-05,
"loss": 1.2331,
"step": 86500
},
{
"epoch": 0.32,
"learning_rate": 4.464490157698416e-05,
"loss": 1.2439,
"step": 87000
},
{
"epoch": 0.32,
"learning_rate": 4.4614125149265676e-05,
"loss": 1.2369,
"step": 87500
},
{
"epoch": 0.32,
"learning_rate": 4.45833487215472e-05,
"loss": 1.2335,
"step": 88000
},
{
"epoch": 0.33,
"learning_rate": 4.455257229382872e-05,
"loss": 1.236,
"step": 88500
},
{
"epoch": 0.33,
"learning_rate": 4.4521795866110234e-05,
"loss": 1.2333,
"step": 89000
},
{
"epoch": 0.33,
"learning_rate": 4.449101943839175e-05,
"loss": 1.2288,
"step": 89500
},
{
"epoch": 0.33,
"learning_rate": 4.446024301067327e-05,
"loss": 1.2315,
"step": 90000
},
{
"epoch": 0.33,
"learning_rate": 4.4429466582954785e-05,
"loss": 1.2349,
"step": 90500
},
{
"epoch": 0.34,
"learning_rate": 4.43986901552363e-05,
"loss": 1.2227,
"step": 91000
},
{
"epoch": 0.34,
"learning_rate": 4.4367913727517826e-05,
"loss": 1.2261,
"step": 91500
},
{
"epoch": 0.34,
"learning_rate": 4.4337137299799343e-05,
"loss": 1.2303,
"step": 92000
},
{
"epoch": 0.34,
"learning_rate": 4.430636087208086e-05,
"loss": 1.2235,
"step": 92500
},
{
"epoch": 0.34,
"learning_rate": 4.427558444436238e-05,
"loss": 1.2336,
"step": 93000
},
{
"epoch": 0.35,
"learning_rate": 4.4244808016643895e-05,
"loss": 1.2345,
"step": 93500
},
{
"epoch": 0.35,
"learning_rate": 4.421403158892541e-05,
"loss": 1.2257,
"step": 94000
},
{
"epoch": 0.35,
"learning_rate": 4.4183255161206936e-05,
"loss": 1.2212,
"step": 94500
},
{
"epoch": 0.35,
"learning_rate": 4.415247873348845e-05,
"loss": 1.2221,
"step": 95000
},
{
"epoch": 0.35,
"learning_rate": 4.412170230576997e-05,
"loss": 1.2238,
"step": 95500
},
{
"epoch": 0.35,
"learning_rate": 4.409092587805149e-05,
"loss": 1.2256,
"step": 96000
},
{
"epoch": 0.36,
"learning_rate": 4.4060149450333004e-05,
"loss": 1.2271,
"step": 96500
},
{
"epoch": 0.36,
"learning_rate": 4.402937302261452e-05,
"loss": 1.2193,
"step": 97000
},
{
"epoch": 0.36,
"learning_rate": 4.399859659489604e-05,
"loss": 1.223,
"step": 97500
},
{
"epoch": 0.36,
"learning_rate": 4.396782016717756e-05,
"loss": 1.2272,
"step": 98000
},
{
"epoch": 0.36,
"learning_rate": 4.393704373945908e-05,
"loss": 1.2184,
"step": 98500
},
{
"epoch": 0.37,
"learning_rate": 4.3906267311740596e-05,
"loss": 1.2223,
"step": 99000
},
{
"epoch": 0.37,
"learning_rate": 4.387549088402211e-05,
"loss": 1.2296,
"step": 99500
},
{
"epoch": 0.37,
"learning_rate": 4.384471445630363e-05,
"loss": 1.2229,
"step": 100000
},
{
"epoch": 0.37,
"learning_rate": 4.381393802858515e-05,
"loss": 1.2206,
"step": 100500
},
{
"epoch": 0.37,
"learning_rate": 4.3783161600866665e-05,
"loss": 1.2293,
"step": 101000
},
{
"epoch": 0.37,
"learning_rate": 4.375238517314819e-05,
"loss": 1.2245,
"step": 101500
},
{
"epoch": 0.38,
"learning_rate": 4.3721608745429706e-05,
"loss": 1.2254,
"step": 102000
},
{
"epoch": 0.38,
"learning_rate": 4.369083231771122e-05,
"loss": 1.2136,
"step": 102500
},
{
"epoch": 0.38,
"learning_rate": 4.366005588999274e-05,
"loss": 1.2289,
"step": 103000
},
{
"epoch": 0.38,
"learning_rate": 4.362927946227426e-05,
"loss": 1.2234,
"step": 103500
},
{
"epoch": 0.38,
"learning_rate": 4.3598503034555774e-05,
"loss": 1.2157,
"step": 104000
},
{
"epoch": 0.39,
"learning_rate": 4.35677266068373e-05,
"loss": 1.2285,
"step": 104500
},
{
"epoch": 0.39,
"learning_rate": 4.3536950179118815e-05,
"loss": 1.2198,
"step": 105000
},
{
"epoch": 0.39,
"learning_rate": 4.350617375140033e-05,
"loss": 1.2256,
"step": 105500
},
{
"epoch": 0.39,
"learning_rate": 4.347539732368185e-05,
"loss": 1.2267,
"step": 106000
},
{
"epoch": 0.39,
"learning_rate": 4.3444620895963366e-05,
"loss": 1.2103,
"step": 106500
},
{
"epoch": 0.4,
"learning_rate": 4.341384446824488e-05,
"loss": 1.2176,
"step": 107000
},
{
"epoch": 0.4,
"learning_rate": 4.33830680405264e-05,
"loss": 1.2279,
"step": 107500
},
{
"epoch": 0.4,
"learning_rate": 4.3352291612807924e-05,
"loss": 1.2208,
"step": 108000
},
{
"epoch": 0.4,
"learning_rate": 4.332151518508944e-05,
"loss": 1.221,
"step": 108500
},
{
"epoch": 0.4,
"learning_rate": 4.329073875737096e-05,
"loss": 1.2268,
"step": 109000
},
{
"epoch": 0.4,
"learning_rate": 4.3259962329652475e-05,
"loss": 1.2279,
"step": 109500
},
{
"epoch": 0.41,
"learning_rate": 4.322918590193399e-05,
"loss": 1.2205,
"step": 110000
},
{
"epoch": 0.41,
"learning_rate": 4.319840947421551e-05,
"loss": 1.2212,
"step": 110500
},
{
"epoch": 0.41,
"learning_rate": 4.3167633046497034e-05,
"loss": 1.2086,
"step": 111000
},
{
"epoch": 0.41,
"learning_rate": 4.313685661877855e-05,
"loss": 1.2144,
"step": 111500
},
{
"epoch": 0.41,
"learning_rate": 4.310608019106007e-05,
"loss": 1.2346,
"step": 112000
},
{
"epoch": 0.42,
"learning_rate": 4.3075303763341585e-05,
"loss": 1.2149,
"step": 112500
},
{
"epoch": 0.42,
"learning_rate": 4.30445273356231e-05,
"loss": 1.2156,
"step": 113000
},
{
"epoch": 0.42,
"learning_rate": 4.301375090790462e-05,
"loss": 1.2136,
"step": 113500
},
{
"epoch": 0.42,
"learning_rate": 4.2982974480186136e-05,
"loss": 1.2101,
"step": 114000
},
{
"epoch": 0.42,
"learning_rate": 4.295219805246766e-05,
"loss": 1.2077,
"step": 114500
},
{
"epoch": 0.42,
"learning_rate": 4.292142162474918e-05,
"loss": 1.2137,
"step": 115000
},
{
"epoch": 0.43,
"learning_rate": 4.2890645197030694e-05,
"loss": 1.2186,
"step": 115500
},
{
"epoch": 0.43,
"learning_rate": 4.285986876931221e-05,
"loss": 1.2302,
"step": 116000
},
{
"epoch": 0.43,
"learning_rate": 4.282909234159373e-05,
"loss": 1.2185,
"step": 116500
},
{
"epoch": 0.43,
"learning_rate": 4.2798315913875245e-05,
"loss": 1.21,
"step": 117000
},
{
"epoch": 0.43,
"learning_rate": 4.276753948615676e-05,
"loss": 1.2065,
"step": 117500
},
{
"epoch": 0.44,
"learning_rate": 4.2736763058438286e-05,
"loss": 1.2174,
"step": 118000
},
{
"epoch": 0.44,
"learning_rate": 4.2705986630719803e-05,
"loss": 1.2078,
"step": 118500
},
{
"epoch": 0.44,
"learning_rate": 4.267521020300132e-05,
"loss": 1.2036,
"step": 119000
},
{
"epoch": 0.44,
"learning_rate": 4.264443377528284e-05,
"loss": 1.2098,
"step": 119500
},
{
"epoch": 0.44,
"learning_rate": 4.2613657347564355e-05,
"loss": 1.209,
"step": 120000
},
{
"epoch": 0.45,
"learning_rate": 4.258288091984587e-05,
"loss": 1.2026,
"step": 120500
},
{
"epoch": 0.45,
"learning_rate": 4.2552104492127396e-05,
"loss": 1.2192,
"step": 121000
},
{
"epoch": 0.45,
"learning_rate": 4.252132806440891e-05,
"loss": 1.219,
"step": 121500
},
{
"epoch": 0.45,
"learning_rate": 4.249055163669043e-05,
"loss": 1.2026,
"step": 122000
},
{
"epoch": 0.45,
"learning_rate": 4.245977520897195e-05,
"loss": 1.2077,
"step": 122500
},
{
"epoch": 0.45,
"learning_rate": 4.2428998781253464e-05,
"loss": 1.2012,
"step": 123000
},
{
"epoch": 0.46,
"learning_rate": 4.239822235353498e-05,
"loss": 1.213,
"step": 123500
},
{
"epoch": 0.46,
"learning_rate": 4.23674459258165e-05,
"loss": 1.2107,
"step": 124000
},
{
"epoch": 0.46,
"learning_rate": 4.233666949809802e-05,
"loss": 1.2144,
"step": 124500
},
{
"epoch": 0.46,
"learning_rate": 4.230589307037954e-05,
"loss": 1.2044,
"step": 125000
},
{
"epoch": 0.46,
"learning_rate": 4.2275116642661056e-05,
"loss": 1.2031,
"step": 125500
},
{
"epoch": 0.47,
"learning_rate": 4.224434021494257e-05,
"loss": 1.2116,
"step": 126000
},
{
"epoch": 0.47,
"learning_rate": 4.221356378722409e-05,
"loss": 1.2063,
"step": 126500
},
{
"epoch": 0.47,
"learning_rate": 4.218278735950561e-05,
"loss": 1.211,
"step": 127000
},
{
"epoch": 0.47,
"learning_rate": 4.215201093178713e-05,
"loss": 1.2049,
"step": 127500
},
{
"epoch": 0.47,
"learning_rate": 4.212123450406865e-05,
"loss": 1.2059,
"step": 128000
},
{
"epoch": 0.47,
"learning_rate": 4.2090458076350166e-05,
"loss": 1.2088,
"step": 128500
},
{
"epoch": 0.48,
"learning_rate": 4.205968164863168e-05,
"loss": 1.2126,
"step": 129000
},
{
"epoch": 0.48,
"learning_rate": 4.20289052209132e-05,
"loss": 1.1968,
"step": 129500
},
{
"epoch": 0.48,
"learning_rate": 4.199812879319472e-05,
"loss": 1.1977,
"step": 130000
},
{
"epoch": 0.48,
"learning_rate": 4.1967352365476234e-05,
"loss": 1.2032,
"step": 130500
},
{
"epoch": 0.48,
"learning_rate": 4.193657593775776e-05,
"loss": 1.2078,
"step": 131000
},
{
"epoch": 0.49,
"learning_rate": 4.1905799510039275e-05,
"loss": 1.1938,
"step": 131500
},
{
"epoch": 0.49,
"learning_rate": 4.187502308232079e-05,
"loss": 1.2054,
"step": 132000
},
{
"epoch": 0.49,
"learning_rate": 4.184424665460231e-05,
"loss": 1.208,
"step": 132500
},
{
"epoch": 0.49,
"learning_rate": 4.1813470226883826e-05,
"loss": 1.1988,
"step": 133000
},
{
"epoch": 0.49,
"learning_rate": 4.178269379916534e-05,
"loss": 1.2053,
"step": 133500
},
{
"epoch": 0.49,
"learning_rate": 4.175191737144686e-05,
"loss": 1.1997,
"step": 134000
},
{
"epoch": 0.5,
"learning_rate": 4.1721140943728384e-05,
"loss": 1.2018,
"step": 134500
},
{
"epoch": 0.5,
"learning_rate": 4.16903645160099e-05,
"loss": 1.2037,
"step": 135000
},
{
"epoch": 0.5,
"learning_rate": 4.165958808829142e-05,
"loss": 1.2033,
"step": 135500
},
{
"epoch": 0.5,
"learning_rate": 4.1628811660572935e-05,
"loss": 1.1989,
"step": 136000
},
{
"epoch": 0.5,
"learning_rate": 4.159803523285445e-05,
"loss": 1.2092,
"step": 136500
},
{
"epoch": 0.51,
"learning_rate": 4.156725880513597e-05,
"loss": 1.2002,
"step": 137000
},
{
"epoch": 0.51,
"learning_rate": 4.1536482377417493e-05,
"loss": 1.2061,
"step": 137500
},
{
"epoch": 0.51,
"learning_rate": 4.150570594969901e-05,
"loss": 1.2063,
"step": 138000
},
{
"epoch": 0.51,
"learning_rate": 4.147492952198053e-05,
"loss": 1.2015,
"step": 138500
},
{
"epoch": 0.51,
"learning_rate": 4.1444153094262045e-05,
"loss": 1.1987,
"step": 139000
},
{
"epoch": 0.52,
"learning_rate": 4.141337666654356e-05,
"loss": 1.1982,
"step": 139500
},
{
"epoch": 0.52,
"learning_rate": 4.138260023882508e-05,
"loss": 1.2023,
"step": 140000
},
{
"epoch": 0.52,
"learning_rate": 4.1351823811106596e-05,
"loss": 1.1953,
"step": 140500
},
{
"epoch": 0.52,
"learning_rate": 4.132104738338812e-05,
"loss": 1.1943,
"step": 141000
},
{
"epoch": 0.52,
"learning_rate": 4.129027095566964e-05,
"loss": 1.2085,
"step": 141500
},
{
"epoch": 0.52,
"learning_rate": 4.1259494527951154e-05,
"loss": 1.1943,
"step": 142000
},
{
"epoch": 0.53,
"learning_rate": 4.122871810023267e-05,
"loss": 1.1994,
"step": 142500
},
{
"epoch": 0.53,
"learning_rate": 4.119794167251419e-05,
"loss": 1.2037,
"step": 143000
},
{
"epoch": 0.53,
"learning_rate": 4.1167165244795705e-05,
"loss": 1.1968,
"step": 143500
},
{
"epoch": 0.53,
"learning_rate": 4.113638881707723e-05,
"loss": 1.2031,
"step": 144000
},
{
"epoch": 0.53,
"learning_rate": 4.1105612389358746e-05,
"loss": 1.2052,
"step": 144500
},
{
"epoch": 0.54,
"learning_rate": 4.107483596164026e-05,
"loss": 1.2006,
"step": 145000
},
{
"epoch": 0.54,
"learning_rate": 4.104405953392178e-05,
"loss": 1.1961,
"step": 145500
},
{
"epoch": 0.54,
"learning_rate": 4.10132831062033e-05,
"loss": 1.1909,
"step": 146000
},
{
"epoch": 0.54,
"learning_rate": 4.0982506678484815e-05,
"loss": 1.1995,
"step": 146500
},
{
"epoch": 0.54,
"learning_rate": 4.095173025076633e-05,
"loss": 1.1978,
"step": 147000
},
{
"epoch": 0.54,
"learning_rate": 4.0920953823047856e-05,
"loss": 1.1952,
"step": 147500
},
{
"epoch": 0.55,
"learning_rate": 4.089017739532937e-05,
"loss": 1.1901,
"step": 148000
},
{
"epoch": 0.55,
"learning_rate": 4.085940096761089e-05,
"loss": 1.1974,
"step": 148500
},
{
"epoch": 0.55,
"learning_rate": 4.082862453989241e-05,
"loss": 1.1914,
"step": 149000
},
{
"epoch": 0.55,
"learning_rate": 4.0797848112173924e-05,
"loss": 1.193,
"step": 149500
},
{
"epoch": 0.55,
"learning_rate": 4.076707168445544e-05,
"loss": 1.1915,
"step": 150000
},
{
"epoch": 0.56,
"learning_rate": 4.0736295256736965e-05,
"loss": 1.1945,
"step": 150500
},
{
"epoch": 0.56,
"learning_rate": 4.070551882901848e-05,
"loss": 1.1986,
"step": 151000
},
{
"epoch": 0.56,
"learning_rate": 4.06747424013e-05,
"loss": 1.1928,
"step": 151500
},
{
"epoch": 0.56,
"learning_rate": 4.0643965973581516e-05,
"loss": 1.1946,
"step": 152000
},
{
"epoch": 0.56,
"learning_rate": 4.061318954586303e-05,
"loss": 1.1959,
"step": 152500
},
{
"epoch": 0.57,
"learning_rate": 4.058241311814455e-05,
"loss": 1.1911,
"step": 153000
},
{
"epoch": 0.57,
"learning_rate": 4.055163669042607e-05,
"loss": 1.1901,
"step": 153500
},
{
"epoch": 0.57,
"learning_rate": 4.052086026270759e-05,
"loss": 1.195,
"step": 154000
},
{
"epoch": 0.57,
"learning_rate": 4.049008383498911e-05,
"loss": 1.1938,
"step": 154500
},
{
"epoch": 0.57,
"learning_rate": 4.0459307407270625e-05,
"loss": 1.1994,
"step": 155000
},
{
"epoch": 0.57,
"learning_rate": 4.042853097955214e-05,
"loss": 1.1911,
"step": 155500
},
{
"epoch": 0.58,
"learning_rate": 4.039775455183366e-05,
"loss": 1.1942,
"step": 156000
},
{
"epoch": 0.58,
"learning_rate": 4.036697812411518e-05,
"loss": 1.1889,
"step": 156500
},
{
"epoch": 0.58,
"learning_rate": 4.0336201696396694e-05,
"loss": 1.185,
"step": 157000
},
{
"epoch": 0.58,
"learning_rate": 4.030542526867822e-05,
"loss": 1.1938,
"step": 157500
},
{
"epoch": 0.58,
"learning_rate": 4.0274648840959735e-05,
"loss": 1.1941,
"step": 158000
},
{
"epoch": 0.59,
"learning_rate": 4.024387241324125e-05,
"loss": 1.1771,
"step": 158500
},
{
"epoch": 0.59,
"learning_rate": 4.021309598552277e-05,
"loss": 1.1895,
"step": 159000
},
{
"epoch": 0.59,
"learning_rate": 4.0182319557804286e-05,
"loss": 1.1915,
"step": 159500
},
{
"epoch": 0.59,
"learning_rate": 4.01515431300858e-05,
"loss": 1.1935,
"step": 160000
},
{
"epoch": 0.59,
"learning_rate": 4.012076670236733e-05,
"loss": 1.194,
"step": 160500
},
{
"epoch": 0.59,
"learning_rate": 4.0089990274648844e-05,
"loss": 1.189,
"step": 161000
},
{
"epoch": 0.6,
"learning_rate": 4.005921384693036e-05,
"loss": 1.1821,
"step": 161500
},
{
"epoch": 0.6,
"learning_rate": 4.002843741921188e-05,
"loss": 1.1875,
"step": 162000
},
{
"epoch": 0.6,
"learning_rate": 3.9997660991493395e-05,
"loss": 1.1758,
"step": 162500
},
{
"epoch": 0.6,
"learning_rate": 3.996688456377491e-05,
"loss": 1.1868,
"step": 163000
},
{
"epoch": 0.6,
"learning_rate": 3.993610813605643e-05,
"loss": 1.1873,
"step": 163500
},
{
"epoch": 0.61,
"learning_rate": 3.9905331708337953e-05,
"loss": 1.1875,
"step": 164000
},
{
"epoch": 0.61,
"learning_rate": 3.987455528061947e-05,
"loss": 1.1864,
"step": 164500
},
{
"epoch": 0.61,
"learning_rate": 3.984377885290099e-05,
"loss": 1.1872,
"step": 165000
},
{
"epoch": 0.61,
"learning_rate": 3.9813002425182505e-05,
"loss": 1.1796,
"step": 165500
},
{
"epoch": 0.61,
"learning_rate": 3.978222599746402e-05,
"loss": 1.1841,
"step": 166000
},
{
"epoch": 0.61,
"learning_rate": 3.975144956974554e-05,
"loss": 1.1899,
"step": 166500
},
{
"epoch": 0.62,
"learning_rate": 3.972067314202706e-05,
"loss": 1.1836,
"step": 167000
},
{
"epoch": 0.62,
"learning_rate": 3.968989671430858e-05,
"loss": 1.1829,
"step": 167500
},
{
"epoch": 0.62,
"learning_rate": 3.96591202865901e-05,
"loss": 1.1899,
"step": 168000
},
{
"epoch": 0.62,
"learning_rate": 3.9628343858871614e-05,
"loss": 1.1746,
"step": 168500
},
{
"epoch": 0.62,
"learning_rate": 3.959756743115313e-05,
"loss": 1.1882,
"step": 169000
},
{
"epoch": 0.63,
"learning_rate": 3.956679100343465e-05,
"loss": 1.1935,
"step": 169500
},
{
"epoch": 0.63,
"learning_rate": 3.9536014575716165e-05,
"loss": 1.1812,
"step": 170000
},
{
"epoch": 0.63,
"learning_rate": 3.950523814799769e-05,
"loss": 1.187,
"step": 170500
},
{
"epoch": 0.63,
"learning_rate": 3.9474461720279206e-05,
"loss": 1.1849,
"step": 171000
},
{
"epoch": 0.63,
"learning_rate": 3.944368529256072e-05,
"loss": 1.1839,
"step": 171500
},
{
"epoch": 0.64,
"learning_rate": 3.941290886484224e-05,
"loss": 1.1804,
"step": 172000
},
{
"epoch": 0.64,
"learning_rate": 3.938213243712376e-05,
"loss": 1.1821,
"step": 172500
},
{
"epoch": 0.64,
"learning_rate": 3.9351356009405275e-05,
"loss": 1.1805,
"step": 173000
},
{
"epoch": 0.64,
"learning_rate": 3.932057958168679e-05,
"loss": 1.1808,
"step": 173500
},
{
"epoch": 0.64,
"learning_rate": 3.9289803153968316e-05,
"loss": 1.1842,
"step": 174000
},
{
"epoch": 0.64,
"learning_rate": 3.925902672624983e-05,
"loss": 1.1811,
"step": 174500
},
{
"epoch": 0.65,
"learning_rate": 3.922825029853135e-05,
"loss": 1.1841,
"step": 175000
},
{
"epoch": 0.65,
"learning_rate": 3.919747387081287e-05,
"loss": 1.1749,
"step": 175500
},
{
"epoch": 0.65,
"learning_rate": 3.9166697443094384e-05,
"loss": 1.1738,
"step": 176000
},
{
"epoch": 0.65,
"learning_rate": 3.91359210153759e-05,
"loss": 1.1821,
"step": 176500
},
{
"epoch": 0.65,
"learning_rate": 3.9105144587657425e-05,
"loss": 1.1717,
"step": 177000
},
{
"epoch": 0.66,
"learning_rate": 3.907436815993894e-05,
"loss": 1.183,
"step": 177500
},
{
"epoch": 0.66,
"learning_rate": 3.904359173222046e-05,
"loss": 1.1745,
"step": 178000
},
{
"epoch": 0.66,
"learning_rate": 3.9012815304501976e-05,
"loss": 1.1769,
"step": 178500
},
{
"epoch": 0.66,
"learning_rate": 3.898203887678349e-05,
"loss": 1.181,
"step": 179000
},
{
"epoch": 0.66,
"learning_rate": 3.895126244906501e-05,
"loss": 1.1838,
"step": 179500
},
{
"epoch": 0.66,
"learning_rate": 3.892048602134653e-05,
"loss": 1.1808,
"step": 180000
},
{
"epoch": 0.67,
"learning_rate": 3.888970959362805e-05,
"loss": 1.1802,
"step": 180500
},
{
"epoch": 0.67,
"learning_rate": 3.885893316590957e-05,
"loss": 1.1713,
"step": 181000
},
{
"epoch": 0.67,
"learning_rate": 3.8828156738191085e-05,
"loss": 1.1832,
"step": 181500
},
{
"epoch": 0.67,
"learning_rate": 3.87973803104726e-05,
"loss": 1.1727,
"step": 182000
},
{
"epoch": 0.67,
"learning_rate": 3.876660388275412e-05,
"loss": 1.1835,
"step": 182500
},
{
"epoch": 0.68,
"learning_rate": 3.873582745503564e-05,
"loss": 1.1777,
"step": 183000
},
{
"epoch": 0.68,
"learning_rate": 3.870505102731716e-05,
"loss": 1.1784,
"step": 183500
},
{
"epoch": 0.68,
"learning_rate": 3.867427459959868e-05,
"loss": 1.173,
"step": 184000
},
{
"epoch": 0.68,
"learning_rate": 3.8643498171880195e-05,
"loss": 1.1789,
"step": 184500
},
{
"epoch": 0.68,
"learning_rate": 3.861272174416171e-05,
"loss": 1.1719,
"step": 185000
},
{
"epoch": 0.69,
"learning_rate": 3.858194531644323e-05,
"loss": 1.1741,
"step": 185500
},
{
"epoch": 0.69,
"learning_rate": 3.8551168888724746e-05,
"loss": 1.1817,
"step": 186000
},
{
"epoch": 0.69,
"learning_rate": 3.852039246100626e-05,
"loss": 1.1754,
"step": 186500
},
{
"epoch": 0.69,
"learning_rate": 3.848961603328779e-05,
"loss": 1.1789,
"step": 187000
},
{
"epoch": 0.69,
"learning_rate": 3.8458839605569304e-05,
"loss": 1.1693,
"step": 187500
},
{
"epoch": 0.69,
"learning_rate": 3.842806317785082e-05,
"loss": 1.1775,
"step": 188000
},
{
"epoch": 0.7,
"learning_rate": 3.839728675013234e-05,
"loss": 1.1698,
"step": 188500
},
{
"epoch": 0.7,
"learning_rate": 3.8366510322413855e-05,
"loss": 1.1773,
"step": 189000
},
{
"epoch": 0.7,
"learning_rate": 3.833573389469537e-05,
"loss": 1.1791,
"step": 189500
},
{
"epoch": 0.7,
"learning_rate": 3.830495746697689e-05,
"loss": 1.1708,
"step": 190000
},
{
"epoch": 0.7,
"learning_rate": 3.8274181039258413e-05,
"loss": 1.1702,
"step": 190500
},
{
"epoch": 0.71,
"learning_rate": 3.824340461153993e-05,
"loss": 1.1818,
"step": 191000
},
{
"epoch": 0.71,
"learning_rate": 3.821262818382145e-05,
"loss": 1.1785,
"step": 191500
},
{
"epoch": 0.71,
"learning_rate": 3.8181851756102965e-05,
"loss": 1.1732,
"step": 192000
},
{
"epoch": 0.71,
"learning_rate": 3.815107532838448e-05,
"loss": 1.1719,
"step": 192500
},
{
"epoch": 0.71,
"learning_rate": 3.8120298900666e-05,
"loss": 1.176,
"step": 193000
},
{
"epoch": 0.71,
"learning_rate": 3.808952247294752e-05,
"loss": 1.1743,
"step": 193500
},
{
"epoch": 0.72,
"learning_rate": 3.805874604522904e-05,
"loss": 1.1712,
"step": 194000
},
{
"epoch": 0.72,
"learning_rate": 3.802796961751056e-05,
"loss": 1.1693,
"step": 194500
},
{
"epoch": 0.72,
"learning_rate": 3.7997193189792074e-05,
"loss": 1.1695,
"step": 195000
},
{
"epoch": 0.72,
"learning_rate": 3.796641676207359e-05,
"loss": 1.1749,
"step": 195500
},
{
"epoch": 0.72,
"learning_rate": 3.793564033435511e-05,
"loss": 1.1658,
"step": 196000
},
{
"epoch": 0.73,
"learning_rate": 3.7904863906636625e-05,
"loss": 1.1661,
"step": 196500
},
{
"epoch": 0.73,
"learning_rate": 3.787408747891815e-05,
"loss": 1.179,
"step": 197000
},
{
"epoch": 0.73,
"learning_rate": 3.7843311051199666e-05,
"loss": 1.1703,
"step": 197500
},
{
"epoch": 0.73,
"learning_rate": 3.781253462348118e-05,
"loss": 1.1705,
"step": 198000
},
{
"epoch": 0.73,
"learning_rate": 3.77817581957627e-05,
"loss": 1.1607,
"step": 198500
},
{
"epoch": 0.73,
"learning_rate": 3.775098176804422e-05,
"loss": 1.1698,
"step": 199000
},
{
"epoch": 0.74,
"learning_rate": 3.7720205340325735e-05,
"loss": 1.1671,
"step": 199500
},
{
"epoch": 0.74,
"learning_rate": 3.768942891260726e-05,
"loss": 1.1753,
"step": 200000
},
{
"epoch": 0.74,
"learning_rate": 3.7658652484888776e-05,
"loss": 1.1637,
"step": 200500
},
{
"epoch": 0.74,
"learning_rate": 3.762787605717029e-05,
"loss": 1.169,
"step": 201000
},
{
"epoch": 0.74,
"learning_rate": 3.759709962945181e-05,
"loss": 1.1707,
"step": 201500
},
{
"epoch": 0.75,
"learning_rate": 3.756632320173333e-05,
"loss": 1.1625,
"step": 202000
},
{
"epoch": 0.75,
"learning_rate": 3.7535546774014844e-05,
"loss": 1.1568,
"step": 202500
},
{
"epoch": 0.75,
"learning_rate": 3.750477034629636e-05,
"loss": 1.1629,
"step": 203000
},
{
"epoch": 0.75,
"learning_rate": 3.7473993918577885e-05,
"loss": 1.1645,
"step": 203500
},
{
"epoch": 0.75,
"learning_rate": 3.74432174908594e-05,
"loss": 1.1689,
"step": 204000
},
{
"epoch": 0.76,
"learning_rate": 3.741244106314092e-05,
"loss": 1.1638,
"step": 204500
},
{
"epoch": 0.76,
"learning_rate": 3.7381664635422436e-05,
"loss": 1.1676,
"step": 205000
},
{
"epoch": 0.76,
"learning_rate": 3.735088820770395e-05,
"loss": 1.1614,
"step": 205500
},
{
"epoch": 0.76,
"learning_rate": 3.732011177998547e-05,
"loss": 1.1637,
"step": 206000
},
{
"epoch": 0.76,
"learning_rate": 3.7289335352266994e-05,
"loss": 1.161,
"step": 206500
},
{
"epoch": 0.76,
"learning_rate": 3.725855892454851e-05,
"loss": 1.1651,
"step": 207000
},
{
"epoch": 0.77,
"learning_rate": 3.722778249683003e-05,
"loss": 1.1691,
"step": 207500
},
{
"epoch": 0.77,
"learning_rate": 3.7197006069111545e-05,
"loss": 1.1666,
"step": 208000
},
{
"epoch": 0.77,
"learning_rate": 3.716622964139306e-05,
"loss": 1.1594,
"step": 208500
},
{
"epoch": 0.77,
"learning_rate": 3.713545321367458e-05,
"loss": 1.1636,
"step": 209000
},
{
"epoch": 0.77,
"learning_rate": 3.71046767859561e-05,
"loss": 1.1646,
"step": 209500
},
{
"epoch": 0.78,
"learning_rate": 3.707390035823762e-05,
"loss": 1.1693,
"step": 210000
},
{
"epoch": 0.78,
"learning_rate": 3.7043123930519144e-05,
"loss": 1.1628,
"step": 210500
},
{
"epoch": 0.78,
"learning_rate": 3.701234750280066e-05,
"loss": 1.1708,
"step": 211000
},
{
"epoch": 0.78,
"learning_rate": 3.698157107508218e-05,
"loss": 1.1617,
"step": 211500
},
{
"epoch": 0.78,
"learning_rate": 3.6950794647363696e-05,
"loss": 1.1649,
"step": 212000
},
{
"epoch": 0.78,
"learning_rate": 3.692001821964521e-05,
"loss": 1.1599,
"step": 212500
},
{
"epoch": 0.79,
"learning_rate": 3.688924179192673e-05,
"loss": 1.1562,
"step": 213000
},
{
"epoch": 0.79,
"learning_rate": 3.685846536420825e-05,
"loss": 1.1553,
"step": 213500
},
{
"epoch": 0.79,
"learning_rate": 3.682768893648977e-05,
"loss": 1.1671,
"step": 214000
},
{
"epoch": 0.79,
"learning_rate": 3.679691250877129e-05,
"loss": 1.1593,
"step": 214500
},
{
"epoch": 0.79,
"learning_rate": 3.6766136081052805e-05,
"loss": 1.1561,
"step": 215000
},
{
"epoch": 0.8,
"learning_rate": 3.673535965333432e-05,
"loss": 1.1652,
"step": 215500
},
{
"epoch": 0.8,
"learning_rate": 3.670458322561584e-05,
"loss": 1.1615,
"step": 216000
},
{
"epoch": 0.8,
"learning_rate": 3.6673806797897356e-05,
"loss": 1.1635,
"step": 216500
},
{
"epoch": 0.8,
"learning_rate": 3.664303037017888e-05,
"loss": 1.1586,
"step": 217000
},
{
"epoch": 0.8,
"learning_rate": 3.66122539424604e-05,
"loss": 1.1531,
"step": 217500
},
{
"epoch": 0.81,
"learning_rate": 3.6581477514741914e-05,
"loss": 1.1629,
"step": 218000
},
{
"epoch": 0.81,
"learning_rate": 3.655070108702343e-05,
"loss": 1.1649,
"step": 218500
},
{
"epoch": 0.81,
"learning_rate": 3.651992465930495e-05,
"loss": 1.1541,
"step": 219000
},
{
"epoch": 0.81,
"learning_rate": 3.6489148231586466e-05,
"loss": 1.1532,
"step": 219500
},
{
"epoch": 0.81,
"learning_rate": 3.645837180386798e-05,
"loss": 1.1637,
"step": 220000
},
{
"epoch": 0.81,
"learning_rate": 3.6427595376149507e-05,
"loss": 1.1622,
"step": 220500
},
{
"epoch": 0.82,
"learning_rate": 3.6396818948431024e-05,
"loss": 1.1599,
"step": 221000
},
{
"epoch": 0.82,
"learning_rate": 3.636604252071254e-05,
"loss": 1.1606,
"step": 221500
},
{
"epoch": 0.82,
"learning_rate": 3.633526609299406e-05,
"loss": 1.16,
"step": 222000
},
{
"epoch": 0.82,
"learning_rate": 3.6304489665275575e-05,
"loss": 1.1663,
"step": 222500
},
{
"epoch": 0.82,
"learning_rate": 3.627371323755709e-05,
"loss": 1.1549,
"step": 223000
},
{
"epoch": 0.83,
"learning_rate": 3.624293680983861e-05,
"loss": 1.1636,
"step": 223500
},
{
"epoch": 0.83,
"learning_rate": 3.621216038212013e-05,
"loss": 1.1565,
"step": 224000
},
{
"epoch": 0.83,
"learning_rate": 3.618138395440165e-05,
"loss": 1.1622,
"step": 224500
},
{
"epoch": 0.83,
"learning_rate": 3.615060752668317e-05,
"loss": 1.1639,
"step": 225000
},
{
"epoch": 0.83,
"learning_rate": 3.6119831098964684e-05,
"loss": 1.1588,
"step": 225500
},
{
"epoch": 0.83,
"learning_rate": 3.60890546712462e-05,
"loss": 1.155,
"step": 226000
},
{
"epoch": 0.84,
"learning_rate": 3.605827824352772e-05,
"loss": 1.152,
"step": 226500
},
{
"epoch": 0.84,
"learning_rate": 3.602750181580924e-05,
"loss": 1.1504,
"step": 227000
},
{
"epoch": 0.84,
"learning_rate": 3.599672538809076e-05,
"loss": 1.1579,
"step": 227500
},
{
"epoch": 0.84,
"learning_rate": 3.5965948960372276e-05,
"loss": 1.1603,
"step": 228000
},
{
"epoch": 0.84,
"learning_rate": 3.5935172532653794e-05,
"loss": 1.1619,
"step": 228500
},
{
"epoch": 0.85,
"learning_rate": 3.590439610493531e-05,
"loss": 1.1486,
"step": 229000
},
{
"epoch": 0.85,
"learning_rate": 3.587361967721683e-05,
"loss": 1.1568,
"step": 229500
},
{
"epoch": 0.85,
"learning_rate": 3.5842843249498345e-05,
"loss": 1.1534,
"step": 230000
},
{
"epoch": 0.85,
"learning_rate": 3.581206682177987e-05,
"loss": 1.1532,
"step": 230500
},
{
"epoch": 0.85,
"learning_rate": 3.5781290394061386e-05,
"loss": 1.1587,
"step": 231000
},
{
"epoch": 0.85,
"learning_rate": 3.57505139663429e-05,
"loss": 1.1572,
"step": 231500
},
{
"epoch": 0.86,
"learning_rate": 3.571973753862442e-05,
"loss": 1.1539,
"step": 232000
},
{
"epoch": 0.86,
"learning_rate": 3.568896111090594e-05,
"loss": 1.1548,
"step": 232500
},
{
"epoch": 0.86,
"learning_rate": 3.5658184683187454e-05,
"loss": 1.1496,
"step": 233000
},
{
"epoch": 0.86,
"learning_rate": 3.562740825546898e-05,
"loss": 1.1589,
"step": 233500
},
{
"epoch": 0.86,
"learning_rate": 3.5596631827750495e-05,
"loss": 1.1562,
"step": 234000
},
{
"epoch": 0.87,
"learning_rate": 3.556585540003201e-05,
"loss": 1.1447,
"step": 234500
},
{
"epoch": 0.87,
"learning_rate": 3.553507897231353e-05,
"loss": 1.1534,
"step": 235000
},
{
"epoch": 0.87,
"learning_rate": 3.5504302544595046e-05,
"loss": 1.1518,
"step": 235500
},
{
"epoch": 0.87,
"learning_rate": 3.5473526116876563e-05,
"loss": 1.1538,
"step": 236000
},
{
"epoch": 0.87,
"learning_rate": 3.544274968915808e-05,
"loss": 1.1462,
"step": 236500
},
{
"epoch": 0.88,
"learning_rate": 3.5411973261439604e-05,
"loss": 1.1496,
"step": 237000
},
{
"epoch": 0.88,
"learning_rate": 3.538119683372112e-05,
"loss": 1.1523,
"step": 237500
},
{
"epoch": 0.88,
"learning_rate": 3.535042040600264e-05,
"loss": 1.1456,
"step": 238000
},
{
"epoch": 0.88,
"learning_rate": 3.5319643978284156e-05,
"loss": 1.1486,
"step": 238500
},
{
"epoch": 0.88,
"learning_rate": 3.528886755056567e-05,
"loss": 1.1433,
"step": 239000
},
{
"epoch": 0.88,
"learning_rate": 3.525809112284719e-05,
"loss": 1.1533,
"step": 239500
},
{
"epoch": 0.89,
"learning_rate": 3.522731469512871e-05,
"loss": 1.1488,
"step": 240000
},
{
"epoch": 0.89,
"learning_rate": 3.519653826741023e-05,
"loss": 1.1501,
"step": 240500
},
{
"epoch": 0.89,
"learning_rate": 3.516576183969175e-05,
"loss": 1.1464,
"step": 241000
},
{
"epoch": 0.89,
"learning_rate": 3.5134985411973265e-05,
"loss": 1.1515,
"step": 241500
},
{
"epoch": 0.89,
"learning_rate": 3.510420898425478e-05,
"loss": 1.1446,
"step": 242000
},
{
"epoch": 0.9,
"learning_rate": 3.50734325565363e-05,
"loss": 1.1515,
"step": 242500
},
{
"epoch": 0.9,
"learning_rate": 3.5042656128817816e-05,
"loss": 1.1478,
"step": 243000
},
{
"epoch": 0.9,
"learning_rate": 3.501187970109934e-05,
"loss": 1.1478,
"step": 243500
},
{
"epoch": 0.9,
"learning_rate": 3.498110327338086e-05,
"loss": 1.1541,
"step": 244000
},
{
"epoch": 0.9,
"learning_rate": 3.4950326845662374e-05,
"loss": 1.1524,
"step": 244500
},
{
"epoch": 0.9,
"learning_rate": 3.491955041794389e-05,
"loss": 1.1595,
"step": 245000
},
{
"epoch": 0.91,
"learning_rate": 3.488877399022541e-05,
"loss": 1.1546,
"step": 245500
},
{
"epoch": 0.91,
"learning_rate": 3.4857997562506926e-05,
"loss": 1.1554,
"step": 246000
},
{
"epoch": 0.91,
"learning_rate": 3.482722113478844e-05,
"loss": 1.1479,
"step": 246500
},
{
"epoch": 0.91,
"learning_rate": 3.4796444707069967e-05,
"loss": 1.1498,
"step": 247000
},
{
"epoch": 0.91,
"learning_rate": 3.4765668279351484e-05,
"loss": 1.1502,
"step": 247500
},
{
"epoch": 0.92,
"learning_rate": 3.4734891851633e-05,
"loss": 1.1477,
"step": 248000
},
{
"epoch": 0.92,
"learning_rate": 3.470411542391452e-05,
"loss": 1.1501,
"step": 248500
},
{
"epoch": 0.92,
"learning_rate": 3.4673338996196035e-05,
"loss": 1.1536,
"step": 249000
},
{
"epoch": 0.92,
"learning_rate": 3.464256256847755e-05,
"loss": 1.149,
"step": 249500
},
{
"epoch": 0.92,
"learning_rate": 3.4611786140759076e-05,
"loss": 1.1513,
"step": 250000
},
{
"epoch": 0.93,
"learning_rate": 3.458100971304059e-05,
"loss": 1.1459,
"step": 250500
},
{
"epoch": 0.93,
"learning_rate": 3.455023328532211e-05,
"loss": 1.1386,
"step": 251000
},
{
"epoch": 0.93,
"learning_rate": 3.451945685760363e-05,
"loss": 1.1449,
"step": 251500
},
{
"epoch": 0.93,
"learning_rate": 3.4488680429885144e-05,
"loss": 1.1577,
"step": 252000
},
{
"epoch": 0.93,
"learning_rate": 3.445790400216666e-05,
"loss": 1.1557,
"step": 252500
},
{
"epoch": 0.93,
"learning_rate": 3.442712757444818e-05,
"loss": 1.1495,
"step": 253000
},
{
"epoch": 0.94,
"learning_rate": 3.43963511467297e-05,
"loss": 1.1445,
"step": 253500
},
{
"epoch": 0.94,
"learning_rate": 3.436557471901122e-05,
"loss": 1.1511,
"step": 254000
},
{
"epoch": 0.94,
"learning_rate": 3.4334798291292736e-05,
"loss": 1.1509,
"step": 254500
},
{
"epoch": 0.94,
"learning_rate": 3.4304021863574254e-05,
"loss": 1.1537,
"step": 255000
},
{
"epoch": 0.94,
"learning_rate": 3.427324543585577e-05,
"loss": 1.1451,
"step": 255500
},
{
"epoch": 0.95,
"learning_rate": 3.424246900813729e-05,
"loss": 1.1454,
"step": 256000
},
{
"epoch": 0.95,
"learning_rate": 3.421169258041881e-05,
"loss": 1.15,
"step": 256500
},
{
"epoch": 0.95,
"learning_rate": 3.418091615270033e-05,
"loss": 1.1492,
"step": 257000
},
{
"epoch": 0.95,
"learning_rate": 3.4150139724981846e-05,
"loss": 1.1415,
"step": 257500
},
{
"epoch": 0.95,
"learning_rate": 3.411936329726336e-05,
"loss": 1.1405,
"step": 258000
},
{
"epoch": 0.95,
"learning_rate": 3.408858686954488e-05,
"loss": 1.1472,
"step": 258500
},
{
"epoch": 0.96,
"learning_rate": 3.40578104418264e-05,
"loss": 1.1429,
"step": 259000
},
{
"epoch": 0.96,
"learning_rate": 3.4027034014107914e-05,
"loss": 1.1368,
"step": 259500
},
{
"epoch": 0.96,
"learning_rate": 3.399625758638944e-05,
"loss": 1.1467,
"step": 260000
},
{
"epoch": 0.96,
"learning_rate": 3.3965481158670955e-05,
"loss": 1.1562,
"step": 260500
},
{
"epoch": 0.96,
"learning_rate": 3.393470473095247e-05,
"loss": 1.1372,
"step": 261000
},
{
"epoch": 0.97,
"learning_rate": 3.390392830323399e-05,
"loss": 1.1459,
"step": 261500
},
{
"epoch": 0.97,
"learning_rate": 3.3873151875515506e-05,
"loss": 1.1473,
"step": 262000
},
{
"epoch": 0.97,
"learning_rate": 3.3842375447797023e-05,
"loss": 1.1462,
"step": 262500
},
{
"epoch": 0.97,
"learning_rate": 3.381159902007854e-05,
"loss": 1.1363,
"step": 263000
},
{
"epoch": 0.97,
"learning_rate": 3.3780822592360064e-05,
"loss": 1.1392,
"step": 263500
},
{
"epoch": 0.97,
"learning_rate": 3.375004616464158e-05,
"loss": 1.1459,
"step": 264000
},
{
"epoch": 0.98,
"learning_rate": 3.37192697369231e-05,
"loss": 1.144,
"step": 264500
},
{
"epoch": 0.98,
"learning_rate": 3.3688493309204616e-05,
"loss": 1.1426,
"step": 265000
},
{
"epoch": 0.98,
"learning_rate": 3.365771688148613e-05,
"loss": 1.1369,
"step": 265500
},
{
"epoch": 0.98,
"learning_rate": 3.362694045376765e-05,
"loss": 1.1415,
"step": 266000
},
{
"epoch": 0.98,
"learning_rate": 3.3596164026049174e-05,
"loss": 1.1548,
"step": 266500
},
{
"epoch": 0.99,
"learning_rate": 3.356538759833069e-05,
"loss": 1.1399,
"step": 267000
},
{
"epoch": 0.99,
"learning_rate": 3.353461117061221e-05,
"loss": 1.1362,
"step": 267500
},
{
"epoch": 0.99,
"learning_rate": 3.3503834742893725e-05,
"loss": 1.1469,
"step": 268000
},
{
"epoch": 0.99,
"learning_rate": 3.347305831517524e-05,
"loss": 1.139,
"step": 268500
},
{
"epoch": 0.99,
"learning_rate": 3.344228188745676e-05,
"loss": 1.1356,
"step": 269000
},
{
"epoch": 1.0,
"learning_rate": 3.3411505459738276e-05,
"loss": 1.1382,
"step": 269500
},
{
"epoch": 1.0,
"learning_rate": 3.33807290320198e-05,
"loss": 1.1351,
"step": 270000
},
{
"epoch": 1.0,
"learning_rate": 3.334995260430132e-05,
"loss": 1.1331,
"step": 270500
},
{
"epoch": 1.0,
"learning_rate": 3.3319176176582834e-05,
"loss": 1.1403,
"step": 271000
},
{
"epoch": 1.0,
"learning_rate": 3.328839974886435e-05,
"loss": 1.1388,
"step": 271500
},
{
"epoch": 1.0,
"learning_rate": 3.325762332114587e-05,
"loss": 1.1388,
"step": 272000
},
{
"epoch": 1.01,
"learning_rate": 3.3226846893427386e-05,
"loss": 1.1347,
"step": 272500
},
{
"epoch": 1.01,
"learning_rate": 3.319607046570891e-05,
"loss": 1.1375,
"step": 273000
},
{
"epoch": 1.01,
"learning_rate": 3.3165294037990427e-05,
"loss": 1.1324,
"step": 273500
},
{
"epoch": 1.01,
"learning_rate": 3.3134517610271944e-05,
"loss": 1.1322,
"step": 274000
},
{
"epoch": 1.01,
"learning_rate": 3.310374118255346e-05,
"loss": 1.1442,
"step": 274500
},
{
"epoch": 1.02,
"learning_rate": 3.307296475483498e-05,
"loss": 1.1326,
"step": 275000
},
{
"epoch": 1.02,
"learning_rate": 3.3042188327116495e-05,
"loss": 1.1346,
"step": 275500
},
{
"epoch": 1.02,
"learning_rate": 3.301141189939801e-05,
"loss": 1.1319,
"step": 276000
},
{
"epoch": 1.02,
"learning_rate": 3.2980635471679536e-05,
"loss": 1.132,
"step": 276500
},
{
"epoch": 1.02,
"learning_rate": 3.294985904396105e-05,
"loss": 1.1366,
"step": 277000
},
{
"epoch": 1.02,
"learning_rate": 3.291908261624257e-05,
"loss": 1.136,
"step": 277500
},
{
"epoch": 1.03,
"learning_rate": 3.288830618852409e-05,
"loss": 1.1282,
"step": 278000
},
{
"epoch": 1.03,
"learning_rate": 3.2857529760805604e-05,
"loss": 1.1345,
"step": 278500
},
{
"epoch": 1.03,
"learning_rate": 3.282675333308712e-05,
"loss": 1.1328,
"step": 279000
},
{
"epoch": 1.03,
"learning_rate": 3.279597690536864e-05,
"loss": 1.1454,
"step": 279500
},
{
"epoch": 1.03,
"learning_rate": 3.276520047765016e-05,
"loss": 1.1363,
"step": 280000
},
{
"epoch": 1.04,
"learning_rate": 3.273442404993168e-05,
"loss": 1.1317,
"step": 280500
},
{
"epoch": 1.04,
"learning_rate": 3.2703647622213196e-05,
"loss": 1.1365,
"step": 281000
},
{
"epoch": 1.04,
"learning_rate": 3.2672871194494713e-05,
"loss": 1.1274,
"step": 281500
},
{
"epoch": 1.04,
"learning_rate": 3.264209476677623e-05,
"loss": 1.1183,
"step": 282000
},
{
"epoch": 1.04,
"learning_rate": 3.261131833905775e-05,
"loss": 1.1323,
"step": 282500
},
{
"epoch": 1.05,
"learning_rate": 3.258054191133927e-05,
"loss": 1.1345,
"step": 283000
},
{
"epoch": 1.05,
"learning_rate": 3.254976548362079e-05,
"loss": 1.1327,
"step": 283500
},
{
"epoch": 1.05,
"learning_rate": 3.2518989055902306e-05,
"loss": 1.1311,
"step": 284000
},
{
"epoch": 1.05,
"learning_rate": 3.248821262818382e-05,
"loss": 1.1279,
"step": 284500
},
{
"epoch": 1.05,
"learning_rate": 3.245743620046534e-05,
"loss": 1.1349,
"step": 285000
},
{
"epoch": 1.05,
"learning_rate": 3.242665977274686e-05,
"loss": 1.1286,
"step": 285500
},
{
"epoch": 1.06,
"learning_rate": 3.2395883345028374e-05,
"loss": 1.1328,
"step": 286000
},
{
"epoch": 1.06,
"learning_rate": 3.23651069173099e-05,
"loss": 1.1296,
"step": 286500
},
{
"epoch": 1.06,
"learning_rate": 3.2334330489591415e-05,
"loss": 1.1322,
"step": 287000
},
{
"epoch": 1.06,
"learning_rate": 3.230355406187293e-05,
"loss": 1.1292,
"step": 287500
},
{
"epoch": 1.06,
"learning_rate": 3.227277763415445e-05,
"loss": 1.1322,
"step": 288000
},
{
"epoch": 1.07,
"learning_rate": 3.2242001206435966e-05,
"loss": 1.1352,
"step": 288500
},
{
"epoch": 1.07,
"learning_rate": 3.2211224778717483e-05,
"loss": 1.1268,
"step": 289000
},
{
"epoch": 1.07,
"learning_rate": 3.218044835099901e-05,
"loss": 1.1239,
"step": 289500
},
{
"epoch": 1.07,
"learning_rate": 3.2149671923280524e-05,
"loss": 1.129,
"step": 290000
},
{
"epoch": 1.07,
"learning_rate": 3.211889549556204e-05,
"loss": 1.1321,
"step": 290500
},
{
"epoch": 1.07,
"learning_rate": 3.208811906784356e-05,
"loss": 1.1374,
"step": 291000
},
{
"epoch": 1.08,
"learning_rate": 3.2057342640125076e-05,
"loss": 1.1297,
"step": 291500
},
{
"epoch": 1.08,
"learning_rate": 3.202656621240659e-05,
"loss": 1.1311,
"step": 292000
},
{
"epoch": 1.08,
"learning_rate": 3.199578978468811e-05,
"loss": 1.1225,
"step": 292500
},
{
"epoch": 1.08,
"learning_rate": 3.1965013356969634e-05,
"loss": 1.1225,
"step": 293000
},
{
"epoch": 1.08,
"learning_rate": 3.193423692925115e-05,
"loss": 1.1295,
"step": 293500
},
{
"epoch": 1.09,
"learning_rate": 3.190346050153267e-05,
"loss": 1.1262,
"step": 294000
},
{
"epoch": 1.09,
"learning_rate": 3.1872684073814185e-05,
"loss": 1.1258,
"step": 294500
},
{
"epoch": 1.09,
"learning_rate": 3.18419076460957e-05,
"loss": 1.1154,
"step": 295000
},
{
"epoch": 1.09,
"learning_rate": 3.181113121837722e-05,
"loss": 1.1327,
"step": 295500
},
{
"epoch": 1.09,
"learning_rate": 3.1780354790658736e-05,
"loss": 1.1235,
"step": 296000
},
{
"epoch": 1.1,
"learning_rate": 3.174957836294026e-05,
"loss": 1.1287,
"step": 296500
},
{
"epoch": 1.1,
"learning_rate": 3.171880193522178e-05,
"loss": 1.128,
"step": 297000
},
{
"epoch": 1.1,
"learning_rate": 3.1688025507503294e-05,
"loss": 1.1221,
"step": 297500
},
{
"epoch": 1.1,
"learning_rate": 3.165724907978481e-05,
"loss": 1.1259,
"step": 298000
},
{
"epoch": 1.1,
"learning_rate": 3.162647265206633e-05,
"loss": 1.1263,
"step": 298500
},
{
"epoch": 1.1,
"learning_rate": 3.1595696224347846e-05,
"loss": 1.1298,
"step": 299000
},
{
"epoch": 1.11,
"learning_rate": 3.156491979662937e-05,
"loss": 1.1263,
"step": 299500
},
{
"epoch": 1.11,
"learning_rate": 3.1534143368910886e-05,
"loss": 1.1299,
"step": 300000
},
{
"epoch": 1.11,
"learning_rate": 3.1503366941192404e-05,
"loss": 1.1265,
"step": 300500
},
{
"epoch": 1.11,
"learning_rate": 3.147259051347392e-05,
"loss": 1.1249,
"step": 301000
},
{
"epoch": 1.11,
"learning_rate": 3.144181408575544e-05,
"loss": 1.1206,
"step": 301500
},
{
"epoch": 1.12,
"learning_rate": 3.1411037658036955e-05,
"loss": 1.1186,
"step": 302000
},
{
"epoch": 1.12,
"learning_rate": 3.138026123031847e-05,
"loss": 1.1181,
"step": 302500
},
{
"epoch": 1.12,
"learning_rate": 3.1349484802599996e-05,
"loss": 1.1291,
"step": 303000
},
{
"epoch": 1.12,
"learning_rate": 3.131870837488151e-05,
"loss": 1.1262,
"step": 303500
},
{
"epoch": 1.12,
"learning_rate": 3.128793194716303e-05,
"loss": 1.1227,
"step": 304000
},
{
"epoch": 1.12,
"learning_rate": 3.125715551944455e-05,
"loss": 1.1233,
"step": 304500
},
{
"epoch": 1.13,
"learning_rate": 3.1226379091726064e-05,
"loss": 1.1156,
"step": 305000
},
{
"epoch": 1.13,
"learning_rate": 3.119560266400758e-05,
"loss": 1.1147,
"step": 305500
},
{
"epoch": 1.13,
"learning_rate": 3.1164826236289105e-05,
"loss": 1.1226,
"step": 306000
},
{
"epoch": 1.13,
"learning_rate": 3.113404980857062e-05,
"loss": 1.1175,
"step": 306500
},
{
"epoch": 1.13,
"learning_rate": 3.110327338085214e-05,
"loss": 1.124,
"step": 307000
},
{
"epoch": 1.14,
"learning_rate": 3.1072496953133656e-05,
"loss": 1.1166,
"step": 307500
},
{
"epoch": 1.14,
"learning_rate": 3.1041720525415173e-05,
"loss": 1.1222,
"step": 308000
},
{
"epoch": 1.14,
"learning_rate": 3.101094409769669e-05,
"loss": 1.1214,
"step": 308500
},
{
"epoch": 1.14,
"learning_rate": 3.098016766997821e-05,
"loss": 1.1231,
"step": 309000
},
{
"epoch": 1.14,
"learning_rate": 3.094939124225973e-05,
"loss": 1.1208,
"step": 309500
},
{
"epoch": 1.14,
"learning_rate": 3.091861481454125e-05,
"loss": 1.1167,
"step": 310000
},
{
"epoch": 1.15,
"learning_rate": 3.0887838386822766e-05,
"loss": 1.1238,
"step": 310500
},
{
"epoch": 1.15,
"learning_rate": 3.085706195910428e-05,
"loss": 1.1203,
"step": 311000
},
{
"epoch": 1.15,
"learning_rate": 3.08262855313858e-05,
"loss": 1.1276,
"step": 311500
},
{
"epoch": 1.15,
"learning_rate": 3.079550910366732e-05,
"loss": 1.1169,
"step": 312000
},
{
"epoch": 1.15,
"learning_rate": 3.076473267594884e-05,
"loss": 1.1197,
"step": 312500
},
{
"epoch": 1.16,
"learning_rate": 3.073395624823036e-05,
"loss": 1.1207,
"step": 313000
},
{
"epoch": 1.16,
"learning_rate": 3.0703179820511875e-05,
"loss": 1.1227,
"step": 313500
},
{
"epoch": 1.16,
"learning_rate": 3.067240339279339e-05,
"loss": 1.1221,
"step": 314000
},
{
"epoch": 1.16,
"learning_rate": 3.064162696507491e-05,
"loss": 1.1189,
"step": 314500
},
{
"epoch": 1.16,
"learning_rate": 3.0610850537356426e-05,
"loss": 1.1168,
"step": 315000
},
{
"epoch": 1.17,
"learning_rate": 3.058007410963794e-05,
"loss": 1.1153,
"step": 315500
},
{
"epoch": 1.17,
"learning_rate": 3.054929768191947e-05,
"loss": 1.1149,
"step": 316000
},
{
"epoch": 1.17,
"learning_rate": 3.0518521254200984e-05,
"loss": 1.1202,
"step": 316500
},
{
"epoch": 1.17,
"learning_rate": 3.04877448264825e-05,
"loss": 1.1233,
"step": 317000
},
{
"epoch": 1.17,
"learning_rate": 3.045696839876402e-05,
"loss": 1.1165,
"step": 317500
},
{
"epoch": 1.17,
"learning_rate": 3.0426191971045536e-05,
"loss": 1.1274,
"step": 318000
},
{
"epoch": 1.18,
"learning_rate": 3.0395415543327056e-05,
"loss": 1.1161,
"step": 318500
},
{
"epoch": 1.18,
"learning_rate": 3.0364639115608573e-05,
"loss": 1.1149,
"step": 319000
},
{
"epoch": 1.18,
"learning_rate": 3.033386268789009e-05,
"loss": 1.1194,
"step": 319500
},
{
"epoch": 1.18,
"learning_rate": 3.030308626017161e-05,
"loss": 1.1261,
"step": 320000
},
{
"epoch": 1.18,
"learning_rate": 3.0272309832453128e-05,
"loss": 1.1206,
"step": 320500
},
{
"epoch": 1.19,
"learning_rate": 3.0241533404734645e-05,
"loss": 1.1144,
"step": 321000
},
{
"epoch": 1.19,
"learning_rate": 3.0210756977016162e-05,
"loss": 1.1186,
"step": 321500
},
{
"epoch": 1.19,
"learning_rate": 3.0179980549297682e-05,
"loss": 1.129,
"step": 322000
},
{
"epoch": 1.19,
"learning_rate": 3.01492041215792e-05,
"loss": 1.1249,
"step": 322500
},
{
"epoch": 1.19,
"learning_rate": 3.0118427693860717e-05,
"loss": 1.121,
"step": 323000
},
{
"epoch": 1.19,
"learning_rate": 3.0087651266142237e-05,
"loss": 1.1155,
"step": 323500
},
{
"epoch": 1.2,
"learning_rate": 3.0056874838423754e-05,
"loss": 1.1212,
"step": 324000
},
{
"epoch": 1.2,
"learning_rate": 3.002609841070527e-05,
"loss": 1.1076,
"step": 324500
},
{
"epoch": 1.2,
"learning_rate": 2.9995321982986792e-05,
"loss": 1.1176,
"step": 325000
},
{
"epoch": 1.2,
"learning_rate": 2.996454555526831e-05,
"loss": 1.1167,
"step": 325500
},
{
"epoch": 1.2,
"learning_rate": 2.9933769127549826e-05,
"loss": 1.1186,
"step": 326000
},
{
"epoch": 1.21,
"learning_rate": 2.9902992699831346e-05,
"loss": 1.1161,
"step": 326500
},
{
"epoch": 1.21,
"learning_rate": 2.9872216272112864e-05,
"loss": 1.1168,
"step": 327000
},
{
"epoch": 1.21,
"learning_rate": 2.984143984439438e-05,
"loss": 1.1132,
"step": 327500
},
{
"epoch": 1.21,
"learning_rate": 2.9810663416675898e-05,
"loss": 1.1185,
"step": 328000
},
{
"epoch": 1.21,
"learning_rate": 2.9779886988957418e-05,
"loss": 1.1088,
"step": 328500
},
{
"epoch": 1.22,
"learning_rate": 2.9749110561238935e-05,
"loss": 1.1146,
"step": 329000
},
{
"epoch": 1.22,
"learning_rate": 2.9718334133520452e-05,
"loss": 1.1189,
"step": 329500
},
{
"epoch": 1.22,
"learning_rate": 2.9687557705801973e-05,
"loss": 1.1159,
"step": 330000
},
{
"epoch": 1.22,
"learning_rate": 2.965678127808349e-05,
"loss": 1.1174,
"step": 330500
},
{
"epoch": 1.22,
"learning_rate": 2.9626004850365007e-05,
"loss": 1.1194,
"step": 331000
},
{
"epoch": 1.22,
"learning_rate": 2.9595228422646528e-05,
"loss": 1.1091,
"step": 331500
},
{
"epoch": 1.23,
"learning_rate": 2.9564451994928045e-05,
"loss": 1.112,
"step": 332000
},
{
"epoch": 1.23,
"learning_rate": 2.953367556720956e-05,
"loss": 1.112,
"step": 332500
},
{
"epoch": 1.23,
"learning_rate": 2.950289913949108e-05,
"loss": 1.1104,
"step": 333000
},
{
"epoch": 1.23,
"learning_rate": 2.94721227117726e-05,
"loss": 1.1053,
"step": 333500
},
{
"epoch": 1.23,
"learning_rate": 2.9441346284054116e-05,
"loss": 1.1106,
"step": 334000
},
{
"epoch": 1.24,
"learning_rate": 2.9410569856335633e-05,
"loss": 1.1185,
"step": 334500
},
{
"epoch": 1.24,
"learning_rate": 2.9379793428617154e-05,
"loss": 1.1113,
"step": 335000
},
{
"epoch": 1.24,
"learning_rate": 2.934901700089867e-05,
"loss": 1.1096,
"step": 335500
},
{
"epoch": 1.24,
"learning_rate": 2.9318240573180188e-05,
"loss": 1.1085,
"step": 336000
},
{
"epoch": 1.24,
"learning_rate": 2.928746414546171e-05,
"loss": 1.1137,
"step": 336500
},
{
"epoch": 1.24,
"learning_rate": 2.9256687717743226e-05,
"loss": 1.1075,
"step": 337000
},
{
"epoch": 1.25,
"learning_rate": 2.9225911290024743e-05,
"loss": 1.1093,
"step": 337500
},
{
"epoch": 1.25,
"learning_rate": 2.919513486230626e-05,
"loss": 1.1159,
"step": 338000
},
{
"epoch": 1.25,
"learning_rate": 2.916435843458778e-05,
"loss": 1.1063,
"step": 338500
},
{
"epoch": 1.25,
"learning_rate": 2.9133582006869297e-05,
"loss": 1.1135,
"step": 339000
},
{
"epoch": 1.25,
"learning_rate": 2.9102805579150815e-05,
"loss": 1.1186,
"step": 339500
},
{
"epoch": 1.26,
"learning_rate": 2.9072029151432335e-05,
"loss": 1.1128,
"step": 340000
},
{
"epoch": 1.26,
"learning_rate": 2.9041252723713852e-05,
"loss": 1.1148,
"step": 340500
},
{
"epoch": 1.26,
"learning_rate": 2.901047629599537e-05,
"loss": 1.1056,
"step": 341000
},
{
"epoch": 1.26,
"learning_rate": 2.897969986827689e-05,
"loss": 1.1153,
"step": 341500
},
{
"epoch": 1.26,
"learning_rate": 2.8948923440558407e-05,
"loss": 1.1147,
"step": 342000
},
{
"epoch": 1.26,
"learning_rate": 2.8918147012839924e-05,
"loss": 1.1155,
"step": 342500
},
{
"epoch": 1.27,
"learning_rate": 2.8887370585121444e-05,
"loss": 1.1132,
"step": 343000
},
{
"epoch": 1.27,
"learning_rate": 2.885659415740296e-05,
"loss": 1.1072,
"step": 343500
},
{
"epoch": 1.27,
"learning_rate": 2.882581772968448e-05,
"loss": 1.1175,
"step": 344000
},
{
"epoch": 1.27,
"learning_rate": 2.8795041301965996e-05,
"loss": 1.1048,
"step": 344500
},
{
"epoch": 1.27,
"learning_rate": 2.8764264874247516e-05,
"loss": 1.1126,
"step": 345000
},
{
"epoch": 1.28,
"learning_rate": 2.8733488446529033e-05,
"loss": 1.1107,
"step": 345500
},
{
"epoch": 1.28,
"learning_rate": 2.870271201881055e-05,
"loss": 1.1108,
"step": 346000
},
{
"epoch": 1.28,
"learning_rate": 2.867193559109207e-05,
"loss": 1.1062,
"step": 346500
},
{
"epoch": 1.28,
"learning_rate": 2.8641159163373588e-05,
"loss": 1.1083,
"step": 347000
},
{
"epoch": 1.28,
"learning_rate": 2.8610382735655105e-05,
"loss": 1.0995,
"step": 347500
},
{
"epoch": 1.29,
"learning_rate": 2.8579606307936625e-05,
"loss": 1.1143,
"step": 348000
},
{
"epoch": 1.29,
"learning_rate": 2.8548829880218142e-05,
"loss": 1.1146,
"step": 348500
},
{
"epoch": 1.29,
"learning_rate": 2.851805345249966e-05,
"loss": 1.1144,
"step": 349000
},
{
"epoch": 1.29,
"learning_rate": 2.8487277024781177e-05,
"loss": 1.1108,
"step": 349500
},
{
"epoch": 1.29,
"learning_rate": 2.8456500597062697e-05,
"loss": 1.1105,
"step": 350000
},
{
"epoch": 1.29,
"learning_rate": 2.842572416934422e-05,
"loss": 1.1059,
"step": 350500
},
{
"epoch": 1.3,
"learning_rate": 2.8394947741625738e-05,
"loss": 1.1037,
"step": 351000
},
{
"epoch": 1.3,
"learning_rate": 2.8364171313907255e-05,
"loss": 1.1111,
"step": 351500
},
{
"epoch": 1.3,
"learning_rate": 2.8333394886188776e-05,
"loss": 1.1103,
"step": 352000
},
{
"epoch": 1.3,
"learning_rate": 2.8302618458470293e-05,
"loss": 1.1081,
"step": 352500
},
{
"epoch": 1.3,
"learning_rate": 2.827184203075181e-05,
"loss": 1.1095,
"step": 353000
},
{
"epoch": 1.31,
"learning_rate": 2.824106560303333e-05,
"loss": 1.1069,
"step": 353500
},
{
"epoch": 1.31,
"learning_rate": 2.8210289175314847e-05,
"loss": 1.1134,
"step": 354000
},
{
"epoch": 1.31,
"learning_rate": 2.8179512747596364e-05,
"loss": 1.108,
"step": 354500
},
{
"epoch": 1.31,
"learning_rate": 2.814873631987788e-05,
"loss": 1.1124,
"step": 355000
},
{
"epoch": 1.31,
"learning_rate": 2.8117959892159402e-05,
"loss": 1.1144,
"step": 355500
},
{
"epoch": 1.31,
"learning_rate": 2.808718346444092e-05,
"loss": 1.1154,
"step": 356000
},
{
"epoch": 1.32,
"learning_rate": 2.8056407036722436e-05,
"loss": 1.1116,
"step": 356500
},
{
"epoch": 1.32,
"learning_rate": 2.8025630609003957e-05,
"loss": 1.1119,
"step": 357000
},
{
"epoch": 1.32,
"learning_rate": 2.7994854181285474e-05,
"loss": 1.1138,
"step": 357500
},
{
"epoch": 1.32,
"learning_rate": 2.796407775356699e-05,
"loss": 1.1047,
"step": 358000
},
{
"epoch": 1.32,
"learning_rate": 2.793330132584851e-05,
"loss": 1.1068,
"step": 358500
},
{
"epoch": 1.33,
"learning_rate": 2.790252489813003e-05,
"loss": 1.1063,
"step": 359000
},
{
"epoch": 1.33,
"learning_rate": 2.7871748470411546e-05,
"loss": 1.0996,
"step": 359500
},
{
"epoch": 1.33,
"learning_rate": 2.7840972042693063e-05,
"loss": 1.1076,
"step": 360000
},
{
"epoch": 1.33,
"learning_rate": 2.7810195614974583e-05,
"loss": 1.0979,
"step": 360500
},
{
"epoch": 1.33,
"learning_rate": 2.77794191872561e-05,
"loss": 1.1051,
"step": 361000
},
{
"epoch": 1.34,
"learning_rate": 2.7748642759537617e-05,
"loss": 1.1009,
"step": 361500
},
{
"epoch": 1.34,
"learning_rate": 2.7717866331819138e-05,
"loss": 1.104,
"step": 362000
},
{
"epoch": 1.34,
"learning_rate": 2.7687089904100655e-05,
"loss": 1.108,
"step": 362500
},
{
"epoch": 1.34,
"learning_rate": 2.7656313476382172e-05,
"loss": 1.0968,
"step": 363000
},
{
"epoch": 1.34,
"learning_rate": 2.7625537048663692e-05,
"loss": 1.1071,
"step": 363500
},
{
"epoch": 1.34,
"learning_rate": 2.759476062094521e-05,
"loss": 1.0957,
"step": 364000
},
{
"epoch": 1.35,
"learning_rate": 2.7563984193226727e-05,
"loss": 1.1063,
"step": 364500
},
{
"epoch": 1.35,
"learning_rate": 2.7533207765508247e-05,
"loss": 1.1008,
"step": 365000
},
{
"epoch": 1.35,
"learning_rate": 2.7502431337789764e-05,
"loss": 1.1065,
"step": 365500
},
{
"epoch": 1.35,
"learning_rate": 2.747165491007128e-05,
"loss": 1.1019,
"step": 366000
},
{
"epoch": 1.35,
"learning_rate": 2.74408784823528e-05,
"loss": 1.1028,
"step": 366500
},
{
"epoch": 1.36,
"learning_rate": 2.741010205463432e-05,
"loss": 1.0995,
"step": 367000
},
{
"epoch": 1.36,
"learning_rate": 2.7379325626915836e-05,
"loss": 1.1066,
"step": 367500
},
{
"epoch": 1.36,
"learning_rate": 2.7348549199197353e-05,
"loss": 1.0997,
"step": 368000
},
{
"epoch": 1.36,
"learning_rate": 2.7317772771478873e-05,
"loss": 1.1093,
"step": 368500
},
{
"epoch": 1.36,
"learning_rate": 2.728699634376039e-05,
"loss": 1.1062,
"step": 369000
},
{
"epoch": 1.36,
"learning_rate": 2.7256219916041908e-05,
"loss": 1.103,
"step": 369500
},
{
"epoch": 1.37,
"learning_rate": 2.7225443488323428e-05,
"loss": 1.115,
"step": 370000
},
{
"epoch": 1.37,
"learning_rate": 2.7194667060604945e-05,
"loss": 1.0995,
"step": 370500
},
{
"epoch": 1.37,
"learning_rate": 2.7163890632886462e-05,
"loss": 1.0975,
"step": 371000
},
{
"epoch": 1.37,
"learning_rate": 2.713311420516798e-05,
"loss": 1.0929,
"step": 371500
},
{
"epoch": 1.37,
"learning_rate": 2.71023377774495e-05,
"loss": 1.1008,
"step": 372000
},
{
"epoch": 1.38,
"learning_rate": 2.7071561349731017e-05,
"loss": 1.0988,
"step": 372500
},
{
"epoch": 1.38,
"learning_rate": 2.7040784922012534e-05,
"loss": 1.0953,
"step": 373000
},
{
"epoch": 1.38,
"learning_rate": 2.7010008494294055e-05,
"loss": 1.1023,
"step": 373500
},
{
"epoch": 1.38,
"learning_rate": 2.697923206657557e-05,
"loss": 1.1068,
"step": 374000
},
{
"epoch": 1.38,
"learning_rate": 2.694845563885709e-05,
"loss": 1.1052,
"step": 374500
},
{
"epoch": 1.38,
"learning_rate": 2.691767921113861e-05,
"loss": 1.0968,
"step": 375000
},
{
"epoch": 1.39,
"learning_rate": 2.6886902783420126e-05,
"loss": 1.1074,
"step": 375500
},
{
"epoch": 1.39,
"learning_rate": 2.6856126355701643e-05,
"loss": 1.0953,
"step": 376000
},
{
"epoch": 1.39,
"learning_rate": 2.682534992798316e-05,
"loss": 1.1027,
"step": 376500
},
{
"epoch": 1.39,
"learning_rate": 2.679457350026468e-05,
"loss": 1.0974,
"step": 377000
},
{
"epoch": 1.39,
"learning_rate": 2.6763797072546198e-05,
"loss": 1.0923,
"step": 377500
},
{
"epoch": 1.4,
"learning_rate": 2.6733020644827715e-05,
"loss": 1.0968,
"step": 378000
},
{
"epoch": 1.4,
"learning_rate": 2.6702244217109236e-05,
"loss": 1.1017,
"step": 378500
},
{
"epoch": 1.4,
"learning_rate": 2.6671467789390753e-05,
"loss": 1.1037,
"step": 379000
},
{
"epoch": 1.4,
"learning_rate": 2.664069136167227e-05,
"loss": 1.1003,
"step": 379500
},
{
"epoch": 1.4,
"learning_rate": 2.660991493395379e-05,
"loss": 1.102,
"step": 380000
},
{
"epoch": 1.41,
"learning_rate": 2.6579138506235307e-05,
"loss": 1.0969,
"step": 380500
},
{
"epoch": 1.41,
"learning_rate": 2.6548362078516824e-05,
"loss": 1.1022,
"step": 381000
},
{
"epoch": 1.41,
"learning_rate": 2.6517585650798345e-05,
"loss": 1.0969,
"step": 381500
},
{
"epoch": 1.41,
"learning_rate": 2.6486809223079862e-05,
"loss": 1.0876,
"step": 382000
},
{
"epoch": 1.41,
"learning_rate": 2.645603279536138e-05,
"loss": 1.1026,
"step": 382500
},
{
"epoch": 1.41,
"learning_rate": 2.6425256367642896e-05,
"loss": 1.0973,
"step": 383000
},
{
"epoch": 1.42,
"learning_rate": 2.6394479939924417e-05,
"loss": 1.1026,
"step": 383500
},
{
"epoch": 1.42,
"learning_rate": 2.6363703512205934e-05,
"loss": 1.0965,
"step": 384000
},
{
"epoch": 1.42,
"learning_rate": 2.633292708448745e-05,
"loss": 1.0984,
"step": 384500
},
{
"epoch": 1.42,
"learning_rate": 2.630215065676897e-05,
"loss": 1.101,
"step": 385000
},
{
"epoch": 1.42,
"learning_rate": 2.627137422905049e-05,
"loss": 1.0917,
"step": 385500
},
{
"epoch": 1.43,
"learning_rate": 2.6240597801332005e-05,
"loss": 1.0949,
"step": 386000
},
{
"epoch": 1.43,
"learning_rate": 2.6209821373613526e-05,
"loss": 1.101,
"step": 386500
},
{
"epoch": 1.43,
"learning_rate": 2.6179044945895043e-05,
"loss": 1.0962,
"step": 387000
},
{
"epoch": 1.43,
"learning_rate": 2.614826851817656e-05,
"loss": 1.1,
"step": 387500
},
{
"epoch": 1.43,
"learning_rate": 2.6117492090458077e-05,
"loss": 1.0923,
"step": 388000
},
{
"epoch": 1.43,
"learning_rate": 2.6086715662739598e-05,
"loss": 1.0999,
"step": 388500
},
{
"epoch": 1.44,
"learning_rate": 2.6055939235021115e-05,
"loss": 1.0905,
"step": 389000
},
{
"epoch": 1.44,
"learning_rate": 2.6025162807302632e-05,
"loss": 1.1041,
"step": 389500
},
{
"epoch": 1.44,
"learning_rate": 2.5994386379584152e-05,
"loss": 1.1014,
"step": 390000
},
{
"epoch": 1.44,
"learning_rate": 2.596360995186567e-05,
"loss": 1.0937,
"step": 390500
},
{
"epoch": 1.44,
"learning_rate": 2.5932833524147187e-05,
"loss": 1.1003,
"step": 391000
},
{
"epoch": 1.45,
"learning_rate": 2.5902057096428707e-05,
"loss": 1.1029,
"step": 391500
},
{
"epoch": 1.45,
"learning_rate": 2.5871280668710224e-05,
"loss": 1.0963,
"step": 392000
},
{
"epoch": 1.45,
"learning_rate": 2.584050424099174e-05,
"loss": 1.0957,
"step": 392500
},
{
"epoch": 1.45,
"learning_rate": 2.5809727813273262e-05,
"loss": 1.1007,
"step": 393000
},
{
"epoch": 1.45,
"learning_rate": 2.577895138555478e-05,
"loss": 1.1006,
"step": 393500
},
{
"epoch": 1.46,
"learning_rate": 2.5748174957836296e-05,
"loss": 1.0938,
"step": 394000
},
{
"epoch": 1.46,
"learning_rate": 2.5717398530117813e-05,
"loss": 1.0928,
"step": 394500
},
{
"epoch": 1.46,
"learning_rate": 2.5686622102399333e-05,
"loss": 1.0849,
"step": 395000
},
{
"epoch": 1.46,
"learning_rate": 2.565584567468085e-05,
"loss": 1.0985,
"step": 395500
},
{
"epoch": 1.46,
"learning_rate": 2.5625069246962368e-05,
"loss": 1.0899,
"step": 396000
},
{
"epoch": 1.46,
"learning_rate": 2.5594292819243888e-05,
"loss": 1.0957,
"step": 396500
},
{
"epoch": 1.47,
"learning_rate": 2.5563516391525405e-05,
"loss": 1.0867,
"step": 397000
},
{
"epoch": 1.47,
"learning_rate": 2.5532739963806922e-05,
"loss": 1.1027,
"step": 397500
},
{
"epoch": 1.47,
"learning_rate": 2.5501963536088443e-05,
"loss": 1.0899,
"step": 398000
},
{
"epoch": 1.47,
"learning_rate": 2.547118710836996e-05,
"loss": 1.0836,
"step": 398500
},
{
"epoch": 1.47,
"learning_rate": 2.5440410680651477e-05,
"loss": 1.0939,
"step": 399000
},
{
"epoch": 1.48,
"learning_rate": 2.5409634252932994e-05,
"loss": 1.0925,
"step": 399500
},
{
"epoch": 1.48,
"learning_rate": 2.5378857825214515e-05,
"loss": 1.1019,
"step": 400000
},
{
"epoch": 1.48,
"learning_rate": 2.534808139749603e-05,
"loss": 1.1007,
"step": 400500
},
{
"epoch": 1.48,
"learning_rate": 2.531730496977755e-05,
"loss": 1.0905,
"step": 401000
},
{
"epoch": 1.48,
"learning_rate": 2.528652854205907e-05,
"loss": 1.0936,
"step": 401500
},
{
"epoch": 1.48,
"learning_rate": 2.5255752114340586e-05,
"loss": 1.0953,
"step": 402000
},
{
"epoch": 1.49,
"learning_rate": 2.5224975686622103e-05,
"loss": 1.0967,
"step": 402500
},
{
"epoch": 1.49,
"learning_rate": 2.5194199258903624e-05,
"loss": 1.0934,
"step": 403000
},
{
"epoch": 1.49,
"learning_rate": 2.516342283118514e-05,
"loss": 1.1003,
"step": 403500
},
{
"epoch": 1.49,
"learning_rate": 2.5132646403466658e-05,
"loss": 1.0893,
"step": 404000
},
{
"epoch": 1.49,
"learning_rate": 2.5101869975748175e-05,
"loss": 1.0878,
"step": 404500
},
{
"epoch": 1.5,
"learning_rate": 2.5071093548029696e-05,
"loss": 1.0921,
"step": 405000
},
{
"epoch": 1.5,
"learning_rate": 2.5040317120311213e-05,
"loss": 1.0976,
"step": 405500
},
{
"epoch": 1.5,
"learning_rate": 2.500954069259273e-05,
"loss": 1.0978,
"step": 406000
},
{
"epoch": 1.5,
"learning_rate": 2.497876426487425e-05,
"loss": 1.0948,
"step": 406500
},
{
"epoch": 1.5,
"learning_rate": 2.4947987837155767e-05,
"loss": 1.0894,
"step": 407000
},
{
"epoch": 1.5,
"learning_rate": 2.4917211409437284e-05,
"loss": 1.0933,
"step": 407500
},
{
"epoch": 1.51,
"learning_rate": 2.4886434981718805e-05,
"loss": 1.0835,
"step": 408000
},
{
"epoch": 1.51,
"learning_rate": 2.4855658554000322e-05,
"loss": 1.0971,
"step": 408500
},
{
"epoch": 1.51,
"learning_rate": 2.482488212628184e-05,
"loss": 1.0921,
"step": 409000
},
{
"epoch": 1.51,
"learning_rate": 2.479410569856336e-05,
"loss": 1.085,
"step": 409500
},
{
"epoch": 1.51,
"learning_rate": 2.4763329270844877e-05,
"loss": 1.0838,
"step": 410000
},
{
"epoch": 1.52,
"learning_rate": 2.4732552843126394e-05,
"loss": 1.0863,
"step": 410500
},
{
"epoch": 1.52,
"learning_rate": 2.470177641540791e-05,
"loss": 1.0882,
"step": 411000
},
{
"epoch": 1.52,
"learning_rate": 2.467099998768943e-05,
"loss": 1.0895,
"step": 411500
},
{
"epoch": 1.52,
"learning_rate": 2.464022355997095e-05,
"loss": 1.0949,
"step": 412000
},
{
"epoch": 1.52,
"learning_rate": 2.4609447132252465e-05,
"loss": 1.0864,
"step": 412500
},
{
"epoch": 1.53,
"learning_rate": 2.4578670704533986e-05,
"loss": 1.0848,
"step": 413000
},
{
"epoch": 1.53,
"learning_rate": 2.4547894276815503e-05,
"loss": 1.0895,
"step": 413500
},
{
"epoch": 1.53,
"learning_rate": 2.451711784909702e-05,
"loss": 1.0822,
"step": 414000
},
{
"epoch": 1.53,
"learning_rate": 2.448634142137854e-05,
"loss": 1.0859,
"step": 414500
},
{
"epoch": 1.53,
"learning_rate": 2.4455564993660058e-05,
"loss": 1.0809,
"step": 415000
},
{
"epoch": 1.53,
"learning_rate": 2.4424788565941575e-05,
"loss": 1.0877,
"step": 415500
},
{
"epoch": 1.54,
"learning_rate": 2.4394012138223092e-05,
"loss": 1.0822,
"step": 416000
},
{
"epoch": 1.54,
"learning_rate": 2.4363235710504612e-05,
"loss": 1.0981,
"step": 416500
},
{
"epoch": 1.54,
"learning_rate": 2.433245928278613e-05,
"loss": 1.0834,
"step": 417000
},
{
"epoch": 1.54,
"learning_rate": 2.4301682855067647e-05,
"loss": 1.0893,
"step": 417500
},
{
"epoch": 1.54,
"learning_rate": 2.4270906427349167e-05,
"loss": 1.0872,
"step": 418000
},
{
"epoch": 1.55,
"learning_rate": 2.4240129999630684e-05,
"loss": 1.0805,
"step": 418500
},
{
"epoch": 1.55,
"learning_rate": 2.42093535719122e-05,
"loss": 1.0848,
"step": 419000
},
{
"epoch": 1.55,
"learning_rate": 2.417857714419372e-05,
"loss": 1.0869,
"step": 419500
},
{
"epoch": 1.55,
"learning_rate": 2.414780071647524e-05,
"loss": 1.0846,
"step": 420000
},
{
"epoch": 1.55,
"learning_rate": 2.4117024288756756e-05,
"loss": 1.0872,
"step": 420500
},
{
"epoch": 1.55,
"learning_rate": 2.4086247861038276e-05,
"loss": 1.0835,
"step": 421000
},
{
"epoch": 1.56,
"learning_rate": 2.4055471433319793e-05,
"loss": 1.0825,
"step": 421500
},
{
"epoch": 1.56,
"learning_rate": 2.402469500560131e-05,
"loss": 1.0898,
"step": 422000
},
{
"epoch": 1.56,
"learning_rate": 2.3993918577882828e-05,
"loss": 1.0779,
"step": 422500
},
{
"epoch": 1.56,
"learning_rate": 2.3963142150164348e-05,
"loss": 1.0823,
"step": 423000
},
{
"epoch": 1.56,
"learning_rate": 2.3932365722445865e-05,
"loss": 1.084,
"step": 423500
},
{
"epoch": 1.57,
"learning_rate": 2.3901589294727382e-05,
"loss": 1.0851,
"step": 424000
},
{
"epoch": 1.57,
"learning_rate": 2.3870812867008903e-05,
"loss": 1.086,
"step": 424500
},
{
"epoch": 1.57,
"learning_rate": 2.384003643929042e-05,
"loss": 1.0877,
"step": 425000
},
{
"epoch": 1.57,
"learning_rate": 2.3809260011571937e-05,
"loss": 1.0923,
"step": 425500
},
{
"epoch": 1.57,
"learning_rate": 2.3778483583853457e-05,
"loss": 1.0843,
"step": 426000
},
{
"epoch": 1.58,
"learning_rate": 2.3747707156134974e-05,
"loss": 1.0748,
"step": 426500
},
{
"epoch": 1.58,
"learning_rate": 2.371693072841649e-05,
"loss": 1.0833,
"step": 427000
},
{
"epoch": 1.58,
"learning_rate": 2.368615430069801e-05,
"loss": 1.0839,
"step": 427500
},
{
"epoch": 1.58,
"learning_rate": 2.365537787297953e-05,
"loss": 1.0942,
"step": 428000
},
{
"epoch": 1.58,
"learning_rate": 2.3624601445261046e-05,
"loss": 1.0913,
"step": 428500
},
{
"epoch": 1.58,
"learning_rate": 2.3593825017542563e-05,
"loss": 1.093,
"step": 429000
},
{
"epoch": 1.59,
"learning_rate": 2.3563048589824084e-05,
"loss": 1.0757,
"step": 429500
},
{
"epoch": 1.59,
"learning_rate": 2.35322721621056e-05,
"loss": 1.0821,
"step": 430000
},
{
"epoch": 1.59,
"learning_rate": 2.3501495734387118e-05,
"loss": 1.0827,
"step": 430500
},
{
"epoch": 1.59,
"learning_rate": 2.347071930666864e-05,
"loss": 1.0845,
"step": 431000
},
{
"epoch": 1.59,
"learning_rate": 2.3439942878950156e-05,
"loss": 1.0876,
"step": 431500
},
{
"epoch": 1.6,
"learning_rate": 2.3409166451231673e-05,
"loss": 1.0776,
"step": 432000
},
{
"epoch": 1.6,
"learning_rate": 2.337839002351319e-05,
"loss": 1.0837,
"step": 432500
},
{
"epoch": 1.6,
"learning_rate": 2.334761359579471e-05,
"loss": 1.075,
"step": 433000
},
{
"epoch": 1.6,
"learning_rate": 2.3316837168076227e-05,
"loss": 1.0931,
"step": 433500
},
{
"epoch": 1.6,
"learning_rate": 2.3286060740357744e-05,
"loss": 1.0905,
"step": 434000
},
{
"epoch": 1.6,
"learning_rate": 2.3255284312639265e-05,
"loss": 1.0864,
"step": 434500
},
{
"epoch": 1.61,
"learning_rate": 2.3224507884920782e-05,
"loss": 1.0845,
"step": 435000
},
{
"epoch": 1.61,
"learning_rate": 2.31937314572023e-05,
"loss": 1.0933,
"step": 435500
},
{
"epoch": 1.61,
"learning_rate": 2.316295502948382e-05,
"loss": 1.076,
"step": 436000
},
{
"epoch": 1.61,
"learning_rate": 2.3132178601765337e-05,
"loss": 1.0816,
"step": 436500
},
{
"epoch": 1.61,
"learning_rate": 2.3101402174046854e-05,
"loss": 1.0791,
"step": 437000
},
{
"epoch": 1.62,
"learning_rate": 2.3070625746328374e-05,
"loss": 1.0818,
"step": 437500
},
{
"epoch": 1.62,
"learning_rate": 2.303984931860989e-05,
"loss": 1.0844,
"step": 438000
},
{
"epoch": 1.62,
"learning_rate": 2.300907289089141e-05,
"loss": 1.0735,
"step": 438500
},
{
"epoch": 1.62,
"learning_rate": 2.2978296463172925e-05,
"loss": 1.0766,
"step": 439000
},
{
"epoch": 1.62,
"learning_rate": 2.2947520035454446e-05,
"loss": 1.0827,
"step": 439500
},
{
"epoch": 1.62,
"learning_rate": 2.2916743607735963e-05,
"loss": 1.0855,
"step": 440000
},
{
"epoch": 1.63,
"learning_rate": 2.288596718001748e-05,
"loss": 1.074,
"step": 440500
},
{
"epoch": 1.63,
"learning_rate": 2.2855190752299e-05,
"loss": 1.0849,
"step": 441000
},
{
"epoch": 1.63,
"learning_rate": 2.2824414324580518e-05,
"loss": 1.0771,
"step": 441500
},
{
"epoch": 1.63,
"learning_rate": 2.2793637896862035e-05,
"loss": 1.0858,
"step": 442000
},
{
"epoch": 1.63,
"learning_rate": 2.2762861469143555e-05,
"loss": 1.0815,
"step": 442500
},
{
"epoch": 1.64,
"learning_rate": 2.2732085041425072e-05,
"loss": 1.0783,
"step": 443000
},
{
"epoch": 1.64,
"learning_rate": 2.270130861370659e-05,
"loss": 1.0707,
"step": 443500
},
{
"epoch": 1.64,
"learning_rate": 2.2670532185988107e-05,
"loss": 1.0763,
"step": 444000
},
{
"epoch": 1.64,
"learning_rate": 2.2639755758269627e-05,
"loss": 1.0802,
"step": 444500
},
{
"epoch": 1.64,
"learning_rate": 2.2608979330551144e-05,
"loss": 1.0757,
"step": 445000
},
{
"epoch": 1.65,
"learning_rate": 2.257820290283266e-05,
"loss": 1.0804,
"step": 445500
},
{
"epoch": 1.65,
"learning_rate": 2.254742647511418e-05,
"loss": 1.0786,
"step": 446000
},
{
"epoch": 1.65,
"learning_rate": 2.25166500473957e-05,
"loss": 1.0785,
"step": 446500
},
{
"epoch": 1.65,
"learning_rate": 2.2485873619677216e-05,
"loss": 1.0696,
"step": 447000
},
{
"epoch": 1.65,
"learning_rate": 2.2455097191958736e-05,
"loss": 1.072,
"step": 447500
},
{
"epoch": 1.65,
"learning_rate": 2.2424320764240253e-05,
"loss": 1.0787,
"step": 448000
},
{
"epoch": 1.66,
"learning_rate": 2.239354433652177e-05,
"loss": 1.0763,
"step": 448500
},
{
"epoch": 1.66,
"learning_rate": 2.236276790880329e-05,
"loss": 1.0783,
"step": 449000
},
{
"epoch": 1.66,
"learning_rate": 2.2331991481084808e-05,
"loss": 1.0721,
"step": 449500
},
{
"epoch": 1.66,
"learning_rate": 2.2301215053366325e-05,
"loss": 1.0833,
"step": 450000
},
{
"epoch": 1.66,
"learning_rate": 2.2270438625647842e-05,
"loss": 1.0705,
"step": 450500
},
{
"epoch": 1.67,
"learning_rate": 2.2239662197929363e-05,
"loss": 1.0791,
"step": 451000
},
{
"epoch": 1.67,
"learning_rate": 2.220888577021088e-05,
"loss": 1.0793,
"step": 451500
},
{
"epoch": 1.67,
"learning_rate": 2.2178109342492397e-05,
"loss": 1.0814,
"step": 452000
},
{
"epoch": 1.67,
"learning_rate": 2.2147332914773917e-05,
"loss": 1.0752,
"step": 452500
},
{
"epoch": 1.67,
"learning_rate": 2.2116556487055434e-05,
"loss": 1.0876,
"step": 453000
},
{
"epoch": 1.67,
"learning_rate": 2.208578005933695e-05,
"loss": 1.072,
"step": 453500
},
{
"epoch": 1.68,
"learning_rate": 2.2055003631618472e-05,
"loss": 1.0739,
"step": 454000
},
{
"epoch": 1.68,
"learning_rate": 2.202422720389999e-05,
"loss": 1.0814,
"step": 454500
},
{
"epoch": 1.68,
"learning_rate": 2.1993450776181506e-05,
"loss": 1.0705,
"step": 455000
},
{
"epoch": 1.68,
"learning_rate": 2.1962674348463027e-05,
"loss": 1.0753,
"step": 455500
},
{
"epoch": 1.68,
"learning_rate": 2.1931897920744547e-05,
"loss": 1.0834,
"step": 456000
},
{
"epoch": 1.69,
"learning_rate": 2.1901121493026064e-05,
"loss": 1.0849,
"step": 456500
},
{
"epoch": 1.69,
"learning_rate": 2.187034506530758e-05,
"loss": 1.0623,
"step": 457000
},
{
"epoch": 1.69,
"learning_rate": 2.18395686375891e-05,
"loss": 1.0664,
"step": 457500
},
{
"epoch": 1.69,
"learning_rate": 2.180879220987062e-05,
"loss": 1.0791,
"step": 458000
},
{
"epoch": 1.69,
"learning_rate": 2.1778015782152136e-05,
"loss": 1.0707,
"step": 458500
},
{
"epoch": 1.7,
"learning_rate": 2.1747239354433653e-05,
"loss": 1.0701,
"step": 459000
},
{
"epoch": 1.7,
"learning_rate": 2.1716462926715174e-05,
"loss": 1.0753,
"step": 459500
},
{
"epoch": 1.7,
"learning_rate": 2.168568649899669e-05,
"loss": 1.0739,
"step": 460000
},
{
"epoch": 1.7,
"learning_rate": 2.1654910071278208e-05,
"loss": 1.0732,
"step": 460500
},
{
"epoch": 1.7,
"learning_rate": 2.1624133643559728e-05,
"loss": 1.068,
"step": 461000
},
{
"epoch": 1.7,
"learning_rate": 2.1593357215841245e-05,
"loss": 1.0816,
"step": 461500
},
{
"epoch": 1.71,
"learning_rate": 2.1562580788122762e-05,
"loss": 1.0682,
"step": 462000
},
{
"epoch": 1.71,
"learning_rate": 2.1531804360404283e-05,
"loss": 1.0639,
"step": 462500
},
{
"epoch": 1.71,
"learning_rate": 2.15010279326858e-05,
"loss": 1.0723,
"step": 463000
},
{
"epoch": 1.71,
"learning_rate": 2.1470251504967317e-05,
"loss": 1.074,
"step": 463500
},
{
"epoch": 1.71,
"learning_rate": 2.1439475077248834e-05,
"loss": 1.0734,
"step": 464000
},
{
"epoch": 1.72,
"learning_rate": 2.1408698649530355e-05,
"loss": 1.0698,
"step": 464500
},
{
"epoch": 1.72,
"learning_rate": 2.1377922221811872e-05,
"loss": 1.0669,
"step": 465000
},
{
"epoch": 1.72,
"learning_rate": 2.134714579409339e-05,
"loss": 1.0744,
"step": 465500
},
{
"epoch": 1.72,
"learning_rate": 2.131636936637491e-05,
"loss": 1.0741,
"step": 466000
},
{
"epoch": 1.72,
"learning_rate": 2.1285592938656426e-05,
"loss": 1.068,
"step": 466500
},
{
"epoch": 1.72,
"learning_rate": 2.1254816510937943e-05,
"loss": 1.0707,
"step": 467000
},
{
"epoch": 1.73,
"learning_rate": 2.1224040083219464e-05,
"loss": 1.0806,
"step": 467500
},
{
"epoch": 1.73,
"learning_rate": 2.119326365550098e-05,
"loss": 1.0701,
"step": 468000
},
{
"epoch": 1.73,
"learning_rate": 2.1162487227782498e-05,
"loss": 1.08,
"step": 468500
},
{
"epoch": 1.73,
"learning_rate": 2.1131710800064015e-05,
"loss": 1.0767,
"step": 469000
},
{
"epoch": 1.73,
"learning_rate": 2.1100934372345536e-05,
"loss": 1.0618,
"step": 469500
},
{
"epoch": 1.74,
"learning_rate": 2.1070157944627053e-05,
"loss": 1.0704,
"step": 470000
},
{
"epoch": 1.74,
"learning_rate": 2.103938151690857e-05,
"loss": 1.0647,
"step": 470500
},
{
"epoch": 1.74,
"learning_rate": 2.100860508919009e-05,
"loss": 1.0657,
"step": 471000
},
{
"epoch": 1.74,
"learning_rate": 2.0977828661471607e-05,
"loss": 1.064,
"step": 471500
},
{
"epoch": 1.74,
"learning_rate": 2.0947052233753125e-05,
"loss": 1.0709,
"step": 472000
},
{
"epoch": 1.75,
"learning_rate": 2.0916275806034645e-05,
"loss": 1.0733,
"step": 472500
},
{
"epoch": 1.75,
"learning_rate": 2.0885499378316162e-05,
"loss": 1.0783,
"step": 473000
},
{
"epoch": 1.75,
"learning_rate": 2.085472295059768e-05,
"loss": 1.0638,
"step": 473500
},
{
"epoch": 1.75,
"learning_rate": 2.08239465228792e-05,
"loss": 1.0615,
"step": 474000
},
{
"epoch": 1.75,
"learning_rate": 2.0793170095160717e-05,
"loss": 1.0698,
"step": 474500
},
{
"epoch": 1.75,
"learning_rate": 2.0762393667442234e-05,
"loss": 1.0687,
"step": 475000
},
{
"epoch": 1.76,
"learning_rate": 2.073161723972375e-05,
"loss": 1.0697,
"step": 475500
},
{
"epoch": 1.76,
"learning_rate": 2.070084081200527e-05,
"loss": 1.0668,
"step": 476000
},
{
"epoch": 1.76,
"learning_rate": 2.067006438428679e-05,
"loss": 1.0686,
"step": 476500
},
{
"epoch": 1.76,
"learning_rate": 2.0639287956568306e-05,
"loss": 1.0624,
"step": 477000
},
{
"epoch": 1.76,
"learning_rate": 2.0608511528849826e-05,
"loss": 1.0668,
"step": 477500
},
{
"epoch": 1.77,
"learning_rate": 2.0577735101131343e-05,
"loss": 1.0646,
"step": 478000
},
{
"epoch": 1.77,
"learning_rate": 2.054695867341286e-05,
"loss": 1.0695,
"step": 478500
},
{
"epoch": 1.77,
"learning_rate": 2.051618224569438e-05,
"loss": 1.0713,
"step": 479000
},
{
"epoch": 1.77,
"learning_rate": 2.0485405817975898e-05,
"loss": 1.0712,
"step": 479500
},
{
"epoch": 1.77,
"learning_rate": 2.0454629390257415e-05,
"loss": 1.065,
"step": 480000
},
{
"epoch": 1.77,
"learning_rate": 2.0423852962538932e-05,
"loss": 1.0627,
"step": 480500
},
{
"epoch": 1.78,
"learning_rate": 2.0393076534820452e-05,
"loss": 1.0649,
"step": 481000
},
{
"epoch": 1.78,
"learning_rate": 2.036230010710197e-05,
"loss": 1.0738,
"step": 481500
},
{
"epoch": 1.78,
"learning_rate": 2.0331523679383487e-05,
"loss": 1.0716,
"step": 482000
},
{
"epoch": 1.78,
"learning_rate": 2.0300747251665007e-05,
"loss": 1.0641,
"step": 482500
},
{
"epoch": 1.78,
"learning_rate": 2.0269970823946524e-05,
"loss": 1.0614,
"step": 483000
},
{
"epoch": 1.79,
"learning_rate": 2.023919439622804e-05,
"loss": 1.0647,
"step": 483500
},
{
"epoch": 1.79,
"learning_rate": 2.0208417968509562e-05,
"loss": 1.0593,
"step": 484000
},
{
"epoch": 1.79,
"learning_rate": 2.017764154079108e-05,
"loss": 1.0743,
"step": 484500
},
{
"epoch": 1.79,
"learning_rate": 2.0146865113072596e-05,
"loss": 1.0641,
"step": 485000
},
{
"epoch": 1.79,
"learning_rate": 2.0116088685354113e-05,
"loss": 1.0732,
"step": 485500
},
{
"epoch": 1.79,
"learning_rate": 2.0085312257635634e-05,
"loss": 1.0735,
"step": 486000
},
{
"epoch": 1.8,
"learning_rate": 2.005453582991715e-05,
"loss": 1.0573,
"step": 486500
},
{
"epoch": 1.8,
"learning_rate": 2.0023759402198668e-05,
"loss": 1.0641,
"step": 487000
},
{
"epoch": 1.8,
"learning_rate": 1.9992982974480188e-05,
"loss": 1.0585,
"step": 487500
},
{
"epoch": 1.8,
"learning_rate": 1.9962206546761705e-05,
"loss": 1.068,
"step": 488000
},
{
"epoch": 1.8,
"learning_rate": 1.9931430119043222e-05,
"loss": 1.0679,
"step": 488500
},
{
"epoch": 1.81,
"learning_rate": 1.9900653691324743e-05,
"loss": 1.0714,
"step": 489000
},
{
"epoch": 1.81,
"learning_rate": 1.986987726360626e-05,
"loss": 1.0592,
"step": 489500
},
{
"epoch": 1.81,
"learning_rate": 1.9839100835887777e-05,
"loss": 1.0718,
"step": 490000
},
{
"epoch": 1.81,
"learning_rate": 1.9808324408169297e-05,
"loss": 1.0697,
"step": 490500
},
{
"epoch": 1.81,
"learning_rate": 1.9777547980450815e-05,
"loss": 1.0602,
"step": 491000
},
{
"epoch": 1.82,
"learning_rate": 1.974677155273233e-05,
"loss": 1.0675,
"step": 491500
},
{
"epoch": 1.82,
"learning_rate": 1.971599512501385e-05,
"loss": 1.0659,
"step": 492000
},
{
"epoch": 1.82,
"learning_rate": 1.968521869729537e-05,
"loss": 1.0687,
"step": 492500
},
{
"epoch": 1.82,
"learning_rate": 1.9654442269576886e-05,
"loss": 1.0665,
"step": 493000
},
{
"epoch": 1.82,
"learning_rate": 1.9623665841858403e-05,
"loss": 1.0683,
"step": 493500
},
{
"epoch": 1.82,
"learning_rate": 1.9592889414139924e-05,
"loss": 1.0626,
"step": 494000
},
{
"epoch": 1.83,
"learning_rate": 1.956211298642144e-05,
"loss": 1.0596,
"step": 494500
},
{
"epoch": 1.83,
"learning_rate": 1.9531336558702958e-05,
"loss": 1.065,
"step": 495000
},
{
"epoch": 1.83,
"learning_rate": 1.950056013098448e-05,
"loss": 1.0709,
"step": 495500
},
{
"epoch": 1.83,
"learning_rate": 1.9469783703265996e-05,
"loss": 1.0693,
"step": 496000
},
{
"epoch": 1.83,
"learning_rate": 1.9439007275547513e-05,
"loss": 1.0558,
"step": 496500
},
{
"epoch": 1.84,
"learning_rate": 1.940823084782903e-05,
"loss": 1.0681,
"step": 497000
},
{
"epoch": 1.84,
"learning_rate": 1.937745442011055e-05,
"loss": 1.0686,
"step": 497500
},
{
"epoch": 1.84,
"learning_rate": 1.9346677992392067e-05,
"loss": 1.0641,
"step": 498000
},
{
"epoch": 1.84,
"learning_rate": 1.9315901564673584e-05,
"loss": 1.0601,
"step": 498500
},
{
"epoch": 1.84,
"learning_rate": 1.9285125136955105e-05,
"loss": 1.0638,
"step": 499000
},
{
"epoch": 1.84,
"learning_rate": 1.9254348709236622e-05,
"loss": 1.0572,
"step": 499500
},
{
"epoch": 1.85,
"learning_rate": 1.922357228151814e-05,
"loss": 1.063,
"step": 500000
},
{
"epoch": 1.85,
"learning_rate": 1.919279585379966e-05,
"loss": 1.0585,
"step": 500500
},
{
"epoch": 1.85,
"learning_rate": 1.9162019426081177e-05,
"loss": 1.0664,
"step": 501000
},
{
"epoch": 1.85,
"learning_rate": 1.9131242998362694e-05,
"loss": 1.0591,
"step": 501500
},
{
"epoch": 1.85,
"learning_rate": 1.9100466570644214e-05,
"loss": 1.0509,
"step": 502000
},
{
"epoch": 1.86,
"learning_rate": 1.906969014292573e-05,
"loss": 1.0532,
"step": 502500
},
{
"epoch": 1.86,
"learning_rate": 1.903891371520725e-05,
"loss": 1.0721,
"step": 503000
},
{
"epoch": 1.86,
"learning_rate": 1.9008137287488766e-05,
"loss": 1.0554,
"step": 503500
},
{
"epoch": 1.86,
"learning_rate": 1.8977360859770286e-05,
"loss": 1.0689,
"step": 504000
},
{
"epoch": 1.86,
"learning_rate": 1.8946584432051803e-05,
"loss": 1.0692,
"step": 504500
},
{
"epoch": 1.87,
"learning_rate": 1.891580800433332e-05,
"loss": 1.0658,
"step": 505000
},
{
"epoch": 1.87,
"learning_rate": 1.888503157661484e-05,
"loss": 1.0602,
"step": 505500
},
{
"epoch": 1.87,
"learning_rate": 1.8854255148896358e-05,
"loss": 1.0624,
"step": 506000
},
{
"epoch": 1.87,
"learning_rate": 1.8823478721177875e-05,
"loss": 1.0545,
"step": 506500
},
{
"epoch": 1.87,
"learning_rate": 1.8792702293459395e-05,
"loss": 1.0551,
"step": 507000
},
{
"epoch": 1.87,
"learning_rate": 1.8761925865740912e-05,
"loss": 1.0572,
"step": 507500
},
{
"epoch": 1.88,
"learning_rate": 1.873114943802243e-05,
"loss": 1.0695,
"step": 508000
},
{
"epoch": 1.88,
"learning_rate": 1.8700373010303947e-05,
"loss": 1.0517,
"step": 508500
},
{
"epoch": 1.88,
"learning_rate": 1.8669596582585467e-05,
"loss": 1.0665,
"step": 509000
},
{
"epoch": 1.88,
"learning_rate": 1.8638820154866984e-05,
"loss": 1.0576,
"step": 509500
},
{
"epoch": 1.88,
"learning_rate": 1.86080437271485e-05,
"loss": 1.0605,
"step": 510000
},
{
"epoch": 1.89,
"learning_rate": 1.8577267299430022e-05,
"loss": 1.0606,
"step": 510500
},
{
"epoch": 1.89,
"learning_rate": 1.854649087171154e-05,
"loss": 1.0629,
"step": 511000
},
{
"epoch": 1.89,
"learning_rate": 1.8515714443993056e-05,
"loss": 1.06,
"step": 511500
},
{
"epoch": 1.89,
"learning_rate": 1.8484938016274576e-05,
"loss": 1.0572,
"step": 512000
},
{
"epoch": 1.89,
"learning_rate": 1.8454161588556093e-05,
"loss": 1.059,
"step": 512500
},
{
"epoch": 1.89,
"learning_rate": 1.842338516083761e-05,
"loss": 1.0534,
"step": 513000
},
{
"epoch": 1.9,
"learning_rate": 1.8392608733119128e-05,
"loss": 1.0593,
"step": 513500
},
{
"epoch": 1.9,
"learning_rate": 1.8361832305400648e-05,
"loss": 1.065,
"step": 514000
},
{
"epoch": 1.9,
"learning_rate": 1.8331055877682165e-05,
"loss": 1.0593,
"step": 514500
},
{
"epoch": 1.9,
"learning_rate": 1.8300279449963682e-05,
"loss": 1.0554,
"step": 515000
},
{
"epoch": 1.9,
"learning_rate": 1.8269503022245203e-05,
"loss": 1.0605,
"step": 515500
},
{
"epoch": 1.91,
"learning_rate": 1.823872659452672e-05,
"loss": 1.0586,
"step": 516000
},
{
"epoch": 1.91,
"learning_rate": 1.8207950166808237e-05,
"loss": 1.0597,
"step": 516500
},
{
"epoch": 1.91,
"learning_rate": 1.8177173739089757e-05,
"loss": 1.0561,
"step": 517000
},
{
"epoch": 1.91,
"learning_rate": 1.8146397311371275e-05,
"loss": 1.0484,
"step": 517500
},
{
"epoch": 1.91,
"learning_rate": 1.811562088365279e-05,
"loss": 1.0564,
"step": 518000
},
{
"epoch": 1.91,
"learning_rate": 1.8084844455934312e-05,
"loss": 1.0528,
"step": 518500
},
{
"epoch": 1.92,
"learning_rate": 1.805406802821583e-05,
"loss": 1.0546,
"step": 519000
},
{
"epoch": 1.92,
"learning_rate": 1.8023291600497346e-05,
"loss": 1.0573,
"step": 519500
},
{
"epoch": 1.92,
"learning_rate": 1.7992515172778863e-05,
"loss": 1.0481,
"step": 520000
},
{
"epoch": 1.92,
"learning_rate": 1.7961738745060384e-05,
"loss": 1.0636,
"step": 520500
},
{
"epoch": 1.92,
"learning_rate": 1.79309623173419e-05,
"loss": 1.0585,
"step": 521000
},
{
"epoch": 1.93,
"learning_rate": 1.7900185889623418e-05,
"loss": 1.0595,
"step": 521500
},
{
"epoch": 1.93,
"learning_rate": 1.786940946190494e-05,
"loss": 1.0621,
"step": 522000
},
{
"epoch": 1.93,
"learning_rate": 1.7838633034186456e-05,
"loss": 1.0544,
"step": 522500
},
{
"epoch": 1.93,
"learning_rate": 1.7807856606467973e-05,
"loss": 1.0543,
"step": 523000
},
{
"epoch": 1.93,
"learning_rate": 1.7777080178749493e-05,
"loss": 1.0646,
"step": 523500
},
{
"epoch": 1.94,
"learning_rate": 1.774630375103101e-05,
"loss": 1.0572,
"step": 524000
},
{
"epoch": 1.94,
"learning_rate": 1.7715527323312527e-05,
"loss": 1.0555,
"step": 524500
},
{
"epoch": 1.94,
"learning_rate": 1.7684750895594044e-05,
"loss": 1.06,
"step": 525000
},
{
"epoch": 1.94,
"learning_rate": 1.765397446787557e-05,
"loss": 1.0527,
"step": 525500
},
{
"epoch": 1.94,
"learning_rate": 1.7623198040157085e-05,
"loss": 1.0552,
"step": 526000
},
{
"epoch": 1.94,
"learning_rate": 1.7592421612438603e-05,
"loss": 1.0586,
"step": 526500
},
{
"epoch": 1.95,
"learning_rate": 1.7561645184720123e-05,
"loss": 1.0532,
"step": 527000
},
{
"epoch": 1.95,
"learning_rate": 1.753086875700164e-05,
"loss": 1.0546,
"step": 527500
},
{
"epoch": 1.95,
"learning_rate": 1.7500092329283157e-05,
"loss": 1.0597,
"step": 528000
},
{
"epoch": 1.95,
"learning_rate": 1.7469315901564674e-05,
"loss": 1.0543,
"step": 528500
},
{
"epoch": 1.95,
"learning_rate": 1.7438539473846195e-05,
"loss": 1.0525,
"step": 529000
},
{
"epoch": 1.96,
"learning_rate": 1.7407763046127712e-05,
"loss": 1.0597,
"step": 529500
},
{
"epoch": 1.96,
"learning_rate": 1.737698661840923e-05,
"loss": 1.0478,
"step": 530000
},
{
"epoch": 1.96,
"learning_rate": 1.734621019069075e-05,
"loss": 1.0532,
"step": 530500
},
{
"epoch": 1.96,
"learning_rate": 1.7315433762972266e-05,
"loss": 1.0545,
"step": 531000
},
{
"epoch": 1.96,
"learning_rate": 1.7284657335253784e-05,
"loss": 1.0438,
"step": 531500
},
{
"epoch": 1.96,
"learning_rate": 1.7253880907535304e-05,
"loss": 1.0544,
"step": 532000
},
{
"epoch": 1.97,
"learning_rate": 1.722310447981682e-05,
"loss": 1.0539,
"step": 532500
},
{
"epoch": 1.97,
"learning_rate": 1.7192328052098338e-05,
"loss": 1.0544,
"step": 533000
},
{
"epoch": 1.97,
"learning_rate": 1.7161551624379855e-05,
"loss": 1.0535,
"step": 533500
},
{
"epoch": 1.97,
"learning_rate": 1.7130775196661376e-05,
"loss": 1.0501,
"step": 534000
},
{
"epoch": 1.97,
"learning_rate": 1.7099998768942893e-05,
"loss": 1.0558,
"step": 534500
},
{
"epoch": 1.98,
"learning_rate": 1.706922234122441e-05,
"loss": 1.0466,
"step": 535000
},
{
"epoch": 1.98,
"learning_rate": 1.703844591350593e-05,
"loss": 1.0561,
"step": 535500
},
{
"epoch": 1.98,
"learning_rate": 1.7007669485787448e-05,
"loss": 1.0504,
"step": 536000
},
{
"epoch": 1.98,
"learning_rate": 1.6976893058068965e-05,
"loss": 1.0419,
"step": 536500
},
{
"epoch": 1.98,
"learning_rate": 1.6946116630350485e-05,
"loss": 1.0501,
"step": 537000
},
{
"epoch": 1.99,
"learning_rate": 1.6915340202632002e-05,
"loss": 1.0584,
"step": 537500
},
{
"epoch": 1.99,
"learning_rate": 1.688456377491352e-05,
"loss": 1.0499,
"step": 538000
},
{
"epoch": 1.99,
"learning_rate": 1.6853787347195036e-05,
"loss": 1.0525,
"step": 538500
},
{
"epoch": 1.99,
"learning_rate": 1.6823010919476557e-05,
"loss": 1.0508,
"step": 539000
},
{
"epoch": 1.99,
"learning_rate": 1.6792234491758074e-05,
"loss": 1.0516,
"step": 539500
},
{
"epoch": 1.99,
"learning_rate": 1.676145806403959e-05,
"loss": 1.0469,
"step": 540000
},
{
"epoch": 2.0,
"learning_rate": 1.673068163632111e-05,
"loss": 1.0507,
"step": 540500
},
{
"epoch": 2.0,
"learning_rate": 1.669990520860263e-05,
"loss": 1.053,
"step": 541000
},
{
"epoch": 2.0,
"learning_rate": 1.6669128780884146e-05,
"loss": 1.0577,
"step": 541500
},
{
"epoch": 2.0,
"learning_rate": 1.6638352353165666e-05,
"loss": 1.0475,
"step": 542000
},
{
"epoch": 2.0,
"learning_rate": 1.6607575925447183e-05,
"loss": 1.0467,
"step": 542500
},
{
"epoch": 2.01,
"learning_rate": 1.65767994977287e-05,
"loss": 1.0514,
"step": 543000
},
{
"epoch": 2.01,
"learning_rate": 1.654602307001022e-05,
"loss": 1.0497,
"step": 543500
},
{
"epoch": 2.01,
"learning_rate": 1.6515246642291738e-05,
"loss": 1.0483,
"step": 544000
},
{
"epoch": 2.01,
"learning_rate": 1.6484470214573255e-05,
"loss": 1.0512,
"step": 544500
},
{
"epoch": 2.01,
"learning_rate": 1.6453693786854772e-05,
"loss": 1.0497,
"step": 545000
},
{
"epoch": 2.01,
"learning_rate": 1.6422917359136293e-05,
"loss": 1.0509,
"step": 545500
},
{
"epoch": 2.02,
"learning_rate": 1.639214093141781e-05,
"loss": 1.0495,
"step": 546000
},
{
"epoch": 2.02,
"learning_rate": 1.6361364503699327e-05,
"loss": 1.0474,
"step": 546500
},
{
"epoch": 2.02,
"learning_rate": 1.6330588075980847e-05,
"loss": 1.0404,
"step": 547000
},
{
"epoch": 2.02,
"learning_rate": 1.6299811648262364e-05,
"loss": 1.0409,
"step": 547500
},
{
"epoch": 2.02,
"learning_rate": 1.626903522054388e-05,
"loss": 1.0383,
"step": 548000
},
{
"epoch": 2.03,
"learning_rate": 1.6238258792825402e-05,
"loss": 1.0425,
"step": 548500
},
{
"epoch": 2.03,
"learning_rate": 1.620748236510692e-05,
"loss": 1.0421,
"step": 549000
},
{
"epoch": 2.03,
"learning_rate": 1.6176705937388436e-05,
"loss": 1.0518,
"step": 549500
},
{
"epoch": 2.03,
"learning_rate": 1.6145929509669953e-05,
"loss": 1.0409,
"step": 550000
},
{
"epoch": 2.03,
"learning_rate": 1.6115153081951474e-05,
"loss": 1.0464,
"step": 550500
},
{
"epoch": 2.03,
"learning_rate": 1.608437665423299e-05,
"loss": 1.038,
"step": 551000
},
{
"epoch": 2.04,
"learning_rate": 1.6053600226514508e-05,
"loss": 1.0489,
"step": 551500
},
{
"epoch": 2.04,
"learning_rate": 1.6022823798796028e-05,
"loss": 1.0533,
"step": 552000
},
{
"epoch": 2.04,
"learning_rate": 1.5992047371077545e-05,
"loss": 1.0447,
"step": 552500
},
{
"epoch": 2.04,
"learning_rate": 1.5961270943359062e-05,
"loss": 1.0416,
"step": 553000
},
{
"epoch": 2.04,
"learning_rate": 1.5930494515640583e-05,
"loss": 1.0463,
"step": 553500
},
{
"epoch": 2.05,
"learning_rate": 1.58997180879221e-05,
"loss": 1.0365,
"step": 554000
},
{
"epoch": 2.05,
"learning_rate": 1.5868941660203617e-05,
"loss": 1.0371,
"step": 554500
},
{
"epoch": 2.05,
"learning_rate": 1.5838165232485138e-05,
"loss": 1.0506,
"step": 555000
},
{
"epoch": 2.05,
"learning_rate": 1.5807388804766655e-05,
"loss": 1.0464,
"step": 555500
},
{
"epoch": 2.05,
"learning_rate": 1.5776612377048172e-05,
"loss": 1.0452,
"step": 556000
},
{
"epoch": 2.06,
"learning_rate": 1.574583594932969e-05,
"loss": 1.0499,
"step": 556500
},
{
"epoch": 2.06,
"learning_rate": 1.571505952161121e-05,
"loss": 1.0477,
"step": 557000
},
{
"epoch": 2.06,
"learning_rate": 1.5684283093892726e-05,
"loss": 1.0486,
"step": 557500
},
{
"epoch": 2.06,
"learning_rate": 1.5653506666174244e-05,
"loss": 1.0367,
"step": 558000
},
{
"epoch": 2.06,
"learning_rate": 1.5622730238455764e-05,
"loss": 1.0467,
"step": 558500
},
{
"epoch": 2.06,
"learning_rate": 1.559195381073728e-05,
"loss": 1.0455,
"step": 559000
},
{
"epoch": 2.07,
"learning_rate": 1.5561177383018798e-05,
"loss": 1.0463,
"step": 559500
},
{
"epoch": 2.07,
"learning_rate": 1.553040095530032e-05,
"loss": 1.0446,
"step": 560000
},
{
"epoch": 2.07,
"learning_rate": 1.5499624527581836e-05,
"loss": 1.0366,
"step": 560500
},
{
"epoch": 2.07,
"learning_rate": 1.5468848099863353e-05,
"loss": 1.0396,
"step": 561000
},
{
"epoch": 2.07,
"learning_rate": 1.543807167214487e-05,
"loss": 1.041,
"step": 561500
},
{
"epoch": 2.08,
"learning_rate": 1.540729524442639e-05,
"loss": 1.0454,
"step": 562000
},
{
"epoch": 2.08,
"learning_rate": 1.5376518816707908e-05,
"loss": 1.0422,
"step": 562500
},
{
"epoch": 2.08,
"learning_rate": 1.5345742388989425e-05,
"loss": 1.042,
"step": 563000
},
{
"epoch": 2.08,
"learning_rate": 1.5314965961270945e-05,
"loss": 1.0425,
"step": 563500
},
{
"epoch": 2.08,
"learning_rate": 1.5284189533552462e-05,
"loss": 1.0461,
"step": 564000
},
{
"epoch": 2.08,
"learning_rate": 1.5253413105833981e-05,
"loss": 1.0457,
"step": 564500
},
{
"epoch": 2.09,
"learning_rate": 1.5222636678115498e-05,
"loss": 1.0344,
"step": 565000
},
{
"epoch": 2.09,
"learning_rate": 1.5191860250397017e-05,
"loss": 1.0468,
"step": 565500
},
{
"epoch": 2.09,
"learning_rate": 1.5161083822678534e-05,
"loss": 1.0469,
"step": 566000
},
{
"epoch": 2.09,
"learning_rate": 1.5130307394960053e-05,
"loss": 1.0401,
"step": 566500
},
{
"epoch": 2.09,
"learning_rate": 1.5099530967241571e-05,
"loss": 1.0404,
"step": 567000
},
{
"epoch": 2.1,
"learning_rate": 1.5068754539523089e-05,
"loss": 1.0428,
"step": 567500
},
{
"epoch": 2.1,
"learning_rate": 1.5037978111804607e-05,
"loss": 1.0439,
"step": 568000
},
{
"epoch": 2.1,
"learning_rate": 1.5007201684086124e-05,
"loss": 1.0397,
"step": 568500
},
{
"epoch": 2.1,
"learning_rate": 1.4976425256367643e-05,
"loss": 1.0346,
"step": 569000
},
{
"epoch": 2.1,
"learning_rate": 1.4945648828649162e-05,
"loss": 1.0366,
"step": 569500
},
{
"epoch": 2.11,
"learning_rate": 1.4914872400930679e-05,
"loss": 1.0415,
"step": 570000
},
{
"epoch": 2.11,
"learning_rate": 1.4884095973212198e-05,
"loss": 1.0388,
"step": 570500
},
{
"epoch": 2.11,
"learning_rate": 1.4853319545493715e-05,
"loss": 1.0404,
"step": 571000
},
{
"epoch": 2.11,
"learning_rate": 1.4822543117775234e-05,
"loss": 1.0359,
"step": 571500
},
{
"epoch": 2.11,
"learning_rate": 1.4791766690056753e-05,
"loss": 1.0466,
"step": 572000
},
{
"epoch": 2.11,
"learning_rate": 1.476099026233827e-05,
"loss": 1.0335,
"step": 572500
},
{
"epoch": 2.12,
"learning_rate": 1.4730213834619788e-05,
"loss": 1.0328,
"step": 573000
},
{
"epoch": 2.12,
"learning_rate": 1.4699437406901306e-05,
"loss": 1.0387,
"step": 573500
},
{
"epoch": 2.12,
"learning_rate": 1.4668660979182824e-05,
"loss": 1.0415,
"step": 574000
},
{
"epoch": 2.12,
"learning_rate": 1.4637884551464343e-05,
"loss": 1.0438,
"step": 574500
},
{
"epoch": 2.12,
"learning_rate": 1.460710812374586e-05,
"loss": 1.0379,
"step": 575000
},
{
"epoch": 2.13,
"learning_rate": 1.4576331696027379e-05,
"loss": 1.0424,
"step": 575500
},
{
"epoch": 2.13,
"learning_rate": 1.4545555268308898e-05,
"loss": 1.0308,
"step": 576000
},
{
"epoch": 2.13,
"learning_rate": 1.4514778840590415e-05,
"loss": 1.0334,
"step": 576500
},
{
"epoch": 2.13,
"learning_rate": 1.4484002412871934e-05,
"loss": 1.0349,
"step": 577000
},
{
"epoch": 2.13,
"learning_rate": 1.445322598515345e-05,
"loss": 1.0468,
"step": 577500
},
{
"epoch": 2.13,
"learning_rate": 1.442244955743497e-05,
"loss": 1.0394,
"step": 578000
},
{
"epoch": 2.14,
"learning_rate": 1.4391673129716488e-05,
"loss": 1.0469,
"step": 578500
},
{
"epoch": 2.14,
"learning_rate": 1.4360896701998005e-05,
"loss": 1.0423,
"step": 579000
},
{
"epoch": 2.14,
"learning_rate": 1.4330120274279524e-05,
"loss": 1.0297,
"step": 579500
},
{
"epoch": 2.14,
"learning_rate": 1.4299343846561041e-05,
"loss": 1.0411,
"step": 580000
},
{
"epoch": 2.14,
"learning_rate": 1.426856741884256e-05,
"loss": 1.0392,
"step": 580500
},
{
"epoch": 2.15,
"learning_rate": 1.4237790991124079e-05,
"loss": 1.0259,
"step": 581000
},
{
"epoch": 2.15,
"learning_rate": 1.4207014563405596e-05,
"loss": 1.035,
"step": 581500
},
{
"epoch": 2.15,
"learning_rate": 1.4176238135687115e-05,
"loss": 1.0395,
"step": 582000
},
{
"epoch": 2.15,
"learning_rate": 1.4145461707968632e-05,
"loss": 1.0358,
"step": 582500
},
{
"epoch": 2.15,
"learning_rate": 1.411468528025015e-05,
"loss": 1.0413,
"step": 583000
},
{
"epoch": 2.15,
"learning_rate": 1.408390885253167e-05,
"loss": 1.0329,
"step": 583500
},
{
"epoch": 2.16,
"learning_rate": 1.4053132424813186e-05,
"loss": 1.039,
"step": 584000
},
{
"epoch": 2.16,
"learning_rate": 1.4022355997094705e-05,
"loss": 1.0357,
"step": 584500
},
{
"epoch": 2.16,
"learning_rate": 1.3991579569376222e-05,
"loss": 1.0297,
"step": 585000
},
{
"epoch": 2.16,
"learning_rate": 1.3960803141657741e-05,
"loss": 1.0311,
"step": 585500
},
{
"epoch": 2.16,
"learning_rate": 1.393002671393926e-05,
"loss": 1.0412,
"step": 586000
},
{
"epoch": 2.17,
"learning_rate": 1.3899250286220777e-05,
"loss": 1.0364,
"step": 586500
},
{
"epoch": 2.17,
"learning_rate": 1.3868473858502296e-05,
"loss": 1.0375,
"step": 587000
},
{
"epoch": 2.17,
"learning_rate": 1.3837697430783813e-05,
"loss": 1.0295,
"step": 587500
},
{
"epoch": 2.17,
"learning_rate": 1.3806921003065332e-05,
"loss": 1.0396,
"step": 588000
},
{
"epoch": 2.17,
"learning_rate": 1.377614457534685e-05,
"loss": 1.0308,
"step": 588500
},
{
"epoch": 2.18,
"learning_rate": 1.3745368147628367e-05,
"loss": 1.0396,
"step": 589000
},
{
"epoch": 2.18,
"learning_rate": 1.3714591719909886e-05,
"loss": 1.0346,
"step": 589500
},
{
"epoch": 2.18,
"learning_rate": 1.3683815292191405e-05,
"loss": 1.0344,
"step": 590000
},
{
"epoch": 2.18,
"learning_rate": 1.3653038864472922e-05,
"loss": 1.0344,
"step": 590500
},
{
"epoch": 2.18,
"learning_rate": 1.3622262436754441e-05,
"loss": 1.0258,
"step": 591000
},
{
"epoch": 2.18,
"learning_rate": 1.3591486009035958e-05,
"loss": 1.0383,
"step": 591500
},
{
"epoch": 2.19,
"learning_rate": 1.3560709581317477e-05,
"loss": 1.0299,
"step": 592000
},
{
"epoch": 2.19,
"learning_rate": 1.3529933153598996e-05,
"loss": 1.0326,
"step": 592500
},
{
"epoch": 2.19,
"learning_rate": 1.3499156725880513e-05,
"loss": 1.0393,
"step": 593000
},
{
"epoch": 2.19,
"learning_rate": 1.3468380298162031e-05,
"loss": 1.0327,
"step": 593500
},
{
"epoch": 2.19,
"learning_rate": 1.3437603870443549e-05,
"loss": 1.0373,
"step": 594000
},
{
"epoch": 2.2,
"learning_rate": 1.3406827442725067e-05,
"loss": 1.038,
"step": 594500
},
{
"epoch": 2.2,
"learning_rate": 1.3376051015006586e-05,
"loss": 1.0346,
"step": 595000
},
{
"epoch": 2.2,
"learning_rate": 1.3345274587288107e-05,
"loss": 1.025,
"step": 595500
},
{
"epoch": 2.2,
"learning_rate": 1.3314498159569624e-05,
"loss": 1.0369,
"step": 596000
},
{
"epoch": 2.2,
"learning_rate": 1.3283721731851142e-05,
"loss": 1.027,
"step": 596500
},
{
"epoch": 2.2,
"learning_rate": 1.3252945304132661e-05,
"loss": 1.037,
"step": 597000
},
{
"epoch": 2.21,
"learning_rate": 1.3222168876414178e-05,
"loss": 1.0388,
"step": 597500
},
{
"epoch": 2.21,
"learning_rate": 1.3191392448695697e-05,
"loss": 1.0262,
"step": 598000
},
{
"epoch": 2.21,
"learning_rate": 1.3160616020977214e-05,
"loss": 1.0306,
"step": 598500
},
{
"epoch": 2.21,
"learning_rate": 1.3129839593258733e-05,
"loss": 1.0307,
"step": 599000
},
{
"epoch": 2.21,
"learning_rate": 1.3099063165540252e-05,
"loss": 1.0347,
"step": 599500
},
{
"epoch": 2.22,
"learning_rate": 1.3068286737821769e-05,
"loss": 1.0303,
"step": 600000
},
{
"epoch": 2.22,
"learning_rate": 1.3037510310103288e-05,
"loss": 1.0367,
"step": 600500
},
{
"epoch": 2.22,
"learning_rate": 1.3006733882384806e-05,
"loss": 1.0267,
"step": 601000
},
{
"epoch": 2.22,
"learning_rate": 1.2975957454666324e-05,
"loss": 1.0302,
"step": 601500
},
{
"epoch": 2.22,
"learning_rate": 1.2945181026947842e-05,
"loss": 1.033,
"step": 602000
},
{
"epoch": 2.23,
"learning_rate": 1.291440459922936e-05,
"loss": 1.0207,
"step": 602500
},
{
"epoch": 2.23,
"learning_rate": 1.2883628171510878e-05,
"loss": 1.0293,
"step": 603000
},
{
"epoch": 2.23,
"learning_rate": 1.2852851743792397e-05,
"loss": 1.0309,
"step": 603500
},
{
"epoch": 2.23,
"learning_rate": 1.2822075316073914e-05,
"loss": 1.04,
"step": 604000
},
{
"epoch": 2.23,
"learning_rate": 1.2791298888355433e-05,
"loss": 1.029,
"step": 604500
},
{
"epoch": 2.23,
"learning_rate": 1.276052246063695e-05,
"loss": 1.0284,
"step": 605000
},
{
"epoch": 2.24,
"learning_rate": 1.2729746032918469e-05,
"loss": 1.0186,
"step": 605500
},
{
"epoch": 2.24,
"learning_rate": 1.2698969605199987e-05,
"loss": 1.0257,
"step": 606000
},
{
"epoch": 2.24,
"learning_rate": 1.2668193177481505e-05,
"loss": 1.0252,
"step": 606500
},
{
"epoch": 2.24,
"learning_rate": 1.2637416749763023e-05,
"loss": 1.0336,
"step": 607000
},
{
"epoch": 2.24,
"learning_rate": 1.260664032204454e-05,
"loss": 1.0255,
"step": 607500
},
{
"epoch": 2.25,
"learning_rate": 1.257586389432606e-05,
"loss": 1.0264,
"step": 608000
},
{
"epoch": 2.25,
"learning_rate": 1.2545087466607578e-05,
"loss": 1.0306,
"step": 608500
},
{
"epoch": 2.25,
"learning_rate": 1.2514311038889095e-05,
"loss": 1.0351,
"step": 609000
},
{
"epoch": 2.25,
"learning_rate": 1.2483534611170612e-05,
"loss": 1.0356,
"step": 609500
},
{
"epoch": 2.25,
"learning_rate": 1.2452758183452131e-05,
"loss": 1.024,
"step": 610000
},
{
"epoch": 2.25,
"learning_rate": 1.2421981755733648e-05,
"loss": 1.028,
"step": 610500
},
{
"epoch": 2.26,
"learning_rate": 1.2391205328015167e-05,
"loss": 1.0263,
"step": 611000
},
{
"epoch": 2.26,
"learning_rate": 1.2360428900296684e-05,
"loss": 1.0299,
"step": 611500
},
{
"epoch": 2.26,
"learning_rate": 1.2329652472578203e-05,
"loss": 1.0258,
"step": 612000
},
{
"epoch": 2.26,
"learning_rate": 1.2298876044859722e-05,
"loss": 1.0265,
"step": 612500
},
{
"epoch": 2.26,
"learning_rate": 1.226809961714124e-05,
"loss": 1.0224,
"step": 613000
},
{
"epoch": 2.27,
"learning_rate": 1.2237323189422759e-05,
"loss": 1.0276,
"step": 613500
},
{
"epoch": 2.27,
"learning_rate": 1.2206546761704276e-05,
"loss": 1.0318,
"step": 614000
},
{
"epoch": 2.27,
"learning_rate": 1.2175770333985795e-05,
"loss": 1.0265,
"step": 614500
},
{
"epoch": 2.27,
"learning_rate": 1.2144993906267314e-05,
"loss": 1.0305,
"step": 615000
},
{
"epoch": 2.27,
"learning_rate": 1.211421747854883e-05,
"loss": 1.0289,
"step": 615500
},
{
"epoch": 2.27,
"learning_rate": 1.208344105083035e-05,
"loss": 1.0209,
"step": 616000
},
{
"epoch": 2.28,
"learning_rate": 1.2052664623111867e-05,
"loss": 1.0253,
"step": 616500
},
{
"epoch": 2.28,
"learning_rate": 1.2021888195393385e-05,
"loss": 1.0313,
"step": 617000
},
{
"epoch": 2.28,
"learning_rate": 1.1991111767674904e-05,
"loss": 1.025,
"step": 617500
},
{
"epoch": 2.28,
"learning_rate": 1.1960335339956421e-05,
"loss": 1.0258,
"step": 618000
},
{
"epoch": 2.28,
"learning_rate": 1.192955891223794e-05,
"loss": 1.016,
"step": 618500
},
{
"epoch": 2.29,
"learning_rate": 1.1898782484519457e-05,
"loss": 1.0291,
"step": 619000
},
{
"epoch": 2.29,
"learning_rate": 1.1868006056800976e-05,
"loss": 1.0263,
"step": 619500
},
{
"epoch": 2.29,
"learning_rate": 1.1837229629082495e-05,
"loss": 1.0274,
"step": 620000
},
{
"epoch": 2.29,
"learning_rate": 1.1806453201364012e-05,
"loss": 1.0261,
"step": 620500
},
{
"epoch": 2.29,
"learning_rate": 1.177567677364553e-05,
"loss": 1.0293,
"step": 621000
},
{
"epoch": 2.3,
"learning_rate": 1.1744900345927048e-05,
"loss": 1.0311,
"step": 621500
},
{
"epoch": 2.3,
"learning_rate": 1.1714123918208567e-05,
"loss": 1.0236,
"step": 622000
},
{
"epoch": 2.3,
"learning_rate": 1.1683347490490085e-05,
"loss": 1.0212,
"step": 622500
},
{
"epoch": 2.3,
"learning_rate": 1.1652571062771602e-05,
"loss": 1.0271,
"step": 623000
},
{
"epoch": 2.3,
"learning_rate": 1.1621794635053121e-05,
"loss": 1.018,
"step": 623500
},
{
"epoch": 2.3,
"learning_rate": 1.1591018207334638e-05,
"loss": 1.032,
"step": 624000
},
{
"epoch": 2.31,
"learning_rate": 1.1560241779616157e-05,
"loss": 1.0238,
"step": 624500
},
{
"epoch": 2.31,
"learning_rate": 1.1529465351897676e-05,
"loss": 1.0294,
"step": 625000
},
{
"epoch": 2.31,
"learning_rate": 1.1498688924179193e-05,
"loss": 1.0274,
"step": 625500
},
{
"epoch": 2.31,
"learning_rate": 1.1467912496460712e-05,
"loss": 1.0283,
"step": 626000
},
{
"epoch": 2.31,
"learning_rate": 1.1437136068742229e-05,
"loss": 1.0293,
"step": 626500
},
{
"epoch": 2.32,
"learning_rate": 1.1406359641023748e-05,
"loss": 1.023,
"step": 627000
},
{
"epoch": 2.32,
"learning_rate": 1.1375583213305266e-05,
"loss": 1.0227,
"step": 627500
},
{
"epoch": 2.32,
"learning_rate": 1.1344806785586783e-05,
"loss": 1.0293,
"step": 628000
},
{
"epoch": 2.32,
"learning_rate": 1.1314030357868302e-05,
"loss": 1.022,
"step": 628500
},
{
"epoch": 2.32,
"learning_rate": 1.1283253930149821e-05,
"loss": 1.0209,
"step": 629000
},
{
"epoch": 2.32,
"learning_rate": 1.1252477502431338e-05,
"loss": 1.0243,
"step": 629500
},
{
"epoch": 2.33,
"learning_rate": 1.1221701074712857e-05,
"loss": 1.0291,
"step": 630000
},
{
"epoch": 2.33,
"learning_rate": 1.1190924646994374e-05,
"loss": 1.0269,
"step": 630500
},
{
"epoch": 2.33,
"learning_rate": 1.1160148219275893e-05,
"loss": 1.0256,
"step": 631000
},
{
"epoch": 2.33,
"learning_rate": 1.1129371791557412e-05,
"loss": 1.0227,
"step": 631500
},
{
"epoch": 2.33,
"learning_rate": 1.1098595363838929e-05,
"loss": 1.0253,
"step": 632000
},
{
"epoch": 2.34,
"learning_rate": 1.1067818936120447e-05,
"loss": 1.0284,
"step": 632500
},
{
"epoch": 2.34,
"learning_rate": 1.1037042508401965e-05,
"loss": 1.0226,
"step": 633000
},
{
"epoch": 2.34,
"learning_rate": 1.1006266080683483e-05,
"loss": 1.0222,
"step": 633500
},
{
"epoch": 2.34,
"learning_rate": 1.0975489652965002e-05,
"loss": 1.0258,
"step": 634000
},
{
"epoch": 2.34,
"learning_rate": 1.094471322524652e-05,
"loss": 1.0206,
"step": 634500
},
{
"epoch": 2.35,
"learning_rate": 1.0913936797528038e-05,
"loss": 1.0256,
"step": 635000
},
{
"epoch": 2.35,
"learning_rate": 1.0883160369809555e-05,
"loss": 1.0255,
"step": 635500
},
{
"epoch": 2.35,
"learning_rate": 1.0852383942091074e-05,
"loss": 1.0245,
"step": 636000
},
{
"epoch": 2.35,
"learning_rate": 1.0821607514372593e-05,
"loss": 1.0236,
"step": 636500
},
{
"epoch": 2.35,
"learning_rate": 1.079083108665411e-05,
"loss": 1.0197,
"step": 637000
},
{
"epoch": 2.35,
"learning_rate": 1.0760054658935629e-05,
"loss": 1.0218,
"step": 637500
},
{
"epoch": 2.36,
"learning_rate": 1.0729278231217146e-05,
"loss": 1.0248,
"step": 638000
},
{
"epoch": 2.36,
"learning_rate": 1.0698501803498664e-05,
"loss": 1.0275,
"step": 638500
},
{
"epoch": 2.36,
"learning_rate": 1.0667725375780183e-05,
"loss": 1.0304,
"step": 639000
},
{
"epoch": 2.36,
"learning_rate": 1.06369489480617e-05,
"loss": 1.0205,
"step": 639500
},
{
"epoch": 2.36,
"learning_rate": 1.0606172520343219e-05,
"loss": 1.038,
"step": 640000
},
{
"epoch": 2.37,
"learning_rate": 1.0575396092624736e-05,
"loss": 1.0189,
"step": 640500
},
{
"epoch": 2.37,
"learning_rate": 1.0544619664906255e-05,
"loss": 1.0216,
"step": 641000
},
{
"epoch": 2.37,
"learning_rate": 1.0513843237187774e-05,
"loss": 1.0206,
"step": 641500
},
{
"epoch": 2.37,
"learning_rate": 1.048306680946929e-05,
"loss": 1.0216,
"step": 642000
},
{
"epoch": 2.37,
"learning_rate": 1.045229038175081e-05,
"loss": 1.0243,
"step": 642500
},
{
"epoch": 2.37,
"learning_rate": 1.0421513954032328e-05,
"loss": 1.015,
"step": 643000
},
{
"epoch": 2.38,
"learning_rate": 1.0390737526313845e-05,
"loss": 1.0225,
"step": 643500
},
{
"epoch": 2.38,
"learning_rate": 1.0359961098595364e-05,
"loss": 1.0217,
"step": 644000
},
{
"epoch": 2.38,
"learning_rate": 1.0329184670876881e-05,
"loss": 1.0163,
"step": 644500
},
{
"epoch": 2.38,
"learning_rate": 1.02984082431584e-05,
"loss": 1.0086,
"step": 645000
},
{
"epoch": 2.38,
"learning_rate": 1.0267631815439919e-05,
"loss": 1.0167,
"step": 645500
},
{
"epoch": 2.39,
"learning_rate": 1.0236855387721436e-05,
"loss": 1.0152,
"step": 646000
},
{
"epoch": 2.39,
"learning_rate": 1.0206078960002955e-05,
"loss": 1.0267,
"step": 646500
},
{
"epoch": 2.39,
"learning_rate": 1.0175302532284472e-05,
"loss": 1.0146,
"step": 647000
},
{
"epoch": 2.39,
"learning_rate": 1.014452610456599e-05,
"loss": 1.0155,
"step": 647500
},
{
"epoch": 2.39,
"learning_rate": 1.011374967684751e-05,
"loss": 1.0239,
"step": 648000
},
{
"epoch": 2.4,
"learning_rate": 1.0082973249129028e-05,
"loss": 1.0219,
"step": 648500
},
{
"epoch": 2.4,
"learning_rate": 1.0052196821410547e-05,
"loss": 1.0199,
"step": 649000
},
{
"epoch": 2.4,
"learning_rate": 1.0021420393692064e-05,
"loss": 1.0163,
"step": 649500
},
{
"epoch": 2.4,
"learning_rate": 9.990643965973583e-06,
"loss": 1.0213,
"step": 650000
},
{
"epoch": 2.4,
"learning_rate": 9.9598675382551e-06,
"loss": 1.0201,
"step": 650500
},
{
"epoch": 2.4,
"learning_rate": 9.929091110536619e-06,
"loss": 1.0217,
"step": 651000
},
{
"epoch": 2.41,
"learning_rate": 9.898314682818138e-06,
"loss": 1.0217,
"step": 651500
},
{
"epoch": 2.41,
"learning_rate": 9.867538255099655e-06,
"loss": 1.0152,
"step": 652000
},
{
"epoch": 2.41,
"learning_rate": 9.836761827381173e-06,
"loss": 1.0236,
"step": 652500
},
{
"epoch": 2.41,
"learning_rate": 9.80598539966269e-06,
"loss": 1.0183,
"step": 653000
},
{
"epoch": 2.41,
"learning_rate": 9.77520897194421e-06,
"loss": 1.0152,
"step": 653500
},
{
"epoch": 2.42,
"learning_rate": 9.744432544225728e-06,
"loss": 1.0205,
"step": 654000
},
{
"epoch": 2.42,
"learning_rate": 9.713656116507245e-06,
"loss": 1.0268,
"step": 654500
},
{
"epoch": 2.42,
"learning_rate": 9.682879688788764e-06,
"loss": 1.03,
"step": 655000
},
{
"epoch": 2.42,
"learning_rate": 9.652103261070283e-06,
"loss": 1.018,
"step": 655500
},
{
"epoch": 2.42,
"learning_rate": 9.6213268333518e-06,
"loss": 1.02,
"step": 656000
},
{
"epoch": 2.42,
"learning_rate": 9.590550405633319e-06,
"loss": 1.0198,
"step": 656500
},
{
"epoch": 2.43,
"learning_rate": 9.559773977914836e-06,
"loss": 1.0178,
"step": 657000
},
{
"epoch": 2.43,
"learning_rate": 9.528997550196354e-06,
"loss": 1.016,
"step": 657500
},
{
"epoch": 2.43,
"learning_rate": 9.498221122477873e-06,
"loss": 1.0213,
"step": 658000
},
{
"epoch": 2.43,
"learning_rate": 9.46744469475939e-06,
"loss": 1.0184,
"step": 658500
},
{
"epoch": 2.43,
"learning_rate": 9.436668267040909e-06,
"loss": 1.0244,
"step": 659000
},
{
"epoch": 2.44,
"learning_rate": 9.405891839322426e-06,
"loss": 1.0191,
"step": 659500
},
{
"epoch": 2.44,
"learning_rate": 9.375115411603945e-06,
"loss": 1.0232,
"step": 660000
},
{
"epoch": 2.44,
"learning_rate": 9.344338983885464e-06,
"loss": 1.0143,
"step": 660500
},
{
"epoch": 2.44,
"learning_rate": 9.313562556166981e-06,
"loss": 1.0198,
"step": 661000
},
{
"epoch": 2.44,
"learning_rate": 9.2827861284485e-06,
"loss": 1.0201,
"step": 661500
},
{
"epoch": 2.44,
"learning_rate": 9.252009700730017e-06,
"loss": 1.0174,
"step": 662000
},
{
"epoch": 2.45,
"learning_rate": 9.221233273011536e-06,
"loss": 1.0231,
"step": 662500
},
{
"epoch": 2.45,
"learning_rate": 9.190456845293054e-06,
"loss": 1.0119,
"step": 663000
},
{
"epoch": 2.45,
"learning_rate": 9.159680417574571e-06,
"loss": 1.0127,
"step": 663500
},
{
"epoch": 2.45,
"learning_rate": 9.12890398985609e-06,
"loss": 1.0168,
"step": 664000
},
{
"epoch": 2.45,
"learning_rate": 9.098127562137607e-06,
"loss": 1.0155,
"step": 664500
},
{
"epoch": 2.46,
"learning_rate": 9.067351134419126e-06,
"loss": 1.0175,
"step": 665000
},
{
"epoch": 2.46,
"learning_rate": 9.036574706700645e-06,
"loss": 1.0174,
"step": 665500
},
{
"epoch": 2.46,
"learning_rate": 9.005798278982162e-06,
"loss": 1.0173,
"step": 666000
},
{
"epoch": 2.46,
"learning_rate": 8.97502185126368e-06,
"loss": 1.0178,
"step": 666500
},
{
"epoch": 2.46,
"learning_rate": 8.944245423545198e-06,
"loss": 1.0124,
"step": 667000
},
{
"epoch": 2.47,
"learning_rate": 8.913468995826717e-06,
"loss": 1.0189,
"step": 667500
},
{
"epoch": 2.47,
"learning_rate": 8.882692568108235e-06,
"loss": 1.0231,
"step": 668000
},
{
"epoch": 2.47,
"learning_rate": 8.851916140389752e-06,
"loss": 1.0143,
"step": 668500
},
{
"epoch": 2.47,
"learning_rate": 8.821139712671271e-06,
"loss": 1.0153,
"step": 669000
},
{
"epoch": 2.47,
"learning_rate": 8.79036328495279e-06,
"loss": 1.0174,
"step": 669500
},
{
"epoch": 2.47,
"learning_rate": 8.759586857234307e-06,
"loss": 1.0158,
"step": 670000
},
{
"epoch": 2.48,
"learning_rate": 8.728810429515826e-06,
"loss": 1.0116,
"step": 670500
},
{
"epoch": 2.48,
"learning_rate": 8.698034001797343e-06,
"loss": 1.02,
"step": 671000
},
{
"epoch": 2.48,
"learning_rate": 8.667257574078862e-06,
"loss": 1.014,
"step": 671500
},
{
"epoch": 2.48,
"learning_rate": 8.63648114636038e-06,
"loss": 1.016,
"step": 672000
},
{
"epoch": 2.48,
"learning_rate": 8.605704718641898e-06,
"loss": 1.0173,
"step": 672500
},
{
"epoch": 2.49,
"learning_rate": 8.574928290923416e-06,
"loss": 1.0045,
"step": 673000
},
{
"epoch": 2.49,
"learning_rate": 8.544151863204934e-06,
"loss": 1.0185,
"step": 673500
},
{
"epoch": 2.49,
"learning_rate": 8.513375435486452e-06,
"loss": 1.0156,
"step": 674000
},
{
"epoch": 2.49,
"learning_rate": 8.482599007767971e-06,
"loss": 1.0106,
"step": 674500
},
{
"epoch": 2.49,
"learning_rate": 8.451822580049488e-06,
"loss": 1.0153,
"step": 675000
},
{
"epoch": 2.49,
"learning_rate": 8.421046152331007e-06,
"loss": 1.01,
"step": 675500
},
{
"epoch": 2.5,
"learning_rate": 8.390269724612524e-06,
"loss": 1.0161,
"step": 676000
},
{
"epoch": 2.5,
"learning_rate": 8.359493296894043e-06,
"loss": 1.0146,
"step": 676500
},
{
"epoch": 2.5,
"learning_rate": 8.328716869175562e-06,
"loss": 1.0152,
"step": 677000
},
{
"epoch": 2.5,
"learning_rate": 8.297940441457079e-06,
"loss": 1.0089,
"step": 677500
},
{
"epoch": 2.5,
"learning_rate": 8.267164013738598e-06,
"loss": 1.0139,
"step": 678000
},
{
"epoch": 2.51,
"learning_rate": 8.236387586020115e-06,
"loss": 1.0202,
"step": 678500
},
{
"epoch": 2.51,
"learning_rate": 8.205611158301633e-06,
"loss": 1.0082,
"step": 679000
},
{
"epoch": 2.51,
"learning_rate": 8.174834730583152e-06,
"loss": 1.0067,
"step": 679500
},
{
"epoch": 2.51,
"learning_rate": 8.14405830286467e-06,
"loss": 1.0156,
"step": 680000
},
{
"epoch": 2.51,
"learning_rate": 8.113281875146188e-06,
"loss": 1.0075,
"step": 680500
},
{
"epoch": 2.52,
"learning_rate": 8.082505447427705e-06,
"loss": 1.0108,
"step": 681000
},
{
"epoch": 2.52,
"learning_rate": 8.051729019709224e-06,
"loss": 1.0115,
"step": 681500
},
{
"epoch": 2.52,
"learning_rate": 8.020952591990743e-06,
"loss": 1.014,
"step": 682000
},
{
"epoch": 2.52,
"learning_rate": 7.99017616427226e-06,
"loss": 1.0147,
"step": 682500
},
{
"epoch": 2.52,
"learning_rate": 7.95939973655378e-06,
"loss": 1.006,
"step": 683000
},
{
"epoch": 2.52,
"learning_rate": 7.928623308835297e-06,
"loss": 1.016,
"step": 683500
},
{
"epoch": 2.53,
"learning_rate": 7.897846881116816e-06,
"loss": 1.025,
"step": 684000
},
{
"epoch": 2.53,
"learning_rate": 7.867070453398335e-06,
"loss": 1.0174,
"step": 684500
},
{
"epoch": 2.53,
"learning_rate": 7.836294025679852e-06,
"loss": 1.0158,
"step": 685000
},
{
"epoch": 2.53,
"learning_rate": 7.80551759796137e-06,
"loss": 1.0128,
"step": 685500
},
{
"epoch": 2.53,
"learning_rate": 7.774741170242888e-06,
"loss": 1.0142,
"step": 686000
},
{
"epoch": 2.54,
"learning_rate": 7.743964742524407e-06,
"loss": 1.0199,
"step": 686500
},
{
"epoch": 2.54,
"learning_rate": 7.713188314805925e-06,
"loss": 1.0173,
"step": 687000
},
{
"epoch": 2.54,
"learning_rate": 7.682411887087443e-06,
"loss": 1.0143,
"step": 687500
},
{
"epoch": 2.54,
"learning_rate": 7.651635459368961e-06,
"loss": 1.0135,
"step": 688000
},
{
"epoch": 2.54,
"learning_rate": 7.620859031650479e-06,
"loss": 1.0042,
"step": 688500
},
{
"epoch": 2.54,
"learning_rate": 7.590082603931997e-06,
"loss": 1.0161,
"step": 689000
},
{
"epoch": 2.55,
"learning_rate": 7.559306176213515e-06,
"loss": 1.01,
"step": 689500
},
{
"epoch": 2.55,
"learning_rate": 7.528529748495033e-06,
"loss": 1.0041,
"step": 690000
},
{
"epoch": 2.55,
"learning_rate": 7.497753320776552e-06,
"loss": 1.0185,
"step": 690500
},
{
"epoch": 2.55,
"learning_rate": 7.46697689305807e-06,
"loss": 1.0042,
"step": 691000
},
{
"epoch": 2.55,
"learning_rate": 7.436200465339588e-06,
"loss": 0.9985,
"step": 691500
},
{
"epoch": 2.56,
"learning_rate": 7.405424037621106e-06,
"loss": 1.0094,
"step": 692000
},
{
"epoch": 2.56,
"learning_rate": 7.3746476099026245e-06,
"loss": 1.0084,
"step": 692500
},
{
"epoch": 2.56,
"learning_rate": 7.343871182184142e-06,
"loss": 1.0106,
"step": 693000
},
{
"epoch": 2.56,
"learning_rate": 7.31309475446566e-06,
"loss": 1.0174,
"step": 693500
},
{
"epoch": 2.56,
"learning_rate": 7.282318326747178e-06,
"loss": 1.0137,
"step": 694000
},
{
"epoch": 2.56,
"learning_rate": 7.251541899028696e-06,
"loss": 1.0066,
"step": 694500
},
{
"epoch": 2.57,
"learning_rate": 7.220765471310215e-06,
"loss": 1.0081,
"step": 695000
},
{
"epoch": 2.57,
"learning_rate": 7.189989043591733e-06,
"loss": 1.0042,
"step": 695500
},
{
"epoch": 2.57,
"learning_rate": 7.159212615873251e-06,
"loss": 1.0139,
"step": 696000
},
{
"epoch": 2.57,
"learning_rate": 7.128436188154769e-06,
"loss": 1.0137,
"step": 696500
},
{
"epoch": 2.57,
"learning_rate": 7.097659760436287e-06,
"loss": 1.0135,
"step": 697000
},
{
"epoch": 2.58,
"learning_rate": 7.0668833327178055e-06,
"loss": 1.01,
"step": 697500
},
{
"epoch": 2.58,
"learning_rate": 7.0361069049993235e-06,
"loss": 1.0041,
"step": 698000
},
{
"epoch": 2.58,
"learning_rate": 7.005330477280841e-06,
"loss": 1.0142,
"step": 698500
},
{
"epoch": 2.58,
"learning_rate": 6.974554049562359e-06,
"loss": 1.0058,
"step": 699000
},
{
"epoch": 2.58,
"learning_rate": 6.943777621843878e-06,
"loss": 1.0086,
"step": 699500
},
{
"epoch": 2.59,
"learning_rate": 6.913001194125396e-06,
"loss": 1.0055,
"step": 700000
},
{
"epoch": 2.59,
"learning_rate": 6.882224766406914e-06,
"loss": 1.0148,
"step": 700500
},
{
"epoch": 2.59,
"learning_rate": 6.851448338688432e-06,
"loss": 1.004,
"step": 701000
},
{
"epoch": 2.59,
"learning_rate": 6.82067191096995e-06,
"loss": 1.0096,
"step": 701500
},
{
"epoch": 2.59,
"learning_rate": 6.789895483251469e-06,
"loss": 1.0043,
"step": 702000
},
{
"epoch": 2.59,
"learning_rate": 6.759119055532987e-06,
"loss": 1.0115,
"step": 702500
},
{
"epoch": 2.6,
"learning_rate": 6.7283426278145045e-06,
"loss": 1.0067,
"step": 703000
},
{
"epoch": 2.6,
"learning_rate": 6.6975662000960225e-06,
"loss": 1.0118,
"step": 703500
},
{
"epoch": 2.6,
"learning_rate": 6.66678977237754e-06,
"loss": 1.0056,
"step": 704000
},
{
"epoch": 2.6,
"learning_rate": 6.636013344659059e-06,
"loss": 1.0146,
"step": 704500
},
{
"epoch": 2.6,
"learning_rate": 6.605236916940577e-06,
"loss": 1.0111,
"step": 705000
},
{
"epoch": 2.61,
"learning_rate": 6.574460489222095e-06,
"loss": 1.0074,
"step": 705500
},
{
"epoch": 2.61,
"learning_rate": 6.543684061503613e-06,
"loss": 1.0116,
"step": 706000
},
{
"epoch": 2.61,
"learning_rate": 6.512907633785132e-06,
"loss": 1.0076,
"step": 706500
},
{
"epoch": 2.61,
"learning_rate": 6.48213120606665e-06,
"loss": 1.008,
"step": 707000
},
{
"epoch": 2.61,
"learning_rate": 6.451354778348168e-06,
"loss": 1.0123,
"step": 707500
},
{
"epoch": 2.61,
"learning_rate": 6.420578350629686e-06,
"loss": 1.0056,
"step": 708000
},
{
"epoch": 2.62,
"learning_rate": 6.3898019229112035e-06,
"loss": 1.0049,
"step": 708500
},
{
"epoch": 2.62,
"learning_rate": 6.359025495192722e-06,
"loss": 1.0028,
"step": 709000
},
{
"epoch": 2.62,
"learning_rate": 6.32824906747424e-06,
"loss": 1.0069,
"step": 709500
},
{
"epoch": 2.62,
"learning_rate": 6.297472639755758e-06,
"loss": 1.0091,
"step": 710000
},
{
"epoch": 2.62,
"learning_rate": 6.266696212037276e-06,
"loss": 1.0159,
"step": 710500
},
{
"epoch": 2.63,
"learning_rate": 6.235919784318795e-06,
"loss": 1.0079,
"step": 711000
},
{
"epoch": 2.63,
"learning_rate": 6.205143356600313e-06,
"loss": 1.0096,
"step": 711500
},
{
"epoch": 2.63,
"learning_rate": 6.174366928881832e-06,
"loss": 1.0024,
"step": 712000
},
{
"epoch": 2.63,
"learning_rate": 6.1435905011633496e-06,
"loss": 0.9978,
"step": 712500
},
{
"epoch": 2.63,
"learning_rate": 6.1128140734448675e-06,
"loss": 1.0059,
"step": 713000
},
{
"epoch": 2.64,
"learning_rate": 6.0820376457263854e-06,
"loss": 1.0076,
"step": 713500
},
{
"epoch": 2.64,
"learning_rate": 6.051261218007904e-06,
"loss": 1.0115,
"step": 714000
},
{
"epoch": 2.64,
"learning_rate": 6.020484790289422e-06,
"loss": 1.0101,
"step": 714500
},
{
"epoch": 2.64,
"learning_rate": 5.98970836257094e-06,
"loss": 1.011,
"step": 715000
},
{
"epoch": 2.64,
"learning_rate": 5.958931934852458e-06,
"loss": 1.0119,
"step": 715500
},
{
"epoch": 2.64,
"learning_rate": 5.928155507133976e-06,
"loss": 1.0122,
"step": 716000
},
{
"epoch": 2.65,
"learning_rate": 5.897379079415495e-06,
"loss": 1.0039,
"step": 716500
},
{
"epoch": 2.65,
"learning_rate": 5.866602651697013e-06,
"loss": 1.0031,
"step": 717000
},
{
"epoch": 2.65,
"learning_rate": 5.835826223978531e-06,
"loss": 1.0059,
"step": 717500
},
{
"epoch": 2.65,
"learning_rate": 5.8050497962600486e-06,
"loss": 1.0082,
"step": 718000
},
{
"epoch": 2.65,
"learning_rate": 5.7742733685415665e-06,
"loss": 1.0027,
"step": 718500
},
{
"epoch": 2.66,
"learning_rate": 5.743496940823085e-06,
"loss": 0.9952,
"step": 719000
},
{
"epoch": 2.66,
"learning_rate": 5.712720513104603e-06,
"loss": 1.0025,
"step": 719500
},
{
"epoch": 2.66,
"learning_rate": 5.681944085386121e-06,
"loss": 1.0005,
"step": 720000
},
{
"epoch": 2.66,
"learning_rate": 5.651167657667639e-06,
"loss": 1.0066,
"step": 720500
},
{
"epoch": 2.66,
"learning_rate": 5.620391229949158e-06,
"loss": 1.0004,
"step": 721000
},
{
"epoch": 2.66,
"learning_rate": 5.589614802230676e-06,
"loss": 1.0066,
"step": 721500
},
{
"epoch": 2.67,
"learning_rate": 5.558838374512194e-06,
"loss": 1.0082,
"step": 722000
},
{
"epoch": 2.67,
"learning_rate": 5.528061946793712e-06,
"loss": 1.0082,
"step": 722500
},
{
"epoch": 2.67,
"learning_rate": 5.49728551907523e-06,
"loss": 1.0108,
"step": 723000
},
{
"epoch": 2.67,
"learning_rate": 5.466509091356748e-06,
"loss": 1.0073,
"step": 723500
},
{
"epoch": 2.67,
"learning_rate": 5.435732663638266e-06,
"loss": 1.0071,
"step": 724000
},
{
"epoch": 2.68,
"learning_rate": 5.404956235919784e-06,
"loss": 1.0023,
"step": 724500
},
{
"epoch": 2.68,
"learning_rate": 5.374179808201302e-06,
"loss": 0.9992,
"step": 725000
},
{
"epoch": 2.68,
"learning_rate": 5.34340338048282e-06,
"loss": 1.0023,
"step": 725500
},
{
"epoch": 2.68,
"learning_rate": 5.312626952764339e-06,
"loss": 0.9983,
"step": 726000
},
{
"epoch": 2.68,
"learning_rate": 5.281850525045857e-06,
"loss": 1.0009,
"step": 726500
},
{
"epoch": 2.68,
"learning_rate": 5.251074097327376e-06,
"loss": 1.0027,
"step": 727000
},
{
"epoch": 2.69,
"learning_rate": 5.220297669608894e-06,
"loss": 1.0083,
"step": 727500
},
{
"epoch": 2.69,
"learning_rate": 5.1895212418904115e-06,
"loss": 1.0076,
"step": 728000
},
{
"epoch": 2.69,
"learning_rate": 5.15874481417193e-06,
"loss": 0.9977,
"step": 728500
},
{
"epoch": 2.69,
"learning_rate": 5.127968386453448e-06,
"loss": 1.0028,
"step": 729000
},
{
"epoch": 2.69,
"learning_rate": 5.097191958734966e-06,
"loss": 1.0072,
"step": 729500
},
{
"epoch": 2.7,
"learning_rate": 5.066415531016484e-06,
"loss": 1.0031,
"step": 730000
},
{
"epoch": 2.7,
"learning_rate": 5.035639103298002e-06,
"loss": 1.0051,
"step": 730500
},
{
"epoch": 2.7,
"learning_rate": 5.004862675579521e-06,
"loss": 1.0055,
"step": 731000
},
{
"epoch": 2.7,
"learning_rate": 4.974086247861039e-06,
"loss": 1.0034,
"step": 731500
},
{
"epoch": 2.7,
"learning_rate": 4.943309820142557e-06,
"loss": 1.0042,
"step": 732000
},
{
"epoch": 2.71,
"learning_rate": 4.912533392424075e-06,
"loss": 1.0039,
"step": 732500
},
{
"epoch": 2.71,
"learning_rate": 4.8817569647055934e-06,
"loss": 1.0033,
"step": 733000
},
{
"epoch": 2.71,
"learning_rate": 4.850980536987111e-06,
"loss": 0.9988,
"step": 733500
},
{
"epoch": 2.71,
"learning_rate": 4.820204109268629e-06,
"loss": 1.0052,
"step": 734000
},
{
"epoch": 2.71,
"learning_rate": 4.789427681550147e-06,
"loss": 0.9979,
"step": 734500
},
{
"epoch": 2.71,
"learning_rate": 4.758651253831665e-06,
"loss": 0.9971,
"step": 735000
},
{
"epoch": 2.72,
"learning_rate": 4.727874826113184e-06,
"loss": 1.0054,
"step": 735500
},
{
"epoch": 2.72,
"learning_rate": 4.697098398394702e-06,
"loss": 0.9994,
"step": 736000
},
{
"epoch": 2.72,
"learning_rate": 4.66632197067622e-06,
"loss": 0.9963,
"step": 736500
},
{
"epoch": 2.72,
"learning_rate": 4.635545542957738e-06,
"loss": 0.9992,
"step": 737000
},
{
"epoch": 2.72,
"learning_rate": 4.604769115239256e-06,
"loss": 1.0022,
"step": 737500
},
{
"epoch": 2.73,
"learning_rate": 4.5739926875207745e-06,
"loss": 1.0108,
"step": 738000
},
{
"epoch": 2.73,
"learning_rate": 4.5432162598022924e-06,
"loss": 0.9971,
"step": 738500
},
{
"epoch": 2.73,
"learning_rate": 4.51243983208381e-06,
"loss": 0.9978,
"step": 739000
},
{
"epoch": 2.73,
"learning_rate": 4.481663404365328e-06,
"loss": 1.0005,
"step": 739500
},
{
"epoch": 2.73,
"learning_rate": 4.450886976646847e-06,
"loss": 1.0058,
"step": 740000
},
{
"epoch": 2.73,
"learning_rate": 4.420110548928365e-06,
"loss": 0.9967,
"step": 740500
},
{
"epoch": 2.74,
"learning_rate": 4.389334121209883e-06,
"loss": 0.9919,
"step": 741000
},
{
"epoch": 2.74,
"learning_rate": 4.358557693491401e-06,
"loss": 1.0065,
"step": 741500
},
{
"epoch": 2.74,
"learning_rate": 4.327781265772919e-06,
"loss": 1.0024,
"step": 742000
},
{
"epoch": 2.74,
"learning_rate": 4.297004838054438e-06,
"loss": 0.9953,
"step": 742500
},
{
"epoch": 2.74,
"learning_rate": 4.2662284103359556e-06,
"loss": 0.9981,
"step": 743000
},
{
"epoch": 2.75,
"learning_rate": 4.2354519826174735e-06,
"loss": 0.9962,
"step": 743500
},
{
"epoch": 2.75,
"learning_rate": 4.2046755548989914e-06,
"loss": 1.0003,
"step": 744000
},
{
"epoch": 2.75,
"learning_rate": 4.17389912718051e-06,
"loss": 0.995,
"step": 744500
},
{
"epoch": 2.75,
"learning_rate": 4.143122699462028e-06,
"loss": 1.0064,
"step": 745000
},
{
"epoch": 2.75,
"learning_rate": 4.112346271743547e-06,
"loss": 1.001,
"step": 745500
},
{
"epoch": 2.76,
"learning_rate": 4.081569844025065e-06,
"loss": 0.9993,
"step": 746000
},
{
"epoch": 2.76,
"learning_rate": 4.050793416306583e-06,
"loss": 0.9941,
"step": 746500
},
{
"epoch": 2.76,
"learning_rate": 4.020016988588101e-06,
"loss": 1.0024,
"step": 747000
},
{
"epoch": 2.76,
"learning_rate": 3.9892405608696195e-06,
"loss": 1.0016,
"step": 747500
},
{
"epoch": 2.76,
"learning_rate": 3.9584641331511375e-06,
"loss": 1.004,
"step": 748000
},
{
"epoch": 2.76,
"learning_rate": 3.927687705432655e-06,
"loss": 1.0006,
"step": 748500
},
{
"epoch": 2.77,
"learning_rate": 3.896911277714173e-06,
"loss": 1.0003,
"step": 749000
},
{
"epoch": 2.77,
"learning_rate": 3.866134849995691e-06,
"loss": 0.9968,
"step": 749500
},
{
"epoch": 2.77,
"learning_rate": 3.83535842227721e-06,
"loss": 0.9979,
"step": 750000
},
{
"epoch": 2.77,
"learning_rate": 3.804581994558728e-06,
"loss": 1.0025,
"step": 750500
},
{
"epoch": 2.77,
"learning_rate": 3.773805566840246e-06,
"loss": 0.9947,
"step": 751000
},
{
"epoch": 2.78,
"learning_rate": 3.7430291391217643e-06,
"loss": 0.9975,
"step": 751500
},
{
"epoch": 2.78,
"learning_rate": 3.7122527114032822e-06,
"loss": 1.0006,
"step": 752000
},
{
"epoch": 2.78,
"learning_rate": 3.6814762836848e-06,
"loss": 0.9894,
"step": 752500
},
{
"epoch": 2.78,
"learning_rate": 3.6506998559663185e-06,
"loss": 1.0005,
"step": 753000
},
{
"epoch": 2.78,
"learning_rate": 3.6199234282478365e-06,
"loss": 1.0106,
"step": 753500
},
{
"epoch": 2.78,
"learning_rate": 3.589147000529355e-06,
"loss": 1.0023,
"step": 754000
},
{
"epoch": 2.79,
"learning_rate": 3.5583705728108728e-06,
"loss": 0.9934,
"step": 754500
},
{
"epoch": 2.79,
"learning_rate": 3.527594145092391e-06,
"loss": 0.9958,
"step": 755000
},
{
"epoch": 2.79,
"learning_rate": 3.496817717373909e-06,
"loss": 0.9978,
"step": 755500
},
{
"epoch": 2.79,
"learning_rate": 3.466041289655427e-06,
"loss": 0.9994,
"step": 756000
},
{
"epoch": 2.79,
"learning_rate": 3.4352648619369454e-06,
"loss": 1.0067,
"step": 756500
},
{
"epoch": 2.8,
"learning_rate": 3.4044884342184633e-06,
"loss": 0.9952,
"step": 757000
},
{
"epoch": 2.8,
"learning_rate": 3.3737120064999817e-06,
"loss": 1.0057,
"step": 757500
},
{
"epoch": 2.8,
"learning_rate": 3.3429355787814996e-06,
"loss": 0.9982,
"step": 758000
},
{
"epoch": 2.8,
"learning_rate": 3.312159151063018e-06,
"loss": 0.9944,
"step": 758500
},
{
"epoch": 2.8,
"learning_rate": 3.281382723344536e-06,
"loss": 0.9935,
"step": 759000
},
{
"epoch": 2.8,
"learning_rate": 3.250606295626054e-06,
"loss": 0.9997,
"step": 759500
},
{
"epoch": 2.81,
"learning_rate": 3.219829867907572e-06,
"loss": 0.9982,
"step": 760000
},
{
"epoch": 2.81,
"learning_rate": 3.18905344018909e-06,
"loss": 0.9938,
"step": 760500
},
{
"epoch": 2.81,
"learning_rate": 3.1582770124706085e-06,
"loss": 0.9998,
"step": 761000
},
{
"epoch": 2.81,
"learning_rate": 3.1275005847521264e-06,
"loss": 0.9983,
"step": 761500
},
{
"epoch": 2.81,
"learning_rate": 3.0967241570336448e-06,
"loss": 1.0039,
"step": 762000
},
{
"epoch": 2.82,
"learning_rate": 3.065947729315163e-06,
"loss": 0.9966,
"step": 762500
},
{
"epoch": 2.82,
"learning_rate": 3.035171301596681e-06,
"loss": 0.9901,
"step": 763000
},
{
"epoch": 2.82,
"learning_rate": 3.0043948738781994e-06,
"loss": 0.9968,
"step": 763500
},
{
"epoch": 2.82,
"learning_rate": 2.9736184461597174e-06,
"loss": 0.9932,
"step": 764000
},
{
"epoch": 2.82,
"learning_rate": 2.9428420184412353e-06,
"loss": 0.9969,
"step": 764500
},
{
"epoch": 2.83,
"learning_rate": 2.9120655907227537e-06,
"loss": 0.9881,
"step": 765000
},
{
"epoch": 2.83,
"learning_rate": 2.8812891630042716e-06,
"loss": 0.9991,
"step": 765500
},
{
"epoch": 2.83,
"learning_rate": 2.8505127352857904e-06,
"loss": 0.994,
"step": 766000
},
{
"epoch": 2.83,
"learning_rate": 2.8197363075673083e-06,
"loss": 0.9993,
"step": 766500
},
{
"epoch": 2.83,
"learning_rate": 2.7889598798488263e-06,
"loss": 0.987,
"step": 767000
},
{
"epoch": 2.83,
"learning_rate": 2.7581834521303446e-06,
"loss": 0.9931,
"step": 767500
},
{
"epoch": 2.84,
"learning_rate": 2.7274070244118626e-06,
"loss": 0.9928,
"step": 768000
},
{
"epoch": 2.84,
"learning_rate": 2.696630596693381e-06,
"loss": 0.9971,
"step": 768500
},
{
"epoch": 2.84,
"learning_rate": 2.665854168974899e-06,
"loss": 0.9889,
"step": 769000
},
{
"epoch": 2.84,
"learning_rate": 2.6350777412564172e-06,
"loss": 0.9953,
"step": 769500
},
{
"epoch": 2.84,
"learning_rate": 2.604301313537935e-06,
"loss": 0.9981,
"step": 770000
},
{
"epoch": 2.85,
"learning_rate": 2.573524885819453e-06,
"loss": 0.9927,
"step": 770500
},
{
"epoch": 2.85,
"learning_rate": 2.5427484581009715e-06,
"loss": 0.9945,
"step": 771000
},
{
"epoch": 2.85,
"learning_rate": 2.5119720303824894e-06,
"loss": 1.0024,
"step": 771500
},
{
"epoch": 2.85,
"learning_rate": 2.4811956026640078e-06,
"loss": 0.9978,
"step": 772000
},
{
"epoch": 2.85,
"learning_rate": 2.4504191749455257e-06,
"loss": 0.991,
"step": 772500
},
{
"epoch": 2.85,
"learning_rate": 2.419642747227044e-06,
"loss": 0.9995,
"step": 773000
},
{
"epoch": 2.86,
"learning_rate": 2.388866319508562e-06,
"loss": 1.0027,
"step": 773500
},
{
"epoch": 2.86,
"learning_rate": 2.35808989179008e-06,
"loss": 0.9949,
"step": 774000
},
{
"epoch": 2.86,
"learning_rate": 2.3273134640715983e-06,
"loss": 0.9938,
"step": 774500
},
{
"epoch": 2.86,
"learning_rate": 2.2965370363531166e-06,
"loss": 1.0029,
"step": 775000
},
{
"epoch": 2.86,
"learning_rate": 2.265760608634635e-06,
"loss": 0.9957,
"step": 775500
},
{
"epoch": 2.87,
"learning_rate": 2.234984180916153e-06,
"loss": 0.9978,
"step": 776000
},
{
"epoch": 2.87,
"learning_rate": 2.204207753197671e-06,
"loss": 0.9916,
"step": 776500
},
{
"epoch": 2.87,
"learning_rate": 2.1734313254791892e-06,
"loss": 0.9881,
"step": 777000
},
{
"epoch": 2.87,
"learning_rate": 2.142654897760707e-06,
"loss": 0.9957,
"step": 777500
},
{
"epoch": 2.87,
"learning_rate": 2.1118784700422255e-06,
"loss": 0.9952,
"step": 778000
},
{
"epoch": 2.88,
"learning_rate": 2.0811020423237435e-06,
"loss": 0.9965,
"step": 778500
},
{
"epoch": 2.88,
"learning_rate": 2.050325614605262e-06,
"loss": 0.9938,
"step": 779000
},
{
"epoch": 2.88,
"learning_rate": 2.0195491868867798e-06,
"loss": 0.9886,
"step": 779500
},
{
"epoch": 2.88,
"learning_rate": 1.9887727591682977e-06,
"loss": 0.9973,
"step": 780000
},
{
"epoch": 2.88,
"learning_rate": 1.957996331449816e-06,
"loss": 0.9928,
"step": 780500
},
{
"epoch": 2.88,
"learning_rate": 1.927219903731334e-06,
"loss": 0.9938,
"step": 781000
},
{
"epoch": 2.89,
"learning_rate": 1.8964434760128522e-06,
"loss": 1.0034,
"step": 781500
},
{
"epoch": 2.89,
"learning_rate": 1.8656670482943703e-06,
"loss": 0.9944,
"step": 782000
},
{
"epoch": 2.89,
"learning_rate": 1.8348906205758885e-06,
"loss": 0.995,
"step": 782500
},
{
"epoch": 2.89,
"learning_rate": 1.8041141928574066e-06,
"loss": 0.9901,
"step": 783000
},
{
"epoch": 2.89,
"learning_rate": 1.773337765138925e-06,
"loss": 0.9913,
"step": 783500
},
{
"epoch": 2.9,
"learning_rate": 1.7425613374204431e-06,
"loss": 1.0005,
"step": 784000
},
{
"epoch": 2.9,
"learning_rate": 1.7117849097019613e-06,
"loss": 1.003,
"step": 784500
},
{
"epoch": 2.9,
"learning_rate": 1.6810084819834794e-06,
"loss": 0.9944,
"step": 785000
},
{
"epoch": 2.9,
"learning_rate": 1.6502320542649976e-06,
"loss": 1.0032,
"step": 785500
},
{
"epoch": 2.9,
"learning_rate": 1.6194556265465157e-06,
"loss": 0.9896,
"step": 786000
},
{
"epoch": 2.9,
"learning_rate": 1.5886791988280339e-06,
"loss": 0.9901,
"step": 786500
},
{
"epoch": 2.91,
"learning_rate": 1.5579027711095518e-06,
"loss": 0.9902,
"step": 787000
},
{
"epoch": 2.91,
"learning_rate": 1.52712634339107e-06,
"loss": 0.9962,
"step": 787500
},
{
"epoch": 2.91,
"learning_rate": 1.496349915672588e-06,
"loss": 0.9987,
"step": 788000
},
{
"epoch": 2.91,
"learning_rate": 1.4655734879541062e-06,
"loss": 0.9993,
"step": 788500
},
{
"epoch": 2.91,
"learning_rate": 1.4347970602356244e-06,
"loss": 0.9912,
"step": 789000
},
{
"epoch": 2.92,
"learning_rate": 1.4040206325171425e-06,
"loss": 0.9937,
"step": 789500
},
{
"epoch": 2.92,
"learning_rate": 1.3732442047986607e-06,
"loss": 0.9925,
"step": 790000
},
{
"epoch": 2.92,
"learning_rate": 1.3424677770801788e-06,
"loss": 0.991,
"step": 790500
},
{
"epoch": 2.92,
"learning_rate": 1.311691349361697e-06,
"loss": 0.9939,
"step": 791000
},
{
"epoch": 2.92,
"learning_rate": 1.2809149216432151e-06,
"loss": 0.9964,
"step": 791500
},
{
"epoch": 2.92,
"learning_rate": 1.2501384939247333e-06,
"loss": 0.9929,
"step": 792000
},
{
"epoch": 2.93,
"learning_rate": 1.2193620662062514e-06,
"loss": 0.9886,
"step": 792500
},
{
"epoch": 2.93,
"learning_rate": 1.1885856384877696e-06,
"loss": 0.9947,
"step": 793000
},
{
"epoch": 2.93,
"learning_rate": 1.1578092107692875e-06,
"loss": 1.0,
"step": 793500
},
{
"epoch": 2.93,
"learning_rate": 1.1270327830508057e-06,
"loss": 0.9931,
"step": 794000
},
{
"epoch": 2.93,
"learning_rate": 1.096256355332324e-06,
"loss": 0.9967,
"step": 794500
},
{
"epoch": 2.94,
"learning_rate": 1.0654799276138422e-06,
"loss": 0.9944,
"step": 795000
},
{
"epoch": 2.94,
"learning_rate": 1.0347034998953603e-06,
"loss": 0.9877,
"step": 795500
},
{
"epoch": 2.94,
"learning_rate": 1.0039270721768783e-06,
"loss": 0.9924,
"step": 796000
},
{
"epoch": 2.94,
"learning_rate": 9.731506444583964e-07,
"loss": 0.9876,
"step": 796500
},
{
"epoch": 2.94,
"learning_rate": 9.423742167399145e-07,
"loss": 0.9878,
"step": 797000
},
{
"epoch": 2.95,
"learning_rate": 9.115977890214327e-07,
"loss": 0.9909,
"step": 797500
},
{
"epoch": 2.95,
"learning_rate": 8.808213613029508e-07,
"loss": 0.9948,
"step": 798000
},
{
"epoch": 2.95,
"learning_rate": 8.50044933584469e-07,
"loss": 0.9887,
"step": 798500
},
{
"epoch": 2.95,
"learning_rate": 8.192685058659872e-07,
"loss": 0.9965,
"step": 799000
},
{
"epoch": 2.95,
"learning_rate": 7.884920781475054e-07,
"loss": 0.9906,
"step": 799500
},
{
"epoch": 2.95,
"learning_rate": 7.577156504290234e-07,
"loss": 0.9947,
"step": 800000
},
{
"epoch": 2.96,
"learning_rate": 7.269392227105416e-07,
"loss": 0.9913,
"step": 800500
},
{
"epoch": 2.96,
"learning_rate": 6.961627949920597e-07,
"loss": 0.9915,
"step": 801000
},
{
"epoch": 2.96,
"learning_rate": 6.653863672735778e-07,
"loss": 0.9846,
"step": 801500
},
{
"epoch": 2.96,
"learning_rate": 6.346099395550959e-07,
"loss": 1.0002,
"step": 802000
},
{
"epoch": 2.96,
"learning_rate": 6.038335118366142e-07,
"loss": 0.99,
"step": 802500
},
{
"epoch": 2.97,
"learning_rate": 5.730570841181322e-07,
"loss": 1.0009,
"step": 803000
},
{
"epoch": 2.97,
"learning_rate": 5.422806563996504e-07,
"loss": 0.9913,
"step": 803500
},
{
"epoch": 2.97,
"learning_rate": 5.115042286811685e-07,
"loss": 0.9849,
"step": 804000
},
{
"epoch": 2.97,
"learning_rate": 4.807278009626867e-07,
"loss": 0.9922,
"step": 804500
},
{
"epoch": 2.97,
"learning_rate": 4.499513732442048e-07,
"loss": 0.9885,
"step": 805000
},
{
"epoch": 2.97,
"learning_rate": 4.1917494552572297e-07,
"loss": 0.9909,
"step": 805500
},
{
"epoch": 2.98,
"learning_rate": 3.883985178072411e-07,
"loss": 0.99,
"step": 806000
},
{
"epoch": 2.98,
"learning_rate": 3.576220900887592e-07,
"loss": 0.9906,
"step": 806500
},
{
"epoch": 2.98,
"learning_rate": 3.268456623702774e-07,
"loss": 0.9865,
"step": 807000
},
{
"epoch": 2.98,
"learning_rate": 2.960692346517955e-07,
"loss": 0.9871,
"step": 807500
},
{
"epoch": 2.98,
"learning_rate": 2.652928069333136e-07,
"loss": 0.9933,
"step": 808000
},
{
"epoch": 2.99,
"learning_rate": 2.3451637921483178e-07,
"loss": 0.9954,
"step": 808500
},
{
"epoch": 2.99,
"learning_rate": 2.037399514963499e-07,
"loss": 0.9935,
"step": 809000
},
{
"epoch": 2.99,
"learning_rate": 1.7296352377786808e-07,
"loss": 0.9843,
"step": 809500
},
{
"epoch": 2.99,
"learning_rate": 1.421870960593862e-07,
"loss": 0.9897,
"step": 810000
},
{
"epoch": 2.99,
"learning_rate": 1.1141066834090434e-07,
"loss": 0.9914,
"step": 810500
},
{
"epoch": 3.0,
"learning_rate": 8.063424062242248e-08,
"loss": 0.9921,
"step": 811000
},
{
"epoch": 3.0,
"learning_rate": 4.985781290394062e-08,
"loss": 0.9981,
"step": 811500
},
{
"epoch": 3.0,
"learning_rate": 1.9081385185458755e-08,
"loss": 1.0003,
"step": 812000
},
{
"epoch": 3.0,
"step": 812310,
"total_flos": 3.420875271615529e+18,
"train_loss": 1.1082924517030088,
"train_runtime": 412795.0235,
"train_samples_per_second": 31.485,
"train_steps_per_second": 1.968
}
],
"max_steps": 812310,
"num_train_epochs": 3,
"total_flos": 3.420875271615529e+18,
"trial_name": null,
"trial_params": null
}