indicner-oriya-finetuned / trainer_state.json
dheerajpai's picture
Upload 12 files
744ad18
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9992697108389406,
"eval_steps": 500,
"global_step": 358500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.993031591974624e-05,
"loss": 0.6149,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.986063183949248e-05,
"loss": 0.4578,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.9790947759238714e-05,
"loss": 0.4321,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.972126367898496e-05,
"loss": 0.4103,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.9651579598731195e-05,
"loss": 0.4003,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.958189551847743e-05,
"loss": 0.3793,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.951221143822367e-05,
"loss": 0.3678,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 4.944252735796991e-05,
"loss": 0.386,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.9372843277716144e-05,
"loss": 0.3704,
"step": 4500
},
{
"epoch": 0.01,
"learning_rate": 4.930315919746239e-05,
"loss": 0.3623,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.9233475117208625e-05,
"loss": 0.3508,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.916379103695486e-05,
"loss": 0.3505,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 4.90941069567011e-05,
"loss": 0.3406,
"step": 6500
},
{
"epoch": 0.02,
"learning_rate": 4.902442287644734e-05,
"loss": 0.337,
"step": 7000
},
{
"epoch": 0.02,
"learning_rate": 4.895473879619358e-05,
"loss": 0.342,
"step": 7500
},
{
"epoch": 0.02,
"learning_rate": 4.888505471593982e-05,
"loss": 0.3322,
"step": 8000
},
{
"epoch": 0.02,
"learning_rate": 4.881537063568606e-05,
"loss": 0.3352,
"step": 8500
},
{
"epoch": 0.03,
"learning_rate": 4.87456865554323e-05,
"loss": 0.3293,
"step": 9000
},
{
"epoch": 0.03,
"learning_rate": 4.8676002475178536e-05,
"loss": 0.3249,
"step": 9500
},
{
"epoch": 0.03,
"learning_rate": 4.860631839492477e-05,
"loss": 0.3265,
"step": 10000
},
{
"epoch": 0.03,
"learning_rate": 4.853663431467101e-05,
"loss": 0.3125,
"step": 10500
},
{
"epoch": 0.03,
"learning_rate": 4.846695023441725e-05,
"loss": 0.3207,
"step": 11000
},
{
"epoch": 0.03,
"learning_rate": 4.8397266154163485e-05,
"loss": 0.3271,
"step": 11500
},
{
"epoch": 0.03,
"learning_rate": 4.832758207390973e-05,
"loss": 0.3187,
"step": 12000
},
{
"epoch": 0.03,
"learning_rate": 4.8257897993655966e-05,
"loss": 0.3163,
"step": 12500
},
{
"epoch": 0.04,
"learning_rate": 4.81882139134022e-05,
"loss": 0.3197,
"step": 13000
},
{
"epoch": 0.04,
"learning_rate": 4.811852983314844e-05,
"loss": 0.3132,
"step": 13500
},
{
"epoch": 0.04,
"learning_rate": 4.804884575289468e-05,
"loss": 0.306,
"step": 14000
},
{
"epoch": 0.04,
"learning_rate": 4.7979161672640915e-05,
"loss": 0.3053,
"step": 14500
},
{
"epoch": 0.04,
"learning_rate": 4.790947759238716e-05,
"loss": 0.3011,
"step": 15000
},
{
"epoch": 0.04,
"learning_rate": 4.7839793512133396e-05,
"loss": 0.3033,
"step": 15500
},
{
"epoch": 0.04,
"learning_rate": 4.777010943187963e-05,
"loss": 0.3103,
"step": 16000
},
{
"epoch": 0.05,
"learning_rate": 4.770042535162587e-05,
"loss": 0.3059,
"step": 16500
},
{
"epoch": 0.05,
"learning_rate": 4.763074127137211e-05,
"loss": 0.298,
"step": 17000
},
{
"epoch": 0.05,
"learning_rate": 4.7561057191118345e-05,
"loss": 0.2967,
"step": 17500
},
{
"epoch": 0.05,
"learning_rate": 4.749137311086458e-05,
"loss": 0.2844,
"step": 18000
},
{
"epoch": 0.05,
"learning_rate": 4.7421689030610826e-05,
"loss": 0.3001,
"step": 18500
},
{
"epoch": 0.05,
"learning_rate": 4.735200495035706e-05,
"loss": 0.3088,
"step": 19000
},
{
"epoch": 0.05,
"learning_rate": 4.72823208701033e-05,
"loss": 0.3079,
"step": 19500
},
{
"epoch": 0.06,
"learning_rate": 4.721263678984954e-05,
"loss": 0.2908,
"step": 20000
},
{
"epoch": 0.06,
"learning_rate": 4.714295270959578e-05,
"loss": 0.2879,
"step": 20500
},
{
"epoch": 0.06,
"learning_rate": 4.707326862934202e-05,
"loss": 0.2891,
"step": 21000
},
{
"epoch": 0.06,
"learning_rate": 4.7003584549088256e-05,
"loss": 0.3005,
"step": 21500
},
{
"epoch": 0.06,
"learning_rate": 4.69339004688345e-05,
"loss": 0.3008,
"step": 22000
},
{
"epoch": 0.06,
"learning_rate": 4.686421638858074e-05,
"loss": 0.2914,
"step": 22500
},
{
"epoch": 0.06,
"learning_rate": 4.6794532308326974e-05,
"loss": 0.2864,
"step": 23000
},
{
"epoch": 0.07,
"learning_rate": 4.672484822807321e-05,
"loss": 0.2885,
"step": 23500
},
{
"epoch": 0.07,
"learning_rate": 4.665516414781945e-05,
"loss": 0.289,
"step": 24000
},
{
"epoch": 0.07,
"learning_rate": 4.6585480067565685e-05,
"loss": 0.283,
"step": 24500
},
{
"epoch": 0.07,
"learning_rate": 4.651579598731193e-05,
"loss": 0.2917,
"step": 25000
},
{
"epoch": 0.07,
"learning_rate": 4.6446111907058167e-05,
"loss": 0.2895,
"step": 25500
},
{
"epoch": 0.07,
"learning_rate": 4.6376427826804404e-05,
"loss": 0.276,
"step": 26000
},
{
"epoch": 0.07,
"learning_rate": 4.630674374655064e-05,
"loss": 0.2847,
"step": 26500
},
{
"epoch": 0.08,
"learning_rate": 4.623705966629688e-05,
"loss": 0.2774,
"step": 27000
},
{
"epoch": 0.08,
"learning_rate": 4.6167375586043115e-05,
"loss": 0.283,
"step": 27500
},
{
"epoch": 0.08,
"learning_rate": 4.609769150578935e-05,
"loss": 0.2859,
"step": 28000
},
{
"epoch": 0.08,
"learning_rate": 4.6028007425535596e-05,
"loss": 0.2793,
"step": 28500
},
{
"epoch": 0.08,
"learning_rate": 4.5958323345281834e-05,
"loss": 0.2769,
"step": 29000
},
{
"epoch": 0.08,
"learning_rate": 4.588863926502807e-05,
"loss": 0.2763,
"step": 29500
},
{
"epoch": 0.08,
"learning_rate": 4.581895518477431e-05,
"loss": 0.2886,
"step": 30000
},
{
"epoch": 0.09,
"learning_rate": 4.5749271104520545e-05,
"loss": 0.2721,
"step": 30500
},
{
"epoch": 0.09,
"learning_rate": 4.567958702426678e-05,
"loss": 0.2805,
"step": 31000
},
{
"epoch": 0.09,
"learning_rate": 4.560990294401302e-05,
"loss": 0.2779,
"step": 31500
},
{
"epoch": 0.09,
"learning_rate": 4.5540218863759264e-05,
"loss": 0.2833,
"step": 32000
},
{
"epoch": 0.09,
"learning_rate": 4.54705347835055e-05,
"loss": 0.2881,
"step": 32500
},
{
"epoch": 0.09,
"learning_rate": 4.540085070325174e-05,
"loss": 0.2802,
"step": 33000
},
{
"epoch": 0.09,
"learning_rate": 4.5331166622997975e-05,
"loss": 0.2683,
"step": 33500
},
{
"epoch": 0.09,
"learning_rate": 4.526148254274422e-05,
"loss": 0.2732,
"step": 34000
},
{
"epoch": 0.1,
"learning_rate": 4.5191798462490456e-05,
"loss": 0.2773,
"step": 34500
},
{
"epoch": 0.1,
"learning_rate": 4.5122114382236693e-05,
"loss": 0.2791,
"step": 35000
},
{
"epoch": 0.1,
"learning_rate": 4.505243030198294e-05,
"loss": 0.2842,
"step": 35500
},
{
"epoch": 0.1,
"learning_rate": 4.4982746221729175e-05,
"loss": 0.278,
"step": 36000
},
{
"epoch": 0.1,
"learning_rate": 4.491306214147541e-05,
"loss": 0.2757,
"step": 36500
},
{
"epoch": 0.1,
"learning_rate": 4.484337806122165e-05,
"loss": 0.2741,
"step": 37000
},
{
"epoch": 0.1,
"learning_rate": 4.4773693980967886e-05,
"loss": 0.2708,
"step": 37500
},
{
"epoch": 0.11,
"learning_rate": 4.470400990071412e-05,
"loss": 0.2725,
"step": 38000
},
{
"epoch": 0.11,
"learning_rate": 4.463432582046037e-05,
"loss": 0.2735,
"step": 38500
},
{
"epoch": 0.11,
"learning_rate": 4.4564641740206604e-05,
"loss": 0.2735,
"step": 39000
},
{
"epoch": 0.11,
"learning_rate": 4.449495765995284e-05,
"loss": 0.2704,
"step": 39500
},
{
"epoch": 0.11,
"learning_rate": 4.442527357969908e-05,
"loss": 0.2757,
"step": 40000
},
{
"epoch": 0.11,
"learning_rate": 4.4355589499445316e-05,
"loss": 0.2851,
"step": 40500
},
{
"epoch": 0.11,
"learning_rate": 4.428590541919155e-05,
"loss": 0.2681,
"step": 41000
},
{
"epoch": 0.12,
"learning_rate": 4.421622133893779e-05,
"loss": 0.27,
"step": 41500
},
{
"epoch": 0.12,
"learning_rate": 4.4146537258684034e-05,
"loss": 0.2701,
"step": 42000
},
{
"epoch": 0.12,
"learning_rate": 4.407685317843027e-05,
"loss": 0.2648,
"step": 42500
},
{
"epoch": 0.12,
"learning_rate": 4.400716909817651e-05,
"loss": 0.2698,
"step": 43000
},
{
"epoch": 0.12,
"learning_rate": 4.3937485017922746e-05,
"loss": 0.2693,
"step": 43500
},
{
"epoch": 0.12,
"learning_rate": 4.386780093766898e-05,
"loss": 0.2665,
"step": 44000
},
{
"epoch": 0.12,
"learning_rate": 4.379811685741522e-05,
"loss": 0.2688,
"step": 44500
},
{
"epoch": 0.13,
"learning_rate": 4.3728432777161464e-05,
"loss": 0.2689,
"step": 45000
},
{
"epoch": 0.13,
"learning_rate": 4.36587486969077e-05,
"loss": 0.273,
"step": 45500
},
{
"epoch": 0.13,
"learning_rate": 4.358906461665394e-05,
"loss": 0.2717,
"step": 46000
},
{
"epoch": 0.13,
"learning_rate": 4.3519380536400176e-05,
"loss": 0.2691,
"step": 46500
},
{
"epoch": 0.13,
"learning_rate": 4.344969645614641e-05,
"loss": 0.2596,
"step": 47000
},
{
"epoch": 0.13,
"learning_rate": 4.338001237589266e-05,
"loss": 0.2639,
"step": 47500
},
{
"epoch": 0.13,
"learning_rate": 4.3310328295638894e-05,
"loss": 0.2724,
"step": 48000
},
{
"epoch": 0.14,
"learning_rate": 4.324064421538513e-05,
"loss": 0.2572,
"step": 48500
},
{
"epoch": 0.14,
"learning_rate": 4.3170960135131375e-05,
"loss": 0.2657,
"step": 49000
},
{
"epoch": 0.14,
"learning_rate": 4.310127605487761e-05,
"loss": 0.2681,
"step": 49500
},
{
"epoch": 0.14,
"learning_rate": 4.303159197462385e-05,
"loss": 0.2654,
"step": 50000
},
{
"epoch": 0.14,
"learning_rate": 4.296190789437009e-05,
"loss": 0.2655,
"step": 50500
},
{
"epoch": 0.14,
"learning_rate": 4.2892223814116324e-05,
"loss": 0.2658,
"step": 51000
},
{
"epoch": 0.14,
"learning_rate": 4.282253973386256e-05,
"loss": 0.2582,
"step": 51500
},
{
"epoch": 0.14,
"learning_rate": 4.2752855653608805e-05,
"loss": 0.2664,
"step": 52000
},
{
"epoch": 0.15,
"learning_rate": 4.268317157335504e-05,
"loss": 0.2508,
"step": 52500
},
{
"epoch": 0.15,
"learning_rate": 4.261348749310128e-05,
"loss": 0.2569,
"step": 53000
},
{
"epoch": 0.15,
"learning_rate": 4.254380341284752e-05,
"loss": 0.2592,
"step": 53500
},
{
"epoch": 0.15,
"learning_rate": 4.2474119332593754e-05,
"loss": 0.2645,
"step": 54000
},
{
"epoch": 0.15,
"learning_rate": 4.240443525233999e-05,
"loss": 0.2597,
"step": 54500
},
{
"epoch": 0.15,
"learning_rate": 4.2334751172086235e-05,
"loss": 0.2545,
"step": 55000
},
{
"epoch": 0.15,
"learning_rate": 4.226506709183247e-05,
"loss": 0.2652,
"step": 55500
},
{
"epoch": 0.16,
"learning_rate": 4.219538301157871e-05,
"loss": 0.2677,
"step": 56000
},
{
"epoch": 0.16,
"learning_rate": 4.2125698931324947e-05,
"loss": 0.2671,
"step": 56500
},
{
"epoch": 0.16,
"learning_rate": 4.2056014851071184e-05,
"loss": 0.2601,
"step": 57000
},
{
"epoch": 0.16,
"learning_rate": 4.198633077081742e-05,
"loss": 0.2728,
"step": 57500
},
{
"epoch": 0.16,
"learning_rate": 4.191664669056366e-05,
"loss": 0.2628,
"step": 58000
},
{
"epoch": 0.16,
"learning_rate": 4.18469626103099e-05,
"loss": 0.2628,
"step": 58500
},
{
"epoch": 0.16,
"learning_rate": 4.177727853005614e-05,
"loss": 0.2508,
"step": 59000
},
{
"epoch": 0.17,
"learning_rate": 4.1707594449802376e-05,
"loss": 0.2661,
"step": 59500
},
{
"epoch": 0.17,
"learning_rate": 4.1637910369548614e-05,
"loss": 0.2575,
"step": 60000
},
{
"epoch": 0.17,
"learning_rate": 4.156822628929485e-05,
"loss": 0.2537,
"step": 60500
},
{
"epoch": 0.17,
"learning_rate": 4.149854220904109e-05,
"loss": 0.2495,
"step": 61000
},
{
"epoch": 0.17,
"learning_rate": 4.142885812878733e-05,
"loss": 0.2564,
"step": 61500
},
{
"epoch": 0.17,
"learning_rate": 4.135917404853357e-05,
"loss": 0.2515,
"step": 62000
},
{
"epoch": 0.17,
"learning_rate": 4.128948996827981e-05,
"loss": 0.2582,
"step": 62500
},
{
"epoch": 0.18,
"learning_rate": 4.121980588802605e-05,
"loss": 0.266,
"step": 63000
},
{
"epoch": 0.18,
"learning_rate": 4.115012180777229e-05,
"loss": 0.2492,
"step": 63500
},
{
"epoch": 0.18,
"learning_rate": 4.1080437727518525e-05,
"loss": 0.2677,
"step": 64000
},
{
"epoch": 0.18,
"learning_rate": 4.101075364726476e-05,
"loss": 0.257,
"step": 64500
},
{
"epoch": 0.18,
"learning_rate": 4.0941069567011006e-05,
"loss": 0.2586,
"step": 65000
},
{
"epoch": 0.18,
"learning_rate": 4.087138548675724e-05,
"loss": 0.2571,
"step": 65500
},
{
"epoch": 0.18,
"learning_rate": 4.080170140650348e-05,
"loss": 0.2549,
"step": 66000
},
{
"epoch": 0.19,
"learning_rate": 4.073201732624972e-05,
"loss": 0.258,
"step": 66500
},
{
"epoch": 0.19,
"learning_rate": 4.0662333245995955e-05,
"loss": 0.2594,
"step": 67000
},
{
"epoch": 0.19,
"learning_rate": 4.059264916574219e-05,
"loss": 0.2564,
"step": 67500
},
{
"epoch": 0.19,
"learning_rate": 4.052296508548843e-05,
"loss": 0.2646,
"step": 68000
},
{
"epoch": 0.19,
"learning_rate": 4.045328100523467e-05,
"loss": 0.2577,
"step": 68500
},
{
"epoch": 0.19,
"learning_rate": 4.038359692498091e-05,
"loss": 0.2567,
"step": 69000
},
{
"epoch": 0.19,
"learning_rate": 4.031391284472715e-05,
"loss": 0.2518,
"step": 69500
},
{
"epoch": 0.2,
"learning_rate": 4.0244228764473384e-05,
"loss": 0.2541,
"step": 70000
},
{
"epoch": 0.2,
"learning_rate": 4.017454468421962e-05,
"loss": 0.2504,
"step": 70500
},
{
"epoch": 0.2,
"learning_rate": 4.010486060396586e-05,
"loss": 0.2595,
"step": 71000
},
{
"epoch": 0.2,
"learning_rate": 4.00351765237121e-05,
"loss": 0.2618,
"step": 71500
},
{
"epoch": 0.2,
"learning_rate": 3.996549244345834e-05,
"loss": 0.2604,
"step": 72000
},
{
"epoch": 0.2,
"learning_rate": 3.989580836320458e-05,
"loss": 0.2525,
"step": 72500
},
{
"epoch": 0.2,
"learning_rate": 3.9826124282950814e-05,
"loss": 0.2447,
"step": 73000
},
{
"epoch": 0.2,
"learning_rate": 3.975644020269705e-05,
"loss": 0.2535,
"step": 73500
},
{
"epoch": 0.21,
"learning_rate": 3.968675612244329e-05,
"loss": 0.256,
"step": 74000
},
{
"epoch": 0.21,
"learning_rate": 3.9617072042189526e-05,
"loss": 0.2475,
"step": 74500
},
{
"epoch": 0.21,
"learning_rate": 3.954738796193577e-05,
"loss": 0.2591,
"step": 75000
},
{
"epoch": 0.21,
"learning_rate": 3.947770388168201e-05,
"loss": 0.2464,
"step": 75500
},
{
"epoch": 0.21,
"learning_rate": 3.9408019801428244e-05,
"loss": 0.2535,
"step": 76000
},
{
"epoch": 0.21,
"learning_rate": 3.933833572117449e-05,
"loss": 0.2514,
"step": 76500
},
{
"epoch": 0.21,
"learning_rate": 3.9268651640920725e-05,
"loss": 0.2484,
"step": 77000
},
{
"epoch": 0.22,
"learning_rate": 3.919896756066696e-05,
"loss": 0.2483,
"step": 77500
},
{
"epoch": 0.22,
"learning_rate": 3.91292834804132e-05,
"loss": 0.2559,
"step": 78000
},
{
"epoch": 0.22,
"learning_rate": 3.9059599400159444e-05,
"loss": 0.2526,
"step": 78500
},
{
"epoch": 0.22,
"learning_rate": 3.898991531990568e-05,
"loss": 0.2537,
"step": 79000
},
{
"epoch": 0.22,
"learning_rate": 3.892023123965192e-05,
"loss": 0.2586,
"step": 79500
},
{
"epoch": 0.22,
"learning_rate": 3.8850547159398155e-05,
"loss": 0.2407,
"step": 80000
},
{
"epoch": 0.22,
"learning_rate": 3.878086307914439e-05,
"loss": 0.2518,
"step": 80500
},
{
"epoch": 0.23,
"learning_rate": 3.871117899889063e-05,
"loss": 0.254,
"step": 81000
},
{
"epoch": 0.23,
"learning_rate": 3.8641494918636874e-05,
"loss": 0.2517,
"step": 81500
},
{
"epoch": 0.23,
"learning_rate": 3.857181083838311e-05,
"loss": 0.248,
"step": 82000
},
{
"epoch": 0.23,
"learning_rate": 3.850212675812935e-05,
"loss": 0.2483,
"step": 82500
},
{
"epoch": 0.23,
"learning_rate": 3.8432442677875585e-05,
"loss": 0.2465,
"step": 83000
},
{
"epoch": 0.23,
"learning_rate": 3.836275859762182e-05,
"loss": 0.243,
"step": 83500
},
{
"epoch": 0.23,
"learning_rate": 3.829307451736806e-05,
"loss": 0.2518,
"step": 84000
},
{
"epoch": 0.24,
"learning_rate": 3.82233904371143e-05,
"loss": 0.2548,
"step": 84500
},
{
"epoch": 0.24,
"learning_rate": 3.815370635686054e-05,
"loss": 0.2492,
"step": 85000
},
{
"epoch": 0.24,
"learning_rate": 3.808402227660678e-05,
"loss": 0.2444,
"step": 85500
},
{
"epoch": 0.24,
"learning_rate": 3.8014338196353015e-05,
"loss": 0.2436,
"step": 86000
},
{
"epoch": 0.24,
"learning_rate": 3.794465411609925e-05,
"loss": 0.2494,
"step": 86500
},
{
"epoch": 0.24,
"learning_rate": 3.787497003584549e-05,
"loss": 0.2595,
"step": 87000
},
{
"epoch": 0.24,
"learning_rate": 3.7805285955591727e-05,
"loss": 0.2507,
"step": 87500
},
{
"epoch": 0.25,
"learning_rate": 3.7735601875337964e-05,
"loss": 0.2388,
"step": 88000
},
{
"epoch": 0.25,
"learning_rate": 3.766591779508421e-05,
"loss": 0.2455,
"step": 88500
},
{
"epoch": 0.25,
"learning_rate": 3.7596233714830445e-05,
"loss": 0.2364,
"step": 89000
},
{
"epoch": 0.25,
"learning_rate": 3.752654963457668e-05,
"loss": 0.2433,
"step": 89500
},
{
"epoch": 0.25,
"learning_rate": 3.7456865554322926e-05,
"loss": 0.2519,
"step": 90000
},
{
"epoch": 0.25,
"learning_rate": 3.738718147406916e-05,
"loss": 0.2495,
"step": 90500
},
{
"epoch": 0.25,
"learning_rate": 3.73174973938154e-05,
"loss": 0.2405,
"step": 91000
},
{
"epoch": 0.26,
"learning_rate": 3.7247813313561644e-05,
"loss": 0.2431,
"step": 91500
},
{
"epoch": 0.26,
"learning_rate": 3.717812923330788e-05,
"loss": 0.2474,
"step": 92000
},
{
"epoch": 0.26,
"learning_rate": 3.710844515305412e-05,
"loss": 0.2468,
"step": 92500
},
{
"epoch": 0.26,
"learning_rate": 3.7038761072800356e-05,
"loss": 0.2464,
"step": 93000
},
{
"epoch": 0.26,
"learning_rate": 3.696907699254659e-05,
"loss": 0.2464,
"step": 93500
},
{
"epoch": 0.26,
"learning_rate": 3.689939291229283e-05,
"loss": 0.2424,
"step": 94000
},
{
"epoch": 0.26,
"learning_rate": 3.682970883203907e-05,
"loss": 0.2379,
"step": 94500
},
{
"epoch": 0.26,
"learning_rate": 3.676002475178531e-05,
"loss": 0.2442,
"step": 95000
},
{
"epoch": 0.27,
"learning_rate": 3.669034067153155e-05,
"loss": 0.246,
"step": 95500
},
{
"epoch": 0.27,
"learning_rate": 3.6620656591277786e-05,
"loss": 0.2403,
"step": 96000
},
{
"epoch": 0.27,
"learning_rate": 3.655097251102402e-05,
"loss": 0.2429,
"step": 96500
},
{
"epoch": 0.27,
"learning_rate": 3.648128843077026e-05,
"loss": 0.2482,
"step": 97000
},
{
"epoch": 0.27,
"learning_rate": 3.64116043505165e-05,
"loss": 0.2432,
"step": 97500
},
{
"epoch": 0.27,
"learning_rate": 3.6341920270262735e-05,
"loss": 0.2374,
"step": 98000
},
{
"epoch": 0.27,
"learning_rate": 3.627223619000898e-05,
"loss": 0.2539,
"step": 98500
},
{
"epoch": 0.28,
"learning_rate": 3.6202552109755216e-05,
"loss": 0.2496,
"step": 99000
},
{
"epoch": 0.28,
"learning_rate": 3.613286802950145e-05,
"loss": 0.2447,
"step": 99500
},
{
"epoch": 0.28,
"learning_rate": 3.606318394924769e-05,
"loss": 0.2396,
"step": 100000
},
{
"epoch": 0.28,
"learning_rate": 3.599349986899393e-05,
"loss": 0.2472,
"step": 100500
},
{
"epoch": 0.28,
"learning_rate": 3.5923815788740164e-05,
"loss": 0.2431,
"step": 101000
},
{
"epoch": 0.28,
"learning_rate": 3.585413170848641e-05,
"loss": 0.2414,
"step": 101500
},
{
"epoch": 0.28,
"learning_rate": 3.5784447628232646e-05,
"loss": 0.2398,
"step": 102000
},
{
"epoch": 0.29,
"learning_rate": 3.571476354797888e-05,
"loss": 0.2415,
"step": 102500
},
{
"epoch": 0.29,
"learning_rate": 3.564507946772512e-05,
"loss": 0.2428,
"step": 103000
},
{
"epoch": 0.29,
"learning_rate": 3.5575395387471364e-05,
"loss": 0.2341,
"step": 103500
},
{
"epoch": 0.29,
"learning_rate": 3.55057113072176e-05,
"loss": 0.2461,
"step": 104000
},
{
"epoch": 0.29,
"learning_rate": 3.543602722696384e-05,
"loss": 0.2481,
"step": 104500
},
{
"epoch": 0.29,
"learning_rate": 3.536634314671008e-05,
"loss": 0.2409,
"step": 105000
},
{
"epoch": 0.29,
"learning_rate": 3.529665906645632e-05,
"loss": 0.2388,
"step": 105500
},
{
"epoch": 0.3,
"learning_rate": 3.522697498620256e-05,
"loss": 0.2427,
"step": 106000
},
{
"epoch": 0.3,
"learning_rate": 3.5157290905948794e-05,
"loss": 0.238,
"step": 106500
},
{
"epoch": 0.3,
"learning_rate": 3.508760682569503e-05,
"loss": 0.2453,
"step": 107000
},
{
"epoch": 0.3,
"learning_rate": 3.501792274544127e-05,
"loss": 0.245,
"step": 107500
},
{
"epoch": 0.3,
"learning_rate": 3.4948238665187505e-05,
"loss": 0.2395,
"step": 108000
},
{
"epoch": 0.3,
"learning_rate": 3.487855458493375e-05,
"loss": 0.2369,
"step": 108500
},
{
"epoch": 0.3,
"learning_rate": 3.4808870504679986e-05,
"loss": 0.2369,
"step": 109000
},
{
"epoch": 0.31,
"learning_rate": 3.4739186424426224e-05,
"loss": 0.2476,
"step": 109500
},
{
"epoch": 0.31,
"learning_rate": 3.466950234417246e-05,
"loss": 0.238,
"step": 110000
},
{
"epoch": 0.31,
"learning_rate": 3.45998182639187e-05,
"loss": 0.2417,
"step": 110500
},
{
"epoch": 0.31,
"learning_rate": 3.4530134183664935e-05,
"loss": 0.2433,
"step": 111000
},
{
"epoch": 0.31,
"learning_rate": 3.446045010341118e-05,
"loss": 0.2431,
"step": 111500
},
{
"epoch": 0.31,
"learning_rate": 3.4390766023157416e-05,
"loss": 0.2434,
"step": 112000
},
{
"epoch": 0.31,
"learning_rate": 3.4321081942903654e-05,
"loss": 0.2459,
"step": 112500
},
{
"epoch": 0.31,
"learning_rate": 3.425139786264989e-05,
"loss": 0.2464,
"step": 113000
},
{
"epoch": 0.32,
"learning_rate": 3.418171378239613e-05,
"loss": 0.2357,
"step": 113500
},
{
"epoch": 0.32,
"learning_rate": 3.4112029702142365e-05,
"loss": 0.2394,
"step": 114000
},
{
"epoch": 0.32,
"learning_rate": 3.40423456218886e-05,
"loss": 0.2404,
"step": 114500
},
{
"epoch": 0.32,
"learning_rate": 3.3972661541634846e-05,
"loss": 0.2333,
"step": 115000
},
{
"epoch": 0.32,
"learning_rate": 3.3902977461381083e-05,
"loss": 0.2323,
"step": 115500
},
{
"epoch": 0.32,
"learning_rate": 3.383329338112732e-05,
"loss": 0.2562,
"step": 116000
},
{
"epoch": 0.32,
"learning_rate": 3.376360930087356e-05,
"loss": 0.238,
"step": 116500
},
{
"epoch": 0.33,
"learning_rate": 3.36939252206198e-05,
"loss": 0.2409,
"step": 117000
},
{
"epoch": 0.33,
"learning_rate": 3.362424114036604e-05,
"loss": 0.2435,
"step": 117500
},
{
"epoch": 0.33,
"learning_rate": 3.3554557060112276e-05,
"loss": 0.2377,
"step": 118000
},
{
"epoch": 0.33,
"learning_rate": 3.348487297985852e-05,
"loss": 0.2287,
"step": 118500
},
{
"epoch": 0.33,
"learning_rate": 3.341518889960476e-05,
"loss": 0.2427,
"step": 119000
},
{
"epoch": 0.33,
"learning_rate": 3.3345504819350994e-05,
"loss": 0.2438,
"step": 119500
},
{
"epoch": 0.33,
"learning_rate": 3.327582073909723e-05,
"loss": 0.2447,
"step": 120000
},
{
"epoch": 0.34,
"learning_rate": 3.320613665884347e-05,
"loss": 0.2375,
"step": 120500
},
{
"epoch": 0.34,
"learning_rate": 3.3136452578589706e-05,
"loss": 0.2399,
"step": 121000
},
{
"epoch": 0.34,
"learning_rate": 3.306676849833595e-05,
"loss": 0.2416,
"step": 121500
},
{
"epoch": 0.34,
"learning_rate": 3.299708441808219e-05,
"loss": 0.2405,
"step": 122000
},
{
"epoch": 0.34,
"learning_rate": 3.2927400337828424e-05,
"loss": 0.2322,
"step": 122500
},
{
"epoch": 0.34,
"learning_rate": 3.285771625757466e-05,
"loss": 0.2337,
"step": 123000
},
{
"epoch": 0.34,
"learning_rate": 3.27880321773209e-05,
"loss": 0.2391,
"step": 123500
},
{
"epoch": 0.35,
"learning_rate": 3.2718348097067136e-05,
"loss": 0.239,
"step": 124000
},
{
"epoch": 0.35,
"learning_rate": 3.264866401681337e-05,
"loss": 0.2363,
"step": 124500
},
{
"epoch": 0.35,
"learning_rate": 3.257897993655962e-05,
"loss": 0.239,
"step": 125000
},
{
"epoch": 0.35,
"learning_rate": 3.2509295856305854e-05,
"loss": 0.243,
"step": 125500
},
{
"epoch": 0.35,
"learning_rate": 3.243961177605209e-05,
"loss": 0.2338,
"step": 126000
},
{
"epoch": 0.35,
"learning_rate": 3.236992769579833e-05,
"loss": 0.2386,
"step": 126500
},
{
"epoch": 0.35,
"learning_rate": 3.2300243615544566e-05,
"loss": 0.2339,
"step": 127000
},
{
"epoch": 0.36,
"learning_rate": 3.22305595352908e-05,
"loss": 0.2372,
"step": 127500
},
{
"epoch": 0.36,
"learning_rate": 3.216087545503705e-05,
"loss": 0.2368,
"step": 128000
},
{
"epoch": 0.36,
"learning_rate": 3.2091191374783284e-05,
"loss": 0.2344,
"step": 128500
},
{
"epoch": 0.36,
"learning_rate": 3.202150729452952e-05,
"loss": 0.2402,
"step": 129000
},
{
"epoch": 0.36,
"learning_rate": 3.195182321427576e-05,
"loss": 0.2352,
"step": 129500
},
{
"epoch": 0.36,
"learning_rate": 3.1882139134021996e-05,
"loss": 0.2358,
"step": 130000
},
{
"epoch": 0.36,
"learning_rate": 3.181245505376824e-05,
"loss": 0.2431,
"step": 130500
},
{
"epoch": 0.37,
"learning_rate": 3.174277097351448e-05,
"loss": 0.2312,
"step": 131000
},
{
"epoch": 0.37,
"learning_rate": 3.1673086893260714e-05,
"loss": 0.2384,
"step": 131500
},
{
"epoch": 0.37,
"learning_rate": 3.160340281300696e-05,
"loss": 0.2352,
"step": 132000
},
{
"epoch": 0.37,
"learning_rate": 3.1533718732753195e-05,
"loss": 0.236,
"step": 132500
},
{
"epoch": 0.37,
"learning_rate": 3.146403465249943e-05,
"loss": 0.2491,
"step": 133000
},
{
"epoch": 0.37,
"learning_rate": 3.139435057224567e-05,
"loss": 0.2431,
"step": 133500
},
{
"epoch": 0.37,
"learning_rate": 3.132466649199191e-05,
"loss": 0.2351,
"step": 134000
},
{
"epoch": 0.37,
"learning_rate": 3.1254982411738144e-05,
"loss": 0.2347,
"step": 134500
},
{
"epoch": 0.38,
"learning_rate": 3.118529833148439e-05,
"loss": 0.2349,
"step": 135000
},
{
"epoch": 0.38,
"learning_rate": 3.1115614251230625e-05,
"loss": 0.2296,
"step": 135500
},
{
"epoch": 0.38,
"learning_rate": 3.104593017097686e-05,
"loss": 0.2408,
"step": 136000
},
{
"epoch": 0.38,
"learning_rate": 3.09762460907231e-05,
"loss": 0.2287,
"step": 136500
},
{
"epoch": 0.38,
"learning_rate": 3.0906562010469337e-05,
"loss": 0.2327,
"step": 137000
},
{
"epoch": 0.38,
"learning_rate": 3.0836877930215574e-05,
"loss": 0.2329,
"step": 137500
},
{
"epoch": 0.38,
"learning_rate": 3.076719384996182e-05,
"loss": 0.2401,
"step": 138000
},
{
"epoch": 0.39,
"learning_rate": 3.0697509769708055e-05,
"loss": 0.2366,
"step": 138500
},
{
"epoch": 0.39,
"learning_rate": 3.062782568945429e-05,
"loss": 0.2351,
"step": 139000
},
{
"epoch": 0.39,
"learning_rate": 3.055814160920053e-05,
"loss": 0.236,
"step": 139500
},
{
"epoch": 0.39,
"learning_rate": 3.0488457528946766e-05,
"loss": 0.2282,
"step": 140000
},
{
"epoch": 0.39,
"learning_rate": 3.0418773448693007e-05,
"loss": 0.2339,
"step": 140500
},
{
"epoch": 0.39,
"learning_rate": 3.0349089368439244e-05,
"loss": 0.2336,
"step": 141000
},
{
"epoch": 0.39,
"learning_rate": 3.0279405288185485e-05,
"loss": 0.2384,
"step": 141500
},
{
"epoch": 0.4,
"learning_rate": 3.0209721207931725e-05,
"loss": 0.2247,
"step": 142000
},
{
"epoch": 0.4,
"learning_rate": 3.0140037127677963e-05,
"loss": 0.2345,
"step": 142500
},
{
"epoch": 0.4,
"learning_rate": 3.00703530474242e-05,
"loss": 0.2296,
"step": 143000
},
{
"epoch": 0.4,
"learning_rate": 3.0000668967170437e-05,
"loss": 0.2319,
"step": 143500
},
{
"epoch": 0.4,
"learning_rate": 2.9930984886916674e-05,
"loss": 0.2454,
"step": 144000
},
{
"epoch": 0.4,
"learning_rate": 2.986130080666291e-05,
"loss": 0.234,
"step": 144500
},
{
"epoch": 0.4,
"learning_rate": 2.9791616726409155e-05,
"loss": 0.2305,
"step": 145000
},
{
"epoch": 0.41,
"learning_rate": 2.9721932646155392e-05,
"loss": 0.2346,
"step": 145500
},
{
"epoch": 0.41,
"learning_rate": 2.965224856590163e-05,
"loss": 0.2336,
"step": 146000
},
{
"epoch": 0.41,
"learning_rate": 2.9582564485647867e-05,
"loss": 0.2312,
"step": 146500
},
{
"epoch": 0.41,
"learning_rate": 2.9512880405394104e-05,
"loss": 0.2349,
"step": 147000
},
{
"epoch": 0.41,
"learning_rate": 2.9443196325140345e-05,
"loss": 0.2343,
"step": 147500
},
{
"epoch": 0.41,
"learning_rate": 2.9373512244886585e-05,
"loss": 0.23,
"step": 148000
},
{
"epoch": 0.41,
"learning_rate": 2.9303828164632822e-05,
"loss": 0.2221,
"step": 148500
},
{
"epoch": 0.42,
"learning_rate": 2.9234144084379063e-05,
"loss": 0.2368,
"step": 149000
},
{
"epoch": 0.42,
"learning_rate": 2.91644600041253e-05,
"loss": 0.2262,
"step": 149500
},
{
"epoch": 0.42,
"learning_rate": 2.9094775923871537e-05,
"loss": 0.2276,
"step": 150000
},
{
"epoch": 0.42,
"learning_rate": 2.9025091843617774e-05,
"loss": 0.2368,
"step": 150500
},
{
"epoch": 0.42,
"learning_rate": 2.895540776336401e-05,
"loss": 0.2373,
"step": 151000
},
{
"epoch": 0.42,
"learning_rate": 2.8885723683110256e-05,
"loss": 0.2397,
"step": 151500
},
{
"epoch": 0.42,
"learning_rate": 2.8816039602856493e-05,
"loss": 0.2328,
"step": 152000
},
{
"epoch": 0.43,
"learning_rate": 2.874635552260273e-05,
"loss": 0.2284,
"step": 152500
},
{
"epoch": 0.43,
"learning_rate": 2.8676671442348967e-05,
"loss": 0.229,
"step": 153000
},
{
"epoch": 0.43,
"learning_rate": 2.8606987362095204e-05,
"loss": 0.228,
"step": 153500
},
{
"epoch": 0.43,
"learning_rate": 2.853730328184144e-05,
"loss": 0.2335,
"step": 154000
},
{
"epoch": 0.43,
"learning_rate": 2.8467619201587682e-05,
"loss": 0.2281,
"step": 154500
},
{
"epoch": 0.43,
"learning_rate": 2.8397935121333923e-05,
"loss": 0.2329,
"step": 155000
},
{
"epoch": 0.43,
"learning_rate": 2.8328251041080163e-05,
"loss": 0.2294,
"step": 155500
},
{
"epoch": 0.43,
"learning_rate": 2.82585669608264e-05,
"loss": 0.2292,
"step": 156000
},
{
"epoch": 0.44,
"learning_rate": 2.8188882880572638e-05,
"loss": 0.23,
"step": 156500
},
{
"epoch": 0.44,
"learning_rate": 2.8119198800318875e-05,
"loss": 0.2255,
"step": 157000
},
{
"epoch": 0.44,
"learning_rate": 2.8049514720065112e-05,
"loss": 0.235,
"step": 157500
},
{
"epoch": 0.44,
"learning_rate": 2.7979830639811356e-05,
"loss": 0.2368,
"step": 158000
},
{
"epoch": 0.44,
"learning_rate": 2.7910146559557593e-05,
"loss": 0.2277,
"step": 158500
},
{
"epoch": 0.44,
"learning_rate": 2.784046247930383e-05,
"loss": 0.2381,
"step": 159000
},
{
"epoch": 0.44,
"learning_rate": 2.7770778399050068e-05,
"loss": 0.2302,
"step": 159500
},
{
"epoch": 0.45,
"learning_rate": 2.7701094318796305e-05,
"loss": 0.225,
"step": 160000
},
{
"epoch": 0.45,
"learning_rate": 2.7631410238542542e-05,
"loss": 0.227,
"step": 160500
},
{
"epoch": 0.45,
"learning_rate": 2.7561726158288782e-05,
"loss": 0.2362,
"step": 161000
},
{
"epoch": 0.45,
"learning_rate": 2.7492042078035023e-05,
"loss": 0.2283,
"step": 161500
},
{
"epoch": 0.45,
"learning_rate": 2.742235799778126e-05,
"loss": 0.2201,
"step": 162000
},
{
"epoch": 0.45,
"learning_rate": 2.73526739175275e-05,
"loss": 0.2327,
"step": 162500
},
{
"epoch": 0.45,
"learning_rate": 2.7282989837273738e-05,
"loss": 0.2236,
"step": 163000
},
{
"epoch": 0.46,
"learning_rate": 2.7213305757019975e-05,
"loss": 0.2362,
"step": 163500
},
{
"epoch": 0.46,
"learning_rate": 2.7143621676766212e-05,
"loss": 0.2282,
"step": 164000
},
{
"epoch": 0.46,
"learning_rate": 2.707393759651245e-05,
"loss": 0.2388,
"step": 164500
},
{
"epoch": 0.46,
"learning_rate": 2.7004253516258693e-05,
"loss": 0.2274,
"step": 165000
},
{
"epoch": 0.46,
"learning_rate": 2.693456943600493e-05,
"loss": 0.2294,
"step": 165500
},
{
"epoch": 0.46,
"learning_rate": 2.6864885355751168e-05,
"loss": 0.2206,
"step": 166000
},
{
"epoch": 0.46,
"learning_rate": 2.6795201275497405e-05,
"loss": 0.2253,
"step": 166500
},
{
"epoch": 0.47,
"learning_rate": 2.6725517195243642e-05,
"loss": 0.2387,
"step": 167000
},
{
"epoch": 0.47,
"learning_rate": 2.665583311498988e-05,
"loss": 0.232,
"step": 167500
},
{
"epoch": 0.47,
"learning_rate": 2.6586149034736123e-05,
"loss": 0.2296,
"step": 168000
},
{
"epoch": 0.47,
"learning_rate": 2.651646495448236e-05,
"loss": 0.2168,
"step": 168500
},
{
"epoch": 0.47,
"learning_rate": 2.6446780874228598e-05,
"loss": 0.2217,
"step": 169000
},
{
"epoch": 0.47,
"learning_rate": 2.637709679397484e-05,
"loss": 0.2333,
"step": 169500
},
{
"epoch": 0.47,
"learning_rate": 2.6307412713721076e-05,
"loss": 0.2269,
"step": 170000
},
{
"epoch": 0.48,
"learning_rate": 2.6237728633467313e-05,
"loss": 0.2326,
"step": 170500
},
{
"epoch": 0.48,
"learning_rate": 2.616804455321355e-05,
"loss": 0.2262,
"step": 171000
},
{
"epoch": 0.48,
"learning_rate": 2.6098360472959794e-05,
"loss": 0.2245,
"step": 171500
},
{
"epoch": 0.48,
"learning_rate": 2.602867639270603e-05,
"loss": 0.2269,
"step": 172000
},
{
"epoch": 0.48,
"learning_rate": 2.5958992312452268e-05,
"loss": 0.2313,
"step": 172500
},
{
"epoch": 0.48,
"learning_rate": 2.5889308232198505e-05,
"loss": 0.2239,
"step": 173000
},
{
"epoch": 0.48,
"learning_rate": 2.5819624151944743e-05,
"loss": 0.2328,
"step": 173500
},
{
"epoch": 0.49,
"learning_rate": 2.574994007169098e-05,
"loss": 0.2284,
"step": 174000
},
{
"epoch": 0.49,
"learning_rate": 2.568025599143722e-05,
"loss": 0.2314,
"step": 174500
},
{
"epoch": 0.49,
"learning_rate": 2.561057191118346e-05,
"loss": 0.2284,
"step": 175000
},
{
"epoch": 0.49,
"learning_rate": 2.5540887830929698e-05,
"loss": 0.216,
"step": 175500
},
{
"epoch": 0.49,
"learning_rate": 2.547120375067594e-05,
"loss": 0.2232,
"step": 176000
},
{
"epoch": 0.49,
"learning_rate": 2.5401519670422176e-05,
"loss": 0.2267,
"step": 176500
},
{
"epoch": 0.49,
"learning_rate": 2.5331835590168413e-05,
"loss": 0.227,
"step": 177000
},
{
"epoch": 0.49,
"learning_rate": 2.526215150991465e-05,
"loss": 0.2336,
"step": 177500
},
{
"epoch": 0.5,
"learning_rate": 2.5192467429660894e-05,
"loss": 0.2279,
"step": 178000
},
{
"epoch": 0.5,
"learning_rate": 2.512278334940713e-05,
"loss": 0.2236,
"step": 178500
},
{
"epoch": 0.5,
"learning_rate": 2.505309926915337e-05,
"loss": 0.2309,
"step": 179000
},
{
"epoch": 0.5,
"learning_rate": 2.4983415188899606e-05,
"loss": 0.2229,
"step": 179500
},
{
"epoch": 0.5,
"learning_rate": 2.4913731108645843e-05,
"loss": 0.2181,
"step": 180000
},
{
"epoch": 0.5,
"learning_rate": 2.4844047028392083e-05,
"loss": 0.2312,
"step": 180500
},
{
"epoch": 0.5,
"learning_rate": 2.477436294813832e-05,
"loss": 0.2264,
"step": 181000
},
{
"epoch": 0.51,
"learning_rate": 2.4704678867884558e-05,
"loss": 0.2313,
"step": 181500
},
{
"epoch": 0.51,
"learning_rate": 2.46349947876308e-05,
"loss": 0.2267,
"step": 182000
},
{
"epoch": 0.51,
"learning_rate": 2.4565310707377036e-05,
"loss": 0.234,
"step": 182500
},
{
"epoch": 0.51,
"learning_rate": 2.4495626627123276e-05,
"loss": 0.2303,
"step": 183000
},
{
"epoch": 0.51,
"learning_rate": 2.4425942546869513e-05,
"loss": 0.2201,
"step": 183500
},
{
"epoch": 0.51,
"learning_rate": 2.4356258466615754e-05,
"loss": 0.23,
"step": 184000
},
{
"epoch": 0.51,
"learning_rate": 2.428657438636199e-05,
"loss": 0.2192,
"step": 184500
},
{
"epoch": 0.52,
"learning_rate": 2.421689030610823e-05,
"loss": 0.2319,
"step": 185000
},
{
"epoch": 0.52,
"learning_rate": 2.414720622585447e-05,
"loss": 0.226,
"step": 185500
},
{
"epoch": 0.52,
"learning_rate": 2.4077522145600706e-05,
"loss": 0.232,
"step": 186000
},
{
"epoch": 0.52,
"learning_rate": 2.4007838065346943e-05,
"loss": 0.2327,
"step": 186500
},
{
"epoch": 0.52,
"learning_rate": 2.3938153985093184e-05,
"loss": 0.2231,
"step": 187000
},
{
"epoch": 0.52,
"learning_rate": 2.386846990483942e-05,
"loss": 0.22,
"step": 187500
},
{
"epoch": 0.52,
"learning_rate": 2.3798785824585658e-05,
"loss": 0.2243,
"step": 188000
},
{
"epoch": 0.53,
"learning_rate": 2.3729101744331895e-05,
"loss": 0.2287,
"step": 188500
},
{
"epoch": 0.53,
"learning_rate": 2.3659417664078136e-05,
"loss": 0.2227,
"step": 189000
},
{
"epoch": 0.53,
"learning_rate": 2.3589733583824377e-05,
"loss": 0.2321,
"step": 189500
},
{
"epoch": 0.53,
"learning_rate": 2.3520049503570614e-05,
"loss": 0.22,
"step": 190000
},
{
"epoch": 0.53,
"learning_rate": 2.3450365423316854e-05,
"loss": 0.2247,
"step": 190500
},
{
"epoch": 0.53,
"learning_rate": 2.338068134306309e-05,
"loss": 0.2251,
"step": 191000
},
{
"epoch": 0.53,
"learning_rate": 2.331099726280933e-05,
"loss": 0.2182,
"step": 191500
},
{
"epoch": 0.54,
"learning_rate": 2.324131318255557e-05,
"loss": 0.2244,
"step": 192000
},
{
"epoch": 0.54,
"learning_rate": 2.3171629102301806e-05,
"loss": 0.2236,
"step": 192500
},
{
"epoch": 0.54,
"learning_rate": 2.3101945022048044e-05,
"loss": 0.2228,
"step": 193000
},
{
"epoch": 0.54,
"learning_rate": 2.303226094179428e-05,
"loss": 0.216,
"step": 193500
},
{
"epoch": 0.54,
"learning_rate": 2.296257686154052e-05,
"loss": 0.2137,
"step": 194000
},
{
"epoch": 0.54,
"learning_rate": 2.289289278128676e-05,
"loss": 0.2207,
"step": 194500
},
{
"epoch": 0.54,
"learning_rate": 2.2823208701032996e-05,
"loss": 0.2285,
"step": 195000
},
{
"epoch": 0.54,
"learning_rate": 2.2753524620779236e-05,
"loss": 0.2209,
"step": 195500
},
{
"epoch": 0.55,
"learning_rate": 2.2683840540525473e-05,
"loss": 0.2313,
"step": 196000
},
{
"epoch": 0.55,
"learning_rate": 2.2614156460271714e-05,
"loss": 0.2304,
"step": 196500
},
{
"epoch": 0.55,
"learning_rate": 2.2544472380017955e-05,
"loss": 0.2236,
"step": 197000
},
{
"epoch": 0.55,
"learning_rate": 2.2474788299764192e-05,
"loss": 0.2211,
"step": 197500
},
{
"epoch": 0.55,
"learning_rate": 2.240510421951043e-05,
"loss": 0.2294,
"step": 198000
},
{
"epoch": 0.55,
"learning_rate": 2.2335420139256666e-05,
"loss": 0.221,
"step": 198500
},
{
"epoch": 0.55,
"learning_rate": 2.2265736059002907e-05,
"loss": 0.2227,
"step": 199000
},
{
"epoch": 0.56,
"learning_rate": 2.2196051978749144e-05,
"loss": 0.2204,
"step": 199500
},
{
"epoch": 0.56,
"learning_rate": 2.212636789849538e-05,
"loss": 0.2248,
"step": 200000
},
{
"epoch": 0.56,
"learning_rate": 2.2056683818241622e-05,
"loss": 0.2206,
"step": 200500
},
{
"epoch": 0.56,
"learning_rate": 2.198699973798786e-05,
"loss": 0.2322,
"step": 201000
},
{
"epoch": 0.56,
"learning_rate": 2.1917315657734096e-05,
"loss": 0.2211,
"step": 201500
},
{
"epoch": 0.56,
"learning_rate": 2.1847631577480337e-05,
"loss": 0.2307,
"step": 202000
},
{
"epoch": 0.56,
"learning_rate": 2.1777947497226574e-05,
"loss": 0.2143,
"step": 202500
},
{
"epoch": 0.57,
"learning_rate": 2.1708263416972814e-05,
"loss": 0.2201,
"step": 203000
},
{
"epoch": 0.57,
"learning_rate": 2.163857933671905e-05,
"loss": 0.2269,
"step": 203500
},
{
"epoch": 0.57,
"learning_rate": 2.1568895256465292e-05,
"loss": 0.2222,
"step": 204000
},
{
"epoch": 0.57,
"learning_rate": 2.149921117621153e-05,
"loss": 0.2158,
"step": 204500
},
{
"epoch": 0.57,
"learning_rate": 2.1429527095957767e-05,
"loss": 0.216,
"step": 205000
},
{
"epoch": 0.57,
"learning_rate": 2.1359843015704007e-05,
"loss": 0.2322,
"step": 205500
},
{
"epoch": 0.57,
"learning_rate": 2.1290158935450244e-05,
"loss": 0.2167,
"step": 206000
},
{
"epoch": 0.58,
"learning_rate": 2.122047485519648e-05,
"loss": 0.2231,
"step": 206500
},
{
"epoch": 0.58,
"learning_rate": 2.1150790774942722e-05,
"loss": 0.2242,
"step": 207000
},
{
"epoch": 0.58,
"learning_rate": 2.108110669468896e-05,
"loss": 0.237,
"step": 207500
},
{
"epoch": 0.58,
"learning_rate": 2.1011422614435196e-05,
"loss": 0.221,
"step": 208000
},
{
"epoch": 0.58,
"learning_rate": 2.0941738534181434e-05,
"loss": 0.2251,
"step": 208500
},
{
"epoch": 0.58,
"learning_rate": 2.0872054453927674e-05,
"loss": 0.2272,
"step": 209000
},
{
"epoch": 0.58,
"learning_rate": 2.080237037367391e-05,
"loss": 0.2235,
"step": 209500
},
{
"epoch": 0.59,
"learning_rate": 2.0732686293420152e-05,
"loss": 0.2179,
"step": 210000
},
{
"epoch": 0.59,
"learning_rate": 2.0663002213166392e-05,
"loss": 0.2276,
"step": 210500
},
{
"epoch": 0.59,
"learning_rate": 2.059331813291263e-05,
"loss": 0.2215,
"step": 211000
},
{
"epoch": 0.59,
"learning_rate": 2.0523634052658867e-05,
"loss": 0.2266,
"step": 211500
},
{
"epoch": 0.59,
"learning_rate": 2.0453949972405107e-05,
"loss": 0.2185,
"step": 212000
},
{
"epoch": 0.59,
"learning_rate": 2.0384265892151345e-05,
"loss": 0.2255,
"step": 212500
},
{
"epoch": 0.59,
"learning_rate": 2.0314581811897582e-05,
"loss": 0.2307,
"step": 213000
},
{
"epoch": 0.6,
"learning_rate": 2.024489773164382e-05,
"loss": 0.2201,
"step": 213500
},
{
"epoch": 0.6,
"learning_rate": 2.017521365139006e-05,
"loss": 0.2208,
"step": 214000
},
{
"epoch": 0.6,
"learning_rate": 2.0105529571136297e-05,
"loss": 0.2177,
"step": 214500
},
{
"epoch": 0.6,
"learning_rate": 2.0035845490882534e-05,
"loss": 0.2136,
"step": 215000
},
{
"epoch": 0.6,
"learning_rate": 1.9966161410628775e-05,
"loss": 0.2254,
"step": 215500
},
{
"epoch": 0.6,
"learning_rate": 1.9896477330375012e-05,
"loss": 0.2208,
"step": 216000
},
{
"epoch": 0.6,
"learning_rate": 1.982679325012125e-05,
"loss": 0.2213,
"step": 216500
},
{
"epoch": 0.6,
"learning_rate": 1.975710916986749e-05,
"loss": 0.2253,
"step": 217000
},
{
"epoch": 0.61,
"learning_rate": 1.968742508961373e-05,
"loss": 0.2172,
"step": 217500
},
{
"epoch": 0.61,
"learning_rate": 1.9617741009359967e-05,
"loss": 0.2342,
"step": 218000
},
{
"epoch": 0.61,
"learning_rate": 1.9548056929106204e-05,
"loss": 0.2177,
"step": 218500
},
{
"epoch": 0.61,
"learning_rate": 1.9478372848852445e-05,
"loss": 0.2263,
"step": 219000
},
{
"epoch": 0.61,
"learning_rate": 1.9408688768598682e-05,
"loss": 0.2334,
"step": 219500
},
{
"epoch": 0.61,
"learning_rate": 1.933900468834492e-05,
"loss": 0.2152,
"step": 220000
},
{
"epoch": 0.61,
"learning_rate": 1.926932060809116e-05,
"loss": 0.2154,
"step": 220500
},
{
"epoch": 0.62,
"learning_rate": 1.9199636527837397e-05,
"loss": 0.2123,
"step": 221000
},
{
"epoch": 0.62,
"learning_rate": 1.9129952447583634e-05,
"loss": 0.2204,
"step": 221500
},
{
"epoch": 0.62,
"learning_rate": 1.9060268367329875e-05,
"loss": 0.2167,
"step": 222000
},
{
"epoch": 0.62,
"learning_rate": 1.8990584287076112e-05,
"loss": 0.2234,
"step": 222500
},
{
"epoch": 0.62,
"learning_rate": 1.892090020682235e-05,
"loss": 0.2244,
"step": 223000
},
{
"epoch": 0.62,
"learning_rate": 1.885121612656859e-05,
"loss": 0.2227,
"step": 223500
},
{
"epoch": 0.62,
"learning_rate": 1.8781532046314827e-05,
"loss": 0.2177,
"step": 224000
},
{
"epoch": 0.63,
"learning_rate": 1.8711847966061068e-05,
"loss": 0.2212,
"step": 224500
},
{
"epoch": 0.63,
"learning_rate": 1.8642163885807305e-05,
"loss": 0.2218,
"step": 225000
},
{
"epoch": 0.63,
"learning_rate": 1.8572479805553545e-05,
"loss": 0.2278,
"step": 225500
},
{
"epoch": 0.63,
"learning_rate": 1.8502795725299782e-05,
"loss": 0.2233,
"step": 226000
},
{
"epoch": 0.63,
"learning_rate": 1.843311164504602e-05,
"loss": 0.2242,
"step": 226500
},
{
"epoch": 0.63,
"learning_rate": 1.836342756479226e-05,
"loss": 0.2268,
"step": 227000
},
{
"epoch": 0.63,
"learning_rate": 1.8293743484538497e-05,
"loss": 0.2189,
"step": 227500
},
{
"epoch": 0.64,
"learning_rate": 1.8224059404284735e-05,
"loss": 0.2257,
"step": 228000
},
{
"epoch": 0.64,
"learning_rate": 1.8154375324030972e-05,
"loss": 0.2134,
"step": 228500
},
{
"epoch": 0.64,
"learning_rate": 1.8084691243777212e-05,
"loss": 0.2147,
"step": 229000
},
{
"epoch": 0.64,
"learning_rate": 1.801500716352345e-05,
"loss": 0.2204,
"step": 229500
},
{
"epoch": 0.64,
"learning_rate": 1.7945323083269687e-05,
"loss": 0.215,
"step": 230000
},
{
"epoch": 0.64,
"learning_rate": 1.7875639003015927e-05,
"loss": 0.2182,
"step": 230500
},
{
"epoch": 0.64,
"learning_rate": 1.7805954922762168e-05,
"loss": 0.2259,
"step": 231000
},
{
"epoch": 0.65,
"learning_rate": 1.7736270842508405e-05,
"loss": 0.2207,
"step": 231500
},
{
"epoch": 0.65,
"learning_rate": 1.7666586762254646e-05,
"loss": 0.2197,
"step": 232000
},
{
"epoch": 0.65,
"learning_rate": 1.7596902682000883e-05,
"loss": 0.2229,
"step": 232500
},
{
"epoch": 0.65,
"learning_rate": 1.752721860174712e-05,
"loss": 0.2196,
"step": 233000
},
{
"epoch": 0.65,
"learning_rate": 1.7457534521493357e-05,
"loss": 0.217,
"step": 233500
},
{
"epoch": 0.65,
"learning_rate": 1.7387850441239598e-05,
"loss": 0.2143,
"step": 234000
},
{
"epoch": 0.65,
"learning_rate": 1.7318166360985835e-05,
"loss": 0.2177,
"step": 234500
},
{
"epoch": 0.66,
"learning_rate": 1.7248482280732072e-05,
"loss": 0.2198,
"step": 235000
},
{
"epoch": 0.66,
"learning_rate": 1.7178798200478313e-05,
"loss": 0.2264,
"step": 235500
},
{
"epoch": 0.66,
"learning_rate": 1.710911412022455e-05,
"loss": 0.2157,
"step": 236000
},
{
"epoch": 0.66,
"learning_rate": 1.7039430039970787e-05,
"loss": 0.2275,
"step": 236500
},
{
"epoch": 0.66,
"learning_rate": 1.6969745959717028e-05,
"loss": 0.2197,
"step": 237000
},
{
"epoch": 0.66,
"learning_rate": 1.6900061879463265e-05,
"loss": 0.2203,
"step": 237500
},
{
"epoch": 0.66,
"learning_rate": 1.6830377799209505e-05,
"loss": 0.2242,
"step": 238000
},
{
"epoch": 0.66,
"learning_rate": 1.6760693718955743e-05,
"loss": 0.2214,
"step": 238500
},
{
"epoch": 0.67,
"learning_rate": 1.6691009638701983e-05,
"loss": 0.2226,
"step": 239000
},
{
"epoch": 0.67,
"learning_rate": 1.662132555844822e-05,
"loss": 0.2247,
"step": 239500
},
{
"epoch": 0.67,
"learning_rate": 1.6551641478194458e-05,
"loss": 0.2177,
"step": 240000
},
{
"epoch": 0.67,
"learning_rate": 1.6481957397940698e-05,
"loss": 0.215,
"step": 240500
},
{
"epoch": 0.67,
"learning_rate": 1.6412273317686935e-05,
"loss": 0.211,
"step": 241000
},
{
"epoch": 0.67,
"learning_rate": 1.6342589237433172e-05,
"loss": 0.2142,
"step": 241500
},
{
"epoch": 0.67,
"learning_rate": 1.6272905157179413e-05,
"loss": 0.2119,
"step": 242000
},
{
"epoch": 0.68,
"learning_rate": 1.620322107692565e-05,
"loss": 0.2178,
"step": 242500
},
{
"epoch": 0.68,
"learning_rate": 1.6133536996671887e-05,
"loss": 0.2145,
"step": 243000
},
{
"epoch": 0.68,
"learning_rate": 1.6063852916418125e-05,
"loss": 0.2108,
"step": 243500
},
{
"epoch": 0.68,
"learning_rate": 1.5994168836164365e-05,
"loss": 0.2207,
"step": 244000
},
{
"epoch": 0.68,
"learning_rate": 1.5924484755910606e-05,
"loss": 0.2127,
"step": 244500
},
{
"epoch": 0.68,
"learning_rate": 1.5854800675656843e-05,
"loss": 0.2249,
"step": 245000
},
{
"epoch": 0.68,
"learning_rate": 1.5785116595403084e-05,
"loss": 0.2257,
"step": 245500
},
{
"epoch": 0.69,
"learning_rate": 1.571543251514932e-05,
"loss": 0.2178,
"step": 246000
},
{
"epoch": 0.69,
"learning_rate": 1.5645748434895558e-05,
"loss": 0.2237,
"step": 246500
},
{
"epoch": 0.69,
"learning_rate": 1.55760643546418e-05,
"loss": 0.2141,
"step": 247000
},
{
"epoch": 0.69,
"learning_rate": 1.5506380274388036e-05,
"loss": 0.2165,
"step": 247500
},
{
"epoch": 0.69,
"learning_rate": 1.5436696194134273e-05,
"loss": 0.2096,
"step": 248000
},
{
"epoch": 0.69,
"learning_rate": 1.5367012113880513e-05,
"loss": 0.2096,
"step": 248500
},
{
"epoch": 0.69,
"learning_rate": 1.529732803362675e-05,
"loss": 0.2258,
"step": 249000
},
{
"epoch": 0.7,
"learning_rate": 1.522764395337299e-05,
"loss": 0.2141,
"step": 249500
},
{
"epoch": 0.7,
"learning_rate": 1.5157959873119227e-05,
"loss": 0.2116,
"step": 250000
},
{
"epoch": 0.7,
"learning_rate": 1.5088275792865467e-05,
"loss": 0.2153,
"step": 250500
},
{
"epoch": 0.7,
"learning_rate": 1.5018591712611704e-05,
"loss": 0.218,
"step": 251000
},
{
"epoch": 0.7,
"learning_rate": 1.4948907632357942e-05,
"loss": 0.2172,
"step": 251500
},
{
"epoch": 0.7,
"learning_rate": 1.4879223552104182e-05,
"loss": 0.2281,
"step": 252000
},
{
"epoch": 0.7,
"learning_rate": 1.480953947185042e-05,
"loss": 0.2171,
"step": 252500
},
{
"epoch": 0.71,
"learning_rate": 1.4739855391596658e-05,
"loss": 0.2155,
"step": 253000
},
{
"epoch": 0.71,
"learning_rate": 1.4670171311342897e-05,
"loss": 0.2148,
"step": 253500
},
{
"epoch": 0.71,
"learning_rate": 1.4600487231089136e-05,
"loss": 0.2189,
"step": 254000
},
{
"epoch": 0.71,
"learning_rate": 1.4530803150835373e-05,
"loss": 0.2221,
"step": 254500
},
{
"epoch": 0.71,
"learning_rate": 1.446111907058161e-05,
"loss": 0.2223,
"step": 255000
},
{
"epoch": 0.71,
"learning_rate": 1.4391434990327851e-05,
"loss": 0.2191,
"step": 255500
},
{
"epoch": 0.71,
"learning_rate": 1.4321750910074088e-05,
"loss": 0.2195,
"step": 256000
},
{
"epoch": 0.71,
"learning_rate": 1.4252066829820327e-05,
"loss": 0.2182,
"step": 256500
},
{
"epoch": 0.72,
"learning_rate": 1.4182382749566568e-05,
"loss": 0.2169,
"step": 257000
},
{
"epoch": 0.72,
"learning_rate": 1.4112698669312805e-05,
"loss": 0.2239,
"step": 257500
},
{
"epoch": 0.72,
"learning_rate": 1.4043014589059042e-05,
"loss": 0.2209,
"step": 258000
},
{
"epoch": 0.72,
"learning_rate": 1.3973330508805283e-05,
"loss": 0.2043,
"step": 258500
},
{
"epoch": 0.72,
"learning_rate": 1.390364642855152e-05,
"loss": 0.2211,
"step": 259000
},
{
"epoch": 0.72,
"learning_rate": 1.3833962348297757e-05,
"loss": 0.2159,
"step": 259500
},
{
"epoch": 0.72,
"learning_rate": 1.3764278268043996e-05,
"loss": 0.2114,
"step": 260000
},
{
"epoch": 0.73,
"learning_rate": 1.3694594187790236e-05,
"loss": 0.2229,
"step": 260500
},
{
"epoch": 0.73,
"learning_rate": 1.3624910107536474e-05,
"loss": 0.2158,
"step": 261000
},
{
"epoch": 0.73,
"learning_rate": 1.355522602728271e-05,
"loss": 0.2145,
"step": 261500
},
{
"epoch": 0.73,
"learning_rate": 1.3485541947028951e-05,
"loss": 0.2132,
"step": 262000
},
{
"epoch": 0.73,
"learning_rate": 1.3415857866775188e-05,
"loss": 0.2184,
"step": 262500
},
{
"epoch": 0.73,
"learning_rate": 1.3346173786521426e-05,
"loss": 0.2164,
"step": 263000
},
{
"epoch": 0.73,
"learning_rate": 1.3276489706267666e-05,
"loss": 0.2111,
"step": 263500
},
{
"epoch": 0.74,
"learning_rate": 1.3206805626013905e-05,
"loss": 0.2128,
"step": 264000
},
{
"epoch": 0.74,
"learning_rate": 1.3137121545760142e-05,
"loss": 0.2203,
"step": 264500
},
{
"epoch": 0.74,
"learning_rate": 1.306743746550638e-05,
"loss": 0.2163,
"step": 265000
},
{
"epoch": 0.74,
"learning_rate": 1.299775338525262e-05,
"loss": 0.2169,
"step": 265500
},
{
"epoch": 0.74,
"learning_rate": 1.2928069304998857e-05,
"loss": 0.2157,
"step": 266000
},
{
"epoch": 0.74,
"learning_rate": 1.2858385224745096e-05,
"loss": 0.2246,
"step": 266500
},
{
"epoch": 0.74,
"learning_rate": 1.2788701144491335e-05,
"loss": 0.2204,
"step": 267000
},
{
"epoch": 0.75,
"learning_rate": 1.2719017064237574e-05,
"loss": 0.2137,
"step": 267500
},
{
"epoch": 0.75,
"learning_rate": 1.2649332983983811e-05,
"loss": 0.2195,
"step": 268000
},
{
"epoch": 0.75,
"learning_rate": 1.2579648903730052e-05,
"loss": 0.2189,
"step": 268500
},
{
"epoch": 0.75,
"learning_rate": 1.2509964823476289e-05,
"loss": 0.208,
"step": 269000
},
{
"epoch": 0.75,
"learning_rate": 1.2440280743222526e-05,
"loss": 0.2109,
"step": 269500
},
{
"epoch": 0.75,
"learning_rate": 1.2370596662968765e-05,
"loss": 0.2249,
"step": 270000
},
{
"epoch": 0.75,
"learning_rate": 1.2300912582715004e-05,
"loss": 0.2159,
"step": 270500
},
{
"epoch": 0.76,
"learning_rate": 1.2231228502461243e-05,
"loss": 0.2114,
"step": 271000
},
{
"epoch": 0.76,
"learning_rate": 1.2161544422207482e-05,
"loss": 0.2095,
"step": 271500
},
{
"epoch": 0.76,
"learning_rate": 1.2091860341953719e-05,
"loss": 0.218,
"step": 272000
},
{
"epoch": 0.76,
"learning_rate": 1.2022176261699958e-05,
"loss": 0.2208,
"step": 272500
},
{
"epoch": 0.76,
"learning_rate": 1.1952492181446195e-05,
"loss": 0.2098,
"step": 273000
},
{
"epoch": 0.76,
"learning_rate": 1.1882808101192434e-05,
"loss": 0.2073,
"step": 273500
},
{
"epoch": 0.76,
"learning_rate": 1.1813124020938674e-05,
"loss": 0.2084,
"step": 274000
},
{
"epoch": 0.77,
"learning_rate": 1.1743439940684911e-05,
"loss": 0.2071,
"step": 274500
},
{
"epoch": 0.77,
"learning_rate": 1.167375586043115e-05,
"loss": 0.2207,
"step": 275000
},
{
"epoch": 0.77,
"learning_rate": 1.1604071780177387e-05,
"loss": 0.2186,
"step": 275500
},
{
"epoch": 0.77,
"learning_rate": 1.1534387699923626e-05,
"loss": 0.2132,
"step": 276000
},
{
"epoch": 0.77,
"learning_rate": 1.1464703619669865e-05,
"loss": 0.2107,
"step": 276500
},
{
"epoch": 0.77,
"learning_rate": 1.1395019539416104e-05,
"loss": 0.2157,
"step": 277000
},
{
"epoch": 0.77,
"learning_rate": 1.1325335459162343e-05,
"loss": 0.2138,
"step": 277500
},
{
"epoch": 0.77,
"learning_rate": 1.1255651378908582e-05,
"loss": 0.2157,
"step": 278000
},
{
"epoch": 0.78,
"learning_rate": 1.1185967298654819e-05,
"loss": 0.2101,
"step": 278500
},
{
"epoch": 0.78,
"learning_rate": 1.1116283218401058e-05,
"loss": 0.2177,
"step": 279000
},
{
"epoch": 0.78,
"learning_rate": 1.1046599138147295e-05,
"loss": 0.2142,
"step": 279500
},
{
"epoch": 0.78,
"learning_rate": 1.0976915057893534e-05,
"loss": 0.2122,
"step": 280000
},
{
"epoch": 0.78,
"learning_rate": 1.0907230977639773e-05,
"loss": 0.2212,
"step": 280500
},
{
"epoch": 0.78,
"learning_rate": 1.0837546897386012e-05,
"loss": 0.2096,
"step": 281000
},
{
"epoch": 0.78,
"learning_rate": 1.076786281713225e-05,
"loss": 0.2091,
"step": 281500
},
{
"epoch": 0.79,
"learning_rate": 1.0698178736878488e-05,
"loss": 0.2124,
"step": 282000
},
{
"epoch": 0.79,
"learning_rate": 1.0628494656624727e-05,
"loss": 0.2175,
"step": 282500
},
{
"epoch": 0.79,
"learning_rate": 1.0558810576370966e-05,
"loss": 0.2124,
"step": 283000
},
{
"epoch": 0.79,
"learning_rate": 1.0489126496117203e-05,
"loss": 0.2046,
"step": 283500
},
{
"epoch": 0.79,
"learning_rate": 1.0419442415863442e-05,
"loss": 0.2072,
"step": 284000
},
{
"epoch": 0.79,
"learning_rate": 1.034975833560968e-05,
"loss": 0.2181,
"step": 284500
},
{
"epoch": 0.79,
"learning_rate": 1.028007425535592e-05,
"loss": 0.22,
"step": 285000
},
{
"epoch": 0.8,
"learning_rate": 1.0210390175102158e-05,
"loss": 0.2087,
"step": 285500
},
{
"epoch": 0.8,
"learning_rate": 1.0140706094848395e-05,
"loss": 0.2074,
"step": 286000
},
{
"epoch": 0.8,
"learning_rate": 1.0071022014594634e-05,
"loss": 0.211,
"step": 286500
},
{
"epoch": 0.8,
"learning_rate": 1.0001337934340872e-05,
"loss": 0.2156,
"step": 287000
},
{
"epoch": 0.8,
"learning_rate": 9.931653854087112e-06,
"loss": 0.2081,
"step": 287500
},
{
"epoch": 0.8,
"learning_rate": 9.861969773833351e-06,
"loss": 0.2158,
"step": 288000
},
{
"epoch": 0.8,
"learning_rate": 9.792285693579588e-06,
"loss": 0.2212,
"step": 288500
},
{
"epoch": 0.81,
"learning_rate": 9.722601613325827e-06,
"loss": 0.2113,
"step": 289000
},
{
"epoch": 0.81,
"learning_rate": 9.652917533072064e-06,
"loss": 0.2052,
"step": 289500
},
{
"epoch": 0.81,
"learning_rate": 9.583233452818303e-06,
"loss": 0.2151,
"step": 290000
},
{
"epoch": 0.81,
"learning_rate": 9.513549372564542e-06,
"loss": 0.2147,
"step": 290500
},
{
"epoch": 0.81,
"learning_rate": 9.44386529231078e-06,
"loss": 0.2114,
"step": 291000
},
{
"epoch": 0.81,
"learning_rate": 9.37418121205702e-06,
"loss": 0.2175,
"step": 291500
},
{
"epoch": 0.81,
"learning_rate": 9.304497131803257e-06,
"loss": 0.2145,
"step": 292000
},
{
"epoch": 0.82,
"learning_rate": 9.234813051549496e-06,
"loss": 0.212,
"step": 292500
},
{
"epoch": 0.82,
"learning_rate": 9.165128971295735e-06,
"loss": 0.2117,
"step": 293000
},
{
"epoch": 0.82,
"learning_rate": 9.095444891041972e-06,
"loss": 0.2065,
"step": 293500
},
{
"epoch": 0.82,
"learning_rate": 9.02576081078821e-06,
"loss": 0.2192,
"step": 294000
},
{
"epoch": 0.82,
"learning_rate": 8.95607673053445e-06,
"loss": 0.2074,
"step": 294500
},
{
"epoch": 0.82,
"learning_rate": 8.886392650280688e-06,
"loss": 0.2155,
"step": 295000
},
{
"epoch": 0.82,
"learning_rate": 8.816708570026927e-06,
"loss": 0.2099,
"step": 295500
},
{
"epoch": 0.83,
"learning_rate": 8.747024489773165e-06,
"loss": 0.2192,
"step": 296000
},
{
"epoch": 0.83,
"learning_rate": 8.677340409519403e-06,
"loss": 0.2117,
"step": 296500
},
{
"epoch": 0.83,
"learning_rate": 8.60765632926564e-06,
"loss": 0.2118,
"step": 297000
},
{
"epoch": 0.83,
"learning_rate": 8.53797224901188e-06,
"loss": 0.2074,
"step": 297500
},
{
"epoch": 0.83,
"learning_rate": 8.468288168758118e-06,
"loss": 0.2071,
"step": 298000
},
{
"epoch": 0.83,
"learning_rate": 8.398604088504357e-06,
"loss": 0.2113,
"step": 298500
},
{
"epoch": 0.83,
"learning_rate": 8.328920008250596e-06,
"loss": 0.2072,
"step": 299000
},
{
"epoch": 0.83,
"learning_rate": 8.259235927996833e-06,
"loss": 0.2048,
"step": 299500
},
{
"epoch": 0.84,
"learning_rate": 8.189551847743072e-06,
"loss": 0.2104,
"step": 300000
},
{
"epoch": 0.84,
"learning_rate": 8.119867767489311e-06,
"loss": 0.218,
"step": 300500
},
{
"epoch": 0.84,
"learning_rate": 8.050183687235548e-06,
"loss": 0.2168,
"step": 301000
},
{
"epoch": 0.84,
"learning_rate": 7.980499606981789e-06,
"loss": 0.2217,
"step": 301500
},
{
"epoch": 0.84,
"learning_rate": 7.910815526728026e-06,
"loss": 0.2126,
"step": 302000
},
{
"epoch": 0.84,
"learning_rate": 7.841131446474265e-06,
"loss": 0.2084,
"step": 302500
},
{
"epoch": 0.84,
"learning_rate": 7.771447366220504e-06,
"loss": 0.2098,
"step": 303000
},
{
"epoch": 0.85,
"learning_rate": 7.701763285966741e-06,
"loss": 0.2131,
"step": 303500
},
{
"epoch": 0.85,
"learning_rate": 7.63207920571298e-06,
"loss": 0.2078,
"step": 304000
},
{
"epoch": 0.85,
"learning_rate": 7.562395125459218e-06,
"loss": 0.2164,
"step": 304500
},
{
"epoch": 0.85,
"learning_rate": 7.492711045205457e-06,
"loss": 0.2114,
"step": 305000
},
{
"epoch": 0.85,
"learning_rate": 7.423026964951696e-06,
"loss": 0.206,
"step": 305500
},
{
"epoch": 0.85,
"learning_rate": 7.353342884697934e-06,
"loss": 0.2108,
"step": 306000
},
{
"epoch": 0.85,
"learning_rate": 7.2836588044441725e-06,
"loss": 0.2156,
"step": 306500
},
{
"epoch": 0.86,
"learning_rate": 7.2139747241904106e-06,
"loss": 0.2124,
"step": 307000
},
{
"epoch": 0.86,
"learning_rate": 7.1442906439366494e-06,
"loss": 0.2197,
"step": 307500
},
{
"epoch": 0.86,
"learning_rate": 7.074606563682888e-06,
"loss": 0.2127,
"step": 308000
},
{
"epoch": 0.86,
"learning_rate": 7.0049224834291255e-06,
"loss": 0.2113,
"step": 308500
},
{
"epoch": 0.86,
"learning_rate": 6.935238403175364e-06,
"loss": 0.2093,
"step": 309000
},
{
"epoch": 0.86,
"learning_rate": 6.865554322921602e-06,
"loss": 0.2191,
"step": 309500
},
{
"epoch": 0.86,
"learning_rate": 6.795870242667841e-06,
"loss": 0.2023,
"step": 310000
},
{
"epoch": 0.87,
"learning_rate": 6.72618616241408e-06,
"loss": 0.2092,
"step": 310500
},
{
"epoch": 0.87,
"learning_rate": 6.656502082160318e-06,
"loss": 0.2088,
"step": 311000
},
{
"epoch": 0.87,
"learning_rate": 6.586818001906557e-06,
"loss": 0.2077,
"step": 311500
},
{
"epoch": 0.87,
"learning_rate": 6.517133921652794e-06,
"loss": 0.2075,
"step": 312000
},
{
"epoch": 0.87,
"learning_rate": 6.447449841399034e-06,
"loss": 0.2123,
"step": 312500
},
{
"epoch": 0.87,
"learning_rate": 6.377765761145273e-06,
"loss": 0.2137,
"step": 313000
},
{
"epoch": 0.87,
"learning_rate": 6.30808168089151e-06,
"loss": 0.2154,
"step": 313500
},
{
"epoch": 0.88,
"learning_rate": 6.238397600637749e-06,
"loss": 0.2137,
"step": 314000
},
{
"epoch": 0.88,
"learning_rate": 6.168713520383988e-06,
"loss": 0.2106,
"step": 314500
},
{
"epoch": 0.88,
"learning_rate": 6.099029440130226e-06,
"loss": 0.2101,
"step": 315000
},
{
"epoch": 0.88,
"learning_rate": 6.029345359876464e-06,
"loss": 0.2154,
"step": 315500
},
{
"epoch": 0.88,
"learning_rate": 5.959661279622703e-06,
"loss": 0.2122,
"step": 316000
},
{
"epoch": 0.88,
"learning_rate": 5.889977199368942e-06,
"loss": 0.2119,
"step": 316500
},
{
"epoch": 0.88,
"learning_rate": 5.82029311911518e-06,
"loss": 0.2069,
"step": 317000
},
{
"epoch": 0.88,
"learning_rate": 5.750609038861418e-06,
"loss": 0.2166,
"step": 317500
},
{
"epoch": 0.89,
"learning_rate": 5.6809249586076566e-06,
"loss": 0.2135,
"step": 318000
},
{
"epoch": 0.89,
"learning_rate": 5.611240878353895e-06,
"loss": 0.216,
"step": 318500
},
{
"epoch": 0.89,
"learning_rate": 5.5415567981001335e-06,
"loss": 0.2078,
"step": 319000
},
{
"epoch": 0.89,
"learning_rate": 5.471872717846372e-06,
"loss": 0.2033,
"step": 319500
},
{
"epoch": 0.89,
"learning_rate": 5.40218863759261e-06,
"loss": 0.218,
"step": 320000
},
{
"epoch": 0.89,
"learning_rate": 5.3325045573388484e-06,
"loss": 0.2128,
"step": 320500
},
{
"epoch": 0.89,
"learning_rate": 5.262820477085087e-06,
"loss": 0.2115,
"step": 321000
},
{
"epoch": 0.9,
"learning_rate": 5.193136396831326e-06,
"loss": 0.2094,
"step": 321500
},
{
"epoch": 0.9,
"learning_rate": 5.123452316577564e-06,
"loss": 0.2096,
"step": 322000
},
{
"epoch": 0.9,
"learning_rate": 5.053768236323802e-06,
"loss": 0.2107,
"step": 322500
},
{
"epoch": 0.9,
"learning_rate": 4.984084156070041e-06,
"loss": 0.2123,
"step": 323000
},
{
"epoch": 0.9,
"learning_rate": 4.91440007581628e-06,
"loss": 0.2089,
"step": 323500
},
{
"epoch": 0.9,
"learning_rate": 4.844715995562518e-06,
"loss": 0.2103,
"step": 324000
},
{
"epoch": 0.9,
"learning_rate": 4.775031915308757e-06,
"loss": 0.2089,
"step": 324500
},
{
"epoch": 0.91,
"learning_rate": 4.705347835054995e-06,
"loss": 0.2041,
"step": 325000
},
{
"epoch": 0.91,
"learning_rate": 4.635663754801233e-06,
"loss": 0.2033,
"step": 325500
},
{
"epoch": 0.91,
"learning_rate": 4.565979674547472e-06,
"loss": 0.2161,
"step": 326000
},
{
"epoch": 0.91,
"learning_rate": 4.496295594293711e-06,
"loss": 0.2082,
"step": 326500
},
{
"epoch": 0.91,
"learning_rate": 4.426611514039949e-06,
"loss": 0.2064,
"step": 327000
},
{
"epoch": 0.91,
"learning_rate": 4.356927433786187e-06,
"loss": 0.2146,
"step": 327500
},
{
"epoch": 0.91,
"learning_rate": 4.287243353532426e-06,
"loss": 0.2149,
"step": 328000
},
{
"epoch": 0.92,
"learning_rate": 4.2175592732786646e-06,
"loss": 0.2031,
"step": 328500
},
{
"epoch": 0.92,
"learning_rate": 4.147875193024903e-06,
"loss": 0.2051,
"step": 329000
},
{
"epoch": 0.92,
"learning_rate": 4.078191112771141e-06,
"loss": 0.2138,
"step": 329500
},
{
"epoch": 0.92,
"learning_rate": 4.0085070325173795e-06,
"loss": 0.2039,
"step": 330000
},
{
"epoch": 0.92,
"learning_rate": 3.9388229522636175e-06,
"loss": 0.2155,
"step": 330500
},
{
"epoch": 0.92,
"learning_rate": 3.869138872009856e-06,
"loss": 0.2039,
"step": 331000
},
{
"epoch": 0.92,
"learning_rate": 3.799454791756095e-06,
"loss": 0.2121,
"step": 331500
},
{
"epoch": 0.93,
"learning_rate": 3.7297707115023333e-06,
"loss": 0.2129,
"step": 332000
},
{
"epoch": 0.93,
"learning_rate": 3.6600866312485714e-06,
"loss": 0.2153,
"step": 332500
},
{
"epoch": 0.93,
"learning_rate": 3.59040255099481e-06,
"loss": 0.2072,
"step": 333000
},
{
"epoch": 0.93,
"learning_rate": 3.5207184707410487e-06,
"loss": 0.2059,
"step": 333500
},
{
"epoch": 0.93,
"learning_rate": 3.451034390487287e-06,
"loss": 0.2111,
"step": 334000
},
{
"epoch": 0.93,
"learning_rate": 3.3813503102335256e-06,
"loss": 0.2161,
"step": 334500
},
{
"epoch": 0.93,
"learning_rate": 3.3116662299797636e-06,
"loss": 0.2153,
"step": 335000
},
{
"epoch": 0.94,
"learning_rate": 3.241982149726002e-06,
"loss": 0.2092,
"step": 335500
},
{
"epoch": 0.94,
"learning_rate": 3.172298069472241e-06,
"loss": 0.2151,
"step": 336000
},
{
"epoch": 0.94,
"learning_rate": 3.1026139892184794e-06,
"loss": 0.2036,
"step": 336500
},
{
"epoch": 0.94,
"learning_rate": 3.032929908964718e-06,
"loss": 0.214,
"step": 337000
},
{
"epoch": 0.94,
"learning_rate": 2.963245828710956e-06,
"loss": 0.214,
"step": 337500
},
{
"epoch": 0.94,
"learning_rate": 2.8935617484571948e-06,
"loss": 0.2063,
"step": 338000
},
{
"epoch": 0.94,
"learning_rate": 2.823877668203433e-06,
"loss": 0.2064,
"step": 338500
},
{
"epoch": 0.94,
"learning_rate": 2.7541935879496717e-06,
"loss": 0.2051,
"step": 339000
},
{
"epoch": 0.95,
"learning_rate": 2.68450950769591e-06,
"loss": 0.2069,
"step": 339500
},
{
"epoch": 0.95,
"learning_rate": 2.614825427442148e-06,
"loss": 0.2058,
"step": 340000
},
{
"epoch": 0.95,
"learning_rate": 2.545141347188387e-06,
"loss": 0.2122,
"step": 340500
},
{
"epoch": 0.95,
"learning_rate": 2.475457266934625e-06,
"loss": 0.202,
"step": 341000
},
{
"epoch": 0.95,
"learning_rate": 2.405773186680864e-06,
"loss": 0.2069,
"step": 341500
},
{
"epoch": 0.95,
"learning_rate": 2.336089106427102e-06,
"loss": 0.2081,
"step": 342000
},
{
"epoch": 0.95,
"learning_rate": 2.2664050261733405e-06,
"loss": 0.2072,
"step": 342500
},
{
"epoch": 0.96,
"learning_rate": 2.1967209459195793e-06,
"loss": 0.2092,
"step": 343000
},
{
"epoch": 0.96,
"learning_rate": 2.1270368656658174e-06,
"loss": 0.2121,
"step": 343500
},
{
"epoch": 0.96,
"learning_rate": 2.0573527854120563e-06,
"loss": 0.2068,
"step": 344000
},
{
"epoch": 0.96,
"learning_rate": 1.9876687051582943e-06,
"loss": 0.201,
"step": 344500
},
{
"epoch": 0.96,
"learning_rate": 1.9179846249045327e-06,
"loss": 0.2171,
"step": 345000
},
{
"epoch": 0.96,
"learning_rate": 1.8483005446507714e-06,
"loss": 0.2043,
"step": 345500
},
{
"epoch": 0.96,
"learning_rate": 1.7786164643970096e-06,
"loss": 0.2038,
"step": 346000
},
{
"epoch": 0.97,
"learning_rate": 1.7089323841432483e-06,
"loss": 0.2107,
"step": 346500
},
{
"epoch": 0.97,
"learning_rate": 1.6392483038894866e-06,
"loss": 0.2078,
"step": 347000
},
{
"epoch": 0.97,
"learning_rate": 1.5695642236357252e-06,
"loss": 0.209,
"step": 347500
},
{
"epoch": 0.97,
"learning_rate": 1.4998801433819637e-06,
"loss": 0.2118,
"step": 348000
},
{
"epoch": 0.97,
"learning_rate": 1.4301960631282021e-06,
"loss": 0.2247,
"step": 348500
},
{
"epoch": 0.97,
"learning_rate": 1.3605119828744404e-06,
"loss": 0.2094,
"step": 349000
},
{
"epoch": 0.97,
"learning_rate": 1.2908279026206788e-06,
"loss": 0.2154,
"step": 349500
},
{
"epoch": 0.98,
"learning_rate": 1.2211438223669173e-06,
"loss": 0.2062,
"step": 350000
},
{
"epoch": 0.98,
"learning_rate": 1.151459742113156e-06,
"loss": 0.2105,
"step": 350500
},
{
"epoch": 0.98,
"learning_rate": 1.0817756618593944e-06,
"loss": 0.2177,
"step": 351000
},
{
"epoch": 0.98,
"learning_rate": 1.0120915816056329e-06,
"loss": 0.2062,
"step": 351500
},
{
"epoch": 0.98,
"learning_rate": 9.424075013518711e-07,
"loss": 0.205,
"step": 352000
},
{
"epoch": 0.98,
"learning_rate": 8.727234210981097e-07,
"loss": 0.2032,
"step": 352500
},
{
"epoch": 0.98,
"learning_rate": 8.030393408443481e-07,
"loss": 0.2169,
"step": 353000
},
{
"epoch": 0.99,
"learning_rate": 7.333552605905866e-07,
"loss": 0.2153,
"step": 353500
},
{
"epoch": 0.99,
"learning_rate": 6.636711803368249e-07,
"loss": 0.2046,
"step": 354000
},
{
"epoch": 0.99,
"learning_rate": 5.939871000830635e-07,
"loss": 0.2068,
"step": 354500
},
{
"epoch": 0.99,
"learning_rate": 5.243030198293018e-07,
"loss": 0.2188,
"step": 355000
},
{
"epoch": 0.99,
"learning_rate": 4.5461893957554035e-07,
"loss": 0.2139,
"step": 355500
},
{
"epoch": 0.99,
"learning_rate": 3.849348593217788e-07,
"loss": 0.2085,
"step": 356000
},
{
"epoch": 0.99,
"learning_rate": 3.1525077906801726e-07,
"loss": 0.1994,
"step": 356500
},
{
"epoch": 1.0,
"learning_rate": 2.455666988142557e-07,
"loss": 0.216,
"step": 357000
},
{
"epoch": 1.0,
"learning_rate": 1.7588261856049415e-07,
"loss": 0.2123,
"step": 357500
},
{
"epoch": 1.0,
"learning_rate": 1.061985383067326e-07,
"loss": 0.2063,
"step": 358000
},
{
"epoch": 1.0,
"learning_rate": 3.651445805297105e-08,
"loss": 0.2098,
"step": 358500
}
],
"logging_steps": 500,
"max_steps": 358762,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 1.26740447502336e+17,
"trial_name": null,
"trial_params": null
}