toy_retnet_1.3b_pretrain / trainer_state.json
wac81's picture
Upload folder using huggingface_hub
8f7942f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9058960565963444,
"global_step": 301500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.99749613653868e-05,
"loss": 4.5704,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.9949922730773584e-05,
"loss": 3.9367,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.992488409616038e-05,
"loss": 4.3169,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.9899845461547165e-05,
"loss": 4.4226,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.987480682693396e-05,
"loss": 4.3777,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.984976819232076e-05,
"loss": 4.3272,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.982472955770754e-05,
"loss": 4.2964,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 4.979969092309434e-05,
"loss": 4.2587,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.9774652288481124e-05,
"loss": 4.2217,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.974961365386792e-05,
"loss": 4.2056,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.972457501925471e-05,
"loss": 4.2108,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.96995363846415e-05,
"loss": 4.135,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 4.96744977500283e-05,
"loss": 4.1552,
"step": 6500
},
{
"epoch": 0.02,
"learning_rate": 4.9649459115415084e-05,
"loss": 4.0666,
"step": 7000
},
{
"epoch": 0.02,
"learning_rate": 4.962442048080188e-05,
"loss": 4.1164,
"step": 7500
},
{
"epoch": 0.02,
"learning_rate": 4.959938184618867e-05,
"loss": 4.0798,
"step": 8000
},
{
"epoch": 0.03,
"learning_rate": 4.957434321157546e-05,
"loss": 4.0284,
"step": 8500
},
{
"epoch": 0.03,
"learning_rate": 4.954930457696226e-05,
"loss": 4.046,
"step": 9000
},
{
"epoch": 0.03,
"learning_rate": 4.9524265942349043e-05,
"loss": 4.0595,
"step": 9500
},
{
"epoch": 0.03,
"learning_rate": 4.949922730773584e-05,
"loss": 4.0243,
"step": 10000
},
{
"epoch": 0.03,
"learning_rate": 4.947418867312263e-05,
"loss": 4.0064,
"step": 10500
},
{
"epoch": 0.03,
"learning_rate": 4.944915003850942e-05,
"loss": 3.9907,
"step": 11000
},
{
"epoch": 0.03,
"learning_rate": 4.942411140389621e-05,
"loss": 3.9488,
"step": 11500
},
{
"epoch": 0.04,
"learning_rate": 4.9399072769283e-05,
"loss": 3.95,
"step": 12000
},
{
"epoch": 0.04,
"learning_rate": 4.93740341346698e-05,
"loss": 3.9148,
"step": 12500
},
{
"epoch": 0.04,
"learning_rate": 4.934899550005659e-05,
"loss": 3.917,
"step": 13000
},
{
"epoch": 0.04,
"learning_rate": 4.932395686544338e-05,
"loss": 3.8958,
"step": 13500
},
{
"epoch": 0.04,
"learning_rate": 4.929891823083017e-05,
"loss": 3.9248,
"step": 14000
},
{
"epoch": 0.04,
"learning_rate": 4.927387959621696e-05,
"loss": 3.8752,
"step": 14500
},
{
"epoch": 0.05,
"learning_rate": 4.924884096160376e-05,
"loss": 3.8548,
"step": 15000
},
{
"epoch": 0.05,
"learning_rate": 4.922380232699055e-05,
"loss": 3.8882,
"step": 15500
},
{
"epoch": 0.05,
"learning_rate": 4.919876369237734e-05,
"loss": 3.8457,
"step": 16000
},
{
"epoch": 0.05,
"learning_rate": 4.917372505776413e-05,
"loss": 3.8472,
"step": 16500
},
{
"epoch": 0.05,
"learning_rate": 4.914868642315092e-05,
"loss": 3.8743,
"step": 17000
},
{
"epoch": 0.05,
"learning_rate": 4.912364778853771e-05,
"loss": 3.8541,
"step": 17500
},
{
"epoch": 0.05,
"learning_rate": 4.909860915392451e-05,
"loss": 3.823,
"step": 18000
},
{
"epoch": 0.06,
"learning_rate": 4.90735705193113e-05,
"loss": 3.841,
"step": 18500
},
{
"epoch": 0.06,
"learning_rate": 4.904853188469809e-05,
"loss": 3.8262,
"step": 19000
},
{
"epoch": 0.06,
"learning_rate": 4.902349325008488e-05,
"loss": 3.7779,
"step": 19500
},
{
"epoch": 0.06,
"learning_rate": 4.899845461547167e-05,
"loss": 3.7952,
"step": 20000
},
{
"epoch": 0.06,
"learning_rate": 4.897341598085847e-05,
"loss": 3.8115,
"step": 20500
},
{
"epoch": 0.06,
"learning_rate": 4.894837734624526e-05,
"loss": 3.7978,
"step": 21000
},
{
"epoch": 0.06,
"learning_rate": 4.892333871163205e-05,
"loss": 3.7757,
"step": 21500
},
{
"epoch": 0.07,
"learning_rate": 4.889830007701884e-05,
"loss": 3.7953,
"step": 22000
},
{
"epoch": 0.07,
"learning_rate": 4.887326144240563e-05,
"loss": 3.7988,
"step": 22500
},
{
"epoch": 0.07,
"learning_rate": 4.884822280779243e-05,
"loss": 3.779,
"step": 23000
},
{
"epoch": 0.07,
"learning_rate": 4.882318417317922e-05,
"loss": 3.7545,
"step": 23500
},
{
"epoch": 0.07,
"learning_rate": 4.879814553856601e-05,
"loss": 3.7502,
"step": 24000
},
{
"epoch": 0.07,
"learning_rate": 4.87731069039528e-05,
"loss": 3.762,
"step": 24500
},
{
"epoch": 0.08,
"learning_rate": 4.874806826933959e-05,
"loss": 3.7711,
"step": 25000
},
{
"epoch": 0.08,
"learning_rate": 4.872302963472639e-05,
"loss": 3.7314,
"step": 25500
},
{
"epoch": 0.08,
"learning_rate": 4.869799100011317e-05,
"loss": 3.7427,
"step": 26000
},
{
"epoch": 0.08,
"learning_rate": 4.867295236549997e-05,
"loss": 3.7024,
"step": 26500
},
{
"epoch": 0.08,
"learning_rate": 4.864791373088676e-05,
"loss": 3.7356,
"step": 27000
},
{
"epoch": 0.08,
"learning_rate": 4.862287509627355e-05,
"loss": 3.7117,
"step": 27500
},
{
"epoch": 0.08,
"learning_rate": 4.859783646166035e-05,
"loss": 3.7235,
"step": 28000
},
{
"epoch": 0.09,
"learning_rate": 4.857279782704713e-05,
"loss": 3.6791,
"step": 28500
},
{
"epoch": 0.09,
"learning_rate": 4.854775919243393e-05,
"loss": 3.696,
"step": 29000
},
{
"epoch": 0.09,
"learning_rate": 4.852272055782072e-05,
"loss": 3.6852,
"step": 29500
},
{
"epoch": 0.09,
"learning_rate": 4.849768192320751e-05,
"loss": 3.6896,
"step": 30000
},
{
"epoch": 0.09,
"learning_rate": 4.847264328859431e-05,
"loss": 3.6759,
"step": 30500
},
{
"epoch": 0.09,
"learning_rate": 4.844760465398109e-05,
"loss": 3.7012,
"step": 31000
},
{
"epoch": 0.09,
"learning_rate": 4.842256601936789e-05,
"loss": 3.6378,
"step": 31500
},
{
"epoch": 0.1,
"learning_rate": 4.839752738475467e-05,
"loss": 3.6746,
"step": 32000
},
{
"epoch": 0.1,
"learning_rate": 4.837248875014147e-05,
"loss": 3.68,
"step": 32500
},
{
"epoch": 0.1,
"learning_rate": 4.834745011552827e-05,
"loss": 3.6775,
"step": 33000
},
{
"epoch": 0.1,
"learning_rate": 4.832241148091505e-05,
"loss": 3.6471,
"step": 33500
},
{
"epoch": 0.1,
"learning_rate": 4.829737284630185e-05,
"loss": 3.662,
"step": 34000
},
{
"epoch": 0.1,
"learning_rate": 4.827233421168863e-05,
"loss": 3.6359,
"step": 34500
},
{
"epoch": 0.11,
"learning_rate": 4.824729557707543e-05,
"loss": 3.6494,
"step": 35000
},
{
"epoch": 0.11,
"learning_rate": 4.822225694246222e-05,
"loss": 3.6187,
"step": 35500
},
{
"epoch": 0.11,
"learning_rate": 4.819721830784901e-05,
"loss": 3.6115,
"step": 36000
},
{
"epoch": 0.11,
"learning_rate": 4.817217967323581e-05,
"loss": 3.6062,
"step": 36500
},
{
"epoch": 0.11,
"learning_rate": 4.814714103862259e-05,
"loss": 3.6109,
"step": 37000
},
{
"epoch": 0.11,
"learning_rate": 4.812210240400939e-05,
"loss": 3.6239,
"step": 37500
},
{
"epoch": 0.11,
"learning_rate": 4.809706376939618e-05,
"loss": 3.646,
"step": 38000
},
{
"epoch": 0.12,
"learning_rate": 4.807202513478297e-05,
"loss": 3.5898,
"step": 38500
},
{
"epoch": 0.12,
"learning_rate": 4.804698650016977e-05,
"loss": 3.6008,
"step": 39000
},
{
"epoch": 0.12,
"learning_rate": 4.802194786555655e-05,
"loss": 3.6265,
"step": 39500
},
{
"epoch": 0.12,
"learning_rate": 4.799690923094335e-05,
"loss": 3.6111,
"step": 40000
},
{
"epoch": 0.12,
"learning_rate": 4.797187059633014e-05,
"loss": 3.5766,
"step": 40500
},
{
"epoch": 0.12,
"learning_rate": 4.794683196171693e-05,
"loss": 3.5916,
"step": 41000
},
{
"epoch": 0.12,
"learning_rate": 4.792179332710373e-05,
"loss": 3.641,
"step": 41500
},
{
"epoch": 0.13,
"learning_rate": 4.789675469249051e-05,
"loss": 3.5743,
"step": 42000
},
{
"epoch": 0.13,
"learning_rate": 4.787171605787731e-05,
"loss": 3.6153,
"step": 42500
},
{
"epoch": 0.13,
"learning_rate": 4.78466774232641e-05,
"loss": 3.6073,
"step": 43000
},
{
"epoch": 0.13,
"learning_rate": 4.782163878865089e-05,
"loss": 3.5881,
"step": 43500
},
{
"epoch": 0.13,
"learning_rate": 4.779660015403769e-05,
"loss": 3.5649,
"step": 44000
},
{
"epoch": 0.13,
"learning_rate": 4.777156151942447e-05,
"loss": 3.5602,
"step": 44500
},
{
"epoch": 0.14,
"learning_rate": 4.774652288481127e-05,
"loss": 3.5707,
"step": 45000
},
{
"epoch": 0.14,
"learning_rate": 4.772148425019806e-05,
"loss": 3.5546,
"step": 45500
},
{
"epoch": 0.14,
"learning_rate": 4.769644561558485e-05,
"loss": 3.5327,
"step": 46000
},
{
"epoch": 0.14,
"learning_rate": 4.767140698097164e-05,
"loss": 3.5489,
"step": 46500
},
{
"epoch": 0.14,
"learning_rate": 4.764636834635843e-05,
"loss": 3.5559,
"step": 47000
},
{
"epoch": 0.14,
"learning_rate": 4.762132971174523e-05,
"loss": 3.5514,
"step": 47500
},
{
"epoch": 0.14,
"learning_rate": 4.759629107713202e-05,
"loss": 3.5295,
"step": 48000
},
{
"epoch": 0.15,
"learning_rate": 4.757125244251881e-05,
"loss": 3.5198,
"step": 48500
},
{
"epoch": 0.15,
"learning_rate": 4.75462138079056e-05,
"loss": 3.5231,
"step": 49000
},
{
"epoch": 0.15,
"learning_rate": 4.752117517329239e-05,
"loss": 3.5695,
"step": 49500
},
{
"epoch": 0.15,
"learning_rate": 4.749613653867919e-05,
"loss": 3.581,
"step": 50000
},
{
"epoch": 0.15,
"learning_rate": 4.747109790406598e-05,
"loss": 3.5228,
"step": 50500
},
{
"epoch": 0.15,
"learning_rate": 4.744605926945277e-05,
"loss": 3.5382,
"step": 51000
},
{
"epoch": 0.15,
"learning_rate": 4.742102063483956e-05,
"loss": 3.5384,
"step": 51500
},
{
"epoch": 0.16,
"learning_rate": 4.739598200022635e-05,
"loss": 3.4945,
"step": 52000
},
{
"epoch": 0.16,
"learning_rate": 4.737094336561314e-05,
"loss": 3.503,
"step": 52500
},
{
"epoch": 0.16,
"learning_rate": 4.734590473099994e-05,
"loss": 3.523,
"step": 53000
},
{
"epoch": 0.16,
"learning_rate": 4.732086609638673e-05,
"loss": 3.5115,
"step": 53500
},
{
"epoch": 0.16,
"learning_rate": 4.729582746177352e-05,
"loss": 3.5014,
"step": 54000
},
{
"epoch": 0.16,
"learning_rate": 4.727078882716031e-05,
"loss": 3.5253,
"step": 54500
},
{
"epoch": 0.17,
"learning_rate": 4.72457501925471e-05,
"loss": 3.4913,
"step": 55000
},
{
"epoch": 0.17,
"learning_rate": 4.72207115579339e-05,
"loss": 3.5152,
"step": 55500
},
{
"epoch": 0.17,
"learning_rate": 4.719567292332069e-05,
"loss": 3.4847,
"step": 56000
},
{
"epoch": 0.17,
"learning_rate": 4.717063428870748e-05,
"loss": 3.5086,
"step": 56500
},
{
"epoch": 0.17,
"learning_rate": 4.714559565409427e-05,
"loss": 3.5071,
"step": 57000
},
{
"epoch": 0.17,
"learning_rate": 4.712055701948106e-05,
"loss": 3.4918,
"step": 57500
},
{
"epoch": 0.17,
"learning_rate": 4.709551838486786e-05,
"loss": 3.4677,
"step": 58000
},
{
"epoch": 0.18,
"learning_rate": 4.707047975025464e-05,
"loss": 3.4594,
"step": 58500
},
{
"epoch": 0.18,
"learning_rate": 4.704544111564144e-05,
"loss": 3.5236,
"step": 59000
},
{
"epoch": 0.18,
"learning_rate": 4.702040248102823e-05,
"loss": 3.4743,
"step": 59500
},
{
"epoch": 0.18,
"learning_rate": 4.699536384641502e-05,
"loss": 3.478,
"step": 60000
},
{
"epoch": 0.18,
"learning_rate": 4.6970325211801817e-05,
"loss": 3.458,
"step": 60500
},
{
"epoch": 0.18,
"learning_rate": 4.69452865771886e-05,
"loss": 3.464,
"step": 61000
},
{
"epoch": 0.18,
"learning_rate": 4.69202479425754e-05,
"loss": 3.4501,
"step": 61500
},
{
"epoch": 0.19,
"learning_rate": 4.689520930796219e-05,
"loss": 3.4772,
"step": 62000
},
{
"epoch": 0.19,
"learning_rate": 4.687017067334898e-05,
"loss": 3.4522,
"step": 62500
},
{
"epoch": 0.19,
"learning_rate": 4.6845132038735776e-05,
"loss": 3.4415,
"step": 63000
},
{
"epoch": 0.19,
"learning_rate": 4.682009340412256e-05,
"loss": 3.4953,
"step": 63500
},
{
"epoch": 0.19,
"learning_rate": 4.679505476950936e-05,
"loss": 3.4713,
"step": 64000
},
{
"epoch": 0.19,
"learning_rate": 4.677001613489615e-05,
"loss": 3.4588,
"step": 64500
},
{
"epoch": 0.2,
"learning_rate": 4.674497750028294e-05,
"loss": 3.4765,
"step": 65000
},
{
"epoch": 0.2,
"learning_rate": 4.671993886566973e-05,
"loss": 3.4408,
"step": 65500
},
{
"epoch": 0.2,
"learning_rate": 4.669490023105652e-05,
"loss": 3.4454,
"step": 66000
},
{
"epoch": 0.2,
"learning_rate": 4.666986159644332e-05,
"loss": 3.4629,
"step": 66500
},
{
"epoch": 0.2,
"learning_rate": 4.66448229618301e-05,
"loss": 3.4751,
"step": 67000
},
{
"epoch": 0.2,
"learning_rate": 4.66197843272169e-05,
"loss": 3.446,
"step": 67500
},
{
"epoch": 0.2,
"learning_rate": 4.659474569260369e-05,
"loss": 3.4132,
"step": 68000
},
{
"epoch": 0.21,
"learning_rate": 4.656970705799048e-05,
"loss": 3.4266,
"step": 68500
},
{
"epoch": 0.21,
"learning_rate": 4.6544668423377277e-05,
"loss": 3.4197,
"step": 69000
},
{
"epoch": 0.21,
"learning_rate": 4.651962978876406e-05,
"loss": 3.444,
"step": 69500
},
{
"epoch": 0.21,
"learning_rate": 4.649459115415086e-05,
"loss": 3.4453,
"step": 70000
},
{
"epoch": 0.21,
"learning_rate": 4.646955251953765e-05,
"loss": 3.4215,
"step": 70500
},
{
"epoch": 0.21,
"learning_rate": 4.644451388492444e-05,
"loss": 3.4524,
"step": 71000
},
{
"epoch": 0.21,
"learning_rate": 4.6419475250311236e-05,
"loss": 3.4432,
"step": 71500
},
{
"epoch": 0.22,
"learning_rate": 4.639443661569802e-05,
"loss": 3.4105,
"step": 72000
},
{
"epoch": 0.22,
"learning_rate": 4.636939798108482e-05,
"loss": 3.415,
"step": 72500
},
{
"epoch": 0.22,
"learning_rate": 4.634435934647161e-05,
"loss": 3.4441,
"step": 73000
},
{
"epoch": 0.22,
"learning_rate": 4.63193207118584e-05,
"loss": 3.4117,
"step": 73500
},
{
"epoch": 0.22,
"learning_rate": 4.6294282077245196e-05,
"loss": 3.4101,
"step": 74000
},
{
"epoch": 0.22,
"learning_rate": 4.626924344263198e-05,
"loss": 3.3981,
"step": 74500
},
{
"epoch": 0.23,
"learning_rate": 4.624420480801878e-05,
"loss": 3.4246,
"step": 75000
},
{
"epoch": 0.23,
"learning_rate": 4.621916617340557e-05,
"loss": 3.423,
"step": 75500
},
{
"epoch": 0.23,
"learning_rate": 4.619412753879236e-05,
"loss": 3.4062,
"step": 76000
},
{
"epoch": 0.23,
"learning_rate": 4.6169088904179155e-05,
"loss": 3.4059,
"step": 76500
},
{
"epoch": 0.23,
"learning_rate": 4.614405026956594e-05,
"loss": 3.3988,
"step": 77000
},
{
"epoch": 0.23,
"learning_rate": 4.6119011634952736e-05,
"loss": 3.4308,
"step": 77500
},
{
"epoch": 0.23,
"learning_rate": 4.609397300033953e-05,
"loss": 3.3904,
"step": 78000
},
{
"epoch": 0.24,
"learning_rate": 4.606893436572632e-05,
"loss": 3.3847,
"step": 78500
},
{
"epoch": 0.24,
"learning_rate": 4.6043895731113115e-05,
"loss": 3.3941,
"step": 79000
},
{
"epoch": 0.24,
"learning_rate": 4.60188570964999e-05,
"loss": 3.3834,
"step": 79500
},
{
"epoch": 0.24,
"learning_rate": 4.5993818461886696e-05,
"loss": 3.4271,
"step": 80000
},
{
"epoch": 0.24,
"learning_rate": 4.596877982727349e-05,
"loss": 3.3714,
"step": 80500
},
{
"epoch": 0.24,
"learning_rate": 4.594374119266028e-05,
"loss": 3.3709,
"step": 81000
},
{
"epoch": 0.24,
"learning_rate": 4.591870255804707e-05,
"loss": 3.4005,
"step": 81500
},
{
"epoch": 0.25,
"learning_rate": 4.589366392343386e-05,
"loss": 3.3783,
"step": 82000
},
{
"epoch": 0.25,
"learning_rate": 4.5868625288820656e-05,
"loss": 3.385,
"step": 82500
},
{
"epoch": 0.25,
"learning_rate": 4.5843586654207446e-05,
"loss": 3.3812,
"step": 83000
},
{
"epoch": 0.25,
"learning_rate": 4.581854801959424e-05,
"loss": 3.362,
"step": 83500
},
{
"epoch": 0.25,
"learning_rate": 4.579350938498103e-05,
"loss": 3.3715,
"step": 84000
},
{
"epoch": 0.25,
"learning_rate": 4.576847075036782e-05,
"loss": 3.387,
"step": 84500
},
{
"epoch": 0.26,
"learning_rate": 4.5743432115754615e-05,
"loss": 3.3741,
"step": 85000
},
{
"epoch": 0.26,
"learning_rate": 4.5718393481141406e-05,
"loss": 3.3461,
"step": 85500
},
{
"epoch": 0.26,
"learning_rate": 4.5693354846528196e-05,
"loss": 3.3659,
"step": 86000
},
{
"epoch": 0.26,
"learning_rate": 4.566831621191499e-05,
"loss": 3.3693,
"step": 86500
},
{
"epoch": 0.26,
"learning_rate": 4.564327757730178e-05,
"loss": 3.3776,
"step": 87000
},
{
"epoch": 0.26,
"learning_rate": 4.561823894268857e-05,
"loss": 3.3391,
"step": 87500
},
{
"epoch": 0.26,
"learning_rate": 4.5593200308075365e-05,
"loss": 3.3516,
"step": 88000
},
{
"epoch": 0.27,
"learning_rate": 4.5568161673462156e-05,
"loss": 3.3808,
"step": 88500
},
{
"epoch": 0.27,
"learning_rate": 4.5543123038848947e-05,
"loss": 3.3933,
"step": 89000
},
{
"epoch": 0.27,
"learning_rate": 4.551808440423574e-05,
"loss": 3.3717,
"step": 89500
},
{
"epoch": 0.27,
"learning_rate": 4.549304576962253e-05,
"loss": 3.3625,
"step": 90000
},
{
"epoch": 0.27,
"learning_rate": 4.5468007135009325e-05,
"loss": 3.3391,
"step": 90500
},
{
"epoch": 0.27,
"learning_rate": 4.5442968500396116e-05,
"loss": 3.3361,
"step": 91000
},
{
"epoch": 0.27,
"learning_rate": 4.5417929865782906e-05,
"loss": 3.3201,
"step": 91500
},
{
"epoch": 0.28,
"learning_rate": 4.53928912311697e-05,
"loss": 3.3444,
"step": 92000
},
{
"epoch": 0.28,
"learning_rate": 4.536785259655649e-05,
"loss": 3.3624,
"step": 92500
},
{
"epoch": 0.28,
"learning_rate": 4.5342813961943285e-05,
"loss": 3.3289,
"step": 93000
},
{
"epoch": 0.28,
"learning_rate": 4.531777532733007e-05,
"loss": 3.342,
"step": 93500
},
{
"epoch": 0.28,
"learning_rate": 4.5292736692716866e-05,
"loss": 3.3372,
"step": 94000
},
{
"epoch": 0.28,
"learning_rate": 4.5267698058103656e-05,
"loss": 3.3458,
"step": 94500
},
{
"epoch": 0.29,
"learning_rate": 4.524265942349045e-05,
"loss": 3.386,
"step": 95000
},
{
"epoch": 0.29,
"learning_rate": 4.521762078887724e-05,
"loss": 3.336,
"step": 95500
},
{
"epoch": 0.29,
"learning_rate": 4.519258215426403e-05,
"loss": 3.3324,
"step": 96000
},
{
"epoch": 0.29,
"learning_rate": 4.5167543519650825e-05,
"loss": 3.3054,
"step": 96500
},
{
"epoch": 0.29,
"learning_rate": 4.5142504885037616e-05,
"loss": 3.3409,
"step": 97000
},
{
"epoch": 0.29,
"learning_rate": 4.5117466250424406e-05,
"loss": 3.3233,
"step": 97500
},
{
"epoch": 0.29,
"learning_rate": 4.50924276158112e-05,
"loss": 3.3296,
"step": 98000
},
{
"epoch": 0.3,
"learning_rate": 4.506738898119799e-05,
"loss": 3.3135,
"step": 98500
},
{
"epoch": 0.3,
"learning_rate": 4.5042350346584785e-05,
"loss": 3.3307,
"step": 99000
},
{
"epoch": 0.3,
"learning_rate": 4.5017311711971576e-05,
"loss": 3.3117,
"step": 99500
},
{
"epoch": 0.3,
"learning_rate": 4.4992273077358366e-05,
"loss": 3.3407,
"step": 100000
},
{
"epoch": 0.3,
"learning_rate": 4.496723444274516e-05,
"loss": 3.3376,
"step": 100500
},
{
"epoch": 0.3,
"learning_rate": 4.494219580813195e-05,
"loss": 3.3178,
"step": 101000
},
{
"epoch": 0.3,
"learning_rate": 4.4917157173518745e-05,
"loss": 3.3416,
"step": 101500
},
{
"epoch": 0.31,
"learning_rate": 4.489211853890553e-05,
"loss": 3.2907,
"step": 102000
},
{
"epoch": 0.31,
"learning_rate": 4.4867079904292326e-05,
"loss": 3.3315,
"step": 102500
},
{
"epoch": 0.31,
"learning_rate": 4.4842041269679116e-05,
"loss": 3.3199,
"step": 103000
},
{
"epoch": 0.31,
"learning_rate": 4.481700263506591e-05,
"loss": 3.3082,
"step": 103500
},
{
"epoch": 0.31,
"learning_rate": 4.4791964000452704e-05,
"loss": 3.3224,
"step": 104000
},
{
"epoch": 0.31,
"learning_rate": 4.476692536583949e-05,
"loss": 3.3023,
"step": 104500
},
{
"epoch": 0.32,
"learning_rate": 4.4741886731226285e-05,
"loss": 3.3068,
"step": 105000
},
{
"epoch": 0.32,
"learning_rate": 4.4716848096613076e-05,
"loss": 3.3125,
"step": 105500
},
{
"epoch": 0.32,
"learning_rate": 4.4691809461999866e-05,
"loss": 3.3026,
"step": 106000
},
{
"epoch": 0.32,
"learning_rate": 4.4666770827386664e-05,
"loss": 3.302,
"step": 106500
},
{
"epoch": 0.32,
"learning_rate": 4.464173219277345e-05,
"loss": 3.299,
"step": 107000
},
{
"epoch": 0.32,
"learning_rate": 4.4616693558160245e-05,
"loss": 3.2802,
"step": 107500
},
{
"epoch": 0.32,
"learning_rate": 4.4591654923547035e-05,
"loss": 3.2993,
"step": 108000
},
{
"epoch": 0.33,
"learning_rate": 4.4566616288933826e-05,
"loss": 3.2822,
"step": 108500
},
{
"epoch": 0.33,
"learning_rate": 4.454157765432062e-05,
"loss": 3.2963,
"step": 109000
},
{
"epoch": 0.33,
"learning_rate": 4.451653901970741e-05,
"loss": 3.3129,
"step": 109500
},
{
"epoch": 0.33,
"learning_rate": 4.4491500385094204e-05,
"loss": 3.3025,
"step": 110000
},
{
"epoch": 0.33,
"learning_rate": 4.4466461750480995e-05,
"loss": 3.3245,
"step": 110500
},
{
"epoch": 0.33,
"learning_rate": 4.4441423115867786e-05,
"loss": 3.2676,
"step": 111000
},
{
"epoch": 0.34,
"learning_rate": 4.441638448125458e-05,
"loss": 3.317,
"step": 111500
},
{
"epoch": 0.34,
"learning_rate": 4.439134584664137e-05,
"loss": 3.3008,
"step": 112000
},
{
"epoch": 0.34,
"learning_rate": 4.4366307212028164e-05,
"loss": 3.3093,
"step": 112500
},
{
"epoch": 0.34,
"learning_rate": 4.4341268577414955e-05,
"loss": 3.3066,
"step": 113000
},
{
"epoch": 0.34,
"learning_rate": 4.4316229942801745e-05,
"loss": 3.2671,
"step": 113500
},
{
"epoch": 0.34,
"learning_rate": 4.4291191308188536e-05,
"loss": 3.2873,
"step": 114000
},
{
"epoch": 0.34,
"learning_rate": 4.4266152673575326e-05,
"loss": 3.2857,
"step": 114500
},
{
"epoch": 0.35,
"learning_rate": 4.4241114038962124e-05,
"loss": 3.2992,
"step": 115000
},
{
"epoch": 0.35,
"learning_rate": 4.4216075404348914e-05,
"loss": 3.2959,
"step": 115500
},
{
"epoch": 0.35,
"learning_rate": 4.4191036769735705e-05,
"loss": 3.2688,
"step": 116000
},
{
"epoch": 0.35,
"learning_rate": 4.4165998135122495e-05,
"loss": 3.2958,
"step": 116500
},
{
"epoch": 0.35,
"learning_rate": 4.4140959500509286e-05,
"loss": 3.2933,
"step": 117000
},
{
"epoch": 0.35,
"learning_rate": 4.411592086589608e-05,
"loss": 3.2796,
"step": 117500
},
{
"epoch": 0.35,
"learning_rate": 4.4090882231282874e-05,
"loss": 3.2577,
"step": 118000
},
{
"epoch": 0.36,
"learning_rate": 4.4065843596669664e-05,
"loss": 3.2797,
"step": 118500
},
{
"epoch": 0.36,
"learning_rate": 4.4040804962056455e-05,
"loss": 3.3004,
"step": 119000
},
{
"epoch": 0.36,
"learning_rate": 4.4015766327443246e-05,
"loss": 3.2539,
"step": 119500
},
{
"epoch": 0.36,
"learning_rate": 4.399072769283004e-05,
"loss": 3.2525,
"step": 120000
},
{
"epoch": 0.36,
"learning_rate": 4.3965689058216833e-05,
"loss": 3.2668,
"step": 120500
},
{
"epoch": 0.36,
"learning_rate": 4.3940650423603624e-05,
"loss": 3.2693,
"step": 121000
},
{
"epoch": 0.37,
"learning_rate": 4.3915611788990415e-05,
"loss": 3.2792,
"step": 121500
},
{
"epoch": 0.37,
"learning_rate": 4.3890573154377205e-05,
"loss": 3.2649,
"step": 122000
},
{
"epoch": 0.37,
"learning_rate": 4.3865534519763996e-05,
"loss": 3.2556,
"step": 122500
},
{
"epoch": 0.37,
"learning_rate": 4.3840495885150786e-05,
"loss": 3.2509,
"step": 123000
},
{
"epoch": 0.37,
"learning_rate": 4.3815457250537584e-05,
"loss": 3.2616,
"step": 123500
},
{
"epoch": 0.37,
"learning_rate": 4.3790418615924374e-05,
"loss": 3.2654,
"step": 124000
},
{
"epoch": 0.37,
"learning_rate": 4.3765379981311165e-05,
"loss": 3.2628,
"step": 124500
},
{
"epoch": 0.38,
"learning_rate": 4.3740341346697955e-05,
"loss": 3.2462,
"step": 125000
},
{
"epoch": 0.38,
"learning_rate": 4.3715302712084746e-05,
"loss": 3.2517,
"step": 125500
},
{
"epoch": 0.38,
"learning_rate": 4.369026407747154e-05,
"loss": 3.2897,
"step": 126000
},
{
"epoch": 0.38,
"learning_rate": 4.3665225442858334e-05,
"loss": 3.2707,
"step": 126500
},
{
"epoch": 0.38,
"learning_rate": 4.3640186808245124e-05,
"loss": 3.269,
"step": 127000
},
{
"epoch": 0.38,
"learning_rate": 4.3615148173631915e-05,
"loss": 3.2331,
"step": 127500
},
{
"epoch": 0.38,
"learning_rate": 4.3590109539018705e-05,
"loss": 3.2644,
"step": 128000
},
{
"epoch": 0.39,
"learning_rate": 4.3565070904405496e-05,
"loss": 3.2406,
"step": 128500
},
{
"epoch": 0.39,
"learning_rate": 4.354003226979229e-05,
"loss": 3.2499,
"step": 129000
},
{
"epoch": 0.39,
"learning_rate": 4.3514993635179084e-05,
"loss": 3.2325,
"step": 129500
},
{
"epoch": 0.39,
"learning_rate": 4.3489955000565875e-05,
"loss": 3.2631,
"step": 130000
},
{
"epoch": 0.39,
"learning_rate": 4.3464916365952665e-05,
"loss": 3.2447,
"step": 130500
},
{
"epoch": 0.39,
"learning_rate": 4.3439877731339456e-05,
"loss": 3.2349,
"step": 131000
},
{
"epoch": 0.4,
"learning_rate": 4.341483909672625e-05,
"loss": 3.2372,
"step": 131500
},
{
"epoch": 0.4,
"learning_rate": 4.3389800462113044e-05,
"loss": 3.2456,
"step": 132000
},
{
"epoch": 0.4,
"learning_rate": 4.3364761827499834e-05,
"loss": 3.2477,
"step": 132500
},
{
"epoch": 0.4,
"learning_rate": 4.3339723192886625e-05,
"loss": 3.243,
"step": 133000
},
{
"epoch": 0.4,
"learning_rate": 4.3314684558273415e-05,
"loss": 3.2287,
"step": 133500
},
{
"epoch": 0.4,
"learning_rate": 4.328964592366021e-05,
"loss": 3.2613,
"step": 134000
},
{
"epoch": 0.4,
"learning_rate": 4.3264607289046996e-05,
"loss": 3.2242,
"step": 134500
},
{
"epoch": 0.41,
"learning_rate": 4.3239568654433794e-05,
"loss": 3.2481,
"step": 135000
},
{
"epoch": 0.41,
"learning_rate": 4.3214530019820584e-05,
"loss": 3.2482,
"step": 135500
},
{
"epoch": 0.41,
"learning_rate": 4.3189491385207375e-05,
"loss": 3.2439,
"step": 136000
},
{
"epoch": 0.41,
"learning_rate": 4.316445275059417e-05,
"loss": 3.2321,
"step": 136500
},
{
"epoch": 0.41,
"learning_rate": 4.3139414115980956e-05,
"loss": 3.2422,
"step": 137000
},
{
"epoch": 0.41,
"learning_rate": 4.311437548136775e-05,
"loss": 3.249,
"step": 137500
},
{
"epoch": 0.41,
"learning_rate": 4.3089336846754544e-05,
"loss": 3.2349,
"step": 138000
},
{
"epoch": 0.42,
"learning_rate": 4.3064298212141334e-05,
"loss": 3.218,
"step": 138500
},
{
"epoch": 0.42,
"learning_rate": 4.303925957752813e-05,
"loss": 3.2232,
"step": 139000
},
{
"epoch": 0.42,
"learning_rate": 4.3014220942914916e-05,
"loss": 3.2183,
"step": 139500
},
{
"epoch": 0.42,
"learning_rate": 4.298918230830171e-05,
"loss": 3.2547,
"step": 140000
},
{
"epoch": 0.42,
"learning_rate": 4.2964143673688503e-05,
"loss": 3.2168,
"step": 140500
},
{
"epoch": 0.42,
"learning_rate": 4.2939105039075294e-05,
"loss": 3.2378,
"step": 141000
},
{
"epoch": 0.43,
"learning_rate": 4.291406640446209e-05,
"loss": 3.2035,
"step": 141500
},
{
"epoch": 0.43,
"learning_rate": 4.2889027769848875e-05,
"loss": 3.2112,
"step": 142000
},
{
"epoch": 0.43,
"learning_rate": 4.286398913523567e-05,
"loss": 3.1962,
"step": 142500
},
{
"epoch": 0.43,
"learning_rate": 4.283895050062246e-05,
"loss": 3.2278,
"step": 143000
},
{
"epoch": 0.43,
"learning_rate": 4.2813911866009254e-05,
"loss": 3.2453,
"step": 143500
},
{
"epoch": 0.43,
"learning_rate": 4.278887323139605e-05,
"loss": 3.2265,
"step": 144000
},
{
"epoch": 0.43,
"learning_rate": 4.2763834596782835e-05,
"loss": 3.1941,
"step": 144500
},
{
"epoch": 0.44,
"learning_rate": 4.273879596216963e-05,
"loss": 3.2519,
"step": 145000
},
{
"epoch": 0.44,
"learning_rate": 4.271375732755642e-05,
"loss": 3.1941,
"step": 145500
},
{
"epoch": 0.44,
"learning_rate": 4.268871869294321e-05,
"loss": 3.2421,
"step": 146000
},
{
"epoch": 0.44,
"learning_rate": 4.266368005833001e-05,
"loss": 3.2396,
"step": 146500
},
{
"epoch": 0.44,
"learning_rate": 4.2638641423716794e-05,
"loss": 3.2021,
"step": 147000
},
{
"epoch": 0.44,
"learning_rate": 4.261360278910359e-05,
"loss": 3.2229,
"step": 147500
},
{
"epoch": 0.44,
"learning_rate": 4.258856415449038e-05,
"loss": 3.2251,
"step": 148000
},
{
"epoch": 0.45,
"learning_rate": 4.256352551987717e-05,
"loss": 3.1941,
"step": 148500
},
{
"epoch": 0.45,
"learning_rate": 4.2538486885263963e-05,
"loss": 3.2002,
"step": 149000
},
{
"epoch": 0.45,
"learning_rate": 4.2513448250650754e-05,
"loss": 3.2177,
"step": 149500
},
{
"epoch": 0.45,
"learning_rate": 4.248840961603755e-05,
"loss": 3.2386,
"step": 150000
},
{
"epoch": 0.45,
"learning_rate": 4.246337098142434e-05,
"loss": 3.1961,
"step": 150500
},
{
"epoch": 0.45,
"learning_rate": 4.243833234681113e-05,
"loss": 3.1943,
"step": 151000
},
{
"epoch": 0.46,
"learning_rate": 4.241329371219792e-05,
"loss": 3.1818,
"step": 151500
},
{
"epoch": 0.46,
"learning_rate": 4.2388255077584714e-05,
"loss": 3.2139,
"step": 152000
},
{
"epoch": 0.46,
"learning_rate": 4.236321644297151e-05,
"loss": 3.1935,
"step": 152500
},
{
"epoch": 0.46,
"learning_rate": 4.2338177808358295e-05,
"loss": 3.206,
"step": 153000
},
{
"epoch": 0.46,
"learning_rate": 4.231313917374509e-05,
"loss": 3.2269,
"step": 153500
},
{
"epoch": 0.46,
"learning_rate": 4.228810053913188e-05,
"loss": 3.2269,
"step": 154000
},
{
"epoch": 0.46,
"learning_rate": 4.226306190451867e-05,
"loss": 3.1957,
"step": 154500
},
{
"epoch": 0.47,
"learning_rate": 4.2238023269905464e-05,
"loss": 3.2306,
"step": 155000
},
{
"epoch": 0.47,
"learning_rate": 4.2212984635292254e-05,
"loss": 3.224,
"step": 155500
},
{
"epoch": 0.47,
"learning_rate": 4.218794600067905e-05,
"loss": 3.2183,
"step": 156000
},
{
"epoch": 0.47,
"learning_rate": 4.216290736606584e-05,
"loss": 3.2007,
"step": 156500
},
{
"epoch": 0.47,
"learning_rate": 4.213786873145263e-05,
"loss": 3.1717,
"step": 157000
},
{
"epoch": 0.47,
"learning_rate": 4.211283009683942e-05,
"loss": 3.1949,
"step": 157500
},
{
"epoch": 0.47,
"learning_rate": 4.2087791462226214e-05,
"loss": 3.1852,
"step": 158000
},
{
"epoch": 0.48,
"learning_rate": 4.206275282761301e-05,
"loss": 3.1967,
"step": 158500
},
{
"epoch": 0.48,
"learning_rate": 4.20377141929998e-05,
"loss": 3.1857,
"step": 159000
},
{
"epoch": 0.48,
"learning_rate": 4.201267555838659e-05,
"loss": 3.184,
"step": 159500
},
{
"epoch": 0.48,
"learning_rate": 4.198763692377338e-05,
"loss": 3.1921,
"step": 160000
},
{
"epoch": 0.48,
"learning_rate": 4.1962598289160173e-05,
"loss": 3.1994,
"step": 160500
},
{
"epoch": 0.48,
"learning_rate": 4.193755965454697e-05,
"loss": 3.1853,
"step": 161000
},
{
"epoch": 0.49,
"learning_rate": 4.191252101993376e-05,
"loss": 3.207,
"step": 161500
},
{
"epoch": 0.49,
"learning_rate": 4.188748238532055e-05,
"loss": 3.1822,
"step": 162000
},
{
"epoch": 0.49,
"learning_rate": 4.186244375070734e-05,
"loss": 3.1786,
"step": 162500
},
{
"epoch": 0.49,
"learning_rate": 4.183740511609413e-05,
"loss": 3.1686,
"step": 163000
},
{
"epoch": 0.49,
"learning_rate": 4.1812366481480924e-05,
"loss": 3.2091,
"step": 163500
},
{
"epoch": 0.49,
"learning_rate": 4.178732784686772e-05,
"loss": 3.1702,
"step": 164000
},
{
"epoch": 0.49,
"learning_rate": 4.176228921225451e-05,
"loss": 3.1915,
"step": 164500
},
{
"epoch": 0.5,
"learning_rate": 4.17372505776413e-05,
"loss": 3.1912,
"step": 165000
},
{
"epoch": 0.5,
"learning_rate": 4.171221194302809e-05,
"loss": 3.2141,
"step": 165500
},
{
"epoch": 0.5,
"learning_rate": 4.168717330841488e-05,
"loss": 3.1828,
"step": 166000
},
{
"epoch": 0.5,
"learning_rate": 4.166213467380168e-05,
"loss": 3.1877,
"step": 166500
},
{
"epoch": 0.5,
"learning_rate": 4.163709603918847e-05,
"loss": 3.1859,
"step": 167000
},
{
"epoch": 0.5,
"learning_rate": 4.161205740457526e-05,
"loss": 3.1695,
"step": 167500
},
{
"epoch": 0.5,
"learning_rate": 4.158701876996205e-05,
"loss": 3.227,
"step": 168000
},
{
"epoch": 0.51,
"learning_rate": 4.156198013534884e-05,
"loss": 3.1862,
"step": 168500
},
{
"epoch": 0.51,
"learning_rate": 4.153694150073564e-05,
"loss": 3.2209,
"step": 169000
},
{
"epoch": 0.51,
"learning_rate": 4.1511902866122424e-05,
"loss": 3.1829,
"step": 169500
},
{
"epoch": 0.51,
"learning_rate": 4.148686423150922e-05,
"loss": 3.1981,
"step": 170000
},
{
"epoch": 0.51,
"learning_rate": 4.146182559689601e-05,
"loss": 3.1563,
"step": 170500
},
{
"epoch": 0.51,
"learning_rate": 4.14367869622828e-05,
"loss": 3.1743,
"step": 171000
},
{
"epoch": 0.52,
"learning_rate": 4.14117483276696e-05,
"loss": 3.1754,
"step": 171500
},
{
"epoch": 0.52,
"learning_rate": 4.1386709693056384e-05,
"loss": 3.1729,
"step": 172000
},
{
"epoch": 0.52,
"learning_rate": 4.136167105844318e-05,
"loss": 3.1476,
"step": 172500
},
{
"epoch": 0.52,
"learning_rate": 4.133663242382997e-05,
"loss": 3.1561,
"step": 173000
},
{
"epoch": 0.52,
"learning_rate": 4.131159378921676e-05,
"loss": 3.1844,
"step": 173500
},
{
"epoch": 0.52,
"learning_rate": 4.128655515460356e-05,
"loss": 3.1756,
"step": 174000
},
{
"epoch": 0.52,
"learning_rate": 4.126151651999034e-05,
"loss": 3.181,
"step": 174500
},
{
"epoch": 0.53,
"learning_rate": 4.123647788537714e-05,
"loss": 3.1479,
"step": 175000
},
{
"epoch": 0.53,
"learning_rate": 4.121143925076393e-05,
"loss": 3.1654,
"step": 175500
},
{
"epoch": 0.53,
"learning_rate": 4.118640061615072e-05,
"loss": 3.1903,
"step": 176000
},
{
"epoch": 0.53,
"learning_rate": 4.116136198153752e-05,
"loss": 3.1761,
"step": 176500
},
{
"epoch": 0.53,
"learning_rate": 4.11363233469243e-05,
"loss": 3.1882,
"step": 177000
},
{
"epoch": 0.53,
"learning_rate": 4.11112847123111e-05,
"loss": 3.175,
"step": 177500
},
{
"epoch": 0.53,
"learning_rate": 4.108624607769789e-05,
"loss": 3.1517,
"step": 178000
},
{
"epoch": 0.54,
"learning_rate": 4.106120744308468e-05,
"loss": 3.1663,
"step": 178500
},
{
"epoch": 0.54,
"learning_rate": 4.103616880847148e-05,
"loss": 3.1669,
"step": 179000
},
{
"epoch": 0.54,
"learning_rate": 4.101113017385826e-05,
"loss": 3.1764,
"step": 179500
},
{
"epoch": 0.54,
"learning_rate": 4.098609153924506e-05,
"loss": 3.1554,
"step": 180000
},
{
"epoch": 0.54,
"learning_rate": 4.096105290463185e-05,
"loss": 3.1488,
"step": 180500
},
{
"epoch": 0.54,
"learning_rate": 4.093601427001864e-05,
"loss": 3.1657,
"step": 181000
},
{
"epoch": 0.55,
"learning_rate": 4.091097563540544e-05,
"loss": 3.1415,
"step": 181500
},
{
"epoch": 0.55,
"learning_rate": 4.088593700079222e-05,
"loss": 3.1519,
"step": 182000
},
{
"epoch": 0.55,
"learning_rate": 4.086089836617902e-05,
"loss": 3.1763,
"step": 182500
},
{
"epoch": 0.55,
"learning_rate": 4.08358597315658e-05,
"loss": 3.1373,
"step": 183000
},
{
"epoch": 0.55,
"learning_rate": 4.08108210969526e-05,
"loss": 3.1619,
"step": 183500
},
{
"epoch": 0.55,
"learning_rate": 4.078578246233939e-05,
"loss": 3.1682,
"step": 184000
},
{
"epoch": 0.55,
"learning_rate": 4.076074382772618e-05,
"loss": 3.1474,
"step": 184500
},
{
"epoch": 0.56,
"learning_rate": 4.073570519311298e-05,
"loss": 3.2058,
"step": 185000
},
{
"epoch": 0.56,
"learning_rate": 4.071066655849976e-05,
"loss": 3.1558,
"step": 185500
},
{
"epoch": 0.56,
"learning_rate": 4.068562792388656e-05,
"loss": 3.1667,
"step": 186000
},
{
"epoch": 0.56,
"learning_rate": 4.066058928927335e-05,
"loss": 3.1724,
"step": 186500
},
{
"epoch": 0.56,
"learning_rate": 4.063555065466014e-05,
"loss": 3.1421,
"step": 187000
},
{
"epoch": 0.56,
"learning_rate": 4.061051202004694e-05,
"loss": 3.1602,
"step": 187500
},
{
"epoch": 0.56,
"learning_rate": 4.058547338543372e-05,
"loss": 3.1505,
"step": 188000
},
{
"epoch": 0.57,
"learning_rate": 4.056043475082052e-05,
"loss": 3.1426,
"step": 188500
},
{
"epoch": 0.57,
"learning_rate": 4.053539611620731e-05,
"loss": 3.1507,
"step": 189000
},
{
"epoch": 0.57,
"learning_rate": 4.05103574815941e-05,
"loss": 3.1579,
"step": 189500
},
{
"epoch": 0.57,
"learning_rate": 4.048531884698089e-05,
"loss": 3.1455,
"step": 190000
},
{
"epoch": 0.57,
"learning_rate": 4.046028021236768e-05,
"loss": 3.1776,
"step": 190500
},
{
"epoch": 0.57,
"learning_rate": 4.043524157775448e-05,
"loss": 3.1343,
"step": 191000
},
{
"epoch": 0.58,
"learning_rate": 4.041020294314127e-05,
"loss": 3.1487,
"step": 191500
},
{
"epoch": 0.58,
"learning_rate": 4.038516430852806e-05,
"loss": 3.15,
"step": 192000
},
{
"epoch": 0.58,
"learning_rate": 4.036012567391485e-05,
"loss": 3.1351,
"step": 192500
},
{
"epoch": 0.58,
"learning_rate": 4.033508703930164e-05,
"loss": 3.1242,
"step": 193000
},
{
"epoch": 0.58,
"learning_rate": 4.031004840468844e-05,
"loss": 3.1655,
"step": 193500
},
{
"epoch": 0.58,
"learning_rate": 4.028500977007523e-05,
"loss": 3.1386,
"step": 194000
},
{
"epoch": 0.58,
"learning_rate": 4.025997113546202e-05,
"loss": 3.1384,
"step": 194500
},
{
"epoch": 0.59,
"learning_rate": 4.023493250084881e-05,
"loss": 3.1433,
"step": 195000
},
{
"epoch": 0.59,
"learning_rate": 4.02098938662356e-05,
"loss": 3.1139,
"step": 195500
},
{
"epoch": 0.59,
"learning_rate": 4.018485523162239e-05,
"loss": 3.1579,
"step": 196000
},
{
"epoch": 0.59,
"learning_rate": 4.015981659700919e-05,
"loss": 3.1283,
"step": 196500
},
{
"epoch": 0.59,
"learning_rate": 4.013477796239598e-05,
"loss": 3.138,
"step": 197000
},
{
"epoch": 0.59,
"learning_rate": 4.010973932778277e-05,
"loss": 3.1325,
"step": 197500
},
{
"epoch": 0.59,
"learning_rate": 4.008470069316956e-05,
"loss": 3.1043,
"step": 198000
},
{
"epoch": 0.6,
"learning_rate": 4.005966205855635e-05,
"loss": 3.1572,
"step": 198500
},
{
"epoch": 0.6,
"learning_rate": 4.003462342394315e-05,
"loss": 3.1539,
"step": 199000
},
{
"epoch": 0.6,
"learning_rate": 4.000958478932994e-05,
"loss": 3.1408,
"step": 199500
},
{
"epoch": 0.6,
"learning_rate": 3.998454615471673e-05,
"loss": 3.1204,
"step": 200000
},
{
"epoch": 0.6,
"learning_rate": 3.995950752010352e-05,
"loss": 3.1164,
"step": 200500
},
{
"epoch": 0.6,
"learning_rate": 3.993446888549031e-05,
"loss": 3.1518,
"step": 201000
},
{
"epoch": 0.61,
"learning_rate": 3.990943025087711e-05,
"loss": 3.1399,
"step": 201500
},
{
"epoch": 0.61,
"learning_rate": 3.98843916162639e-05,
"loss": 3.116,
"step": 202000
},
{
"epoch": 0.61,
"learning_rate": 3.985935298165069e-05,
"loss": 3.1405,
"step": 202500
},
{
"epoch": 0.61,
"learning_rate": 3.983431434703748e-05,
"loss": 3.116,
"step": 203000
},
{
"epoch": 0.61,
"learning_rate": 3.980927571242427e-05,
"loss": 3.1357,
"step": 203500
},
{
"epoch": 0.61,
"learning_rate": 3.978423707781107e-05,
"loss": 3.149,
"step": 204000
},
{
"epoch": 0.61,
"learning_rate": 3.975919844319785e-05,
"loss": 3.1418,
"step": 204500
},
{
"epoch": 0.62,
"learning_rate": 3.973415980858465e-05,
"loss": 3.1229,
"step": 205000
},
{
"epoch": 0.62,
"learning_rate": 3.970912117397144e-05,
"loss": 3.141,
"step": 205500
},
{
"epoch": 0.62,
"learning_rate": 3.968408253935823e-05,
"loss": 3.1376,
"step": 206000
},
{
"epoch": 0.62,
"learning_rate": 3.965904390474503e-05,
"loss": 3.1284,
"step": 206500
},
{
"epoch": 0.62,
"learning_rate": 3.963400527013181e-05,
"loss": 3.1405,
"step": 207000
},
{
"epoch": 0.62,
"learning_rate": 3.960896663551861e-05,
"loss": 3.1543,
"step": 207500
},
{
"epoch": 0.62,
"learning_rate": 3.95839280009054e-05,
"loss": 3.1693,
"step": 208000
},
{
"epoch": 0.63,
"learning_rate": 3.955888936629219e-05,
"loss": 3.116,
"step": 208500
},
{
"epoch": 0.63,
"learning_rate": 3.953385073167899e-05,
"loss": 3.125,
"step": 209000
},
{
"epoch": 0.63,
"learning_rate": 3.950881209706577e-05,
"loss": 3.1362,
"step": 209500
},
{
"epoch": 0.63,
"learning_rate": 3.948377346245257e-05,
"loss": 3.0917,
"step": 210000
},
{
"epoch": 0.63,
"learning_rate": 3.945873482783936e-05,
"loss": 3.121,
"step": 210500
},
{
"epoch": 0.63,
"learning_rate": 3.943369619322615e-05,
"loss": 3.0947,
"step": 211000
},
{
"epoch": 0.64,
"learning_rate": 3.940865755861295e-05,
"loss": 3.1065,
"step": 211500
},
{
"epoch": 0.64,
"learning_rate": 3.938361892399973e-05,
"loss": 3.1274,
"step": 212000
},
{
"epoch": 0.64,
"learning_rate": 3.935858028938653e-05,
"loss": 3.1071,
"step": 212500
},
{
"epoch": 0.64,
"learning_rate": 3.933354165477331e-05,
"loss": 3.1405,
"step": 213000
},
{
"epoch": 0.64,
"learning_rate": 3.930850302016011e-05,
"loss": 3.1544,
"step": 213500
},
{
"epoch": 0.64,
"learning_rate": 3.9283464385546906e-05,
"loss": 3.1427,
"step": 214000
},
{
"epoch": 0.64,
"learning_rate": 3.925842575093369e-05,
"loss": 3.1102,
"step": 214500
},
{
"epoch": 0.65,
"learning_rate": 3.923338711632049e-05,
"loss": 3.1475,
"step": 215000
},
{
"epoch": 0.65,
"learning_rate": 3.920834848170727e-05,
"loss": 3.125,
"step": 215500
},
{
"epoch": 0.65,
"learning_rate": 3.918330984709407e-05,
"loss": 3.1,
"step": 216000
},
{
"epoch": 0.65,
"learning_rate": 3.9158271212480866e-05,
"loss": 3.1193,
"step": 216500
},
{
"epoch": 0.65,
"learning_rate": 3.913323257786765e-05,
"loss": 3.1084,
"step": 217000
},
{
"epoch": 0.65,
"learning_rate": 3.910819394325445e-05,
"loss": 3.1027,
"step": 217500
},
{
"epoch": 0.66,
"learning_rate": 3.908315530864123e-05,
"loss": 3.1362,
"step": 218000
},
{
"epoch": 0.66,
"learning_rate": 3.905811667402803e-05,
"loss": 3.1049,
"step": 218500
},
{
"epoch": 0.66,
"learning_rate": 3.903307803941482e-05,
"loss": 3.1278,
"step": 219000
},
{
"epoch": 0.66,
"learning_rate": 3.900803940480161e-05,
"loss": 3.1058,
"step": 219500
},
{
"epoch": 0.66,
"learning_rate": 3.8983000770188407e-05,
"loss": 3.0892,
"step": 220000
},
{
"epoch": 0.66,
"learning_rate": 3.895796213557519e-05,
"loss": 3.1202,
"step": 220500
},
{
"epoch": 0.66,
"learning_rate": 3.893292350096199e-05,
"loss": 3.1286,
"step": 221000
},
{
"epoch": 0.67,
"learning_rate": 3.890788486634878e-05,
"loss": 3.1254,
"step": 221500
},
{
"epoch": 0.67,
"learning_rate": 3.888284623173557e-05,
"loss": 3.1017,
"step": 222000
},
{
"epoch": 0.67,
"learning_rate": 3.8857807597122366e-05,
"loss": 3.0974,
"step": 222500
},
{
"epoch": 0.67,
"learning_rate": 3.883276896250915e-05,
"loss": 3.0832,
"step": 223000
},
{
"epoch": 0.67,
"learning_rate": 3.880773032789595e-05,
"loss": 3.1195,
"step": 223500
},
{
"epoch": 0.67,
"learning_rate": 3.878269169328274e-05,
"loss": 3.1234,
"step": 224000
},
{
"epoch": 0.67,
"learning_rate": 3.875765305866953e-05,
"loss": 3.0852,
"step": 224500
},
{
"epoch": 0.68,
"learning_rate": 3.873261442405632e-05,
"loss": 3.1366,
"step": 225000
},
{
"epoch": 0.68,
"learning_rate": 3.870757578944311e-05,
"loss": 3.1166,
"step": 225500
},
{
"epoch": 0.68,
"learning_rate": 3.868253715482991e-05,
"loss": 3.1384,
"step": 226000
},
{
"epoch": 0.68,
"learning_rate": 3.86574985202167e-05,
"loss": 3.1121,
"step": 226500
},
{
"epoch": 0.68,
"learning_rate": 3.863245988560349e-05,
"loss": 3.1149,
"step": 227000
},
{
"epoch": 0.68,
"learning_rate": 3.860742125099028e-05,
"loss": 3.1072,
"step": 227500
},
{
"epoch": 0.69,
"learning_rate": 3.858238261637707e-05,
"loss": 3.0932,
"step": 228000
},
{
"epoch": 0.69,
"learning_rate": 3.8557343981763866e-05,
"loss": 3.0846,
"step": 228500
},
{
"epoch": 0.69,
"learning_rate": 3.853230534715066e-05,
"loss": 3.0819,
"step": 229000
},
{
"epoch": 0.69,
"learning_rate": 3.850726671253745e-05,
"loss": 3.104,
"step": 229500
},
{
"epoch": 0.69,
"learning_rate": 3.848222807792424e-05,
"loss": 3.1016,
"step": 230000
},
{
"epoch": 0.69,
"learning_rate": 3.845718944331103e-05,
"loss": 3.075,
"step": 230500
},
{
"epoch": 0.69,
"learning_rate": 3.843215080869782e-05,
"loss": 3.0945,
"step": 231000
},
{
"epoch": 0.7,
"learning_rate": 3.840711217408462e-05,
"loss": 3.0949,
"step": 231500
},
{
"epoch": 0.7,
"learning_rate": 3.838207353947141e-05,
"loss": 3.1097,
"step": 232000
},
{
"epoch": 0.7,
"learning_rate": 3.83570349048582e-05,
"loss": 3.0881,
"step": 232500
},
{
"epoch": 0.7,
"learning_rate": 3.833199627024499e-05,
"loss": 3.1087,
"step": 233000
},
{
"epoch": 0.7,
"learning_rate": 3.830695763563178e-05,
"loss": 3.1164,
"step": 233500
},
{
"epoch": 0.7,
"learning_rate": 3.8281919001018576e-05,
"loss": 3.1171,
"step": 234000
},
{
"epoch": 0.7,
"learning_rate": 3.825688036640537e-05,
"loss": 3.0979,
"step": 234500
},
{
"epoch": 0.71,
"learning_rate": 3.823184173179216e-05,
"loss": 3.0912,
"step": 235000
},
{
"epoch": 0.71,
"learning_rate": 3.820680309717895e-05,
"loss": 3.1041,
"step": 235500
},
{
"epoch": 0.71,
"learning_rate": 3.818176446256574e-05,
"loss": 3.0904,
"step": 236000
},
{
"epoch": 0.71,
"learning_rate": 3.8156725827952536e-05,
"loss": 3.0874,
"step": 236500
},
{
"epoch": 0.71,
"learning_rate": 3.8131687193339326e-05,
"loss": 3.0746,
"step": 237000
},
{
"epoch": 0.71,
"learning_rate": 3.810664855872612e-05,
"loss": 3.1111,
"step": 237500
},
{
"epoch": 0.72,
"learning_rate": 3.808160992411291e-05,
"loss": 3.0794,
"step": 238000
},
{
"epoch": 0.72,
"learning_rate": 3.80565712894997e-05,
"loss": 3.0809,
"step": 238500
},
{
"epoch": 0.72,
"learning_rate": 3.8031532654886495e-05,
"loss": 3.0907,
"step": 239000
},
{
"epoch": 0.72,
"learning_rate": 3.800649402027328e-05,
"loss": 3.0692,
"step": 239500
},
{
"epoch": 0.72,
"learning_rate": 3.7981455385660077e-05,
"loss": 3.1191,
"step": 240000
},
{
"epoch": 0.72,
"learning_rate": 3.795641675104687e-05,
"loss": 3.0884,
"step": 240500
},
{
"epoch": 0.72,
"learning_rate": 3.793137811643366e-05,
"loss": 3.0955,
"step": 241000
},
{
"epoch": 0.73,
"learning_rate": 3.7906339481820455e-05,
"loss": 3.1308,
"step": 241500
},
{
"epoch": 0.73,
"learning_rate": 3.788130084720724e-05,
"loss": 3.0821,
"step": 242000
},
{
"epoch": 0.73,
"learning_rate": 3.7856262212594036e-05,
"loss": 3.1044,
"step": 242500
},
{
"epoch": 0.73,
"learning_rate": 3.783122357798083e-05,
"loss": 3.0543,
"step": 243000
},
{
"epoch": 0.73,
"learning_rate": 3.780618494336762e-05,
"loss": 3.0943,
"step": 243500
},
{
"epoch": 0.73,
"learning_rate": 3.7781146308754415e-05,
"loss": 3.0787,
"step": 244000
},
{
"epoch": 0.73,
"learning_rate": 3.77561076741412e-05,
"loss": 3.0943,
"step": 244500
},
{
"epoch": 0.74,
"learning_rate": 3.7731069039527996e-05,
"loss": 3.0755,
"step": 245000
},
{
"epoch": 0.74,
"learning_rate": 3.770603040491478e-05,
"loss": 3.0912,
"step": 245500
},
{
"epoch": 0.74,
"learning_rate": 3.768099177030158e-05,
"loss": 3.0616,
"step": 246000
},
{
"epoch": 0.74,
"learning_rate": 3.7655953135688374e-05,
"loss": 3.0884,
"step": 246500
},
{
"epoch": 0.74,
"learning_rate": 3.763091450107516e-05,
"loss": 3.0833,
"step": 247000
},
{
"epoch": 0.74,
"learning_rate": 3.7605875866461955e-05,
"loss": 3.0648,
"step": 247500
},
{
"epoch": 0.75,
"learning_rate": 3.758083723184874e-05,
"loss": 3.1001,
"step": 248000
},
{
"epoch": 0.75,
"learning_rate": 3.7555798597235537e-05,
"loss": 3.0793,
"step": 248500
},
{
"epoch": 0.75,
"learning_rate": 3.7530759962622334e-05,
"loss": 3.0914,
"step": 249000
},
{
"epoch": 0.75,
"learning_rate": 3.750572132800912e-05,
"loss": 3.0715,
"step": 249500
},
{
"epoch": 0.75,
"learning_rate": 3.7480682693395915e-05,
"loss": 3.0778,
"step": 250000
},
{
"epoch": 0.75,
"learning_rate": 3.74556440587827e-05,
"loss": 3.0749,
"step": 250500
},
{
"epoch": 0.75,
"learning_rate": 3.7430605424169496e-05,
"loss": 3.0795,
"step": 251000
},
{
"epoch": 0.76,
"learning_rate": 3.740556678955629e-05,
"loss": 3.0838,
"step": 251500
},
{
"epoch": 0.76,
"learning_rate": 3.738052815494308e-05,
"loss": 3.0874,
"step": 252000
},
{
"epoch": 0.76,
"learning_rate": 3.7355489520329875e-05,
"loss": 3.1254,
"step": 252500
},
{
"epoch": 0.76,
"learning_rate": 3.733045088571666e-05,
"loss": 3.0686,
"step": 253000
},
{
"epoch": 0.76,
"learning_rate": 3.7305412251103456e-05,
"loss": 3.0645,
"step": 253500
},
{
"epoch": 0.76,
"learning_rate": 3.7280373616490246e-05,
"loss": 3.0608,
"step": 254000
},
{
"epoch": 0.76,
"learning_rate": 3.725533498187704e-05,
"loss": 3.0896,
"step": 254500
},
{
"epoch": 0.77,
"learning_rate": 3.7230296347263834e-05,
"loss": 3.0622,
"step": 255000
},
{
"epoch": 0.77,
"learning_rate": 3.720525771265062e-05,
"loss": 3.0935,
"step": 255500
},
{
"epoch": 0.77,
"learning_rate": 3.7180219078037415e-05,
"loss": 3.051,
"step": 256000
},
{
"epoch": 0.77,
"learning_rate": 3.7155180443424206e-05,
"loss": 3.05,
"step": 256500
},
{
"epoch": 0.77,
"learning_rate": 3.7130141808810996e-05,
"loss": 3.0711,
"step": 257000
},
{
"epoch": 0.77,
"learning_rate": 3.7105103174197794e-05,
"loss": 3.086,
"step": 257500
},
{
"epoch": 0.78,
"learning_rate": 3.708006453958458e-05,
"loss": 3.0711,
"step": 258000
},
{
"epoch": 0.78,
"learning_rate": 3.7055025904971375e-05,
"loss": 3.0573,
"step": 258500
},
{
"epoch": 0.78,
"learning_rate": 3.7029987270358165e-05,
"loss": 3.0937,
"step": 259000
},
{
"epoch": 0.78,
"learning_rate": 3.7004948635744956e-05,
"loss": 3.0803,
"step": 259500
},
{
"epoch": 0.78,
"learning_rate": 3.6979910001131747e-05,
"loss": 3.0894,
"step": 260000
},
{
"epoch": 0.78,
"learning_rate": 3.695487136651854e-05,
"loss": 3.0443,
"step": 260500
},
{
"epoch": 0.78,
"learning_rate": 3.6929832731905335e-05,
"loss": 3.0307,
"step": 261000
},
{
"epoch": 0.79,
"learning_rate": 3.6904794097292125e-05,
"loss": 3.0806,
"step": 261500
},
{
"epoch": 0.79,
"learning_rate": 3.6879755462678916e-05,
"loss": 3.0864,
"step": 262000
},
{
"epoch": 0.79,
"learning_rate": 3.6854716828065706e-05,
"loss": 3.0655,
"step": 262500
},
{
"epoch": 0.79,
"learning_rate": 3.68296781934525e-05,
"loss": 3.0769,
"step": 263000
},
{
"epoch": 0.79,
"learning_rate": 3.6804639558839294e-05,
"loss": 3.0842,
"step": 263500
},
{
"epoch": 0.79,
"learning_rate": 3.6779600924226085e-05,
"loss": 3.0599,
"step": 264000
},
{
"epoch": 0.79,
"learning_rate": 3.6754562289612875e-05,
"loss": 3.0444,
"step": 264500
},
{
"epoch": 0.8,
"learning_rate": 3.6729523654999666e-05,
"loss": 3.0919,
"step": 265000
},
{
"epoch": 0.8,
"learning_rate": 3.6704485020386456e-05,
"loss": 3.0803,
"step": 265500
},
{
"epoch": 0.8,
"learning_rate": 3.667944638577325e-05,
"loss": 3.0625,
"step": 266000
},
{
"epoch": 0.8,
"learning_rate": 3.6654407751160044e-05,
"loss": 3.0659,
"step": 266500
},
{
"epoch": 0.8,
"learning_rate": 3.6629369116546835e-05,
"loss": 3.058,
"step": 267000
},
{
"epoch": 0.8,
"learning_rate": 3.6604330481933625e-05,
"loss": 3.039,
"step": 267500
},
{
"epoch": 0.81,
"learning_rate": 3.6579291847320416e-05,
"loss": 3.0609,
"step": 268000
},
{
"epoch": 0.81,
"learning_rate": 3.6554253212707207e-05,
"loss": 3.0642,
"step": 268500
},
{
"epoch": 0.81,
"learning_rate": 3.6529214578094004e-05,
"loss": 3.0689,
"step": 269000
},
{
"epoch": 0.81,
"learning_rate": 3.6504175943480794e-05,
"loss": 3.0742,
"step": 269500
},
{
"epoch": 0.81,
"learning_rate": 3.6479137308867585e-05,
"loss": 3.0593,
"step": 270000
},
{
"epoch": 0.81,
"learning_rate": 3.6454098674254376e-05,
"loss": 3.0275,
"step": 270500
},
{
"epoch": 0.81,
"learning_rate": 3.6429060039641166e-05,
"loss": 3.0494,
"step": 271000
},
{
"epoch": 0.82,
"learning_rate": 3.6404021405027963e-05,
"loss": 3.0539,
"step": 271500
},
{
"epoch": 0.82,
"learning_rate": 3.637898277041475e-05,
"loss": 3.0219,
"step": 272000
},
{
"epoch": 0.82,
"learning_rate": 3.6353944135801545e-05,
"loss": 3.0594,
"step": 272500
},
{
"epoch": 0.82,
"learning_rate": 3.6328905501188335e-05,
"loss": 3.0226,
"step": 273000
},
{
"epoch": 0.82,
"learning_rate": 3.6303866866575126e-05,
"loss": 3.062,
"step": 273500
},
{
"epoch": 0.82,
"learning_rate": 3.627882823196192e-05,
"loss": 3.0278,
"step": 274000
},
{
"epoch": 0.82,
"learning_rate": 3.625378959734871e-05,
"loss": 3.0481,
"step": 274500
},
{
"epoch": 0.83,
"learning_rate": 3.6228750962735504e-05,
"loss": 3.0711,
"step": 275000
},
{
"epoch": 0.83,
"learning_rate": 3.6203712328122295e-05,
"loss": 3.0537,
"step": 275500
},
{
"epoch": 0.83,
"learning_rate": 3.6178673693509085e-05,
"loss": 3.0285,
"step": 276000
},
{
"epoch": 0.83,
"learning_rate": 3.615363505889588e-05,
"loss": 3.059,
"step": 276500
},
{
"epoch": 0.83,
"learning_rate": 3.6128596424282666e-05,
"loss": 3.0548,
"step": 277000
},
{
"epoch": 0.83,
"learning_rate": 3.6103557789669464e-05,
"loss": 3.0681,
"step": 277500
},
{
"epoch": 0.84,
"learning_rate": 3.6078519155056254e-05,
"loss": 3.0542,
"step": 278000
},
{
"epoch": 0.84,
"learning_rate": 3.6053480520443045e-05,
"loss": 3.0432,
"step": 278500
},
{
"epoch": 0.84,
"learning_rate": 3.602844188582984e-05,
"loss": 3.0262,
"step": 279000
},
{
"epoch": 0.84,
"learning_rate": 3.6003403251216626e-05,
"loss": 3.0477,
"step": 279500
},
{
"epoch": 0.84,
"learning_rate": 3.5978364616603423e-05,
"loss": 3.0659,
"step": 280000
},
{
"epoch": 0.84,
"learning_rate": 3.595332598199021e-05,
"loss": 3.0485,
"step": 280500
},
{
"epoch": 0.84,
"learning_rate": 3.5928287347377005e-05,
"loss": 3.0876,
"step": 281000
},
{
"epoch": 0.85,
"learning_rate": 3.5903248712763795e-05,
"loss": 3.0533,
"step": 281500
},
{
"epoch": 0.85,
"learning_rate": 3.5878210078150586e-05,
"loss": 3.0515,
"step": 282000
},
{
"epoch": 0.85,
"learning_rate": 3.585317144353738e-05,
"loss": 3.0581,
"step": 282500
},
{
"epoch": 0.85,
"learning_rate": 3.582813280892417e-05,
"loss": 3.0733,
"step": 283000
},
{
"epoch": 0.85,
"learning_rate": 3.5803094174310964e-05,
"loss": 3.0407,
"step": 283500
},
{
"epoch": 0.85,
"learning_rate": 3.5778055539697755e-05,
"loss": 3.0176,
"step": 284000
},
{
"epoch": 0.85,
"learning_rate": 3.5753016905084545e-05,
"loss": 3.0331,
"step": 284500
},
{
"epoch": 0.86,
"learning_rate": 3.572797827047134e-05,
"loss": 3.0587,
"step": 285000
},
{
"epoch": 0.86,
"learning_rate": 3.5702939635858126e-05,
"loss": 3.036,
"step": 285500
},
{
"epoch": 0.86,
"learning_rate": 3.5677901001244924e-05,
"loss": 3.0488,
"step": 286000
},
{
"epoch": 0.86,
"learning_rate": 3.5652862366631714e-05,
"loss": 3.0591,
"step": 286500
},
{
"epoch": 0.86,
"learning_rate": 3.5627823732018505e-05,
"loss": 3.0318,
"step": 287000
},
{
"epoch": 0.86,
"learning_rate": 3.56027850974053e-05,
"loss": 3.0268,
"step": 287500
},
{
"epoch": 0.87,
"learning_rate": 3.5577746462792086e-05,
"loss": 3.0655,
"step": 288000
},
{
"epoch": 0.87,
"learning_rate": 3.555270782817888e-05,
"loss": 3.0387,
"step": 288500
},
{
"epoch": 0.87,
"learning_rate": 3.5527669193565674e-05,
"loss": 3.0587,
"step": 289000
},
{
"epoch": 0.87,
"learning_rate": 3.5502630558952464e-05,
"loss": 3.0486,
"step": 289500
},
{
"epoch": 0.87,
"learning_rate": 3.547759192433926e-05,
"loss": 3.078,
"step": 290000
},
{
"epoch": 0.87,
"learning_rate": 3.5452553289726046e-05,
"loss": 3.0346,
"step": 290500
},
{
"epoch": 0.87,
"learning_rate": 3.542751465511284e-05,
"loss": 3.0631,
"step": 291000
},
{
"epoch": 0.88,
"learning_rate": 3.5402476020499634e-05,
"loss": 3.0286,
"step": 291500
},
{
"epoch": 0.88,
"learning_rate": 3.5377437385886424e-05,
"loss": 3.0451,
"step": 292000
},
{
"epoch": 0.88,
"learning_rate": 3.5352398751273215e-05,
"loss": 3.0457,
"step": 292500
},
{
"epoch": 0.88,
"learning_rate": 3.5327360116660005e-05,
"loss": 3.0523,
"step": 293000
},
{
"epoch": 0.88,
"learning_rate": 3.53023214820468e-05,
"loss": 3.0505,
"step": 293500
},
{
"epoch": 0.88,
"learning_rate": 3.527728284743359e-05,
"loss": 3.0391,
"step": 294000
},
{
"epoch": 0.88,
"learning_rate": 3.5252244212820384e-05,
"loss": 3.032,
"step": 294500
},
{
"epoch": 0.89,
"learning_rate": 3.5227205578207174e-05,
"loss": 3.048,
"step": 295000
},
{
"epoch": 0.89,
"learning_rate": 3.5202166943593965e-05,
"loss": 3.0276,
"step": 295500
},
{
"epoch": 0.89,
"learning_rate": 3.517712830898076e-05,
"loss": 3.0132,
"step": 296000
},
{
"epoch": 0.89,
"learning_rate": 3.515208967436755e-05,
"loss": 3.0059,
"step": 296500
},
{
"epoch": 0.89,
"learning_rate": 3.512705103975434e-05,
"loss": 3.0293,
"step": 297000
},
{
"epoch": 0.89,
"learning_rate": 3.5102012405141134e-05,
"loss": 3.0349,
"step": 297500
},
{
"epoch": 0.9,
"learning_rate": 3.5076973770527924e-05,
"loss": 3.0338,
"step": 298000
},
{
"epoch": 0.9,
"learning_rate": 3.505193513591472e-05,
"loss": 3.0429,
"step": 298500
},
{
"epoch": 0.9,
"learning_rate": 3.502689650130151e-05,
"loss": 3.017,
"step": 299000
},
{
"epoch": 0.9,
"learning_rate": 3.50018578666883e-05,
"loss": 3.0174,
"step": 299500
},
{
"epoch": 0.9,
"learning_rate": 3.4976819232075093e-05,
"loss": 3.0668,
"step": 300000
},
{
"epoch": 0.9,
"learning_rate": 3.4951780597461884e-05,
"loss": 3.0236,
"step": 300500
},
{
"epoch": 0.9,
"learning_rate": 3.4926741962848675e-05,
"loss": 3.0195,
"step": 301000
},
{
"epoch": 0.91,
"learning_rate": 3.490170332823547e-05,
"loss": 3.0443,
"step": 301500
}
],
"max_steps": 998457,
"num_train_epochs": 3,
"total_flos": 4.46482559989836e+18,
"trial_name": null,
"trial_params": null
}