zhko_mbartLarge_sup_run1 / trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
74a314e verified
{
"best_metric": 0.7818750739097595,
"best_model_checkpoint": "./zhko_mbartLarge_100p_run1/checkpoint-208330",
"epoch": 6.0,
"eval_steps": 500,
"global_step": 416661,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.3855236632427764e-06,
"loss": 2.8874,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 4.785446865700298e-06,
"loss": 2.0531,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 7.185370068157819e-06,
"loss": 1.9116,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 9.585293270615341e-06,
"loss": 1.8191,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 1.1985216473072862e-05,
"loss": 1.7481,
"step": 2500
},
{
"epoch": 0.04,
"learning_rate": 1.4385139675530384e-05,
"loss": 1.6872,
"step": 3000
},
{
"epoch": 0.05,
"learning_rate": 1.6785062877987908e-05,
"loss": 1.6479,
"step": 3500
},
{
"epoch": 0.06,
"learning_rate": 1.9184986080445428e-05,
"loss": 1.6213,
"step": 4000
},
{
"epoch": 0.06,
"learning_rate": 2.158490928290295e-05,
"loss": 1.581,
"step": 4500
},
{
"epoch": 0.07,
"learning_rate": 2.3980032638955555e-05,
"loss": 1.56,
"step": 5000
},
{
"epoch": 0.08,
"learning_rate": 2.637995584141308e-05,
"loss": 1.5333,
"step": 5500
},
{
"epoch": 0.09,
"learning_rate": 2.877507919746568e-05,
"loss": 1.513,
"step": 6000
},
{
"epoch": 0.09,
"learning_rate": 3.11750023999232e-05,
"loss": 1.5147,
"step": 6500
},
{
"epoch": 0.1,
"learning_rate": 3.3574925602380726e-05,
"loss": 1.5006,
"step": 7000
},
{
"epoch": 0.11,
"learning_rate": 3.597484880483824e-05,
"loss": 1.4869,
"step": 7500
},
{
"epoch": 0.12,
"learning_rate": 3.8374772007295767e-05,
"loss": 1.4786,
"step": 8000
},
{
"epoch": 0.12,
"learning_rate": 4.077469520975329e-05,
"loss": 1.4796,
"step": 8500
},
{
"epoch": 0.13,
"learning_rate": 4.317461841221081e-05,
"loss": 1.4451,
"step": 9000
},
{
"epoch": 0.14,
"learning_rate": 4.557454161466833e-05,
"loss": 1.4554,
"step": 9500
},
{
"epoch": 0.14,
"learning_rate": 4.7974464817125855e-05,
"loss": 1.4498,
"step": 10000
},
{
"epoch": 0.15,
"learning_rate": 4.9996315072903376e-05,
"loss": 1.4393,
"step": 10500
},
{
"epoch": 0.16,
"learning_rate": 4.997212061736105e-05,
"loss": 1.4227,
"step": 11000
},
{
"epoch": 0.17,
"learning_rate": 4.994787767593588e-05,
"loss": 1.427,
"step": 11500
},
{
"epoch": 0.17,
"learning_rate": 4.99236347345107e-05,
"loss": 1.4113,
"step": 12000
},
{
"epoch": 0.18,
"learning_rate": 4.989939179308553e-05,
"loss": 1.3914,
"step": 12500
},
{
"epoch": 0.19,
"learning_rate": 4.9875148851660354e-05,
"loss": 1.3769,
"step": 13000
},
{
"epoch": 0.19,
"learning_rate": 4.985095439611803e-05,
"loss": 1.378,
"step": 13500
},
{
"epoch": 0.2,
"learning_rate": 4.9826759940575705e-05,
"loss": 1.3477,
"step": 14000
},
{
"epoch": 0.21,
"learning_rate": 4.980251699915053e-05,
"loss": 1.3498,
"step": 14500
},
{
"epoch": 0.22,
"learning_rate": 4.9778274057725356e-05,
"loss": 1.3406,
"step": 15000
},
{
"epoch": 0.22,
"learning_rate": 4.975403111630018e-05,
"loss": 1.3279,
"step": 15500
},
{
"epoch": 0.23,
"learning_rate": 4.972983666075786e-05,
"loss": 1.3323,
"step": 16000
},
{
"epoch": 0.24,
"learning_rate": 4.970564220521553e-05,
"loss": 1.3034,
"step": 16500
},
{
"epoch": 0.24,
"learning_rate": 4.968139926379036e-05,
"loss": 1.3118,
"step": 17000
},
{
"epoch": 0.25,
"learning_rate": 4.9657156322365184e-05,
"loss": 1.2919,
"step": 17500
},
{
"epoch": 0.26,
"learning_rate": 4.963296186682286e-05,
"loss": 1.3006,
"step": 18000
},
{
"epoch": 0.27,
"learning_rate": 4.9608718925397685e-05,
"loss": 1.2771,
"step": 18500
},
{
"epoch": 0.27,
"learning_rate": 4.958447598397251e-05,
"loss": 1.2859,
"step": 19000
},
{
"epoch": 0.28,
"learning_rate": 4.956023304254733e-05,
"loss": 1.2668,
"step": 19500
},
{
"epoch": 0.29,
"learning_rate": 4.953599010112216e-05,
"loss": 1.2662,
"step": 20000
},
{
"epoch": 0.3,
"learning_rate": 4.951179564557983e-05,
"loss": 1.2618,
"step": 20500
},
{
"epoch": 0.3,
"learning_rate": 4.948755270415466e-05,
"loss": 1.2647,
"step": 21000
},
{
"epoch": 0.31,
"learning_rate": 4.946330976272949e-05,
"loss": 1.27,
"step": 21500
},
{
"epoch": 0.32,
"learning_rate": 4.9439066821304314e-05,
"loss": 1.2298,
"step": 22000
},
{
"epoch": 0.32,
"learning_rate": 4.941482387987913e-05,
"loss": 1.2255,
"step": 22500
},
{
"epoch": 0.33,
"learning_rate": 4.9390580938453965e-05,
"loss": 1.2062,
"step": 23000
},
{
"epoch": 0.34,
"learning_rate": 4.936633799702879e-05,
"loss": 1.2167,
"step": 23500
},
{
"epoch": 0.35,
"learning_rate": 4.9342095055603616e-05,
"loss": 1.2009,
"step": 24000
},
{
"epoch": 0.35,
"learning_rate": 4.9317852114178435e-05,
"loss": 1.2182,
"step": 24500
},
{
"epoch": 0.36,
"learning_rate": 4.929360917275326e-05,
"loss": 1.1898,
"step": 25000
},
{
"epoch": 0.37,
"learning_rate": 4.926936623132809e-05,
"loss": 1.2008,
"step": 25500
},
{
"epoch": 0.37,
"learning_rate": 4.924512328990292e-05,
"loss": 1.2015,
"step": 26000
},
{
"epoch": 0.38,
"learning_rate": 4.922088034847774e-05,
"loss": 1.1932,
"step": 26500
},
{
"epoch": 0.39,
"learning_rate": 4.919663740705256e-05,
"loss": 1.189,
"step": 27000
},
{
"epoch": 0.4,
"learning_rate": 4.9172394465627395e-05,
"loss": 1.1872,
"step": 27500
},
{
"epoch": 0.4,
"learning_rate": 4.914815152420222e-05,
"loss": 1.189,
"step": 28000
},
{
"epoch": 0.41,
"learning_rate": 4.9124054040425596e-05,
"loss": 1.1778,
"step": 28500
},
{
"epoch": 0.42,
"learning_rate": 4.9099859584883265e-05,
"loss": 1.1891,
"step": 29000
},
{
"epoch": 0.42,
"learning_rate": 4.90756166434581e-05,
"loss": 1.1887,
"step": 29500
},
{
"epoch": 0.43,
"learning_rate": 4.905137370203292e-05,
"loss": 1.1741,
"step": 30000
},
{
"epoch": 0.44,
"learning_rate": 4.902713076060774e-05,
"loss": 1.1698,
"step": 30500
},
{
"epoch": 0.45,
"learning_rate": 4.900288781918257e-05,
"loss": 1.1662,
"step": 31000
},
{
"epoch": 0.45,
"learning_rate": 4.897864487775739e-05,
"loss": 1.1631,
"step": 31500
},
{
"epoch": 0.46,
"learning_rate": 4.8954401936332225e-05,
"loss": 1.1545,
"step": 32000
},
{
"epoch": 0.47,
"learning_rate": 4.8930158994907044e-05,
"loss": 1.1479,
"step": 32500
},
{
"epoch": 0.48,
"learning_rate": 4.8905964539364727e-05,
"loss": 1.1533,
"step": 33000
},
{
"epoch": 0.48,
"learning_rate": 4.8881721597939545e-05,
"loss": 1.1437,
"step": 33500
},
{
"epoch": 0.49,
"learning_rate": 4.885747865651437e-05,
"loss": 1.1414,
"step": 34000
},
{
"epoch": 0.5,
"learning_rate": 4.8833235715089196e-05,
"loss": 1.1423,
"step": 34500
},
{
"epoch": 0.5,
"learning_rate": 4.880899277366402e-05,
"loss": 1.1471,
"step": 35000
},
{
"epoch": 0.51,
"learning_rate": 4.878474983223885e-05,
"loss": 1.1345,
"step": 35500
},
{
"epoch": 0.52,
"learning_rate": 4.876050689081367e-05,
"loss": 1.1356,
"step": 36000
},
{
"epoch": 0.53,
"learning_rate": 4.87362639493885e-05,
"loss": 1.1395,
"step": 36500
},
{
"epoch": 0.53,
"learning_rate": 4.8712021007963324e-05,
"loss": 1.1184,
"step": 37000
},
{
"epoch": 0.54,
"learning_rate": 4.868777806653815e-05,
"loss": 1.1306,
"step": 37500
},
{
"epoch": 0.55,
"learning_rate": 4.8663535125112975e-05,
"loss": 1.1185,
"step": 38000
},
{
"epoch": 0.55,
"learning_rate": 4.86392921836878e-05,
"loss": 1.1229,
"step": 38500
},
{
"epoch": 0.56,
"learning_rate": 4.8615146214028326e-05,
"loss": 1.1162,
"step": 39000
},
{
"epoch": 0.57,
"learning_rate": 4.859090327260315e-05,
"loss": 1.1163,
"step": 39500
},
{
"epoch": 0.58,
"learning_rate": 4.856666033117798e-05,
"loss": 1.1314,
"step": 40000
},
{
"epoch": 0.58,
"learning_rate": 4.85424173897528e-05,
"loss": 1.1227,
"step": 40500
},
{
"epoch": 0.59,
"learning_rate": 4.851817444832762e-05,
"loss": 1.1236,
"step": 41000
},
{
"epoch": 0.6,
"learning_rate": 4.8493931506902454e-05,
"loss": 1.117,
"step": 41500
},
{
"epoch": 0.6,
"learning_rate": 4.846968856547728e-05,
"loss": 1.1063,
"step": 42000
},
{
"epoch": 0.61,
"learning_rate": 4.8445445624052105e-05,
"loss": 1.1144,
"step": 42500
},
{
"epoch": 0.62,
"learning_rate": 4.8421202682626924e-05,
"loss": 1.1052,
"step": 43000
},
{
"epoch": 0.63,
"learning_rate": 4.8396959741201756e-05,
"loss": 1.1203,
"step": 43500
},
{
"epoch": 0.63,
"learning_rate": 4.8372765285659425e-05,
"loss": 1.0938,
"step": 44000
},
{
"epoch": 0.64,
"learning_rate": 4.834852234423425e-05,
"loss": 1.0856,
"step": 44500
},
{
"epoch": 0.65,
"learning_rate": 4.832427940280908e-05,
"loss": 1.0768,
"step": 45000
},
{
"epoch": 0.66,
"learning_rate": 4.830003646138391e-05,
"loss": 1.0968,
"step": 45500
},
{
"epoch": 0.66,
"learning_rate": 4.827579351995873e-05,
"loss": 1.1018,
"step": 46000
},
{
"epoch": 0.67,
"learning_rate": 4.825155057853355e-05,
"loss": 1.0795,
"step": 46500
},
{
"epoch": 0.68,
"learning_rate": 4.8227307637108385e-05,
"loss": 1.0894,
"step": 47000
},
{
"epoch": 0.68,
"learning_rate": 4.820306469568321e-05,
"loss": 1.0946,
"step": 47500
},
{
"epoch": 0.69,
"learning_rate": 4.817882175425803e-05,
"loss": 1.0842,
"step": 48000
},
{
"epoch": 0.7,
"learning_rate": 4.8154578812832855e-05,
"loss": 1.0779,
"step": 48500
},
{
"epoch": 0.71,
"learning_rate": 4.813033587140768e-05,
"loss": 1.0849,
"step": 49000
},
{
"epoch": 0.71,
"learning_rate": 4.810609292998251e-05,
"loss": 1.0757,
"step": 49500
},
{
"epoch": 0.72,
"learning_rate": 4.808184998855733e-05,
"loss": 1.068,
"step": 50000
},
{
"epoch": 0.73,
"learning_rate": 4.8057655533015014e-05,
"loss": 1.0793,
"step": 50500
},
{
"epoch": 0.73,
"learning_rate": 4.803350956335553e-05,
"loss": 1.0865,
"step": 51000
},
{
"epoch": 0.74,
"learning_rate": 4.8009315107813215e-05,
"loss": 1.0698,
"step": 51500
},
{
"epoch": 0.75,
"learning_rate": 4.7985072166388034e-05,
"loss": 1.0703,
"step": 52000
},
{
"epoch": 0.76,
"learning_rate": 4.796082922496286e-05,
"loss": 1.0692,
"step": 52500
},
{
"epoch": 0.76,
"learning_rate": 4.7936586283537685e-05,
"loss": 1.0591,
"step": 53000
},
{
"epoch": 0.77,
"learning_rate": 4.791234334211252e-05,
"loss": 1.0642,
"step": 53500
},
{
"epoch": 0.78,
"learning_rate": 4.7888100400687336e-05,
"loss": 1.0771,
"step": 54000
},
{
"epoch": 0.78,
"learning_rate": 4.786385745926216e-05,
"loss": 1.0802,
"step": 54500
},
{
"epoch": 0.79,
"learning_rate": 4.783961451783699e-05,
"loss": 1.0765,
"step": 55000
},
{
"epoch": 0.8,
"learning_rate": 4.781537157641181e-05,
"loss": 1.058,
"step": 55500
},
{
"epoch": 0.81,
"learning_rate": 4.779112863498664e-05,
"loss": 1.0521,
"step": 56000
},
{
"epoch": 0.81,
"learning_rate": 4.7766934179444314e-05,
"loss": 1.053,
"step": 56500
},
{
"epoch": 0.82,
"learning_rate": 4.774269123801914e-05,
"loss": 1.0517,
"step": 57000
},
{
"epoch": 0.83,
"learning_rate": 4.7718448296593965e-05,
"loss": 1.0613,
"step": 57500
},
{
"epoch": 0.84,
"learning_rate": 4.769420535516879e-05,
"loss": 1.0551,
"step": 58000
},
{
"epoch": 0.84,
"learning_rate": 4.7669962413743616e-05,
"loss": 1.0462,
"step": 58500
},
{
"epoch": 0.85,
"learning_rate": 4.764576795820129e-05,
"loss": 1.0496,
"step": 59000
},
{
"epoch": 0.86,
"learning_rate": 4.762152501677612e-05,
"loss": 1.0407,
"step": 59500
},
{
"epoch": 0.86,
"learning_rate": 4.759728207535094e-05,
"loss": 1.06,
"step": 60000
},
{
"epoch": 0.87,
"learning_rate": 4.757303913392577e-05,
"loss": 1.042,
"step": 60500
},
{
"epoch": 0.88,
"learning_rate": 4.7548796192500594e-05,
"loss": 1.0401,
"step": 61000
},
{
"epoch": 0.89,
"learning_rate": 4.752455325107542e-05,
"loss": 1.036,
"step": 61500
},
{
"epoch": 0.89,
"learning_rate": 4.7500310309650245e-05,
"loss": 1.0461,
"step": 62000
},
{
"epoch": 0.9,
"learning_rate": 4.747606736822507e-05,
"loss": 1.0457,
"step": 62500
},
{
"epoch": 0.91,
"learning_rate": 4.7451824426799896e-05,
"loss": 1.0325,
"step": 63000
},
{
"epoch": 0.91,
"learning_rate": 4.742762997125757e-05,
"loss": 1.0353,
"step": 63500
},
{
"epoch": 0.92,
"learning_rate": 4.74033870298324e-05,
"loss": 1.0417,
"step": 64000
},
{
"epoch": 0.93,
"learning_rate": 4.737914408840722e-05,
"loss": 1.0486,
"step": 64500
},
{
"epoch": 0.94,
"learning_rate": 4.735490114698204e-05,
"loss": 1.0366,
"step": 65000
},
{
"epoch": 0.94,
"learning_rate": 4.7330658205556874e-05,
"loss": 1.0195,
"step": 65500
},
{
"epoch": 0.95,
"learning_rate": 4.73064152641317e-05,
"loss": 1.0377,
"step": 66000
},
{
"epoch": 0.96,
"learning_rate": 4.7282172322706525e-05,
"loss": 1.0373,
"step": 66500
},
{
"epoch": 0.96,
"learning_rate": 4.72579778671642e-05,
"loss": 1.0247,
"step": 67000
},
{
"epoch": 0.97,
"learning_rate": 4.7233734925739026e-05,
"loss": 1.021,
"step": 67500
},
{
"epoch": 0.98,
"learning_rate": 4.7209491984313845e-05,
"loss": 1.0328,
"step": 68000
},
{
"epoch": 0.99,
"learning_rate": 4.718524904288868e-05,
"loss": 1.0113,
"step": 68500
},
{
"epoch": 0.99,
"learning_rate": 4.71610061014635e-05,
"loss": 1.0208,
"step": 69000
},
{
"epoch": 1.0,
"eval_bleu": 41.5541,
"eval_gen_len": 13.044,
"eval_loss": 0.8814196586608887,
"eval_runtime": 8677.1132,
"eval_samples_per_second": 15.503,
"eval_steps_per_second": 1.938,
"step": 69443
},
{
"epoch": 1.0,
"learning_rate": 4.713676316003833e-05,
"loss": 1.0263,
"step": 69500
},
{
"epoch": 1.01,
"learning_rate": 4.711252021861315e-05,
"loss": 0.8753,
"step": 70000
},
{
"epoch": 1.02,
"learning_rate": 4.708837424895367e-05,
"loss": 0.883,
"step": 70500
},
{
"epoch": 1.02,
"learning_rate": 4.7064131307528505e-05,
"loss": 0.867,
"step": 71000
},
{
"epoch": 1.03,
"learning_rate": 4.703988836610333e-05,
"loss": 0.8803,
"step": 71500
},
{
"epoch": 1.04,
"learning_rate": 4.701564542467815e-05,
"loss": 0.8856,
"step": 72000
},
{
"epoch": 1.04,
"learning_rate": 4.6991402483252975e-05,
"loss": 0.88,
"step": 72500
},
{
"epoch": 1.05,
"learning_rate": 4.696720802771065e-05,
"loss": 0.8847,
"step": 73000
},
{
"epoch": 1.06,
"learning_rate": 4.6942965086285476e-05,
"loss": 0.8794,
"step": 73500
},
{
"epoch": 1.07,
"learning_rate": 4.691872214486031e-05,
"loss": 0.8884,
"step": 74000
},
{
"epoch": 1.07,
"learning_rate": 4.6894479203435134e-05,
"loss": 0.877,
"step": 74500
},
{
"epoch": 1.08,
"learning_rate": 4.687023626200995e-05,
"loss": 0.8921,
"step": 75000
},
{
"epoch": 1.09,
"learning_rate": 4.684599332058478e-05,
"loss": 0.8823,
"step": 75500
},
{
"epoch": 1.09,
"learning_rate": 4.6821750379159604e-05,
"loss": 0.8741,
"step": 76000
},
{
"epoch": 1.1,
"learning_rate": 4.6797507437734436e-05,
"loss": 0.8902,
"step": 76500
},
{
"epoch": 1.11,
"learning_rate": 4.6773264496309255e-05,
"loss": 0.8735,
"step": 77000
},
{
"epoch": 1.12,
"learning_rate": 4.674902155488408e-05,
"loss": 0.889,
"step": 77500
},
{
"epoch": 1.12,
"learning_rate": 4.6724778613458906e-05,
"loss": 0.8863,
"step": 78000
},
{
"epoch": 1.13,
"learning_rate": 4.670058415791658e-05,
"loss": 0.8907,
"step": 78500
},
{
"epoch": 1.14,
"learning_rate": 4.667634121649141e-05,
"loss": 0.8783,
"step": 79000
},
{
"epoch": 1.14,
"learning_rate": 4.665209827506623e-05,
"loss": 0.8888,
"step": 79500
},
{
"epoch": 1.15,
"learning_rate": 4.662790381952391e-05,
"loss": 0.8844,
"step": 80000
},
{
"epoch": 1.16,
"learning_rate": 4.6603660878098734e-05,
"loss": 0.8711,
"step": 80500
},
{
"epoch": 1.17,
"learning_rate": 4.657941793667356e-05,
"loss": 0.8942,
"step": 81000
},
{
"epoch": 1.17,
"learning_rate": 4.6555174995248385e-05,
"loss": 0.888,
"step": 81500
},
{
"epoch": 1.18,
"learning_rate": 4.653093205382321e-05,
"loss": 0.8906,
"step": 82000
},
{
"epoch": 1.19,
"learning_rate": 4.6506689112398036e-05,
"loss": 0.8909,
"step": 82500
},
{
"epoch": 1.2,
"learning_rate": 4.648244617097286e-05,
"loss": 0.8837,
"step": 83000
},
{
"epoch": 1.2,
"learning_rate": 4.645820322954769e-05,
"loss": 0.8836,
"step": 83500
},
{
"epoch": 1.21,
"learning_rate": 4.643396028812251e-05,
"loss": 0.8819,
"step": 84000
},
{
"epoch": 1.22,
"learning_rate": 4.640971734669734e-05,
"loss": 0.8995,
"step": 84500
},
{
"epoch": 1.22,
"learning_rate": 4.6385474405272164e-05,
"loss": 0.8934,
"step": 85000
},
{
"epoch": 1.23,
"learning_rate": 4.636123146384699e-05,
"loss": 0.8836,
"step": 85500
},
{
"epoch": 1.24,
"learning_rate": 4.6337037008304665e-05,
"loss": 0.8941,
"step": 86000
},
{
"epoch": 1.25,
"learning_rate": 4.631279406687949e-05,
"loss": 0.8934,
"step": 86500
},
{
"epoch": 1.25,
"learning_rate": 4.6288551125454316e-05,
"loss": 0.8988,
"step": 87000
},
{
"epoch": 1.26,
"learning_rate": 4.626430818402914e-05,
"loss": 0.8805,
"step": 87500
},
{
"epoch": 1.27,
"learning_rate": 4.624006524260397e-05,
"loss": 0.8931,
"step": 88000
},
{
"epoch": 1.27,
"learning_rate": 4.621582230117879e-05,
"loss": 0.8956,
"step": 88500
},
{
"epoch": 1.28,
"learning_rate": 4.619157935975362e-05,
"loss": 0.8872,
"step": 89000
},
{
"epoch": 1.29,
"learning_rate": 4.6167336418328444e-05,
"loss": 0.8945,
"step": 89500
},
{
"epoch": 1.3,
"learning_rate": 4.614314196278612e-05,
"loss": 0.8919,
"step": 90000
},
{
"epoch": 1.3,
"learning_rate": 4.6118899021360945e-05,
"loss": 0.8801,
"step": 90500
},
{
"epoch": 1.31,
"learning_rate": 4.6094656079935764e-05,
"loss": 0.883,
"step": 91000
},
{
"epoch": 1.32,
"learning_rate": 4.6070413138510596e-05,
"loss": 0.888,
"step": 91500
},
{
"epoch": 1.32,
"learning_rate": 4.6046218682968265e-05,
"loss": 0.8878,
"step": 92000
},
{
"epoch": 1.33,
"learning_rate": 4.60219757415431e-05,
"loss": 0.8809,
"step": 92500
},
{
"epoch": 1.34,
"learning_rate": 4.599773280011792e-05,
"loss": 0.883,
"step": 93000
},
{
"epoch": 1.35,
"learning_rate": 4.597348985869275e-05,
"loss": 0.8769,
"step": 93500
},
{
"epoch": 1.35,
"learning_rate": 4.594924691726757e-05,
"loss": 0.8799,
"step": 94000
},
{
"epoch": 1.36,
"learning_rate": 4.592500397584239e-05,
"loss": 0.8907,
"step": 94500
},
{
"epoch": 1.37,
"learning_rate": 4.5900761034417225e-05,
"loss": 0.8813,
"step": 95000
},
{
"epoch": 1.38,
"learning_rate": 4.587651809299205e-05,
"loss": 0.8712,
"step": 95500
},
{
"epoch": 1.38,
"learning_rate": 4.585227515156687e-05,
"loss": 0.8903,
"step": 96000
},
{
"epoch": 1.39,
"learning_rate": 4.5828032210141695e-05,
"loss": 0.8948,
"step": 96500
},
{
"epoch": 1.4,
"learning_rate": 4.580378926871653e-05,
"loss": 0.8758,
"step": 97000
},
{
"epoch": 1.4,
"learning_rate": 4.5779691784939896e-05,
"loss": 0.8771,
"step": 97500
},
{
"epoch": 1.41,
"learning_rate": 4.575544884351473e-05,
"loss": 0.8867,
"step": 98000
},
{
"epoch": 1.42,
"learning_rate": 4.5731205902089554e-05,
"loss": 0.8853,
"step": 98500
},
{
"epoch": 1.43,
"learning_rate": 4.570696296066437e-05,
"loss": 0.8892,
"step": 99000
},
{
"epoch": 1.43,
"learning_rate": 4.5682768505122055e-05,
"loss": 0.8932,
"step": 99500
},
{
"epoch": 1.44,
"learning_rate": 4.5658525563696874e-05,
"loss": 0.8909,
"step": 100000
},
{
"epoch": 1.45,
"learning_rate": 4.5634331108154556e-05,
"loss": 0.9005,
"step": 100500
},
{
"epoch": 1.45,
"learning_rate": 4.5610088166729375e-05,
"loss": 0.8884,
"step": 101000
},
{
"epoch": 1.46,
"learning_rate": 4.55858452253042e-05,
"loss": 0.8807,
"step": 101500
},
{
"epoch": 1.47,
"learning_rate": 4.5561602283879026e-05,
"loss": 0.8842,
"step": 102000
},
{
"epoch": 1.48,
"learning_rate": 4.553735934245386e-05,
"loss": 0.8888,
"step": 102500
},
{
"epoch": 1.48,
"learning_rate": 4.551311640102868e-05,
"loss": 0.884,
"step": 103000
},
{
"epoch": 1.49,
"learning_rate": 4.54888734596035e-05,
"loss": 0.8828,
"step": 103500
},
{
"epoch": 1.5,
"learning_rate": 4.546463051817833e-05,
"loss": 0.893,
"step": 104000
},
{
"epoch": 1.5,
"learning_rate": 4.5440387576753154e-05,
"loss": 0.8732,
"step": 104500
},
{
"epoch": 1.51,
"learning_rate": 4.541619312121083e-05,
"loss": 0.8681,
"step": 105000
},
{
"epoch": 1.52,
"learning_rate": 4.5391950179785655e-05,
"loss": 0.874,
"step": 105500
},
{
"epoch": 1.53,
"learning_rate": 4.536770723836048e-05,
"loss": 0.8795,
"step": 106000
},
{
"epoch": 1.53,
"learning_rate": 4.5343464296935306e-05,
"loss": 0.8849,
"step": 106500
},
{
"epoch": 1.54,
"learning_rate": 4.531922135551013e-05,
"loss": 0.8806,
"step": 107000
},
{
"epoch": 1.55,
"learning_rate": 4.529502689996781e-05,
"loss": 0.8967,
"step": 107500
},
{
"epoch": 1.56,
"learning_rate": 4.527078395854263e-05,
"loss": 0.8782,
"step": 108000
},
{
"epoch": 1.56,
"learning_rate": 4.524654101711746e-05,
"loss": 0.8813,
"step": 108500
},
{
"epoch": 1.57,
"learning_rate": 4.5222298075692284e-05,
"loss": 0.8801,
"step": 109000
},
{
"epoch": 1.58,
"learning_rate": 4.519805513426711e-05,
"loss": 0.8816,
"step": 109500
},
{
"epoch": 1.58,
"learning_rate": 4.5173812192841935e-05,
"loss": 0.8842,
"step": 110000
},
{
"epoch": 1.59,
"learning_rate": 4.514956925141676e-05,
"loss": 0.8843,
"step": 110500
},
{
"epoch": 1.6,
"learning_rate": 4.5125326309991586e-05,
"loss": 0.896,
"step": 111000
},
{
"epoch": 1.61,
"learning_rate": 4.510108336856641e-05,
"loss": 0.8911,
"step": 111500
},
{
"epoch": 1.61,
"learning_rate": 4.507684042714124e-05,
"loss": 0.8879,
"step": 112000
},
{
"epoch": 1.62,
"learning_rate": 4.505269445748176e-05,
"loss": 0.8897,
"step": 112500
},
{
"epoch": 1.63,
"learning_rate": 4.502845151605659e-05,
"loss": 0.8814,
"step": 113000
},
{
"epoch": 1.63,
"learning_rate": 4.5004257060514264e-05,
"loss": 0.8737,
"step": 113500
},
{
"epoch": 1.64,
"learning_rate": 4.498001411908909e-05,
"loss": 0.8884,
"step": 114000
},
{
"epoch": 1.65,
"learning_rate": 4.4955771177663915e-05,
"loss": 0.8796,
"step": 114500
},
{
"epoch": 1.66,
"learning_rate": 4.493152823623874e-05,
"loss": 0.8753,
"step": 115000
},
{
"epoch": 1.66,
"learning_rate": 4.4907285294813566e-05,
"loss": 0.8796,
"step": 115500
},
{
"epoch": 1.67,
"learning_rate": 4.4883042353388385e-05,
"loss": 0.8846,
"step": 116000
},
{
"epoch": 1.68,
"learning_rate": 4.485879941196322e-05,
"loss": 0.8676,
"step": 116500
},
{
"epoch": 1.68,
"learning_rate": 4.483455647053804e-05,
"loss": 0.8861,
"step": 117000
},
{
"epoch": 1.69,
"learning_rate": 4.481031352911287e-05,
"loss": 0.8822,
"step": 117500
},
{
"epoch": 1.7,
"learning_rate": 4.478607058768769e-05,
"loss": 0.8665,
"step": 118000
},
{
"epoch": 1.71,
"learning_rate": 4.476182764626252e-05,
"loss": 0.8903,
"step": 118500
},
{
"epoch": 1.71,
"learning_rate": 4.4737584704837345e-05,
"loss": 0.8798,
"step": 119000
},
{
"epoch": 1.72,
"learning_rate": 4.4713341763412164e-05,
"loss": 0.8828,
"step": 119500
},
{
"epoch": 1.73,
"learning_rate": 4.468909882198699e-05,
"loss": 0.8782,
"step": 120000
},
{
"epoch": 1.74,
"learning_rate": 4.4664855880561815e-05,
"loss": 0.8732,
"step": 120500
},
{
"epoch": 1.74,
"learning_rate": 4.464066142501949e-05,
"loss": 0.8836,
"step": 121000
},
{
"epoch": 1.75,
"learning_rate": 4.4616418483594316e-05,
"loss": 0.8874,
"step": 121500
},
{
"epoch": 1.76,
"learning_rate": 4.459217554216915e-05,
"loss": 0.8798,
"step": 122000
},
{
"epoch": 1.76,
"learning_rate": 4.456798108662682e-05,
"loss": 0.8641,
"step": 122500
},
{
"epoch": 1.77,
"learning_rate": 4.454373814520165e-05,
"loss": 0.8652,
"step": 123000
},
{
"epoch": 1.78,
"learning_rate": 4.451949520377647e-05,
"loss": 0.8847,
"step": 123500
},
{
"epoch": 1.79,
"learning_rate": 4.4495252262351294e-05,
"loss": 0.8695,
"step": 124000
},
{
"epoch": 1.79,
"learning_rate": 4.447100932092612e-05,
"loss": 0.8761,
"step": 124500
},
{
"epoch": 1.8,
"learning_rate": 4.4446766379500945e-05,
"loss": 0.8881,
"step": 125000
},
{
"epoch": 1.81,
"learning_rate": 4.442252343807577e-05,
"loss": 0.8588,
"step": 125500
},
{
"epoch": 1.81,
"learning_rate": 4.4398280496650596e-05,
"loss": 0.8938,
"step": 126000
},
{
"epoch": 1.82,
"learning_rate": 4.437403755522542e-05,
"loss": 0.8678,
"step": 126500
},
{
"epoch": 1.83,
"learning_rate": 4.434979461380025e-05,
"loss": 0.8745,
"step": 127000
},
{
"epoch": 1.84,
"learning_rate": 4.432555167237507e-05,
"loss": 0.8833,
"step": 127500
},
{
"epoch": 1.84,
"learning_rate": 4.43013087309499e-05,
"loss": 0.8794,
"step": 128000
},
{
"epoch": 1.85,
"learning_rate": 4.4277065789524724e-05,
"loss": 0.8794,
"step": 128500
},
{
"epoch": 1.86,
"learning_rate": 4.425282284809955e-05,
"loss": 0.8747,
"step": 129000
},
{
"epoch": 1.86,
"learning_rate": 4.4228579906674375e-05,
"loss": 0.8746,
"step": 129500
},
{
"epoch": 1.87,
"learning_rate": 4.42043369652492e-05,
"loss": 0.8638,
"step": 130000
},
{
"epoch": 1.88,
"learning_rate": 4.4180094023824026e-05,
"loss": 0.8747,
"step": 130500
},
{
"epoch": 1.89,
"learning_rate": 4.41558995682817e-05,
"loss": 0.8934,
"step": 131000
},
{
"epoch": 1.89,
"learning_rate": 4.413165662685653e-05,
"loss": 0.8816,
"step": 131500
},
{
"epoch": 1.9,
"learning_rate": 4.410741368543135e-05,
"loss": 0.8681,
"step": 132000
},
{
"epoch": 1.91,
"learning_rate": 4.408317074400618e-05,
"loss": 0.871,
"step": 132500
},
{
"epoch": 1.92,
"learning_rate": 4.4058927802581004e-05,
"loss": 0.8615,
"step": 133000
},
{
"epoch": 1.92,
"learning_rate": 4.403473334703867e-05,
"loss": 0.8795,
"step": 133500
},
{
"epoch": 1.93,
"learning_rate": 4.4010538891496355e-05,
"loss": 0.8724,
"step": 134000
},
{
"epoch": 1.94,
"learning_rate": 4.3986295950071174e-05,
"loss": 0.8768,
"step": 134500
},
{
"epoch": 1.94,
"learning_rate": 4.3962101494528856e-05,
"loss": 0.8721,
"step": 135000
},
{
"epoch": 1.95,
"learning_rate": 4.3937858553103675e-05,
"loss": 0.8705,
"step": 135500
},
{
"epoch": 1.96,
"learning_rate": 4.391361561167851e-05,
"loss": 0.8741,
"step": 136000
},
{
"epoch": 1.97,
"learning_rate": 4.388937267025333e-05,
"loss": 0.8823,
"step": 136500
},
{
"epoch": 1.97,
"learning_rate": 4.386527518647671e-05,
"loss": 0.8669,
"step": 137000
},
{
"epoch": 1.98,
"learning_rate": 4.3841032245051534e-05,
"loss": 0.871,
"step": 137500
},
{
"epoch": 1.99,
"learning_rate": 4.381678930362636e-05,
"loss": 0.8676,
"step": 138000
},
{
"epoch": 1.99,
"learning_rate": 4.379254636220118e-05,
"loss": 0.8756,
"step": 138500
},
{
"epoch": 2.0,
"eval_bleu": 44.6906,
"eval_gen_len": 13.1329,
"eval_loss": 0.8033798336982727,
"eval_runtime": 8678.5438,
"eval_samples_per_second": 15.5,
"eval_steps_per_second": 1.938,
"step": 138887
},
{
"epoch": 2.0,
"learning_rate": 4.376830342077601e-05,
"loss": 0.8397,
"step": 139000
},
{
"epoch": 2.01,
"learning_rate": 4.3744060479350836e-05,
"loss": 0.7043,
"step": 139500
},
{
"epoch": 2.02,
"learning_rate": 4.371981753792566e-05,
"loss": 0.6911,
"step": 140000
},
{
"epoch": 2.02,
"learning_rate": 4.369557459650048e-05,
"loss": 0.6938,
"step": 140500
},
{
"epoch": 2.03,
"learning_rate": 4.367138014095816e-05,
"loss": 0.696,
"step": 141000
},
{
"epoch": 2.04,
"learning_rate": 4.364718568541584e-05,
"loss": 0.6957,
"step": 141500
},
{
"epoch": 2.04,
"learning_rate": 4.3622942743990664e-05,
"loss": 0.6974,
"step": 142000
},
{
"epoch": 2.05,
"learning_rate": 4.359869980256548e-05,
"loss": 0.7081,
"step": 142500
},
{
"epoch": 2.06,
"learning_rate": 4.357445686114031e-05,
"loss": 0.7022,
"step": 143000
},
{
"epoch": 2.07,
"learning_rate": 4.355021391971514e-05,
"loss": 0.7026,
"step": 143500
},
{
"epoch": 2.07,
"learning_rate": 4.3525970978289967e-05,
"loss": 0.698,
"step": 144000
},
{
"epoch": 2.08,
"learning_rate": 4.3501728036864785e-05,
"loss": 0.7091,
"step": 144500
},
{
"epoch": 2.09,
"learning_rate": 4.347748509543961e-05,
"loss": 0.6997,
"step": 145000
},
{
"epoch": 2.1,
"learning_rate": 4.3453242154014436e-05,
"loss": 0.7156,
"step": 145500
},
{
"epoch": 2.1,
"learning_rate": 4.342899921258927e-05,
"loss": 0.7017,
"step": 146000
},
{
"epoch": 2.11,
"learning_rate": 4.340475627116409e-05,
"loss": 0.703,
"step": 146500
},
{
"epoch": 2.12,
"learning_rate": 4.338051332973891e-05,
"loss": 0.6993,
"step": 147000
},
{
"epoch": 2.12,
"learning_rate": 4.335627038831374e-05,
"loss": 0.7043,
"step": 147500
},
{
"epoch": 2.13,
"learning_rate": 4.3332075932771414e-05,
"loss": 0.7203,
"step": 148000
},
{
"epoch": 2.14,
"learning_rate": 4.330783299134624e-05,
"loss": 0.7097,
"step": 148500
},
{
"epoch": 2.15,
"learning_rate": 4.328359004992107e-05,
"loss": 0.7115,
"step": 149000
},
{
"epoch": 2.15,
"learning_rate": 4.325934710849589e-05,
"loss": 0.6984,
"step": 149500
},
{
"epoch": 2.16,
"learning_rate": 4.3235104167070716e-05,
"loss": 0.7154,
"step": 150000
},
{
"epoch": 2.17,
"learning_rate": 4.321090971152839e-05,
"loss": 0.7102,
"step": 150500
},
{
"epoch": 2.17,
"learning_rate": 4.318666677010322e-05,
"loss": 0.7123,
"step": 151000
},
{
"epoch": 2.18,
"learning_rate": 4.316242382867804e-05,
"loss": 0.7167,
"step": 151500
},
{
"epoch": 2.19,
"learning_rate": 4.313818088725287e-05,
"loss": 0.7244,
"step": 152000
},
{
"epoch": 2.2,
"learning_rate": 4.3113937945827694e-05,
"loss": 0.7088,
"step": 152500
},
{
"epoch": 2.2,
"learning_rate": 4.308969500440252e-05,
"loss": 0.7123,
"step": 153000
},
{
"epoch": 2.21,
"learning_rate": 4.3065452062977345e-05,
"loss": 0.7179,
"step": 153500
},
{
"epoch": 2.22,
"learning_rate": 4.304120912155217e-05,
"loss": 0.7166,
"step": 154000
},
{
"epoch": 2.22,
"learning_rate": 4.3017014666009846e-05,
"loss": 0.7137,
"step": 154500
},
{
"epoch": 2.23,
"learning_rate": 4.299277172458467e-05,
"loss": 0.7256,
"step": 155000
},
{
"epoch": 2.24,
"learning_rate": 4.29685287831595e-05,
"loss": 0.7179,
"step": 155500
},
{
"epoch": 2.25,
"learning_rate": 4.294428584173432e-05,
"loss": 0.7123,
"step": 156000
},
{
"epoch": 2.25,
"learning_rate": 4.292004290030915e-05,
"loss": 0.7211,
"step": 156500
},
{
"epoch": 2.26,
"learning_rate": 4.2895799958883974e-05,
"loss": 0.7181,
"step": 157000
},
{
"epoch": 2.27,
"learning_rate": 4.28715570174588e-05,
"loss": 0.7225,
"step": 157500
},
{
"epoch": 2.28,
"learning_rate": 4.2847314076033625e-05,
"loss": 0.7156,
"step": 158000
},
{
"epoch": 2.28,
"learning_rate": 4.28231196204913e-05,
"loss": 0.7202,
"step": 158500
},
{
"epoch": 2.29,
"learning_rate": 4.2798876679066126e-05,
"loss": 0.7301,
"step": 159000
},
{
"epoch": 2.3,
"learning_rate": 4.277463373764095e-05,
"loss": 0.7135,
"step": 159500
},
{
"epoch": 2.3,
"learning_rate": 4.275039079621578e-05,
"loss": 0.7267,
"step": 160000
},
{
"epoch": 2.31,
"learning_rate": 4.2726244826556296e-05,
"loss": 0.7152,
"step": 160500
},
{
"epoch": 2.32,
"learning_rate": 4.270200188513113e-05,
"loss": 0.735,
"step": 161000
},
{
"epoch": 2.33,
"learning_rate": 4.2677758943705954e-05,
"loss": 0.7134,
"step": 161500
},
{
"epoch": 2.33,
"learning_rate": 4.265351600228078e-05,
"loss": 0.7326,
"step": 162000
},
{
"epoch": 2.34,
"learning_rate": 4.26292730608556e-05,
"loss": 0.7329,
"step": 162500
},
{
"epoch": 2.35,
"learning_rate": 4.260503011943043e-05,
"loss": 0.7182,
"step": 163000
},
{
"epoch": 2.35,
"learning_rate": 4.2580787178005256e-05,
"loss": 0.7203,
"step": 163500
},
{
"epoch": 2.36,
"learning_rate": 4.255654423658008e-05,
"loss": 0.7227,
"step": 164000
},
{
"epoch": 2.37,
"learning_rate": 4.25323012951549e-05,
"loss": 0.7258,
"step": 164500
},
{
"epoch": 2.38,
"learning_rate": 4.2508058353729726e-05,
"loss": 0.7253,
"step": 165000
},
{
"epoch": 2.38,
"learning_rate": 4.248381541230456e-05,
"loss": 0.7289,
"step": 165500
},
{
"epoch": 2.39,
"learning_rate": 4.245962095676223e-05,
"loss": 0.7253,
"step": 166000
},
{
"epoch": 2.4,
"learning_rate": 4.243537801533706e-05,
"loss": 0.7309,
"step": 166500
},
{
"epoch": 2.4,
"learning_rate": 4.2411135073911885e-05,
"loss": 0.7304,
"step": 167000
},
{
"epoch": 2.41,
"learning_rate": 4.2386892132486704e-05,
"loss": 0.739,
"step": 167500
},
{
"epoch": 2.42,
"learning_rate": 4.236264919106153e-05,
"loss": 0.7291,
"step": 168000
},
{
"epoch": 2.43,
"learning_rate": 4.233840624963636e-05,
"loss": 0.7393,
"step": 168500
},
{
"epoch": 2.43,
"learning_rate": 4.231416330821119e-05,
"loss": 0.7237,
"step": 169000
},
{
"epoch": 2.44,
"learning_rate": 4.2289920366786006e-05,
"loss": 0.7326,
"step": 169500
},
{
"epoch": 2.45,
"learning_rate": 4.226567742536083e-05,
"loss": 0.7266,
"step": 170000
},
{
"epoch": 2.46,
"learning_rate": 4.224153145570136e-05,
"loss": 0.7212,
"step": 170500
},
{
"epoch": 2.46,
"learning_rate": 4.221728851427619e-05,
"loss": 0.7222,
"step": 171000
},
{
"epoch": 2.47,
"learning_rate": 4.219304557285101e-05,
"loss": 0.7362,
"step": 171500
},
{
"epoch": 2.48,
"learning_rate": 4.216885111730869e-05,
"loss": 0.7218,
"step": 172000
},
{
"epoch": 2.48,
"learning_rate": 4.214460817588351e-05,
"loss": 0.7372,
"step": 172500
},
{
"epoch": 2.49,
"learning_rate": 4.2120365234458335e-05,
"loss": 0.7184,
"step": 173000
},
{
"epoch": 2.5,
"learning_rate": 4.209612229303316e-05,
"loss": 0.7405,
"step": 173500
},
{
"epoch": 2.51,
"learning_rate": 4.207187935160799e-05,
"loss": 0.7246,
"step": 174000
},
{
"epoch": 2.51,
"learning_rate": 4.204763641018281e-05,
"loss": 0.7313,
"step": 174500
},
{
"epoch": 2.52,
"learning_rate": 4.2023441954640494e-05,
"loss": 0.7346,
"step": 175000
},
{
"epoch": 2.53,
"learning_rate": 4.199919901321531e-05,
"loss": 0.725,
"step": 175500
},
{
"epoch": 2.53,
"learning_rate": 4.197495607179014e-05,
"loss": 0.7358,
"step": 176000
},
{
"epoch": 2.54,
"learning_rate": 4.1950713130364964e-05,
"loss": 0.7369,
"step": 176500
},
{
"epoch": 2.55,
"learning_rate": 4.192647018893979e-05,
"loss": 0.7405,
"step": 177000
},
{
"epoch": 2.56,
"learning_rate": 4.1902227247514615e-05,
"loss": 0.7399,
"step": 177500
},
{
"epoch": 2.56,
"learning_rate": 4.187803279197229e-05,
"loss": 0.7273,
"step": 178000
},
{
"epoch": 2.57,
"learning_rate": 4.1853789850547116e-05,
"loss": 0.7332,
"step": 178500
},
{
"epoch": 2.58,
"learning_rate": 4.182954690912194e-05,
"loss": 0.7395,
"step": 179000
},
{
"epoch": 2.58,
"learning_rate": 4.180535245357962e-05,
"loss": 0.7434,
"step": 179500
},
{
"epoch": 2.59,
"learning_rate": 4.178110951215444e-05,
"loss": 0.7389,
"step": 180000
},
{
"epoch": 2.6,
"learning_rate": 4.175686657072927e-05,
"loss": 0.7344,
"step": 180500
},
{
"epoch": 2.61,
"learning_rate": 4.1732623629304094e-05,
"loss": 0.7314,
"step": 181000
},
{
"epoch": 2.61,
"learning_rate": 4.170838068787892e-05,
"loss": 0.7315,
"step": 181500
},
{
"epoch": 2.62,
"learning_rate": 4.1684137746453745e-05,
"loss": 0.732,
"step": 182000
},
{
"epoch": 2.63,
"learning_rate": 4.165989480502857e-05,
"loss": 0.7289,
"step": 182500
},
{
"epoch": 2.64,
"learning_rate": 4.1635651863603396e-05,
"loss": 0.7296,
"step": 183000
},
{
"epoch": 2.64,
"learning_rate": 4.161140892217822e-05,
"loss": 0.731,
"step": 183500
},
{
"epoch": 2.65,
"learning_rate": 4.158716598075305e-05,
"loss": 0.73,
"step": 184000
},
{
"epoch": 2.66,
"learning_rate": 4.156292303932787e-05,
"loss": 0.7343,
"step": 184500
},
{
"epoch": 2.66,
"learning_rate": 4.153872858378555e-05,
"loss": 0.7286,
"step": 185000
},
{
"epoch": 2.67,
"learning_rate": 4.1514485642360374e-05,
"loss": 0.7434,
"step": 185500
},
{
"epoch": 2.68,
"learning_rate": 4.14902427009352e-05,
"loss": 0.744,
"step": 186000
},
{
"epoch": 2.69,
"learning_rate": 4.146599975951002e-05,
"loss": 0.737,
"step": 186500
},
{
"epoch": 2.69,
"learning_rate": 4.144175681808485e-05,
"loss": 0.7445,
"step": 187000
},
{
"epoch": 2.7,
"learning_rate": 4.1417513876659676e-05,
"loss": 0.7396,
"step": 187500
},
{
"epoch": 2.71,
"learning_rate": 4.13932709352345e-05,
"loss": 0.7276,
"step": 188000
},
{
"epoch": 2.71,
"learning_rate": 4.136902799380932e-05,
"loss": 0.7468,
"step": 188500
},
{
"epoch": 2.72,
"learning_rate": 4.134478505238415e-05,
"loss": 0.7417,
"step": 189000
},
{
"epoch": 2.73,
"learning_rate": 4.132054211095898e-05,
"loss": 0.7424,
"step": 189500
},
{
"epoch": 2.74,
"learning_rate": 4.1296299169533804e-05,
"loss": 0.7314,
"step": 190000
},
{
"epoch": 2.74,
"learning_rate": 4.127210471399148e-05,
"loss": 0.7477,
"step": 190500
},
{
"epoch": 2.75,
"learning_rate": 4.1247861772566305e-05,
"loss": 0.7442,
"step": 191000
},
{
"epoch": 2.76,
"learning_rate": 4.1223618831141124e-05,
"loss": 0.7325,
"step": 191500
},
{
"epoch": 2.76,
"learning_rate": 4.119937588971595e-05,
"loss": 0.7442,
"step": 192000
},
{
"epoch": 2.77,
"learning_rate": 4.117513294829078e-05,
"loss": 0.7405,
"step": 192500
},
{
"epoch": 2.78,
"learning_rate": 4.115089000686561e-05,
"loss": 0.7345,
"step": 193000
},
{
"epoch": 2.79,
"learning_rate": 4.1126647065440426e-05,
"loss": 0.7328,
"step": 193500
},
{
"epoch": 2.79,
"learning_rate": 4.110245260989811e-05,
"loss": 0.7358,
"step": 194000
},
{
"epoch": 2.8,
"learning_rate": 4.107820966847293e-05,
"loss": 0.7534,
"step": 194500
},
{
"epoch": 2.81,
"learning_rate": 4.105396672704775e-05,
"loss": 0.7353,
"step": 195000
},
{
"epoch": 2.82,
"learning_rate": 4.102972378562258e-05,
"loss": 0.7441,
"step": 195500
},
{
"epoch": 2.82,
"learning_rate": 4.100548084419741e-05,
"loss": 0.7424,
"step": 196000
},
{
"epoch": 2.83,
"learning_rate": 4.098123790277223e-05,
"loss": 0.7383,
"step": 196500
},
{
"epoch": 2.84,
"learning_rate": 4.0956994961347055e-05,
"loss": 0.744,
"step": 197000
},
{
"epoch": 2.84,
"learning_rate": 4.093275201992188e-05,
"loss": 0.7392,
"step": 197500
},
{
"epoch": 2.85,
"learning_rate": 4.090850907849671e-05,
"loss": 0.731,
"step": 198000
},
{
"epoch": 2.86,
"learning_rate": 4.088426613707153e-05,
"loss": 0.7334,
"step": 198500
},
{
"epoch": 2.87,
"learning_rate": 4.0860071681529214e-05,
"loss": 0.7305,
"step": 199000
},
{
"epoch": 2.87,
"learning_rate": 4.083582874010403e-05,
"loss": 0.7339,
"step": 199500
},
{
"epoch": 2.88,
"learning_rate": 4.081158579867886e-05,
"loss": 0.7397,
"step": 200000
},
{
"epoch": 2.89,
"learning_rate": 4.0787342857253684e-05,
"loss": 0.7419,
"step": 200500
},
{
"epoch": 2.89,
"learning_rate": 4.076309991582851e-05,
"loss": 0.7392,
"step": 201000
},
{
"epoch": 2.9,
"learning_rate": 4.0738856974403335e-05,
"loss": 0.7375,
"step": 201500
},
{
"epoch": 2.91,
"learning_rate": 4.071466251886101e-05,
"loss": 0.7351,
"step": 202000
},
{
"epoch": 2.92,
"learning_rate": 4.0690419577435836e-05,
"loss": 0.753,
"step": 202500
},
{
"epoch": 2.92,
"learning_rate": 4.066617663601066e-05,
"loss": 0.7369,
"step": 203000
},
{
"epoch": 2.93,
"learning_rate": 4.064193369458549e-05,
"loss": 0.751,
"step": 203500
},
{
"epoch": 2.94,
"learning_rate": 4.0617690753160306e-05,
"loss": 0.7409,
"step": 204000
},
{
"epoch": 2.94,
"learning_rate": 4.059349629761799e-05,
"loss": 0.7454,
"step": 204500
},
{
"epoch": 2.95,
"learning_rate": 4.056925335619281e-05,
"loss": 0.7531,
"step": 205000
},
{
"epoch": 2.96,
"learning_rate": 4.054501041476764e-05,
"loss": 0.7482,
"step": 205500
},
{
"epoch": 2.97,
"learning_rate": 4.0520767473342465e-05,
"loss": 0.7299,
"step": 206000
},
{
"epoch": 2.97,
"learning_rate": 4.049657301780014e-05,
"loss": 0.7366,
"step": 206500
},
{
"epoch": 2.98,
"learning_rate": 4.0472330076374966e-05,
"loss": 0.7545,
"step": 207000
},
{
"epoch": 2.99,
"learning_rate": 4.044808713494979e-05,
"loss": 0.7475,
"step": 207500
},
{
"epoch": 3.0,
"learning_rate": 4.042384419352461e-05,
"loss": 0.7566,
"step": 208000
},
{
"epoch": 3.0,
"eval_bleu": 45.7383,
"eval_gen_len": 13.0535,
"eval_loss": 0.7818750739097595,
"eval_runtime": 8588.0716,
"eval_samples_per_second": 15.663,
"eval_steps_per_second": 1.958,
"step": 208330
},
{
"epoch": 3.0,
"learning_rate": 4.039960125209944e-05,
"loss": 0.6753,
"step": 208500
},
{
"epoch": 3.01,
"learning_rate": 4.037535831067427e-05,
"loss": 0.5634,
"step": 209000
},
{
"epoch": 3.02,
"learning_rate": 4.035116385513194e-05,
"loss": 0.5652,
"step": 209500
},
{
"epoch": 3.02,
"learning_rate": 4.032692091370677e-05,
"loss": 0.5603,
"step": 210000
},
{
"epoch": 3.03,
"learning_rate": 4.0302677972281595e-05,
"loss": 0.5516,
"step": 210500
},
{
"epoch": 3.04,
"learning_rate": 4.027848351673927e-05,
"loss": 0.5561,
"step": 211000
},
{
"epoch": 3.05,
"learning_rate": 4.0254240575314096e-05,
"loss": 0.5682,
"step": 211500
},
{
"epoch": 3.05,
"learning_rate": 4.0229997633888915e-05,
"loss": 0.5612,
"step": 212000
},
{
"epoch": 3.06,
"learning_rate": 4.020575469246374e-05,
"loss": 0.5686,
"step": 212500
},
{
"epoch": 3.07,
"learning_rate": 4.0181560236921416e-05,
"loss": 0.5705,
"step": 213000
},
{
"epoch": 3.07,
"learning_rate": 4.015731729549624e-05,
"loss": 0.5604,
"step": 213500
},
{
"epoch": 3.08,
"learning_rate": 4.0133074354071074e-05,
"loss": 0.5596,
"step": 214000
},
{
"epoch": 3.09,
"learning_rate": 4.01088314126459e-05,
"loss": 0.5693,
"step": 214500
},
{
"epoch": 3.1,
"learning_rate": 4.008458847122072e-05,
"loss": 0.5712,
"step": 215000
},
{
"epoch": 3.1,
"learning_rate": 4.0060345529795544e-05,
"loss": 0.5826,
"step": 215500
},
{
"epoch": 3.11,
"learning_rate": 4.003610258837037e-05,
"loss": 0.5733,
"step": 216000
},
{
"epoch": 3.12,
"learning_rate": 4.00118596469452e-05,
"loss": 0.571,
"step": 216500
},
{
"epoch": 3.12,
"learning_rate": 3.998761670552002e-05,
"loss": 0.5771,
"step": 217000
},
{
"epoch": 3.13,
"learning_rate": 3.9963373764094846e-05,
"loss": 0.564,
"step": 217500
},
{
"epoch": 3.14,
"learning_rate": 3.993913082266967e-05,
"loss": 0.573,
"step": 218000
},
{
"epoch": 3.15,
"learning_rate": 3.9914887881244504e-05,
"loss": 0.5772,
"step": 218500
},
{
"epoch": 3.15,
"learning_rate": 3.989069342570217e-05,
"loss": 0.5759,
"step": 219000
},
{
"epoch": 3.16,
"learning_rate": 3.9866450484277e-05,
"loss": 0.578,
"step": 219500
},
{
"epoch": 3.17,
"learning_rate": 3.9842207542851824e-05,
"loss": 0.5694,
"step": 220000
},
{
"epoch": 3.18,
"learning_rate": 3.981796460142665e-05,
"loss": 0.5799,
"step": 220500
},
{
"epoch": 3.18,
"learning_rate": 3.9793721660001475e-05,
"loss": 0.5709,
"step": 221000
},
{
"epoch": 3.19,
"learning_rate": 3.97694787185763e-05,
"loss": 0.5824,
"step": 221500
},
{
"epoch": 3.2,
"learning_rate": 3.9745235777151126e-05,
"loss": 0.5866,
"step": 222000
},
{
"epoch": 3.2,
"learning_rate": 3.972099283572595e-05,
"loss": 0.5748,
"step": 222500
},
{
"epoch": 3.21,
"learning_rate": 3.969679838018363e-05,
"loss": 0.5772,
"step": 223000
},
{
"epoch": 3.22,
"learning_rate": 3.967255543875845e-05,
"loss": 0.59,
"step": 223500
},
{
"epoch": 3.23,
"learning_rate": 3.964836098321613e-05,
"loss": 0.5777,
"step": 224000
},
{
"epoch": 3.23,
"learning_rate": 3.9624118041790954e-05,
"loss": 0.5797,
"step": 224500
},
{
"epoch": 3.24,
"learning_rate": 3.959987510036578e-05,
"loss": 0.5866,
"step": 225000
},
{
"epoch": 3.25,
"learning_rate": 3.9575632158940605e-05,
"loss": 0.5912,
"step": 225500
},
{
"epoch": 3.25,
"learning_rate": 3.955138921751543e-05,
"loss": 0.5867,
"step": 226000
},
{
"epoch": 3.26,
"learning_rate": 3.9527146276090256e-05,
"loss": 0.5958,
"step": 226500
},
{
"epoch": 3.27,
"learning_rate": 3.950290333466508e-05,
"loss": 0.5864,
"step": 227000
},
{
"epoch": 3.28,
"learning_rate": 3.947866039323991e-05,
"loss": 0.5919,
"step": 227500
},
{
"epoch": 3.28,
"learning_rate": 3.945446593769758e-05,
"loss": 0.5968,
"step": 228000
},
{
"epoch": 3.29,
"learning_rate": 3.943022299627241e-05,
"loss": 0.6008,
"step": 228500
},
{
"epoch": 3.3,
"learning_rate": 3.940598005484723e-05,
"loss": 0.5913,
"step": 229000
},
{
"epoch": 3.3,
"learning_rate": 3.938173711342206e-05,
"loss": 0.5915,
"step": 229500
},
{
"epoch": 3.31,
"learning_rate": 3.9357494171996885e-05,
"loss": 0.5995,
"step": 230000
},
{
"epoch": 3.32,
"learning_rate": 3.933329971645456e-05,
"loss": 0.5883,
"step": 230500
},
{
"epoch": 3.33,
"learning_rate": 3.9309056775029386e-05,
"loss": 0.5973,
"step": 231000
},
{
"epoch": 3.33,
"learning_rate": 3.928481383360421e-05,
"loss": 0.5969,
"step": 231500
},
{
"epoch": 3.34,
"learning_rate": 3.926057089217903e-05,
"loss": 0.5864,
"step": 232000
},
{
"epoch": 3.35,
"learning_rate": 3.923632795075386e-05,
"loss": 0.6016,
"step": 232500
},
{
"epoch": 3.36,
"learning_rate": 3.921208500932869e-05,
"loss": 0.6012,
"step": 233000
},
{
"epoch": 3.36,
"learning_rate": 3.9187842067903514e-05,
"loss": 0.5976,
"step": 233500
},
{
"epoch": 3.37,
"learning_rate": 3.916359912647833e-05,
"loss": 0.5963,
"step": 234000
},
{
"epoch": 3.38,
"learning_rate": 3.9139404670936015e-05,
"loss": 0.6039,
"step": 234500
},
{
"epoch": 3.38,
"learning_rate": 3.911521021539369e-05,
"loss": 0.6036,
"step": 235000
},
{
"epoch": 3.39,
"learning_rate": 3.9090967273968516e-05,
"loss": 0.5949,
"step": 235500
},
{
"epoch": 3.4,
"learning_rate": 3.9066724332543335e-05,
"loss": 0.6058,
"step": 236000
},
{
"epoch": 3.41,
"learning_rate": 3.904252987700102e-05,
"loss": 0.5999,
"step": 236500
},
{
"epoch": 3.41,
"learning_rate": 3.9018286935575836e-05,
"loss": 0.6034,
"step": 237000
},
{
"epoch": 3.42,
"learning_rate": 3.899404399415066e-05,
"loss": 0.6161,
"step": 237500
},
{
"epoch": 3.43,
"learning_rate": 3.8969801052725494e-05,
"loss": 0.5963,
"step": 238000
},
{
"epoch": 3.43,
"learning_rate": 3.894555811130032e-05,
"loss": 0.6032,
"step": 238500
},
{
"epoch": 3.44,
"learning_rate": 3.8921363655757995e-05,
"loss": 0.6042,
"step": 239000
},
{
"epoch": 3.45,
"learning_rate": 3.8897169200215664e-05,
"loss": 0.6079,
"step": 239500
},
{
"epoch": 3.46,
"learning_rate": 3.8872926258790496e-05,
"loss": 0.6014,
"step": 240000
},
{
"epoch": 3.46,
"learning_rate": 3.884868331736532e-05,
"loss": 0.6053,
"step": 240500
},
{
"epoch": 3.47,
"learning_rate": 3.882444037594014e-05,
"loss": 0.6021,
"step": 241000
},
{
"epoch": 3.48,
"learning_rate": 3.8800197434514966e-05,
"loss": 0.6112,
"step": 241500
},
{
"epoch": 3.48,
"learning_rate": 3.877595449308979e-05,
"loss": 0.6094,
"step": 242000
},
{
"epoch": 3.49,
"learning_rate": 3.8751711551664624e-05,
"loss": 0.6032,
"step": 242500
},
{
"epoch": 3.5,
"learning_rate": 3.872746861023944e-05,
"loss": 0.5961,
"step": 243000
},
{
"epoch": 3.51,
"learning_rate": 3.8703274154697125e-05,
"loss": 0.6179,
"step": 243500
},
{
"epoch": 3.51,
"learning_rate": 3.8679031213271944e-05,
"loss": 0.612,
"step": 244000
},
{
"epoch": 3.52,
"learning_rate": 3.865478827184677e-05,
"loss": 0.6134,
"step": 244500
},
{
"epoch": 3.53,
"learning_rate": 3.8630545330421595e-05,
"loss": 0.6046,
"step": 245000
},
{
"epoch": 3.54,
"learning_rate": 3.860630238899642e-05,
"loss": 0.6184,
"step": 245500
},
{
"epoch": 3.54,
"learning_rate": 3.8582059447571246e-05,
"loss": 0.6116,
"step": 246000
},
{
"epoch": 3.55,
"learning_rate": 3.855786499202892e-05,
"loss": 0.6098,
"step": 246500
},
{
"epoch": 3.56,
"learning_rate": 3.853362205060375e-05,
"loss": 0.6051,
"step": 247000
},
{
"epoch": 3.56,
"learning_rate": 3.850937910917857e-05,
"loss": 0.6083,
"step": 247500
},
{
"epoch": 3.57,
"learning_rate": 3.84851361677534e-05,
"loss": 0.6211,
"step": 248000
},
{
"epoch": 3.58,
"learning_rate": 3.8460893226328224e-05,
"loss": 0.6174,
"step": 248500
},
{
"epoch": 3.59,
"learning_rate": 3.843665028490305e-05,
"loss": 0.623,
"step": 249000
},
{
"epoch": 3.59,
"learning_rate": 3.8412407343477875e-05,
"loss": 0.6225,
"step": 249500
},
{
"epoch": 3.6,
"learning_rate": 3.83881644020527e-05,
"loss": 0.6202,
"step": 250000
},
{
"epoch": 3.61,
"learning_rate": 3.8363969946510376e-05,
"loss": 0.6165,
"step": 250500
},
{
"epoch": 3.61,
"learning_rate": 3.83397270050852e-05,
"loss": 0.6138,
"step": 251000
},
{
"epoch": 3.62,
"learning_rate": 3.831548406366003e-05,
"loss": 0.6103,
"step": 251500
},
{
"epoch": 3.63,
"learning_rate": 3.829124112223485e-05,
"loss": 0.6202,
"step": 252000
},
{
"epoch": 3.64,
"learning_rate": 3.826704666669253e-05,
"loss": 0.615,
"step": 252500
},
{
"epoch": 3.64,
"learning_rate": 3.8242803725267354e-05,
"loss": 0.6095,
"step": 253000
},
{
"epoch": 3.65,
"learning_rate": 3.821856078384218e-05,
"loss": 0.6181,
"step": 253500
},
{
"epoch": 3.66,
"learning_rate": 3.8194317842417005e-05,
"loss": 0.6153,
"step": 254000
},
{
"epoch": 3.66,
"learning_rate": 3.817007490099183e-05,
"loss": 0.6153,
"step": 254500
},
{
"epoch": 3.67,
"learning_rate": 3.814583195956665e-05,
"loss": 0.6108,
"step": 255000
},
{
"epoch": 3.68,
"learning_rate": 3.812158901814148e-05,
"loss": 0.6162,
"step": 255500
},
{
"epoch": 3.69,
"learning_rate": 3.809734607671631e-05,
"loss": 0.6132,
"step": 256000
},
{
"epoch": 3.69,
"learning_rate": 3.807310313529113e-05,
"loss": 0.6124,
"step": 256500
},
{
"epoch": 3.7,
"learning_rate": 3.804886019386595e-05,
"loss": 0.627,
"step": 257000
},
{
"epoch": 3.71,
"learning_rate": 3.8024665738323634e-05,
"loss": 0.6221,
"step": 257500
},
{
"epoch": 3.72,
"learning_rate": 3.800042279689845e-05,
"loss": 0.6122,
"step": 258000
},
{
"epoch": 3.72,
"learning_rate": 3.7976179855473285e-05,
"loss": 0.6049,
"step": 258500
},
{
"epoch": 3.73,
"learning_rate": 3.795193691404811e-05,
"loss": 0.6228,
"step": 259000
},
{
"epoch": 3.74,
"learning_rate": 3.7927742458505786e-05,
"loss": 0.616,
"step": 259500
},
{
"epoch": 3.74,
"learning_rate": 3.790349951708061e-05,
"loss": 0.6167,
"step": 260000
},
{
"epoch": 3.75,
"learning_rate": 3.787925657565544e-05,
"loss": 0.6266,
"step": 260500
},
{
"epoch": 3.76,
"learning_rate": 3.7855013634230256e-05,
"loss": 0.6334,
"step": 261000
},
{
"epoch": 3.77,
"learning_rate": 3.783077069280508e-05,
"loss": 0.6152,
"step": 261500
},
{
"epoch": 3.77,
"learning_rate": 3.7806527751379914e-05,
"loss": 0.6214,
"step": 262000
},
{
"epoch": 3.78,
"learning_rate": 3.778228480995474e-05,
"loss": 0.6052,
"step": 262500
},
{
"epoch": 3.79,
"learning_rate": 3.775804186852956e-05,
"loss": 0.6202,
"step": 263000
},
{
"epoch": 3.79,
"learning_rate": 3.773384741298724e-05,
"loss": 0.6245,
"step": 263500
},
{
"epoch": 3.8,
"learning_rate": 3.7709652957444916e-05,
"loss": 0.6264,
"step": 264000
},
{
"epoch": 3.81,
"learning_rate": 3.7685458501902585e-05,
"loss": 0.6282,
"step": 264500
},
{
"epoch": 3.82,
"learning_rate": 3.766121556047742e-05,
"loss": 0.6225,
"step": 265000
},
{
"epoch": 3.82,
"learning_rate": 3.763697261905224e-05,
"loss": 0.6157,
"step": 265500
},
{
"epoch": 3.83,
"learning_rate": 3.761277816350991e-05,
"loss": 0.6173,
"step": 266000
},
{
"epoch": 3.84,
"learning_rate": 3.7588535222084744e-05,
"loss": 0.6224,
"step": 266500
},
{
"epoch": 3.84,
"learning_rate": 3.756429228065956e-05,
"loss": 0.6239,
"step": 267000
},
{
"epoch": 3.85,
"learning_rate": 3.754004933923439e-05,
"loss": 0.6242,
"step": 267500
},
{
"epoch": 3.86,
"learning_rate": 3.7515806397809214e-05,
"loss": 0.6284,
"step": 268000
},
{
"epoch": 3.87,
"learning_rate": 3.7491563456384046e-05,
"loss": 0.6279,
"step": 268500
},
{
"epoch": 3.87,
"learning_rate": 3.7467320514958865e-05,
"loss": 0.6289,
"step": 269000
},
{
"epoch": 3.88,
"learning_rate": 3.744307757353369e-05,
"loss": 0.6222,
"step": 269500
},
{
"epoch": 3.89,
"learning_rate": 3.7418834632108516e-05,
"loss": 0.6195,
"step": 270000
},
{
"epoch": 3.9,
"learning_rate": 3.739459169068334e-05,
"loss": 0.627,
"step": 270500
},
{
"epoch": 3.9,
"learning_rate": 3.737034874925817e-05,
"loss": 0.6222,
"step": 271000
},
{
"epoch": 3.91,
"learning_rate": 3.734610580783299e-05,
"loss": 0.6236,
"step": 271500
},
{
"epoch": 3.92,
"learning_rate": 3.732186286640782e-05,
"loss": 0.627,
"step": 272000
},
{
"epoch": 3.92,
"learning_rate": 3.7297619924982644e-05,
"loss": 0.6254,
"step": 272500
},
{
"epoch": 3.93,
"learning_rate": 3.727337698355747e-05,
"loss": 0.627,
"step": 273000
},
{
"epoch": 3.94,
"learning_rate": 3.7249134042132295e-05,
"loss": 0.635,
"step": 273500
},
{
"epoch": 3.95,
"learning_rate": 3.722489110070712e-05,
"loss": 0.619,
"step": 274000
},
{
"epoch": 3.95,
"learning_rate": 3.7200696645164796e-05,
"loss": 0.6212,
"step": 274500
},
{
"epoch": 3.96,
"learning_rate": 3.717645370373962e-05,
"loss": 0.6289,
"step": 275000
},
{
"epoch": 3.97,
"learning_rate": 3.715221076231445e-05,
"loss": 0.6251,
"step": 275500
},
{
"epoch": 3.97,
"learning_rate": 3.712796782088927e-05,
"loss": 0.6262,
"step": 276000
},
{
"epoch": 3.98,
"learning_rate": 3.71037248794641e-05,
"loss": 0.6294,
"step": 276500
},
{
"epoch": 3.99,
"learning_rate": 3.7079530423921774e-05,
"loss": 0.6298,
"step": 277000
},
{
"epoch": 4.0,
"learning_rate": 3.70552874824966e-05,
"loss": 0.6239,
"step": 277500
},
{
"epoch": 4.0,
"eval_bleu": 46.5398,
"eval_gen_len": 12.9925,
"eval_loss": 0.7927529215812683,
"eval_runtime": 8543.5664,
"eval_samples_per_second": 15.745,
"eval_steps_per_second": 1.968,
"step": 277774
},
{
"epoch": 4.0,
"learning_rate": 3.7031044541071425e-05,
"loss": 0.5457,
"step": 278000
},
{
"epoch": 4.01,
"learning_rate": 3.70068500855291e-05,
"loss": 0.4447,
"step": 278500
},
{
"epoch": 4.02,
"learning_rate": 3.6982607144103926e-05,
"loss": 0.4393,
"step": 279000
},
{
"epoch": 4.02,
"learning_rate": 3.695836420267875e-05,
"loss": 0.4442,
"step": 279500
},
{
"epoch": 4.03,
"learning_rate": 3.693416974713643e-05,
"loss": 0.4464,
"step": 280000
},
{
"epoch": 4.04,
"learning_rate": 3.690992680571125e-05,
"loss": 0.4616,
"step": 280500
},
{
"epoch": 4.05,
"learning_rate": 3.688568386428607e-05,
"loss": 0.4539,
"step": 281000
},
{
"epoch": 4.05,
"learning_rate": 3.6861440922860904e-05,
"loss": 0.4506,
"step": 281500
},
{
"epoch": 4.06,
"learning_rate": 3.683719798143573e-05,
"loss": 0.4513,
"step": 282000
},
{
"epoch": 4.07,
"learning_rate": 3.6812955040010555e-05,
"loss": 0.4494,
"step": 282500
},
{
"epoch": 4.08,
"learning_rate": 3.6788712098585374e-05,
"loss": 0.4543,
"step": 283000
},
{
"epoch": 4.08,
"learning_rate": 3.6764469157160206e-05,
"loss": 0.4521,
"step": 283500
},
{
"epoch": 4.09,
"learning_rate": 3.674022621573503e-05,
"loss": 0.4621,
"step": 284000
},
{
"epoch": 4.1,
"learning_rate": 3.671598327430986e-05,
"loss": 0.4531,
"step": 284500
},
{
"epoch": 4.1,
"learning_rate": 3.669178881876753e-05,
"loss": 0.4547,
"step": 285000
},
{
"epoch": 4.11,
"learning_rate": 3.666754587734236e-05,
"loss": 0.4573,
"step": 285500
},
{
"epoch": 4.12,
"learning_rate": 3.6643351421800034e-05,
"loss": 0.4629,
"step": 286000
},
{
"epoch": 4.13,
"learning_rate": 3.66191569662577e-05,
"loss": 0.4655,
"step": 286500
},
{
"epoch": 4.13,
"learning_rate": 3.6594914024832535e-05,
"loss": 0.4582,
"step": 287000
},
{
"epoch": 4.14,
"learning_rate": 3.657067108340736e-05,
"loss": 0.4728,
"step": 287500
},
{
"epoch": 4.15,
"learning_rate": 3.654642814198218e-05,
"loss": 0.4531,
"step": 288000
},
{
"epoch": 4.15,
"learning_rate": 3.6522185200557005e-05,
"loss": 0.4707,
"step": 288500
},
{
"epoch": 4.16,
"learning_rate": 3.649794225913184e-05,
"loss": 0.4699,
"step": 289000
},
{
"epoch": 4.17,
"learning_rate": 3.647369931770666e-05,
"loss": 0.4679,
"step": 289500
},
{
"epoch": 4.18,
"learning_rate": 3.644945637628148e-05,
"loss": 0.468,
"step": 290000
},
{
"epoch": 4.18,
"learning_rate": 3.642521343485631e-05,
"loss": 0.4692,
"step": 290500
},
{
"epoch": 4.19,
"learning_rate": 3.640097049343113e-05,
"loss": 0.466,
"step": 291000
},
{
"epoch": 4.2,
"learning_rate": 3.637672755200596e-05,
"loss": 0.464,
"step": 291500
},
{
"epoch": 4.2,
"learning_rate": 3.6352484610580784e-05,
"loss": 0.472,
"step": 292000
},
{
"epoch": 4.21,
"learning_rate": 3.632829015503846e-05,
"loss": 0.4728,
"step": 292500
},
{
"epoch": 4.22,
"learning_rate": 3.6304047213613285e-05,
"loss": 0.4765,
"step": 293000
},
{
"epoch": 4.23,
"learning_rate": 3.627980427218811e-05,
"loss": 0.4767,
"step": 293500
},
{
"epoch": 4.23,
"learning_rate": 3.6255561330762936e-05,
"loss": 0.4739,
"step": 294000
},
{
"epoch": 4.24,
"learning_rate": 3.623131838933776e-05,
"loss": 0.475,
"step": 294500
},
{
"epoch": 4.25,
"learning_rate": 3.620707544791259e-05,
"loss": 0.4735,
"step": 295000
},
{
"epoch": 4.26,
"learning_rate": 3.618283250648741e-05,
"loss": 0.4783,
"step": 295500
},
{
"epoch": 4.26,
"learning_rate": 3.615858956506224e-05,
"loss": 0.4768,
"step": 296000
},
{
"epoch": 4.27,
"learning_rate": 3.6134346623637064e-05,
"loss": 0.481,
"step": 296500
},
{
"epoch": 4.28,
"learning_rate": 3.611015216809474e-05,
"loss": 0.4861,
"step": 297000
},
{
"epoch": 4.28,
"learning_rate": 3.6085909226669565e-05,
"loss": 0.4804,
"step": 297500
},
{
"epoch": 4.29,
"learning_rate": 3.606166628524439e-05,
"loss": 0.483,
"step": 298000
},
{
"epoch": 4.3,
"learning_rate": 3.6037423343819216e-05,
"loss": 0.4827,
"step": 298500
},
{
"epoch": 4.31,
"learning_rate": 3.601318040239404e-05,
"loss": 0.4827,
"step": 299000
},
{
"epoch": 4.31,
"learning_rate": 3.598893746096886e-05,
"loss": 0.4861,
"step": 299500
},
{
"epoch": 4.32,
"learning_rate": 3.596474300542654e-05,
"loss": 0.4876,
"step": 300000
},
{
"epoch": 4.33,
"learning_rate": 3.594054854988422e-05,
"loss": 0.4849,
"step": 300500
},
{
"epoch": 4.33,
"learning_rate": 3.5916305608459044e-05,
"loss": 0.4825,
"step": 301000
},
{
"epoch": 4.34,
"learning_rate": 3.589206266703386e-05,
"loss": 0.4884,
"step": 301500
},
{
"epoch": 4.35,
"learning_rate": 3.5867819725608695e-05,
"loss": 0.4805,
"step": 302000
},
{
"epoch": 4.36,
"learning_rate": 3.584357678418352e-05,
"loss": 0.4856,
"step": 302500
},
{
"epoch": 4.36,
"learning_rate": 3.5819333842758346e-05,
"loss": 0.4856,
"step": 303000
},
{
"epoch": 4.37,
"learning_rate": 3.5795090901333165e-05,
"loss": 0.4799,
"step": 303500
},
{
"epoch": 4.38,
"learning_rate": 3.5770847959908e-05,
"loss": 0.4898,
"step": 304000
},
{
"epoch": 4.38,
"learning_rate": 3.574660501848282e-05,
"loss": 0.4885,
"step": 304500
},
{
"epoch": 4.39,
"learning_rate": 3.572236207705765e-05,
"loss": 0.4878,
"step": 305000
},
{
"epoch": 4.4,
"learning_rate": 3.569811913563247e-05,
"loss": 0.4832,
"step": 305500
},
{
"epoch": 4.41,
"learning_rate": 3.567387619420729e-05,
"loss": 0.4869,
"step": 306000
},
{
"epoch": 4.41,
"learning_rate": 3.5649633252782125e-05,
"loss": 0.4993,
"step": 306500
},
{
"epoch": 4.42,
"learning_rate": 3.562539031135695e-05,
"loss": 0.4983,
"step": 307000
},
{
"epoch": 4.43,
"learning_rate": 3.5601195855814626e-05,
"loss": 0.4918,
"step": 307500
},
{
"epoch": 4.44,
"learning_rate": 3.557695291438945e-05,
"loss": 0.4882,
"step": 308000
},
{
"epoch": 4.44,
"learning_rate": 3.555270997296427e-05,
"loss": 0.4985,
"step": 308500
},
{
"epoch": 4.45,
"learning_rate": 3.5528467031539096e-05,
"loss": 0.4898,
"step": 309000
},
{
"epoch": 4.46,
"learning_rate": 3.550422409011392e-05,
"loss": 0.4955,
"step": 309500
},
{
"epoch": 4.46,
"learning_rate": 3.5479981148688754e-05,
"loss": 0.4886,
"step": 310000
},
{
"epoch": 4.47,
"learning_rate": 3.545573820726357e-05,
"loss": 0.489,
"step": 310500
},
{
"epoch": 4.48,
"learning_rate": 3.5431543751721255e-05,
"loss": 0.5072,
"step": 311000
},
{
"epoch": 4.49,
"learning_rate": 3.5407300810296074e-05,
"loss": 0.5013,
"step": 311500
},
{
"epoch": 4.49,
"learning_rate": 3.53830578688709e-05,
"loss": 0.4902,
"step": 312000
},
{
"epoch": 4.5,
"learning_rate": 3.5358814927445725e-05,
"loss": 0.4912,
"step": 312500
},
{
"epoch": 4.51,
"learning_rate": 3.53346204719034e-05,
"loss": 0.5037,
"step": 313000
},
{
"epoch": 4.51,
"learning_rate": 3.5310377530478226e-05,
"loss": 0.4952,
"step": 313500
},
{
"epoch": 4.52,
"learning_rate": 3.528613458905306e-05,
"loss": 0.4948,
"step": 314000
},
{
"epoch": 4.53,
"learning_rate": 3.526189164762788e-05,
"loss": 0.4971,
"step": 314500
},
{
"epoch": 4.54,
"learning_rate": 3.523769719208555e-05,
"loss": 0.4999,
"step": 315000
},
{
"epoch": 4.54,
"learning_rate": 3.521350273654323e-05,
"loss": 0.5081,
"step": 315500
},
{
"epoch": 4.55,
"learning_rate": 3.5189259795118054e-05,
"loss": 0.498,
"step": 316000
},
{
"epoch": 4.56,
"learning_rate": 3.516501685369288e-05,
"loss": 0.5034,
"step": 316500
},
{
"epoch": 4.56,
"learning_rate": 3.5140773912267705e-05,
"loss": 0.4946,
"step": 317000
},
{
"epoch": 4.57,
"learning_rate": 3.511653097084253e-05,
"loss": 0.5005,
"step": 317500
},
{
"epoch": 4.58,
"learning_rate": 3.5092288029417356e-05,
"loss": 0.502,
"step": 318000
},
{
"epoch": 4.59,
"learning_rate": 3.506804508799218e-05,
"loss": 0.5057,
"step": 318500
},
{
"epoch": 4.59,
"learning_rate": 3.504380214656701e-05,
"loss": 0.5064,
"step": 319000
},
{
"epoch": 4.6,
"learning_rate": 3.501960769102468e-05,
"loss": 0.5086,
"step": 319500
},
{
"epoch": 4.61,
"learning_rate": 3.499536474959951e-05,
"loss": 0.4985,
"step": 320000
},
{
"epoch": 4.62,
"learning_rate": 3.4971121808174334e-05,
"loss": 0.5065,
"step": 320500
},
{
"epoch": 4.62,
"learning_rate": 3.494687886674916e-05,
"loss": 0.4943,
"step": 321000
},
{
"epoch": 4.63,
"learning_rate": 3.4922635925323985e-05,
"loss": 0.5075,
"step": 321500
},
{
"epoch": 4.64,
"learning_rate": 3.489844146978166e-05,
"loss": 0.5142,
"step": 322000
},
{
"epoch": 4.64,
"learning_rate": 3.4874198528356486e-05,
"loss": 0.5056,
"step": 322500
},
{
"epoch": 4.65,
"learning_rate": 3.484995558693131e-05,
"loss": 0.5121,
"step": 323000
},
{
"epoch": 4.66,
"learning_rate": 3.482576113138899e-05,
"loss": 0.5106,
"step": 323500
},
{
"epoch": 4.67,
"learning_rate": 3.480151818996381e-05,
"loss": 0.5029,
"step": 324000
},
{
"epoch": 4.67,
"learning_rate": 3.477727524853864e-05,
"loss": 0.4978,
"step": 324500
},
{
"epoch": 4.68,
"learning_rate": 3.4753032307113464e-05,
"loss": 0.5099,
"step": 325000
},
{
"epoch": 4.69,
"learning_rate": 3.472878936568828e-05,
"loss": 0.5093,
"step": 325500
},
{
"epoch": 4.69,
"learning_rate": 3.4704546424263115e-05,
"loss": 0.5085,
"step": 326000
},
{
"epoch": 4.7,
"learning_rate": 3.468030348283794e-05,
"loss": 0.5245,
"step": 326500
},
{
"epoch": 4.71,
"learning_rate": 3.4656060541412766e-05,
"loss": 0.5107,
"step": 327000
},
{
"epoch": 4.72,
"learning_rate": 3.4631817599987585e-05,
"loss": 0.505,
"step": 327500
},
{
"epoch": 4.72,
"learning_rate": 3.460757465856242e-05,
"loss": 0.5029,
"step": 328000
},
{
"epoch": 4.73,
"learning_rate": 3.458333171713724e-05,
"loss": 0.5024,
"step": 328500
},
{
"epoch": 4.74,
"learning_rate": 3.455908877571207e-05,
"loss": 0.5099,
"step": 329000
},
{
"epoch": 4.74,
"learning_rate": 3.453484583428689e-05,
"loss": 0.517,
"step": 329500
},
{
"epoch": 4.75,
"learning_rate": 3.451060289286171e-05,
"loss": 0.5026,
"step": 330000
},
{
"epoch": 4.76,
"learning_rate": 3.448640843731939e-05,
"loss": 0.5098,
"step": 330500
},
{
"epoch": 4.77,
"learning_rate": 3.4462165495894214e-05,
"loss": 0.52,
"step": 331000
},
{
"epoch": 4.77,
"learning_rate": 3.443797104035189e-05,
"loss": 0.5084,
"step": 331500
},
{
"epoch": 4.78,
"learning_rate": 3.4413728098926715e-05,
"loss": 0.5184,
"step": 332000
},
{
"epoch": 4.79,
"learning_rate": 3.438948515750155e-05,
"loss": 0.5215,
"step": 332500
},
{
"epoch": 4.8,
"learning_rate": 3.436524221607637e-05,
"loss": 0.525,
"step": 333000
},
{
"epoch": 4.8,
"learning_rate": 3.434099927465119e-05,
"loss": 0.5016,
"step": 333500
},
{
"epoch": 4.81,
"learning_rate": 3.431675633322602e-05,
"loss": 0.5183,
"step": 334000
},
{
"epoch": 4.82,
"learning_rate": 3.429256187768369e-05,
"loss": 0.5177,
"step": 334500
},
{
"epoch": 4.82,
"learning_rate": 3.426831893625852e-05,
"loss": 0.5171,
"step": 335000
},
{
"epoch": 4.83,
"learning_rate": 3.4244075994833344e-05,
"loss": 0.5143,
"step": 335500
},
{
"epoch": 4.84,
"learning_rate": 3.4219833053408176e-05,
"loss": 0.5099,
"step": 336000
},
{
"epoch": 4.85,
"learning_rate": 3.4195590111982995e-05,
"loss": 0.5138,
"step": 336500
},
{
"epoch": 4.85,
"learning_rate": 3.417134717055782e-05,
"loss": 0.5219,
"step": 337000
},
{
"epoch": 4.86,
"learning_rate": 3.4147104229132646e-05,
"loss": 0.5089,
"step": 337500
},
{
"epoch": 4.87,
"learning_rate": 3.412286128770748e-05,
"loss": 0.5252,
"step": 338000
},
{
"epoch": 4.87,
"learning_rate": 3.40986183462823e-05,
"loss": 0.5239,
"step": 338500
},
{
"epoch": 4.88,
"learning_rate": 3.407437540485712e-05,
"loss": 0.516,
"step": 339000
},
{
"epoch": 4.89,
"learning_rate": 3.405013246343195e-05,
"loss": 0.517,
"step": 339500
},
{
"epoch": 4.9,
"learning_rate": 3.4025889522006773e-05,
"loss": 0.5149,
"step": 340000
},
{
"epoch": 4.9,
"learning_rate": 3.400169506646445e-05,
"loss": 0.514,
"step": 340500
},
{
"epoch": 4.91,
"learning_rate": 3.3977452125039275e-05,
"loss": 0.5215,
"step": 341000
},
{
"epoch": 4.92,
"learning_rate": 3.39532091836141e-05,
"loss": 0.5126,
"step": 341500
},
{
"epoch": 4.92,
"learning_rate": 3.3929014728071776e-05,
"loss": 0.529,
"step": 342000
},
{
"epoch": 4.93,
"learning_rate": 3.39047717866466e-05,
"loss": 0.5155,
"step": 342500
},
{
"epoch": 4.94,
"learning_rate": 3.388052884522143e-05,
"loss": 0.5285,
"step": 343000
},
{
"epoch": 4.95,
"learning_rate": 3.385628590379625e-05,
"loss": 0.5231,
"step": 343500
},
{
"epoch": 4.95,
"learning_rate": 3.383204296237108e-05,
"loss": 0.5187,
"step": 344000
},
{
"epoch": 4.96,
"learning_rate": 3.3807800020945903e-05,
"loss": 0.5146,
"step": 344500
},
{
"epoch": 4.97,
"learning_rate": 3.378355707952073e-05,
"loss": 0.5305,
"step": 345000
},
{
"epoch": 4.98,
"learning_rate": 3.3759314138095555e-05,
"loss": 0.5251,
"step": 345500
},
{
"epoch": 4.98,
"learning_rate": 3.373511968255323e-05,
"loss": 0.5334,
"step": 346000
},
{
"epoch": 4.99,
"learning_rate": 3.3710876741128056e-05,
"loss": 0.5168,
"step": 346500
},
{
"epoch": 5.0,
"learning_rate": 3.368663379970288e-05,
"loss": 0.5245,
"step": 347000
},
{
"epoch": 5.0,
"eval_bleu": 46.9153,
"eval_gen_len": 13.0368,
"eval_loss": 0.8332552313804626,
"eval_runtime": 8588.4827,
"eval_samples_per_second": 15.662,
"eval_steps_per_second": 1.958,
"step": 347217
},
{
"epoch": 5.0,
"learning_rate": 3.366239085827771e-05,
"loss": 0.4247,
"step": 347500
},
{
"epoch": 5.01,
"learning_rate": 3.363814791685253e-05,
"loss": 0.3521,
"step": 348000
},
{
"epoch": 5.02,
"learning_rate": 3.361395346131021e-05,
"loss": 0.3483,
"step": 348500
},
{
"epoch": 5.03,
"learning_rate": 3.3589759005767884e-05,
"loss": 0.3564,
"step": 349000
},
{
"epoch": 5.03,
"learning_rate": 3.356556455022556e-05,
"loss": 0.3537,
"step": 349500
},
{
"epoch": 5.04,
"learning_rate": 3.3541321608800385e-05,
"loss": 0.3593,
"step": 350000
},
{
"epoch": 5.05,
"learning_rate": 3.3517078667375204e-05,
"loss": 0.3542,
"step": 350500
},
{
"epoch": 5.05,
"learning_rate": 3.3492835725950036e-05,
"loss": 0.3607,
"step": 351000
},
{
"epoch": 5.06,
"learning_rate": 3.346859278452486e-05,
"loss": 0.3539,
"step": 351500
},
{
"epoch": 5.07,
"learning_rate": 3.344439832898254e-05,
"loss": 0.3558,
"step": 352000
},
{
"epoch": 5.08,
"learning_rate": 3.342015538755736e-05,
"loss": 0.3599,
"step": 352500
},
{
"epoch": 5.08,
"learning_rate": 3.339591244613219e-05,
"loss": 0.3625,
"step": 353000
},
{
"epoch": 5.09,
"learning_rate": 3.337166950470701e-05,
"loss": 0.3583,
"step": 353500
},
{
"epoch": 5.1,
"learning_rate": 3.334747504916469e-05,
"loss": 0.3628,
"step": 354000
},
{
"epoch": 5.1,
"learning_rate": 3.332323210773951e-05,
"loss": 0.3698,
"step": 354500
},
{
"epoch": 5.11,
"learning_rate": 3.3298989166314334e-05,
"loss": 0.3605,
"step": 355000
},
{
"epoch": 5.12,
"learning_rate": 3.3274746224889166e-05,
"loss": 0.3647,
"step": 355500
},
{
"epoch": 5.13,
"learning_rate": 3.325050328346399e-05,
"loss": 0.3602,
"step": 356000
},
{
"epoch": 5.13,
"learning_rate": 3.322626034203881e-05,
"loss": 0.3623,
"step": 356500
},
{
"epoch": 5.14,
"learning_rate": 3.3202017400613636e-05,
"loss": 0.355,
"step": 357000
},
{
"epoch": 5.15,
"learning_rate": 3.317777445918847e-05,
"loss": 0.3662,
"step": 357500
},
{
"epoch": 5.16,
"learning_rate": 3.3153531517763294e-05,
"loss": 0.3699,
"step": 358000
},
{
"epoch": 5.16,
"learning_rate": 3.312928857633811e-05,
"loss": 0.3638,
"step": 358500
},
{
"epoch": 5.17,
"learning_rate": 3.310504563491294e-05,
"loss": 0.3698,
"step": 359000
},
{
"epoch": 5.18,
"learning_rate": 3.3080851179370614e-05,
"loss": 0.3782,
"step": 359500
},
{
"epoch": 5.18,
"learning_rate": 3.305660823794544e-05,
"loss": 0.378,
"step": 360000
},
{
"epoch": 5.19,
"learning_rate": 3.3032365296520265e-05,
"loss": 0.3739,
"step": 360500
},
{
"epoch": 5.2,
"learning_rate": 3.30081223550951e-05,
"loss": 0.3848,
"step": 361000
},
{
"epoch": 5.21,
"learning_rate": 3.2983879413669916e-05,
"loss": 0.3696,
"step": 361500
},
{
"epoch": 5.21,
"learning_rate": 3.295963647224474e-05,
"loss": 0.3749,
"step": 362000
},
{
"epoch": 5.22,
"learning_rate": 3.293539353081957e-05,
"loss": 0.3749,
"step": 362500
},
{
"epoch": 5.23,
"learning_rate": 3.29111505893944e-05,
"loss": 0.3758,
"step": 363000
},
{
"epoch": 5.23,
"learning_rate": 3.288690764796922e-05,
"loss": 0.38,
"step": 363500
},
{
"epoch": 5.24,
"learning_rate": 3.2862664706544043e-05,
"loss": 0.3833,
"step": 364000
},
{
"epoch": 5.25,
"learning_rate": 3.283842176511887e-05,
"loss": 0.3792,
"step": 364500
},
{
"epoch": 5.26,
"learning_rate": 3.2814178823693695e-05,
"loss": 0.3809,
"step": 365000
},
{
"epoch": 5.26,
"learning_rate": 3.278998436815137e-05,
"loss": 0.3887,
"step": 365500
},
{
"epoch": 5.27,
"learning_rate": 3.2765741426726196e-05,
"loss": 0.3762,
"step": 366000
},
{
"epoch": 5.28,
"learning_rate": 3.274149848530102e-05,
"loss": 0.3816,
"step": 366500
},
{
"epoch": 5.28,
"learning_rate": 3.27173040297587e-05,
"loss": 0.3895,
"step": 367000
},
{
"epoch": 5.29,
"learning_rate": 3.269306108833352e-05,
"loss": 0.391,
"step": 367500
},
{
"epoch": 5.3,
"learning_rate": 3.266881814690835e-05,
"loss": 0.3844,
"step": 368000
},
{
"epoch": 5.31,
"learning_rate": 3.2644575205483174e-05,
"loss": 0.3836,
"step": 368500
},
{
"epoch": 5.31,
"learning_rate": 3.2620332264058e-05,
"loss": 0.3894,
"step": 369000
},
{
"epoch": 5.32,
"learning_rate": 3.2596089322632825e-05,
"loss": 0.3864,
"step": 369500
},
{
"epoch": 5.33,
"learning_rate": 3.257184638120765e-05,
"loss": 0.3869,
"step": 370000
},
{
"epoch": 5.34,
"learning_rate": 3.2547603439782476e-05,
"loss": 0.388,
"step": 370500
},
{
"epoch": 5.34,
"learning_rate": 3.2523457470123e-05,
"loss": 0.3891,
"step": 371000
},
{
"epoch": 5.35,
"learning_rate": 3.249921452869783e-05,
"loss": 0.391,
"step": 371500
},
{
"epoch": 5.36,
"learning_rate": 3.247497158727265e-05,
"loss": 0.3888,
"step": 372000
},
{
"epoch": 5.36,
"learning_rate": 3.245072864584748e-05,
"loss": 0.3858,
"step": 372500
},
{
"epoch": 5.37,
"learning_rate": 3.2426485704422304e-05,
"loss": 0.385,
"step": 373000
},
{
"epoch": 5.38,
"learning_rate": 3.240229124887998e-05,
"loss": 0.3935,
"step": 373500
},
{
"epoch": 5.39,
"learning_rate": 3.2378048307454805e-05,
"loss": 0.3886,
"step": 374000
},
{
"epoch": 5.39,
"learning_rate": 3.2353805366029624e-05,
"loss": 0.3928,
"step": 374500
},
{
"epoch": 5.4,
"learning_rate": 3.2329562424604456e-05,
"loss": 0.3903,
"step": 375000
},
{
"epoch": 5.41,
"learning_rate": 3.230531948317928e-05,
"loss": 0.4016,
"step": 375500
},
{
"epoch": 5.41,
"learning_rate": 3.22810765417541e-05,
"loss": 0.3944,
"step": 376000
},
{
"epoch": 5.42,
"learning_rate": 3.2256833600328926e-05,
"loss": 0.4017,
"step": 376500
},
{
"epoch": 5.43,
"learning_rate": 3.223259065890376e-05,
"loss": 0.3953,
"step": 377000
},
{
"epoch": 5.44,
"learning_rate": 3.2208347717478584e-05,
"loss": 0.3985,
"step": 377500
},
{
"epoch": 5.44,
"learning_rate": 3.21842017478191e-05,
"loss": 0.3957,
"step": 378000
},
{
"epoch": 5.45,
"learning_rate": 3.215995880639393e-05,
"loss": 0.3971,
"step": 378500
},
{
"epoch": 5.46,
"learning_rate": 3.213571586496876e-05,
"loss": 0.3952,
"step": 379000
},
{
"epoch": 5.46,
"learning_rate": 3.211152140942643e-05,
"loss": 0.3943,
"step": 379500
},
{
"epoch": 5.47,
"learning_rate": 3.2087326953884105e-05,
"loss": 0.3969,
"step": 380000
},
{
"epoch": 5.48,
"learning_rate": 3.206308401245893e-05,
"loss": 0.3996,
"step": 380500
},
{
"epoch": 5.49,
"learning_rate": 3.2038841071033756e-05,
"loss": 0.3963,
"step": 381000
},
{
"epoch": 5.49,
"learning_rate": 3.201459812960859e-05,
"loss": 0.404,
"step": 381500
},
{
"epoch": 5.5,
"learning_rate": 3.199035518818341e-05,
"loss": 0.3968,
"step": 382000
},
{
"epoch": 5.51,
"learning_rate": 3.196611224675823e-05,
"loss": 0.3917,
"step": 382500
},
{
"epoch": 5.52,
"learning_rate": 3.194186930533306e-05,
"loss": 0.4022,
"step": 383000
},
{
"epoch": 5.52,
"learning_rate": 3.191762636390789e-05,
"loss": 0.4027,
"step": 383500
},
{
"epoch": 5.53,
"learning_rate": 3.189338342248271e-05,
"loss": 0.405,
"step": 384000
},
{
"epoch": 5.54,
"learning_rate": 3.1869140481057535e-05,
"loss": 0.405,
"step": 384500
},
{
"epoch": 5.54,
"learning_rate": 3.184494602551521e-05,
"loss": 0.4053,
"step": 385000
},
{
"epoch": 5.55,
"learning_rate": 3.1820703084090036e-05,
"loss": 0.4063,
"step": 385500
},
{
"epoch": 5.56,
"learning_rate": 3.179646014266486e-05,
"loss": 0.4076,
"step": 386000
},
{
"epoch": 5.57,
"learning_rate": 3.177221720123969e-05,
"loss": 0.4099,
"step": 386500
},
{
"epoch": 5.57,
"learning_rate": 3.174797425981451e-05,
"loss": 0.3982,
"step": 387000
},
{
"epoch": 5.58,
"learning_rate": 3.172373131838934e-05,
"loss": 0.4,
"step": 387500
},
{
"epoch": 5.59,
"learning_rate": 3.1699488376964164e-05,
"loss": 0.407,
"step": 388000
},
{
"epoch": 5.59,
"learning_rate": 3.167529392142184e-05,
"loss": 0.4046,
"step": 388500
},
{
"epoch": 5.6,
"learning_rate": 3.1651050979996665e-05,
"loss": 0.4076,
"step": 389000
},
{
"epoch": 5.61,
"learning_rate": 3.162680803857149e-05,
"loss": 0.4031,
"step": 389500
},
{
"epoch": 5.62,
"learning_rate": 3.1602565097146316e-05,
"loss": 0.4142,
"step": 390000
},
{
"epoch": 5.62,
"learning_rate": 3.157832215572114e-05,
"loss": 0.4085,
"step": 390500
},
{
"epoch": 5.63,
"learning_rate": 3.155407921429597e-05,
"loss": 0.4037,
"step": 391000
},
{
"epoch": 5.64,
"learning_rate": 3.152983627287079e-05,
"loss": 0.405,
"step": 391500
},
{
"epoch": 5.64,
"learning_rate": 3.150559333144562e-05,
"loss": 0.3994,
"step": 392000
},
{
"epoch": 5.65,
"learning_rate": 3.1481350390020444e-05,
"loss": 0.4124,
"step": 392500
},
{
"epoch": 5.66,
"learning_rate": 3.145715593447812e-05,
"loss": 0.4099,
"step": 393000
},
{
"epoch": 5.67,
"learning_rate": 3.1432912993052945e-05,
"loss": 0.4144,
"step": 393500
},
{
"epoch": 5.67,
"learning_rate": 3.140867005162777e-05,
"loss": 0.4116,
"step": 394000
},
{
"epoch": 5.68,
"learning_rate": 3.1384427110202596e-05,
"loss": 0.4118,
"step": 394500
},
{
"epoch": 5.69,
"learning_rate": 3.1360184168777415e-05,
"loss": 0.4041,
"step": 395000
},
{
"epoch": 5.7,
"learning_rate": 3.133594122735225e-05,
"loss": 0.4075,
"step": 395500
},
{
"epoch": 5.7,
"learning_rate": 3.1311746771809916e-05,
"loss": 0.4136,
"step": 396000
},
{
"epoch": 5.71,
"learning_rate": 3.128750383038475e-05,
"loss": 0.4119,
"step": 396500
},
{
"epoch": 5.72,
"learning_rate": 3.126330937484242e-05,
"loss": 0.4129,
"step": 397000
},
{
"epoch": 5.72,
"learning_rate": 3.123906643341725e-05,
"loss": 0.4128,
"step": 397500
},
{
"epoch": 5.73,
"learning_rate": 3.1214823491992075e-05,
"loss": 0.4187,
"step": 398000
},
{
"epoch": 5.74,
"learning_rate": 3.11905805505669e-05,
"loss": 0.4133,
"step": 398500
},
{
"epoch": 5.75,
"learning_rate": 3.116633760914172e-05,
"loss": 0.4204,
"step": 399000
},
{
"epoch": 5.75,
"learning_rate": 3.114209466771655e-05,
"loss": 0.4102,
"step": 399500
},
{
"epoch": 5.76,
"learning_rate": 3.111785172629138e-05,
"loss": 0.4191,
"step": 400000
},
{
"epoch": 5.77,
"learning_rate": 3.10936087848662e-05,
"loss": 0.4137,
"step": 400500
},
{
"epoch": 5.77,
"learning_rate": 3.106936584344102e-05,
"loss": 0.4156,
"step": 401000
},
{
"epoch": 5.78,
"learning_rate": 3.104512290201585e-05,
"loss": 0.4229,
"step": 401500
},
{
"epoch": 5.79,
"learning_rate": 3.102087996059068e-05,
"loss": 0.4145,
"step": 402000
},
{
"epoch": 5.8,
"learning_rate": 3.0996637019165505e-05,
"loss": 0.4152,
"step": 402500
},
{
"epoch": 5.8,
"learning_rate": 3.097244256362318e-05,
"loss": 0.4161,
"step": 403000
},
{
"epoch": 5.81,
"learning_rate": 3.0948199622198006e-05,
"loss": 0.4131,
"step": 403500
},
{
"epoch": 5.82,
"learning_rate": 3.0923956680772825e-05,
"loss": 0.4234,
"step": 404000
},
{
"epoch": 5.82,
"learning_rate": 3.089976222523051e-05,
"loss": 0.422,
"step": 404500
},
{
"epoch": 5.83,
"learning_rate": 3.0875519283805326e-05,
"loss": 0.4174,
"step": 405000
},
{
"epoch": 5.84,
"learning_rate": 3.085127634238015e-05,
"loss": 0.4186,
"step": 405500
},
{
"epoch": 5.85,
"learning_rate": 3.082708188683783e-05,
"loss": 0.4215,
"step": 406000
},
{
"epoch": 5.85,
"learning_rate": 3.080283894541265e-05,
"loss": 0.4247,
"step": 406500
},
{
"epoch": 5.86,
"learning_rate": 3.077859600398748e-05,
"loss": 0.4287,
"step": 407000
},
{
"epoch": 5.87,
"learning_rate": 3.075435306256231e-05,
"loss": 0.4263,
"step": 407500
},
{
"epoch": 5.88,
"learning_rate": 3.073011012113713e-05,
"loss": 0.4208,
"step": 408000
},
{
"epoch": 5.88,
"learning_rate": 3.0705867179711955e-05,
"loss": 0.4225,
"step": 408500
},
{
"epoch": 5.89,
"learning_rate": 3.068167272416963e-05,
"loss": 0.4225,
"step": 409000
},
{
"epoch": 5.9,
"learning_rate": 3.0657429782744456e-05,
"loss": 0.4158,
"step": 409500
},
{
"epoch": 5.9,
"learning_rate": 3.063318684131928e-05,
"loss": 0.4212,
"step": 410000
},
{
"epoch": 5.91,
"learning_rate": 3.060894389989411e-05,
"loss": 0.4246,
"step": 410500
},
{
"epoch": 5.92,
"learning_rate": 3.058470095846893e-05,
"loss": 0.4238,
"step": 411000
},
{
"epoch": 5.93,
"learning_rate": 3.056045801704376e-05,
"loss": 0.4321,
"step": 411500
},
{
"epoch": 5.93,
"learning_rate": 3.0536215075618584e-05,
"loss": 0.4271,
"step": 412000
},
{
"epoch": 5.94,
"learning_rate": 3.0511972134193413e-05,
"loss": 0.4302,
"step": 412500
},
{
"epoch": 5.95,
"learning_rate": 3.048772919276823e-05,
"loss": 0.422,
"step": 413000
},
{
"epoch": 5.95,
"learning_rate": 3.046348625134306e-05,
"loss": 0.4267,
"step": 413500
},
{
"epoch": 5.96,
"learning_rate": 3.0439243309917886e-05,
"loss": 0.4291,
"step": 414000
},
{
"epoch": 5.97,
"learning_rate": 3.0415097340258415e-05,
"loss": 0.4291,
"step": 414500
},
{
"epoch": 5.98,
"learning_rate": 3.0390854398833234e-05,
"loss": 0.4251,
"step": 415000
},
{
"epoch": 5.98,
"learning_rate": 3.0366611457408063e-05,
"loss": 0.4233,
"step": 415500
},
{
"epoch": 5.99,
"learning_rate": 3.0342417001865735e-05,
"loss": 0.4266,
"step": 416000
},
{
"epoch": 6.0,
"learning_rate": 3.0318174060440564e-05,
"loss": 0.4333,
"step": 416500
},
{
"epoch": 6.0,
"eval_bleu": 46.5833,
"eval_gen_len": 12.9999,
"eval_loss": 0.8992630243301392,
"eval_runtime": 8567.0337,
"eval_samples_per_second": 15.702,
"eval_steps_per_second": 1.963,
"step": 416661
},
{
"epoch": 6.0,
"step": 416661,
"total_flos": 1.4447335300747231e+19,
"train_loss": 0.7209485457693631,
"train_runtime": 374884.8401,
"train_samples_per_second": 44.457,
"train_steps_per_second": 2.779
}
],
"logging_steps": 500,
"max_steps": 1041645,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"total_flos": 1.4447335300747231e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}