ko-en_mbartLarge_exp15 / trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
f12647c
{
"best_metric": 28.8215,
"best_model_checkpoint": "./ko-en_mbartLarge_exp10p/checkpoint-32000",
"epoch": 6.188118811881188,
"eval_steps": 2000,
"global_step": 40000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 2.5e-05,
"loss": 1.9221,
"step": 500
},
{
"epoch": 0.15,
"learning_rate": 5e-05,
"loss": 1.7751,
"step": 1000
},
{
"epoch": 0.23,
"learning_rate": 4.9999535065698766e-05,
"loss": 1.5628,
"step": 1500
},
{
"epoch": 0.31,
"learning_rate": 4.9998140280088176e-05,
"loss": 1.4782,
"step": 2000
},
{
"epoch": 0.31,
"eval_bleu": 21.538,
"eval_gen_len": 18.6032,
"eval_loss": 1.435985803604126,
"eval_runtime": 861.7869,
"eval_samples_per_second": 15.0,
"eval_steps_per_second": 0.938,
"step": 2000
},
{
"epoch": 0.39,
"learning_rate": 4.999581569504692e-05,
"loss": 1.4515,
"step": 2500
},
{
"epoch": 0.46,
"learning_rate": 4.999256139703734e-05,
"loss": 1.4195,
"step": 3000
},
{
"epoch": 0.54,
"learning_rate": 4.998837750710223e-05,
"loss": 1.3946,
"step": 3500
},
{
"epoch": 0.62,
"learning_rate": 4.99832641808603e-05,
"loss": 1.3618,
"step": 4000
},
{
"epoch": 0.62,
"eval_bleu": 23.8354,
"eval_gen_len": 18.5594,
"eval_loss": 1.3225533962249756,
"eval_runtime": 848.5716,
"eval_samples_per_second": 15.234,
"eval_steps_per_second": 0.952,
"step": 4000
},
{
"epoch": 0.7,
"learning_rate": 4.99772216085004e-05,
"loss": 1.3398,
"step": 4500
},
{
"epoch": 0.77,
"learning_rate": 4.9970250014774486e-05,
"loss": 1.3278,
"step": 5000
},
{
"epoch": 0.85,
"learning_rate": 4.996234965898918e-05,
"loss": 1.3002,
"step": 5500
},
{
"epoch": 0.93,
"learning_rate": 4.9953520834996206e-05,
"loss": 1.2983,
"step": 6000
},
{
"epoch": 0.93,
"eval_bleu": 25.0795,
"eval_gen_len": 18.7894,
"eval_loss": 1.2636672258377075,
"eval_runtime": 866.6867,
"eval_samples_per_second": 14.915,
"eval_steps_per_second": 0.932,
"step": 6000
},
{
"epoch": 1.01,
"learning_rate": 4.994376387118141e-05,
"loss": 1.2851,
"step": 6500
},
{
"epoch": 1.08,
"learning_rate": 4.993307913045257e-05,
"loss": 1.2246,
"step": 7000
},
{
"epoch": 1.16,
"learning_rate": 4.9921467010225866e-05,
"loss": 1.2111,
"step": 7500
},
{
"epoch": 1.24,
"learning_rate": 4.9908927942411154e-05,
"loss": 1.2065,
"step": 8000
},
{
"epoch": 1.24,
"eval_bleu": 25.7409,
"eval_gen_len": 18.5615,
"eval_loss": 1.237057089805603,
"eval_runtime": 857.6049,
"eval_samples_per_second": 15.073,
"eval_steps_per_second": 0.942,
"step": 8000
},
{
"epoch": 1.31,
"learning_rate": 4.989546239339585e-05,
"loss": 1.201,
"step": 8500
},
{
"epoch": 1.39,
"learning_rate": 4.98810708640276e-05,
"loss": 1.2098,
"step": 9000
},
{
"epoch": 1.47,
"learning_rate": 4.986575388959566e-05,
"loss": 1.1832,
"step": 9500
},
{
"epoch": 1.55,
"learning_rate": 4.984951203981097e-05,
"loss": 1.1926,
"step": 10000
},
{
"epoch": 1.55,
"eval_bleu": 26.0527,
"eval_gen_len": 18.4019,
"eval_loss": 1.2115644216537476,
"eval_runtime": 851.6364,
"eval_samples_per_second": 15.179,
"eval_steps_per_second": 0.949,
"step": 10000
},
{
"epoch": 1.62,
"learning_rate": 4.983234591878498e-05,
"loss": 1.1858,
"step": 10500
},
{
"epoch": 1.7,
"learning_rate": 4.981425616500717e-05,
"loss": 1.1939,
"step": 11000
},
{
"epoch": 1.78,
"learning_rate": 4.9795243451321304e-05,
"loss": 1.1693,
"step": 11500
},
{
"epoch": 1.86,
"learning_rate": 4.977530848490039e-05,
"loss": 1.1734,
"step": 12000
},
{
"epoch": 1.86,
"eval_bleu": 26.9802,
"eval_gen_len": 18.6141,
"eval_loss": 1.190738558769226,
"eval_runtime": 858.138,
"eval_samples_per_second": 15.064,
"eval_steps_per_second": 0.942,
"step": 12000
},
{
"epoch": 1.93,
"learning_rate": 4.975445200722042e-05,
"loss": 1.173,
"step": 12500
},
{
"epoch": 2.01,
"learning_rate": 4.973267479403273e-05,
"loss": 1.1541,
"step": 13000
},
{
"epoch": 2.09,
"learning_rate": 4.9709977655335196e-05,
"loss": 1.0598,
"step": 13500
},
{
"epoch": 2.17,
"learning_rate": 4.968636143534208e-05,
"loss": 1.0677,
"step": 14000
},
{
"epoch": 2.17,
"eval_bleu": 27.1925,
"eval_gen_len": 18.4547,
"eval_loss": 1.1801778078079224,
"eval_runtime": 850.3421,
"eval_samples_per_second": 15.202,
"eval_steps_per_second": 0.95,
"step": 14000
},
{
"epoch": 2.24,
"learning_rate": 4.966182701245266e-05,
"loss": 1.0789,
"step": 14500
},
{
"epoch": 2.32,
"learning_rate": 4.9636375299218484e-05,
"loss": 1.0706,
"step": 15000
},
{
"epoch": 2.4,
"learning_rate": 4.961000724230954e-05,
"loss": 1.082,
"step": 15500
},
{
"epoch": 2.48,
"learning_rate": 4.958272382247895e-05,
"loss": 1.0773,
"step": 16000
},
{
"epoch": 2.48,
"eval_bleu": 27.5641,
"eval_gen_len": 18.6726,
"eval_loss": 1.1654949188232422,
"eval_runtime": 856.0869,
"eval_samples_per_second": 15.1,
"eval_steps_per_second": 0.944,
"step": 16000
},
{
"epoch": 2.55,
"learning_rate": 4.955452605452653e-05,
"loss": 1.0749,
"step": 16500
},
{
"epoch": 2.63,
"learning_rate": 4.952541498726105e-05,
"loss": 1.0845,
"step": 17000
},
{
"epoch": 2.71,
"learning_rate": 4.949539170346119e-05,
"loss": 1.0725,
"step": 17500
},
{
"epoch": 2.78,
"learning_rate": 4.9464457319835334e-05,
"loss": 1.0688,
"step": 18000
},
{
"epoch": 2.78,
"eval_bleu": 27.6261,
"eval_gen_len": 18.6127,
"eval_loss": 1.1520819664001465,
"eval_runtime": 854.4229,
"eval_samples_per_second": 15.13,
"eval_steps_per_second": 0.946,
"step": 18000
},
{
"epoch": 2.86,
"learning_rate": 4.9432612986979945e-05,
"loss": 1.0801,
"step": 18500
},
{
"epoch": 2.94,
"learning_rate": 4.939985988933683e-05,
"loss": 1.0993,
"step": 19000
},
{
"epoch": 3.02,
"learning_rate": 4.9366199245149095e-05,
"loss": 1.0534,
"step": 19500
},
{
"epoch": 3.09,
"learning_rate": 4.9331632306415776e-05,
"loss": 0.9542,
"step": 20000
},
{
"epoch": 3.09,
"eval_bleu": 27.16,
"eval_gen_len": 18.3782,
"eval_loss": 1.1709098815917969,
"eval_runtime": 856.056,
"eval_samples_per_second": 15.101,
"eval_steps_per_second": 0.944,
"step": 20000
},
{
"epoch": 3.17,
"learning_rate": 4.929616035884531e-05,
"loss": 0.9688,
"step": 20500
},
{
"epoch": 3.25,
"learning_rate": 4.925978472180771e-05,
"loss": 0.9453,
"step": 21000
},
{
"epoch": 3.33,
"learning_rate": 4.9222506748285495e-05,
"loss": 0.9471,
"step": 21500
},
{
"epoch": 3.4,
"learning_rate": 4.918432782482335e-05,
"loss": 0.9531,
"step": 22000
},
{
"epoch": 3.4,
"eval_bleu": 28.0684,
"eval_gen_len": 18.436,
"eval_loss": 1.1434855461120605,
"eval_runtime": 852.3756,
"eval_samples_per_second": 15.166,
"eval_steps_per_second": 0.948,
"step": 22000
},
{
"epoch": 3.48,
"learning_rate": 4.914524937147655e-05,
"loss": 0.9478,
"step": 22500
},
{
"epoch": 3.56,
"learning_rate": 4.910527284175818e-05,
"loss": 0.9594,
"step": 23000
},
{
"epoch": 3.64,
"learning_rate": 4.906439972258503e-05,
"loss": 0.9483,
"step": 23500
},
{
"epoch": 3.71,
"learning_rate": 4.902263153422231e-05,
"loss": 0.9756,
"step": 24000
},
{
"epoch": 3.71,
"eval_bleu": 27.6025,
"eval_gen_len": 18.7284,
"eval_loss": 1.1564555168151855,
"eval_runtime": 860.0664,
"eval_samples_per_second": 15.03,
"eval_steps_per_second": 0.939,
"step": 24000
},
{
"epoch": 3.79,
"learning_rate": 4.8979969830227086e-05,
"loss": 0.9912,
"step": 24500
},
{
"epoch": 3.87,
"learning_rate": 4.893641619739053e-05,
"loss": 0.9789,
"step": 25000
},
{
"epoch": 3.94,
"learning_rate": 4.8891972255678876e-05,
"loss": 1.0068,
"step": 25500
},
{
"epoch": 4.02,
"learning_rate": 4.8846639658173156e-05,
"loss": 0.9964,
"step": 26000
},
{
"epoch": 4.02,
"eval_bleu": 25.6999,
"eval_gen_len": 18.3255,
"eval_loss": 1.2285293340682983,
"eval_runtime": 852.9084,
"eval_samples_per_second": 15.156,
"eval_steps_per_second": 0.947,
"step": 26000
},
{
"epoch": 4.1,
"learning_rate": 4.880042009100772e-05,
"loss": 0.9576,
"step": 26500
},
{
"epoch": 4.18,
"learning_rate": 4.8753315273307575e-05,
"loss": 0.9583,
"step": 27000
},
{
"epoch": 4.25,
"learning_rate": 4.8705326957124334e-05,
"loss": 0.9696,
"step": 27500
},
{
"epoch": 4.33,
"learning_rate": 4.865645692737114e-05,
"loss": 0.9721,
"step": 28000
},
{
"epoch": 4.33,
"eval_bleu": 27.3499,
"eval_gen_len": 18.5409,
"eval_loss": 1.1880507469177246,
"eval_runtime": 849.7595,
"eval_samples_per_second": 15.213,
"eval_steps_per_second": 0.951,
"step": 28000
},
{
"epoch": 4.41,
"learning_rate": 4.860670700175625e-05,
"loss": 0.9355,
"step": 28500
},
{
"epoch": 4.49,
"learning_rate": 4.855607903071542e-05,
"loss": 0.9314,
"step": 29000
},
{
"epoch": 4.56,
"learning_rate": 4.850457489734306e-05,
"loss": 0.9142,
"step": 29500
},
{
"epoch": 4.64,
"learning_rate": 4.845219651732225e-05,
"loss": 0.9237,
"step": 30000
},
{
"epoch": 4.64,
"eval_bleu": 28.2692,
"eval_gen_len": 18.6614,
"eval_loss": 1.1497083902359009,
"eval_runtime": 859.3006,
"eval_samples_per_second": 15.044,
"eval_steps_per_second": 0.94,
"step": 30000
},
{
"epoch": 4.72,
"learning_rate": 4.839894583885341e-05,
"loss": 0.9102,
"step": 30500
},
{
"epoch": 4.8,
"learning_rate": 4.834482484258193e-05,
"loss": 0.8956,
"step": 31000
},
{
"epoch": 4.87,
"learning_rate": 4.8289835541524394e-05,
"loss": 0.8902,
"step": 31500
},
{
"epoch": 4.95,
"learning_rate": 4.8233979980993785e-05,
"loss": 0.9041,
"step": 32000
},
{
"epoch": 4.95,
"eval_bleu": 28.8215,
"eval_gen_len": 18.5493,
"eval_loss": 1.1282682418823242,
"eval_runtime": 861.0284,
"eval_samples_per_second": 15.013,
"eval_steps_per_second": 0.938,
"step": 32000
},
{
"epoch": 5.03,
"learning_rate": 4.817726023852338e-05,
"loss": 0.8124,
"step": 32500
},
{
"epoch": 5.11,
"learning_rate": 4.81196784237895e-05,
"loss": 0.6719,
"step": 33000
},
{
"epoch": 5.18,
"learning_rate": 4.8061236678533e-05,
"loss": 0.6779,
"step": 33500
},
{
"epoch": 5.26,
"learning_rate": 4.800193717647964e-05,
"loss": 0.6842,
"step": 34000
},
{
"epoch": 5.26,
"eval_bleu": 28.6873,
"eval_gen_len": 18.515,
"eval_loss": 1.174131989479065,
"eval_runtime": 856.2355,
"eval_samples_per_second": 15.097,
"eval_steps_per_second": 0.944,
"step": 34000
},
{
"epoch": 5.34,
"learning_rate": 4.794178212325922e-05,
"loss": 0.6763,
"step": 34500
},
{
"epoch": 5.41,
"learning_rate": 4.7880773756323556e-05,
"loss": 0.6887,
"step": 35000
},
{
"epoch": 5.49,
"learning_rate": 4.781891434486324e-05,
"loss": 0.7097,
"step": 35500
},
{
"epoch": 5.57,
"learning_rate": 4.775620618972326e-05,
"loss": 0.7101,
"step": 36000
},
{
"epoch": 5.57,
"eval_bleu": 28.0778,
"eval_gen_len": 18.3422,
"eval_loss": 1.1875863075256348,
"eval_runtime": 848.3505,
"eval_samples_per_second": 15.238,
"eval_steps_per_second": 0.952,
"step": 36000
},
{
"epoch": 5.65,
"learning_rate": 4.7692651623317395e-05,
"loss": 0.7219,
"step": 36500
},
{
"epoch": 5.72,
"learning_rate": 4.762825300954147e-05,
"loss": 0.7489,
"step": 37000
},
{
"epoch": 5.8,
"learning_rate": 4.756301274368545e-05,
"loss": 0.7671,
"step": 37500
},
{
"epoch": 5.88,
"learning_rate": 4.749693325234434e-05,
"loss": 0.7697,
"step": 38000
},
{
"epoch": 5.88,
"eval_bleu": 27.6338,
"eval_gen_len": 18.6766,
"eval_loss": 1.1897605657577515,
"eval_runtime": 865.3805,
"eval_samples_per_second": 14.938,
"eval_steps_per_second": 0.934,
"step": 38000
},
{
"epoch": 5.96,
"learning_rate": 4.74300169933279e-05,
"loss": 0.7846,
"step": 38500
},
{
"epoch": 6.03,
"learning_rate": 4.736226645556926e-05,
"loss": 0.6989,
"step": 39000
},
{
"epoch": 6.11,
"learning_rate": 4.729368415903233e-05,
"loss": 0.615,
"step": 39500
},
{
"epoch": 6.19,
"learning_rate": 4.722427265461809e-05,
"loss": 0.6028,
"step": 40000
},
{
"epoch": 6.19,
"eval_bleu": 28.0713,
"eval_gen_len": 18.5903,
"eval_loss": 1.2392680644989014,
"eval_runtime": 864.5773,
"eval_samples_per_second": 14.952,
"eval_steps_per_second": 0.935,
"step": 40000
},
{
"epoch": 6.19,
"step": 40000,
"total_flos": 1.3870670734394655e+18,
"train_loss": 1.0430022468566895,
"train_runtime": 46145.4904,
"train_samples_per_second": 89.65,
"train_steps_per_second": 5.603
}
],
"logging_steps": 500,
"max_steps": 258560,
"num_train_epochs": 40,
"save_steps": 2000,
"total_flos": 1.3870670734394655e+18,
"trial_name": null,
"trial_params": null
}