ko-en_mbartLarge_exp10p / trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
894c861
{
"best_metric": 27.7431,
"best_model_checkpoint": "./ko-en_mbartLarge_exp10p/checkpoint-20000",
"epoch": 6.497273465599258,
"eval_steps": 2000,
"global_step": 28000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 2.5e-05,
"loss": 1.9237,
"step": 500
},
{
"epoch": 0.23,
"learning_rate": 5e-05,
"loss": 1.801,
"step": 1000
},
{
"epoch": 0.35,
"learning_rate": 4.999894966504487e-05,
"loss": 1.6023,
"step": 1500
},
{
"epoch": 0.46,
"learning_rate": 4.9995798748435765e-05,
"loss": 1.5087,
"step": 2000
},
{
"epoch": 0.46,
"eval_bleu": 21.689,
"eval_gen_len": 18.6869,
"eval_loss": 1.4382537603378296,
"eval_runtime": 574.749,
"eval_samples_per_second": 14.994,
"eval_steps_per_second": 0.938,
"step": 2000
},
{
"epoch": 0.58,
"learning_rate": 4.999054751493411e-05,
"loss": 1.4587,
"step": 2500
},
{
"epoch": 0.7,
"learning_rate": 4.9983196405784234e-05,
"loss": 1.4081,
"step": 3000
},
{
"epoch": 0.81,
"learning_rate": 4.997374603867628e-05,
"loss": 1.3885,
"step": 3500
},
{
"epoch": 0.93,
"learning_rate": 4.996219720769434e-05,
"loss": 1.3739,
"step": 4000
},
{
"epoch": 0.93,
"eval_bleu": 23.8363,
"eval_gen_len": 18.7463,
"eval_loss": 1.3327512741088867,
"eval_runtime": 572.6687,
"eval_samples_per_second": 15.049,
"eval_steps_per_second": 0.941,
"step": 4000
},
{
"epoch": 1.04,
"learning_rate": 4.994855088324968e-05,
"loss": 1.3303,
"step": 4500
},
{
"epoch": 1.16,
"learning_rate": 4.993280821199921e-05,
"loss": 1.289,
"step": 5000
},
{
"epoch": 1.28,
"learning_rate": 4.991497051674917e-05,
"loss": 1.277,
"step": 5500
},
{
"epoch": 1.39,
"learning_rate": 4.9895039296343946e-05,
"loss": 1.2585,
"step": 6000
},
{
"epoch": 1.39,
"eval_bleu": 24.7319,
"eval_gen_len": 18.4624,
"eval_loss": 1.2720204591751099,
"eval_runtime": 575.4318,
"eval_samples_per_second": 14.977,
"eval_steps_per_second": 0.937,
"step": 6000
},
{
"epoch": 1.51,
"learning_rate": 4.987301622554015e-05,
"loss": 1.2625,
"step": 6500
},
{
"epoch": 1.62,
"learning_rate": 4.984890315486586e-05,
"loss": 1.2415,
"step": 7000
},
{
"epoch": 1.74,
"learning_rate": 4.982270211046515e-05,
"loss": 1.2369,
"step": 7500
},
{
"epoch": 1.86,
"learning_rate": 4.9794415293927846e-05,
"loss": 1.2355,
"step": 8000
},
{
"epoch": 1.86,
"eval_bleu": 26.1612,
"eval_gen_len": 18.484,
"eval_loss": 1.2356343269348145,
"eval_runtime": 568.7222,
"eval_samples_per_second": 15.153,
"eval_steps_per_second": 0.948,
"step": 8000
},
{
"epoch": 1.97,
"learning_rate": 4.9764045082104526e-05,
"loss": 1.2264,
"step": 8500
},
{
"epoch": 2.09,
"learning_rate": 4.97315940269068e-05,
"loss": 1.1309,
"step": 9000
},
{
"epoch": 2.2,
"learning_rate": 4.969706485509287e-05,
"loss": 1.1245,
"step": 9500
},
{
"epoch": 2.32,
"learning_rate": 4.966046046803843e-05,
"loss": 1.0973,
"step": 10000
},
{
"epoch": 2.32,
"eval_bleu": 26.6567,
"eval_gen_len": 18.554,
"eval_loss": 1.207370638847351,
"eval_runtime": 569.9204,
"eval_samples_per_second": 15.121,
"eval_steps_per_second": 0.946,
"step": 10000
},
{
"epoch": 2.44,
"learning_rate": 4.962178394149285e-05,
"loss": 1.1047,
"step": 10500
},
{
"epoch": 2.55,
"learning_rate": 4.9581038525320756e-05,
"loss": 1.11,
"step": 11000
},
{
"epoch": 2.67,
"learning_rate": 4.953822764322895e-05,
"loss": 1.1161,
"step": 11500
},
{
"epoch": 2.78,
"learning_rate": 4.949335489247869e-05,
"loss": 1.1157,
"step": 12000
},
{
"epoch": 2.78,
"eval_bleu": 26.4733,
"eval_gen_len": 18.8044,
"eval_loss": 1.2069162130355835,
"eval_runtime": 576.6015,
"eval_samples_per_second": 14.946,
"eval_steps_per_second": 0.935,
"step": 12000
},
{
"epoch": 2.9,
"learning_rate": 4.944642404358347e-05,
"loss": 1.121,
"step": 12500
},
{
"epoch": 3.02,
"learning_rate": 4.939743903999218e-05,
"loss": 1.101,
"step": 13000
},
{
"epoch": 3.13,
"learning_rate": 4.9346403997757745e-05,
"loss": 0.9717,
"step": 13500
},
{
"epoch": 3.25,
"learning_rate": 4.9293323205191266e-05,
"loss": 0.9631,
"step": 14000
},
{
"epoch": 3.25,
"eval_bleu": 27.1062,
"eval_gen_len": 18.6803,
"eval_loss": 1.190077543258667,
"eval_runtime": 575.2393,
"eval_samples_per_second": 14.982,
"eval_steps_per_second": 0.937,
"step": 14000
},
{
"epoch": 3.36,
"learning_rate": 4.9238201122501694e-05,
"loss": 0.9695,
"step": 14500
},
{
"epoch": 3.48,
"learning_rate": 4.918104238142104e-05,
"loss": 0.9694,
"step": 15000
},
{
"epoch": 3.6,
"learning_rate": 4.912185178481522e-05,
"loss": 0.9877,
"step": 15500
},
{
"epoch": 3.71,
"learning_rate": 4.9060634306280435e-05,
"loss": 1.0223,
"step": 16000
},
{
"epoch": 3.71,
"eval_bleu": 26.3038,
"eval_gen_len": 18.7993,
"eval_loss": 1.2279616594314575,
"eval_runtime": 577.4522,
"eval_samples_per_second": 14.924,
"eval_steps_per_second": 0.933,
"step": 16000
},
{
"epoch": 3.83,
"learning_rate": 4.899739508972529e-05,
"loss": 1.0419,
"step": 16500
},
{
"epoch": 3.94,
"learning_rate": 4.893213944893856e-05,
"loss": 1.0316,
"step": 17000
},
{
"epoch": 4.06,
"learning_rate": 4.88648728671427e-05,
"loss": 0.9474,
"step": 17500
},
{
"epoch": 4.18,
"learning_rate": 4.879560099653307e-05,
"loss": 0.8621,
"step": 18000
},
{
"epoch": 4.18,
"eval_bleu": 26.8035,
"eval_gen_len": 18.6679,
"eval_loss": 1.218488335609436,
"eval_runtime": 569.6982,
"eval_samples_per_second": 15.127,
"eval_steps_per_second": 0.946,
"step": 18000
},
{
"epoch": 4.29,
"learning_rate": 4.872432965780303e-05,
"loss": 0.8472,
"step": 18500
},
{
"epoch": 4.41,
"learning_rate": 4.865106483965487e-05,
"loss": 0.8593,
"step": 19000
},
{
"epoch": 4.52,
"learning_rate": 4.857581269829654e-05,
"loss": 0.8685,
"step": 19500
},
{
"epoch": 4.64,
"learning_rate": 4.84985795569244e-05,
"loss": 0.866,
"step": 20000
},
{
"epoch": 4.64,
"eval_bleu": 27.7431,
"eval_gen_len": 18.6157,
"eval_loss": 1.176997184753418,
"eval_runtime": 573.1877,
"eval_samples_per_second": 15.035,
"eval_steps_per_second": 0.94,
"step": 20000
},
{
"epoch": 4.76,
"learning_rate": 4.84193719051919e-05,
"loss": 0.838,
"step": 20500
},
{
"epoch": 4.87,
"learning_rate": 4.833819639866426e-05,
"loss": 0.8356,
"step": 21000
},
{
"epoch": 4.99,
"learning_rate": 4.825505985825925e-05,
"loss": 0.8329,
"step": 21500
},
{
"epoch": 5.11,
"learning_rate": 4.8169969269674016e-05,
"loss": 0.7063,
"step": 22000
},
{
"epoch": 5.11,
"eval_bleu": 27.7268,
"eval_gen_len": 18.6026,
"eval_loss": 1.21764075756073,
"eval_runtime": 573.1543,
"eval_samples_per_second": 15.036,
"eval_steps_per_second": 0.94,
"step": 22000
},
{
"epoch": 5.22,
"learning_rate": 4.808293178279813e-05,
"loss": 0.7073,
"step": 22500
},
{
"epoch": 5.34,
"learning_rate": 4.7993954711112764e-05,
"loss": 0.7178,
"step": 23000
},
{
"epoch": 5.45,
"learning_rate": 4.790304553107623e-05,
"loss": 0.7403,
"step": 23500
},
{
"epoch": 5.57,
"learning_rate": 4.781021188149567e-05,
"loss": 0.7504,
"step": 24000
},
{
"epoch": 5.57,
"eval_bleu": 27.053,
"eval_gen_len": 18.5299,
"eval_loss": 1.2267982959747314,
"eval_runtime": 570.7714,
"eval_samples_per_second": 15.099,
"eval_steps_per_second": 0.944,
"step": 24000
},
{
"epoch": 5.69,
"learning_rate": 4.771546156288527e-05,
"loss": 0.7545,
"step": 24500
},
{
"epoch": 5.8,
"learning_rate": 4.761880253681076e-05,
"loss": 0.7437,
"step": 25000
},
{
"epoch": 5.92,
"learning_rate": 4.7520242925220445e-05,
"loss": 0.7369,
"step": 25500
},
{
"epoch": 6.03,
"learning_rate": 4.741979100976274e-05,
"loss": 0.6986,
"step": 26000
},
{
"epoch": 6.03,
"eval_bleu": 27.5119,
"eval_gen_len": 18.7806,
"eval_loss": 1.273938775062561,
"eval_runtime": 578.7118,
"eval_samples_per_second": 14.892,
"eval_steps_per_second": 0.931,
"step": 26000
},
{
"epoch": 6.15,
"learning_rate": 4.731745523109029e-05,
"loss": 0.606,
"step": 26500
},
{
"epoch": 6.27,
"learning_rate": 4.721324418815074e-05,
"loss": 0.6064,
"step": 27000
},
{
"epoch": 6.38,
"learning_rate": 4.7107166637464184e-05,
"loss": 0.6108,
"step": 27500
},
{
"epoch": 6.5,
"learning_rate": 4.699923149238737e-05,
"loss": 0.6193,
"step": 28000
},
{
"epoch": 6.5,
"eval_bleu": 27.3877,
"eval_gen_len": 18.5109,
"eval_loss": 1.2744600772857666,
"eval_runtime": 567.3201,
"eval_samples_per_second": 15.191,
"eval_steps_per_second": 0.95,
"step": 28000
},
{
"epoch": 6.5,
"step": 28000,
"total_flos": 9.709781579957535e+17,
"train_loss": 1.0456493214198521,
"train_runtime": 28457.9407,
"train_samples_per_second": 96.914,
"train_steps_per_second": 6.057
}
],
"logging_steps": 500,
"max_steps": 172360,
"num_train_epochs": 40,
"save_steps": 2000,
"total_flos": 9.709781579957535e+17,
"trial_name": null,
"trial_params": null
}