yesj1234's picture
Upload folder using huggingface_hub
894c861
raw
history blame
7.92 kB
{
"best_metric": 27.7431,
"best_model_checkpoint": "./ko-en_mbartLarge_exp10p/checkpoint-20000",
"epoch": 4.640909618285184,
"eval_steps": 2000,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 2.5e-05,
"loss": 1.9237,
"step": 500
},
{
"epoch": 0.23,
"learning_rate": 5e-05,
"loss": 1.801,
"step": 1000
},
{
"epoch": 0.35,
"learning_rate": 4.999894966504487e-05,
"loss": 1.6023,
"step": 1500
},
{
"epoch": 0.46,
"learning_rate": 4.9995798748435765e-05,
"loss": 1.5087,
"step": 2000
},
{
"epoch": 0.46,
"eval_bleu": 21.689,
"eval_gen_len": 18.6869,
"eval_loss": 1.4382537603378296,
"eval_runtime": 574.749,
"eval_samples_per_second": 14.994,
"eval_steps_per_second": 0.938,
"step": 2000
},
{
"epoch": 0.58,
"learning_rate": 4.999054751493411e-05,
"loss": 1.4587,
"step": 2500
},
{
"epoch": 0.7,
"learning_rate": 4.9983196405784234e-05,
"loss": 1.4081,
"step": 3000
},
{
"epoch": 0.81,
"learning_rate": 4.997374603867628e-05,
"loss": 1.3885,
"step": 3500
},
{
"epoch": 0.93,
"learning_rate": 4.996219720769434e-05,
"loss": 1.3739,
"step": 4000
},
{
"epoch": 0.93,
"eval_bleu": 23.8363,
"eval_gen_len": 18.7463,
"eval_loss": 1.3327512741088867,
"eval_runtime": 572.6687,
"eval_samples_per_second": 15.049,
"eval_steps_per_second": 0.941,
"step": 4000
},
{
"epoch": 1.04,
"learning_rate": 4.994855088324968e-05,
"loss": 1.3303,
"step": 4500
},
{
"epoch": 1.16,
"learning_rate": 4.993280821199921e-05,
"loss": 1.289,
"step": 5000
},
{
"epoch": 1.28,
"learning_rate": 4.991497051674917e-05,
"loss": 1.277,
"step": 5500
},
{
"epoch": 1.39,
"learning_rate": 4.9895039296343946e-05,
"loss": 1.2585,
"step": 6000
},
{
"epoch": 1.39,
"eval_bleu": 24.7319,
"eval_gen_len": 18.4624,
"eval_loss": 1.2720204591751099,
"eval_runtime": 575.4318,
"eval_samples_per_second": 14.977,
"eval_steps_per_second": 0.937,
"step": 6000
},
{
"epoch": 1.51,
"learning_rate": 4.987301622554015e-05,
"loss": 1.2625,
"step": 6500
},
{
"epoch": 1.62,
"learning_rate": 4.984890315486586e-05,
"loss": 1.2415,
"step": 7000
},
{
"epoch": 1.74,
"learning_rate": 4.982270211046515e-05,
"loss": 1.2369,
"step": 7500
},
{
"epoch": 1.86,
"learning_rate": 4.9794415293927846e-05,
"loss": 1.2355,
"step": 8000
},
{
"epoch": 1.86,
"eval_bleu": 26.1612,
"eval_gen_len": 18.484,
"eval_loss": 1.2356343269348145,
"eval_runtime": 568.7222,
"eval_samples_per_second": 15.153,
"eval_steps_per_second": 0.948,
"step": 8000
},
{
"epoch": 1.97,
"learning_rate": 4.9764045082104526e-05,
"loss": 1.2264,
"step": 8500
},
{
"epoch": 2.09,
"learning_rate": 4.97315940269068e-05,
"loss": 1.1309,
"step": 9000
},
{
"epoch": 2.2,
"learning_rate": 4.969706485509287e-05,
"loss": 1.1245,
"step": 9500
},
{
"epoch": 2.32,
"learning_rate": 4.966046046803843e-05,
"loss": 1.0973,
"step": 10000
},
{
"epoch": 2.32,
"eval_bleu": 26.6567,
"eval_gen_len": 18.554,
"eval_loss": 1.207370638847351,
"eval_runtime": 569.9204,
"eval_samples_per_second": 15.121,
"eval_steps_per_second": 0.946,
"step": 10000
},
{
"epoch": 2.44,
"learning_rate": 4.962178394149285e-05,
"loss": 1.1047,
"step": 10500
},
{
"epoch": 2.55,
"learning_rate": 4.9581038525320756e-05,
"loss": 1.11,
"step": 11000
},
{
"epoch": 2.67,
"learning_rate": 4.953822764322895e-05,
"loss": 1.1161,
"step": 11500
},
{
"epoch": 2.78,
"learning_rate": 4.949335489247869e-05,
"loss": 1.1157,
"step": 12000
},
{
"epoch": 2.78,
"eval_bleu": 26.4733,
"eval_gen_len": 18.8044,
"eval_loss": 1.2069162130355835,
"eval_runtime": 576.6015,
"eval_samples_per_second": 14.946,
"eval_steps_per_second": 0.935,
"step": 12000
},
{
"epoch": 2.9,
"learning_rate": 4.944642404358347e-05,
"loss": 1.121,
"step": 12500
},
{
"epoch": 3.02,
"learning_rate": 4.939743903999218e-05,
"loss": 1.101,
"step": 13000
},
{
"epoch": 3.13,
"learning_rate": 4.9346403997757745e-05,
"loss": 0.9717,
"step": 13500
},
{
"epoch": 3.25,
"learning_rate": 4.9293323205191266e-05,
"loss": 0.9631,
"step": 14000
},
{
"epoch": 3.25,
"eval_bleu": 27.1062,
"eval_gen_len": 18.6803,
"eval_loss": 1.190077543258667,
"eval_runtime": 575.2393,
"eval_samples_per_second": 14.982,
"eval_steps_per_second": 0.937,
"step": 14000
},
{
"epoch": 3.36,
"learning_rate": 4.9238201122501694e-05,
"loss": 0.9695,
"step": 14500
},
{
"epoch": 3.48,
"learning_rate": 4.918104238142104e-05,
"loss": 0.9694,
"step": 15000
},
{
"epoch": 3.6,
"learning_rate": 4.912185178481522e-05,
"loss": 0.9877,
"step": 15500
},
{
"epoch": 3.71,
"learning_rate": 4.9060634306280435e-05,
"loss": 1.0223,
"step": 16000
},
{
"epoch": 3.71,
"eval_bleu": 26.3038,
"eval_gen_len": 18.7993,
"eval_loss": 1.2279616594314575,
"eval_runtime": 577.4522,
"eval_samples_per_second": 14.924,
"eval_steps_per_second": 0.933,
"step": 16000
},
{
"epoch": 3.83,
"learning_rate": 4.899739508972529e-05,
"loss": 1.0419,
"step": 16500
},
{
"epoch": 3.94,
"learning_rate": 4.893213944893856e-05,
"loss": 1.0316,
"step": 17000
},
{
"epoch": 4.06,
"learning_rate": 4.88648728671427e-05,
"loss": 0.9474,
"step": 17500
},
{
"epoch": 4.18,
"learning_rate": 4.879560099653307e-05,
"loss": 0.8621,
"step": 18000
},
{
"epoch": 4.18,
"eval_bleu": 26.8035,
"eval_gen_len": 18.6679,
"eval_loss": 1.218488335609436,
"eval_runtime": 569.6982,
"eval_samples_per_second": 15.127,
"eval_steps_per_second": 0.946,
"step": 18000
},
{
"epoch": 4.29,
"learning_rate": 4.872432965780303e-05,
"loss": 0.8472,
"step": 18500
},
{
"epoch": 4.41,
"learning_rate": 4.865106483965487e-05,
"loss": 0.8593,
"step": 19000
},
{
"epoch": 4.52,
"learning_rate": 4.857581269829654e-05,
"loss": 0.8685,
"step": 19500
},
{
"epoch": 4.64,
"learning_rate": 4.84985795569244e-05,
"loss": 0.866,
"step": 20000
},
{
"epoch": 4.64,
"eval_bleu": 27.7431,
"eval_gen_len": 18.6157,
"eval_loss": 1.176997184753418,
"eval_runtime": 573.1877,
"eval_samples_per_second": 15.035,
"eval_steps_per_second": 0.94,
"step": 20000
}
],
"logging_steps": 500,
"max_steps": 172360,
"num_train_epochs": 40,
"save_steps": 2000,
"total_flos": 6.935855476999455e+17,
"trial_name": null,
"trial_params": null
}