|
{ |
|
"best_metric": 0.8995155096054077, |
|
"best_model_checkpoint": "./mbartLarge_koja_37p/checkpoint-33000", |
|
"epoch": 4.844107803417298, |
|
"eval_steps": 5500, |
|
"global_step": 55000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 3.5939, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 3.0254, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 1.9133, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9936851145520225e-05, |
|
"loss": 1.672, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.987370229104044e-05, |
|
"loss": 1.56, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9810553436560664e-05, |
|
"loss": 1.4874, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.974740458208088e-05, |
|
"loss": 1.4345, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.96842557276011e-05, |
|
"loss": 1.3676, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.9621106873121326e-05, |
|
"loss": 1.3183, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.955795801864154e-05, |
|
"loss": 1.2987, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.9494809164161765e-05, |
|
"loss": 1.2712, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_bleu": 3.0858, |
|
"eval_gen_len": 18.1085, |
|
"eval_loss": 1.2203737497329712, |
|
"eval_runtime": 1528.8304, |
|
"eval_samples_per_second": 14.853, |
|
"eval_steps_per_second": 0.929, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.943166030968199e-05, |
|
"loss": 1.2405, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.9368511455202204e-05, |
|
"loss": 1.2262, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.930536260072243e-05, |
|
"loss": 1.1959, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.924221374624264e-05, |
|
"loss": 1.182, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.9179064891762866e-05, |
|
"loss": 1.1683, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.911591603728309e-05, |
|
"loss": 1.1489, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.9052767182803305e-05, |
|
"loss": 1.1365, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.898961832832353e-05, |
|
"loss": 1.1313, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.8926469473843744e-05, |
|
"loss": 1.1172, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.886332061936397e-05, |
|
"loss": 1.1123, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.880017176488419e-05, |
|
"loss": 1.0946, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_bleu": 3.3162, |
|
"eval_gen_len": 17.8651, |
|
"eval_loss": 1.0577712059020996, |
|
"eval_runtime": 1538.4057, |
|
"eval_samples_per_second": 14.76, |
|
"eval_steps_per_second": 0.923, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8737022910404406e-05, |
|
"loss": 1.0856, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.867387405592463e-05, |
|
"loss": 1.0716, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.8610725201444845e-05, |
|
"loss": 1.0736, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.854757634696507e-05, |
|
"loss": 1.0367, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.848442749248529e-05, |
|
"loss": 1.0204, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.842127863800551e-05, |
|
"loss": 1.0533, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.835812978352573e-05, |
|
"loss": 1.0426, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.829498092904595e-05, |
|
"loss": 1.0261, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.823183207456617e-05, |
|
"loss": 1.0111, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.816868322008639e-05, |
|
"loss": 0.9862, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.810553436560661e-05, |
|
"loss": 0.9546, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_bleu": 5.6024, |
|
"eval_gen_len": 17.902, |
|
"eval_loss": 0.9687988758087158, |
|
"eval_runtime": 1530.7999, |
|
"eval_samples_per_second": 14.833, |
|
"eval_steps_per_second": 0.928, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.804238551112683e-05, |
|
"loss": 0.9642, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.7979236656647054e-05, |
|
"loss": 0.9364, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.791608780216727e-05, |
|
"loss": 0.931, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.785293894768749e-05, |
|
"loss": 0.915, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.778979009320771e-05, |
|
"loss": 0.9043, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.772664123872793e-05, |
|
"loss": 0.9158, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.7663492384248155e-05, |
|
"loss": 0.9068, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.760034352976837e-05, |
|
"loss": 0.9314, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.7537194675288594e-05, |
|
"loss": 0.9112, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.747404582080881e-05, |
|
"loss": 0.8862, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.741089696632903e-05, |
|
"loss": 0.89, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_bleu": 5.1453, |
|
"eval_gen_len": 17.6144, |
|
"eval_loss": 0.941440761089325, |
|
"eval_runtime": 1490.4507, |
|
"eval_samples_per_second": 15.235, |
|
"eval_steps_per_second": 0.953, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.7347748111849256e-05, |
|
"loss": 0.8726, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.728459925736947e-05, |
|
"loss": 0.8749, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.7221450402889695e-05, |
|
"loss": 0.858, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 4.715830154840992e-05, |
|
"loss": 0.8479, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 4.7095152693930134e-05, |
|
"loss": 0.8394, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 4.703200383945036e-05, |
|
"loss": 0.8472, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.696885498497057e-05, |
|
"loss": 0.7793, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.6905706130490796e-05, |
|
"loss": 0.7753, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.684255727601102e-05, |
|
"loss": 0.7655, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.6779408421531235e-05, |
|
"loss": 0.7666, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.671625956705146e-05, |
|
"loss": 0.834, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_bleu": 5.3985, |
|
"eval_gen_len": 17.6899, |
|
"eval_loss": 0.9212985038757324, |
|
"eval_runtime": 1489.7985, |
|
"eval_samples_per_second": 15.242, |
|
"eval_steps_per_second": 0.953, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.6653110712571674e-05, |
|
"loss": 0.8098, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.65899618580919e-05, |
|
"loss": 0.8153, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.652681300361212e-05, |
|
"loss": 0.7837, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.6463664149132336e-05, |
|
"loss": 0.8255, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 4.640051529465256e-05, |
|
"loss": 0.8345, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 4.6337366440172775e-05, |
|
"loss": 0.8061, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.6274217585693e-05, |
|
"loss": 0.7743, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.621106873121322e-05, |
|
"loss": 0.7759, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.614791987673344e-05, |
|
"loss": 0.7777, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.608477102225366e-05, |
|
"loss": 0.7537, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 4.602162216777388e-05, |
|
"loss": 0.7439, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_bleu": 6.2934, |
|
"eval_gen_len": 17.4862, |
|
"eval_loss": 0.8995155096054077, |
|
"eval_runtime": 1459.4956, |
|
"eval_samples_per_second": 15.558, |
|
"eval_steps_per_second": 0.973, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.59584733132941e-05, |
|
"loss": 0.7424, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.589532445881432e-05, |
|
"loss": 0.7416, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.583217560433454e-05, |
|
"loss": 0.7722, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 4.576902674985476e-05, |
|
"loss": 0.7967, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 4.5705877895374984e-05, |
|
"loss": 0.7323, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 4.56427290408952e-05, |
|
"loss": 0.7171, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 4.557958018641542e-05, |
|
"loss": 0.7044, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 4.551643133193564e-05, |
|
"loss": 0.6828, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.545328247745586e-05, |
|
"loss": 0.7149, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 4.5390133622976085e-05, |
|
"loss": 0.6622, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 4.53269847684963e-05, |
|
"loss": 0.6803, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_bleu": 6.3565, |
|
"eval_gen_len": 17.8899, |
|
"eval_loss": 0.9016386270523071, |
|
"eval_runtime": 1493.3684, |
|
"eval_samples_per_second": 15.205, |
|
"eval_steps_per_second": 0.951, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 4.5263835914016524e-05, |
|
"loss": 0.7079, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.520068705953675e-05, |
|
"loss": 0.6978, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 4.513753820505696e-05, |
|
"loss": 0.7528, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 4.5074389350577186e-05, |
|
"loss": 0.7558, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 4.50112404960974e-05, |
|
"loss": 0.7742, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 4.4948091641617625e-05, |
|
"loss": 0.7505, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 4.488494278713785e-05, |
|
"loss": 0.7185, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 4.4821793932658064e-05, |
|
"loss": 0.7024, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 4.475864507817829e-05, |
|
"loss": 0.7262, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.46954962236985e-05, |
|
"loss": 0.7394, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.4632347369218726e-05, |
|
"loss": 0.733, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_bleu": 7.0351, |
|
"eval_gen_len": 17.6112, |
|
"eval_loss": 0.9225591421127319, |
|
"eval_runtime": 1483.9686, |
|
"eval_samples_per_second": 15.302, |
|
"eval_steps_per_second": 0.957, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 4.456919851473895e-05, |
|
"loss": 0.742, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 4.4506049660259165e-05, |
|
"loss": 0.735, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 4.444290080577939e-05, |
|
"loss": 0.6992, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.4379751951299604e-05, |
|
"loss": 0.7007, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.431660309681983e-05, |
|
"loss": 0.6979, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 4.425345424234005e-05, |
|
"loss": 0.6515, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 4.4190305387860266e-05, |
|
"loss": 0.6919, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 4.412715653338049e-05, |
|
"loss": 0.6241, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 4.406400767890071e-05, |
|
"loss": 0.6196, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 4.400085882442093e-05, |
|
"loss": 0.6557, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 4.393770996994115e-05, |
|
"loss": 0.6601, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_bleu": 5.3084, |
|
"eval_gen_len": 17.4292, |
|
"eval_loss": 0.9806875586509705, |
|
"eval_runtime": 1461.811, |
|
"eval_samples_per_second": 15.533, |
|
"eval_steps_per_second": 0.971, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 4.387456111546137e-05, |
|
"loss": 0.7354, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 4.381141226098159e-05, |
|
"loss": 0.7452, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 4.374826340650181e-05, |
|
"loss": 0.7175, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 4.368511455202203e-05, |
|
"loss": 0.7101, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 4.362196569754225e-05, |
|
"loss": 0.7068, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 4.355881684306247e-05, |
|
"loss": 0.6404, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 4.349566798858269e-05, |
|
"loss": 0.6316, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 4.3432519134102913e-05, |
|
"loss": 0.598, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.336937027962313e-05, |
|
"loss": 0.6091, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.330622142514335e-05, |
|
"loss": 0.695, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 4.324307257066357e-05, |
|
"loss": 0.6933, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_bleu": 6.8389, |
|
"eval_gen_len": 17.5131, |
|
"eval_loss": 0.923828661441803, |
|
"eval_runtime": 1455.6892, |
|
"eval_samples_per_second": 15.599, |
|
"eval_steps_per_second": 0.975, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"step": 55000, |
|
"total_flos": 1.907212892088828e+18, |
|
"train_loss": 0.9352452936345881, |
|
"train_runtime": 56979.8243, |
|
"train_samples_per_second": 111.586, |
|
"train_steps_per_second": 6.974 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 397390, |
|
"num_train_epochs": 35, |
|
"save_steps": 5500, |
|
"total_flos": 1.907212892088828e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|