|
{ |
|
"best_metric": 28.8215, |
|
"best_model_checkpoint": "./ko-en_mbartLarge_exp10p/checkpoint-32000", |
|
"epoch": 6.188118811881188, |
|
"eval_steps": 2000, |
|
"global_step": 40000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.9221, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7751, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9999535065698766e-05, |
|
"loss": 1.5628, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.9998140280088176e-05, |
|
"loss": 1.4782, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_bleu": 21.538, |
|
"eval_gen_len": 18.6032, |
|
"eval_loss": 1.435985803604126, |
|
"eval_runtime": 861.7869, |
|
"eval_samples_per_second": 15.0, |
|
"eval_steps_per_second": 0.938, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.999581569504692e-05, |
|
"loss": 1.4515, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.999256139703734e-05, |
|
"loss": 1.4195, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.998837750710223e-05, |
|
"loss": 1.3946, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.99832641808603e-05, |
|
"loss": 1.3618, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_bleu": 23.8354, |
|
"eval_gen_len": 18.5594, |
|
"eval_loss": 1.3225533962249756, |
|
"eval_runtime": 848.5716, |
|
"eval_samples_per_second": 15.234, |
|
"eval_steps_per_second": 0.952, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.99772216085004e-05, |
|
"loss": 1.3398, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.9970250014774486e-05, |
|
"loss": 1.3278, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.996234965898918e-05, |
|
"loss": 1.3002, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.9953520834996206e-05, |
|
"loss": 1.2983, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_bleu": 25.0795, |
|
"eval_gen_len": 18.7894, |
|
"eval_loss": 1.2636672258377075, |
|
"eval_runtime": 866.6867, |
|
"eval_samples_per_second": 14.915, |
|
"eval_steps_per_second": 0.932, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.994376387118141e-05, |
|
"loss": 1.2851, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.993307913045257e-05, |
|
"loss": 1.2246, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.9921467010225866e-05, |
|
"loss": 1.2111, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.9908927942411154e-05, |
|
"loss": 1.2065, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_bleu": 25.7409, |
|
"eval_gen_len": 18.5615, |
|
"eval_loss": 1.237057089805603, |
|
"eval_runtime": 857.6049, |
|
"eval_samples_per_second": 15.073, |
|
"eval_steps_per_second": 0.942, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.989546239339585e-05, |
|
"loss": 1.201, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.98810708640276e-05, |
|
"loss": 1.2098, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.986575388959566e-05, |
|
"loss": 1.1832, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.984951203981097e-05, |
|
"loss": 1.1926, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_bleu": 26.0527, |
|
"eval_gen_len": 18.4019, |
|
"eval_loss": 1.2115644216537476, |
|
"eval_runtime": 851.6364, |
|
"eval_samples_per_second": 15.179, |
|
"eval_steps_per_second": 0.949, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.983234591878498e-05, |
|
"loss": 1.1858, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.981425616500717e-05, |
|
"loss": 1.1939, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.9795243451321304e-05, |
|
"loss": 1.1693, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.977530848490039e-05, |
|
"loss": 1.1734, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_bleu": 26.9802, |
|
"eval_gen_len": 18.6141, |
|
"eval_loss": 1.190738558769226, |
|
"eval_runtime": 858.138, |
|
"eval_samples_per_second": 15.064, |
|
"eval_steps_per_second": 0.942, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 4.975445200722042e-05, |
|
"loss": 1.173, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.973267479403273e-05, |
|
"loss": 1.1541, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.9709977655335196e-05, |
|
"loss": 1.0598, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 4.968636143534208e-05, |
|
"loss": 1.0677, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_bleu": 27.1925, |
|
"eval_gen_len": 18.4547, |
|
"eval_loss": 1.1801778078079224, |
|
"eval_runtime": 850.3421, |
|
"eval_samples_per_second": 15.202, |
|
"eval_steps_per_second": 0.95, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 4.966182701245266e-05, |
|
"loss": 1.0789, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.9636375299218484e-05, |
|
"loss": 1.0706, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.961000724230954e-05, |
|
"loss": 1.082, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 4.958272382247895e-05, |
|
"loss": 1.0773, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_bleu": 27.5641, |
|
"eval_gen_len": 18.6726, |
|
"eval_loss": 1.1654949188232422, |
|
"eval_runtime": 856.0869, |
|
"eval_samples_per_second": 15.1, |
|
"eval_steps_per_second": 0.944, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.955452605452653e-05, |
|
"loss": 1.0749, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 4.952541498726105e-05, |
|
"loss": 1.0845, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.949539170346119e-05, |
|
"loss": 1.0725, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.9464457319835334e-05, |
|
"loss": 1.0688, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_bleu": 27.6261, |
|
"eval_gen_len": 18.6127, |
|
"eval_loss": 1.1520819664001465, |
|
"eval_runtime": 854.4229, |
|
"eval_samples_per_second": 15.13, |
|
"eval_steps_per_second": 0.946, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.9432612986979945e-05, |
|
"loss": 1.0801, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.939985988933683e-05, |
|
"loss": 1.0993, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.9366199245149095e-05, |
|
"loss": 1.0534, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 4.9331632306415776e-05, |
|
"loss": 0.9542, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"eval_bleu": 27.16, |
|
"eval_gen_len": 18.3782, |
|
"eval_loss": 1.1709098815917969, |
|
"eval_runtime": 856.056, |
|
"eval_samples_per_second": 15.101, |
|
"eval_steps_per_second": 0.944, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 4.929616035884531e-05, |
|
"loss": 0.9688, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.925978472180771e-05, |
|
"loss": 0.9453, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 4.9222506748285495e-05, |
|
"loss": 0.9471, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.918432782482335e-05, |
|
"loss": 0.9531, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_bleu": 28.0684, |
|
"eval_gen_len": 18.436, |
|
"eval_loss": 1.1434855461120605, |
|
"eval_runtime": 852.3756, |
|
"eval_samples_per_second": 15.166, |
|
"eval_steps_per_second": 0.948, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.914524937147655e-05, |
|
"loss": 0.9478, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 4.910527284175818e-05, |
|
"loss": 0.9594, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 4.906439972258503e-05, |
|
"loss": 0.9483, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 4.902263153422231e-05, |
|
"loss": 0.9756, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_bleu": 27.6025, |
|
"eval_gen_len": 18.7284, |
|
"eval_loss": 1.1564555168151855, |
|
"eval_runtime": 860.0664, |
|
"eval_samples_per_second": 15.03, |
|
"eval_steps_per_second": 0.939, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 4.8979969830227086e-05, |
|
"loss": 0.9912, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.893641619739053e-05, |
|
"loss": 0.9789, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.8891972255678876e-05, |
|
"loss": 1.0068, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 4.8846639658173156e-05, |
|
"loss": 0.9964, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_bleu": 25.6999, |
|
"eval_gen_len": 18.3255, |
|
"eval_loss": 1.2285293340682983, |
|
"eval_runtime": 852.9084, |
|
"eval_samples_per_second": 15.156, |
|
"eval_steps_per_second": 0.947, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.880042009100772e-05, |
|
"loss": 0.9576, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 4.8753315273307575e-05, |
|
"loss": 0.9583, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 4.8705326957124334e-05, |
|
"loss": 0.9696, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 4.865645692737114e-05, |
|
"loss": 0.9721, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_bleu": 27.3499, |
|
"eval_gen_len": 18.5409, |
|
"eval_loss": 1.1880507469177246, |
|
"eval_runtime": 849.7595, |
|
"eval_samples_per_second": 15.213, |
|
"eval_steps_per_second": 0.951, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 4.860670700175625e-05, |
|
"loss": 0.9355, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 4.855607903071542e-05, |
|
"loss": 0.9314, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 4.850457489734306e-05, |
|
"loss": 0.9142, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 4.845219651732225e-05, |
|
"loss": 0.9237, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"eval_bleu": 28.2692, |
|
"eval_gen_len": 18.6614, |
|
"eval_loss": 1.1497083902359009, |
|
"eval_runtime": 859.3006, |
|
"eval_samples_per_second": 15.044, |
|
"eval_steps_per_second": 0.94, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 4.839894583885341e-05, |
|
"loss": 0.9102, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.834482484258193e-05, |
|
"loss": 0.8956, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 4.8289835541524394e-05, |
|
"loss": 0.8902, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 4.8233979980993785e-05, |
|
"loss": 0.9041, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_bleu": 28.8215, |
|
"eval_gen_len": 18.5493, |
|
"eval_loss": 1.1282682418823242, |
|
"eval_runtime": 861.0284, |
|
"eval_samples_per_second": 15.013, |
|
"eval_steps_per_second": 0.938, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.817726023852338e-05, |
|
"loss": 0.8124, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.81196784237895e-05, |
|
"loss": 0.6719, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 4.8061236678533e-05, |
|
"loss": 0.6779, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 4.800193717647964e-05, |
|
"loss": 0.6842, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"eval_bleu": 28.6873, |
|
"eval_gen_len": 18.515, |
|
"eval_loss": 1.174131989479065, |
|
"eval_runtime": 856.2355, |
|
"eval_samples_per_second": 15.097, |
|
"eval_steps_per_second": 0.944, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 4.794178212325922e-05, |
|
"loss": 0.6763, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 4.7880773756323556e-05, |
|
"loss": 0.6887, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 4.781891434486324e-05, |
|
"loss": 0.7097, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 4.775620618972326e-05, |
|
"loss": 0.7101, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_bleu": 28.0778, |
|
"eval_gen_len": 18.3422, |
|
"eval_loss": 1.1875863075256348, |
|
"eval_runtime": 848.3505, |
|
"eval_samples_per_second": 15.238, |
|
"eval_steps_per_second": 0.952, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 4.7692651623317395e-05, |
|
"loss": 0.7219, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 4.762825300954147e-05, |
|
"loss": 0.7489, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 4.756301274368545e-05, |
|
"loss": 0.7671, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 4.749693325234434e-05, |
|
"loss": 0.7697, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_bleu": 27.6338, |
|
"eval_gen_len": 18.6766, |
|
"eval_loss": 1.1897605657577515, |
|
"eval_runtime": 865.3805, |
|
"eval_samples_per_second": 14.938, |
|
"eval_steps_per_second": 0.934, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 4.74300169933279e-05, |
|
"loss": 0.7846, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 4.736226645556926e-05, |
|
"loss": 0.6989, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 4.729368415903233e-05, |
|
"loss": 0.615, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 4.722427265461809e-05, |
|
"loss": 0.6028, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"eval_bleu": 28.0713, |
|
"eval_gen_len": 18.5903, |
|
"eval_loss": 1.2392680644989014, |
|
"eval_runtime": 864.5773, |
|
"eval_samples_per_second": 14.952, |
|
"eval_steps_per_second": 0.935, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"step": 40000, |
|
"total_flos": 1.3870670734394655e+18, |
|
"train_loss": 1.0430022468566895, |
|
"train_runtime": 46145.4904, |
|
"train_samples_per_second": 89.65, |
|
"train_steps_per_second": 5.603 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 258560, |
|
"num_train_epochs": 40, |
|
"save_steps": 2000, |
|
"total_flos": 1.3870670734394655e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|