|
{ |
|
"best_metric": 35.2849, |
|
"best_model_checkpoint": "large_ox-wn_cod_15ep_eap/checkpoint-38360", |
|
"epoch": 15.0, |
|
"global_step": 41100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 2.1769, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 11.342940924045202, |
|
"eval_loss": 1.905047059059143, |
|
"eval_rouge1": 28.7222, |
|
"eval_rouge2": 9.1873, |
|
"eval_rougeL": 26.6888, |
|
"eval_rougeLsum": 26.6937, |
|
"eval_runtime": 86.0596, |
|
"eval_samples_per_second": 162.469, |
|
"eval_steps_per_second": 1.278, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 1.9408, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 11.416464025175225, |
|
"eval_loss": 1.8151417970657349, |
|
"eval_rouge1": 29.8799, |
|
"eval_rouge2": 10.2327, |
|
"eval_rougeL": 27.7947, |
|
"eval_rougeLsum": 27.8044, |
|
"eval_runtime": 98.7019, |
|
"eval_samples_per_second": 141.659, |
|
"eval_steps_per_second": 1.114, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4e-05, |
|
"loss": 1.8124, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 11.531039908453726, |
|
"eval_loss": 1.7607892751693726, |
|
"eval_rouge1": 30.9845, |
|
"eval_rouge2": 10.9982, |
|
"eval_rougeL": 28.8059, |
|
"eval_rougeLsum": 28.8131, |
|
"eval_runtime": 96.5011, |
|
"eval_samples_per_second": 144.889, |
|
"eval_steps_per_second": 1.14, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 1.7118, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 11.703690459161779, |
|
"eval_loss": 1.7228699922561646, |
|
"eval_rouge1": 31.6943, |
|
"eval_rouge2": 11.7412, |
|
"eval_rougeL": 29.4967, |
|
"eval_rougeLsum": 29.5319, |
|
"eval_runtime": 87.7321, |
|
"eval_samples_per_second": 159.372, |
|
"eval_steps_per_second": 1.254, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.6286, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 11.77835788871406, |
|
"eval_loss": 1.6936795711517334, |
|
"eval_rouge1": 32.5839, |
|
"eval_rouge2": 12.2431, |
|
"eval_rougeL": 30.1799, |
|
"eval_rougeLsum": 30.206, |
|
"eval_runtime": 84.5028, |
|
"eval_samples_per_second": 165.462, |
|
"eval_steps_per_second": 1.302, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5597, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 11.597410956944643, |
|
"eval_loss": 1.674757480621338, |
|
"eval_rouge1": 32.9915, |
|
"eval_rouge2": 12.8514, |
|
"eval_rougeL": 30.7016, |
|
"eval_rougeLsum": 30.7145, |
|
"eval_runtime": 87.802, |
|
"eval_samples_per_second": 159.245, |
|
"eval_steps_per_second": 1.253, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 1.4982, |
|
"step": 19180 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 11.358031755113718, |
|
"eval_loss": 1.6578471660614014, |
|
"eval_rouge1": 33.2157, |
|
"eval_rouge2": 13.1389, |
|
"eval_rougeL": 30.9428, |
|
"eval_rougeLsum": 30.9519, |
|
"eval_runtime": 89.406, |
|
"eval_samples_per_second": 156.388, |
|
"eval_steps_per_second": 1.23, |
|
"step": 19180 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 1.4468, |
|
"step": 21920 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 11.572378772707767, |
|
"eval_loss": 1.6473166942596436, |
|
"eval_rouge1": 33.6146, |
|
"eval_rouge2": 13.5922, |
|
"eval_rougeL": 31.3001, |
|
"eval_rougeLsum": 31.3235, |
|
"eval_runtime": 98.6248, |
|
"eval_samples_per_second": 141.77, |
|
"eval_steps_per_second": 1.115, |
|
"step": 21920 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4022, |
|
"step": 24660 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 11.738878558146189, |
|
"eval_loss": 1.6383947134017944, |
|
"eval_rouge1": 34.1711, |
|
"eval_rouge2": 14.1117, |
|
"eval_rougeL": 31.7951, |
|
"eval_rougeLsum": 31.8066, |
|
"eval_runtime": 89.5372, |
|
"eval_samples_per_second": 156.159, |
|
"eval_steps_per_second": 1.229, |
|
"step": 24660 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.364, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 11.665856100700902, |
|
"eval_loss": 1.6336920261383057, |
|
"eval_rouge1": 34.5489, |
|
"eval_rouge2": 14.5012, |
|
"eval_rougeL": 32.1329, |
|
"eval_rougeLsum": 32.1446, |
|
"eval_runtime": 103.7766, |
|
"eval_samples_per_second": 134.732, |
|
"eval_steps_per_second": 1.06, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 1.3321, |
|
"step": 30140 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_gen_len": 11.800314690316121, |
|
"eval_loss": 1.6291483640670776, |
|
"eval_rouge1": 34.7133, |
|
"eval_rouge2": 14.7297, |
|
"eval_rougeL": 32.3042, |
|
"eval_rougeLsum": 32.314, |
|
"eval_runtime": 91.3961, |
|
"eval_samples_per_second": 152.982, |
|
"eval_steps_per_second": 1.204, |
|
"step": 30140 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3054, |
|
"step": 32880 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 11.761908167644114, |
|
"eval_loss": 1.6267131567001343, |
|
"eval_rouge1": 34.9411, |
|
"eval_rouge2": 15.0282, |
|
"eval_rougeL": 32.5335, |
|
"eval_rougeLsum": 32.5451, |
|
"eval_runtime": 98.5092, |
|
"eval_samples_per_second": 141.936, |
|
"eval_steps_per_second": 1.117, |
|
"step": 32880 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.2845, |
|
"step": 35620 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_gen_len": 11.831712201401801, |
|
"eval_loss": 1.626239538192749, |
|
"eval_rouge1": 35.1648, |
|
"eval_rouge2": 15.2154, |
|
"eval_rougeL": 32.7387, |
|
"eval_rougeLsum": 32.742, |
|
"eval_runtime": 85.528, |
|
"eval_samples_per_second": 163.479, |
|
"eval_steps_per_second": 1.286, |
|
"step": 35620 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.2699, |
|
"step": 38360 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_gen_len": 11.816764411386067, |
|
"eval_loss": 1.6257190704345703, |
|
"eval_rouge1": 35.2849, |
|
"eval_rouge2": 15.3109, |
|
"eval_rougeL": 32.8508, |
|
"eval_rougeLsum": 32.853, |
|
"eval_runtime": 84.6116, |
|
"eval_samples_per_second": 165.249, |
|
"eval_steps_per_second": 1.3, |
|
"step": 38360 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.2595, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_gen_len": 11.797096266628522, |
|
"eval_loss": 1.6273423433303833, |
|
"eval_rouge1": 35.2224, |
|
"eval_rouge2": 15.2781, |
|
"eval_rougeL": 32.7718, |
|
"eval_rougeLsum": 32.7826, |
|
"eval_runtime": 95.1523, |
|
"eval_samples_per_second": 146.943, |
|
"eval_steps_per_second": 1.156, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 41100, |
|
"total_flos": 9.049973435337277e+17, |
|
"train_loss": 1.5328590292826185, |
|
"train_runtime": 18417.6233, |
|
"train_samples_per_second": 142.797, |
|
"train_steps_per_second": 2.232 |
|
} |
|
], |
|
"max_steps": 41100, |
|
"num_train_epochs": 15, |
|
"total_flos": 9.049973435337277e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|