|
{ |
|
"best_metric": 2.75215744972229, |
|
"best_model_checkpoint": "PEFT/adapters-lib/output/parallel/dataset-5400/checkpoint-5136", |
|
"epoch": 13.0, |
|
"eval_steps": 500, |
|
"global_step": 8346, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 16.2889, |
|
"eval_loss": 3.750108242034912, |
|
"eval_rouge-1": 18.3573, |
|
"eval_rouge-2": 7.2689, |
|
"eval_rouge-l": 18.0787, |
|
"eval_runtime": 67.5801, |
|
"eval_samples_per_second": 6.659, |
|
"eval_steps_per_second": 0.843, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 18.9889, |
|
"eval_loss": 3.052328109741211, |
|
"eval_rouge-1": 35.6298, |
|
"eval_rouge-2": 18.8236, |
|
"eval_rouge-l": 35.061, |
|
"eval_runtime": 75.9845, |
|
"eval_samples_per_second": 5.922, |
|
"eval_steps_per_second": 0.75, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 17.7733, |
|
"eval_loss": 2.960456371307373, |
|
"eval_rouge-1": 38.5671, |
|
"eval_rouge-2": 21.0729, |
|
"eval_rouge-l": 37.9242, |
|
"eval_runtime": 70.7318, |
|
"eval_samples_per_second": 6.362, |
|
"eval_steps_per_second": 0.806, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 17.3978, |
|
"eval_loss": 2.899169683456421, |
|
"eval_rouge-1": 40.4934, |
|
"eval_rouge-2": 22.7632, |
|
"eval_rouge-l": 39.8547, |
|
"eval_runtime": 69.4305, |
|
"eval_samples_per_second": 6.481, |
|
"eval_steps_per_second": 0.821, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 17.2022, |
|
"eval_loss": 2.8467295169830322, |
|
"eval_rouge-1": 41.7604, |
|
"eval_rouge-2": 24.0399, |
|
"eval_rouge-l": 41.092, |
|
"eval_runtime": 70.3771, |
|
"eval_samples_per_second": 6.394, |
|
"eval_steps_per_second": 0.81, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 16.8956, |
|
"eval_loss": 2.812547206878662, |
|
"eval_rouge-1": 41.5256, |
|
"eval_rouge-2": 23.8131, |
|
"eval_rouge-l": 40.8416, |
|
"eval_runtime": 68.6799, |
|
"eval_samples_per_second": 6.552, |
|
"eval_steps_per_second": 0.83, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 16.1022, |
|
"eval_loss": 2.783888816833496, |
|
"eval_rouge-1": 42.7127, |
|
"eval_rouge-2": 24.6471, |
|
"eval_rouge-l": 42.1972, |
|
"eval_runtime": 63.7095, |
|
"eval_samples_per_second": 7.063, |
|
"eval_steps_per_second": 0.895, |
|
"step": 4494 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 8.715112540192926e-05, |
|
"loss": 3.4332, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_gen_len": 15.7444, |
|
"eval_loss": 2.75215744972229, |
|
"eval_rouge-1": 42.9172, |
|
"eval_rouge-2": 25.6169, |
|
"eval_rouge-l": 42.4864, |
|
"eval_runtime": 62.162, |
|
"eval_samples_per_second": 7.239, |
|
"eval_steps_per_second": 0.917, |
|
"step": 5136 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_gen_len": 19.3733, |
|
"eval_loss": 3.0449774265289307, |
|
"eval_rouge-1": 33.8509, |
|
"eval_rouge-2": 18.7234, |
|
"eval_rouge-l": 33.1807, |
|
"eval_runtime": 80.7729, |
|
"eval_samples_per_second": 5.571, |
|
"eval_steps_per_second": 0.706, |
|
"step": 5778 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_gen_len": 18.5022, |
|
"eval_loss": 2.987593650817871, |
|
"eval_rouge-1": 34.5358, |
|
"eval_rouge-2": 18.8882, |
|
"eval_rouge-l": 33.5787, |
|
"eval_runtime": 70.1569, |
|
"eval_samples_per_second": 6.414, |
|
"eval_steps_per_second": 0.812, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_gen_len": 18.0133, |
|
"eval_loss": 2.951963424682617, |
|
"eval_rouge-1": 36.0373, |
|
"eval_rouge-2": 20.1364, |
|
"eval_rouge-l": 35.0084, |
|
"eval_runtime": 67.9106, |
|
"eval_samples_per_second": 6.626, |
|
"eval_steps_per_second": 0.839, |
|
"step": 7062 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_gen_len": 17.6444, |
|
"eval_loss": 2.919402599334717, |
|
"eval_rouge-1": 36.9999, |
|
"eval_rouge-2": 22.0519, |
|
"eval_rouge-l": 36.0277, |
|
"eval_runtime": 67.2477, |
|
"eval_samples_per_second": 6.692, |
|
"eval_steps_per_second": 0.848, |
|
"step": 7704 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_gen_len": 18.0467, |
|
"eval_loss": 2.8770110607147217, |
|
"eval_rouge-1": 39.3542, |
|
"eval_rouge-2": 23.7167, |
|
"eval_rouge-l": 38.2318, |
|
"eval_runtime": 68.3111, |
|
"eval_samples_per_second": 6.588, |
|
"eval_steps_per_second": 0.834, |
|
"step": 8346 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"step": 8346, |
|
"total_flos": 1.840140038622413e+16, |
|
"train_loss": 3.136590390419063, |
|
"train_runtime": 4227.0028, |
|
"train_samples_per_second": 60.717, |
|
"train_steps_per_second": 7.594 |
|
} |
|
], |
|
"logging_steps": 5000, |
|
"max_steps": 32100, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.840140038622413e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|