|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.0, |
|
"global_step": 39375, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.902023616171372e-05, |
|
"loss": 2.3606, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 108.1064, |
|
"eval_loss": 2.050605297088623, |
|
"eval_rouge1": 43.1144, |
|
"eval_rouge2": 19.9792, |
|
"eval_rougeL": 29.3193, |
|
"eval_rougeLsum": 29.3219, |
|
"eval_runtime": 6630.4724, |
|
"eval_samples_per_second": 3.016, |
|
"eval_steps_per_second": 3.016, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.914534041525834e-05, |
|
"loss": 2.0883, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 111.923, |
|
"eval_loss": 1.9714523553848267, |
|
"eval_rouge1": 43.2087, |
|
"eval_rouge2": 20.0241, |
|
"eval_rougeL": 29.2716, |
|
"eval_rougeLsum": 29.2729, |
|
"eval_runtime": 6876.0309, |
|
"eval_samples_per_second": 2.909, |
|
"eval_steps_per_second": 2.909, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 5.9272200517975506e-05, |
|
"loss": 1.9401, |
|
"step": 16875 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 110.7088, |
|
"eval_loss": 1.9192545413970947, |
|
"eval_rouge1": 43.7675, |
|
"eval_rouge2": 20.4219, |
|
"eval_rougeL": 29.736, |
|
"eval_rougeLsum": 29.736, |
|
"eval_runtime": 6792.0963, |
|
"eval_samples_per_second": 2.945, |
|
"eval_steps_per_second": 2.945, |
|
"step": 16875 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.9397304771520136e-05, |
|
"loss": 1.8413, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 114.6206, |
|
"eval_loss": 1.9106097221374512, |
|
"eval_rouge1": 43.9857, |
|
"eval_rouge2": 20.6259, |
|
"eval_rougeL": 29.863, |
|
"eval_rougeLsum": 29.8645, |
|
"eval_runtime": 7084.2274, |
|
"eval_samples_per_second": 2.823, |
|
"eval_steps_per_second": 2.823, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.952240902506475e-05, |
|
"loss": 1.7632, |
|
"step": 28125 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 116.4429, |
|
"eval_loss": 1.9056074619293213, |
|
"eval_rouge1": 44.0791, |
|
"eval_rouge2": 20.7132, |
|
"eval_rougeL": 29.9243, |
|
"eval_rougeLsum": 29.9263, |
|
"eval_runtime": 7223.7263, |
|
"eval_samples_per_second": 2.769, |
|
"eval_steps_per_second": 2.769, |
|
"step": 28125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.9649269127781926e-05, |
|
"loss": 1.6991, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 116.3214, |
|
"eval_loss": 1.9034677743911743, |
|
"eval_rouge1": 44.0398, |
|
"eval_rouge2": 20.6289, |
|
"eval_rougeL": 29.862, |
|
"eval_rougeLsum": 29.8651, |
|
"eval_runtime": 7200.6944, |
|
"eval_samples_per_second": 2.778, |
|
"eval_steps_per_second": 2.778, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.9777885079671656e-05, |
|
"loss": 1.6457, |
|
"step": 39375 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_gen_len": 113.2878, |
|
"eval_loss": 1.9000203609466553, |
|
"eval_rouge1": 43.9966, |
|
"eval_rouge2": 20.5908, |
|
"eval_rougeL": 29.9303, |
|
"eval_rougeLsum": 29.93, |
|
"eval_runtime": 7006.6993, |
|
"eval_samples_per_second": 2.854, |
|
"eval_steps_per_second": 2.854, |
|
"step": 39375 |
|
} |
|
], |
|
"max_steps": 50625, |
|
"num_train_epochs": 9, |
|
"total_flos": 3.353631721258291e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|