|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.932735426008969, |
|
"eval_steps": 500, |
|
"global_step": 5500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.820986547085202e-05, |
|
"loss": 1.626, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.641614349775785e-05, |
|
"loss": 1.4201, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 33.3044, |
|
"eval_gen_len": 28.8563, |
|
"eval_loss": 1.3592603206634521, |
|
"eval_runtime": 172.5107, |
|
"eval_samples_per_second": 9.762, |
|
"eval_steps_per_second": 0.614, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.462242152466368e-05, |
|
"loss": 1.2694, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2828699551569507e-05, |
|
"loss": 1.2085, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 34.5144, |
|
"eval_gen_len": 29.0564, |
|
"eval_loss": 1.3197557926177979, |
|
"eval_runtime": 172.4795, |
|
"eval_samples_per_second": 9.763, |
|
"eval_steps_per_second": 0.615, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.1034977578475336e-05, |
|
"loss": 1.1369, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 9.244843049327356e-06, |
|
"loss": 1.0786, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 34.6844, |
|
"eval_gen_len": 29.1496, |
|
"eval_loss": 1.3080627918243408, |
|
"eval_runtime": 175.6578, |
|
"eval_samples_per_second": 9.587, |
|
"eval_steps_per_second": 0.603, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 7.451121076233184e-06, |
|
"loss": 1.0529, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 5.657399103139014e-06, |
|
"loss": 0.9958, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 34.862, |
|
"eval_gen_len": 29.0529, |
|
"eval_loss": 1.309422254562378, |
|
"eval_runtime": 174.4734, |
|
"eval_samples_per_second": 9.652, |
|
"eval_steps_per_second": 0.608, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.863677130044843e-06, |
|
"loss": 0.9933, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 2.069955156950673e-06, |
|
"loss": 0.9477, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.762331838565023e-07, |
|
"loss": 0.9504, |
|
"step": 5500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5575, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1101235524599808.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|