|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 28.993288590604028, |
|
"eval_steps": 500, |
|
"global_step": 1080, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.000991991991991992, |
|
"loss": 1.7242, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.0008728728728728728, |
|
"loss": 1.5887, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.0007537537537537538, |
|
"loss": 1.5001, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 0.0006346346346346347, |
|
"loss": 1.4476, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 0.0005155155155155156, |
|
"loss": 1.4097, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 19.17, |
|
"learning_rate": 0.0003963963963963964, |
|
"loss": 1.3734, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 22.36, |
|
"learning_rate": 0.0002772772772772773, |
|
"loss": 1.3429, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 25.56, |
|
"learning_rate": 0.00015815815815815816, |
|
"loss": 1.3154, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 28.75, |
|
"learning_rate": 3.903903903903904e-05, |
|
"loss": 1.2919, |
|
"step": 1071 |
|
} |
|
], |
|
"logging_steps": 119, |
|
"max_steps": 1110, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 1.2093205955680928e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|