|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 96.97087378640776, |
|
"eval_steps": 500, |
|
"global_step": 2497, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.000324, |
|
"loss": 1.6248, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 0.000648, |
|
"loss": 1.5109, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 0.000972, |
|
"loss": 1.4155, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"learning_rate": 0.0009671111111111112, |
|
"loss": 1.328, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 0.0009311111111111112, |
|
"loss": 1.2665, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 0.0008951111111111111, |
|
"loss": 1.2178, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 22.02, |
|
"learning_rate": 0.0008591111111111112, |
|
"loss": 1.1829, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 25.17, |
|
"learning_rate": 0.0008231111111111112, |
|
"loss": 1.1523, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 28.31, |
|
"learning_rate": 0.0007871111111111111, |
|
"loss": 1.1296, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 31.46, |
|
"learning_rate": 0.000751111111111111, |
|
"loss": 1.1084, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 34.6, |
|
"learning_rate": 0.0007151111111111111, |
|
"loss": 1.0855, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 37.75, |
|
"learning_rate": 0.0006791111111111111, |
|
"loss": 1.0708, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 40.89, |
|
"learning_rate": 0.0006431111111111111, |
|
"loss": 1.0536, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 44.04, |
|
"learning_rate": 0.0006071111111111112, |
|
"loss": 1.0359, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 47.18, |
|
"learning_rate": 0.0005711111111111111, |
|
"loss": 1.0246, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 50.33, |
|
"learning_rate": 0.0005351111111111111, |
|
"loss": 1.0132, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 53.48, |
|
"learning_rate": 0.0004991111111111111, |
|
"loss": 1.0013, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 56.62, |
|
"learning_rate": 0.0004631111111111111, |
|
"loss": 0.9878, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 59.77, |
|
"learning_rate": 0.0004271111111111111, |
|
"loss": 0.9766, |
|
"step": 1539 |
|
}, |
|
{ |
|
"epoch": 62.91, |
|
"learning_rate": 0.0003911111111111111, |
|
"loss": 0.9643, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 66.06, |
|
"learning_rate": 0.0003551111111111111, |
|
"loss": 0.9538, |
|
"step": 1701 |
|
}, |
|
{ |
|
"epoch": 69.2, |
|
"learning_rate": 0.0003191111111111111, |
|
"loss": 0.9486, |
|
"step": 1782 |
|
}, |
|
{ |
|
"epoch": 72.35, |
|
"learning_rate": 0.0002831111111111111, |
|
"loss": 0.9382, |
|
"step": 1863 |
|
}, |
|
{ |
|
"epoch": 75.5, |
|
"learning_rate": 0.00024711111111111114, |
|
"loss": 0.9255, |
|
"step": 1944 |
|
}, |
|
{ |
|
"epoch": 78.64, |
|
"learning_rate": 0.0002111111111111111, |
|
"loss": 0.9153, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 81.79, |
|
"learning_rate": 0.0001751111111111111, |
|
"loss": 0.9069, |
|
"step": 2106 |
|
}, |
|
{ |
|
"epoch": 84.93, |
|
"learning_rate": 0.0001391111111111111, |
|
"loss": 0.8996, |
|
"step": 2187 |
|
}, |
|
{ |
|
"epoch": 88.08, |
|
"learning_rate": 0.00010311111111111111, |
|
"loss": 0.888, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 91.22, |
|
"learning_rate": 6.71111111111111e-05, |
|
"loss": 0.8791, |
|
"step": 2349 |
|
}, |
|
{ |
|
"epoch": 94.37, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.8721, |
|
"step": 2430 |
|
} |
|
], |
|
"logging_steps": 81, |
|
"max_steps": 2500, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 2.7961865832310505e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|