{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 1.991150442477876, | |
"global_step": 98, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.06, | |
"learning_rate": 5e-05, | |
"loss": 2.7205, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.12, | |
"learning_rate": 0.0001, | |
"loss": 2.7244, | |
"step": 6 | |
}, | |
{ | |
"epoch": 0.18, | |
"learning_rate": 0.0001, | |
"loss": 2.7638, | |
"step": 9 | |
}, | |
{ | |
"epoch": 0.24, | |
"learning_rate": 0.0001, | |
"loss": 2.7622, | |
"step": 12 | |
}, | |
{ | |
"epoch": 0.3, | |
"learning_rate": 0.0001, | |
"loss": 2.7498, | |
"step": 15 | |
}, | |
{ | |
"epoch": 0.36, | |
"learning_rate": 0.0001, | |
"loss": 2.7814, | |
"step": 18 | |
}, | |
{ | |
"epoch": 0.42, | |
"learning_rate": 0.0001, | |
"loss": 2.7404, | |
"step": 21 | |
}, | |
{ | |
"epoch": 0.49, | |
"learning_rate": 0.0001, | |
"loss": 2.8507, | |
"step": 24 | |
}, | |
{ | |
"epoch": 0.55, | |
"learning_rate": 0.0001, | |
"loss": 2.7969, | |
"step": 27 | |
}, | |
{ | |
"epoch": 0.61, | |
"learning_rate": 0.0001, | |
"loss": 2.7087, | |
"step": 30 | |
}, | |
{ | |
"epoch": 0.67, | |
"learning_rate": 0.0001, | |
"loss": 2.6703, | |
"step": 33 | |
}, | |
{ | |
"epoch": 0.73, | |
"learning_rate": 0.0001, | |
"loss": 2.7935, | |
"step": 36 | |
}, | |
{ | |
"epoch": 0.79, | |
"learning_rate": 0.0001, | |
"loss": 2.6768, | |
"step": 39 | |
}, | |
{ | |
"epoch": 0.85, | |
"learning_rate": 0.0001, | |
"loss": 2.7279, | |
"step": 42 | |
}, | |
{ | |
"epoch": 0.91, | |
"learning_rate": 0.0001, | |
"loss": 2.7542, | |
"step": 45 | |
}, | |
{ | |
"epoch": 0.97, | |
"learning_rate": 0.0001, | |
"loss": 2.7718, | |
"step": 48 | |
}, | |
{ | |
"epoch": 1.04, | |
"learning_rate": 0.0001, | |
"loss": 3.1325, | |
"step": 51 | |
}, | |
{ | |
"epoch": 1.1, | |
"learning_rate": 0.0001, | |
"loss": 2.6908, | |
"step": 54 | |
}, | |
{ | |
"epoch": 1.16, | |
"learning_rate": 0.0001, | |
"loss": 2.661, | |
"step": 57 | |
}, | |
{ | |
"epoch": 1.22, | |
"learning_rate": 0.0001, | |
"loss": 2.6744, | |
"step": 60 | |
}, | |
{ | |
"epoch": 1.28, | |
"learning_rate": 0.0001, | |
"loss": 2.7521, | |
"step": 63 | |
}, | |
{ | |
"epoch": 1.34, | |
"learning_rate": 0.0001, | |
"loss": 2.7165, | |
"step": 66 | |
}, | |
{ | |
"epoch": 1.4, | |
"learning_rate": 0.0001, | |
"loss": 2.752, | |
"step": 69 | |
}, | |
{ | |
"epoch": 1.47, | |
"learning_rate": 0.0001, | |
"loss": 2.7158, | |
"step": 72 | |
}, | |
{ | |
"epoch": 1.53, | |
"learning_rate": 0.0001, | |
"loss": 2.6624, | |
"step": 75 | |
}, | |
{ | |
"epoch": 1.59, | |
"learning_rate": 0.0001, | |
"loss": 2.7896, | |
"step": 78 | |
}, | |
{ | |
"epoch": 1.65, | |
"learning_rate": 0.0001, | |
"loss": 2.706, | |
"step": 81 | |
}, | |
{ | |
"epoch": 1.71, | |
"learning_rate": 0.0001, | |
"loss": 2.7365, | |
"step": 84 | |
}, | |
{ | |
"epoch": 1.77, | |
"learning_rate": 0.0001, | |
"loss": 2.7428, | |
"step": 87 | |
}, | |
{ | |
"epoch": 1.83, | |
"learning_rate": 0.0001, | |
"loss": 2.7072, | |
"step": 90 | |
}, | |
{ | |
"epoch": 1.89, | |
"learning_rate": 0.0001, | |
"loss": 2.7192, | |
"step": 93 | |
}, | |
{ | |
"epoch": 1.95, | |
"learning_rate": 0.0001, | |
"loss": 2.7297, | |
"step": 96 | |
} | |
], | |
"max_steps": 294, | |
"num_train_epochs": 6, | |
"total_flos": 4.665562387762381e+17, | |
"trial_name": null, | |
"trial_params": null | |
} | |