cllm_td_opt / trainer_state.json
zyliu's picture
update model
0c2be7f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"global_step": 3834,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.78,
"learning_rate": 1.793975255513717e-05,
"loss": 0.0241,
"step": 500
},
{
"epoch": 1.56,
"learning_rate": 1.5250134480903713e-05,
"loss": 0.0021,
"step": 1000
},
{
"epoch": 2.35,
"learning_rate": 1.2560516406670254e-05,
"loss": 0.0014,
"step": 1500
},
{
"epoch": 3.13,
"learning_rate": 9.870898332436795e-06,
"loss": 0.0011,
"step": 2000
},
{
"epoch": 3.91,
"learning_rate": 7.181280258203336e-06,
"loss": 0.0008,
"step": 2500
},
{
"epoch": 4.69,
"learning_rate": 4.491662183969877e-06,
"loss": 0.0006,
"step": 3000
},
{
"epoch": 5.48,
"learning_rate": 1.8020441097364175e-06,
"loss": 0.0005,
"step": 3500
},
{
"epoch": 6.0,
"step": 3834,
"total_flos": 2403004334473216.0,
"train_loss": 0.0040491660264984795,
"train_runtime": 24808.6685,
"train_samples_per_second": 19.777,
"train_steps_per_second": 0.155
}
],
"max_steps": 3834,
"num_train_epochs": 6,
"total_flos": 2403004334473216.0,
"trial_name": null,
"trial_params": null
}