open-o1-sft-original / trainer_state.json
mikeam's picture
End of training
0231348 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.992412746585736,
"eval_steps": 500,
"global_step": 246,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12139605462822459,
"grad_norm": 1.9068916730195116,
"learning_rate": 5e-06,
"loss": 0.7721,
"step": 10
},
{
"epoch": 0.24279210925644917,
"grad_norm": 5.44810384092203,
"learning_rate": 5e-06,
"loss": 0.6638,
"step": 20
},
{
"epoch": 0.36418816388467373,
"grad_norm": 0.8162292861303865,
"learning_rate": 5e-06,
"loss": 0.6289,
"step": 30
},
{
"epoch": 0.48558421851289835,
"grad_norm": 0.724722400146013,
"learning_rate": 5e-06,
"loss": 0.6062,
"step": 40
},
{
"epoch": 0.6069802731411229,
"grad_norm": 0.6325755456980601,
"learning_rate": 5e-06,
"loss": 0.5886,
"step": 50
},
{
"epoch": 0.7283763277693475,
"grad_norm": 0.5102096530669636,
"learning_rate": 5e-06,
"loss": 0.5763,
"step": 60
},
{
"epoch": 0.849772382397572,
"grad_norm": 0.6134528530146113,
"learning_rate": 5e-06,
"loss": 0.5635,
"step": 70
},
{
"epoch": 0.9711684370257967,
"grad_norm": 0.6520975040339092,
"learning_rate": 5e-06,
"loss": 0.5578,
"step": 80
},
{
"epoch": 0.9954476479514416,
"eval_loss": 0.5475569367408752,
"eval_runtime": 31.1849,
"eval_samples_per_second": 71.156,
"eval_steps_per_second": 1.122,
"step": 82
},
{
"epoch": 1.095599393019727,
"grad_norm": 0.9030012716394636,
"learning_rate": 5e-06,
"loss": 0.5739,
"step": 90
},
{
"epoch": 1.2169954476479514,
"grad_norm": 0.5546978323548724,
"learning_rate": 5e-06,
"loss": 0.5098,
"step": 100
},
{
"epoch": 1.338391502276176,
"grad_norm": 0.7373972665017838,
"learning_rate": 5e-06,
"loss": 0.5099,
"step": 110
},
{
"epoch": 1.4597875569044005,
"grad_norm": 0.8335652060900699,
"learning_rate": 5e-06,
"loss": 0.5086,
"step": 120
},
{
"epoch": 1.5811836115326252,
"grad_norm": 0.9482742766383457,
"learning_rate": 5e-06,
"loss": 0.4988,
"step": 130
},
{
"epoch": 1.7025796661608497,
"grad_norm": 0.7107692585969188,
"learning_rate": 5e-06,
"loss": 0.4973,
"step": 140
},
{
"epoch": 1.8239757207890743,
"grad_norm": 0.6956293579831972,
"learning_rate": 5e-06,
"loss": 0.5006,
"step": 150
},
{
"epoch": 1.945371775417299,
"grad_norm": 0.5537353905530825,
"learning_rate": 5e-06,
"loss": 0.4962,
"step": 160
},
{
"epoch": 1.9939301972685888,
"eval_loss": 0.5237926244735718,
"eval_runtime": 31.2236,
"eval_samples_per_second": 71.068,
"eval_steps_per_second": 1.121,
"step": 164
},
{
"epoch": 2.069802731411229,
"grad_norm": 0.9676846382246841,
"learning_rate": 5e-06,
"loss": 0.5181,
"step": 170
},
{
"epoch": 2.191198786039454,
"grad_norm": 1.0604432739536909,
"learning_rate": 5e-06,
"loss": 0.4483,
"step": 180
},
{
"epoch": 2.3125948406676784,
"grad_norm": 0.9072874578025836,
"learning_rate": 5e-06,
"loss": 0.4481,
"step": 190
},
{
"epoch": 2.433990895295903,
"grad_norm": 0.93397296136386,
"learning_rate": 5e-06,
"loss": 0.4503,
"step": 200
},
{
"epoch": 2.5553869499241273,
"grad_norm": 0.5608137627707893,
"learning_rate": 5e-06,
"loss": 0.4475,
"step": 210
},
{
"epoch": 2.676783004552352,
"grad_norm": 0.7216376866533744,
"learning_rate": 5e-06,
"loss": 0.4491,
"step": 220
},
{
"epoch": 2.7981790591805766,
"grad_norm": 0.7502162152741092,
"learning_rate": 5e-06,
"loss": 0.4475,
"step": 230
},
{
"epoch": 2.919575113808801,
"grad_norm": 0.6902724753233441,
"learning_rate": 5e-06,
"loss": 0.4505,
"step": 240
},
{
"epoch": 2.992412746585736,
"eval_loss": 0.5186718702316284,
"eval_runtime": 31.6067,
"eval_samples_per_second": 70.207,
"eval_steps_per_second": 1.107,
"step": 246
},
{
"epoch": 2.992412746585736,
"step": 246,
"total_flos": 411849782722560.0,
"train_loss": 0.5282489497487138,
"train_runtime": 6430.2484,
"train_samples_per_second": 19.666,
"train_steps_per_second": 0.038
}
],
"logging_steps": 10,
"max_steps": 246,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 411849782722560.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}