open-o1-sft-original / trainer_state.json

End of training

0231348 verified 2 months ago

5.41 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 2.992412746585736,
	"eval_steps": 500,
	"global_step": 246,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.12139605462822459,
	"grad_norm": 1.9068916730195116,
	"learning_rate": 5e-06,
	"loss": 0.7721,
	"step": 10
	},
	{
	"epoch": 0.24279210925644917,
	"grad_norm": 5.44810384092203,
	"learning_rate": 5e-06,
	"loss": 0.6638,
	"step": 20
	},
	{
	"epoch": 0.36418816388467373,
	"grad_norm": 0.8162292861303865,
	"learning_rate": 5e-06,
	"loss": 0.6289,
	"step": 30
	},
	{
	"epoch": 0.48558421851289835,
	"grad_norm": 0.724722400146013,
	"learning_rate": 5e-06,
	"loss": 0.6062,
	"step": 40
	},
	{
	"epoch": 0.6069802731411229,
	"grad_norm": 0.6325755456980601,
	"learning_rate": 5e-06,
	"loss": 0.5886,
	"step": 50
	},
	{
	"epoch": 0.7283763277693475,
	"grad_norm": 0.5102096530669636,
	"learning_rate": 5e-06,
	"loss": 0.5763,
	"step": 60
	},
	{
	"epoch": 0.849772382397572,
	"grad_norm": 0.6134528530146113,
	"learning_rate": 5e-06,
	"loss": 0.5635,
	"step": 70
	},
	{
	"epoch": 0.9711684370257967,
	"grad_norm": 0.6520975040339092,
	"learning_rate": 5e-06,
	"loss": 0.5578,
	"step": 80
	},
	{
	"epoch": 0.9954476479514416,
	"eval_loss": 0.5475569367408752,
	"eval_runtime": 31.1849,
	"eval_samples_per_second": 71.156,
	"eval_steps_per_second": 1.122,
	"step": 82
	},
	{
	"epoch": 1.095599393019727,
	"grad_norm": 0.9030012716394636,
	"learning_rate": 5e-06,
	"loss": 0.5739,
	"step": 90
	},
	{
	"epoch": 1.2169954476479514,
	"grad_norm": 0.5546978323548724,
	"learning_rate": 5e-06,
	"loss": 0.5098,
	"step": 100
	},
	{
	"epoch": 1.338391502276176,
	"grad_norm": 0.7373972665017838,
	"learning_rate": 5e-06,
	"loss": 0.5099,
	"step": 110
	},
	{
	"epoch": 1.4597875569044005,
	"grad_norm": 0.8335652060900699,
	"learning_rate": 5e-06,
	"loss": 0.5086,
	"step": 120
	},
	{
	"epoch": 1.5811836115326252,
	"grad_norm": 0.9482742766383457,
	"learning_rate": 5e-06,
	"loss": 0.4988,
	"step": 130
	},
	{
	"epoch": 1.7025796661608497,
	"grad_norm": 0.7107692585969188,
	"learning_rate": 5e-06,
	"loss": 0.4973,
	"step": 140
	},
	{
	"epoch": 1.8239757207890743,
	"grad_norm": 0.6956293579831972,
	"learning_rate": 5e-06,
	"loss": 0.5006,
	"step": 150
	},
	{
	"epoch": 1.945371775417299,
	"grad_norm": 0.5537353905530825,
	"learning_rate": 5e-06,
	"loss": 0.4962,
	"step": 160
	},
	{
	"epoch": 1.9939301972685888,
	"eval_loss": 0.5237926244735718,
	"eval_runtime": 31.2236,
	"eval_samples_per_second": 71.068,
	"eval_steps_per_second": 1.121,
	"step": 164
	},
	{
	"epoch": 2.069802731411229,
	"grad_norm": 0.9676846382246841,
	"learning_rate": 5e-06,
	"loss": 0.5181,
	"step": 170
	},
	{
	"epoch": 2.191198786039454,
	"grad_norm": 1.0604432739536909,
	"learning_rate": 5e-06,
	"loss": 0.4483,
	"step": 180
	},
	{
	"epoch": 2.3125948406676784,
	"grad_norm": 0.9072874578025836,
	"learning_rate": 5e-06,
	"loss": 0.4481,
	"step": 190
	},
	{
	"epoch": 2.433990895295903,
	"grad_norm": 0.93397296136386,
	"learning_rate": 5e-06,
	"loss": 0.4503,
	"step": 200
	},
	{
	"epoch": 2.5553869499241273,
	"grad_norm": 0.5608137627707893,
	"learning_rate": 5e-06,
	"loss": 0.4475,
	"step": 210
	},
	{
	"epoch": 2.676783004552352,
	"grad_norm": 0.7216376866533744,
	"learning_rate": 5e-06,
	"loss": 0.4491,
	"step": 220
	},
	{
	"epoch": 2.7981790591805766,
	"grad_norm": 0.7502162152741092,
	"learning_rate": 5e-06,
	"loss": 0.4475,
	"step": 230
	},
	{
	"epoch": 2.919575113808801,
	"grad_norm": 0.6902724753233441,
	"learning_rate": 5e-06,
	"loss": 0.4505,
	"step": 240
	},
	{
	"epoch": 2.992412746585736,
	"eval_loss": 0.5186718702316284,
	"eval_runtime": 31.6067,
	"eval_samples_per_second": 70.207,
	"eval_steps_per_second": 1.107,
	"step": 246
	},
	{
	"epoch": 2.992412746585736,
	"step": 246,
	"total_flos": 411849782722560.0,
	"train_loss": 0.5282489497487138,
	"train_runtime": 6430.2484,
	"train_samples_per_second": 19.666,
	"train_steps_per_second": 0.038
	}
	],
	"logging_steps": 10,
	"max_steps": 246,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 411849782722560.0,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}