VERSIL91's picture
Training in progress, step 20, checkpoint
cbdacf5 verified
raw
history blame
5.06 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.005737543255697201,
"eval_steps": 5,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0002868771627848601,
"grad_norm": Infinity,
"learning_rate": 1e-05,
"loss": 62.375,
"step": 1
},
{
"epoch": 0.0002868771627848601,
"eval_loss": 3.906569480895996,
"eval_runtime": 154.6579,
"eval_samples_per_second": 37.961,
"eval_steps_per_second": 18.984,
"step": 1
},
{
"epoch": 0.0005737543255697202,
"grad_norm": Infinity,
"learning_rate": 2e-05,
"loss": 60.75,
"step": 2
},
{
"epoch": 0.0008606314883545801,
"grad_norm": Infinity,
"learning_rate": 3e-05,
"loss": 65.6562,
"step": 3
},
{
"epoch": 0.0011475086511394403,
"grad_norm": Infinity,
"learning_rate": 4e-05,
"loss": 61.0625,
"step": 4
},
{
"epoch": 0.0014343858139243002,
"grad_norm": Infinity,
"learning_rate": 5e-05,
"loss": 59.7812,
"step": 5
},
{
"epoch": 0.0014343858139243002,
"eval_loss": 3.906548023223877,
"eval_runtime": 39.3741,
"eval_samples_per_second": 149.108,
"eval_steps_per_second": 74.567,
"step": 5
},
{
"epoch": 0.0017212629767091603,
"grad_norm": Infinity,
"learning_rate": 6e-05,
"loss": 62.2031,
"step": 6
},
{
"epoch": 0.0020081401394940203,
"grad_norm": Infinity,
"learning_rate": 7e-05,
"loss": 61.6406,
"step": 7
},
{
"epoch": 0.0022950173022788806,
"grad_norm": Infinity,
"learning_rate": 8e-05,
"loss": 60.9219,
"step": 8
},
{
"epoch": 0.0025818944650637405,
"grad_norm": 27.071285247802734,
"learning_rate": 9e-05,
"loss": 63.7969,
"step": 9
},
{
"epoch": 0.0028687716278486004,
"grad_norm": 22.0546932220459,
"learning_rate": 0.0001,
"loss": 61.7656,
"step": 10
},
{
"epoch": 0.0028687716278486004,
"eval_loss": 3.8499562740325928,
"eval_runtime": 39.7448,
"eval_samples_per_second": 147.717,
"eval_steps_per_second": 73.871,
"step": 10
},
{
"epoch": 0.0031556487906334607,
"grad_norm": 19.662364959716797,
"learning_rate": 9.755282581475769e-05,
"loss": 61.8594,
"step": 11
},
{
"epoch": 0.0034425259534183205,
"grad_norm": 16.447925567626953,
"learning_rate": 9.045084971874738e-05,
"loss": 60.5938,
"step": 12
},
{
"epoch": 0.003729403116203181,
"grad_norm": 12.94922924041748,
"learning_rate": 7.938926261462366e-05,
"loss": 60.2188,
"step": 13
},
{
"epoch": 0.004016280278988041,
"grad_norm": 10.807478904724121,
"learning_rate": 6.545084971874738e-05,
"loss": 61.0781,
"step": 14
},
{
"epoch": 0.0043031574417729005,
"grad_norm": 9.725618362426758,
"learning_rate": 5e-05,
"loss": 59.7031,
"step": 15
},
{
"epoch": 0.0043031574417729005,
"eval_loss": 3.7116947174072266,
"eval_runtime": 43.4551,
"eval_samples_per_second": 135.105,
"eval_steps_per_second": 67.564,
"step": 15
},
{
"epoch": 0.004590034604557761,
"grad_norm": 9.434167861938477,
"learning_rate": 3.4549150281252636e-05,
"loss": 59.1094,
"step": 16
},
{
"epoch": 0.004876911767342621,
"grad_norm": 8.841327667236328,
"learning_rate": 2.061073738537635e-05,
"loss": 58.3125,
"step": 17
},
{
"epoch": 0.005163788930127481,
"grad_norm": 8.895508766174316,
"learning_rate": 9.549150281252633e-06,
"loss": 59.25,
"step": 18
},
{
"epoch": 0.005450666092912341,
"grad_norm": 8.961664199829102,
"learning_rate": 2.4471741852423237e-06,
"loss": 59.3125,
"step": 19
},
{
"epoch": 0.005737543255697201,
"grad_norm": 9.52428150177002,
"learning_rate": 0.0,
"loss": 59.6406,
"step": 20
},
{
"epoch": 0.005737543255697201,
"eval_loss": 3.680753469467163,
"eval_runtime": 50.2242,
"eval_samples_per_second": 116.896,
"eval_steps_per_second": 58.458,
"step": 20
}
],
"logging_steps": 1,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 5,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 602433965260800.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}