hBERTv1_new_pretrain_rte / trainer_state.json
gokuls's picture
End of training
550d48e
{
"best_metric": 0.6895588636398315,
"best_model_checkpoint": "hBERTv1_new_pretrain_rte/checkpoint-140",
"epoch": 12.0,
"global_step": 240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 3.9200000000000004e-05,
"loss": 0.7407,
"step": 20
},
{
"epoch": 1.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.7002341151237488,
"eval_runtime": 0.5049,
"eval_samples_per_second": 548.575,
"eval_steps_per_second": 5.941,
"step": 20
},
{
"epoch": 2.0,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.7061,
"step": 40
},
{
"epoch": 2.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.7245458960533142,
"eval_runtime": 0.508,
"eval_samples_per_second": 545.223,
"eval_steps_per_second": 5.905,
"step": 40
},
{
"epoch": 3.0,
"learning_rate": 3.76e-05,
"loss": 0.7102,
"step": 60
},
{
"epoch": 3.0,
"eval_accuracy": 0.5270758122743683,
"eval_loss": 0.6948612332344055,
"eval_runtime": 0.504,
"eval_samples_per_second": 549.561,
"eval_steps_per_second": 5.952,
"step": 60
},
{
"epoch": 4.0,
"learning_rate": 3.680000000000001e-05,
"loss": 0.703,
"step": 80
},
{
"epoch": 4.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.6950586438179016,
"eval_runtime": 0.5087,
"eval_samples_per_second": 544.51,
"eval_steps_per_second": 5.897,
"step": 80
},
{
"epoch": 5.0,
"learning_rate": 3.6e-05,
"loss": 0.7097,
"step": 100
},
{
"epoch": 5.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.6973713636398315,
"eval_runtime": 0.5068,
"eval_samples_per_second": 546.522,
"eval_steps_per_second": 5.919,
"step": 100
},
{
"epoch": 6.0,
"learning_rate": 3.52e-05,
"loss": 0.7006,
"step": 120
},
{
"epoch": 6.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.7052543759346008,
"eval_runtime": 0.5068,
"eval_samples_per_second": 546.614,
"eval_steps_per_second": 5.92,
"step": 120
},
{
"epoch": 7.0,
"learning_rate": 3.44e-05,
"loss": 0.6986,
"step": 140
},
{
"epoch": 7.0,
"eval_accuracy": 0.5306859205776173,
"eval_loss": 0.6895588636398315,
"eval_runtime": 0.5057,
"eval_samples_per_second": 547.783,
"eval_steps_per_second": 5.933,
"step": 140
},
{
"epoch": 8.0,
"learning_rate": 3.3600000000000004e-05,
"loss": 0.6935,
"step": 160
},
{
"epoch": 8.0,
"eval_accuracy": 0.4729241877256318,
"eval_loss": 0.7711036205291748,
"eval_runtime": 0.5097,
"eval_samples_per_second": 543.49,
"eval_steps_per_second": 5.886,
"step": 160
},
{
"epoch": 9.0,
"learning_rate": 3.28e-05,
"loss": 0.6109,
"step": 180
},
{
"epoch": 9.0,
"eval_accuracy": 0.4981949458483754,
"eval_loss": 0.8443405032157898,
"eval_runtime": 0.5053,
"eval_samples_per_second": 548.178,
"eval_steps_per_second": 5.937,
"step": 180
},
{
"epoch": 10.0,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.469,
"step": 200
},
{
"epoch": 10.0,
"eval_accuracy": 0.5126353790613718,
"eval_loss": 1.0369467735290527,
"eval_runtime": 0.5078,
"eval_samples_per_second": 545.529,
"eval_steps_per_second": 5.908,
"step": 200
},
{
"epoch": 11.0,
"learning_rate": 3.1200000000000006e-05,
"loss": 0.3028,
"step": 220
},
{
"epoch": 11.0,
"eval_accuracy": 0.5234657039711191,
"eval_loss": 1.1621102094650269,
"eval_runtime": 0.512,
"eval_samples_per_second": 541.059,
"eval_steps_per_second": 5.86,
"step": 220
},
{
"epoch": 12.0,
"learning_rate": 3.0400000000000004e-05,
"loss": 0.2155,
"step": 240
},
{
"epoch": 12.0,
"eval_accuracy": 0.5379061371841155,
"eval_loss": 1.2095954418182373,
"eval_runtime": 0.5057,
"eval_samples_per_second": 547.72,
"eval_steps_per_second": 5.932,
"step": 240
},
{
"epoch": 12.0,
"step": 240,
"total_flos": 4425131514396672.0,
"train_loss": 0.6050535062948863,
"train_runtime": 211.7637,
"train_samples_per_second": 587.919,
"train_steps_per_second": 4.722
}
],
"max_steps": 1000,
"num_train_epochs": 50,
"total_flos": 4425131514396672.0,
"trial_name": null,
"trial_params": null
}