|
{ |
|
"best_metric": 0.8294392523364487, |
|
"best_model_checkpoint": "final_models/transformer_base_final_2/finetune/qnli/checkpoint-1600", |
|
"epoch": 9.836065573770492, |
|
"global_step": 3600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.7349081635475159, |
|
"eval_f1": 0.7757216876387861, |
|
"eval_loss": 0.53801429271698, |
|
"eval_mcc": 0.4737186740651392, |
|
"eval_runtime": 3.1305, |
|
"eval_samples_per_second": 730.228, |
|
"eval_steps_per_second": 91.358, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.316939890710383e-05, |
|
"loss": 0.6208, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.8057742714881897, |
|
"eval_f1": 0.824782951854775, |
|
"eval_loss": 0.42864909768104553, |
|
"eval_mcc": 0.6103254520818892, |
|
"eval_runtime": 3.1226, |
|
"eval_samples_per_second": 732.077, |
|
"eval_steps_per_second": 91.59, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 3.633879781420765e-05, |
|
"loss": 0.433, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_accuracy": 0.8027121424674988, |
|
"eval_f1": 0.8223710122095312, |
|
"eval_loss": 0.46695178747177124, |
|
"eval_mcc": 0.604258498666583, |
|
"eval_runtime": 3.1147, |
|
"eval_samples_per_second": 733.949, |
|
"eval_steps_per_second": 91.824, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.9508196721311478e-05, |
|
"loss": 0.2954, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_accuracy": 0.808398962020874, |
|
"eval_f1": 0.8294392523364487, |
|
"eval_loss": 0.5520442128181458, |
|
"eval_mcc": 0.6169307221841646, |
|
"eval_runtime": 3.1389, |
|
"eval_samples_per_second": 728.273, |
|
"eval_steps_per_second": 91.114, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 2.2677595628415303e-05, |
|
"loss": 0.1693, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_accuracy": 0.7909011244773865, |
|
"eval_f1": 0.8138629283489095, |
|
"eval_loss": 0.7586725354194641, |
|
"eval_mcc": 0.5812582155687692, |
|
"eval_runtime": 3.1409, |
|
"eval_samples_per_second": 727.824, |
|
"eval_steps_per_second": 91.058, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_accuracy": 0.8048993945121765, |
|
"eval_f1": 0.820883534136546, |
|
"eval_loss": 0.8462380170822144, |
|
"eval_mcc": 0.6077896979937155, |
|
"eval_runtime": 3.1222, |
|
"eval_samples_per_second": 732.17, |
|
"eval_steps_per_second": 91.601, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 1.5846994535519128e-05, |
|
"loss": 0.1045, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_accuracy": 0.8018372654914856, |
|
"eval_f1": 0.8210193599367839, |
|
"eval_loss": 0.9258682131767273, |
|
"eval_mcc": 0.6022610616438395, |
|
"eval_runtime": 3.1586, |
|
"eval_samples_per_second": 723.747, |
|
"eval_steps_per_second": 90.547, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 9.016393442622952e-06, |
|
"loss": 0.06, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_accuracy": 0.8009623885154724, |
|
"eval_f1": 0.8171956609079951, |
|
"eval_loss": 1.0139225721359253, |
|
"eval_mcc": 0.5998412724259652, |
|
"eval_runtime": 3.1306, |
|
"eval_samples_per_second": 730.21, |
|
"eval_steps_per_second": 91.356, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.185792349726776e-06, |
|
"loss": 0.0406, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_accuracy": 0.8070865869522095, |
|
"eval_f1": 0.8221056877773294, |
|
"eval_loss": 1.1088656187057495, |
|
"eval_mcc": 0.6121534397963299, |
|
"eval_runtime": 3.1448, |
|
"eval_samples_per_second": 726.919, |
|
"eval_steps_per_second": 90.944, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"step": 3600, |
|
"total_flos": 2.842483414770432e+16, |
|
"train_loss": 0.24041106661160788, |
|
"train_runtime": 1450.9252, |
|
"train_samples_per_second": 302.683, |
|
"train_steps_per_second": 2.523 |
|
} |
|
], |
|
"max_steps": 3660, |
|
"num_train_epochs": 10, |
|
"total_flos": 2.842483414770432e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|