structroberta_s1_final / finetune /qnli /trainer_state.json
Omar
update
3859fdb
{
"best_metric": 0.7438596491228071,
"best_model_checkpoint": "final_models/glue_models/structroberta_s1_final//finetune/qnli/checkpoint-2000",
"epoch": 10.0,
"global_step": 3660,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.09,
"eval_accuracy": 0.6854768395423889,
"eval_f1": 0.698279479647503,
"eval_loss": 0.5918503403663635,
"eval_mcc": 0.37039988909838023,
"eval_runtime": 4.521,
"eval_samples_per_second": 505.638,
"eval_steps_per_second": 63.26,
"step": 400
},
{
"epoch": 1.37,
"learning_rate": 4.316939890710383e-05,
"loss": 0.6348,
"step": 500
},
{
"epoch": 2.19,
"eval_accuracy": 0.6942257285118103,
"eval_f1": 0.7279096924873493,
"eval_loss": 0.6217535138130188,
"eval_mcc": 0.3841755069564845,
"eval_runtime": 4.5007,
"eval_samples_per_second": 507.921,
"eval_steps_per_second": 63.546,
"step": 800
},
{
"epoch": 2.73,
"learning_rate": 3.633879781420765e-05,
"loss": 0.5113,
"step": 1000
},
{
"epoch": 3.28,
"eval_accuracy": 0.6999124884605408,
"eval_f1": 0.7262569832402235,
"eval_loss": 0.6595348715782166,
"eval_mcc": 0.39577981158604925,
"eval_runtime": 4.4963,
"eval_samples_per_second": 508.422,
"eval_steps_per_second": 63.608,
"step": 1200
},
{
"epoch": 4.1,
"learning_rate": 2.9508196721311478e-05,
"loss": 0.3571,
"step": 1500
},
{
"epoch": 4.37,
"eval_accuracy": 0.7104111909866333,
"eval_f1": 0.7324171382376718,
"eval_loss": 0.8105891942977905,
"eval_mcc": 0.4174093343048448,
"eval_runtime": 4.5019,
"eval_samples_per_second": 507.789,
"eval_steps_per_second": 63.529,
"step": 1600
},
{
"epoch": 5.46,
"learning_rate": 2.2677595628415303e-05,
"loss": 0.2085,
"step": 2000
},
{
"epoch": 5.46,
"eval_accuracy": 0.712598443031311,
"eval_f1": 0.7438596491228071,
"eval_loss": 0.8948610424995422,
"eval_mcc": 0.42158343510176083,
"eval_runtime": 4.5038,
"eval_samples_per_second": 507.569,
"eval_steps_per_second": 63.502,
"step": 2000
},
{
"epoch": 6.56,
"eval_accuracy": 0.7213473320007324,
"eval_f1": 0.7357942762339277,
"eval_loss": 1.1155877113342285,
"eval_mcc": 0.44112337210044195,
"eval_runtime": 4.4995,
"eval_samples_per_second": 508.053,
"eval_steps_per_second": 63.562,
"step": 2400
},
{
"epoch": 6.83,
"learning_rate": 1.5846994535519128e-05,
"loss": 0.1211,
"step": 2500
},
{
"epoch": 7.65,
"eval_accuracy": 0.7152230739593506,
"eval_f1": 0.740534077321642,
"eval_loss": 1.3353219032287598,
"eval_mcc": 0.42670434808360297,
"eval_runtime": 4.5055,
"eval_samples_per_second": 507.381,
"eval_steps_per_second": 63.478,
"step": 2800
},
{
"epoch": 8.2,
"learning_rate": 9.016393442622952e-06,
"loss": 0.0599,
"step": 3000
},
{
"epoch": 8.74,
"eval_accuracy": 0.7121610045433044,
"eval_f1": 0.7269709543568464,
"eval_loss": 1.4796608686447144,
"eval_mcc": 0.42273254192250986,
"eval_runtime": 4.5005,
"eval_samples_per_second": 507.939,
"eval_steps_per_second": 63.548,
"step": 3200
},
{
"epoch": 9.56,
"learning_rate": 2.185792349726776e-06,
"loss": 0.0317,
"step": 3500
},
{
"epoch": 9.84,
"eval_accuracy": 0.7108486294746399,
"eval_f1": 0.7300939158840344,
"eval_loss": 1.662826418876648,
"eval_mcc": 0.4188408674974473,
"eval_runtime": 4.4953,
"eval_samples_per_second": 508.53,
"eval_steps_per_second": 63.622,
"step": 3600
},
{
"epoch": 10.0,
"step": 3660,
"total_flos": 3.664834789427712e+16,
"train_loss": 0.2638393348032008,
"train_runtime": 2094.1582,
"train_samples_per_second": 209.712,
"train_steps_per_second": 1.748
}
],
"max_steps": 3660,
"num_train_epochs": 10,
"total_flos": 3.664834789427712e+16,
"trial_name": null,
"trial_params": null
}