PetBERT_ICD / trainer_state.json
seanfarrell's picture
Initial Push
1ce5e6a verified
{
"best_metric": 0.9293855295231974,
"best_model_checkpoint": "Datasets/ICD/ICD_11_multilabel_MultiLabelSoftMarginLoss_FINAL_final/checkpoint-70317",
"epoch": 9.0,
"global_step": 70317,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13,
"learning_rate": 4.9677959551719697e-05,
"loss": 0.6152,
"step": 1000
},
{
"epoch": 0.26,
"learning_rate": 4.9033878655159085e-05,
"loss": 0.186,
"step": 2000
},
{
"epoch": 0.38,
"learning_rate": 4.838979775859848e-05,
"loss": 0.1556,
"step": 3000
},
{
"epoch": 0.51,
"learning_rate": 4.7745716862037875e-05,
"loss": 0.1494,
"step": 4000
},
{
"epoch": 0.64,
"learning_rate": 4.710163596547726e-05,
"loss": 0.1352,
"step": 5000
},
{
"epoch": 0.77,
"learning_rate": 4.645755506891666e-05,
"loss": 0.132,
"step": 6000
},
{
"epoch": 0.9,
"learning_rate": 4.581347417235605e-05,
"loss": 0.1282,
"step": 7000
},
{
"epoch": 1.0,
"eval_accuracy": 0.726872,
"eval_f1": 0.8632296736577655,
"eval_loss": 0.12385939061641693,
"eval_roc_auc": 0.8871861128983161,
"eval_runtime": 1129.5296,
"eval_samples_per_second": 221.331,
"eval_steps_per_second": 6.917,
"step": 7813
},
{
"epoch": 1.02,
"learning_rate": 4.516939327579544e-05,
"loss": 0.1195,
"step": 8000
},
{
"epoch": 1.15,
"learning_rate": 4.4525312379234836e-05,
"loss": 0.0897,
"step": 9000
},
{
"epoch": 1.28,
"learning_rate": 4.3881231482674224e-05,
"loss": 0.0911,
"step": 10000
},
{
"epoch": 1.41,
"learning_rate": 4.323715058611362e-05,
"loss": 0.0911,
"step": 11000
},
{
"epoch": 1.54,
"learning_rate": 4.2593069689553014e-05,
"loss": 0.093,
"step": 12000
},
{
"epoch": 1.66,
"learning_rate": 4.19489887929924e-05,
"loss": 0.093,
"step": 13000
},
{
"epoch": 1.79,
"learning_rate": 4.130490789643179e-05,
"loss": 0.0913,
"step": 14000
},
{
"epoch": 1.92,
"learning_rate": 4.0660826999871186e-05,
"loss": 0.0896,
"step": 15000
},
{
"epoch": 2.0,
"eval_accuracy": 0.780268,
"eval_f1": 0.9023734184296859,
"eval_loss": 0.10922548174858093,
"eval_roc_auc": 0.9264076688968317,
"eval_runtime": 1128.1964,
"eval_samples_per_second": 221.593,
"eval_steps_per_second": 6.925,
"step": 15626
},
{
"epoch": 2.05,
"learning_rate": 4.0016746103310574e-05,
"loss": 0.077,
"step": 16000
},
{
"epoch": 2.18,
"learning_rate": 3.937266520674997e-05,
"loss": 0.057,
"step": 17000
},
{
"epoch": 2.3,
"learning_rate": 3.8728584310189364e-05,
"loss": 0.0571,
"step": 18000
},
{
"epoch": 2.43,
"learning_rate": 3.808450341362875e-05,
"loss": 0.0613,
"step": 19000
},
{
"epoch": 2.56,
"learning_rate": 3.744042251706815e-05,
"loss": 0.062,
"step": 20000
},
{
"epoch": 2.69,
"learning_rate": 3.6796341620507536e-05,
"loss": 0.0603,
"step": 21000
},
{
"epoch": 2.82,
"learning_rate": 3.615226072394693e-05,
"loss": 0.0596,
"step": 22000
},
{
"epoch": 2.94,
"learning_rate": 3.5508179827386326e-05,
"loss": 0.0596,
"step": 23000
},
{
"epoch": 3.0,
"eval_accuracy": 0.796324,
"eval_f1": 0.9105269872918949,
"eval_loss": 0.11485119163990021,
"eval_roc_auc": 0.9327701339514269,
"eval_runtime": 1128.1956,
"eval_samples_per_second": 221.593,
"eval_steps_per_second": 6.925,
"step": 23439
},
{
"epoch": 3.07,
"learning_rate": 3.4864098930825714e-05,
"loss": 0.0464,
"step": 24000
},
{
"epoch": 3.2,
"learning_rate": 3.422001803426511e-05,
"loss": 0.0364,
"step": 25000
},
{
"epoch": 3.33,
"learning_rate": 3.35759371377045e-05,
"loss": 0.0388,
"step": 26000
},
{
"epoch": 3.46,
"learning_rate": 3.2931856241143885e-05,
"loss": 0.0379,
"step": 27000
},
{
"epoch": 3.58,
"learning_rate": 3.228777534458328e-05,
"loss": 0.0412,
"step": 28000
},
{
"epoch": 3.71,
"learning_rate": 3.164369444802267e-05,
"loss": 0.0415,
"step": 29000
},
{
"epoch": 3.84,
"learning_rate": 3.0999613551462063e-05,
"loss": 0.0398,
"step": 30000
},
{
"epoch": 3.97,
"learning_rate": 3.035553265490146e-05,
"loss": 0.0391,
"step": 31000
},
{
"epoch": 4.0,
"eval_accuracy": 0.805976,
"eval_f1": 0.9167903002642254,
"eval_loss": 0.13589395582675934,
"eval_roc_auc": 0.9395522583821788,
"eval_runtime": 1127.0647,
"eval_samples_per_second": 221.815,
"eval_steps_per_second": 6.932,
"step": 31252
},
{
"epoch": 4.1,
"learning_rate": 2.9711451758340847e-05,
"loss": 0.0267,
"step": 32000
},
{
"epoch": 4.22,
"learning_rate": 2.906737086178024e-05,
"loss": 0.0251,
"step": 33000
},
{
"epoch": 4.35,
"learning_rate": 2.8423289965219633e-05,
"loss": 0.0251,
"step": 34000
},
{
"epoch": 4.48,
"learning_rate": 2.7779209068659025e-05,
"loss": 0.0243,
"step": 35000
},
{
"epoch": 4.61,
"learning_rate": 2.7135128172098416e-05,
"loss": 0.0248,
"step": 36000
},
{
"epoch": 4.74,
"learning_rate": 2.649104727553781e-05,
"loss": 0.0264,
"step": 37000
},
{
"epoch": 4.86,
"learning_rate": 2.58469663789772e-05,
"loss": 0.0257,
"step": 38000
},
{
"epoch": 4.99,
"learning_rate": 2.5202885482416595e-05,
"loss": 0.0275,
"step": 39000
},
{
"epoch": 5.0,
"eval_accuracy": 0.811504,
"eval_f1": 0.9201141643125628,
"eval_loss": 0.1432325541973114,
"eval_roc_auc": 0.944372612395835,
"eval_runtime": 1127.5732,
"eval_samples_per_second": 221.715,
"eval_steps_per_second": 6.929,
"step": 39065
},
{
"epoch": 5.12,
"learning_rate": 2.4558804585855986e-05,
"loss": 0.0156,
"step": 40000
},
{
"epoch": 5.25,
"learning_rate": 2.3914723689295378e-05,
"loss": 0.0152,
"step": 41000
},
{
"epoch": 5.38,
"learning_rate": 2.3270642792734766e-05,
"loss": 0.0153,
"step": 42000
},
{
"epoch": 5.5,
"learning_rate": 2.262656189617416e-05,
"loss": 0.0153,
"step": 43000
},
{
"epoch": 5.63,
"learning_rate": 2.1982480999613553e-05,
"loss": 0.0157,
"step": 44000
},
{
"epoch": 5.76,
"learning_rate": 2.1338400103052944e-05,
"loss": 0.0183,
"step": 45000
},
{
"epoch": 5.89,
"learning_rate": 2.0694319206492336e-05,
"loss": 0.0165,
"step": 46000
},
{
"epoch": 6.0,
"eval_accuracy": 0.820196,
"eval_f1": 0.926118284939716,
"eval_loss": 0.1556038111448288,
"eval_roc_auc": 0.9567798902277629,
"eval_runtime": 1129.3479,
"eval_samples_per_second": 221.367,
"eval_steps_per_second": 6.918,
"step": 46878
},
{
"epoch": 6.02,
"learning_rate": 2.0050238309931728e-05,
"loss": 0.014,
"step": 47000
},
{
"epoch": 6.14,
"learning_rate": 1.940615741337112e-05,
"loss": 0.0095,
"step": 48000
},
{
"epoch": 6.27,
"learning_rate": 1.876207651681051e-05,
"loss": 0.0101,
"step": 49000
},
{
"epoch": 6.4,
"learning_rate": 1.8117995620249902e-05,
"loss": 0.0108,
"step": 50000
},
{
"epoch": 6.53,
"learning_rate": 1.7473914723689297e-05,
"loss": 0.0098,
"step": 51000
},
{
"epoch": 6.66,
"learning_rate": 1.682983382712869e-05,
"loss": 0.0124,
"step": 52000
},
{
"epoch": 6.78,
"learning_rate": 1.618575293056808e-05,
"loss": 0.0104,
"step": 53000
},
{
"epoch": 6.91,
"learning_rate": 1.5541672034007472e-05,
"loss": 0.0102,
"step": 54000
},
{
"epoch": 7.0,
"eval_accuracy": 0.822104,
"eval_f1": 0.9267807957481025,
"eval_loss": 0.17248359322547913,
"eval_roc_auc": 0.9574790529696302,
"eval_runtime": 1129.5316,
"eval_samples_per_second": 221.331,
"eval_steps_per_second": 6.917,
"step": 54691
},
{
"epoch": 7.04,
"learning_rate": 1.4897591137446864e-05,
"loss": 0.0086,
"step": 55000
},
{
"epoch": 7.17,
"learning_rate": 1.4253510240886256e-05,
"loss": 0.0061,
"step": 56000
},
{
"epoch": 7.3,
"learning_rate": 1.3609429344325647e-05,
"loss": 0.0056,
"step": 57000
},
{
"epoch": 7.42,
"learning_rate": 1.296534844776504e-05,
"loss": 0.0058,
"step": 58000
},
{
"epoch": 7.55,
"learning_rate": 1.2321267551204432e-05,
"loss": 0.0056,
"step": 59000
},
{
"epoch": 7.68,
"learning_rate": 1.1677186654643824e-05,
"loss": 0.0057,
"step": 60000
},
{
"epoch": 7.81,
"learning_rate": 1.1033105758083217e-05,
"loss": 0.0058,
"step": 61000
},
{
"epoch": 7.94,
"learning_rate": 1.0389024861522607e-05,
"loss": 0.006,
"step": 62000
},
{
"epoch": 8.0,
"eval_accuracy": 0.82428,
"eval_f1": 0.9274938850870897,
"eval_loss": 0.17893491685390472,
"eval_roc_auc": 0.954270360446044,
"eval_runtime": 1128.7984,
"eval_samples_per_second": 221.474,
"eval_steps_per_second": 6.922,
"step": 62504
},
{
"epoch": 8.06,
"learning_rate": 9.744943964962e-06,
"loss": 0.0043,
"step": 63000
},
{
"epoch": 8.19,
"learning_rate": 9.100863068401392e-06,
"loss": 0.0033,
"step": 64000
},
{
"epoch": 8.32,
"learning_rate": 8.456782171840783e-06,
"loss": 0.0036,
"step": 65000
},
{
"epoch": 8.45,
"learning_rate": 7.812701275280175e-06,
"loss": 0.003,
"step": 66000
},
{
"epoch": 8.58,
"learning_rate": 7.1686203787195675e-06,
"loss": 0.0033,
"step": 67000
},
{
"epoch": 8.7,
"learning_rate": 6.524539482158959e-06,
"loss": 0.0034,
"step": 68000
},
{
"epoch": 8.83,
"learning_rate": 5.8804585855983516e-06,
"loss": 0.003,
"step": 69000
},
{
"epoch": 8.96,
"learning_rate": 5.236377689037743e-06,
"loss": 0.0026,
"step": 70000
},
{
"epoch": 9.0,
"eval_accuracy": 0.826624,
"eval_f1": 0.9293855295231974,
"eval_loss": 0.19182080030441284,
"eval_roc_auc": 0.9598984635520128,
"eval_runtime": 1129.0264,
"eval_samples_per_second": 221.43,
"eval_steps_per_second": 6.92,
"step": 70317
}
],
"max_steps": 78130,
"num_train_epochs": 10,
"total_flos": 3.639239973215278e+17,
"trial_name": null,
"trial_params": null
}