{
  "best_metric": 0.9293855295231974,
  "best_model_checkpoint": "Datasets/ICD/ICD_11_multilabel_MultiLabelSoftMarginLoss_FINAL_final/checkpoint-70317",
  "epoch": 9.0,
  "global_step": 70317,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13,
      "learning_rate": 4.9677959551719697e-05,
      "loss": 0.6152,
      "step": 1000
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.9033878655159085e-05,
      "loss": 0.186,
      "step": 2000
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.838979775859848e-05,
      "loss": 0.1556,
      "step": 3000
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.7745716862037875e-05,
      "loss": 0.1494,
      "step": 4000
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.710163596547726e-05,
      "loss": 0.1352,
      "step": 5000
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.645755506891666e-05,
      "loss": 0.132,
      "step": 6000
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.581347417235605e-05,
      "loss": 0.1282,
      "step": 7000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.726872,
      "eval_f1": 0.8632296736577655,
      "eval_loss": 0.12385939061641693,
      "eval_roc_auc": 0.8871861128983161,
      "eval_runtime": 1129.5296,
      "eval_samples_per_second": 221.331,
      "eval_steps_per_second": 6.917,
      "step": 7813
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.516939327579544e-05,
      "loss": 0.1195,
      "step": 8000
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.4525312379234836e-05,
      "loss": 0.0897,
      "step": 9000
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.3881231482674224e-05,
      "loss": 0.0911,
      "step": 10000
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.323715058611362e-05,
      "loss": 0.0911,
      "step": 11000
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.2593069689553014e-05,
      "loss": 0.093,
      "step": 12000
    },
    {
      "epoch": 1.66,
      "learning_rate": 4.19489887929924e-05,
      "loss": 0.093,
      "step": 13000
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.130490789643179e-05,
      "loss": 0.0913,
      "step": 14000
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.0660826999871186e-05,
      "loss": 0.0896,
      "step": 15000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.780268,
      "eval_f1": 0.9023734184296859,
      "eval_loss": 0.10922548174858093,
      "eval_roc_auc": 0.9264076688968317,
      "eval_runtime": 1128.1964,
      "eval_samples_per_second": 221.593,
      "eval_steps_per_second": 6.925,
      "step": 15626
    },
    {
      "epoch": 2.05,
      "learning_rate": 4.0016746103310574e-05,
      "loss": 0.077,
      "step": 16000
    },
    {
      "epoch": 2.18,
      "learning_rate": 3.937266520674997e-05,
      "loss": 0.057,
      "step": 17000
    },
    {
      "epoch": 2.3,
      "learning_rate": 3.8728584310189364e-05,
      "loss": 0.0571,
      "step": 18000
    },
    {
      "epoch": 2.43,
      "learning_rate": 3.808450341362875e-05,
      "loss": 0.0613,
      "step": 19000
    },
    {
      "epoch": 2.56,
      "learning_rate": 3.744042251706815e-05,
      "loss": 0.062,
      "step": 20000
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.6796341620507536e-05,
      "loss": 0.0603,
      "step": 21000
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.615226072394693e-05,
      "loss": 0.0596,
      "step": 22000
    },
    {
      "epoch": 2.94,
      "learning_rate": 3.5508179827386326e-05,
      "loss": 0.0596,
      "step": 23000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.796324,
      "eval_f1": 0.9105269872918949,
      "eval_loss": 0.11485119163990021,
      "eval_roc_auc": 0.9327701339514269,
      "eval_runtime": 1128.1956,
      "eval_samples_per_second": 221.593,
      "eval_steps_per_second": 6.925,
      "step": 23439
    },
    {
      "epoch": 3.07,
      "learning_rate": 3.4864098930825714e-05,
      "loss": 0.0464,
      "step": 24000
    },
    {
      "epoch": 3.2,
      "learning_rate": 3.422001803426511e-05,
      "loss": 0.0364,
      "step": 25000
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.35759371377045e-05,
      "loss": 0.0388,
      "step": 26000
    },
    {
      "epoch": 3.46,
      "learning_rate": 3.2931856241143885e-05,
      "loss": 0.0379,
      "step": 27000
    },
    {
      "epoch": 3.58,
      "learning_rate": 3.228777534458328e-05,
      "loss": 0.0412,
      "step": 28000
    },
    {
      "epoch": 3.71,
      "learning_rate": 3.164369444802267e-05,
      "loss": 0.0415,
      "step": 29000
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.0999613551462063e-05,
      "loss": 0.0398,
      "step": 30000
    },
    {
      "epoch": 3.97,
      "learning_rate": 3.035553265490146e-05,
      "loss": 0.0391,
      "step": 31000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.805976,
      "eval_f1": 0.9167903002642254,
      "eval_loss": 0.13589395582675934,
      "eval_roc_auc": 0.9395522583821788,
      "eval_runtime": 1127.0647,
      "eval_samples_per_second": 221.815,
      "eval_steps_per_second": 6.932,
      "step": 31252
    },
    {
      "epoch": 4.1,
      "learning_rate": 2.9711451758340847e-05,
      "loss": 0.0267,
      "step": 32000
    },
    {
      "epoch": 4.22,
      "learning_rate": 2.906737086178024e-05,
      "loss": 0.0251,
      "step": 33000
    },
    {
      "epoch": 4.35,
      "learning_rate": 2.8423289965219633e-05,
      "loss": 0.0251,
      "step": 34000
    },
    {
      "epoch": 4.48,
      "learning_rate": 2.7779209068659025e-05,
      "loss": 0.0243,
      "step": 35000
    },
    {
      "epoch": 4.61,
      "learning_rate": 2.7135128172098416e-05,
      "loss": 0.0248,
      "step": 36000
    },
    {
      "epoch": 4.74,
      "learning_rate": 2.649104727553781e-05,
      "loss": 0.0264,
      "step": 37000
    },
    {
      "epoch": 4.86,
      "learning_rate": 2.58469663789772e-05,
      "loss": 0.0257,
      "step": 38000
    },
    {
      "epoch": 4.99,
      "learning_rate": 2.5202885482416595e-05,
      "loss": 0.0275,
      "step": 39000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.811504,
      "eval_f1": 0.9201141643125628,
      "eval_loss": 0.1432325541973114,
      "eval_roc_auc": 0.944372612395835,
      "eval_runtime": 1127.5732,
      "eval_samples_per_second": 221.715,
      "eval_steps_per_second": 6.929,
      "step": 39065
    },
    {
      "epoch": 5.12,
      "learning_rate": 2.4558804585855986e-05,
      "loss": 0.0156,
      "step": 40000
    },
    {
      "epoch": 5.25,
      "learning_rate": 2.3914723689295378e-05,
      "loss": 0.0152,
      "step": 41000
    },
    {
      "epoch": 5.38,
      "learning_rate": 2.3270642792734766e-05,
      "loss": 0.0153,
      "step": 42000
    },
    {
      "epoch": 5.5,
      "learning_rate": 2.262656189617416e-05,
      "loss": 0.0153,
      "step": 43000
    },
    {
      "epoch": 5.63,
      "learning_rate": 2.1982480999613553e-05,
      "loss": 0.0157,
      "step": 44000
    },
    {
      "epoch": 5.76,
      "learning_rate": 2.1338400103052944e-05,
      "loss": 0.0183,
      "step": 45000
    },
    {
      "epoch": 5.89,
      "learning_rate": 2.0694319206492336e-05,
      "loss": 0.0165,
      "step": 46000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.820196,
      "eval_f1": 0.926118284939716,
      "eval_loss": 0.1556038111448288,
      "eval_roc_auc": 0.9567798902277629,
      "eval_runtime": 1129.3479,
      "eval_samples_per_second": 221.367,
      "eval_steps_per_second": 6.918,
      "step": 46878
    },
    {
      "epoch": 6.02,
      "learning_rate": 2.0050238309931728e-05,
      "loss": 0.014,
      "step": 47000
    },
    {
      "epoch": 6.14,
      "learning_rate": 1.940615741337112e-05,
      "loss": 0.0095,
      "step": 48000
    },
    {
      "epoch": 6.27,
      "learning_rate": 1.876207651681051e-05,
      "loss": 0.0101,
      "step": 49000
    },
    {
      "epoch": 6.4,
      "learning_rate": 1.8117995620249902e-05,
      "loss": 0.0108,
      "step": 50000
    },
    {
      "epoch": 6.53,
      "learning_rate": 1.7473914723689297e-05,
      "loss": 0.0098,
      "step": 51000
    },
    {
      "epoch": 6.66,
      "learning_rate": 1.682983382712869e-05,
      "loss": 0.0124,
      "step": 52000
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.618575293056808e-05,
      "loss": 0.0104,
      "step": 53000
    },
    {
      "epoch": 6.91,
      "learning_rate": 1.5541672034007472e-05,
      "loss": 0.0102,
      "step": 54000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.822104,
      "eval_f1": 0.9267807957481025,
      "eval_loss": 0.17248359322547913,
      "eval_roc_auc": 0.9574790529696302,
      "eval_runtime": 1129.5316,
      "eval_samples_per_second": 221.331,
      "eval_steps_per_second": 6.917,
      "step": 54691
    },
    {
      "epoch": 7.04,
      "learning_rate": 1.4897591137446864e-05,
      "loss": 0.0086,
      "step": 55000
    },
    {
      "epoch": 7.17,
      "learning_rate": 1.4253510240886256e-05,
      "loss": 0.0061,
      "step": 56000
    },
    {
      "epoch": 7.3,
      "learning_rate": 1.3609429344325647e-05,
      "loss": 0.0056,
      "step": 57000
    },
    {
      "epoch": 7.42,
      "learning_rate": 1.296534844776504e-05,
      "loss": 0.0058,
      "step": 58000
    },
    {
      "epoch": 7.55,
      "learning_rate": 1.2321267551204432e-05,
      "loss": 0.0056,
      "step": 59000
    },
    {
      "epoch": 7.68,
      "learning_rate": 1.1677186654643824e-05,
      "loss": 0.0057,
      "step": 60000
    },
    {
      "epoch": 7.81,
      "learning_rate": 1.1033105758083217e-05,
      "loss": 0.0058,
      "step": 61000
    },
    {
      "epoch": 7.94,
      "learning_rate": 1.0389024861522607e-05,
      "loss": 0.006,
      "step": 62000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.82428,
      "eval_f1": 0.9274938850870897,
      "eval_loss": 0.17893491685390472,
      "eval_roc_auc": 0.954270360446044,
      "eval_runtime": 1128.7984,
      "eval_samples_per_second": 221.474,
      "eval_steps_per_second": 6.922,
      "step": 62504
    },
    {
      "epoch": 8.06,
      "learning_rate": 9.744943964962e-06,
      "loss": 0.0043,
      "step": 63000
    },
    {
      "epoch": 8.19,
      "learning_rate": 9.100863068401392e-06,
      "loss": 0.0033,
      "step": 64000
    },
    {
      "epoch": 8.32,
      "learning_rate": 8.456782171840783e-06,
      "loss": 0.0036,
      "step": 65000
    },
    {
      "epoch": 8.45,
      "learning_rate": 7.812701275280175e-06,
      "loss": 0.003,
      "step": 66000
    },
    {
      "epoch": 8.58,
      "learning_rate": 7.1686203787195675e-06,
      "loss": 0.0033,
      "step": 67000
    },
    {
      "epoch": 8.7,
      "learning_rate": 6.524539482158959e-06,
      "loss": 0.0034,
      "step": 68000
    },
    {
      "epoch": 8.83,
      "learning_rate": 5.8804585855983516e-06,
      "loss": 0.003,
      "step": 69000
    },
    {
      "epoch": 8.96,
      "learning_rate": 5.236377689037743e-06,
      "loss": 0.0026,
      "step": 70000
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.826624,
      "eval_f1": 0.9293855295231974,
      "eval_loss": 0.19182080030441284,
      "eval_roc_auc": 0.9598984635520128,
      "eval_runtime": 1129.0264,
      "eval_samples_per_second": 221.43,
      "eval_steps_per_second": 6.92,
      "step": 70317
    }
  ],
  "max_steps": 78130,
  "num_train_epochs": 10,
  "total_flos": 3.639239973215278e+17,
  "trial_name": null,
  "trial_params": null
}
|