|
{ |
|
"best_metric": 0.8924948901640382, |
|
"best_model_checkpoint": "/content/drive/MyDrive/PUBH 8885 CB/nuc_arg_long_readv4/checkpoint-1500", |
|
"epoch": 3.663003663003663, |
|
"eval_steps": 100, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2442002442002442, |
|
"grad_norm": 25.231292724609375, |
|
"learning_rate": 5e-06, |
|
"loss": 2.8722, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2442002442002442, |
|
"eval_loss": 2.381986141204834, |
|
"eval_macro_f1": 0.07916135790210403, |
|
"eval_macro_precision": 0.06590895945398809, |
|
"eval_macro_recall": 0.10263326510757667, |
|
"eval_micro_f1": 0.4847560975609756, |
|
"eval_micro_precision": 0.4847560975609756, |
|
"eval_micro_recall": 0.4847560975609756, |
|
"eval_runtime": 27.9503, |
|
"eval_samples_per_second": 82.146, |
|
"eval_steps_per_second": 1.288, |
|
"eval_weighted_f1": 0.3658094645034367, |
|
"eval_weighted_precision": 0.3000991244760369, |
|
"eval_weighted_recall": 0.4847560975609756, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.4884004884004884, |
|
"grad_norm": 74.01921844482422, |
|
"learning_rate": 9.950000000000001e-06, |
|
"loss": 1.9467, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4884004884004884, |
|
"eval_loss": 1.6188805103302002, |
|
"eval_macro_f1": 0.12352787740442463, |
|
"eval_macro_precision": 0.1729632869793354, |
|
"eval_macro_recall": 0.14519904411264478, |
|
"eval_micro_f1": 0.6032229965156795, |
|
"eval_micro_precision": 0.6032229965156795, |
|
"eval_micro_recall": 0.6032229965156795, |
|
"eval_runtime": 27.9989, |
|
"eval_samples_per_second": 82.003, |
|
"eval_steps_per_second": 1.286, |
|
"eval_weighted_f1": 0.502828345764972, |
|
"eval_weighted_precision": 0.5102415198384107, |
|
"eval_weighted_recall": 0.6032229965156795, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 61.67491149902344, |
|
"learning_rate": 1.4950000000000001e-05, |
|
"loss": 1.5019, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"eval_loss": 1.3270624876022339, |
|
"eval_macro_f1": 0.18095213980700245, |
|
"eval_macro_precision": 0.1639157420556758, |
|
"eval_macro_recall": 0.2060668906229992, |
|
"eval_micro_f1": 0.6872822299651568, |
|
"eval_micro_precision": 0.6872822299651568, |
|
"eval_micro_recall": 0.6872822299651568, |
|
"eval_runtime": 28.1715, |
|
"eval_samples_per_second": 81.501, |
|
"eval_steps_per_second": 1.278, |
|
"eval_weighted_f1": 0.6267031640391093, |
|
"eval_weighted_precision": 0.5821328002832489, |
|
"eval_weighted_recall": 0.6872822299651568, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9768009768009768, |
|
"grad_norm": 93.83369445800781, |
|
"learning_rate": 1.9900000000000003e-05, |
|
"loss": 1.2602, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9768009768009768, |
|
"eval_loss": 1.1570900678634644, |
|
"eval_macro_f1": 0.24796229984985535, |
|
"eval_macro_precision": 0.250395185204251, |
|
"eval_macro_recall": 0.2602306770905881, |
|
"eval_micro_f1": 0.7151567944250871, |
|
"eval_micro_precision": 0.7151567944250871, |
|
"eval_micro_recall": 0.7151567944250871, |
|
"eval_runtime": 28.097, |
|
"eval_samples_per_second": 81.717, |
|
"eval_steps_per_second": 1.281, |
|
"eval_weighted_f1": 0.6742913163869271, |
|
"eval_weighted_precision": 0.6523515088941247, |
|
"eval_weighted_recall": 0.7151567944250871, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.221001221001221, |
|
"grad_norm": 27.968883514404297, |
|
"learning_rate": 2.4900000000000002e-05, |
|
"loss": 1.0116, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.221001221001221, |
|
"eval_loss": 0.8759449124336243, |
|
"eval_macro_f1": 0.40662329686945736, |
|
"eval_macro_precision": 0.4270552853235523, |
|
"eval_macro_recall": 0.40522488609544827, |
|
"eval_micro_f1": 0.801829268292683, |
|
"eval_micro_precision": 0.801829268292683, |
|
"eval_micro_recall": 0.801829268292683, |
|
"eval_runtime": 28.1259, |
|
"eval_samples_per_second": 81.633, |
|
"eval_steps_per_second": 1.28, |
|
"eval_weighted_f1": 0.7856079074786287, |
|
"eval_weighted_precision": 0.778677751235951, |
|
"eval_weighted_recall": 0.801829268292683, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4652014652014653, |
|
"grad_norm": 40.660743713378906, |
|
"learning_rate": 2.4752635521978294e-05, |
|
"loss": 0.8539, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4652014652014653, |
|
"eval_loss": 0.7635390758514404, |
|
"eval_macro_f1": 0.47249477353778435, |
|
"eval_macro_precision": 0.49858319443807503, |
|
"eval_macro_recall": 0.49267372749042937, |
|
"eval_micro_f1": 0.8340592334494773, |
|
"eval_micro_precision": 0.8340592334494773, |
|
"eval_micro_recall": 0.8340592334494773, |
|
"eval_runtime": 28.2397, |
|
"eval_samples_per_second": 81.304, |
|
"eval_steps_per_second": 1.275, |
|
"eval_weighted_f1": 0.8260353217142836, |
|
"eval_weighted_precision": 0.8263412894566534, |
|
"eval_weighted_recall": 0.8340592334494773, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.7094017094017095, |
|
"grad_norm": 69.74828338623047, |
|
"learning_rate": 2.40005092726346e-05, |
|
"loss": 0.7398, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.7094017094017095, |
|
"eval_loss": 0.6827709674835205, |
|
"eval_macro_f1": 0.5156494920599783, |
|
"eval_macro_precision": 0.595919953102261, |
|
"eval_macro_recall": 0.516030034812032, |
|
"eval_micro_f1": 0.8466898954703833, |
|
"eval_micro_precision": 0.8466898954703833, |
|
"eval_micro_recall": 0.8466898954703833, |
|
"eval_runtime": 28.0593, |
|
"eval_samples_per_second": 81.827, |
|
"eval_steps_per_second": 1.283, |
|
"eval_weighted_f1": 0.8359309638776365, |
|
"eval_weighted_precision": 0.8368823626772747, |
|
"eval_weighted_recall": 0.8466898954703833, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9536019536019538, |
|
"grad_norm": 18.172765731811523, |
|
"learning_rate": 2.2774508957989417e-05, |
|
"loss": 0.6728, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.9536019536019538, |
|
"eval_loss": 0.6122449040412903, |
|
"eval_macro_f1": 0.5609693508323702, |
|
"eval_macro_precision": 0.6700802281488617, |
|
"eval_macro_recall": 0.5537448671543558, |
|
"eval_micro_f1": 0.8527874564459931, |
|
"eval_micro_precision": 0.8527874564459931, |
|
"eval_micro_recall": 0.8527874564459931, |
|
"eval_runtime": 28.5179, |
|
"eval_samples_per_second": 80.511, |
|
"eval_steps_per_second": 1.262, |
|
"eval_weighted_f1": 0.8465989784626531, |
|
"eval_weighted_precision": 0.8602585363665758, |
|
"eval_weighted_recall": 0.8527874564459931, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.197802197802198, |
|
"grad_norm": 22.065692901611328, |
|
"learning_rate": 2.112515144989503e-05, |
|
"loss": 0.5593, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.197802197802198, |
|
"eval_loss": 0.6033230423927307, |
|
"eval_macro_f1": 0.6269631440779232, |
|
"eval_macro_precision": 0.6458086803108424, |
|
"eval_macro_recall": 0.6223857477581206, |
|
"eval_micro_f1": 0.8628048780487805, |
|
"eval_micro_precision": 0.8628048780487805, |
|
"eval_micro_recall": 0.8628048780487805, |
|
"eval_runtime": 28.0799, |
|
"eval_samples_per_second": 81.767, |
|
"eval_steps_per_second": 1.282, |
|
"eval_weighted_f1": 0.8604939320250599, |
|
"eval_weighted_precision": 0.8620706032280505, |
|
"eval_weighted_recall": 0.8628048780487805, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.442002442002442, |
|
"grad_norm": 46.23727035522461, |
|
"learning_rate": 1.912039789067721e-05, |
|
"loss": 0.5492, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.442002442002442, |
|
"eval_loss": 0.5785375833511353, |
|
"eval_macro_f1": 0.656816564197882, |
|
"eval_macro_precision": 0.7161193189547552, |
|
"eval_macro_recall": 0.656158547673032, |
|
"eval_micro_f1": 0.8610627177700348, |
|
"eval_micro_precision": 0.8610627177700348, |
|
"eval_micro_recall": 0.8610627177700348, |
|
"eval_runtime": 27.9572, |
|
"eval_samples_per_second": 82.126, |
|
"eval_steps_per_second": 1.288, |
|
"eval_weighted_f1": 0.8635374104852748, |
|
"eval_weighted_precision": 0.8750576393113125, |
|
"eval_weighted_recall": 0.8610627177700348, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6862026862026864, |
|
"grad_norm": 18.426353454589844, |
|
"learning_rate": 1.6842853380380934e-05, |
|
"loss": 0.5028, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.6862026862026864, |
|
"eval_loss": 0.5303468108177185, |
|
"eval_macro_f1": 0.7262289134052592, |
|
"eval_macro_precision": 0.7495966854257389, |
|
"eval_macro_recall": 0.7177312989902742, |
|
"eval_micro_f1": 0.8854529616724739, |
|
"eval_micro_precision": 0.8854529616724739, |
|
"eval_micro_recall": 0.8854529616724739, |
|
"eval_runtime": 28.1073, |
|
"eval_samples_per_second": 81.687, |
|
"eval_steps_per_second": 1.281, |
|
"eval_weighted_f1": 0.8844974105345418, |
|
"eval_weighted_precision": 0.8869278196582587, |
|
"eval_weighted_recall": 0.8854529616724739, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.9304029304029307, |
|
"grad_norm": 18.274917602539062, |
|
"learning_rate": 1.4386363265365535e-05, |
|
"loss": 0.5033, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.9304029304029307, |
|
"eval_loss": 0.49514588713645935, |
|
"eval_macro_f1": 0.7355993207563093, |
|
"eval_macro_precision": 0.7372036245463958, |
|
"eval_macro_recall": 0.7414049084020491, |
|
"eval_micro_f1": 0.8876306620209059, |
|
"eval_micro_precision": 0.8876306620209059, |
|
"eval_micro_recall": 0.8876306620209059, |
|
"eval_runtime": 27.7713, |
|
"eval_samples_per_second": 82.675, |
|
"eval_steps_per_second": 1.296, |
|
"eval_weighted_f1": 0.8867237445831763, |
|
"eval_weighted_precision": 0.8876630720638423, |
|
"eval_weighted_recall": 0.8876306620209059, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.1746031746031744, |
|
"grad_norm": 16.126319885253906, |
|
"learning_rate": 1.1852146276869743e-05, |
|
"loss": 0.4332, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.1746031746031744, |
|
"eval_loss": 0.5176035165786743, |
|
"eval_macro_f1": 0.8066235690769744, |
|
"eval_macro_precision": 0.8112408819720179, |
|
"eval_macro_recall": 0.8135936233566461, |
|
"eval_micro_f1": 0.8867595818815331, |
|
"eval_micro_precision": 0.8867595818815331, |
|
"eval_micro_recall": 0.8867595818815331, |
|
"eval_runtime": 28.2301, |
|
"eval_samples_per_second": 81.332, |
|
"eval_steps_per_second": 1.275, |
|
"eval_weighted_f1": 0.8881457405419927, |
|
"eval_weighted_precision": 0.892519346409985, |
|
"eval_weighted_recall": 0.8867595818815331, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.4188034188034186, |
|
"grad_norm": 25.683168411254883, |
|
"learning_rate": 9.344623852086093e-06, |
|
"loss": 0.4328, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.4188034188034186, |
|
"eval_loss": 0.48010551929473877, |
|
"eval_macro_f1": 0.8188574972016193, |
|
"eval_macro_precision": 0.8223473803322559, |
|
"eval_macro_recall": 0.8205261076151831, |
|
"eval_micro_f1": 0.8906794425087108, |
|
"eval_micro_precision": 0.8906794425087108, |
|
"eval_micro_recall": 0.8906794425087108, |
|
"eval_runtime": 28.1842, |
|
"eval_samples_per_second": 81.464, |
|
"eval_steps_per_second": 1.277, |
|
"eval_weighted_f1": 0.8908187701448725, |
|
"eval_weighted_precision": 0.8923647873870492, |
|
"eval_weighted_recall": 0.8906794425087108, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 32.259490966796875, |
|
"learning_rate": 6.967117488967232e-06, |
|
"loss": 0.3965, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"eval_loss": 0.48184239864349365, |
|
"eval_macro_f1": 0.8216617574200699, |
|
"eval_macro_precision": 0.8214437028036421, |
|
"eval_macro_recall": 0.8278310307484175, |
|
"eval_micro_f1": 0.89198606271777, |
|
"eval_micro_precision": 0.89198606271777, |
|
"eval_micro_recall": 0.89198606271777, |
|
"eval_runtime": 27.8631, |
|
"eval_samples_per_second": 82.403, |
|
"eval_steps_per_second": 1.292, |
|
"eval_weighted_f1": 0.8924948901640382, |
|
"eval_weighted_precision": 0.8949476957454691, |
|
"eval_weighted_recall": 0.89198606271777, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2045, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.46348379202519e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|