{ "best_metric": 0.8924948901640382, "best_model_checkpoint": "/content/drive/MyDrive/PUBH 8885 CB/nuc_arg_long_readv4/checkpoint-1500", "epoch": 3.663003663003663, "eval_steps": 100, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2442002442002442, "grad_norm": 25.231292724609375, "learning_rate": 5e-06, "loss": 2.8722, "step": 100 }, { "epoch": 0.2442002442002442, "eval_loss": 2.381986141204834, "eval_macro_f1": 0.07916135790210403, "eval_macro_precision": 0.06590895945398809, "eval_macro_recall": 0.10263326510757667, "eval_micro_f1": 0.4847560975609756, "eval_micro_precision": 0.4847560975609756, "eval_micro_recall": 0.4847560975609756, "eval_runtime": 27.9503, "eval_samples_per_second": 82.146, "eval_steps_per_second": 1.288, "eval_weighted_f1": 0.3658094645034367, "eval_weighted_precision": 0.3000991244760369, "eval_weighted_recall": 0.4847560975609756, "step": 100 }, { "epoch": 0.4884004884004884, "grad_norm": 74.01921844482422, "learning_rate": 9.950000000000001e-06, "loss": 1.9467, "step": 200 }, { "epoch": 0.4884004884004884, "eval_loss": 1.6188805103302002, "eval_macro_f1": 0.12352787740442463, "eval_macro_precision": 0.1729632869793354, "eval_macro_recall": 0.14519904411264478, "eval_micro_f1": 0.6032229965156795, "eval_micro_precision": 0.6032229965156795, "eval_micro_recall": 0.6032229965156795, "eval_runtime": 27.9989, "eval_samples_per_second": 82.003, "eval_steps_per_second": 1.286, "eval_weighted_f1": 0.502828345764972, "eval_weighted_precision": 0.5102415198384107, "eval_weighted_recall": 0.6032229965156795, "step": 200 }, { "epoch": 0.7326007326007326, "grad_norm": 61.67491149902344, "learning_rate": 1.4950000000000001e-05, "loss": 1.5019, "step": 300 }, { "epoch": 0.7326007326007326, "eval_loss": 1.3270624876022339, "eval_macro_f1": 0.18095213980700245, "eval_macro_precision": 0.1639157420556758, "eval_macro_recall": 0.2060668906229992, "eval_micro_f1": 0.6872822299651568, "eval_micro_precision": 0.6872822299651568, "eval_micro_recall": 0.6872822299651568, "eval_runtime": 28.1715, "eval_samples_per_second": 81.501, "eval_steps_per_second": 1.278, "eval_weighted_f1": 0.6267031640391093, "eval_weighted_precision": 0.5821328002832489, "eval_weighted_recall": 0.6872822299651568, "step": 300 }, { "epoch": 0.9768009768009768, "grad_norm": 93.83369445800781, "learning_rate": 1.9900000000000003e-05, "loss": 1.2602, "step": 400 }, { "epoch": 0.9768009768009768, "eval_loss": 1.1570900678634644, "eval_macro_f1": 0.24796229984985535, "eval_macro_precision": 0.250395185204251, "eval_macro_recall": 0.2602306770905881, "eval_micro_f1": 0.7151567944250871, "eval_micro_precision": 0.7151567944250871, "eval_micro_recall": 0.7151567944250871, "eval_runtime": 28.097, "eval_samples_per_second": 81.717, "eval_steps_per_second": 1.281, "eval_weighted_f1": 0.6742913163869271, "eval_weighted_precision": 0.6523515088941247, "eval_weighted_recall": 0.7151567944250871, "step": 400 }, { "epoch": 1.221001221001221, "grad_norm": 27.968883514404297, "learning_rate": 2.4900000000000002e-05, "loss": 1.0116, "step": 500 }, { "epoch": 1.221001221001221, "eval_loss": 0.8759449124336243, "eval_macro_f1": 0.40662329686945736, "eval_macro_precision": 0.4270552853235523, "eval_macro_recall": 0.40522488609544827, "eval_micro_f1": 0.801829268292683, "eval_micro_precision": 0.801829268292683, "eval_micro_recall": 0.801829268292683, "eval_runtime": 28.1259, "eval_samples_per_second": 81.633, "eval_steps_per_second": 1.28, "eval_weighted_f1": 0.7856079074786287, "eval_weighted_precision": 0.778677751235951, "eval_weighted_recall": 0.801829268292683, "step": 500 }, { "epoch": 1.4652014652014653, "grad_norm": 40.660743713378906, "learning_rate": 2.4752635521978294e-05, "loss": 0.8539, "step": 600 }, { "epoch": 1.4652014652014653, "eval_loss": 0.7635390758514404, "eval_macro_f1": 0.47249477353778435, "eval_macro_precision": 0.49858319443807503, "eval_macro_recall": 0.49267372749042937, "eval_micro_f1": 0.8340592334494773, "eval_micro_precision": 0.8340592334494773, "eval_micro_recall": 0.8340592334494773, "eval_runtime": 28.2397, "eval_samples_per_second": 81.304, "eval_steps_per_second": 1.275, "eval_weighted_f1": 0.8260353217142836, "eval_weighted_precision": 0.8263412894566534, "eval_weighted_recall": 0.8340592334494773, "step": 600 }, { "epoch": 1.7094017094017095, "grad_norm": 69.74828338623047, "learning_rate": 2.40005092726346e-05, "loss": 0.7398, "step": 700 }, { "epoch": 1.7094017094017095, "eval_loss": 0.6827709674835205, "eval_macro_f1": 0.5156494920599783, "eval_macro_precision": 0.595919953102261, "eval_macro_recall": 0.516030034812032, "eval_micro_f1": 0.8466898954703833, "eval_micro_precision": 0.8466898954703833, "eval_micro_recall": 0.8466898954703833, "eval_runtime": 28.0593, "eval_samples_per_second": 81.827, "eval_steps_per_second": 1.283, "eval_weighted_f1": 0.8359309638776365, "eval_weighted_precision": 0.8368823626772747, "eval_weighted_recall": 0.8466898954703833, "step": 700 }, { "epoch": 1.9536019536019538, "grad_norm": 18.172765731811523, "learning_rate": 2.2774508957989417e-05, "loss": 0.6728, "step": 800 }, { "epoch": 1.9536019536019538, "eval_loss": 0.6122449040412903, "eval_macro_f1": 0.5609693508323702, "eval_macro_precision": 0.6700802281488617, "eval_macro_recall": 0.5537448671543558, "eval_micro_f1": 0.8527874564459931, "eval_micro_precision": 0.8527874564459931, "eval_micro_recall": 0.8527874564459931, "eval_runtime": 28.5179, "eval_samples_per_second": 80.511, "eval_steps_per_second": 1.262, "eval_weighted_f1": 0.8465989784626531, "eval_weighted_precision": 0.8602585363665758, "eval_weighted_recall": 0.8527874564459931, "step": 800 }, { "epoch": 2.197802197802198, "grad_norm": 22.065692901611328, "learning_rate": 2.112515144989503e-05, "loss": 0.5593, "step": 900 }, { "epoch": 2.197802197802198, "eval_loss": 0.6033230423927307, "eval_macro_f1": 0.6269631440779232, "eval_macro_precision": 0.6458086803108424, "eval_macro_recall": 0.6223857477581206, "eval_micro_f1": 0.8628048780487805, "eval_micro_precision": 0.8628048780487805, "eval_micro_recall": 0.8628048780487805, "eval_runtime": 28.0799, "eval_samples_per_second": 81.767, "eval_steps_per_second": 1.282, "eval_weighted_f1": 0.8604939320250599, "eval_weighted_precision": 0.8620706032280505, "eval_weighted_recall": 0.8628048780487805, "step": 900 }, { "epoch": 2.442002442002442, "grad_norm": 46.23727035522461, "learning_rate": 1.912039789067721e-05, "loss": 0.5492, "step": 1000 }, { "epoch": 2.442002442002442, "eval_loss": 0.5785375833511353, "eval_macro_f1": 0.656816564197882, "eval_macro_precision": 0.7161193189547552, "eval_macro_recall": 0.656158547673032, "eval_micro_f1": 0.8610627177700348, "eval_micro_precision": 0.8610627177700348, "eval_micro_recall": 0.8610627177700348, "eval_runtime": 27.9572, "eval_samples_per_second": 82.126, "eval_steps_per_second": 1.288, "eval_weighted_f1": 0.8635374104852748, "eval_weighted_precision": 0.8750576393113125, "eval_weighted_recall": 0.8610627177700348, "step": 1000 }, { "epoch": 2.6862026862026864, "grad_norm": 18.426353454589844, "learning_rate": 1.6842853380380934e-05, "loss": 0.5028, "step": 1100 }, { "epoch": 2.6862026862026864, "eval_loss": 0.5303468108177185, "eval_macro_f1": 0.7262289134052592, "eval_macro_precision": 0.7495966854257389, "eval_macro_recall": 0.7177312989902742, "eval_micro_f1": 0.8854529616724739, "eval_micro_precision": 0.8854529616724739, "eval_micro_recall": 0.8854529616724739, "eval_runtime": 28.1073, "eval_samples_per_second": 81.687, "eval_steps_per_second": 1.281, "eval_weighted_f1": 0.8844974105345418, "eval_weighted_precision": 0.8869278196582587, "eval_weighted_recall": 0.8854529616724739, "step": 1100 }, { "epoch": 2.9304029304029307, "grad_norm": 18.274917602539062, "learning_rate": 1.4386363265365535e-05, "loss": 0.5033, "step": 1200 }, { "epoch": 2.9304029304029307, "eval_loss": 0.49514588713645935, "eval_macro_f1": 0.7355993207563093, "eval_macro_precision": 0.7372036245463958, "eval_macro_recall": 0.7414049084020491, "eval_micro_f1": 0.8876306620209059, "eval_micro_precision": 0.8876306620209059, "eval_micro_recall": 0.8876306620209059, "eval_runtime": 27.7713, "eval_samples_per_second": 82.675, "eval_steps_per_second": 1.296, "eval_weighted_f1": 0.8867237445831763, "eval_weighted_precision": 0.8876630720638423, "eval_weighted_recall": 0.8876306620209059, "step": 1200 }, { "epoch": 3.1746031746031744, "grad_norm": 16.126319885253906, "learning_rate": 1.1852146276869743e-05, "loss": 0.4332, "step": 1300 }, { "epoch": 3.1746031746031744, "eval_loss": 0.5176035165786743, "eval_macro_f1": 0.8066235690769744, "eval_macro_precision": 0.8112408819720179, "eval_macro_recall": 0.8135936233566461, "eval_micro_f1": 0.8867595818815331, "eval_micro_precision": 0.8867595818815331, "eval_micro_recall": 0.8867595818815331, "eval_runtime": 28.2301, "eval_samples_per_second": 81.332, "eval_steps_per_second": 1.275, "eval_weighted_f1": 0.8881457405419927, "eval_weighted_precision": 0.892519346409985, "eval_weighted_recall": 0.8867595818815331, "step": 1300 }, { "epoch": 3.4188034188034186, "grad_norm": 25.683168411254883, "learning_rate": 9.344623852086093e-06, "loss": 0.4328, "step": 1400 }, { "epoch": 3.4188034188034186, "eval_loss": 0.48010551929473877, "eval_macro_f1": 0.8188574972016193, "eval_macro_precision": 0.8223473803322559, "eval_macro_recall": 0.8205261076151831, "eval_micro_f1": 0.8906794425087108, "eval_micro_precision": 0.8906794425087108, "eval_micro_recall": 0.8906794425087108, "eval_runtime": 28.1842, "eval_samples_per_second": 81.464, "eval_steps_per_second": 1.277, "eval_weighted_f1": 0.8908187701448725, "eval_weighted_precision": 0.8923647873870492, "eval_weighted_recall": 0.8906794425087108, "step": 1400 }, { "epoch": 3.663003663003663, "grad_norm": 32.259490966796875, "learning_rate": 6.967117488967232e-06, "loss": 0.3965, "step": 1500 }, { "epoch": 3.663003663003663, "eval_loss": 0.48184239864349365, "eval_macro_f1": 0.8216617574200699, "eval_macro_precision": 0.8214437028036421, "eval_macro_recall": 0.8278310307484175, "eval_micro_f1": 0.89198606271777, "eval_micro_precision": 0.89198606271777, "eval_micro_recall": 0.89198606271777, "eval_runtime": 27.8631, "eval_samples_per_second": 82.403, "eval_steps_per_second": 1.292, "eval_weighted_f1": 0.8924948901640382, "eval_weighted_precision": 0.8949476957454691, "eval_weighted_recall": 0.89198606271777, "step": 1500 } ], "logging_steps": 100, "max_steps": 2045, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.46348379202519e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }