|
{ |
|
"best_metric": 0.3162839710712433, |
|
"best_model_checkpoint": "autotrain-ytgys-osuer/checkpoint-1326", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 1326, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.049773755656108594, |
|
"grad_norm": 9.081209182739258, |
|
"learning_rate": 8.270676691729324e-06, |
|
"loss": 0.6296, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.09954751131221719, |
|
"grad_norm": 1.96213698387146, |
|
"learning_rate": 1.6541353383458648e-05, |
|
"loss": 0.4877, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1493212669683258, |
|
"grad_norm": 2.1971828937530518, |
|
"learning_rate": 2.4812030075187968e-05, |
|
"loss": 0.5465, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.19909502262443438, |
|
"grad_norm": 5.049612998962402, |
|
"learning_rate": 3.3082706766917295e-05, |
|
"loss": 0.5186, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.248868778280543, |
|
"grad_norm": 0.6077613830566406, |
|
"learning_rate": 4.135338345864662e-05, |
|
"loss": 0.2962, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2986425339366516, |
|
"grad_norm": 5.124961853027344, |
|
"learning_rate": 4.9624060150375936e-05, |
|
"loss": 0.5159, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.34841628959276016, |
|
"grad_norm": 1.2383034229278564, |
|
"learning_rate": 4.9119865884325234e-05, |
|
"loss": 0.5115, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.39819004524886875, |
|
"grad_norm": 4.597978115081787, |
|
"learning_rate": 4.8197820620285e-05, |
|
"loss": 0.4524, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4479638009049774, |
|
"grad_norm": 4.115572452545166, |
|
"learning_rate": 4.727577535624476e-05, |
|
"loss": 0.4217, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.497737556561086, |
|
"grad_norm": 1.8618402481079102, |
|
"learning_rate": 4.635373009220453e-05, |
|
"loss": 0.325, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5475113122171946, |
|
"grad_norm": 4.649389743804932, |
|
"learning_rate": 4.5431684828164296e-05, |
|
"loss": 0.4603, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.5972850678733032, |
|
"grad_norm": 2.3018507957458496, |
|
"learning_rate": 4.450963956412406e-05, |
|
"loss": 0.4817, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.6470588235294118, |
|
"grad_norm": 1.6560252904891968, |
|
"learning_rate": 4.358759430008382e-05, |
|
"loss": 0.4588, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.6968325791855203, |
|
"grad_norm": 1.248030185699463, |
|
"learning_rate": 4.266554903604359e-05, |
|
"loss": 0.4533, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.746606334841629, |
|
"grad_norm": 7.337639808654785, |
|
"learning_rate": 4.174350377200336e-05, |
|
"loss": 0.5258, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7963800904977375, |
|
"grad_norm": 3.778733253479004, |
|
"learning_rate": 4.0821458507963125e-05, |
|
"loss": 0.4715, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.8461538461538461, |
|
"grad_norm": 2.055952787399292, |
|
"learning_rate": 3.9899413243922885e-05, |
|
"loss": 0.5279, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8959276018099548, |
|
"grad_norm": 2.640718460083008, |
|
"learning_rate": 3.897736797988265e-05, |
|
"loss": 0.4142, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.9457013574660633, |
|
"grad_norm": 2.906071424484253, |
|
"learning_rate": 3.805532271584242e-05, |
|
"loss": 0.618, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.995475113122172, |
|
"grad_norm": 7.490321159362793, |
|
"learning_rate": 3.713327745180218e-05, |
|
"loss": 0.4642, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8301245753114382, |
|
"eval_auc": 0.6658117326057298, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.468678742647171, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 155.3554, |
|
"eval_samples_per_second": 5.684, |
|
"eval_steps_per_second": 0.36, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.0452488687782806, |
|
"grad_norm": 1.7145849466323853, |
|
"learning_rate": 3.6211232187761947e-05, |
|
"loss": 0.5339, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.0950226244343892, |
|
"grad_norm": 7.355587959289551, |
|
"learning_rate": 3.5289186923721714e-05, |
|
"loss": 0.4688, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.1447963800904977, |
|
"grad_norm": 8.240862846374512, |
|
"learning_rate": 3.436714165968148e-05, |
|
"loss": 0.413, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.1945701357466063, |
|
"grad_norm": 4.566345691680908, |
|
"learning_rate": 3.344509639564124e-05, |
|
"loss": 0.4214, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.244343891402715, |
|
"grad_norm": 7.886547088623047, |
|
"learning_rate": 3.252305113160101e-05, |
|
"loss": 0.5584, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2941176470588236, |
|
"grad_norm": 4.851104259490967, |
|
"learning_rate": 3.1601005867560775e-05, |
|
"loss": 0.5674, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.3438914027149322, |
|
"grad_norm": 2.095370054244995, |
|
"learning_rate": 3.067896060352054e-05, |
|
"loss": 0.4522, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.3936651583710407, |
|
"grad_norm": 1.348547339439392, |
|
"learning_rate": 2.9756915339480303e-05, |
|
"loss": 0.4711, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.4434389140271493, |
|
"grad_norm": 1.514244556427002, |
|
"learning_rate": 2.8834870075440066e-05, |
|
"loss": 0.3652, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.493212669683258, |
|
"grad_norm": 2.2231717109680176, |
|
"learning_rate": 2.7912824811399834e-05, |
|
"loss": 0.4884, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.5429864253393664, |
|
"grad_norm": 4.4252777099609375, |
|
"learning_rate": 2.69907795473596e-05, |
|
"loss": 0.4066, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.5927601809954752, |
|
"grad_norm": 2.0143589973449707, |
|
"learning_rate": 2.606873428331936e-05, |
|
"loss": 0.4817, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.6425339366515836, |
|
"grad_norm": 1.2555855512619019, |
|
"learning_rate": 2.5146689019279128e-05, |
|
"loss": 0.4505, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.6923076923076923, |
|
"grad_norm": 1.5008816719055176, |
|
"learning_rate": 2.4224643755238895e-05, |
|
"loss": 0.4987, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.742081447963801, |
|
"grad_norm": 1.0928298234939575, |
|
"learning_rate": 2.330259849119866e-05, |
|
"loss": 0.4491, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.7918552036199094, |
|
"grad_norm": 2.131342887878418, |
|
"learning_rate": 2.2380553227158423e-05, |
|
"loss": 0.5099, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.8416289592760182, |
|
"grad_norm": 2.0319790840148926, |
|
"learning_rate": 2.145850796311819e-05, |
|
"loss": 0.4312, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.8914027149321266, |
|
"grad_norm": 3.668442726135254, |
|
"learning_rate": 2.0536462699077953e-05, |
|
"loss": 0.414, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.9411764705882353, |
|
"grad_norm": 1.9720642566680908, |
|
"learning_rate": 1.961441743503772e-05, |
|
"loss": 0.3894, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.990950226244344, |
|
"grad_norm": 4.421242713928223, |
|
"learning_rate": 1.8692372170997484e-05, |
|
"loss": 0.4847, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8301245753114382, |
|
"eval_auc": 0.7693769895407002, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.45319485664367676, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 156.7435, |
|
"eval_samples_per_second": 5.633, |
|
"eval_steps_per_second": 0.357, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.0407239819004523, |
|
"grad_norm": 4.845108985900879, |
|
"learning_rate": 1.777032690695725e-05, |
|
"loss": 0.426, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.090497737556561, |
|
"grad_norm": 4.742054462432861, |
|
"learning_rate": 1.6848281642917015e-05, |
|
"loss": 0.4253, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.1402714932126696, |
|
"grad_norm": 27.354101181030273, |
|
"learning_rate": 1.5926236378876782e-05, |
|
"loss": 0.4168, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.1900452488687785, |
|
"grad_norm": 9.501100540161133, |
|
"learning_rate": 1.5004191114836546e-05, |
|
"loss": 0.4337, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.239819004524887, |
|
"grad_norm": 2.986358165740967, |
|
"learning_rate": 1.4082145850796313e-05, |
|
"loss": 0.3713, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.2895927601809953, |
|
"grad_norm": 13.785974502563477, |
|
"learning_rate": 1.3160100586756077e-05, |
|
"loss": 0.3517, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.339366515837104, |
|
"grad_norm": 6.597299098968506, |
|
"learning_rate": 1.2238055322715842e-05, |
|
"loss": 0.362, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.3891402714932126, |
|
"grad_norm": 1.9302808046340942, |
|
"learning_rate": 1.1316010058675607e-05, |
|
"loss": 0.3733, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.4389140271493215, |
|
"grad_norm": 1.2917982339859009, |
|
"learning_rate": 1.0393964794635373e-05, |
|
"loss": 0.3184, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.48868778280543, |
|
"grad_norm": 2.898386240005493, |
|
"learning_rate": 9.471919530595138e-06, |
|
"loss": 0.4959, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.5384615384615383, |
|
"grad_norm": 5.883040904998779, |
|
"learning_rate": 8.549874266554904e-06, |
|
"loss": 0.3014, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.588235294117647, |
|
"grad_norm": 10.006911277770996, |
|
"learning_rate": 7.627829002514669e-06, |
|
"loss": 0.3047, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.6380090497737556, |
|
"grad_norm": 3.732818365097046, |
|
"learning_rate": 6.7057837384744345e-06, |
|
"loss": 0.382, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.6877828054298645, |
|
"grad_norm": 4.408326148986816, |
|
"learning_rate": 5.7837384744342e-06, |
|
"loss": 0.3173, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.737556561085973, |
|
"grad_norm": 7.149359226226807, |
|
"learning_rate": 4.861693210393965e-06, |
|
"loss": 0.4387, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.7873303167420813, |
|
"grad_norm": 9.934762001037598, |
|
"learning_rate": 3.939647946353731e-06, |
|
"loss": 0.2751, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 2.83710407239819, |
|
"grad_norm": 5.656704902648926, |
|
"learning_rate": 3.0176026823134957e-06, |
|
"loss": 0.3792, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 2.8868778280542986, |
|
"grad_norm": 7.494544506072998, |
|
"learning_rate": 2.095557418273261e-06, |
|
"loss": 0.3882, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 2.9366515837104075, |
|
"grad_norm": 12.555413246154785, |
|
"learning_rate": 1.173512154233026e-06, |
|
"loss": 0.3562, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 2.986425339366516, |
|
"grad_norm": 8.668551445007324, |
|
"learning_rate": 2.5146689019279127e-07, |
|
"loss": 0.2171, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8754246885617214, |
|
"eval_auc": 0.8692314688494771, |
|
"eval_f1": 0.6180555555555556, |
|
"eval_loss": 0.3162839710712433, |
|
"eval_precision": 0.644927536231884, |
|
"eval_recall": 0.5933333333333334, |
|
"eval_runtime": 161.1074, |
|
"eval_samples_per_second": 5.481, |
|
"eval_steps_per_second": 0.348, |
|
"step": 1326 |
|
} |
|
], |
|
"logging_steps": 22, |
|
"max_steps": 1326, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 696981185648640.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|