{
"best_metric": 0.3162839710712433,
"best_model_checkpoint": "autotrain-ytgys-osuer/checkpoint-1326",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1326,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.049773755656108594,
"grad_norm": 9.081209182739258,
"learning_rate": 8.270676691729324e-06,
"loss": 0.6296,
"step": 22
},
{
"epoch": 0.09954751131221719,
"grad_norm": 1.96213698387146,
"learning_rate": 1.6541353383458648e-05,
"loss": 0.4877,
"step": 44
},
{
"epoch": 0.1493212669683258,
"grad_norm": 2.1971828937530518,
"learning_rate": 2.4812030075187968e-05,
"loss": 0.5465,
"step": 66
},
{
"epoch": 0.19909502262443438,
"grad_norm": 5.049612998962402,
"learning_rate": 3.3082706766917295e-05,
"loss": 0.5186,
"step": 88
},
{
"epoch": 0.248868778280543,
"grad_norm": 0.6077613830566406,
"learning_rate": 4.135338345864662e-05,
"loss": 0.2962,
"step": 110
},
{
"epoch": 0.2986425339366516,
"grad_norm": 5.124961853027344,
"learning_rate": 4.9624060150375936e-05,
"loss": 0.5159,
"step": 132
},
{
"epoch": 0.34841628959276016,
"grad_norm": 1.2383034229278564,
"learning_rate": 4.9119865884325234e-05,
"loss": 0.5115,
"step": 154
},
{
"epoch": 0.39819004524886875,
"grad_norm": 4.597978115081787,
"learning_rate": 4.8197820620285e-05,
"loss": 0.4524,
"step": 176
},
{
"epoch": 0.4479638009049774,
"grad_norm": 4.115572452545166,
"learning_rate": 4.727577535624476e-05,
"loss": 0.4217,
"step": 198
},
{
"epoch": 0.497737556561086,
"grad_norm": 1.8618402481079102,
"learning_rate": 4.635373009220453e-05,
"loss": 0.325,
"step": 220
},
{
"epoch": 0.5475113122171946,
"grad_norm": 4.649389743804932,
"learning_rate": 4.5431684828164296e-05,
"loss": 0.4603,
"step": 242
},
{
"epoch": 0.5972850678733032,
"grad_norm": 2.3018507957458496,
"learning_rate": 4.450963956412406e-05,
"loss": 0.4817,
"step": 264
},
{
"epoch": 0.6470588235294118,
"grad_norm": 1.6560252904891968,
"learning_rate": 4.358759430008382e-05,
"loss": 0.4588,
"step": 286
},
{
"epoch": 0.6968325791855203,
"grad_norm": 1.248030185699463,
"learning_rate": 4.266554903604359e-05,
"loss": 0.4533,
"step": 308
},
{
"epoch": 0.746606334841629,
"grad_norm": 7.337639808654785,
"learning_rate": 4.174350377200336e-05,
"loss": 0.5258,
"step": 330
},
{
"epoch": 0.7963800904977375,
"grad_norm": 3.778733253479004,
"learning_rate": 4.0821458507963125e-05,
"loss": 0.4715,
"step": 352
},
{
"epoch": 0.8461538461538461,
"grad_norm": 2.055952787399292,
"learning_rate": 3.9899413243922885e-05,
"loss": 0.5279,
"step": 374
},
{
"epoch": 0.8959276018099548,
"grad_norm": 2.640718460083008,
"learning_rate": 3.897736797988265e-05,
"loss": 0.4142,
"step": 396
},
{
"epoch": 0.9457013574660633,
"grad_norm": 2.906071424484253,
"learning_rate": 3.805532271584242e-05,
"loss": 0.618,
"step": 418
},
{
"epoch": 0.995475113122172,
"grad_norm": 7.490321159362793,
"learning_rate": 3.713327745180218e-05,
"loss": 0.4642,
"step": 440
},
{
"epoch": 1.0,
"eval_accuracy": 0.8301245753114382,
"eval_auc": 0.6658117326057298,
"eval_f1": 0.0,
"eval_loss": 0.468678742647171,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 155.3554,
"eval_samples_per_second": 5.684,
"eval_steps_per_second": 0.36,
"step": 442
},
{
"epoch": 1.0452488687782806,
"grad_norm": 1.7145849466323853,
"learning_rate": 3.6211232187761947e-05,
"loss": 0.5339,
"step": 462
},
{
"epoch": 1.0950226244343892,
"grad_norm": 7.355587959289551,
"learning_rate": 3.5289186923721714e-05,
"loss": 0.4688,
"step": 484
},
{
"epoch": 1.1447963800904977,
"grad_norm": 8.240862846374512,
"learning_rate": 3.436714165968148e-05,
"loss": 0.413,
"step": 506
},
{
"epoch": 1.1945701357466063,
"grad_norm": 4.566345691680908,
"learning_rate": 3.344509639564124e-05,
"loss": 0.4214,
"step": 528
},
{
"epoch": 1.244343891402715,
"grad_norm": 7.886547088623047,
"learning_rate": 3.252305113160101e-05,
"loss": 0.5584,
"step": 550
},
{
"epoch": 1.2941176470588236,
"grad_norm": 4.851104259490967,
"learning_rate": 3.1601005867560775e-05,
"loss": 0.5674,
"step": 572
},
{
"epoch": 1.3438914027149322,
"grad_norm": 2.095370054244995,
"learning_rate": 3.067896060352054e-05,
"loss": 0.4522,
"step": 594
},
{
"epoch": 1.3936651583710407,
"grad_norm": 1.348547339439392,
"learning_rate": 2.9756915339480303e-05,
"loss": 0.4711,
"step": 616
},
{
"epoch": 1.4434389140271493,
"grad_norm": 1.514244556427002,
"learning_rate": 2.8834870075440066e-05,
"loss": 0.3652,
"step": 638
},
{
"epoch": 1.493212669683258,
"grad_norm": 2.2231717109680176,
"learning_rate": 2.7912824811399834e-05,
"loss": 0.4884,
"step": 660
},
{
"epoch": 1.5429864253393664,
"grad_norm": 4.4252777099609375,
"learning_rate": 2.69907795473596e-05,
"loss": 0.4066,
"step": 682
},
{
"epoch": 1.5927601809954752,
"grad_norm": 2.0143589973449707,
"learning_rate": 2.606873428331936e-05,
"loss": 0.4817,
"step": 704
},
{
"epoch": 1.6425339366515836,
"grad_norm": 1.2555855512619019,
"learning_rate": 2.5146689019279128e-05,
"loss": 0.4505,
"step": 726
},
{
"epoch": 1.6923076923076923,
"grad_norm": 1.5008816719055176,
"learning_rate": 2.4224643755238895e-05,
"loss": 0.4987,
"step": 748
},
{
"epoch": 1.742081447963801,
"grad_norm": 1.0928298234939575,
"learning_rate": 2.330259849119866e-05,
"loss": 0.4491,
"step": 770
},
{
"epoch": 1.7918552036199094,
"grad_norm": 2.131342887878418,
"learning_rate": 2.2380553227158423e-05,
"loss": 0.5099,
"step": 792
},
{
"epoch": 1.8416289592760182,
"grad_norm": 2.0319790840148926,
"learning_rate": 2.145850796311819e-05,
"loss": 0.4312,
"step": 814
},
{
"epoch": 1.8914027149321266,
"grad_norm": 3.668442726135254,
"learning_rate": 2.0536462699077953e-05,
"loss": 0.414,
"step": 836
},
{
"epoch": 1.9411764705882353,
"grad_norm": 1.9720642566680908,
"learning_rate": 1.961441743503772e-05,
"loss": 0.3894,
"step": 858
},
{
"epoch": 1.990950226244344,
"grad_norm": 4.421242713928223,
"learning_rate": 1.8692372170997484e-05,
"loss": 0.4847,
"step": 880
},
{
"epoch": 2.0,
"eval_accuracy": 0.8301245753114382,
"eval_auc": 0.7693769895407002,
"eval_f1": 0.0,
"eval_loss": 0.45319485664367676,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 156.7435,
"eval_samples_per_second": 5.633,
"eval_steps_per_second": 0.357,
"step": 884
},
{
"epoch": 2.0407239819004523,
"grad_norm": 4.845108985900879,
"learning_rate": 1.777032690695725e-05,
"loss": 0.426,
"step": 902
},
{
"epoch": 2.090497737556561,
"grad_norm": 4.742054462432861,
"learning_rate": 1.6848281642917015e-05,
"loss": 0.4253,
"step": 924
},
{
"epoch": 2.1402714932126696,
"grad_norm": 27.354101181030273,
"learning_rate": 1.5926236378876782e-05,
"loss": 0.4168,
"step": 946
},
{
"epoch": 2.1900452488687785,
"grad_norm": 9.501100540161133,
"learning_rate": 1.5004191114836546e-05,
"loss": 0.4337,
"step": 968
},
{
"epoch": 2.239819004524887,
"grad_norm": 2.986358165740967,
"learning_rate": 1.4082145850796313e-05,
"loss": 0.3713,
"step": 990
},
{
"epoch": 2.2895927601809953,
"grad_norm": 13.785974502563477,
"learning_rate": 1.3160100586756077e-05,
"loss": 0.3517,
"step": 1012
},
{
"epoch": 2.339366515837104,
"grad_norm": 6.597299098968506,
"learning_rate": 1.2238055322715842e-05,
"loss": 0.362,
"step": 1034
},
{
"epoch": 2.3891402714932126,
"grad_norm": 1.9302808046340942,
"learning_rate": 1.1316010058675607e-05,
"loss": 0.3733,
"step": 1056
},
{
"epoch": 2.4389140271493215,
"grad_norm": 1.2917982339859009,
"learning_rate": 1.0393964794635373e-05,
"loss": 0.3184,
"step": 1078
},
{
"epoch": 2.48868778280543,
"grad_norm": 2.898386240005493,
"learning_rate": 9.471919530595138e-06,
"loss": 0.4959,
"step": 1100
},
{
"epoch": 2.5384615384615383,
"grad_norm": 5.883040904998779,
"learning_rate": 8.549874266554904e-06,
"loss": 0.3014,
"step": 1122
},
{
"epoch": 2.588235294117647,
"grad_norm": 10.006911277770996,
"learning_rate": 7.627829002514669e-06,
"loss": 0.3047,
"step": 1144
},
{
"epoch": 2.6380090497737556,
"grad_norm": 3.732818365097046,
"learning_rate": 6.7057837384744345e-06,
"loss": 0.382,
"step": 1166
},
{
"epoch": 2.6877828054298645,
"grad_norm": 4.408326148986816,
"learning_rate": 5.7837384744342e-06,
"loss": 0.3173,
"step": 1188
},
{
"epoch": 2.737556561085973,
"grad_norm": 7.149359226226807,
"learning_rate": 4.861693210393965e-06,
"loss": 0.4387,
"step": 1210
},
{
"epoch": 2.7873303167420813,
"grad_norm": 9.934762001037598,
"learning_rate": 3.939647946353731e-06,
"loss": 0.2751,
"step": 1232
},
{
"epoch": 2.83710407239819,
"grad_norm": 5.656704902648926,
"learning_rate": 3.0176026823134957e-06,
"loss": 0.3792,
"step": 1254
},
{
"epoch": 2.8868778280542986,
"grad_norm": 7.494544506072998,
"learning_rate": 2.095557418273261e-06,
"loss": 0.3882,
"step": 1276
},
{
"epoch": 2.9366515837104075,
"grad_norm": 12.555413246154785,
"learning_rate": 1.173512154233026e-06,
"loss": 0.3562,
"step": 1298
},
{
"epoch": 2.986425339366516,
"grad_norm": 8.668551445007324,
"learning_rate": 2.5146689019279127e-07,
"loss": 0.2171,
"step": 1320
},
{
"epoch": 3.0,
"eval_accuracy": 0.8754246885617214,
"eval_auc": 0.8692314688494771,
"eval_f1": 0.6180555555555556,
"eval_loss": 0.3162839710712433,
"eval_precision": 0.644927536231884,
"eval_recall": 0.5933333333333334,
"eval_runtime": 161.1074,
"eval_samples_per_second": 5.481,
"eval_steps_per_second": 0.348,
"step": 1326
}
],
"logging_steps": 22,
"max_steps": 1326,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 696981185648640.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}