ducdatit2002's picture
Upload folder using huggingface_hub
e09333c verified
{
"best_metric": 0.8705702821418305,
"best_model_checkpoint": "./phobert_results_v2/checkpoint-3470",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 3470,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02881844380403458,
"grad_norm": 4.723288059234619,
"learning_rate": 1.994236311239193e-05,
"loss": 1.9047,
"step": 50
},
{
"epoch": 0.05763688760806916,
"grad_norm": 32.87104034423828,
"learning_rate": 1.9884726224783863e-05,
"loss": 1.7269,
"step": 100
},
{
"epoch": 0.08645533141210375,
"grad_norm": 5.206538677215576,
"learning_rate": 1.9827089337175795e-05,
"loss": 1.6865,
"step": 150
},
{
"epoch": 0.11527377521613832,
"grad_norm": 52.5133056640625,
"learning_rate": 1.9769452449567724e-05,
"loss": 1.633,
"step": 200
},
{
"epoch": 0.1440922190201729,
"grad_norm": 12.493169784545898,
"learning_rate": 1.9711815561959656e-05,
"loss": 1.553,
"step": 250
},
{
"epoch": 0.1729106628242075,
"grad_norm": 7.675703048706055,
"learning_rate": 1.9654178674351588e-05,
"loss": 1.4131,
"step": 300
},
{
"epoch": 0.2017291066282421,
"grad_norm": 5.984454154968262,
"learning_rate": 1.9596541786743517e-05,
"loss": 1.3606,
"step": 350
},
{
"epoch": 0.23054755043227665,
"grad_norm": 9.403727531433105,
"learning_rate": 1.953890489913545e-05,
"loss": 1.2202,
"step": 400
},
{
"epoch": 0.25936599423631124,
"grad_norm": 7.890481472015381,
"learning_rate": 1.9481268011527378e-05,
"loss": 1.186,
"step": 450
},
{
"epoch": 0.2881844380403458,
"grad_norm": 12.102771759033203,
"learning_rate": 1.942363112391931e-05,
"loss": 1.0865,
"step": 500
},
{
"epoch": 0.3170028818443804,
"grad_norm": 17.669885635375977,
"learning_rate": 1.936599423631124e-05,
"loss": 1.124,
"step": 550
},
{
"epoch": 0.345821325648415,
"grad_norm": 10.14146614074707,
"learning_rate": 1.930835734870317e-05,
"loss": 1.1307,
"step": 600
},
{
"epoch": 0.3746397694524496,
"grad_norm": 9.363780975341797,
"learning_rate": 1.9250720461095104e-05,
"loss": 1.0582,
"step": 650
},
{
"epoch": 0.4034582132564842,
"grad_norm": 22.122907638549805,
"learning_rate": 1.9193083573487033e-05,
"loss": 1.007,
"step": 700
},
{
"epoch": 0.4322766570605187,
"grad_norm": 22.921249389648438,
"learning_rate": 1.9135446685878965e-05,
"loss": 1.0046,
"step": 750
},
{
"epoch": 0.4610951008645533,
"grad_norm": 13.880660057067871,
"learning_rate": 1.9077809798270894e-05,
"loss": 0.9786,
"step": 800
},
{
"epoch": 0.4899135446685879,
"grad_norm": 13.75207805633545,
"learning_rate": 1.9020172910662826e-05,
"loss": 0.9166,
"step": 850
},
{
"epoch": 0.5187319884726225,
"grad_norm": 15.701948165893555,
"learning_rate": 1.8962536023054755e-05,
"loss": 0.8942,
"step": 900
},
{
"epoch": 0.547550432276657,
"grad_norm": 13.106974601745605,
"learning_rate": 1.8904899135446687e-05,
"loss": 0.8905,
"step": 950
},
{
"epoch": 0.5763688760806917,
"grad_norm": 13.854475021362305,
"learning_rate": 1.884726224783862e-05,
"loss": 0.9089,
"step": 1000
},
{
"epoch": 0.6051873198847262,
"grad_norm": 14.22427749633789,
"learning_rate": 1.878962536023055e-05,
"loss": 0.8688,
"step": 1050
},
{
"epoch": 0.6340057636887608,
"grad_norm": 14.088136672973633,
"learning_rate": 1.873198847262248e-05,
"loss": 0.859,
"step": 1100
},
{
"epoch": 0.6628242074927954,
"grad_norm": 15.746428489685059,
"learning_rate": 1.867435158501441e-05,
"loss": 0.8356,
"step": 1150
},
{
"epoch": 0.69164265129683,
"grad_norm": 10.937832832336426,
"learning_rate": 1.861671469740634e-05,
"loss": 0.8699,
"step": 1200
},
{
"epoch": 0.7204610951008645,
"grad_norm": 14.396600723266602,
"learning_rate": 1.855907780979827e-05,
"loss": 0.7789,
"step": 1250
},
{
"epoch": 0.7492795389048992,
"grad_norm": 11.801300048828125,
"learning_rate": 1.8501440922190203e-05,
"loss": 0.8713,
"step": 1300
},
{
"epoch": 0.7780979827089337,
"grad_norm": 10.435481071472168,
"learning_rate": 1.8443804034582135e-05,
"loss": 0.7961,
"step": 1350
},
{
"epoch": 0.8069164265129684,
"grad_norm": 11.2293062210083,
"learning_rate": 1.8386167146974067e-05,
"loss": 0.8397,
"step": 1400
},
{
"epoch": 0.8357348703170029,
"grad_norm": 12.809613227844238,
"learning_rate": 1.8328530259365996e-05,
"loss": 0.8297,
"step": 1450
},
{
"epoch": 0.8645533141210374,
"grad_norm": 14.791847229003906,
"learning_rate": 1.8270893371757928e-05,
"loss": 0.7709,
"step": 1500
},
{
"epoch": 0.8933717579250721,
"grad_norm": 14.587478637695312,
"learning_rate": 1.8213256484149857e-05,
"loss": 0.7552,
"step": 1550
},
{
"epoch": 0.9221902017291066,
"grad_norm": 16.79636001586914,
"learning_rate": 1.815561959654179e-05,
"loss": 0.7937,
"step": 1600
},
{
"epoch": 0.9510086455331412,
"grad_norm": 8.510680198669434,
"learning_rate": 1.8097982708933718e-05,
"loss": 0.7181,
"step": 1650
},
{
"epoch": 0.9798270893371758,
"grad_norm": 20.466522216796875,
"learning_rate": 1.804034582132565e-05,
"loss": 0.712,
"step": 1700
},
{
"epoch": 1.0,
"eval_accuracy": 0.7955036748811067,
"eval_f1_macro": 0.795530535937033,
"eval_f1_weighted": 0.795536244762841,
"eval_loss": 0.6264312267303467,
"eval_precision_macro": 0.7978076489526655,
"eval_precision_weighted": 0.797819457255313,
"eval_recall_macro": 0.7955041514180622,
"eval_recall_weighted": 0.7955036748811067,
"eval_runtime": 29.7702,
"eval_samples_per_second": 233.086,
"eval_steps_per_second": 14.578,
"step": 1735
},
{
"epoch": 1.0086455331412103,
"grad_norm": 17.28911018371582,
"learning_rate": 1.7982708933717582e-05,
"loss": 0.6512,
"step": 1750
},
{
"epoch": 1.037463976945245,
"grad_norm": 28.79306411743164,
"learning_rate": 1.792507204610951e-05,
"loss": 0.6036,
"step": 1800
},
{
"epoch": 1.0662824207492796,
"grad_norm": 13.444112777709961,
"learning_rate": 1.7867435158501444e-05,
"loss": 0.552,
"step": 1850
},
{
"epoch": 1.0951008645533142,
"grad_norm": 22.068620681762695,
"learning_rate": 1.7809798270893372e-05,
"loss": 0.577,
"step": 1900
},
{
"epoch": 1.1239193083573487,
"grad_norm": 13.942420959472656,
"learning_rate": 1.7752161383285305e-05,
"loss": 0.5573,
"step": 1950
},
{
"epoch": 1.1527377521613833,
"grad_norm": 13.652227401733398,
"learning_rate": 1.7694524495677234e-05,
"loss": 0.5401,
"step": 2000
},
{
"epoch": 1.181556195965418,
"grad_norm": 22.252243041992188,
"learning_rate": 1.7636887608069166e-05,
"loss": 0.558,
"step": 2050
},
{
"epoch": 1.2103746397694524,
"grad_norm": 17.13372802734375,
"learning_rate": 1.7579250720461095e-05,
"loss": 0.514,
"step": 2100
},
{
"epoch": 1.239193083573487,
"grad_norm": 17.164276123046875,
"learning_rate": 1.7521613832853027e-05,
"loss": 0.5137,
"step": 2150
},
{
"epoch": 1.2680115273775217,
"grad_norm": 14.449616432189941,
"learning_rate": 1.746397694524496e-05,
"loss": 0.5,
"step": 2200
},
{
"epoch": 1.2968299711815563,
"grad_norm": 11.61601734161377,
"learning_rate": 1.7406340057636888e-05,
"loss": 0.5071,
"step": 2250
},
{
"epoch": 1.3256484149855907,
"grad_norm": 20.6407527923584,
"learning_rate": 1.734870317002882e-05,
"loss": 0.4803,
"step": 2300
},
{
"epoch": 1.3544668587896254,
"grad_norm": 18.60149383544922,
"learning_rate": 1.729106628242075e-05,
"loss": 0.4967,
"step": 2350
},
{
"epoch": 1.38328530259366,
"grad_norm": 22.12944793701172,
"learning_rate": 1.723342939481268e-05,
"loss": 0.4652,
"step": 2400
},
{
"epoch": 1.4121037463976944,
"grad_norm": 18.460689544677734,
"learning_rate": 1.717579250720461e-05,
"loss": 0.4417,
"step": 2450
},
{
"epoch": 1.440922190201729,
"grad_norm": 19.452363967895508,
"learning_rate": 1.7118155619596542e-05,
"loss": 0.5082,
"step": 2500
},
{
"epoch": 1.4697406340057637,
"grad_norm": 41.46109390258789,
"learning_rate": 1.7060518731988475e-05,
"loss": 0.463,
"step": 2550
},
{
"epoch": 1.4985590778097984,
"grad_norm": 25.584379196166992,
"learning_rate": 1.7002881844380407e-05,
"loss": 0.4788,
"step": 2600
},
{
"epoch": 1.527377521613833,
"grad_norm": 16.92909049987793,
"learning_rate": 1.6945244956772336e-05,
"loss": 0.4752,
"step": 2650
},
{
"epoch": 1.5561959654178674,
"grad_norm": 10.342531204223633,
"learning_rate": 1.6887608069164268e-05,
"loss": 0.4848,
"step": 2700
},
{
"epoch": 1.585014409221902,
"grad_norm": 15.596243858337402,
"learning_rate": 1.6829971181556197e-05,
"loss": 0.5158,
"step": 2750
},
{
"epoch": 1.6138328530259365,
"grad_norm": 17.031354904174805,
"learning_rate": 1.6772334293948126e-05,
"loss": 0.4577,
"step": 2800
},
{
"epoch": 1.6426512968299711,
"grad_norm": 13.550045013427734,
"learning_rate": 1.6714697406340058e-05,
"loss": 0.4947,
"step": 2850
},
{
"epoch": 1.6714697406340058,
"grad_norm": 10.102880477905273,
"learning_rate": 1.665706051873199e-05,
"loss": 0.4681,
"step": 2900
},
{
"epoch": 1.7002881844380404,
"grad_norm": 10.968811988830566,
"learning_rate": 1.6599423631123922e-05,
"loss": 0.452,
"step": 2950
},
{
"epoch": 1.729106628242075,
"grad_norm": 4.670314311981201,
"learning_rate": 1.654178674351585e-05,
"loss": 0.4346,
"step": 3000
},
{
"epoch": 1.7579250720461095,
"grad_norm": 28.008899688720703,
"learning_rate": 1.6484149855907783e-05,
"loss": 0.4404,
"step": 3050
},
{
"epoch": 1.7867435158501441,
"grad_norm": 27.77347183227539,
"learning_rate": 1.6426512968299712e-05,
"loss": 0.4547,
"step": 3100
},
{
"epoch": 1.8155619596541785,
"grad_norm": 21.949289321899414,
"learning_rate": 1.6368876080691644e-05,
"loss": 0.4924,
"step": 3150
},
{
"epoch": 1.8443804034582132,
"grad_norm": 2.8766281604766846,
"learning_rate": 1.6311239193083573e-05,
"loss": 0.3776,
"step": 3200
},
{
"epoch": 1.8731988472622478,
"grad_norm": 38.812625885009766,
"learning_rate": 1.6253602305475506e-05,
"loss": 0.4176,
"step": 3250
},
{
"epoch": 1.9020172910662825,
"grad_norm": 24.132482528686523,
"learning_rate": 1.6195965417867438e-05,
"loss": 0.4407,
"step": 3300
},
{
"epoch": 1.9308357348703171,
"grad_norm": 19.72426414489746,
"learning_rate": 1.613832853025937e-05,
"loss": 0.4503,
"step": 3350
},
{
"epoch": 1.9596541786743515,
"grad_norm": 16.077899932861328,
"learning_rate": 1.60806916426513e-05,
"loss": 0.4244,
"step": 3400
},
{
"epoch": 1.9884726224783862,
"grad_norm": 21.258529663085938,
"learning_rate": 1.6023054755043228e-05,
"loss": 0.4161,
"step": 3450
},
{
"epoch": 2.0,
"eval_accuracy": 0.87101887880098,
"eval_f1_macro": 0.8705600204306078,
"eval_f1_weighted": 0.8705702821418305,
"eval_loss": 0.43536442518234253,
"eval_precision_macro": 0.8749786652431168,
"eval_precision_weighted": 0.8749977247467932,
"eval_recall_macro": 0.8710178763247105,
"eval_recall_weighted": 0.87101887880098,
"eval_runtime": 29.9836,
"eval_samples_per_second": 231.426,
"eval_steps_per_second": 14.475,
"step": 3470
}
],
"logging_steps": 50,
"max_steps": 17350,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 7302448933401600.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}