gryzaq1337's picture
Upload folder using huggingface_hub
67bb943 verified
{
"best_metric": 0.9045712351799011,
"best_model_checkpoint": "autotrain-45ui2-ce6i6/checkpoint-303",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 303,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04950495049504951,
"grad_norm": 6.708970069885254,
"learning_rate": 8.064516129032258e-06,
"loss": 1.084,
"step": 5
},
{
"epoch": 0.09900990099009901,
"grad_norm": 6.528580188751221,
"learning_rate": 1.6129032258064517e-05,
"loss": 1.0713,
"step": 10
},
{
"epoch": 0.1485148514851485,
"grad_norm": 8.834931373596191,
"learning_rate": 2.4193548387096777e-05,
"loss": 1.0738,
"step": 15
},
{
"epoch": 0.19801980198019803,
"grad_norm": 14.24979305267334,
"learning_rate": 3.2258064516129034e-05,
"loss": 1.0781,
"step": 20
},
{
"epoch": 0.24752475247524752,
"grad_norm": 11.004271507263184,
"learning_rate": 4.032258064516129e-05,
"loss": 1.0461,
"step": 25
},
{
"epoch": 0.297029702970297,
"grad_norm": 10.543661117553711,
"learning_rate": 4.8387096774193554e-05,
"loss": 1.0577,
"step": 30
},
{
"epoch": 0.3465346534653465,
"grad_norm": 10.67061710357666,
"learning_rate": 4.9264705882352944e-05,
"loss": 1.0457,
"step": 35
},
{
"epoch": 0.39603960396039606,
"grad_norm": 9.407144546508789,
"learning_rate": 4.834558823529412e-05,
"loss": 1.0281,
"step": 40
},
{
"epoch": 0.44554455445544555,
"grad_norm": 11.587020874023438,
"learning_rate": 4.742647058823529e-05,
"loss": 1.0348,
"step": 45
},
{
"epoch": 0.49504950495049505,
"grad_norm": 7.882496356964111,
"learning_rate": 4.6507352941176475e-05,
"loss": 1.0052,
"step": 50
},
{
"epoch": 0.5445544554455446,
"grad_norm": 5.979856014251709,
"learning_rate": 4.558823529411765e-05,
"loss": 1.0017,
"step": 55
},
{
"epoch": 0.594059405940594,
"grad_norm": 10.8490629196167,
"learning_rate": 4.4669117647058825e-05,
"loss": 1.0315,
"step": 60
},
{
"epoch": 0.6435643564356436,
"grad_norm": 15.452676773071289,
"learning_rate": 4.375e-05,
"loss": 1.0226,
"step": 65
},
{
"epoch": 0.693069306930693,
"grad_norm": 12.206670761108398,
"learning_rate": 4.2830882352941174e-05,
"loss": 1.0166,
"step": 70
},
{
"epoch": 0.7425742574257426,
"grad_norm": 10.786033630371094,
"learning_rate": 4.1911764705882356e-05,
"loss": 0.99,
"step": 75
},
{
"epoch": 0.7920792079207921,
"grad_norm": 7.7128472328186035,
"learning_rate": 4.099264705882353e-05,
"loss": 0.9677,
"step": 80
},
{
"epoch": 0.8415841584158416,
"grad_norm": 10.314261436462402,
"learning_rate": 4.007352941176471e-05,
"loss": 1.0241,
"step": 85
},
{
"epoch": 0.8910891089108911,
"grad_norm": 8.401986122131348,
"learning_rate": 3.915441176470588e-05,
"loss": 0.9343,
"step": 90
},
{
"epoch": 0.9405940594059405,
"grad_norm": 10.88681697845459,
"learning_rate": 3.8235294117647055e-05,
"loss": 0.9632,
"step": 95
},
{
"epoch": 0.9900990099009901,
"grad_norm": 8.095929145812988,
"learning_rate": 3.731617647058824e-05,
"loss": 0.9717,
"step": 100
},
{
"epoch": 1.0,
"eval_accuracy": 0.5024875621890548,
"eval_f1_macro": 0.23045267489711932,
"eval_f1_micro": 0.5024875621890548,
"eval_f1_weighted": 0.34789017873594985,
"eval_loss": 0.9706681966781616,
"eval_precision_macro": 0.27946127946127947,
"eval_precision_micro": 0.5024875621890548,
"eval_precision_weighted": 0.3856475199758782,
"eval_recall_macro": 0.3310708898944193,
"eval_recall_micro": 0.5024875621890548,
"eval_recall_weighted": 0.5024875621890548,
"eval_runtime": 23.9303,
"eval_samples_per_second": 8.399,
"eval_steps_per_second": 0.543,
"step": 101
},
{
"epoch": 1.0396039603960396,
"grad_norm": 20.65790367126465,
"learning_rate": 3.639705882352941e-05,
"loss": 0.9095,
"step": 105
},
{
"epoch": 1.0891089108910892,
"grad_norm": 12.614889144897461,
"learning_rate": 3.5477941176470594e-05,
"loss": 0.9053,
"step": 110
},
{
"epoch": 1.1386138613861387,
"grad_norm": 17.137245178222656,
"learning_rate": 3.455882352941177e-05,
"loss": 0.9321,
"step": 115
},
{
"epoch": 1.188118811881188,
"grad_norm": 10.98409652709961,
"learning_rate": 3.363970588235294e-05,
"loss": 0.977,
"step": 120
},
{
"epoch": 1.2376237623762376,
"grad_norm": 6.829283714294434,
"learning_rate": 3.272058823529412e-05,
"loss": 0.9677,
"step": 125
},
{
"epoch": 1.2871287128712872,
"grad_norm": 7.726484298706055,
"learning_rate": 3.180147058823529e-05,
"loss": 0.9311,
"step": 130
},
{
"epoch": 1.3366336633663367,
"grad_norm": 9.711603164672852,
"learning_rate": 3.0882352941176475e-05,
"loss": 0.9655,
"step": 135
},
{
"epoch": 1.386138613861386,
"grad_norm": 9.904062271118164,
"learning_rate": 2.9963235294117646e-05,
"loss": 0.966,
"step": 140
},
{
"epoch": 1.4356435643564356,
"grad_norm": 6.124065399169922,
"learning_rate": 2.9044117647058828e-05,
"loss": 0.9243,
"step": 145
},
{
"epoch": 1.4851485148514851,
"grad_norm": 6.805947303771973,
"learning_rate": 2.8125000000000003e-05,
"loss": 0.9398,
"step": 150
},
{
"epoch": 1.5346534653465347,
"grad_norm": 16.854427337646484,
"learning_rate": 2.7205882352941174e-05,
"loss": 0.9272,
"step": 155
},
{
"epoch": 1.5841584158415842,
"grad_norm": 31.26000213623047,
"learning_rate": 2.6286764705882356e-05,
"loss": 0.9341,
"step": 160
},
{
"epoch": 1.6336633663366338,
"grad_norm": 17.125900268554688,
"learning_rate": 2.536764705882353e-05,
"loss": 0.9858,
"step": 165
},
{
"epoch": 1.6831683168316833,
"grad_norm": 10.775436401367188,
"learning_rate": 2.4448529411764705e-05,
"loss": 0.8983,
"step": 170
},
{
"epoch": 1.7326732673267327,
"grad_norm": 11.20962905883789,
"learning_rate": 2.3529411764705884e-05,
"loss": 0.9343,
"step": 175
},
{
"epoch": 1.7821782178217822,
"grad_norm": 20.909711837768555,
"learning_rate": 2.261029411764706e-05,
"loss": 0.9835,
"step": 180
},
{
"epoch": 1.8316831683168315,
"grad_norm": 9.628500938415527,
"learning_rate": 2.1691176470588237e-05,
"loss": 0.9921,
"step": 185
},
{
"epoch": 1.881188118811881,
"grad_norm": 10.983928680419922,
"learning_rate": 2.0772058823529415e-05,
"loss": 0.9928,
"step": 190
},
{
"epoch": 1.9306930693069306,
"grad_norm": 18.505460739135742,
"learning_rate": 1.9852941176470586e-05,
"loss": 0.8767,
"step": 195
},
{
"epoch": 1.9801980198019802,
"grad_norm": 33.27599334716797,
"learning_rate": 1.8933823529411765e-05,
"loss": 0.9311,
"step": 200
},
{
"epoch": 2.0,
"eval_accuracy": 0.5074626865671642,
"eval_f1_macro": 0.22442244224422445,
"eval_f1_micro": 0.5074626865671642,
"eval_f1_weighted": 0.34165804640165515,
"eval_loss": 0.9343423247337341,
"eval_precision_macro": 0.1691542288557214,
"eval_precision_micro": 0.5074626865671642,
"eval_precision_weighted": 0.2575183782579639,
"eval_recall_macro": 0.3333333333333333,
"eval_recall_micro": 0.5074626865671642,
"eval_recall_weighted": 0.5074626865671642,
"eval_runtime": 25.7511,
"eval_samples_per_second": 7.806,
"eval_steps_per_second": 0.505,
"step": 202
},
{
"epoch": 2.0297029702970297,
"grad_norm": 16.32452392578125,
"learning_rate": 1.8014705882352943e-05,
"loss": 0.9146,
"step": 205
},
{
"epoch": 2.0792079207920793,
"grad_norm": 20.738645553588867,
"learning_rate": 1.7095588235294118e-05,
"loss": 0.9345,
"step": 210
},
{
"epoch": 2.128712871287129,
"grad_norm": 11.859530448913574,
"learning_rate": 1.6176470588235296e-05,
"loss": 0.892,
"step": 215
},
{
"epoch": 2.1782178217821784,
"grad_norm": 9.968949317932129,
"learning_rate": 1.5257352941176473e-05,
"loss": 0.9616,
"step": 220
},
{
"epoch": 2.227722772277228,
"grad_norm": 21.256311416625977,
"learning_rate": 1.4338235294117647e-05,
"loss": 0.9486,
"step": 225
},
{
"epoch": 2.2772277227722775,
"grad_norm": 8.86528491973877,
"learning_rate": 1.3419117647058824e-05,
"loss": 0.8652,
"step": 230
},
{
"epoch": 2.3267326732673266,
"grad_norm": 14.925371170043945,
"learning_rate": 1.25e-05,
"loss": 0.8879,
"step": 235
},
{
"epoch": 2.376237623762376,
"grad_norm": 19.46267318725586,
"learning_rate": 1.1580882352941177e-05,
"loss": 0.8791,
"step": 240
},
{
"epoch": 2.4257425742574257,
"grad_norm": 20.021953582763672,
"learning_rate": 1.0661764705882354e-05,
"loss": 0.9569,
"step": 245
},
{
"epoch": 2.4752475247524752,
"grad_norm": 11.353257179260254,
"learning_rate": 9.74264705882353e-06,
"loss": 0.8624,
"step": 250
},
{
"epoch": 2.5247524752475248,
"grad_norm": 14.153010368347168,
"learning_rate": 8.823529411764707e-06,
"loss": 0.9545,
"step": 255
},
{
"epoch": 2.5742574257425743,
"grad_norm": 5.752263069152832,
"learning_rate": 7.904411764705882e-06,
"loss": 0.8787,
"step": 260
},
{
"epoch": 2.623762376237624,
"grad_norm": 16.687803268432617,
"learning_rate": 6.985294117647059e-06,
"loss": 0.985,
"step": 265
},
{
"epoch": 2.6732673267326734,
"grad_norm": 20.000131607055664,
"learning_rate": 6.066176470588236e-06,
"loss": 0.9866,
"step": 270
},
{
"epoch": 2.7227722772277225,
"grad_norm": 7.210810661315918,
"learning_rate": 5.147058823529412e-06,
"loss": 0.8979,
"step": 275
},
{
"epoch": 2.772277227722772,
"grad_norm": 15.409006118774414,
"learning_rate": 4.227941176470589e-06,
"loss": 1.0289,
"step": 280
},
{
"epoch": 2.8217821782178216,
"grad_norm": 14.048492431640625,
"learning_rate": 3.308823529411765e-06,
"loss": 0.882,
"step": 285
},
{
"epoch": 2.871287128712871,
"grad_norm": 12.120644569396973,
"learning_rate": 2.389705882352941e-06,
"loss": 0.8982,
"step": 290
},
{
"epoch": 2.9207920792079207,
"grad_norm": 18.88411521911621,
"learning_rate": 1.4705882352941177e-06,
"loss": 0.9771,
"step": 295
},
{
"epoch": 2.9702970297029703,
"grad_norm": 13.562541961669922,
"learning_rate": 5.514705882352942e-07,
"loss": 0.8367,
"step": 300
},
{
"epoch": 3.0,
"eval_accuracy": 0.5124378109452736,
"eval_f1_macro": 0.23360438147930815,
"eval_f1_micro": 0.5124378109452736,
"eval_f1_weighted": 0.3526136606764151,
"eval_loss": 0.9045712351799011,
"eval_precision_macro": 0.5033333333333333,
"eval_precision_micro": 0.5124378109452736,
"eval_precision_weighted": 0.6468656716417911,
"eval_recall_macro": 0.3376068376068376,
"eval_recall_micro": 0.5124378109452736,
"eval_recall_weighted": 0.5124378109452736,
"eval_runtime": 22.9513,
"eval_samples_per_second": 8.758,
"eval_steps_per_second": 0.566,
"step": 303
}
],
"logging_steps": 5,
"max_steps": 303,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.103312140783002e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}