{
  "best_metric": 0.9045712351799011,
  "best_model_checkpoint": "autotrain-45ui2-ce6i6/checkpoint-303",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 303,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04950495049504951,
      "grad_norm": 6.708970069885254,
      "learning_rate": 8.064516129032258e-06,
      "loss": 1.084,
      "step": 5
    },
    {
      "epoch": 0.09900990099009901,
      "grad_norm": 6.528580188751221,
      "learning_rate": 1.6129032258064517e-05,
      "loss": 1.0713,
      "step": 10
    },
    {
      "epoch": 0.1485148514851485,
      "grad_norm": 8.834931373596191,
      "learning_rate": 2.4193548387096777e-05,
      "loss": 1.0738,
      "step": 15
    },
    {
      "epoch": 0.19801980198019803,
      "grad_norm": 14.24979305267334,
      "learning_rate": 3.2258064516129034e-05,
      "loss": 1.0781,
      "step": 20
    },
    {
      "epoch": 0.24752475247524752,
      "grad_norm": 11.004271507263184,
      "learning_rate": 4.032258064516129e-05,
      "loss": 1.0461,
      "step": 25
    },
    {
      "epoch": 0.297029702970297,
      "grad_norm": 10.543661117553711,
      "learning_rate": 4.8387096774193554e-05,
      "loss": 1.0577,
      "step": 30
    },
    {
      "epoch": 0.3465346534653465,
      "grad_norm": 10.67061710357666,
      "learning_rate": 4.9264705882352944e-05,
      "loss": 1.0457,
      "step": 35
    },
    {
      "epoch": 0.39603960396039606,
      "grad_norm": 9.407144546508789,
      "learning_rate": 4.834558823529412e-05,
      "loss": 1.0281,
      "step": 40
    },
    {
      "epoch": 0.44554455445544555,
      "grad_norm": 11.587020874023438,
      "learning_rate": 4.742647058823529e-05,
      "loss": 1.0348,
      "step": 45
    },
    {
      "epoch": 0.49504950495049505,
      "grad_norm": 7.882496356964111,
      "learning_rate": 4.6507352941176475e-05,
      "loss": 1.0052,
      "step": 50
    },
    {
      "epoch": 0.5445544554455446,
      "grad_norm": 5.979856014251709,
      "learning_rate": 4.558823529411765e-05,
      "loss": 1.0017,
      "step": 55
    },
    {
      "epoch": 0.594059405940594,
      "grad_norm": 10.8490629196167,
      "learning_rate": 4.4669117647058825e-05,
      "loss": 1.0315,
      "step": 60
    },
    {
      "epoch": 0.6435643564356436,
      "grad_norm": 15.452676773071289,
      "learning_rate": 4.375e-05,
      "loss": 1.0226,
      "step": 65
    },
    {
      "epoch": 0.693069306930693,
      "grad_norm": 12.206670761108398,
      "learning_rate": 4.2830882352941174e-05,
      "loss": 1.0166,
      "step": 70
    },
    {
      "epoch": 0.7425742574257426,
      "grad_norm": 10.786033630371094,
      "learning_rate": 4.1911764705882356e-05,
      "loss": 0.99,
      "step": 75
    },
    {
      "epoch": 0.7920792079207921,
      "grad_norm": 7.7128472328186035,
      "learning_rate": 4.099264705882353e-05,
      "loss": 0.9677,
      "step": 80
    },
    {
      "epoch": 0.8415841584158416,
      "grad_norm": 10.314261436462402,
      "learning_rate": 4.007352941176471e-05,
      "loss": 1.0241,
      "step": 85
    },
    {
      "epoch": 0.8910891089108911,
      "grad_norm": 8.401986122131348,
      "learning_rate": 3.915441176470588e-05,
      "loss": 0.9343,
      "step": 90
    },
    {
      "epoch": 0.9405940594059405,
      "grad_norm": 10.88681697845459,
      "learning_rate": 3.8235294117647055e-05,
      "loss": 0.9632,
      "step": 95
    },
    {
      "epoch": 0.9900990099009901,
      "grad_norm": 8.095929145812988,
      "learning_rate": 3.731617647058824e-05,
      "loss": 0.9717,
      "step": 100
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5024875621890548,
      "eval_f1_macro": 0.23045267489711932,
      "eval_f1_micro": 0.5024875621890548,
      "eval_f1_weighted": 0.34789017873594985,
      "eval_loss": 0.9706681966781616,
      "eval_precision_macro": 0.27946127946127947,
      "eval_precision_micro": 0.5024875621890548,
      "eval_precision_weighted": 0.3856475199758782,
      "eval_recall_macro": 0.3310708898944193,
      "eval_recall_micro": 0.5024875621890548,
      "eval_recall_weighted": 0.5024875621890548,
      "eval_runtime": 23.9303,
      "eval_samples_per_second": 8.399,
      "eval_steps_per_second": 0.543,
      "step": 101
    },
    {
      "epoch": 1.0396039603960396,
      "grad_norm": 20.65790367126465,
      "learning_rate": 3.639705882352941e-05,
      "loss": 0.9095,
      "step": 105
    },
    {
      "epoch": 1.0891089108910892,
      "grad_norm": 12.614889144897461,
      "learning_rate": 3.5477941176470594e-05,
      "loss": 0.9053,
      "step": 110
    },
    {
      "epoch": 1.1386138613861387,
      "grad_norm": 17.137245178222656,
      "learning_rate": 3.455882352941177e-05,
      "loss": 0.9321,
      "step": 115
    },
    {
      "epoch": 1.188118811881188,
      "grad_norm": 10.98409652709961,
      "learning_rate": 3.363970588235294e-05,
      "loss": 0.977,
      "step": 120
    },
    {
      "epoch": 1.2376237623762376,
      "grad_norm": 6.829283714294434,
      "learning_rate": 3.272058823529412e-05,
      "loss": 0.9677,
      "step": 125
    },
    {
      "epoch": 1.2871287128712872,
      "grad_norm": 7.726484298706055,
      "learning_rate": 3.180147058823529e-05,
      "loss": 0.9311,
      "step": 130
    },
    {
      "epoch": 1.3366336633663367,
      "grad_norm": 9.711603164672852,
      "learning_rate": 3.0882352941176475e-05,
      "loss": 0.9655,
      "step": 135
    },
    {
      "epoch": 1.386138613861386,
      "grad_norm": 9.904062271118164,
      "learning_rate": 2.9963235294117646e-05,
      "loss": 0.966,
      "step": 140
    },
    {
      "epoch": 1.4356435643564356,
      "grad_norm": 6.124065399169922,
      "learning_rate": 2.9044117647058828e-05,
      "loss": 0.9243,
      "step": 145
    },
    {
      "epoch": 1.4851485148514851,
      "grad_norm": 6.805947303771973,
      "learning_rate": 2.8125000000000003e-05,
      "loss": 0.9398,
      "step": 150
    },
    {
      "epoch": 1.5346534653465347,
      "grad_norm": 16.854427337646484,
      "learning_rate": 2.7205882352941174e-05,
      "loss": 0.9272,
      "step": 155
    },
    {
      "epoch": 1.5841584158415842,
      "grad_norm": 31.26000213623047,
      "learning_rate": 2.6286764705882356e-05,
      "loss": 0.9341,
      "step": 160
    },
    {
      "epoch": 1.6336633663366338,
      "grad_norm": 17.125900268554688,
      "learning_rate": 2.536764705882353e-05,
      "loss": 0.9858,
      "step": 165
    },
    {
      "epoch": 1.6831683168316833,
      "grad_norm": 10.775436401367188,
      "learning_rate": 2.4448529411764705e-05,
      "loss": 0.8983,
      "step": 170
    },
    {
      "epoch": 1.7326732673267327,
      "grad_norm": 11.20962905883789,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 0.9343,
      "step": 175
    },
    {
      "epoch": 1.7821782178217822,
      "grad_norm": 20.909711837768555,
      "learning_rate": 2.261029411764706e-05,
      "loss": 0.9835,
      "step": 180
    },
    {
      "epoch": 1.8316831683168315,
      "grad_norm": 9.628500938415527,
      "learning_rate": 2.1691176470588237e-05,
      "loss": 0.9921,
      "step": 185
    },
    {
      "epoch": 1.881188118811881,
      "grad_norm": 10.983928680419922,
      "learning_rate": 2.0772058823529415e-05,
      "loss": 0.9928,
      "step": 190
    },
    {
      "epoch": 1.9306930693069306,
      "grad_norm": 18.505460739135742,
      "learning_rate": 1.9852941176470586e-05,
      "loss": 0.8767,
      "step": 195
    },
    {
      "epoch": 1.9801980198019802,
      "grad_norm": 33.27599334716797,
      "learning_rate": 1.8933823529411765e-05,
      "loss": 0.9311,
      "step": 200
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5074626865671642,
      "eval_f1_macro": 0.22442244224422445,
      "eval_f1_micro": 0.5074626865671642,
      "eval_f1_weighted": 0.34165804640165515,
      "eval_loss": 0.9343423247337341,
      "eval_precision_macro": 0.1691542288557214,
      "eval_precision_micro": 0.5074626865671642,
      "eval_precision_weighted": 0.2575183782579639,
      "eval_recall_macro": 0.3333333333333333,
      "eval_recall_micro": 0.5074626865671642,
      "eval_recall_weighted": 0.5074626865671642,
      "eval_runtime": 25.7511,
      "eval_samples_per_second": 7.806,
      "eval_steps_per_second": 0.505,
      "step": 202
    },
    {
      "epoch": 2.0297029702970297,
      "grad_norm": 16.32452392578125,
      "learning_rate": 1.8014705882352943e-05,
      "loss": 0.9146,
      "step": 205
    },
    {
      "epoch": 2.0792079207920793,
      "grad_norm": 20.738645553588867,
      "learning_rate": 1.7095588235294118e-05,
      "loss": 0.9345,
      "step": 210
    },
    {
      "epoch": 2.128712871287129,
      "grad_norm": 11.859530448913574,
      "learning_rate": 1.6176470588235296e-05,
      "loss": 0.892,
      "step": 215
    },
    {
      "epoch": 2.1782178217821784,
      "grad_norm": 9.968949317932129,
      "learning_rate": 1.5257352941176473e-05,
      "loss": 0.9616,
      "step": 220
    },
    {
      "epoch": 2.227722772277228,
      "grad_norm": 21.256311416625977,
      "learning_rate": 1.4338235294117647e-05,
      "loss": 0.9486,
      "step": 225
    },
    {
      "epoch": 2.2772277227722775,
      "grad_norm": 8.86528491973877,
      "learning_rate": 1.3419117647058824e-05,
      "loss": 0.8652,
      "step": 230
    },
    {
      "epoch": 2.3267326732673266,
      "grad_norm": 14.925371170043945,
      "learning_rate": 1.25e-05,
      "loss": 0.8879,
      "step": 235
    },
    {
      "epoch": 2.376237623762376,
      "grad_norm": 19.46267318725586,
      "learning_rate": 1.1580882352941177e-05,
      "loss": 0.8791,
      "step": 240
    },
    {
      "epoch": 2.4257425742574257,
      "grad_norm": 20.021953582763672,
      "learning_rate": 1.0661764705882354e-05,
      "loss": 0.9569,
      "step": 245
    },
    {
      "epoch": 2.4752475247524752,
      "grad_norm": 11.353257179260254,
      "learning_rate": 9.74264705882353e-06,
      "loss": 0.8624,
      "step": 250
    },
    {
      "epoch": 2.5247524752475248,
      "grad_norm": 14.153010368347168,
      "learning_rate": 8.823529411764707e-06,
      "loss": 0.9545,
      "step": 255
    },
    {
      "epoch": 2.5742574257425743,
      "grad_norm": 5.752263069152832,
      "learning_rate": 7.904411764705882e-06,
      "loss": 0.8787,
      "step": 260
    },
    {
      "epoch": 2.623762376237624,
      "grad_norm": 16.687803268432617,
      "learning_rate": 6.985294117647059e-06,
      "loss": 0.985,
      "step": 265
    },
    {
      "epoch": 2.6732673267326734,
      "grad_norm": 20.000131607055664,
      "learning_rate": 6.066176470588236e-06,
      "loss": 0.9866,
      "step": 270
    },
    {
      "epoch": 2.7227722772277225,
      "grad_norm": 7.210810661315918,
      "learning_rate": 5.147058823529412e-06,
      "loss": 0.8979,
      "step": 275
    },
    {
      "epoch": 2.772277227722772,
      "grad_norm": 15.409006118774414,
      "learning_rate": 4.227941176470589e-06,
      "loss": 1.0289,
      "step": 280
    },
    {
      "epoch": 2.8217821782178216,
      "grad_norm": 14.048492431640625,
      "learning_rate": 3.308823529411765e-06,
      "loss": 0.882,
      "step": 285
    },
    {
      "epoch": 2.871287128712871,
      "grad_norm": 12.120644569396973,
      "learning_rate": 2.389705882352941e-06,
      "loss": 0.8982,
      "step": 290
    },
    {
      "epoch": 2.9207920792079207,
      "grad_norm": 18.88411521911621,
      "learning_rate": 1.4705882352941177e-06,
      "loss": 0.9771,
      "step": 295
    },
    {
      "epoch": 2.9702970297029703,
      "grad_norm": 13.562541961669922,
      "learning_rate": 5.514705882352942e-07,
      "loss": 0.8367,
      "step": 300
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5124378109452736,
      "eval_f1_macro": 0.23360438147930815,
      "eval_f1_micro": 0.5124378109452736,
      "eval_f1_weighted": 0.3526136606764151,
      "eval_loss": 0.9045712351799011,
      "eval_precision_macro": 0.5033333333333333,
      "eval_precision_micro": 0.5124378109452736,
      "eval_precision_weighted": 0.6468656716417911,
      "eval_recall_macro": 0.3376068376068376,
      "eval_recall_micro": 0.5124378109452736,
      "eval_recall_weighted": 0.5124378109452736,
      "eval_runtime": 22.9513,
      "eval_samples_per_second": 8.758,
      "eval_steps_per_second": 0.566,
      "step": 303
    }
  ],
  "logging_steps": 5,
  "max_steps": 303,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.103312140783002e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}