|
{ |
|
"best_metric": 0.1198035180568695, |
|
"best_model_checkpoint": "DinoVdeauTest-large-2024_09_24-batch-size32_freeze/checkpoint-26208", |
|
"epoch": 106.0, |
|
"eval_steps": 500, |
|
"global_step": 28938, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2169092169092169, |
|
"eval_f1_macro": 0.5520222156367808, |
|
"eval_f1_micro": 0.7485599799649386, |
|
"eval_loss": 0.17755262553691864, |
|
"eval_runtime": 583.8306, |
|
"eval_samples_per_second": 4.943, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.8315018315018317, |
|
"grad_norm": 0.2782972455024719, |
|
"learning_rate": 0.001, |
|
"loss": 0.2736, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.24047124047124047, |
|
"eval_f1_macro": 0.5698350844395493, |
|
"eval_f1_micro": 0.7693301744561706, |
|
"eval_loss": 0.15282750129699707, |
|
"eval_runtime": 589.1224, |
|
"eval_samples_per_second": 4.899, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.2494802494802495, |
|
"eval_f1_macro": 0.6216984689952817, |
|
"eval_f1_micro": 0.7773639211038686, |
|
"eval_loss": 0.14834792912006378, |
|
"eval_runtime": 584.7179, |
|
"eval_samples_per_second": 4.936, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 0.260547012090683, |
|
"learning_rate": 0.001, |
|
"loss": 0.1699, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.2553707553707554, |
|
"eval_f1_macro": 0.6272242063327677, |
|
"eval_f1_micro": 0.777158535539741, |
|
"eval_loss": 0.1467081755399704, |
|
"eval_runtime": 581.5001, |
|
"eval_samples_per_second": 4.963, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.24636174636174638, |
|
"eval_f1_macro": 0.6281234234815563, |
|
"eval_f1_micro": 0.7772219080349763, |
|
"eval_loss": 0.1453460305929184, |
|
"eval_runtime": 586.5696, |
|
"eval_samples_per_second": 4.92, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.001, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.18768520653247833, |
|
"learning_rate": 0.001, |
|
"loss": 0.1622, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.262993762993763, |
|
"eval_f1_macro": 0.6191104313765434, |
|
"eval_f1_micro": 0.7809704878364948, |
|
"eval_loss": 0.14370940625667572, |
|
"eval_runtime": 588.2849, |
|
"eval_samples_per_second": 4.906, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.001, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.2598752598752599, |
|
"eval_f1_macro": 0.6172423187752968, |
|
"eval_f1_micro": 0.7811054928907905, |
|
"eval_loss": 0.14275749027729034, |
|
"eval_runtime": 590.2652, |
|
"eval_samples_per_second": 4.889, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.001, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 7.326007326007326, |
|
"grad_norm": 0.21249784529209137, |
|
"learning_rate": 0.001, |
|
"loss": 0.1593, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.25571725571725573, |
|
"eval_f1_macro": 0.6252572102986063, |
|
"eval_f1_micro": 0.7799277605779154, |
|
"eval_loss": 0.1432911604642868, |
|
"eval_runtime": 589.0928, |
|
"eval_samples_per_second": 4.899, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.001, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.25190575190575193, |
|
"eval_f1_macro": 0.6529741984810739, |
|
"eval_f1_micro": 0.7892645815722739, |
|
"eval_loss": 0.14202255010604858, |
|
"eval_runtime": 586.5195, |
|
"eval_samples_per_second": 4.921, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.001, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 9.157509157509157, |
|
"grad_norm": 0.19528812170028687, |
|
"learning_rate": 0.001, |
|
"loss": 0.1569, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.2553707553707554, |
|
"eval_f1_macro": 0.6021759252294137, |
|
"eval_f1_micro": 0.7441173839133935, |
|
"eval_loss": 0.24619852006435394, |
|
"eval_runtime": 594.8068, |
|
"eval_samples_per_second": 4.852, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 0.001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"grad_norm": 0.16383688151836395, |
|
"learning_rate": 0.001, |
|
"loss": 0.156, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.2702702702702703, |
|
"eval_f1_macro": 0.6483450509947135, |
|
"eval_f1_micro": 0.7882727742379886, |
|
"eval_loss": 0.14001792669296265, |
|
"eval_runtime": 585.5736, |
|
"eval_samples_per_second": 4.929, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.001, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.2598752598752599, |
|
"eval_f1_macro": 0.6590290067827226, |
|
"eval_f1_micro": 0.7905884326636748, |
|
"eval_loss": 0.13999028503894806, |
|
"eval_runtime": 594.5312, |
|
"eval_samples_per_second": 4.854, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 0.001, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 12.820512820512821, |
|
"grad_norm": 0.164137601852417, |
|
"learning_rate": 0.001, |
|
"loss": 0.1547, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.2643797643797644, |
|
"eval_f1_macro": 0.6466696619852873, |
|
"eval_f1_micro": 0.7875647668393783, |
|
"eval_loss": 0.13937941193580627, |
|
"eval_runtime": 616.229, |
|
"eval_samples_per_second": 4.683, |
|
"eval_steps_per_second": 0.148, |
|
"learning_rate": 0.001, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.26403326403326405, |
|
"eval_f1_macro": 0.6469191780902835, |
|
"eval_f1_micro": 0.7879462374796679, |
|
"eval_loss": 0.13986562192440033, |
|
"eval_runtime": 604.9775, |
|
"eval_samples_per_second": 4.77, |
|
"eval_steps_per_second": 0.15, |
|
"learning_rate": 0.001, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 14.652014652014651, |
|
"grad_norm": 0.17560669779777527, |
|
"learning_rate": 0.001, |
|
"loss": 0.1543, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.2512127512127512, |
|
"eval_f1_macro": 0.6413408300553888, |
|
"eval_f1_micro": 0.7881215116526777, |
|
"eval_loss": 0.13914281129837036, |
|
"eval_runtime": 609.152, |
|
"eval_samples_per_second": 4.738, |
|
"eval_steps_per_second": 0.149, |
|
"learning_rate": 0.001, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.26334026334026334, |
|
"eval_f1_macro": 0.6347762053394217, |
|
"eval_f1_micro": 0.7906518010291596, |
|
"eval_loss": 0.14137022197246552, |
|
"eval_runtime": 599.9977, |
|
"eval_samples_per_second": 4.81, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 0.001, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 16.483516483516482, |
|
"grad_norm": 0.16528823971748352, |
|
"learning_rate": 0.001, |
|
"loss": 0.1536, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.2616077616077616, |
|
"eval_f1_macro": 0.6445091939759507, |
|
"eval_f1_micro": 0.7903470350404312, |
|
"eval_loss": 0.14031356573104858, |
|
"eval_runtime": 579.4575, |
|
"eval_samples_per_second": 4.981, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.001, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.26022176022176025, |
|
"eval_f1_macro": 0.6463740590949281, |
|
"eval_f1_micro": 0.792909822326517, |
|
"eval_loss": 0.14039942622184753, |
|
"eval_runtime": 586.9618, |
|
"eval_samples_per_second": 4.917, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.001, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 18.315018315018314, |
|
"grad_norm": 0.17777948081493378, |
|
"learning_rate": 0.001, |
|
"loss": 0.1556, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.25848925848925847, |
|
"eval_f1_macro": 0.6526097604635809, |
|
"eval_f1_micro": 0.7935513900055008, |
|
"eval_loss": 0.14035724103450775, |
|
"eval_runtime": 574.4193, |
|
"eval_samples_per_second": 5.024, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.255024255024255, |
|
"eval_f1_macro": 0.6491113591903472, |
|
"eval_f1_micro": 0.7900462566386842, |
|
"eval_loss": 0.1390993297100067, |
|
"eval_runtime": 576.4077, |
|
"eval_samples_per_second": 5.007, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 20.146520146520146, |
|
"grad_norm": 0.14455388486385345, |
|
"learning_rate": 0.001, |
|
"loss": 0.1534, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.25814275814275817, |
|
"eval_f1_macro": 0.6507882842254223, |
|
"eval_f1_micro": 0.7916862276471341, |
|
"eval_loss": 0.13827534019947052, |
|
"eval_runtime": 598.284, |
|
"eval_samples_per_second": 4.824, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 0.001, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 21.978021978021978, |
|
"grad_norm": 0.1771659553050995, |
|
"learning_rate": 0.001, |
|
"loss": 0.1533, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.26853776853776856, |
|
"eval_f1_macro": 0.6489112697391156, |
|
"eval_f1_micro": 0.7935178133197935, |
|
"eval_loss": 0.13889800012111664, |
|
"eval_runtime": 594.9509, |
|
"eval_samples_per_second": 4.851, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 0.001, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.2560637560637561, |
|
"eval_f1_macro": 0.6549504216490796, |
|
"eval_f1_micro": 0.7842915050342152, |
|
"eval_loss": 0.1385144591331482, |
|
"eval_runtime": 583.6881, |
|
"eval_samples_per_second": 4.944, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 6279 |
|
}, |
|
{ |
|
"epoch": 23.80952380952381, |
|
"grad_norm": 0.17443060874938965, |
|
"learning_rate": 0.001, |
|
"loss": 0.1531, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.2695772695772696, |
|
"eval_f1_macro": 0.6507209930045859, |
|
"eval_f1_micro": 0.7921174652241113, |
|
"eval_loss": 0.13674499094486237, |
|
"eval_runtime": 574.2321, |
|
"eval_samples_per_second": 5.026, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.2751212751212751, |
|
"eval_f1_macro": 0.6408412361180978, |
|
"eval_f1_micro": 0.7893792922444337, |
|
"eval_loss": 0.13791973888874054, |
|
"eval_runtime": 577.6745, |
|
"eval_samples_per_second": 4.996, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 25.641025641025642, |
|
"grad_norm": 0.16064241528511047, |
|
"learning_rate": 0.001, |
|
"loss": 0.1533, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.27096327096327094, |
|
"eval_f1_macro": 0.6468889221342444, |
|
"eval_f1_micro": 0.7942844956280657, |
|
"eval_loss": 0.1375003606081009, |
|
"eval_runtime": 576.2047, |
|
"eval_samples_per_second": 5.009, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 7098 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.2643797643797644, |
|
"eval_f1_macro": 0.6516252219978993, |
|
"eval_f1_micro": 0.7921208479958802, |
|
"eval_loss": 0.13920167088508606, |
|
"eval_runtime": 574.3969, |
|
"eval_samples_per_second": 5.024, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 27.47252747252747, |
|
"grad_norm": 0.15187925100326538, |
|
"learning_rate": 0.001, |
|
"loss": 0.1529, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.2636867636867637, |
|
"eval_f1_macro": 0.6424574000169906, |
|
"eval_f1_micro": 0.7917504599717599, |
|
"eval_loss": 0.13846370577812195, |
|
"eval_runtime": 583.6888, |
|
"eval_samples_per_second": 4.944, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.001, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.2626472626472626, |
|
"eval_f1_macro": 0.6496760555529415, |
|
"eval_f1_micro": 0.7883173722754258, |
|
"eval_loss": 0.14017289876937866, |
|
"eval_runtime": 569.4194, |
|
"eval_samples_per_second": 5.068, |
|
"eval_steps_per_second": 0.16, |
|
"learning_rate": 0.001, |
|
"step": 7917 |
|
}, |
|
{ |
|
"epoch": 29.304029304029303, |
|
"grad_norm": 0.1429462879896164, |
|
"learning_rate": 0.001, |
|
"loss": 0.153, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.2668052668052668, |
|
"eval_f1_macro": 0.6553418932728605, |
|
"eval_f1_micro": 0.788684508761224, |
|
"eval_loss": 0.13773277401924133, |
|
"eval_runtime": 575.6917, |
|
"eval_samples_per_second": 5.013, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.001, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.27893277893277896, |
|
"eval_f1_macro": 0.6718196714045327, |
|
"eval_f1_micro": 0.8017825006427286, |
|
"eval_loss": 0.1312580555677414, |
|
"eval_runtime": 580.0807, |
|
"eval_samples_per_second": 4.975, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 8463 |
|
}, |
|
{ |
|
"epoch": 31.135531135531135, |
|
"grad_norm": 0.1542915403842926, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1486, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.2806652806652807, |
|
"eval_f1_macro": 0.6772473958552189, |
|
"eval_f1_micro": 0.8060657118786858, |
|
"eval_loss": 0.1318267583847046, |
|
"eval_runtime": 567.7754, |
|
"eval_samples_per_second": 5.083, |
|
"eval_steps_per_second": 0.16, |
|
"learning_rate": 0.0001, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 32.967032967032964, |
|
"grad_norm": 0.1691681146621704, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1415, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.27754677754677753, |
|
"eval_f1_macro": 0.6791810095569879, |
|
"eval_f1_micro": 0.8050148746281343, |
|
"eval_loss": 0.1309264600276947, |
|
"eval_runtime": 573.2692, |
|
"eval_samples_per_second": 5.034, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.0001, |
|
"step": 9009 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.28205128205128205, |
|
"eval_f1_macro": 0.6774879025186358, |
|
"eval_f1_micro": 0.804873906767428, |
|
"eval_loss": 0.1296369731426239, |
|
"eval_runtime": 573.8273, |
|
"eval_samples_per_second": 5.029, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.0001, |
|
"step": 9282 |
|
}, |
|
{ |
|
"epoch": 34.798534798534796, |
|
"grad_norm": 0.16409221291542053, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1395, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.28932778932778935, |
|
"eval_f1_macro": 0.6864816840760736, |
|
"eval_f1_micro": 0.8084945013272659, |
|
"eval_loss": 0.12816031277179718, |
|
"eval_runtime": 577.8146, |
|
"eval_samples_per_second": 4.995, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.2830907830907831, |
|
"eval_f1_macro": 0.6827519815644174, |
|
"eval_f1_micro": 0.8055151151836945, |
|
"eval_loss": 0.12886497378349304, |
|
"eval_runtime": 576.0485, |
|
"eval_samples_per_second": 5.01, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.0001, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 36.63003663003663, |
|
"grad_norm": 0.1629299372434616, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1387, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.2830907830907831, |
|
"eval_f1_macro": 0.677476009100308, |
|
"eval_f1_micro": 0.8054549328787556, |
|
"eval_loss": 0.12768980860710144, |
|
"eval_runtime": 578.6622, |
|
"eval_samples_per_second": 4.987, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 10101 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.288981288981289, |
|
"eval_f1_macro": 0.6882635597933275, |
|
"eval_f1_micro": 0.8084112149532711, |
|
"eval_loss": 0.1274958997964859, |
|
"eval_runtime": 583.3399, |
|
"eval_samples_per_second": 4.947, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 10374 |
|
}, |
|
{ |
|
"epoch": 38.46153846153846, |
|
"grad_norm": 0.20412367582321167, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1354, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.28794178794178793, |
|
"eval_f1_macro": 0.6853651532137502, |
|
"eval_f1_micro": 0.8099270562589084, |
|
"eval_loss": 0.12658803164958954, |
|
"eval_runtime": 588.9538, |
|
"eval_samples_per_second": 4.9, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 10647 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.28863478863478864, |
|
"eval_f1_macro": 0.698146081234717, |
|
"eval_f1_micro": 0.8117343111925994, |
|
"eval_loss": 0.12820227444171906, |
|
"eval_runtime": 583.5775, |
|
"eval_samples_per_second": 4.945, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 40.29304029304029, |
|
"grad_norm": 0.1920085847377777, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1355, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.2882882882882883, |
|
"eval_f1_macro": 0.6850722645729148, |
|
"eval_f1_micro": 0.8081688455951466, |
|
"eval_loss": 0.12668322026729584, |
|
"eval_runtime": 580.9481, |
|
"eval_samples_per_second": 4.968, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 11193 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.2907137907137907, |
|
"eval_f1_macro": 0.6941693850580287, |
|
"eval_f1_micro": 0.8112331081081081, |
|
"eval_loss": 0.12620903551578522, |
|
"eval_runtime": 587.956, |
|
"eval_samples_per_second": 4.909, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 42.124542124542124, |
|
"grad_norm": 0.20102928578853607, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1347, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.2910602910602911, |
|
"eval_f1_macro": 0.69077693559172, |
|
"eval_f1_micro": 0.8107360033550011, |
|
"eval_loss": 0.12590545415878296, |
|
"eval_runtime": 582.6431, |
|
"eval_samples_per_second": 4.953, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 11739 |
|
}, |
|
{ |
|
"epoch": 43.956043956043956, |
|
"grad_norm": 0.20798929035663605, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1337, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.29313929313929316, |
|
"eval_f1_macro": 0.6925170228864211, |
|
"eval_f1_micro": 0.8114530416894076, |
|
"eval_loss": 0.12642185389995575, |
|
"eval_runtime": 580.9769, |
|
"eval_samples_per_second": 4.967, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 12012 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.2966042966042966, |
|
"eval_f1_macro": 0.6974852591826903, |
|
"eval_f1_micro": 0.8110017663386323, |
|
"eval_loss": 0.1257808804512024, |
|
"eval_runtime": 587.2129, |
|
"eval_samples_per_second": 4.915, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 45.78754578754579, |
|
"grad_norm": 0.21279653906822205, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1329, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.2986832986832987, |
|
"eval_f1_macro": 0.694112624204446, |
|
"eval_f1_micro": 0.8109046268467172, |
|
"eval_loss": 0.12542255222797394, |
|
"eval_runtime": 584.2715, |
|
"eval_samples_per_second": 4.939, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.2920997920997921, |
|
"eval_f1_macro": 0.6937033329875891, |
|
"eval_f1_micro": 0.8098117995347853, |
|
"eval_loss": 0.1256514936685562, |
|
"eval_runtime": 582.8653, |
|
"eval_samples_per_second": 4.951, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 12831 |
|
}, |
|
{ |
|
"epoch": 47.61904761904762, |
|
"grad_norm": 0.19104164838790894, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1331, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.29140679140679143, |
|
"eval_f1_macro": 0.6904792900172609, |
|
"eval_f1_micro": 0.8106740862526874, |
|
"eval_loss": 0.1253652125597, |
|
"eval_runtime": 590.5724, |
|
"eval_samples_per_second": 4.887, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.0001, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.2945252945252945, |
|
"eval_f1_macro": 0.6973960572840976, |
|
"eval_f1_micro": 0.8136526746223817, |
|
"eval_loss": 0.1252448409795761, |
|
"eval_runtime": 596.869, |
|
"eval_samples_per_second": 4.835, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 0.0001, |
|
"step": 13377 |
|
}, |
|
{ |
|
"epoch": 49.45054945054945, |
|
"grad_norm": 0.25627467036247253, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1309, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.2983367983367983, |
|
"eval_f1_macro": 0.7025806374316313, |
|
"eval_f1_micro": 0.814972104255142, |
|
"eval_loss": 0.12482810020446777, |
|
"eval_runtime": 598.3632, |
|
"eval_samples_per_second": 4.823, |
|
"eval_steps_per_second": 0.152, |
|
"learning_rate": 0.0001, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.2959112959112959, |
|
"eval_f1_macro": 0.7066639116616246, |
|
"eval_f1_micro": 0.815773054365459, |
|
"eval_loss": 0.12458823621273041, |
|
"eval_runtime": 578.8177, |
|
"eval_samples_per_second": 4.986, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 13923 |
|
}, |
|
{ |
|
"epoch": 51.282051282051285, |
|
"grad_norm": 0.2170032560825348, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1304, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.29521829521829523, |
|
"eval_f1_macro": 0.7008785276837475, |
|
"eval_f1_micro": 0.8120651620566774, |
|
"eval_loss": 0.12461517751216888, |
|
"eval_runtime": 581.8661, |
|
"eval_samples_per_second": 4.96, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 14196 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.29902979902979904, |
|
"eval_f1_macro": 0.697381421827107, |
|
"eval_f1_micro": 0.8142762173840784, |
|
"eval_loss": 0.12417320907115936, |
|
"eval_runtime": 587.3587, |
|
"eval_samples_per_second": 4.914, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 14469 |
|
}, |
|
{ |
|
"epoch": 53.11355311355312, |
|
"grad_norm": 0.18614046275615692, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1309, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.2966042966042966, |
|
"eval_f1_macro": 0.7000554609843794, |
|
"eval_f1_micro": 0.8135221716405328, |
|
"eval_loss": 0.12411519885063171, |
|
"eval_runtime": 590.4943, |
|
"eval_samples_per_second": 4.887, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.0001, |
|
"step": 14742 |
|
}, |
|
{ |
|
"epoch": 54.94505494505494, |
|
"grad_norm": 0.296142578125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1289, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.29521829521829523, |
|
"eval_f1_macro": 0.6996806217177095, |
|
"eval_f1_micro": 0.8130868668154448, |
|
"eval_loss": 0.12417341023683548, |
|
"eval_runtime": 580.598, |
|
"eval_samples_per_second": 4.971, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 15015 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.30214830214830213, |
|
"eval_f1_macro": 0.7064033143378504, |
|
"eval_f1_micro": 0.8178580303155802, |
|
"eval_loss": 0.1234845370054245, |
|
"eval_runtime": 578.4648, |
|
"eval_samples_per_second": 4.989, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 15288 |
|
}, |
|
{ |
|
"epoch": 56.776556776556774, |
|
"grad_norm": 0.23026500642299652, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1286, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.2993762993762994, |
|
"eval_f1_macro": 0.6963082116005005, |
|
"eval_f1_micro": 0.8150138550675959, |
|
"eval_loss": 0.12350637465715408, |
|
"eval_runtime": 578.6503, |
|
"eval_samples_per_second": 4.987, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 15561 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.2983367983367983, |
|
"eval_f1_macro": 0.7011763866264856, |
|
"eval_f1_micro": 0.814454598809424, |
|
"eval_loss": 0.12311282008886337, |
|
"eval_runtime": 575.7868, |
|
"eval_samples_per_second": 5.012, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.0001, |
|
"step": 15834 |
|
}, |
|
{ |
|
"epoch": 58.608058608058606, |
|
"grad_norm": 0.2271023392677307, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1282, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.30006930006930005, |
|
"eval_f1_macro": 0.7021882450681799, |
|
"eval_f1_micro": 0.8153207895067678, |
|
"eval_loss": 0.12340909987688065, |
|
"eval_runtime": 574.6752, |
|
"eval_samples_per_second": 5.022, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.0001, |
|
"step": 16107 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.2972972972972973, |
|
"eval_f1_macro": 0.6978379224258093, |
|
"eval_f1_micro": 0.8121788610981358, |
|
"eval_loss": 0.12386388331651688, |
|
"eval_runtime": 580.8249, |
|
"eval_samples_per_second": 4.969, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 60.43956043956044, |
|
"grad_norm": 0.26319652795791626, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1282, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.30145530145530147, |
|
"eval_f1_macro": 0.7114090358686158, |
|
"eval_f1_micro": 0.8157532819337362, |
|
"eval_loss": 0.12355918437242508, |
|
"eval_runtime": 580.0589, |
|
"eval_samples_per_second": 4.975, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 16653 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.3031878031878032, |
|
"eval_f1_macro": 0.7119609165997448, |
|
"eval_f1_micro": 0.8168304358780549, |
|
"eval_loss": 0.12266429513692856, |
|
"eval_runtime": 571.7568, |
|
"eval_samples_per_second": 5.048, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.0001, |
|
"step": 16926 |
|
}, |
|
{ |
|
"epoch": 62.27106227106227, |
|
"grad_norm": 0.2723998725414276, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1265, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.2948717948717949, |
|
"eval_f1_macro": 0.7077249537938423, |
|
"eval_f1_micro": 0.8136965505608501, |
|
"eval_loss": 0.12305888533592224, |
|
"eval_runtime": 579.5124, |
|
"eval_samples_per_second": 4.98, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 17199 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.30561330561330563, |
|
"eval_f1_macro": 0.708440477929098, |
|
"eval_f1_micro": 0.8172187094342783, |
|
"eval_loss": 0.12276986986398697, |
|
"eval_runtime": 580.0855, |
|
"eval_samples_per_second": 4.975, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 64.1025641025641, |
|
"grad_norm": 0.2615499794483185, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1273, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_f1_macro": 0.7103116175502073, |
|
"eval_f1_micro": 0.8182651445712857, |
|
"eval_loss": 0.12323789298534393, |
|
"eval_runtime": 578.4314, |
|
"eval_samples_per_second": 4.989, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 17745 |
|
}, |
|
{ |
|
"epoch": 65.93406593406593, |
|
"grad_norm": 0.278421550989151, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1258, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.30353430353430355, |
|
"eval_f1_macro": 0.706549053618703, |
|
"eval_f1_micro": 0.8178908425822006, |
|
"eval_loss": 0.12264254689216614, |
|
"eval_runtime": 582.1432, |
|
"eval_samples_per_second": 4.958, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 18018 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.3052668052668053, |
|
"eval_f1_macro": 0.7104724530407897, |
|
"eval_f1_micro": 0.8184709429598117, |
|
"eval_loss": 0.12282951176166534, |
|
"eval_runtime": 572.3604, |
|
"eval_samples_per_second": 5.042, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.0001, |
|
"step": 18291 |
|
}, |
|
{ |
|
"epoch": 67.76556776556777, |
|
"grad_norm": 0.33604416251182556, |
|
"learning_rate": 0.0001, |
|
"loss": 0.125, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.3042273042273042, |
|
"eval_f1_macro": 0.7127667888432785, |
|
"eval_f1_micro": 0.818125541661508, |
|
"eval_loss": 0.12277437746524811, |
|
"eval_runtime": 566.9946, |
|
"eval_samples_per_second": 5.09, |
|
"eval_steps_per_second": 0.16, |
|
"learning_rate": 0.0001, |
|
"step": 18564 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.3052668052668053, |
|
"eval_f1_macro": 0.703803449192515, |
|
"eval_f1_micro": 0.8137196924896511, |
|
"eval_loss": 0.12282923609018326, |
|
"eval_runtime": 578.0179, |
|
"eval_samples_per_second": 4.993, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 18837 |
|
}, |
|
{ |
|
"epoch": 69.59706959706959, |
|
"grad_norm": 0.237099289894104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.125, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.30180180180180183, |
|
"eval_f1_macro": 0.7079779831387449, |
|
"eval_f1_micro": 0.8154847434705434, |
|
"eval_loss": 0.1231524795293808, |
|
"eval_runtime": 578.443, |
|
"eval_samples_per_second": 4.989, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 19110 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.29902979902979904, |
|
"eval_f1_macro": 0.7111485120209401, |
|
"eval_f1_micro": 0.8155765340525961, |
|
"eval_loss": 0.12310803681612015, |
|
"eval_runtime": 577.0393, |
|
"eval_samples_per_second": 5.001, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.0001, |
|
"step": 19383 |
|
}, |
|
{ |
|
"epoch": 71.42857142857143, |
|
"grad_norm": 0.31944581866264343, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1245, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.30076230076230076, |
|
"eval_f1_macro": 0.7149554589558469, |
|
"eval_f1_micro": 0.8161826422695988, |
|
"eval_loss": 0.12233748286962509, |
|
"eval_runtime": 578.4014, |
|
"eval_samples_per_second": 4.99, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 19656 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.7041677932151664, |
|
"eval_f1_micro": 0.8173891171292027, |
|
"eval_loss": 0.12232980877161026, |
|
"eval_runtime": 572.1622, |
|
"eval_samples_per_second": 5.044, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 0.0001, |
|
"step": 19929 |
|
}, |
|
{ |
|
"epoch": 73.26007326007326, |
|
"grad_norm": 0.2972647249698639, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1248, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.29625779625779625, |
|
"eval_f1_macro": 0.7009378784168091, |
|
"eval_f1_micro": 0.8124655033329088, |
|
"eval_loss": 0.12370481342077255, |
|
"eval_runtime": 589.0452, |
|
"eval_samples_per_second": 4.899, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.0001, |
|
"step": 20202 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.30457380457380456, |
|
"eval_f1_macro": 0.7045397920320168, |
|
"eval_f1_micro": 0.8151614807319335, |
|
"eval_loss": 0.12251079827547073, |
|
"eval_runtime": 585.6991, |
|
"eval_samples_per_second": 4.927, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 75.0915750915751, |
|
"grad_norm": 0.34343692660331726, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1249, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.30076230076230076, |
|
"eval_f1_macro": 0.7099467053038022, |
|
"eval_f1_micro": 0.8159596808063839, |
|
"eval_loss": 0.12471619248390198, |
|
"eval_runtime": 580.0788, |
|
"eval_samples_per_second": 4.975, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 20748 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 0.34839001297950745, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1238, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.29902979902979904, |
|
"eval_f1_macro": 0.7139220834888879, |
|
"eval_f1_micro": 0.8179482930062977, |
|
"eval_loss": 0.1225149855017662, |
|
"eval_runtime": 592.1949, |
|
"eval_samples_per_second": 4.873, |
|
"eval_steps_per_second": 0.154, |
|
"learning_rate": 0.0001, |
|
"step": 21021 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.30457380457380456, |
|
"eval_f1_macro": 0.7060530834173799, |
|
"eval_f1_micro": 0.8188081009296149, |
|
"eval_loss": 0.12218895554542542, |
|
"eval_runtime": 580.5057, |
|
"eval_samples_per_second": 4.972, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 21294 |
|
}, |
|
{ |
|
"epoch": 78.75457875457876, |
|
"grad_norm": 0.3252258002758026, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1233, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.30180180180180183, |
|
"eval_f1_macro": 0.7100736740896743, |
|
"eval_f1_micro": 0.8152018201735907, |
|
"eval_loss": 0.12460680305957794, |
|
"eval_runtime": 580.1369, |
|
"eval_samples_per_second": 4.975, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 0.0001, |
|
"step": 21567 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.3038808038808039, |
|
"eval_f1_macro": 0.710254660316209, |
|
"eval_f1_micro": 0.8179503235232728, |
|
"eval_loss": 0.12210072576999664, |
|
"eval_runtime": 588.8207, |
|
"eval_samples_per_second": 4.901, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 21840 |
|
}, |
|
{ |
|
"epoch": 80.58608058608058, |
|
"grad_norm": 0.31095778942108154, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1225, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.30180180180180183, |
|
"eval_f1_macro": 0.7156736962695093, |
|
"eval_f1_micro": 0.8184843191082273, |
|
"eval_loss": 0.12116113305091858, |
|
"eval_runtime": 583.0708, |
|
"eval_samples_per_second": 4.95, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 22113 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.30803880803880807, |
|
"eval_f1_macro": 0.7088973142030256, |
|
"eval_f1_micro": 0.8151834668916069, |
|
"eval_loss": 0.12159755080938339, |
|
"eval_runtime": 583.0652, |
|
"eval_samples_per_second": 4.95, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 22386 |
|
}, |
|
{ |
|
"epoch": 82.41758241758242, |
|
"grad_norm": 0.30886727571487427, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1216, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.30803880803880807, |
|
"eval_f1_macro": 0.7090314532601811, |
|
"eval_f1_micro": 0.8164913756836348, |
|
"eval_loss": 0.12142453342676163, |
|
"eval_runtime": 587.9863, |
|
"eval_samples_per_second": 4.908, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 22659 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.306999306999307, |
|
"eval_f1_macro": 0.7100088286352488, |
|
"eval_f1_micro": 0.8168789808917197, |
|
"eval_loss": 0.12157817929983139, |
|
"eval_runtime": 574.5086, |
|
"eval_samples_per_second": 5.023, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 0.0001, |
|
"step": 22932 |
|
}, |
|
{ |
|
"epoch": 84.24908424908425, |
|
"grad_norm": 0.33512353897094727, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1232, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.3052668052668053, |
|
"eval_f1_macro": 0.7109093140233731, |
|
"eval_f1_micro": 0.8188048474717927, |
|
"eval_loss": 0.12155645340681076, |
|
"eval_runtime": 588.285, |
|
"eval_samples_per_second": 4.906, |
|
"eval_steps_per_second": 0.155, |
|
"learning_rate": 0.0001, |
|
"step": 23205 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.306999306999307, |
|
"eval_f1_macro": 0.7176309861117636, |
|
"eval_f1_micro": 0.8190986316274009, |
|
"eval_loss": 0.12185127288103104, |
|
"eval_runtime": 596.3097, |
|
"eval_samples_per_second": 4.84, |
|
"eval_steps_per_second": 0.153, |
|
"learning_rate": 0.0001, |
|
"step": 23478 |
|
}, |
|
{ |
|
"epoch": 86.08058608058609, |
|
"grad_norm": 0.3552737832069397, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1221, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.3063063063063063, |
|
"eval_f1_macro": 0.7080074645247395, |
|
"eval_f1_micro": 0.8176601181250785, |
|
"eval_loss": 0.12196006625890732, |
|
"eval_runtime": 582.057, |
|
"eval_samples_per_second": 4.958, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 0.0001, |
|
"step": 23751 |
|
}, |
|
{ |
|
"epoch": 87.91208791208791, |
|
"grad_norm": 0.31451234221458435, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1208, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.3049203049203049, |
|
"eval_f1_macro": 0.7158455151348946, |
|
"eval_f1_micro": 0.8210709982967056, |
|
"eval_loss": 0.1209772601723671, |
|
"eval_runtime": 583.2281, |
|
"eval_samples_per_second": 4.948, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 1e-05, |
|
"step": 24024 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.30734580734580735, |
|
"eval_f1_macro": 0.7312085502704033, |
|
"eval_f1_micro": 0.82409381663113, |
|
"eval_loss": 0.12103869765996933, |
|
"eval_runtime": 571.5547, |
|
"eval_samples_per_second": 5.049, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 1e-05, |
|
"step": 24297 |
|
}, |
|
{ |
|
"epoch": 89.74358974358974, |
|
"grad_norm": 0.3047947883605957, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1189, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.306999306999307, |
|
"eval_f1_macro": 0.7232165780756572, |
|
"eval_f1_micro": 0.823466447097571, |
|
"eval_loss": 0.12060839682817459, |
|
"eval_runtime": 577.3382, |
|
"eval_samples_per_second": 4.999, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 1e-05, |
|
"step": 24570 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.3087318087318087, |
|
"eval_f1_macro": 0.714800773362679, |
|
"eval_f1_micro": 0.8190853196327803, |
|
"eval_loss": 0.12036494165658951, |
|
"eval_runtime": 575.4555, |
|
"eval_samples_per_second": 5.015, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 1e-05, |
|
"step": 24843 |
|
}, |
|
{ |
|
"epoch": 91.57509157509158, |
|
"grad_norm": 0.28365448117256165, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1181, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.3087318087318087, |
|
"eval_f1_macro": 0.7131694962358415, |
|
"eval_f1_micro": 0.8194187804468336, |
|
"eval_loss": 0.12028194963932037, |
|
"eval_runtime": 576.0948, |
|
"eval_samples_per_second": 5.01, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 1e-05, |
|
"step": 25116 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.30838530838530837, |
|
"eval_f1_macro": 0.7183857334665726, |
|
"eval_f1_micro": 0.8214921910601102, |
|
"eval_loss": 0.12036142498254776, |
|
"eval_runtime": 576.2755, |
|
"eval_samples_per_second": 5.008, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 1e-05, |
|
"step": 25389 |
|
}, |
|
{ |
|
"epoch": 93.4065934065934, |
|
"grad_norm": 0.3265780806541443, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1183, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.30665280665280664, |
|
"eval_f1_macro": 0.7195089038891936, |
|
"eval_f1_micro": 0.8207247828991316, |
|
"eval_loss": 0.12012535333633423, |
|
"eval_runtime": 574.7766, |
|
"eval_samples_per_second": 5.021, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 1e-05, |
|
"step": 25662 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.30838530838530837, |
|
"eval_f1_macro": 0.7157953029268052, |
|
"eval_f1_micro": 0.8197421299397187, |
|
"eval_loss": 0.12010551244020462, |
|
"eval_runtime": 579.3324, |
|
"eval_samples_per_second": 4.982, |
|
"eval_steps_per_second": 0.157, |
|
"learning_rate": 1e-05, |
|
"step": 25935 |
|
}, |
|
{ |
|
"epoch": 95.23809523809524, |
|
"grad_norm": 0.3151456415653229, |
|
"learning_rate": 1e-05, |
|
"loss": 0.117, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.3052668052668053, |
|
"eval_f1_macro": 0.7193329750108997, |
|
"eval_f1_micro": 0.8217099503939306, |
|
"eval_loss": 0.1198035180568695, |
|
"eval_runtime": 605.2902, |
|
"eval_samples_per_second": 4.768, |
|
"eval_steps_per_second": 0.15, |
|
"learning_rate": 1e-05, |
|
"step": 26208 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.3063063063063063, |
|
"eval_f1_macro": 0.7210913273742954, |
|
"eval_f1_micro": 0.8204592028773368, |
|
"eval_loss": 0.12007978558540344, |
|
"eval_runtime": 637.4782, |
|
"eval_samples_per_second": 4.527, |
|
"eval_steps_per_second": 0.143, |
|
"learning_rate": 1e-05, |
|
"step": 26481 |
|
}, |
|
{ |
|
"epoch": 97.06959706959707, |
|
"grad_norm": 0.32921385765075684, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1176, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.30803880803880807, |
|
"eval_f1_macro": 0.7250050738866957, |
|
"eval_f1_micro": 0.8226615276223631, |
|
"eval_loss": 0.12013950198888779, |
|
"eval_runtime": 575.4419, |
|
"eval_samples_per_second": 5.015, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 1e-05, |
|
"step": 26754 |
|
}, |
|
{ |
|
"epoch": 98.9010989010989, |
|
"grad_norm": 0.37390851974487305, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1176, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.30734580734580735, |
|
"eval_f1_macro": 0.7226446473273916, |
|
"eval_f1_micro": 0.8206050968740846, |
|
"eval_loss": 0.12003140896558762, |
|
"eval_runtime": 619.3031, |
|
"eval_samples_per_second": 4.66, |
|
"eval_steps_per_second": 0.147, |
|
"learning_rate": 1e-05, |
|
"step": 27027 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.30803880803880807, |
|
"eval_f1_macro": 0.7191058080317732, |
|
"eval_f1_micro": 0.8216021451315569, |
|
"eval_loss": 0.1200241968035698, |
|
"eval_runtime": 615.3025, |
|
"eval_samples_per_second": 4.69, |
|
"eval_steps_per_second": 0.148, |
|
"learning_rate": 1e-05, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 100.73260073260073, |
|
"grad_norm": 0.39757904410362244, |
|
"learning_rate": 1e-05, |
|
"loss": 0.117, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.3097713097713098, |
|
"eval_f1_macro": 0.724185821594473, |
|
"eval_f1_micro": 0.8228338260398884, |
|
"eval_loss": 0.11989927291870117, |
|
"eval_runtime": 574.4571, |
|
"eval_samples_per_second": 5.024, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 1e-05, |
|
"step": 27573 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.30734580734580735, |
|
"eval_f1_macro": 0.7216480982679994, |
|
"eval_f1_micro": 0.8192071374463429, |
|
"eval_loss": 0.12043397128582001, |
|
"eval_runtime": 627.7673, |
|
"eval_samples_per_second": 4.597, |
|
"eval_steps_per_second": 0.145, |
|
"learning_rate": 1e-05, |
|
"step": 27846 |
|
}, |
|
{ |
|
"epoch": 102.56410256410257, |
|
"grad_norm": 0.3315370976924896, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1159, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.30665280665280664, |
|
"eval_f1_macro": 0.7231653777895258, |
|
"eval_f1_micro": 0.8221990402670561, |
|
"eval_loss": 0.1199527159333229, |
|
"eval_runtime": 574.6395, |
|
"eval_samples_per_second": 5.022, |
|
"eval_steps_per_second": 0.158, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 28119 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.31011781011781014, |
|
"eval_f1_macro": 0.7234886652066129, |
|
"eval_f1_micro": 0.8217751676454663, |
|
"eval_loss": 0.12043838202953339, |
|
"eval_runtime": 571.7276, |
|
"eval_samples_per_second": 5.048, |
|
"eval_steps_per_second": 0.159, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 28392 |
|
}, |
|
{ |
|
"epoch": 104.3956043956044, |
|
"grad_norm": 0.3368052840232849, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.1151, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_f1_macro": 0.7205886261735153, |
|
"eval_f1_micro": 0.8219212232009308, |
|
"eval_loss": 0.11985885351896286, |
|
"eval_runtime": 623.1101, |
|
"eval_samples_per_second": 4.632, |
|
"eval_steps_per_second": 0.146, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 28665 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.31046431046431044, |
|
"eval_f1_macro": 0.7270103073654017, |
|
"eval_f1_micro": 0.8228195739014648, |
|
"eval_loss": 0.11984959244728088, |
|
"eval_runtime": 582.6782, |
|
"eval_samples_per_second": 4.953, |
|
"eval_steps_per_second": 0.156, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 28938 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 28938, |
|
"total_flos": 1.3699618367216817e+21, |
|
"train_loss": 0.13672988786670365, |
|
"train_runtime": 251282.626, |
|
"train_samples_per_second": 5.203, |
|
"train_steps_per_second": 0.163 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 40950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3699618367216817e+21, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|