|
{ |
|
"best_metric": 0.8169879527109274, |
|
"best_model_checkpoint": "/home/bram/shares/predict/trained/dutch/hebban-reviews/xlm-roberta-base/checkpoint-11000", |
|
"epoch": 3.9447731755424065, |
|
"global_step": 12000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.794166666666667e-05, |
|
"loss": 0.913, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.7665803747534516, |
|
"eval_f1": 0.7649545454029377, |
|
"eval_loss": 0.7253644466400146, |
|
"eval_precision": 0.7641565688780922, |
|
"eval_recall": 0.7665803747534516, |
|
"eval_runtime": 24.3431, |
|
"eval_samples_per_second": 666.472, |
|
"eval_steps_per_second": 27.77, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.5858333333333334e-05, |
|
"loss": 0.7582, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.6934787968441815, |
|
"eval_f1": 0.7165538695299023, |
|
"eval_loss": 0.7026467323303223, |
|
"eval_precision": 0.7706797791371907, |
|
"eval_recall": 0.6934787968441815, |
|
"eval_runtime": 24.1169, |
|
"eval_samples_per_second": 672.723, |
|
"eval_steps_per_second": 28.03, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.3775e-05, |
|
"loss": 0.6847, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.7233727810650887, |
|
"eval_f1": 0.7442026185547411, |
|
"eval_loss": 0.6611877083778381, |
|
"eval_precision": 0.8038626962859567, |
|
"eval_recall": 0.7233727810650887, |
|
"eval_runtime": 24.1771, |
|
"eval_samples_per_second": 671.047, |
|
"eval_steps_per_second": 27.96, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.1691666666666666e-05, |
|
"loss": 0.6532, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.8156434911242604, |
|
"eval_f1": 0.8093687626348003, |
|
"eval_loss": 0.6557860374450684, |
|
"eval_precision": 0.8059505966835585, |
|
"eval_recall": 0.8156434911242604, |
|
"eval_runtime": 24.3135, |
|
"eval_samples_per_second": 667.283, |
|
"eval_steps_per_second": 27.803, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.960833333333334e-05, |
|
"loss": 0.6281, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.7695389546351085, |
|
"eval_f1": 0.7795959943899675, |
|
"eval_loss": 0.616236686706543, |
|
"eval_precision": 0.7973850176423627, |
|
"eval_recall": 0.7695389546351085, |
|
"eval_runtime": 24.3023, |
|
"eval_samples_per_second": 667.59, |
|
"eval_steps_per_second": 27.816, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7525e-05, |
|
"loss": 0.6281, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.8015902366863905, |
|
"eval_f1": 0.803170220169187, |
|
"eval_loss": 0.5991469621658325, |
|
"eval_precision": 0.806066258666526, |
|
"eval_recall": 0.8015902366863905, |
|
"eval_runtime": 24.3429, |
|
"eval_samples_per_second": 666.477, |
|
"eval_steps_per_second": 27.77, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.544583333333333e-05, |
|
"loss": 0.5668, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.7874753451676528, |
|
"eval_f1": 0.7971578305557919, |
|
"eval_loss": 0.5982191562652588, |
|
"eval_precision": 0.8156521173286116, |
|
"eval_recall": 0.7874753451676528, |
|
"eval_runtime": 24.3367, |
|
"eval_samples_per_second": 666.646, |
|
"eval_steps_per_second": 27.777, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.3362500000000005e-05, |
|
"loss": 0.567, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.8007889546351085, |
|
"eval_f1": 0.8041408078539543, |
|
"eval_loss": 0.6023094654083252, |
|
"eval_precision": 0.8088979038333125, |
|
"eval_recall": 0.8007889546351085, |
|
"eval_runtime": 24.2641, |
|
"eval_samples_per_second": 668.643, |
|
"eval_steps_per_second": 27.86, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.127916666666667e-05, |
|
"loss": 0.5704, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.7429117357001972, |
|
"eval_f1": 0.7619810076683907, |
|
"eval_loss": 0.6065093278884888, |
|
"eval_precision": 0.8107750610152921, |
|
"eval_recall": 0.7429117357001972, |
|
"eval_runtime": 24.3353, |
|
"eval_samples_per_second": 666.686, |
|
"eval_steps_per_second": 27.779, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.9195833333333333e-05, |
|
"loss": 0.5596, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 0.8072608481262328, |
|
"eval_f1": 0.8104579115041036, |
|
"eval_loss": 0.5900022983551025, |
|
"eval_precision": 0.8144869336926288, |
|
"eval_recall": 0.8072608481262328, |
|
"eval_runtime": 24.3429, |
|
"eval_samples_per_second": 666.477, |
|
"eval_steps_per_second": 27.77, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.7116666666666667e-05, |
|
"loss": 0.5495, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.810034516765286, |
|
"eval_f1": 0.8141305380075001, |
|
"eval_loss": 0.613106906414032, |
|
"eval_precision": 0.8219044900382881, |
|
"eval_recall": 0.810034516765286, |
|
"eval_runtime": 24.3418, |
|
"eval_samples_per_second": 666.509, |
|
"eval_steps_per_second": 27.771, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.5033333333333336e-05, |
|
"loss": 0.5449, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.8124383629191322, |
|
"eval_f1": 0.8140798132169556, |
|
"eval_loss": 0.6060279011726379, |
|
"eval_precision": 0.816286158402022, |
|
"eval_recall": 0.8124383629191322, |
|
"eval_runtime": 24.332, |
|
"eval_samples_per_second": 666.777, |
|
"eval_steps_per_second": 27.782, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.2950000000000002e-05, |
|
"loss": 0.4898, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.7848865877712031, |
|
"eval_f1": 0.7964804879952159, |
|
"eval_loss": 0.6215521693229675, |
|
"eval_precision": 0.820125366434727, |
|
"eval_recall": 0.7848865877712031, |
|
"eval_runtime": 24.3196, |
|
"eval_samples_per_second": 667.117, |
|
"eval_steps_per_second": 27.797, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.0866666666666668e-05, |
|
"loss": 0.4837, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_accuracy": 0.7318786982248521, |
|
"eval_f1": 0.7528823670992008, |
|
"eval_loss": 0.6411539912223816, |
|
"eval_precision": 0.8100101192694165, |
|
"eval_recall": 0.7318786982248521, |
|
"eval_runtime": 24.3327, |
|
"eval_samples_per_second": 666.758, |
|
"eval_steps_per_second": 27.782, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.87875e-05, |
|
"loss": 0.4671, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_accuracy": 0.803870808678501, |
|
"eval_f1": 0.810953536758238, |
|
"eval_loss": 0.6316830515861511, |
|
"eval_precision": 0.8241951591967538, |
|
"eval_recall": 0.803870808678501, |
|
"eval_runtime": 24.3331, |
|
"eval_samples_per_second": 666.747, |
|
"eval_steps_per_second": 27.781, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.670416666666667e-05, |
|
"loss": 0.4791, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_accuracy": 0.8032544378698225, |
|
"eval_f1": 0.8091355876498971, |
|
"eval_loss": 0.5908682942390442, |
|
"eval_precision": 0.8179762946015251, |
|
"eval_recall": 0.8032544378698225, |
|
"eval_runtime": 24.3361, |
|
"eval_samples_per_second": 666.664, |
|
"eval_steps_per_second": 27.778, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.4620833333333334e-05, |
|
"loss": 0.4739, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.8067061143984221, |
|
"eval_f1": 0.8120892015198095, |
|
"eval_loss": 0.6165759563446045, |
|
"eval_precision": 0.8199753861011193, |
|
"eval_recall": 0.8067061143984221, |
|
"eval_runtime": 24.1747, |
|
"eval_samples_per_second": 671.116, |
|
"eval_steps_per_second": 27.963, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.25375e-05, |
|
"loss": 0.4587, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.8041173570019724, |
|
"eval_f1": 0.8104680056938384, |
|
"eval_loss": 0.5887444019317627, |
|
"eval_precision": 0.820271897699825, |
|
"eval_recall": 0.8041173570019724, |
|
"eval_runtime": 24.1945, |
|
"eval_samples_per_second": 670.564, |
|
"eval_steps_per_second": 27.94, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.0454166666666667e-05, |
|
"loss": 0.4147, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.780448717948718, |
|
"eval_f1": 0.7927207824065717, |
|
"eval_loss": 0.6190515160560608, |
|
"eval_precision": 0.8178157106781372, |
|
"eval_recall": 0.780448717948718, |
|
"eval_runtime": 24.1866, |
|
"eval_samples_per_second": 670.784, |
|
"eval_steps_per_second": 27.949, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 8.370833333333333e-06, |
|
"loss": 0.3861, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_accuracy": 0.7917899408284024, |
|
"eval_f1": 0.8013705698417323, |
|
"eval_loss": 0.6606641411781311, |
|
"eval_precision": 0.8190127006775076, |
|
"eval_recall": 0.7917899408284024, |
|
"eval_runtime": 24.3416, |
|
"eval_samples_per_second": 666.514, |
|
"eval_steps_per_second": 27.771, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 6.2875e-06, |
|
"loss": 0.3897, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.788646449704142, |
|
"eval_f1": 0.7987527519476123, |
|
"eval_loss": 0.6613931059837341, |
|
"eval_precision": 0.8177063830689373, |
|
"eval_recall": 0.788646449704142, |
|
"eval_runtime": 24.342, |
|
"eval_samples_per_second": 666.503, |
|
"eval_steps_per_second": 27.771, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.204166666666667e-06, |
|
"loss": 0.3877, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_accuracy": 0.8135478303747534, |
|
"eval_f1": 0.8169879527109274, |
|
"eval_loss": 0.6640126705169678, |
|
"eval_precision": 0.8215474627622835, |
|
"eval_recall": 0.8135478303747534, |
|
"eval_runtime": 24.3344, |
|
"eval_samples_per_second": 666.71, |
|
"eval_steps_per_second": 27.78, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.1208333333333335e-06, |
|
"loss": 0.3795, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_accuracy": 0.8053500986193294, |
|
"eval_f1": 0.8113411583628731, |
|
"eval_loss": 0.6599082350730896, |
|
"eval_precision": 0.8205901740056264, |
|
"eval_recall": 0.8053500986193294, |
|
"eval_runtime": 24.3271, |
|
"eval_samples_per_second": 666.91, |
|
"eval_steps_per_second": 27.788, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.7500000000000005e-08, |
|
"loss": 0.3863, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.8009122287968442, |
|
"eval_f1": 0.8081790831347971, |
|
"eval_loss": 0.6572125554084778, |
|
"eval_precision": 0.8200806840462704, |
|
"eval_recall": 0.8009122287968442, |
|
"eval_runtime": 24.3366, |
|
"eval_samples_per_second": 666.651, |
|
"eval_steps_per_second": 27.777, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"step": 12000, |
|
"total_flos": 1.4962346001065574e+17, |
|
"train_loss": 0.5341533139546712, |
|
"train_runtime": 3936.6668, |
|
"train_samples_per_second": 146.317, |
|
"train_steps_per_second": 3.048 |
|
} |
|
], |
|
"max_steps": 12000, |
|
"num_train_epochs": 4, |
|
"total_flos": 1.4962346001065574e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|