|
{
  "best_metric": 0.7877622937690213,
  "best_model_checkpoint": "./XLM-V_64-multi-outputs/checkpoint-16000",
  "epoch": 14.8619957537155,
  "eval_steps": 1000,
  "global_step": 21000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.7077140835102619,
      "grad_norm": 0.7589670419692993,
      "learning_rate": 4.7169811320754717e-07,
      "loss": 0.6944,
      "step": 1000
    },
    {
      "epoch": 0.7077140835102619,
      "eval_accuracy": 0.5096086826645425,
      "eval_f1": 0.6748960327414351,
      "eval_loss": 0.6923578381538391,
      "eval_precision": 0.5093155325296403,
      "eval_recall": 1.0,
      "eval_runtime": 57.7937,
      "eval_samples_per_second": 173.773,
      "eval_steps_per_second": 2.717,
      "step": 1000
    },
    {
      "epoch": 1.4154281670205238,
      "grad_norm": 6.16334867477417,
      "learning_rate": 9.433962264150943e-07,
      "loss": 0.6848,
      "step": 2000
    },
    {
      "epoch": 1.4154281670205238,
      "eval_accuracy": 0.6086826645424674,
      "eval_f1": 0.7141402385801571,
      "eval_loss": 0.6570077538490295,
      "eval_precision": 0.5684344603983326,
      "eval_recall": 0.9602895148669797,
      "eval_runtime": 57.4971,
      "eval_samples_per_second": 174.67,
      "eval_steps_per_second": 2.731,
      "step": 2000
    },
    {
      "epoch": 2.1231422505307855,
      "grad_norm": 2.09871768951416,
      "learning_rate": 9.538663171690695e-07,
      "loss": 0.6613,
      "step": 3000
    },
    {
      "epoch": 2.1231422505307855,
      "eval_accuracy": 0.6230210096584686,
      "eval_f1": 0.7231241772707327,
      "eval_loss": 0.6287678480148315,
      "eval_precision": 0.5774351786965662,
      "eval_recall": 0.9671361502347418,
      "eval_runtime": 57.6578,
      "eval_samples_per_second": 174.183,
      "eval_steps_per_second": 2.723,
      "step": 3000
    },
    {
      "epoch": 2.8308563340410475,
      "grad_norm": 1.726118803024292,
      "learning_rate": 9.014416775884666e-07,
      "loss": 0.6505,
      "step": 4000
    },
    {
      "epoch": 2.8308563340410475,
      "eval_accuracy": 0.635965348999303,
      "eval_f1": 0.7311764705882353,
      "eval_loss": 0.615561306476593,
      "eval_precision": 0.5857681432610744,
      "eval_recall": 0.9726134585289515,
      "eval_runtime": 57.6722,
      "eval_samples_per_second": 174.139,
      "eval_steps_per_second": 2.722,
      "step": 4000
    },
    {
      "epoch": 3.538570417551309,
      "grad_norm": 18.405258178710938,
      "learning_rate": 8.490170380078637e-07,
      "loss": 0.6453,
      "step": 5000
    },
    {
      "epoch": 3.538570417551309,
      "eval_accuracy": 0.6402469381658867,
      "eval_f1": 0.7314353675760054,
      "eval_loss": 0.6071421504020691,
      "eval_precision": 0.5898573312552452,
      "eval_recall": 0.9624413145539906,
      "eval_runtime": 57.5099,
      "eval_samples_per_second": 174.631,
      "eval_steps_per_second": 2.73,
      "step": 5000
    },
    {
      "epoch": 4.246284501061571,
      "grad_norm": 1.6453580856323242,
      "learning_rate": 7.965923984272608e-07,
      "loss": 0.632,
      "step": 6000
    },
    {
      "epoch": 4.246284501061571,
      "eval_accuracy": 0.6659364731653888,
      "eval_f1": 0.7478770571879462,
      "eval_loss": 0.5868598222732544,
      "eval_precision": 0.6071995118974984,
      "eval_recall": 0.97339593114241,
      "eval_runtime": 58.3002,
      "eval_samples_per_second": 172.263,
      "eval_steps_per_second": 2.693,
      "step": 6000
    },
    {
      "epoch": 4.953998584571833,
      "grad_norm": 3.636608362197876,
      "learning_rate": 7.44167758846658e-07,
      "loss": 0.6232,
      "step": 7000
    },
    {
      "epoch": 4.953998584571833,
      "eval_accuracy": 0.6733047894055562,
      "eval_f1": 0.7526200708738596,
      "eval_loss": 0.5761261582374573,
      "eval_precision": 0.6123175070543492,
      "eval_recall": 0.9763302034428795,
      "eval_runtime": 57.6672,
      "eval_samples_per_second": 174.154,
      "eval_steps_per_second": 2.723,
      "step": 7000
    },
    {
      "epoch": 5.661712668082095,
      "grad_norm": 24.744699478149414,
      "learning_rate": 6.91743119266055e-07,
      "loss": 0.6141,
      "step": 8000
    },
    {
      "epoch": 5.661712668082095,
      "eval_accuracy": 0.688140993726974,
      "eval_f1": 0.7601837672281776,
      "eval_loss": 0.5643310546875,
      "eval_precision": 0.6245596376446905,
      "eval_recall": 0.9710485133020345,
      "eval_runtime": 57.5859,
      "eval_samples_per_second": 174.4,
      "eval_steps_per_second": 2.726,
      "step": 8000
    },
    {
      "epoch": 6.369426751592357,
      "grad_norm": 9.586795806884766,
      "learning_rate": 6.393184796854522e-07,
      "loss": 0.605,
      "step": 9000
    },
    {
      "epoch": 6.369426751592357,
      "eval_accuracy": 0.6972020312655581,
      "eval_f1": 0.7631066448547168,
      "eval_loss": 0.553417980670929,
      "eval_precision": 0.6340453074433657,
      "eval_recall": 0.9581377151799687,
      "eval_runtime": 57.7405,
      "eval_samples_per_second": 173.933,
      "eval_steps_per_second": 2.719,
      "step": 9000
    },
    {
      "epoch": 7.077140835102618,
      "grad_norm": 8.45578384399414,
      "learning_rate": 5.868938401048492e-07,
      "loss": 0.6021,
      "step": 10000
    },
    {
      "epoch": 7.077140835102618,
      "eval_accuracy": 0.7043712038235587,
      "eval_f1": 0.7691829277773459,
      "eval_loss": 0.5444474816322327,
      "eval_precision": 0.6382402270674752,
      "eval_recall": 0.9677230046948356,
      "eval_runtime": 57.654,
      "eval_samples_per_second": 174.194,
      "eval_steps_per_second": 2.723,
      "step": 10000
    },
    {
      "epoch": 7.78485491861288,
      "grad_norm": 10.198156356811523,
      "learning_rate": 5.344692005242464e-07,
      "loss": 0.5934,
      "step": 11000
    },
    {
      "epoch": 7.78485491861288,
      "eval_accuracy": 0.7097480832420592,
      "eval_f1": 0.7714263310593585,
      "eval_loss": 0.5372178554534912,
      "eval_precision": 0.6437639052480042,
      "eval_recall": 0.962245696400626,
      "eval_runtime": 57.4867,
      "eval_samples_per_second": 174.701,
      "eval_steps_per_second": 2.731,
      "step": 11000
    },
    {
      "epoch": 8.492569002123142,
      "grad_norm": 34.88262176513672,
      "learning_rate": 4.820445609436435e-07,
      "loss": 0.5842,
      "step": 12000
    },
    {
      "epoch": 8.492569002123142,
      "eval_accuracy": 0.7273722991138106,
      "eval_f1": 0.7810650887573964,
      "eval_loss": 0.5250558853149414,
      "eval_precision": 0.6605355693805789,
      "eval_recall": 0.9553990610328639,
      "eval_runtime": 57.6716,
      "eval_samples_per_second": 174.141,
      "eval_steps_per_second": 2.722,
      "step": 12000
    },
    {
      "epoch": 9.200283085633403,
      "grad_norm": 63.931617736816406,
      "learning_rate": 4.296199213630406e-07,
      "loss": 0.5775,
      "step": 13000
    },
    {
      "epoch": 9.200283085633403,
      "eval_accuracy": 0.7322513193268944,
      "eval_f1": 0.7830226740902122,
      "eval_loss": 0.5154615640640259,
      "eval_precision": 0.6663919791237467,
      "eval_recall": 0.9491392801251957,
      "eval_runtime": 57.6583,
      "eval_samples_per_second": 174.181,
      "eval_steps_per_second": 2.723,
      "step": 13000
    },
    {
      "epoch": 9.907997169143666,
      "grad_norm": 18.855731964111328,
      "learning_rate": 3.771952817824377e-07,
      "loss": 0.5694,
      "step": 14000
    },
    {
      "epoch": 9.907997169143666,
      "eval_accuracy": 0.7335457532609778,
      "eval_f1": 0.7848182695400451,
      "eval_loss": 0.5107560157775879,
      "eval_precision": 0.6663025669033316,
      "eval_recall": 0.9546165884194053,
      "eval_runtime": 57.5122,
      "eval_samples_per_second": 174.624,
      "eval_steps_per_second": 2.73,
      "step": 14000
    },
    {
      "epoch": 10.615711252653927,
      "grad_norm": 5.973861217498779,
      "learning_rate": 3.247706422018349e-07,
      "loss": 0.5645,
      "step": 15000
    },
    {
      "epoch": 10.615711252653927,
      "eval_accuracy": 0.7354376182415613,
      "eval_f1": 0.7858466994438623,
      "eval_loss": 0.5042764544487,
      "eval_precision": 0.6682659355723098,
      "eval_recall": 0.9536384976525821,
      "eval_runtime": 57.6441,
      "eval_samples_per_second": 174.224,
      "eval_steps_per_second": 2.724,
      "step": 15000
    },
    {
      "epoch": 11.32342533616419,
      "grad_norm": 13.824345588684082,
      "learning_rate": 2.72346002621232e-07,
      "loss": 0.5555,
      "step": 16000
    },
    {
      "epoch": 11.32342533616419,
      "eval_accuracy": 0.7361346211291446,
      "eval_f1": 0.7877622937690213,
      "eval_loss": 0.5019333362579346,
      "eval_precision": 0.666937889883374,
      "eval_recall": 0.9620500782472613,
      "eval_runtime": 57.7949,
      "eval_samples_per_second": 173.77,
      "eval_steps_per_second": 2.717,
      "step": 16000
    },
    {
      "epoch": 12.031139419674451,
      "grad_norm": 12.343938827514648,
      "learning_rate": 2.199213630406291e-07,
      "loss": 0.5517,
      "step": 17000
    },
    {
      "epoch": 12.031139419674451,
      "eval_accuracy": 0.7386239171562282,
      "eval_f1": 0.786879922058943,
      "eval_loss": 0.4967314302921295,
      "eval_precision": 0.672588480222068,
      "eval_recall": 0.9479655712050078,
      "eval_runtime": 57.6752,
      "eval_samples_per_second": 174.13,
      "eval_steps_per_second": 2.722,
      "step": 17000
    },
    {
      "epoch": 12.738853503184714,
      "grad_norm": 10.652567863464355,
      "learning_rate": 1.6749672346002622e-07,
      "loss": 0.5485,
      "step": 18000
    },
    {
      "epoch": 12.738853503184714,
      "eval_accuracy": 0.7389226326794782,
      "eval_f1": 0.7872444011684518,
      "eval_loss": 0.49510088562965393,
      "eval_precision": 0.6726289517470881,
      "eval_recall": 0.948943661971831,
      "eval_runtime": 57.6551,
      "eval_samples_per_second": 174.191,
      "eval_steps_per_second": 2.723,
      "step": 18000
    },
    {
      "epoch": 13.446567586694975,
      "grad_norm": 7.330574035644531,
      "learning_rate": 1.1507208387942332e-07,
      "loss": 0.5448,
      "step": 19000
    },
    {
      "epoch": 13.446567586694975,
      "eval_accuracy": 0.7379269142686449,
      "eval_f1": 0.7868825910931174,
      "eval_loss": 0.49567386507987976,
      "eval_precision": 0.6713180436584691,
      "eval_recall": 0.9505086071987481,
      "eval_runtime": 57.6941,
      "eval_samples_per_second": 174.073,
      "eval_steps_per_second": 2.721,
      "step": 19000
    },
    {
      "epoch": 14.154281670205236,
      "grad_norm": 22.538665771484375,
      "learning_rate": 6.264744429882045e-08,
      "loss": 0.5441,
      "step": 20000
    },
    {
      "epoch": 14.154281670205236,
      "eval_accuracy": 0.7379269142686449,
      "eval_f1": 0.7872615583575816,
      "eval_loss": 0.4930832087993622,
      "eval_precision": 0.6707988980716253,
      "eval_recall": 0.952660406885759,
      "eval_runtime": 57.6062,
      "eval_samples_per_second": 174.339,
      "eval_steps_per_second": 2.725,
      "step": 20000
    },
    {
      "epoch": 14.8619957537155,
      "grad_norm": 11.810145378112793,
      "learning_rate": 1.0222804718217562e-08,
      "loss": 0.5416,
      "step": 21000
    },
    {
      "epoch": 14.8619957537155,
      "eval_accuracy": 0.7375286269043114,
      "eval_f1": 0.7866968765172357,
      "eval_loss": 0.4928380250930786,
      "eval_precision": 0.6708528843499862,
      "eval_recall": 0.9508998435054773,
      "eval_runtime": 57.6166,
      "eval_samples_per_second": 174.308,
      "eval_steps_per_second": 2.725,
      "step": 21000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 21195,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.173409489802339e+17,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}
|
|