|
{ |
|
"best_metric": 0.311672180891037, |
|
"best_model_checkpoint": "/kaggle/working/wev2vec-base960-agu-amharic/checkpoint-1500", |
|
"epoch": 3.9893617021276597, |
|
"eval_steps": 500, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13297872340425532, |
|
"grad_norm": 2.105905532836914, |
|
"learning_rate": 9.73404255319149e-06, |
|
"loss": 1.6043, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26595744680851063, |
|
"grad_norm": 4.957370281219482, |
|
"learning_rate": 9.46808510638298e-06, |
|
"loss": 1.4442, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.39893617021276595, |
|
"grad_norm": 4.0505690574646, |
|
"learning_rate": 9.204787234042554e-06, |
|
"loss": 1.2199, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"grad_norm": 3.2562971115112305, |
|
"learning_rate": 8.941489361702127e-06, |
|
"loss": 1.0649, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6648936170212766, |
|
"grad_norm": 3.3451857566833496, |
|
"learning_rate": 8.675531914893619e-06, |
|
"loss": 0.8682, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6648936170212766, |
|
"eval_accuracy": 0.7563527822494507, |
|
"eval_loss": 0.7632076144218445, |
|
"eval_runtime": 59.9292, |
|
"eval_samples_per_second": 22.326, |
|
"eval_steps_per_second": 2.803, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7978723404255319, |
|
"grad_norm": 7.531016826629639, |
|
"learning_rate": 8.409574468085107e-06, |
|
"loss": 0.7386, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9308510638297872, |
|
"grad_norm": 14.800432205200195, |
|
"learning_rate": 8.143617021276596e-06, |
|
"loss": 0.6506, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"grad_norm": 18.182409286499023, |
|
"learning_rate": 7.877659574468086e-06, |
|
"loss": 0.5668, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.196808510638298, |
|
"grad_norm": 11.308631896972656, |
|
"learning_rate": 7.6117021276595745e-06, |
|
"loss": 0.4825, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"grad_norm": 3.6028597354888916, |
|
"learning_rate": 7.348404255319149e-06, |
|
"loss": 0.4482, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3297872340425532, |
|
"eval_accuracy": 0.9103139042854309, |
|
"eval_loss": 0.35010650753974915, |
|
"eval_runtime": 60.1508, |
|
"eval_samples_per_second": 22.244, |
|
"eval_steps_per_second": 2.793, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4627659574468086, |
|
"grad_norm": 13.081503868103027, |
|
"learning_rate": 7.0824468085106394e-06, |
|
"loss": 0.3877, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"grad_norm": 0.6290038228034973, |
|
"learning_rate": 6.816489361702127e-06, |
|
"loss": 0.3846, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.728723404255319, |
|
"grad_norm": 5.16023063659668, |
|
"learning_rate": 6.550531914893618e-06, |
|
"loss": 0.3553, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.8617021276595744, |
|
"grad_norm": 1.3795862197875977, |
|
"learning_rate": 6.284574468085107e-06, |
|
"loss": 0.3397, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.9946808510638299, |
|
"grad_norm": 17.843442916870117, |
|
"learning_rate": 6.018617021276596e-06, |
|
"loss": 0.2724, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.9946808510638299, |
|
"eval_accuracy": 0.9230194091796875, |
|
"eval_loss": 0.311672180891037, |
|
"eval_runtime": 60.1853, |
|
"eval_samples_per_second": 22.231, |
|
"eval_steps_per_second": 2.791, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"grad_norm": 0.7912893295288086, |
|
"learning_rate": 5.755319148936171e-06, |
|
"loss": 0.228, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.2606382978723403, |
|
"grad_norm": 14.161015510559082, |
|
"learning_rate": 5.48936170212766e-06, |
|
"loss": 0.2633, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.393617021276596, |
|
"grad_norm": 12.629347801208496, |
|
"learning_rate": 5.223404255319149e-06, |
|
"loss": 0.2218, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.526595744680851, |
|
"grad_norm": 2.9135618209838867, |
|
"learning_rate": 4.957446808510639e-06, |
|
"loss": 0.2379, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 14.890237808227539, |
|
"learning_rate": 4.691489361702128e-06, |
|
"loss": 0.2269, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"eval_accuracy": 0.9267563819885254, |
|
"eval_loss": 0.34563255310058594, |
|
"eval_runtime": 60.1272, |
|
"eval_samples_per_second": 22.253, |
|
"eval_steps_per_second": 2.794, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.7925531914893615, |
|
"grad_norm": 11.785533905029297, |
|
"learning_rate": 4.425531914893617e-06, |
|
"loss": 0.2319, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.925531914893617, |
|
"grad_norm": 12.325530052185059, |
|
"learning_rate": 4.1595744680851066e-06, |
|
"loss": 0.1945, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.0585106382978724, |
|
"grad_norm": 27.31170082092285, |
|
"learning_rate": 3.893617021276596e-06, |
|
"loss": 0.1832, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"grad_norm": 21.935894012451172, |
|
"learning_rate": 3.6276595744680853e-06, |
|
"loss": 0.1982, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.324468085106383, |
|
"grad_norm": 3.217214822769165, |
|
"learning_rate": 3.3617021276595745e-06, |
|
"loss": 0.1663, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.324468085106383, |
|
"eval_accuracy": 0.927503764629364, |
|
"eval_loss": 0.37431710958480835, |
|
"eval_runtime": 60.1697, |
|
"eval_samples_per_second": 22.237, |
|
"eval_steps_per_second": 2.792, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.4574468085106385, |
|
"grad_norm": 0.2889520823955536, |
|
"learning_rate": 3.0957446808510637e-06, |
|
"loss": 0.1712, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.5904255319148937, |
|
"grad_norm": 19.394306182861328, |
|
"learning_rate": 2.8297872340425537e-06, |
|
"loss": 0.1615, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.723404255319149, |
|
"grad_norm": 0.08302600681781769, |
|
"learning_rate": 2.563829787234043e-06, |
|
"loss": 0.1709, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.8563829787234045, |
|
"grad_norm": 59.50064468383789, |
|
"learning_rate": 2.297872340425532e-06, |
|
"loss": 0.1614, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"grad_norm": 1.0584908723831177, |
|
"learning_rate": 2.0319148936170213e-06, |
|
"loss": 0.1737, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"eval_accuracy": 0.9327354431152344, |
|
"eval_loss": 0.37135931849479675, |
|
"eval_runtime": 60.1293, |
|
"eval_samples_per_second": 22.252, |
|
"eval_steps_per_second": 2.794, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.9893617021276597, |
|
"step": 3000, |
|
"total_flos": 1.8714792090048768e+18, |
|
"train_loss": 0.4606133778889974, |
|
"train_runtime": 3208.6756, |
|
"train_samples_per_second": 18.727, |
|
"train_steps_per_second": 1.172 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.8714792090048768e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|