|
{ |
|
"best_metric": 0.06556913256645203, |
|
"best_model_checkpoint": "deberta-v3-xsmall-zyda-2-transformed-sentiment-new/checkpoint-6033", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 6033, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2486325211337643, |
|
"grad_norm": 2.0000367164611816, |
|
"learning_rate": 4.5856124647770596e-05, |
|
"loss": 0.2003, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4972650422675286, |
|
"grad_norm": 2.3387935161590576, |
|
"learning_rate": 4.17122492955412e-05, |
|
"loss": 0.1052, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7458975634012929, |
|
"grad_norm": 1.853918194770813, |
|
"learning_rate": 3.7568373943311785e-05, |
|
"loss": 0.085, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9945300845350572, |
|
"grad_norm": 1.7671293020248413, |
|
"learning_rate": 3.342449859108238e-05, |
|
"loss": 0.0792, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.08709739148616791, |
|
"eval_mse": 0.08709739712527088, |
|
"eval_runtime": 14.8419, |
|
"eval_samples_per_second": 673.767, |
|
"eval_steps_per_second": 84.221, |
|
"step": 2011 |
|
}, |
|
{ |
|
"epoch": 1.2431626056688214, |
|
"grad_norm": 1.0026581287384033, |
|
"learning_rate": 2.928062323885298e-05, |
|
"loss": 0.0594, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4917951268025857, |
|
"grad_norm": 0.9303980469703674, |
|
"learning_rate": 2.5136747886623573e-05, |
|
"loss": 0.0594, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.74042764793635, |
|
"grad_norm": 1.7368980646133423, |
|
"learning_rate": 2.0992872534394168e-05, |
|
"loss": 0.0551, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.9890601690701144, |
|
"grad_norm": 0.6475295424461365, |
|
"learning_rate": 1.684899718216476e-05, |
|
"loss": 0.0541, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.06912554055452347, |
|
"eval_mse": 0.06912553393413896, |
|
"eval_runtime": 13.2293, |
|
"eval_samples_per_second": 755.898, |
|
"eval_steps_per_second": 94.487, |
|
"step": 4022 |
|
}, |
|
{ |
|
"epoch": 2.2376926902038785, |
|
"grad_norm": 0.6805059909820557, |
|
"learning_rate": 1.2705121829935357e-05, |
|
"loss": 0.0444, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.486325211337643, |
|
"grad_norm": 1.3735737800598145, |
|
"learning_rate": 8.56124647770595e-06, |
|
"loss": 0.043, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.734957732471407, |
|
"grad_norm": 0.9396611452102661, |
|
"learning_rate": 4.417371125476545e-06, |
|
"loss": 0.0422, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.9835902536051715, |
|
"grad_norm": 0.756208062171936, |
|
"learning_rate": 2.7349577324714074e-07, |
|
"loss": 0.0411, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.06556913256645203, |
|
"eval_mse": 0.06556913494220615, |
|
"eval_runtime": 13.2288, |
|
"eval_samples_per_second": 755.924, |
|
"eval_steps_per_second": 94.491, |
|
"step": 6033 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 6033, |
|
"total_flos": 6357984788759040.0, |
|
"train_loss": 0.07220485827706652, |
|
"train_runtime": 846.782, |
|
"train_samples_per_second": 455.926, |
|
"train_steps_per_second": 7.125 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6033, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6357984788759040.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|