Whisper-squeezeformer-v4 / trainer_state.json
jun-han's picture
Training checkpoint
7cc1079 verified
{
"best_metric": 9.103012781497261,
"best_model_checkpoint": "./Whisper-squeezeformer-v4\\checkpoint-33000",
"epoch": 14.4,
"eval_steps": 3000,
"global_step": 36000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 14.352978706359863,
"learning_rate": 9.976666666666667e-06,
"loss": 3.89,
"step": 3000
},
{
"epoch": 1.0,
"eval_loss": 3.287811756134033,
"eval_runtime": 2286.1099,
"eval_samples_per_second": 2.187,
"eval_steps_per_second": 0.273,
"eval_wer": 114.50152344831994,
"step": 3000
},
{
"epoch": 2.0,
"grad_norm": 9.717866897583008,
"learning_rate": 8.004666666666668e-06,
"loss": 1.1579,
"step": 6000
},
{
"epoch": 2.0,
"eval_loss": 0.7946763634681702,
"eval_runtime": 2115.529,
"eval_samples_per_second": 2.363,
"eval_steps_per_second": 0.295,
"eval_wer": 42.057827115249395,
"step": 6000
},
{
"epoch": 3.0,
"grad_norm": 7.917266845703125,
"learning_rate": 6.004666666666668e-06,
"loss": 0.3888,
"step": 9000
},
{
"epoch": 3.0,
"eval_loss": 0.7378555536270142,
"eval_runtime": 2241.804,
"eval_samples_per_second": 2.23,
"eval_steps_per_second": 0.279,
"eval_wer": 36.931370251209174,
"step": 9000
},
{
"epoch": 4.0,
"grad_norm": 5.832924842834473,
"learning_rate": 4.004666666666667e-06,
"loss": 0.2242,
"step": 12000
},
{
"epoch": 4.0,
"eval_loss": 0.7416579127311707,
"eval_runtime": 2509.1854,
"eval_samples_per_second": 1.993,
"eval_steps_per_second": 0.249,
"eval_wer": 35.91715850254618,
"step": 12000
},
{
"epoch": 5.0,
"grad_norm": 10.248781204223633,
"learning_rate": 5.558888888888889e-06,
"loss": 0.5221,
"step": 15000
},
{
"epoch": 5.0,
"eval_loss": 0.6810700297355652,
"eval_runtime": 2258.0308,
"eval_samples_per_second": 2.214,
"eval_steps_per_second": 0.277,
"eval_wer": 32.78077259071443,
"step": 15000
},
{
"epoch": 6.0,
"grad_norm": 7.38523530960083,
"learning_rate": 4.448148148148149e-06,
"loss": 0.324,
"step": 18000
},
{
"epoch": 6.0,
"eval_loss": 0.671556830406189,
"eval_runtime": 2259.7666,
"eval_samples_per_second": 2.213,
"eval_steps_per_second": 0.277,
"eval_wer": 32.045682142628856,
"step": 18000
},
{
"epoch": 7.0,
"grad_norm": 5.77803373336792,
"learning_rate": 3.337777777777778e-06,
"loss": 0.2034,
"step": 21000
},
{
"epoch": 7.0,
"eval_loss": 0.684516429901123,
"eval_runtime": 2285.8284,
"eval_samples_per_second": 2.187,
"eval_steps_per_second": 0.273,
"eval_wer": 32.007329597511344,
"step": 21000
},
{
"epoch": 9.6,
"grad_norm": 3.751537322998047,
"learning_rate": 2.2274074074074075e-06,
"loss": 0.2177,
"step": 24000
},
{
"epoch": 9.6,
"eval_loss": 0.19905297458171844,
"eval_runtime": 1258.2631,
"eval_samples_per_second": 2.082,
"eval_steps_per_second": 0.261,
"eval_wer": 10.862370663420572,
"step": 24000
},
{
"epoch": 10.8,
"grad_norm": 2.6073410511016846,
"learning_rate": 1.1166666666666666e-06,
"loss": 0.127,
"step": 27000
},
{
"epoch": 10.8,
"eval_loss": 0.1856304407119751,
"eval_runtime": 1152.5547,
"eval_samples_per_second": 2.273,
"eval_steps_per_second": 0.285,
"eval_wer": 10.548539257455873,
"step": 27000
},
{
"epoch": 12.0,
"grad_norm": 2.8211987018585205,
"learning_rate": 5.555555555555556e-09,
"loss": 0.0909,
"step": 30000
},
{
"epoch": 12.0,
"eval_loss": 0.18379394710063934,
"eval_runtime": 1196.9273,
"eval_samples_per_second": 2.189,
"eval_steps_per_second": 0.274,
"eval_wer": 9.591828971393792,
"step": 30000
},
{
"epoch": 13.2,
"grad_norm": 1.6552714109420776,
"learning_rate": 9.13939393939394e-07,
"loss": 0.0785,
"step": 33000
},
{
"epoch": 13.2,
"eval_loss": 0.18493999540805817,
"eval_runtime": 1318.1594,
"eval_samples_per_second": 1.988,
"eval_steps_per_second": 0.249,
"eval_wer": 9.103012781497261,
"step": 33000
},
{
"epoch": 14.4,
"grad_norm": 2.0688605308532715,
"learning_rate": 5.151515151515151e-09,
"loss": 0.0595,
"step": 36000
},
{
"epoch": 14.4,
"eval_loss": 0.1860339492559433,
"eval_runtime": 1345.7303,
"eval_samples_per_second": 1.947,
"eval_steps_per_second": 0.244,
"eval_wer": 9.129640900791236,
"step": 36000
},
{
"epoch": 14.4,
"step": 36000,
"total_flos": 2.495852642304e+20,
"train_loss": 0.011502729203965929,
"train_runtime": 50610.9609,
"train_samples_per_second": 14.226,
"train_steps_per_second": 0.711
}
],
"logging_steps": 3000,
"max_steps": 36000,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 3000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.495852642304e+20,
"train_batch_size": 20,
"trial_name": null,
"trial_params": null
}