{
  "best_metric": 9.103012781497261,
  "best_model_checkpoint": "./Whisper-squeezeformer-v4\\checkpoint-33000",
  "epoch": 14.4,
  "eval_steps": 3000,
  "global_step": 36000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 14.352978706359863,
      "learning_rate": 9.976666666666667e-06,
      "loss": 3.89,
      "step": 3000
    },
    {
      "epoch": 1.0,
      "eval_loss": 3.287811756134033,
      "eval_runtime": 2286.1099,
      "eval_samples_per_second": 2.187,
      "eval_steps_per_second": 0.273,
      "eval_wer": 114.50152344831994,
      "step": 3000
    },
    {
      "epoch": 2.0,
      "grad_norm": 9.717866897583008,
      "learning_rate": 8.004666666666668e-06,
      "loss": 1.1579,
      "step": 6000
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.7946763634681702,
      "eval_runtime": 2115.529,
      "eval_samples_per_second": 2.363,
      "eval_steps_per_second": 0.295,
      "eval_wer": 42.057827115249395,
      "step": 6000
    },
    {
      "epoch": 3.0,
      "grad_norm": 7.917266845703125,
      "learning_rate": 6.004666666666668e-06,
      "loss": 0.3888,
      "step": 9000
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.7378555536270142,
      "eval_runtime": 2241.804,
      "eval_samples_per_second": 2.23,
      "eval_steps_per_second": 0.279,
      "eval_wer": 36.931370251209174,
      "step": 9000
    },
    {
      "epoch": 4.0,
      "grad_norm": 5.832924842834473,
      "learning_rate": 4.004666666666667e-06,
      "loss": 0.2242,
      "step": 12000
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.7416579127311707,
      "eval_runtime": 2509.1854,
      "eval_samples_per_second": 1.993,
      "eval_steps_per_second": 0.249,
      "eval_wer": 35.91715850254618,
      "step": 12000
    },
    {
      "epoch": 5.0,
      "grad_norm": 10.248781204223633,
      "learning_rate": 5.558888888888889e-06,
      "loss": 0.5221,
      "step": 15000
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.6810700297355652,
      "eval_runtime": 2258.0308,
      "eval_samples_per_second": 2.214,
      "eval_steps_per_second": 0.277,
      "eval_wer": 32.78077259071443,
      "step": 15000
    },
    {
      "epoch": 6.0,
      "grad_norm": 7.38523530960083,
      "learning_rate": 4.448148148148149e-06,
      "loss": 0.324,
      "step": 18000
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.671556830406189,
      "eval_runtime": 2259.7666,
      "eval_samples_per_second": 2.213,
      "eval_steps_per_second": 0.277,
      "eval_wer": 32.045682142628856,
      "step": 18000
    },
    {
      "epoch": 7.0,
      "grad_norm": 5.77803373336792,
      "learning_rate": 3.337777777777778e-06,
      "loss": 0.2034,
      "step": 21000
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.684516429901123,
      "eval_runtime": 2285.8284,
      "eval_samples_per_second": 2.187,
      "eval_steps_per_second": 0.273,
      "eval_wer": 32.007329597511344,
      "step": 21000
    },
    {
      "epoch": 9.6,
      "grad_norm": 3.751537322998047,
      "learning_rate": 2.2274074074074075e-06,
      "loss": 0.2177,
      "step": 24000
    },
    {
      "epoch": 9.6,
      "eval_loss": 0.19905297458171844,
      "eval_runtime": 1258.2631,
      "eval_samples_per_second": 2.082,
      "eval_steps_per_second": 0.261,
      "eval_wer": 10.862370663420572,
      "step": 24000
    },
    {
      "epoch": 10.8,
      "grad_norm": 2.6073410511016846,
      "learning_rate": 1.1166666666666666e-06,
      "loss": 0.127,
      "step": 27000
    },
    {
      "epoch": 10.8,
      "eval_loss": 0.1856304407119751,
      "eval_runtime": 1152.5547,
      "eval_samples_per_second": 2.273,
      "eval_steps_per_second": 0.285,
      "eval_wer": 10.548539257455873,
      "step": 27000
    },
    {
      "epoch": 12.0,
      "grad_norm": 2.8211987018585205,
      "learning_rate": 5.555555555555556e-09,
      "loss": 0.0909,
      "step": 30000
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.18379394710063934,
      "eval_runtime": 1196.9273,
      "eval_samples_per_second": 2.189,
      "eval_steps_per_second": 0.274,
      "eval_wer": 9.591828971393792,
      "step": 30000
    },
    {
      "epoch": 13.2,
      "grad_norm": 1.6552714109420776,
      "learning_rate": 9.13939393939394e-07,
      "loss": 0.0785,
      "step": 33000
    },
    {
      "epoch": 13.2,
      "eval_loss": 0.18493999540805817,
      "eval_runtime": 1318.1594,
      "eval_samples_per_second": 1.988,
      "eval_steps_per_second": 0.249,
      "eval_wer": 9.103012781497261,
      "step": 33000
    },
    {
      "epoch": 14.4,
      "grad_norm": 2.0688605308532715,
      "learning_rate": 5.151515151515151e-09,
      "loss": 0.0595,
      "step": 36000
    },
    {
      "epoch": 14.4,
      "eval_loss": 0.1860339492559433,
      "eval_runtime": 1345.7303,
      "eval_samples_per_second": 1.947,
      "eval_steps_per_second": 0.244,
      "eval_wer": 9.129640900791236,
      "step": 36000
    },
    {
      "epoch": 14.4,
      "step": 36000,
      "total_flos": 2.495852642304e+20,
      "train_loss": 0.011502729203965929,
      "train_runtime": 50610.9609,
      "train_samples_per_second": 14.226,
      "train_steps_per_second": 0.711
    }
  ],
  "logging_steps": 3000,
  "max_steps": 36000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 3000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.495852642304e+20,
  "train_batch_size": 20,
  "trial_name": null,
  "trial_params": null
}