|
{ |
|
"best_metric": 0.6367625594139099, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-bsbigcgen-female-model/checkpoint-600", |
|
"epoch": 3.6933744221879814, |
|
"eval_steps": 200, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07704160246533127, |
|
"grad_norm": 112.25947570800781, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 11.5295, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.15408320493066255, |
|
    "grad_norm": null,
|
"learning_rate": 8.8e-07, |
|
"loss": 9.8901, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.23112480739599384, |
|
"grad_norm": 84.68060302734375, |
|
"learning_rate": 1.3800000000000001e-06, |
|
"loss": 7.8133, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.3081664098613251, |
|
"grad_norm": 94.15545654296875, |
|
"learning_rate": 1.8800000000000002e-06, |
|
"loss": 6.4974, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3852080123266564, |
|
"grad_norm": 72.34725189208984, |
|
"learning_rate": 2.38e-06, |
|
"loss": 4.9676, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.4622496147919877, |
|
"grad_norm": 67.70198059082031, |
|
"learning_rate": 2.88e-06, |
|
"loss": 4.2556, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.539291217257319, |
|
"grad_norm": 80.96290588378906, |
|
"learning_rate": 3.3800000000000007e-06, |
|
"loss": 4.3249, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.6163328197226502, |
|
"grad_norm": 67.85588073730469, |
|
"learning_rate": 3.88e-06, |
|
"loss": 3.47, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6163328197226502, |
|
"eval_loss": 0.927435576915741, |
|
"eval_runtime": 249.4489, |
|
"eval_samples_per_second": 1.728, |
|
"eval_steps_per_second": 0.866, |
|
"eval_wer": 0.6674612634088201, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6933744221879815, |
|
"grad_norm": 60.49135971069336, |
|
"learning_rate": 4.38e-06, |
|
"loss": 3.7208, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.7704160246533128, |
|
"grad_norm": 84.06492614746094, |
|
"learning_rate": 4.880000000000001e-06, |
|
"loss": 3.6644, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 66.79823303222656, |
|
"learning_rate": 5.380000000000001e-06, |
|
"loss": 3.1538, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.9244992295839753, |
|
"grad_norm": 66.50402069091797, |
|
"learning_rate": 5.8800000000000005e-06, |
|
"loss": 3.0771, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 68.71908569335938, |
|
"learning_rate": 6.380000000000001e-06, |
|
"loss": 3.2809, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.0770416024653313, |
|
"grad_norm": 51.007381439208984, |
|
"learning_rate": 6.88e-06, |
|
"loss": 2.4943, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.1540832049306626, |
|
"grad_norm": 59.46632385253906, |
|
"learning_rate": 7.3800000000000005e-06, |
|
"loss": 2.2635, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.231124807395994, |
|
"grad_norm": 61.94889831542969, |
|
"learning_rate": 7.88e-06, |
|
"loss": 2.569, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.231124807395994, |
|
"eval_loss": 0.7236458659172058, |
|
"eval_runtime": 242.3506, |
|
"eval_samples_per_second": 1.778, |
|
"eval_steps_per_second": 0.891, |
|
"eval_wer": 0.5511323003575685, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.308166409861325, |
|
"grad_norm": 54.65010070800781, |
|
"learning_rate": 8.380000000000001e-06, |
|
"loss": 2.4459, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.3852080123266564, |
|
"grad_norm": 69.15284729003906, |
|
"learning_rate": 8.880000000000001e-06, |
|
"loss": 2.1676, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4622496147919877, |
|
"grad_norm": 40.50135040283203, |
|
"learning_rate": 9.38e-06, |
|
"loss": 2.1835, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.539291217257319, |
|
"grad_norm": 47.11216735839844, |
|
"learning_rate": 9.88e-06, |
|
"loss": 2.7759, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.61633281972265, |
|
"grad_norm": 41.89402389526367, |
|
"learning_rate": 9.979392624728852e-06, |
|
"loss": 2.5969, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.6933744221879814, |
|
"grad_norm": 54.26713562011719, |
|
"learning_rate": 9.952277657266813e-06, |
|
"loss": 2.0526, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.7704160246533127, |
|
"grad_norm": 48.94434356689453, |
|
"learning_rate": 9.925162689804773e-06, |
|
"loss": 2.068, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.847457627118644, |
|
"grad_norm": 46.54755783081055, |
|
"learning_rate": 9.898047722342734e-06, |
|
"loss": 2.5305, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.847457627118644, |
|
"eval_loss": 0.6367625594139099, |
|
"eval_runtime": 236.1921, |
|
"eval_samples_per_second": 1.825, |
|
"eval_steps_per_second": 0.915, |
|
"eval_wer": 0.47723480333730633, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9244992295839753, |
|
"grad_norm": 45.87973403930664, |
|
"learning_rate": 9.870932754880696e-06, |
|
"loss": 2.4065, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 28.206829071044922, |
|
"learning_rate": 9.843817787418655e-06, |
|
"loss": 2.0807, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.0770416024653313, |
|
"grad_norm": 29.09613800048828, |
|
"learning_rate": 9.816702819956617e-06, |
|
"loss": 1.3675, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.1540832049306626, |
|
"grad_norm": 28.714885711669922, |
|
"learning_rate": 9.789587852494578e-06, |
|
"loss": 1.2501, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.231124807395994, |
|
"grad_norm": 54.41804122924805, |
|
"learning_rate": 9.76247288503254e-06, |
|
"loss": 1.2393, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.3081664098613253, |
|
"grad_norm": 31.56501007080078, |
|
"learning_rate": 9.7353579175705e-06, |
|
"loss": 1.2768, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.3852080123266566, |
|
"grad_norm": 73.83405303955078, |
|
"learning_rate": 9.70824295010846e-06, |
|
"loss": 1.2799, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.462249614791988, |
|
"grad_norm": 42.22137451171875, |
|
"learning_rate": 9.68112798264642e-06, |
|
"loss": 1.4563, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.462249614791988, |
|
"eval_loss": 0.651836097240448, |
|
"eval_runtime": 241.7933, |
|
"eval_samples_per_second": 1.783, |
|
"eval_steps_per_second": 0.893, |
|
"eval_wer": 0.4936829558998808, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.539291217257319, |
|
"grad_norm": 58.9369010925293, |
|
"learning_rate": 9.654013015184382e-06, |
|
"loss": 1.4182, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.61633281972265, |
|
"grad_norm": 33.295654296875, |
|
"learning_rate": 9.626898047722343e-06, |
|
"loss": 1.5956, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.6933744221879814, |
|
"grad_norm": 31.526493072509766, |
|
"learning_rate": 9.599783080260305e-06, |
|
"loss": 1.4829, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.7704160246533127, |
|
"grad_norm": 38.276947021484375, |
|
"learning_rate": 9.572668112798266e-06, |
|
"loss": 1.4473, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.847457627118644, |
|
"grad_norm": 35.50004959106445, |
|
"learning_rate": 9.545553145336226e-06, |
|
"loss": 1.417, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.9244992295839753, |
|
"grad_norm": 29.786792755126953, |
|
"learning_rate": 9.518438177874187e-06, |
|
"loss": 1.2574, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 12.207977294921875, |
|
"learning_rate": 9.491323210412147e-06, |
|
"loss": 1.2935, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 3.0770416024653313, |
|
"grad_norm": 27.92531394958496, |
|
"learning_rate": 9.464208242950108e-06, |
|
"loss": 0.6413, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0770416024653313, |
|
"eval_loss": 0.6891891956329346, |
|
"eval_runtime": 240.7265, |
|
"eval_samples_per_second": 1.79, |
|
"eval_steps_per_second": 0.897, |
|
"eval_wer": 0.47818831942789036, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.1540832049306626, |
|
"grad_norm": 30.59457778930664, |
|
"learning_rate": 9.43709327548807e-06, |
|
"loss": 0.764, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 3.231124807395994, |
|
"grad_norm": 23.21906852722168, |
|
"learning_rate": 9.409978308026031e-06, |
|
"loss": 0.6408, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.3081664098613253, |
|
"grad_norm": 28.22869110107422, |
|
"learning_rate": 9.382863340563993e-06, |
|
"loss": 0.6615, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 3.3852080123266566, |
|
"grad_norm": 32.756710052490234, |
|
"learning_rate": 9.355748373101952e-06, |
|
"loss": 0.7282, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.462249614791988, |
|
"grad_norm": 27.851184844970703, |
|
"learning_rate": 9.328633405639914e-06, |
|
"loss": 0.8159, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 3.539291217257319, |
|
"grad_norm": 39.99174118041992, |
|
"learning_rate": 9.301518438177875e-06, |
|
"loss": 1.218, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.61633281972265, |
|
"grad_norm": 28.462491989135742, |
|
"learning_rate": 9.274403470715837e-06, |
|
"loss": 0.7496, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 3.6933744221879814, |
|
"grad_norm": 30.86623191833496, |
|
"learning_rate": 9.247288503253798e-06, |
|
"loss": 0.581, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.6933744221879814, |
|
"eval_loss": 0.6935437321662903, |
|
"eval_runtime": 235.9787, |
|
"eval_samples_per_second": 1.826, |
|
"eval_steps_per_second": 0.915, |
|
"eval_wer": 0.47508939213349227, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.6933744221879814, |
|
"step": 1200, |
|
"total_flos": 9.7824980385792e+18, |
|
"train_loss": 2.601292386849721, |
|
"train_runtime": 3847.301, |
|
"train_samples_per_second": 20.235, |
|
"train_steps_per_second": 2.526 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 9720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.7824980385792e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|