whisper_medium_CGN / trainer_state.json
Jakob Poncelet
First model version
d3bee8d
{
"best_metric": 10.727751271110364,
"best_model_checkpoint": "/esat/audioslave/jponcele/whisper/finetuning_event/CGN/medium/checkpoint-15000",
"epoch": 18.0328,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.9600000000000003e-06,
"loss": 0.9633,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 3.96e-06,
"loss": 0.433,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.2679,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 7.960000000000002e-06,
"loss": 0.258,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 9.960000000000001e-06,
"loss": 0.2858,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 9.93241379310345e-06,
"loss": 0.2051,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 9.86344827586207e-06,
"loss": 0.1674,
"step": 700
},
{
"epoch": 0.05,
"learning_rate": 9.79448275862069e-06,
"loss": 0.4538,
"step": 800
},
{
"epoch": 1.01,
"learning_rate": 9.725517241379311e-06,
"loss": 0.1422,
"step": 900
},
{
"epoch": 1.01,
"learning_rate": 9.65655172413793e-06,
"loss": 0.1116,
"step": 1000
},
{
"epoch": 1.01,
"eval_loss": 0.29779529571533203,
"eval_runtime": 54354.8942,
"eval_samples_per_second": 0.95,
"eval_steps_per_second": 0.03,
"eval_wer": 15.212704864281193,
"step": 1000
},
{
"epoch": 1.02,
"learning_rate": 9.587586206896554e-06,
"loss": 0.1925,
"step": 1100
},
{
"epoch": 1.03,
"learning_rate": 9.518620689655173e-06,
"loss": 0.3391,
"step": 1200
},
{
"epoch": 1.03,
"learning_rate": 9.449655172413794e-06,
"loss": 0.1312,
"step": 1300
},
{
"epoch": 1.04,
"learning_rate": 9.380689655172415e-06,
"loss": 0.1703,
"step": 1400
},
{
"epoch": 1.05,
"learning_rate": 9.311724137931035e-06,
"loss": 0.2245,
"step": 1500
},
{
"epoch": 1.05,
"learning_rate": 9.242758620689656e-06,
"loss": 0.2559,
"step": 1600
},
{
"epoch": 2.01,
"learning_rate": 9.173793103448277e-06,
"loss": 0.1932,
"step": 1700
},
{
"epoch": 2.01,
"learning_rate": 9.104827586206897e-06,
"loss": 0.1351,
"step": 1800
},
{
"epoch": 2.02,
"learning_rate": 9.035862068965518e-06,
"loss": 0.1651,
"step": 1900
},
{
"epoch": 2.03,
"learning_rate": 8.966896551724139e-06,
"loss": 0.0786,
"step": 2000
},
{
"epoch": 2.03,
"eval_loss": 0.2841915190219879,
"eval_runtime": 50819.3721,
"eval_samples_per_second": 1.016,
"eval_steps_per_second": 0.032,
"eval_wer": 13.48516624073487,
"step": 2000
},
{
"epoch": 2.03,
"learning_rate": 8.89793103448276e-06,
"loss": 0.0883,
"step": 2100
},
{
"epoch": 2.04,
"learning_rate": 8.82896551724138e-06,
"loss": 0.0619,
"step": 2200
},
{
"epoch": 2.05,
"learning_rate": 8.76e-06,
"loss": 0.2144,
"step": 2300
},
{
"epoch": 2.05,
"learning_rate": 8.691034482758622e-06,
"loss": 0.1967,
"step": 2400
},
{
"epoch": 3.01,
"learning_rate": 8.622068965517241e-06,
"loss": 0.1782,
"step": 2500
},
{
"epoch": 3.01,
"learning_rate": 8.553103448275863e-06,
"loss": 0.1759,
"step": 2600
},
{
"epoch": 3.02,
"learning_rate": 8.484137931034482e-06,
"loss": 0.1136,
"step": 2700
},
{
"epoch": 3.03,
"learning_rate": 8.415172413793105e-06,
"loss": 0.1568,
"step": 2800
},
{
"epoch": 3.03,
"learning_rate": 8.346206896551724e-06,
"loss": 0.0608,
"step": 2900
},
{
"epoch": 3.04,
"learning_rate": 8.277241379310346e-06,
"loss": 0.2042,
"step": 3000
},
{
"epoch": 3.04,
"eval_loss": 0.2656170427799225,
"eval_runtime": 51339.123,
"eval_samples_per_second": 1.005,
"eval_steps_per_second": 0.031,
"eval_wer": 13.358978374888741,
"step": 3000
},
{
"epoch": 3.05,
"learning_rate": 8.208275862068967e-06,
"loss": 0.1519,
"step": 3100
},
{
"epoch": 3.05,
"learning_rate": 8.139310344827586e-06,
"loss": 0.1594,
"step": 3200
},
{
"epoch": 4.01,
"learning_rate": 8.070344827586207e-06,
"loss": 0.1251,
"step": 3300
},
{
"epoch": 4.01,
"learning_rate": 8.001379310344829e-06,
"loss": 0.2179,
"step": 3400
},
{
"epoch": 4.02,
"learning_rate": 7.932413793103448e-06,
"loss": 0.1667,
"step": 3500
},
{
"epoch": 4.03,
"learning_rate": 7.86344827586207e-06,
"loss": 0.0913,
"step": 3600
},
{
"epoch": 4.03,
"learning_rate": 7.79448275862069e-06,
"loss": 0.1367,
"step": 3700
},
{
"epoch": 4.04,
"learning_rate": 7.725517241379312e-06,
"loss": 0.0334,
"step": 3800
},
{
"epoch": 4.05,
"learning_rate": 7.656551724137931e-06,
"loss": 0.1014,
"step": 3900
},
{
"epoch": 4.05,
"learning_rate": 7.587586206896552e-06,
"loss": 0.1183,
"step": 4000
},
{
"epoch": 4.05,
"eval_loss": 0.26671889424324036,
"eval_runtime": 51490.188,
"eval_samples_per_second": 1.002,
"eval_steps_per_second": 0.031,
"eval_wer": 12.697733584435728,
"step": 4000
},
{
"epoch": 5.0,
"learning_rate": 7.518620689655173e-06,
"loss": 0.0621,
"step": 4100
},
{
"epoch": 5.01,
"learning_rate": 7.449655172413793e-06,
"loss": 0.087,
"step": 4200
},
{
"epoch": 5.02,
"learning_rate": 7.380689655172414e-06,
"loss": 0.0685,
"step": 4300
},
{
"epoch": 5.02,
"learning_rate": 7.311724137931035e-06,
"loss": 0.2037,
"step": 4400
},
{
"epoch": 5.03,
"learning_rate": 7.242758620689656e-06,
"loss": 0.1319,
"step": 4500
},
{
"epoch": 5.04,
"learning_rate": 7.173793103448277e-06,
"loss": 0.12,
"step": 4600
},
{
"epoch": 5.04,
"learning_rate": 7.104827586206897e-06,
"loss": 0.0796,
"step": 4700
},
{
"epoch": 5.05,
"learning_rate": 7.0358620689655175e-06,
"loss": 0.0628,
"step": 4800
},
{
"epoch": 6.0,
"learning_rate": 6.966896551724139e-06,
"loss": 0.0421,
"step": 4900
},
{
"epoch": 6.01,
"learning_rate": 6.897931034482759e-06,
"loss": 0.0584,
"step": 5000
},
{
"epoch": 6.01,
"eval_loss": 0.26035600900650024,
"eval_runtime": 50523.6417,
"eval_samples_per_second": 1.022,
"eval_steps_per_second": 0.032,
"eval_wer": 12.099264392229069,
"step": 5000
},
{
"epoch": 6.02,
"learning_rate": 6.828965517241379e-06,
"loss": 0.0527,
"step": 5100
},
{
"epoch": 6.02,
"learning_rate": 6.760000000000001e-06,
"loss": 0.0596,
"step": 5200
},
{
"epoch": 6.03,
"learning_rate": 6.691034482758622e-06,
"loss": 0.0212,
"step": 5300
},
{
"epoch": 6.04,
"learning_rate": 6.622068965517242e-06,
"loss": 0.0502,
"step": 5400
},
{
"epoch": 6.04,
"learning_rate": 6.553103448275862e-06,
"loss": 0.0626,
"step": 5500
},
{
"epoch": 6.05,
"learning_rate": 6.4841379310344835e-06,
"loss": 0.0531,
"step": 5600
},
{
"epoch": 7.0,
"learning_rate": 6.415172413793104e-06,
"loss": 0.0442,
"step": 5700
},
{
"epoch": 7.01,
"learning_rate": 6.346206896551724e-06,
"loss": 0.0849,
"step": 5800
},
{
"epoch": 7.02,
"learning_rate": 6.2772413793103445e-06,
"loss": 0.0737,
"step": 5900
},
{
"epoch": 7.02,
"learning_rate": 6.2082758620689665e-06,
"loss": 0.0126,
"step": 6000
},
{
"epoch": 7.02,
"eval_loss": 0.27762678265571594,
"eval_runtime": 52091.6281,
"eval_samples_per_second": 0.991,
"eval_steps_per_second": 0.031,
"eval_wer": 12.14765126308833,
"step": 6000
},
{
"epoch": 7.03,
"learning_rate": 6.139310344827587e-06,
"loss": 0.1003,
"step": 6100
},
{
"epoch": 7.04,
"learning_rate": 6.070344827586207e-06,
"loss": 0.0976,
"step": 6200
},
{
"epoch": 7.04,
"learning_rate": 6.001379310344828e-06,
"loss": 0.0666,
"step": 6300
},
{
"epoch": 7.05,
"learning_rate": 5.933103448275862e-06,
"loss": 0.0571,
"step": 6400
},
{
"epoch": 8.0,
"learning_rate": 5.8641379310344835e-06,
"loss": 0.103,
"step": 6500
},
{
"epoch": 8.01,
"learning_rate": 5.795172413793104e-06,
"loss": 0.103,
"step": 6600
},
{
"epoch": 8.02,
"learning_rate": 5.726206896551725e-06,
"loss": 0.0163,
"step": 6700
},
{
"epoch": 8.02,
"learning_rate": 5.657241379310345e-06,
"loss": 0.0209,
"step": 6800
},
{
"epoch": 8.03,
"learning_rate": 5.588275862068966e-06,
"loss": 0.0216,
"step": 6900
},
{
"epoch": 8.04,
"learning_rate": 5.519310344827587e-06,
"loss": 0.0837,
"step": 7000
},
{
"epoch": 8.04,
"eval_loss": 0.2541463077068329,
"eval_runtime": 51084.0397,
"eval_samples_per_second": 1.01,
"eval_steps_per_second": 0.032,
"eval_wer": 11.939715052264187,
"step": 7000
},
{
"epoch": 8.04,
"learning_rate": 5.450344827586207e-06,
"loss": 0.0847,
"step": 7100
},
{
"epoch": 8.05,
"learning_rate": 5.3813793103448275e-06,
"loss": 0.0729,
"step": 7200
},
{
"epoch": 9.0,
"learning_rate": 5.3124137931034495e-06,
"loss": 0.0972,
"step": 7300
},
{
"epoch": 9.01,
"learning_rate": 5.24344827586207e-06,
"loss": 0.1214,
"step": 7400
},
{
"epoch": 9.02,
"learning_rate": 5.17448275862069e-06,
"loss": 0.0409,
"step": 7500
},
{
"epoch": 9.02,
"learning_rate": 5.1055172413793105e-06,
"loss": 0.0693,
"step": 7600
},
{
"epoch": 9.03,
"learning_rate": 5.036551724137932e-06,
"loss": 0.0252,
"step": 7700
},
{
"epoch": 9.04,
"learning_rate": 4.967586206896552e-06,
"loss": 0.0379,
"step": 7800
},
{
"epoch": 9.04,
"learning_rate": 4.898620689655173e-06,
"loss": 0.0548,
"step": 7900
},
{
"epoch": 9.05,
"learning_rate": 4.8296551724137935e-06,
"loss": 0.0229,
"step": 8000
},
{
"epoch": 9.05,
"eval_loss": 0.26634466648101807,
"eval_runtime": 50116.5426,
"eval_samples_per_second": 1.03,
"eval_steps_per_second": 0.032,
"eval_wer": 11.32214577945519,
"step": 8000
},
{
"epoch": 10.0,
"learning_rate": 4.760689655172414e-06,
"loss": 0.0136,
"step": 8100
},
{
"epoch": 10.01,
"learning_rate": 4.691724137931035e-06,
"loss": 0.0128,
"step": 8200
},
{
"epoch": 10.02,
"learning_rate": 4.622758620689655e-06,
"loss": 0.0465,
"step": 8300
},
{
"epoch": 10.02,
"learning_rate": 4.5537931034482765e-06,
"loss": 0.048,
"step": 8400
},
{
"epoch": 10.03,
"learning_rate": 4.484827586206897e-06,
"loss": 0.025,
"step": 8500
},
{
"epoch": 10.04,
"learning_rate": 4.415862068965517e-06,
"loss": 0.0319,
"step": 8600
},
{
"epoch": 10.04,
"learning_rate": 4.347586206896552e-06,
"loss": 0.0757,
"step": 8700
},
{
"epoch": 10.05,
"learning_rate": 4.278620689655173e-06,
"loss": 0.0902,
"step": 8800
},
{
"epoch": 11.0,
"learning_rate": 4.2096551724137935e-06,
"loss": 0.0532,
"step": 8900
},
{
"epoch": 11.01,
"learning_rate": 4.140689655172414e-06,
"loss": 0.042,
"step": 9000
},
{
"epoch": 11.01,
"eval_loss": 0.25486019253730774,
"eval_runtime": 50673.4695,
"eval_samples_per_second": 1.019,
"eval_steps_per_second": 0.032,
"eval_wer": 11.486279138764633,
"step": 9000
},
{
"epoch": 11.02,
"learning_rate": 4.071724137931035e-06,
"loss": 0.067,
"step": 9100
},
{
"epoch": 11.02,
"learning_rate": 4.002758620689655e-06,
"loss": 0.0894,
"step": 9200
},
{
"epoch": 11.03,
"learning_rate": 3.9337931034482765e-06,
"loss": 0.0844,
"step": 9300
},
{
"epoch": 11.04,
"learning_rate": 3.864827586206897e-06,
"loss": 0.1191,
"step": 9400
},
{
"epoch": 11.04,
"learning_rate": 3.795862068965517e-06,
"loss": 0.1158,
"step": 9500
},
{
"epoch": 11.05,
"learning_rate": 3.7268965517241383e-06,
"loss": 0.0047,
"step": 9600
},
{
"epoch": 12.0,
"learning_rate": 3.657931034482759e-06,
"loss": 0.0304,
"step": 9700
},
{
"epoch": 12.01,
"learning_rate": 3.5889655172413794e-06,
"loss": 0.0239,
"step": 9800
},
{
"epoch": 12.02,
"learning_rate": 3.52e-06,
"loss": 0.0452,
"step": 9900
},
{
"epoch": 12.02,
"learning_rate": 3.4510344827586214e-06,
"loss": 0.0075,
"step": 10000
},
{
"epoch": 12.02,
"eval_loss": 0.2775491178035736,
"eval_runtime": 50630.8251,
"eval_samples_per_second": 1.019,
"eval_steps_per_second": 0.032,
"eval_wer": 11.078046749357283,
"step": 10000
},
{
"epoch": 12.03,
"learning_rate": 3.3820689655172417e-06,
"loss": 0.0148,
"step": 10100
},
{
"epoch": 12.04,
"learning_rate": 3.3131034482758624e-06,
"loss": 0.0136,
"step": 10200
},
{
"epoch": 12.04,
"learning_rate": 3.2441379310344828e-06,
"loss": 0.0179,
"step": 10300
},
{
"epoch": 12.05,
"learning_rate": 3.175172413793104e-06,
"loss": 0.0038,
"step": 10400
},
{
"epoch": 13.0,
"learning_rate": 3.1062068965517243e-06,
"loss": 0.0088,
"step": 10500
},
{
"epoch": 13.01,
"learning_rate": 3.037241379310345e-06,
"loss": 0.0239,
"step": 10600
},
{
"epoch": 13.01,
"learning_rate": 2.9696551724137935e-06,
"loss": 0.1778,
"step": 10700
},
{
"epoch": 13.02,
"learning_rate": 2.900689655172414e-06,
"loss": 0.1326,
"step": 10800
},
{
"epoch": 13.03,
"learning_rate": 2.8317241379310346e-06,
"loss": 0.0981,
"step": 10900
},
{
"epoch": 13.03,
"learning_rate": 2.7627586206896558e-06,
"loss": 0.008,
"step": 11000
},
{
"epoch": 13.03,
"eval_loss": 0.2499028444290161,
"eval_runtime": 50219.7501,
"eval_samples_per_second": 1.028,
"eval_steps_per_second": 0.032,
"eval_wer": 10.975924985070105,
"step": 11000
},
{
"epoch": 13.04,
"learning_rate": 2.693793103448276e-06,
"loss": 0.0285,
"step": 11100
},
{
"epoch": 13.05,
"learning_rate": 2.624827586206897e-06,
"loss": 0.0103,
"step": 11200
},
{
"epoch": 14.0,
"learning_rate": 2.555862068965517e-06,
"loss": 0.0371,
"step": 11300
},
{
"epoch": 14.01,
"learning_rate": 2.486896551724138e-06,
"loss": 0.0156,
"step": 11400
},
{
"epoch": 14.01,
"learning_rate": 2.4179310344827587e-06,
"loss": 0.0435,
"step": 11500
},
{
"epoch": 14.02,
"learning_rate": 2.3489655172413794e-06,
"loss": 0.0172,
"step": 11600
},
{
"epoch": 14.03,
"learning_rate": 2.28e-06,
"loss": 0.0153,
"step": 11700
},
{
"epoch": 14.03,
"learning_rate": 2.211034482758621e-06,
"loss": 0.0447,
"step": 11800
},
{
"epoch": 14.04,
"learning_rate": 2.1420689655172417e-06,
"loss": 0.1005,
"step": 11900
},
{
"epoch": 14.05,
"learning_rate": 2.073103448275862e-06,
"loss": 0.0739,
"step": 12000
},
{
"epoch": 14.05,
"eval_loss": 0.23082366585731506,
"eval_runtime": 49871.6923,
"eval_samples_per_second": 1.035,
"eval_steps_per_second": 0.032,
"eval_wer": 10.944091517399537,
"step": 12000
},
{
"epoch": 15.0,
"learning_rate": 2.004137931034483e-06,
"loss": 0.0301,
"step": 12100
},
{
"epoch": 15.01,
"learning_rate": 1.9351724137931035e-06,
"loss": 0.0686,
"step": 12200
},
{
"epoch": 15.01,
"learning_rate": 1.8662068965517243e-06,
"loss": 0.0211,
"step": 12300
},
{
"epoch": 15.02,
"learning_rate": 1.797241379310345e-06,
"loss": 0.0106,
"step": 12400
},
{
"epoch": 15.03,
"learning_rate": 1.7282758620689658e-06,
"loss": 0.0105,
"step": 12500
},
{
"epoch": 15.03,
"learning_rate": 1.6593103448275863e-06,
"loss": 0.0118,
"step": 12600
},
{
"epoch": 15.04,
"learning_rate": 1.590344827586207e-06,
"loss": 0.0372,
"step": 12700
},
{
"epoch": 15.05,
"learning_rate": 1.5213793103448276e-06,
"loss": 0.0414,
"step": 12800
},
{
"epoch": 16.0,
"learning_rate": 1.4524137931034486e-06,
"loss": 0.0686,
"step": 12900
},
{
"epoch": 16.01,
"learning_rate": 1.3834482758620691e-06,
"loss": 0.0379,
"step": 13000
},
{
"epoch": 16.01,
"eval_loss": 0.2422882616519928,
"eval_runtime": 93043.0894,
"eval_samples_per_second": 0.555,
"eval_steps_per_second": 0.017,
"eval_wer": 10.792564211287639,
"step": 13000
},
{
"epoch": 16.01,
"learning_rate": 1.3144827586206899e-06,
"loss": 0.0205,
"step": 13100
},
{
"epoch": 16.02,
"learning_rate": 1.2455172413793104e-06,
"loss": 0.0128,
"step": 13200
},
{
"epoch": 16.03,
"learning_rate": 1.1765517241379312e-06,
"loss": 0.0147,
"step": 13300
},
{
"epoch": 16.03,
"learning_rate": 1.1075862068965517e-06,
"loss": 0.0142,
"step": 13400
},
{
"epoch": 16.04,
"learning_rate": 1.0386206896551724e-06,
"loss": 0.0075,
"step": 13500
},
{
"epoch": 16.05,
"learning_rate": 9.696551724137932e-07,
"loss": 0.0038,
"step": 13600
},
{
"epoch": 16.05,
"learning_rate": 9.006896551724138e-07,
"loss": 0.0042,
"step": 13700
},
{
"epoch": 17.01,
"learning_rate": 8.317241379310345e-07,
"loss": 0.015,
"step": 13800
},
{
"epoch": 17.01,
"learning_rate": 7.627586206896552e-07,
"loss": 0.0243,
"step": 13900
},
{
"epoch": 17.02,
"learning_rate": 6.937931034482759e-07,
"loss": 0.02,
"step": 14000
},
{
"epoch": 17.02,
"eval_loss": 0.2628898024559021,
"eval_runtime": 49937.0425,
"eval_samples_per_second": 1.034,
"eval_steps_per_second": 0.032,
"eval_wer": 10.769898782306194,
"step": 14000
},
{
"epoch": 17.03,
"learning_rate": 6.248275862068965e-07,
"loss": 0.0228,
"step": 14100
},
{
"epoch": 17.03,
"learning_rate": 5.558620689655173e-07,
"loss": 0.0489,
"step": 14200
},
{
"epoch": 17.04,
"learning_rate": 4.868965517241379e-07,
"loss": 0.0036,
"step": 14300
},
{
"epoch": 17.05,
"learning_rate": 4.179310344827586e-07,
"loss": 0.0083,
"step": 14400
},
{
"epoch": 17.05,
"learning_rate": 3.489655172413793e-07,
"loss": 0.0041,
"step": 14500
},
{
"epoch": 18.01,
"learning_rate": 2.8e-07,
"loss": 0.0051,
"step": 14600
},
{
"epoch": 18.01,
"learning_rate": 2.1103448275862072e-07,
"loss": 0.0068,
"step": 14700
},
{
"epoch": 18.02,
"learning_rate": 1.420689655172414e-07,
"loss": 0.0119,
"step": 14800
},
{
"epoch": 18.03,
"learning_rate": 7.310344827586207e-08,
"loss": 0.013,
"step": 14900
},
{
"epoch": 18.03,
"learning_rate": 4.137931034482759e-09,
"loss": 0.0111,
"step": 15000
},
{
"epoch": 18.03,
"eval_loss": 0.2638553977012634,
"eval_runtime": 49994.1917,
"eval_samples_per_second": 1.032,
"eval_steps_per_second": 0.032,
"eval_wer": 10.727751271110364,
"step": 15000
},
{
"epoch": 18.03,
"step": 15000,
"total_flos": 9.790458756857856e+20,
"train_loss": 0.006645547614494959,
"train_runtime": 316852.8637,
"train_samples_per_second": 3.03,
"train_steps_per_second": 0.047
}
],
"max_steps": 15000,
"num_train_epochs": 9223372036854775807,
"total_flos": 9.790458756857856e+20,
"trial_name": null,
"trial_params": null
}