|
{ |
|
"best_metric": 0.21041299402713776, |
|
"best_model_checkpoint": "wav2vec2-xls-r-1b-ja-dumy8/checkpoint-42000", |
|
"epoch": 99.99831744251262, |
|
"global_step": 44500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 4.9427586206896555e-05, |
|
"loss": 2.2896, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_cer": 0.17672193185166804, |
|
"eval_loss": 0.47477588057518005, |
|
"eval_runtime": 210.4906, |
|
"eval_samples_per_second": 17.464, |
|
"eval_steps_per_second": 2.185, |
|
"eval_wer": 0.4013274336283186, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 4.770459770114943e-05, |
|
"loss": 1.1608, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_cer": 0.14555577958031546, |
|
"eval_loss": 0.33504194021224976, |
|
"eval_runtime": 209.7295, |
|
"eval_samples_per_second": 17.527, |
|
"eval_steps_per_second": 2.193, |
|
"eval_wer": 0.315929203539823, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 4.598045977011494e-05, |
|
"loss": 1.1042, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_cer": 0.1399956573661926, |
|
"eval_loss": 0.31194448471069336, |
|
"eval_runtime": 210.0423, |
|
"eval_samples_per_second": 17.501, |
|
"eval_steps_per_second": 2.19, |
|
"eval_wer": 0.2970656730321379, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 4.4257471264367814e-05, |
|
"loss": 1.0494, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"eval_cer": 0.1353350807264606, |
|
"eval_loss": 0.2974083721637726, |
|
"eval_runtime": 209.3107, |
|
"eval_samples_per_second": 17.562, |
|
"eval_steps_per_second": 2.198, |
|
"eval_wer": 0.2867489520260829, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 4.253448275862069e-05, |
|
"loss": 1.0061, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"eval_cer": 0.1299610713897979, |
|
"eval_loss": 0.2802002429962158, |
|
"eval_runtime": 210.2437, |
|
"eval_samples_per_second": 17.484, |
|
"eval_steps_per_second": 2.188, |
|
"eval_wer": 0.2745924545877969, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 20.22, |
|
"learning_rate": 4.0810344827586214e-05, |
|
"loss": 0.9629, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 20.22, |
|
"eval_cer": 0.13255889697101292, |
|
"eval_loss": 0.2843669652938843, |
|
"eval_runtime": 212.4442, |
|
"eval_samples_per_second": 17.303, |
|
"eval_steps_per_second": 2.165, |
|
"eval_wer": 0.2776432231020028, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 23.59, |
|
"learning_rate": 3.908735632183908e-05, |
|
"loss": 0.9267, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 23.59, |
|
"eval_cer": 0.12550211703398112, |
|
"eval_loss": 0.2576734721660614, |
|
"eval_runtime": 210.3131, |
|
"eval_samples_per_second": 17.479, |
|
"eval_steps_per_second": 2.187, |
|
"eval_wer": 0.2603400093153237, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"learning_rate": 3.73632183908046e-05, |
|
"loss": 0.8984, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 26.96, |
|
"eval_cer": 0.12256308451075577, |
|
"eval_loss": 0.2508338689804077, |
|
"eval_runtime": 209.2521, |
|
"eval_samples_per_second": 17.567, |
|
"eval_steps_per_second": 2.198, |
|
"eval_wer": 0.2530740568234746, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 30.34, |
|
"learning_rate": 3.564022988505747e-05, |
|
"loss": 0.8729, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 30.34, |
|
"eval_cer": 0.12540906059525103, |
|
"eval_loss": 0.26286962628364563, |
|
"eval_runtime": 208.7235, |
|
"eval_samples_per_second": 17.612, |
|
"eval_steps_per_second": 2.204, |
|
"eval_wer": 0.2605728924080112, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 33.71, |
|
"learning_rate": 3.3917241379310346e-05, |
|
"loss": 0.8546, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 33.71, |
|
"eval_cer": 0.11932161856165764, |
|
"eval_loss": 0.2401771992444992, |
|
"eval_runtime": 208.4946, |
|
"eval_samples_per_second": 17.631, |
|
"eval_steps_per_second": 2.206, |
|
"eval_wer": 0.24473684210526317, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 37.08, |
|
"learning_rate": 3.219310344827586e-05, |
|
"loss": 0.8304, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 37.08, |
|
"eval_cer": 0.12086480450393164, |
|
"eval_loss": 0.25319021940231323, |
|
"eval_runtime": 209.1002, |
|
"eval_samples_per_second": 17.58, |
|
"eval_steps_per_second": 2.2, |
|
"eval_wer": 0.2472286911970191, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 40.45, |
|
"learning_rate": 3.046896551724138e-05, |
|
"loss": 0.8075, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 40.45, |
|
"eval_cer": 0.11977139134885308, |
|
"eval_loss": 0.2439287155866623, |
|
"eval_runtime": 209.0991, |
|
"eval_samples_per_second": 17.58, |
|
"eval_steps_per_second": 2.2, |
|
"eval_wer": 0.2468793665579879, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 43.82, |
|
"learning_rate": 2.8747126436781612e-05, |
|
"loss": 0.7827, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 43.82, |
|
"eval_cer": 0.11672379298044264, |
|
"eval_loss": 0.23866486549377441, |
|
"eval_runtime": 208.9618, |
|
"eval_samples_per_second": 17.592, |
|
"eval_steps_per_second": 2.201, |
|
"eval_wer": 0.23721471821145784, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 47.19, |
|
"learning_rate": 2.7022988505747126e-05, |
|
"loss": 0.7627, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 47.19, |
|
"eval_cer": 0.11469981543806318, |
|
"eval_loss": 0.23443765938282013, |
|
"eval_runtime": 209.1358, |
|
"eval_samples_per_second": 17.577, |
|
"eval_steps_per_second": 2.2, |
|
"eval_wer": 0.23306939916162087, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 50.56, |
|
"learning_rate": 2.5300000000000002e-05, |
|
"loss": 0.7402, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 50.56, |
|
"eval_cer": 0.11345906292166198, |
|
"eval_loss": 0.23135580122470856, |
|
"eval_runtime": 210.5917, |
|
"eval_samples_per_second": 17.456, |
|
"eval_steps_per_second": 2.184, |
|
"eval_wer": 0.22994876571960876, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 53.93, |
|
"learning_rate": 2.357586206896552e-05, |
|
"loss": 0.718, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 53.93, |
|
"eval_cer": 0.1114040665663725, |
|
"eval_loss": 0.22568760812282562, |
|
"eval_runtime": 208.402, |
|
"eval_samples_per_second": 17.639, |
|
"eval_steps_per_second": 2.207, |
|
"eval_wer": 0.22666511411271542, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 57.3, |
|
"learning_rate": 2.1851724137931033e-05, |
|
"loss": 0.7016, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 57.3, |
|
"eval_cer": 0.10889929742388758, |
|
"eval_loss": 0.22044427692890167, |
|
"eval_runtime": 208.5673, |
|
"eval_samples_per_second": 17.625, |
|
"eval_steps_per_second": 2.206, |
|
"eval_wer": 0.2183977643223102, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 60.67, |
|
"learning_rate": 2.012873563218391e-05, |
|
"loss": 0.6804, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 60.67, |
|
"eval_cer": 0.10851931696573971, |
|
"eval_loss": 0.22273367643356323, |
|
"eval_runtime": 208.2294, |
|
"eval_samples_per_second": 17.654, |
|
"eval_steps_per_second": 2.209, |
|
"eval_wer": 0.21811830461108522, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 64.04, |
|
"learning_rate": 1.8405747126436782e-05, |
|
"loss": 0.6625, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 64.04, |
|
"eval_cer": 0.10578966142965708, |
|
"eval_loss": 0.21378228068351746, |
|
"eval_runtime": 209.3597, |
|
"eval_samples_per_second": 17.558, |
|
"eval_steps_per_second": 2.197, |
|
"eval_wer": 0.21115510013972985, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 67.42, |
|
"learning_rate": 1.66816091954023e-05, |
|
"loss": 0.6465, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 67.42, |
|
"eval_cer": 0.10442483366161577, |
|
"eval_loss": 0.2140830010175705, |
|
"eval_runtime": 208.6345, |
|
"eval_samples_per_second": 17.619, |
|
"eval_steps_per_second": 2.205, |
|
"eval_wer": 0.20808104331625524, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 70.79, |
|
"learning_rate": 1.4958620689655173e-05, |
|
"loss": 0.6238, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 70.79, |
|
"eval_cer": 0.1049754175907688, |
|
"eval_loss": 0.21720194816589355, |
|
"eval_runtime": 210.4781, |
|
"eval_samples_per_second": 17.465, |
|
"eval_steps_per_second": 2.186, |
|
"eval_wer": 0.2082207731718677, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 74.16, |
|
"learning_rate": 1.323448275862069e-05, |
|
"loss": 0.6062, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 74.16, |
|
"eval_cer": 0.10427749430029312, |
|
"eval_loss": 0.2174130082130432, |
|
"eval_runtime": 209.3158, |
|
"eval_samples_per_second": 17.562, |
|
"eval_steps_per_second": 2.198, |
|
"eval_wer": 0.20582207731718677, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 77.53, |
|
"learning_rate": 1.1510344827586207e-05, |
|
"loss": 0.588, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 77.53, |
|
"eval_cer": 0.1026955348418816, |
|
"eval_loss": 0.21560049057006836, |
|
"eval_runtime": 208.8439, |
|
"eval_samples_per_second": 17.602, |
|
"eval_steps_per_second": 2.203, |
|
"eval_wer": 0.20344666977177456, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 80.9, |
|
"learning_rate": 9.786206896551724e-06, |
|
"loss": 0.5722, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 80.9, |
|
"eval_cer": 0.10293593064193433, |
|
"eval_loss": 0.21624404191970825, |
|
"eval_runtime": 208.6816, |
|
"eval_samples_per_second": 17.615, |
|
"eval_steps_per_second": 2.204, |
|
"eval_wer": 0.2032137866790871, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 84.27, |
|
"learning_rate": 8.064367816091953e-06, |
|
"loss": 0.5585, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 84.27, |
|
"eval_cer": 0.10209066799013601, |
|
"eval_loss": 0.21560421586036682, |
|
"eval_runtime": 211.1243, |
|
"eval_samples_per_second": 17.412, |
|
"eval_steps_per_second": 2.179, |
|
"eval_wer": 0.20216581276199347, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 87.64, |
|
"learning_rate": 6.341379310344828e-06, |
|
"loss": 0.5456, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 87.64, |
|
"eval_cer": 0.10090419839632737, |
|
"eval_loss": 0.21256230771541595, |
|
"eval_runtime": 208.9966, |
|
"eval_samples_per_second": 17.589, |
|
"eval_steps_per_second": 2.201, |
|
"eval_wer": 0.19930135072193758, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 91.01, |
|
"learning_rate": 4.618390804597701e-06, |
|
"loss": 0.5325, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 91.01, |
|
"eval_cer": 0.10033810506071933, |
|
"eval_loss": 0.2121460884809494, |
|
"eval_runtime": 208.4119, |
|
"eval_samples_per_second": 17.638, |
|
"eval_steps_per_second": 2.207, |
|
"eval_wer": 0.19659990684676293, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 94.38, |
|
"learning_rate": 2.8942528735632185e-06, |
|
"loss": 0.5229, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 94.38, |
|
"eval_cer": 0.09914388076368318, |
|
"eval_loss": 0.21041299402713776, |
|
"eval_runtime": 208.604, |
|
"eval_samples_per_second": 17.622, |
|
"eval_steps_per_second": 2.205, |
|
"eval_wer": 0.19410805775500697, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 97.75, |
|
"learning_rate": 1.1701149425287358e-06, |
|
"loss": 0.5134, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 97.75, |
|
"eval_cer": 0.0991593901701382, |
|
"eval_loss": 0.21077032387256622, |
|
"eval_runtime": 208.8814, |
|
"eval_samples_per_second": 17.599, |
|
"eval_steps_per_second": 2.202, |
|
"eval_wer": 0.19482999534233814, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 44500, |
|
"total_flos": 1.4850172866055105e+21, |
|
"train_loss": 0.804292679347349, |
|
"train_runtime": 300809.9995, |
|
"train_samples_per_second": 9.48, |
|
"train_steps_per_second": 0.148 |
|
} |
|
], |
|
"max_steps": 44500, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.4850172866055105e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|