{
  "best_metric": 0.38558459281921387,
  "best_model_checkpoint": "./Hubert-common_voice_JSUT-ja-demo-roma/checkpoint-10300",
  "epoch": 20.0,
  "eval_steps": 100,
  "global_step": 10340,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19342359767891681,
      "eval_cer": 3.3906697851852106,
      "eval_loss": 16.271892547607422,
      "eval_runtime": 235.1592,
      "eval_samples_per_second": 23.214,
      "eval_steps_per_second": 2.904,
      "eval_wer": 3.0917750503755266,
      "step": 100
    },
    {
      "epoch": 0.38684719535783363,
      "eval_cer": 2.893664453340365,
      "eval_loss": 16.005735397338867,
      "eval_runtime": 236.1908,
      "eval_samples_per_second": 23.113,
      "eval_steps_per_second": 2.892,
      "eval_wer": 2.8734200403004215,
      "step": 200
    },
    {
      "epoch": 0.5802707930367504,
      "eval_cer": 1.690616021560241,
      "eval_loss": 15.374765396118164,
      "eval_runtime": 231.2146,
      "eval_samples_per_second": 23.61,
      "eval_steps_per_second": 2.954,
      "eval_wer": 1.9646455394760944,
      "step": 300
    },
    {
      "epoch": 0.7736943907156673,
      "eval_cer": 0.9291963879062664,
      "eval_loss": 13.043855667114258,
      "eval_runtime": 226.4398,
      "eval_samples_per_second": 24.108,
      "eval_steps_per_second": 3.016,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 0.9671179883945842,
      "grad_norm": 47.46343994140625,
      "learning_rate": 1.1904e-06,
      "loss": 11.8785,
      "step": 500
    },
    {
      "epoch": 0.9671179883945842,
      "eval_cer": 0.929223783383958,
      "eval_loss": 7.6484761238098145,
      "eval_runtime": 226.1704,
      "eval_samples_per_second": 24.137,
      "eval_steps_per_second": 3.02,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 1.1605415860735009,
      "eval_cer": 0.929223783383958,
      "eval_loss": 6.007350444793701,
      "eval_runtime": 228.9926,
      "eval_samples_per_second": 23.839,
      "eval_steps_per_second": 2.983,
      "eval_wer": 1.0,
      "step": 600
    },
    {
      "epoch": 1.3539651837524178,
      "eval_cer": 0.929223783383958,
      "eval_loss": 5.646536827087402,
      "eval_runtime": 323.7978,
      "eval_samples_per_second": 16.859,
      "eval_steps_per_second": 2.109,
      "eval_wer": 1.0,
      "step": 700
    },
    {
      "epoch": 1.5473887814313345,
      "eval_cer": 0.929223783383958,
      "eval_loss": 5.502617359161377,
      "eval_runtime": 258.5966,
      "eval_samples_per_second": 21.11,
      "eval_steps_per_second": 2.641,
      "eval_wer": 1.0,
      "step": 800
    },
    {
      "epoch": 1.7408123791102514,
      "eval_cer": 0.929223783383958,
      "eval_loss": 5.361776351928711,
      "eval_runtime": 256.6054,
      "eval_samples_per_second": 21.274,
      "eval_steps_per_second": 2.662,
      "eval_wer": 1.0,
      "step": 900
    },
    {
      "epoch": 1.9342359767891684,
      "grad_norm": 29.7659969329834,
      "learning_rate": 2.3904e-06,
      "loss": 4.9912,
      "step": 1000
    },
    {
      "epoch": 1.9342359767891684,
      "eval_cer": 0.929223783383958,
      "eval_loss": 5.219295024871826,
      "eval_runtime": 262.3733,
      "eval_samples_per_second": 20.806,
      "eval_steps_per_second": 2.603,
      "eval_wer": 1.0,
      "step": 1000
    },
    {
      "epoch": 2.127659574468085,
      "eval_cer": 0.929223783383958,
      "eval_loss": 5.06879997253418,
      "eval_runtime": 246.2278,
      "eval_samples_per_second": 22.171,
      "eval_steps_per_second": 2.774,
      "eval_wer": 1.0,
      "step": 1100
    },
    {
      "epoch": 2.3210831721470018,
      "eval_cer": 0.929223783383958,
      "eval_loss": 4.91414737701416,
      "eval_runtime": 265.8995,
      "eval_samples_per_second": 20.53,
      "eval_steps_per_second": 2.569,
      "eval_wer": 1.0,
      "step": 1200
    },
    {
      "epoch": 2.514506769825919,
      "eval_cer": 0.929223783383958,
      "eval_loss": 4.753695964813232,
      "eval_runtime": 261.3741,
      "eval_samples_per_second": 20.886,
      "eval_steps_per_second": 2.613,
      "eval_wer": 1.0,
      "step": 1300
    },
    {
      "epoch": 2.7079303675048356,
      "eval_cer": 0.929223783383958,
      "eval_loss": 4.589428901672363,
      "eval_runtime": 262.0559,
      "eval_samples_per_second": 20.831,
      "eval_steps_per_second": 2.606,
      "eval_wer": 1.0,
      "step": 1400
    },
    {
      "epoch": 2.9013539651837523,
      "grad_norm": 19.784141540527344,
      "learning_rate": 3.5904e-06,
      "loss": 4.3024,
      "step": 1500
    },
    {
      "epoch": 2.9013539651837523,
      "eval_cer": 0.929223783383958,
      "eval_loss": 4.4225029945373535,
      "eval_runtime": 247.2412,
      "eval_samples_per_second": 22.08,
      "eval_steps_per_second": 2.762,
      "eval_wer": 1.0,
      "step": 1500
    },
    {
      "epoch": 3.094777562862669,
      "eval_cer": 0.929223783383958,
      "eval_loss": 4.2562456130981445,
      "eval_runtime": 265.0317,
      "eval_samples_per_second": 20.598,
      "eval_steps_per_second": 2.577,
      "eval_wer": 1.0,
      "step": 1600
    },
    {
      "epoch": 3.288201160541586,
      "eval_cer": 0.929223783383958,
      "eval_loss": 4.094423770904541,
      "eval_runtime": 262.7201,
      "eval_samples_per_second": 20.779,
      "eval_steps_per_second": 2.6,
      "eval_wer": 1.0,
      "step": 1700
    },
    {
      "epoch": 3.481624758220503,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.9344263076782227,
      "eval_runtime": 233.1037,
      "eval_samples_per_second": 23.419,
      "eval_steps_per_second": 2.93,
      "eval_wer": 1.0,
      "step": 1800
    },
    {
      "epoch": 3.6750483558994196,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.7835137844085693,
      "eval_runtime": 231.2092,
      "eval_samples_per_second": 23.611,
      "eval_steps_per_second": 2.954,
      "eval_wer": 1.0,
      "step": 1900
    },
    {
      "epoch": 3.8684719535783367,
      "grad_norm": 19.985584259033203,
      "learning_rate": 4.7904e-06,
      "loss": 3.6966,
      "step": 2000
    },
    {
      "epoch": 3.8684719535783367,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.641073703765869,
      "eval_runtime": 230.0297,
      "eval_samples_per_second": 23.732,
      "eval_steps_per_second": 2.969,
      "eval_wer": 1.0,
      "step": 2000
    },
    {
      "epoch": 4.061895551257253,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.5155856609344482,
      "eval_runtime": 229.2143,
      "eval_samples_per_second": 23.816,
      "eval_steps_per_second": 2.98,
      "eval_wer": 1.0,
      "step": 2100
    },
    {
      "epoch": 4.25531914893617,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.397294044494629,
      "eval_runtime": 229.4391,
      "eval_samples_per_second": 23.793,
      "eval_steps_per_second": 2.977,
      "eval_wer": 1.0,
      "step": 2200
    },
    {
      "epoch": 4.448742746615087,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.290855646133423,
      "eval_runtime": 231.2121,
      "eval_samples_per_second": 23.61,
      "eval_steps_per_second": 2.954,
      "eval_wer": 1.0,
      "step": 2300
    },
    {
      "epoch": 4.6421663442940035,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.1956613063812256,
      "eval_runtime": 230.2419,
      "eval_samples_per_second": 23.71,
      "eval_steps_per_second": 2.966,
      "eval_wer": 1.0,
      "step": 2400
    },
    {
      "epoch": 4.835589941972921,
      "grad_norm": 3.1049797534942627,
      "learning_rate": 5.9904e-06,
      "loss": 3.2011,
      "step": 2500
    },
    {
      "epoch": 4.835589941972921,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.115924596786499,
      "eval_runtime": 231.315,
      "eval_samples_per_second": 23.6,
      "eval_steps_per_second": 2.953,
      "eval_wer": 1.0,
      "step": 2500
    },
    {
      "epoch": 5.029013539651838,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.0544307231903076,
      "eval_runtime": 231.5463,
      "eval_samples_per_second": 23.576,
      "eval_steps_per_second": 2.95,
      "eval_wer": 1.0,
      "step": 2600
    },
    {
      "epoch": 5.222437137330754,
      "eval_cer": 0.929223783383958,
      "eval_loss": 3.0038533210754395,
      "eval_runtime": 237.992,
      "eval_samples_per_second": 22.938,
      "eval_steps_per_second": 2.87,
      "eval_wer": 1.0,
      "step": 2700
    },
    {
      "epoch": 5.415860735009671,
      "eval_cer": 0.929223783383958,
      "eval_loss": 2.9653820991516113,
      "eval_runtime": 231.2651,
      "eval_samples_per_second": 23.605,
      "eval_steps_per_second": 2.953,
      "eval_wer": 1.0,
      "step": 2800
    },
    {
      "epoch": 5.609284332688588,
      "eval_cer": 0.929223783383958,
      "eval_loss": 2.9386982917785645,
      "eval_runtime": 235.1119,
      "eval_samples_per_second": 23.219,
      "eval_steps_per_second": 2.905,
      "eval_wer": 1.0,
      "step": 2900
    },
    {
      "epoch": 5.802707930367505,
      "grad_norm": 0.9443885684013367,
      "learning_rate": 7.190400000000001e-06,
      "loss": 2.9439,
      "step": 3000
    },
    {
      "epoch": 5.802707930367505,
      "eval_cer": 0.929223783383958,
      "eval_loss": 2.9090564250946045,
      "eval_runtime": 234.7068,
      "eval_samples_per_second": 23.259,
      "eval_steps_per_second": 2.91,
      "eval_wer": 1.0,
      "step": 3000
    },
    {
      "epoch": 5.996131528046422,
      "eval_cer": 0.929223783383958,
      "eval_loss": 2.8868346214294434,
      "eval_runtime": 233.0298,
      "eval_samples_per_second": 23.426,
      "eval_steps_per_second": 2.931,
      "eval_wer": 1.0,
      "step": 3100
    },
    {
      "epoch": 6.189555125725338,
      "eval_cer": 0.929223783383958,
      "eval_loss": 2.8660459518432617,
      "eval_runtime": 237.6658,
      "eval_samples_per_second": 22.969,
      "eval_steps_per_second": 2.874,
      "eval_wer": 1.0,
      "step": 3200
    },
    {
      "epoch": 6.382978723404255,
      "eval_cer": 0.929223783383958,
      "eval_loss": 2.853316068649292,
      "eval_runtime": 238.1324,
      "eval_samples_per_second": 22.924,
      "eval_steps_per_second": 2.868,
      "eval_wer": 1.0,
      "step": 3300
    },
    {
      "epoch": 6.576402321083172,
      "eval_cer": 0.929223783383958,
      "eval_loss": 2.7336666584014893,
      "eval_runtime": 248.0434,
      "eval_samples_per_second": 22.008,
      "eval_steps_per_second": 2.754,
      "eval_wer": 1.0,
      "step": 3400
    },
    {
      "epoch": 6.769825918762089,
      "grad_norm": 5.135930061340332,
      "learning_rate": 8.3904e-06,
      "loss": 2.7884,
      "step": 3500
    },
    {
      "epoch": 6.769825918762089,
      "eval_cer": 0.929223783383958,
      "eval_loss": 2.523024559020996,
      "eval_runtime": 234.9913,
      "eval_samples_per_second": 23.231,
      "eval_steps_per_second": 2.906,
      "eval_wer": 1.0,
      "step": 3500
    },
    {
      "epoch": 6.963249516441006,
      "eval_cer": 0.9181765570048525,
      "eval_loss": 2.272446870803833,
      "eval_runtime": 233.6368,
      "eval_samples_per_second": 23.365,
      "eval_steps_per_second": 2.923,
      "eval_wer": 1.0,
      "step": 3600
    },
    {
      "epoch": 7.156673114119923,
      "eval_cer": 0.6315890404391495,
      "eval_loss": 1.9632701873779297,
      "eval_runtime": 237.5728,
      "eval_samples_per_second": 22.978,
      "eval_steps_per_second": 2.875,
      "eval_wer": 1.0,
      "step": 3700
    },
    {
      "epoch": 7.350096711798839,
      "eval_cer": 0.4242155476184769,
      "eval_loss": 1.5858280658721924,
      "eval_runtime": 277.2902,
      "eval_samples_per_second": 19.687,
      "eval_steps_per_second": 2.463,
      "eval_wer": 1.0,
      "step": 3800
    },
    {
      "epoch": 7.543520309477756,
      "eval_cer": 0.3861015892801496,
      "eval_loss": 1.351006031036377,
      "eval_runtime": 236.5551,
      "eval_samples_per_second": 23.077,
      "eval_steps_per_second": 2.887,
      "eval_wer": 0.9998168162667155,
      "step": 3900
    },
    {
      "epoch": 7.7369439071566735,
      "grad_norm": 3.2181670665740967,
      "learning_rate": 9.5904e-06,
      "loss": 1.7651,
      "step": 4000
    },
    {
      "epoch": 7.7369439071566735,
      "eval_cer": 0.33344405672233657,
      "eval_loss": 1.1917191743850708,
      "eval_runtime": 237.0965,
      "eval_samples_per_second": 23.024,
      "eval_steps_per_second": 2.881,
      "eval_wer": 0.999267265066862,
      "step": 4000
    },
    {
      "epoch": 7.93036750483559,
      "eval_cer": 0.29824771675815615,
      "eval_loss": 1.0715813636779785,
      "eval_runtime": 240.0983,
      "eval_samples_per_second": 22.737,
      "eval_steps_per_second": 2.845,
      "eval_wer": 0.9979849789338707,
      "step": 4100
    },
    {
      "epoch": 8.123791102514506,
      "eval_cer": 0.27819422708796343,
      "eval_loss": 0.9761540293693542,
      "eval_runtime": 238.5129,
      "eval_samples_per_second": 22.888,
      "eval_steps_per_second": 2.864,
      "eval_wer": 0.9976186114673017,
      "step": 4200
    },
    {
      "epoch": 8.317214700193423,
      "eval_cer": 0.2595721511271527,
      "eval_loss": 0.904407799243927,
      "eval_runtime": 234.8451,
      "eval_samples_per_second": 23.245,
      "eval_steps_per_second": 2.908,
      "eval_wer": 0.9965195090675948,
      "step": 4300
    },
    {
      "epoch": 8.51063829787234,
      "eval_cer": 0.2565654974505084,
      "eval_loss": 0.8529276251792908,
      "eval_runtime": 237.5826,
      "eval_samples_per_second": 22.977,
      "eval_steps_per_second": 2.875,
      "eval_wer": 0.9963363253343103,
      "step": 4400
    },
    {
      "epoch": 8.704061895551257,
      "grad_norm": 2.711160898208618,
      "learning_rate": 1.0790400000000001e-05,
      "loss": 0.9278,
      "step": 4500
    },
    {
      "epoch": 8.704061895551257,
      "eval_cer": 0.2466140901790637,
      "eval_loss": 0.7957596182823181,
      "eval_runtime": 234.6716,
      "eval_samples_per_second": 23.262,
      "eval_steps_per_second": 2.91,
      "eval_wer": 0.9970690602674482,
      "step": 4500
    },
    {
      "epoch": 8.897485493230175,
      "eval_cer": 0.24351497676521047,
      "eval_loss": 0.7534636855125427,
      "eval_runtime": 235.3455,
      "eval_samples_per_second": 23.196,
      "eval_steps_per_second": 2.902,
      "eval_wer": 0.9965195090675948,
      "step": 4600
    },
    {
      "epoch": 9.090909090909092,
      "eval_cer": 0.24028573483232255,
      "eval_loss": 0.7190229296684265,
      "eval_runtime": 235.8068,
      "eval_samples_per_second": 23.15,
      "eval_steps_per_second": 2.896,
      "eval_wer": 0.9974354277340172,
      "step": 4700
    },
    {
      "epoch": 9.284332688588007,
      "eval_cer": 0.2355908348429383,
      "eval_loss": 0.6800413727760315,
      "eval_runtime": 236.222,
      "eval_samples_per_second": 23.11,
      "eval_steps_per_second": 2.891,
      "eval_wer": 0.9974354277340172,
      "step": 4800
    },
    {
      "epoch": 9.477756286266924,
      "eval_cer": 0.23300196220108965,
      "eval_loss": 0.6568382978439331,
      "eval_runtime": 236.9709,
      "eval_samples_per_second": 23.037,
      "eval_steps_per_second": 2.882,
      "eval_wer": 0.9963363253343103,
      "step": 4900
    },
    {
      "epoch": 9.671179883945841,
      "grad_norm": 5.1745381355285645,
      "learning_rate": 1.19904e-05,
      "loss": 0.6673,
      "step": 5000
    },
    {
      "epoch": 9.671179883945841,
      "eval_cer": 0.23294032237628373,
      "eval_loss": 0.6317699551582336,
      "eval_runtime": 235.798,
      "eval_samples_per_second": 23.151,
      "eval_steps_per_second": 2.897,
      "eval_wer": 0.9959699578677413,
      "step": 5000
    },
    {
      "epoch": 9.864603481624759,
      "eval_cer": 0.22926932836562006,
      "eval_loss": 0.613182783126831,
      "eval_runtime": 236.7731,
      "eval_samples_per_second": 23.056,
      "eval_steps_per_second": 2.885,
      "eval_wer": 0.9972522440007328,
      "step": 5100
    },
    {
      "epoch": 10.058027079303676,
      "eval_cer": 0.2261496683434982,
      "eval_loss": 0.5896427631378174,
      "eval_runtime": 236.3817,
      "eval_samples_per_second": 23.094,
      "eval_steps_per_second": 2.889,
      "eval_wer": 0.9970690602674482,
      "step": 5200
    },
    {
      "epoch": 10.251450676982591,
      "eval_cer": 0.22313274136271954,
      "eval_loss": 0.5742546916007996,
      "eval_runtime": 234.8696,
      "eval_samples_per_second": 23.243,
      "eval_steps_per_second": 2.908,
      "eval_wer": 0.9961531416010259,
      "step": 5300
    },
    {
      "epoch": 10.444874274661508,
      "eval_cer": 0.2214684660929597,
      "eval_loss": 0.5562453866004944,
      "eval_runtime": 234.6327,
      "eval_samples_per_second": 23.266,
      "eval_steps_per_second": 2.911,
      "eval_wer": 0.9959699578677413,
      "step": 5400
    },
    {
      "epoch": 10.638297872340425,
      "grad_norm": 3.1148133277893066,
      "learning_rate": 1.31904e-05,
      "loss": 0.5392,
      "step": 5500
    },
    {
      "epoch": 10.638297872340425,
      "eval_cer": 0.2236943486553957,
      "eval_loss": 0.5472539067268372,
      "eval_runtime": 233.6555,
      "eval_samples_per_second": 23.363,
      "eval_steps_per_second": 2.923,
      "eval_wer": 0.9972522440007328,
      "step": 5500
    },
    {
      "epoch": 10.831721470019342,
      "eval_cer": 0.21852002780640986,
      "eval_loss": 0.5307034850120544,
      "eval_runtime": 233.484,
      "eval_samples_per_second": 23.381,
      "eval_steps_per_second": 2.925,
      "eval_wer": 0.9963363253343103,
      "step": 5600
    },
    {
      "epoch": 11.02514506769826,
      "eval_cer": 0.21733859783096304,
      "eval_loss": 0.5194967985153198,
      "eval_runtime": 235.0242,
      "eval_samples_per_second": 23.227,
      "eval_steps_per_second": 2.906,
      "eval_wer": 0.9976186114673017,
      "step": 5700
    },
    {
      "epoch": 11.218568665377177,
      "eval_cer": 0.21635578506877978,
      "eval_loss": 0.5090118050575256,
      "eval_runtime": 234.9867,
      "eval_samples_per_second": 23.231,
      "eval_steps_per_second": 2.907,
      "eval_wer": 0.9978017952005862,
      "step": 5800
    },
    {
      "epoch": 11.411992263056092,
      "eval_cer": 0.21347583547645874,
      "eval_loss": 0.4978716969490051,
      "eval_runtime": 235.5271,
      "eval_samples_per_second": 23.178,
      "eval_steps_per_second": 2.9,
      "eval_wer": 0.9974354277340172,
      "step": 5900
    },
    {
      "epoch": 11.60541586073501,
      "grad_norm": 2.333935499191284,
      "learning_rate": 1.43904e-05,
      "loss": 0.4572,
      "step": 6000
    },
    {
      "epoch": 11.60541586073501,
      "eval_cer": 0.21267451775398177,
      "eval_loss": 0.49008145928382874,
      "eval_runtime": 235.4425,
      "eval_samples_per_second": 23.186,
      "eval_steps_per_second": 2.901,
      "eval_wer": 0.9974354277340172,
      "step": 6000
    },
    {
      "epoch": 11.798839458413926,
      "eval_cer": 0.21371212147154808,
      "eval_loss": 0.487173467874527,
      "eval_runtime": 235.8902,
      "eval_samples_per_second": 23.142,
      "eval_steps_per_second": 2.895,
      "eval_wer": 0.999267265066862,
      "step": 6100
    },
    {
      "epoch": 11.992263056092844,
      "eval_cer": 0.211890322205062,
      "eval_loss": 0.4753509759902954,
      "eval_runtime": 236.7331,
      "eval_samples_per_second": 23.06,
      "eval_steps_per_second": 2.885,
      "eval_wer": 0.9972522440007328,
      "step": 6200
    },
    {
      "epoch": 12.18568665377176,
      "eval_cer": 0.21195538646457937,
      "eval_loss": 0.47239425778388977,
      "eval_runtime": 236.344,
      "eval_samples_per_second": 23.098,
      "eval_steps_per_second": 2.89,
      "eval_wer": 0.9968858765341637,
      "step": 6300
    },
    {
      "epoch": 12.379110251450676,
      "eval_cer": 0.20878093548707446,
      "eval_loss": 0.4649977684020996,
      "eval_runtime": 242.8375,
      "eval_samples_per_second": 22.48,
      "eval_steps_per_second": 2.813,
      "eval_wer": 0.9987177138670086,
      "step": 6400
    },
    {
      "epoch": 12.572533849129593,
      "grad_norm": 3.1772515773773193,
      "learning_rate": 1.5590400000000002e-05,
      "loss": 0.41,
      "step": 6500
    },
    {
      "epoch": 12.572533849129593,
      "eval_cer": 0.20764744759758783,
      "eval_loss": 0.4591744542121887,
      "eval_runtime": 237.1668,
      "eval_samples_per_second": 23.018,
      "eval_steps_per_second": 2.88,
      "eval_wer": 0.9976186114673017,
      "step": 6500
    },
    {
      "epoch": 12.76595744680851,
      "eval_cer": 0.20641122666675799,
      "eval_loss": 0.4502531886100769,
      "eval_runtime": 237.1617,
      "eval_samples_per_second": 23.018,
      "eval_steps_per_second": 2.88,
      "eval_wer": 0.9981681626671551,
      "step": 6600
    },
    {
      "epoch": 12.959381044487428,
      "eval_cer": 0.20994524328896408,
      "eval_loss": 0.44779568910598755,
      "eval_runtime": 236.6236,
      "eval_samples_per_second": 23.07,
      "eval_steps_per_second": 2.886,
      "eval_wer": 0.9963363253343103,
      "step": 6700
    },
    {
      "epoch": 13.152804642166345,
      "eval_cer": 0.2061133008468627,
      "eval_loss": 0.4495759606361389,
      "eval_runtime": 237.3684,
      "eval_samples_per_second": 22.998,
      "eval_steps_per_second": 2.877,
      "eval_wer": 0.9981681626671551,
      "step": 6800
    },
    {
      "epoch": 13.346228239845262,
      "eval_cer": 0.20519897677890822,
      "eval_loss": 0.443766713142395,
      "eval_runtime": 251.4879,
      "eval_samples_per_second": 21.707,
      "eval_steps_per_second": 2.716,
      "eval_wer": 0.9981681626671551,
      "step": 6900
    },
    {
      "epoch": 13.539651837524177,
      "grad_norm": 2.66337513923645,
      "learning_rate": 1.6790399999999998e-05,
      "loss": 0.3688,
      "step": 7000
    },
    {
      "epoch": 13.539651837524177,
      "eval_cer": 0.20395933141336695,
      "eval_loss": 0.4365153908729553,
      "eval_runtime": 238.8337,
      "eval_samples_per_second": 22.857,
      "eval_steps_per_second": 2.86,
      "eval_wer": 0.9990840813335776,
      "step": 7000
    },
    {
      "epoch": 13.733075435203094,
      "eval_cer": 0.20464421835565494,
      "eval_loss": 0.4288468062877655,
      "eval_runtime": 234.5955,
      "eval_samples_per_second": 23.27,
      "eval_steps_per_second": 2.911,
      "eval_wer": 0.9979849789338707,
      "step": 7100
    },
    {
      "epoch": 13.926499032882012,
      "eval_cer": 0.20250394666100494,
      "eval_loss": 0.4299309551715851,
      "eval_runtime": 237.0206,
      "eval_samples_per_second": 23.032,
      "eval_steps_per_second": 2.882,
      "eval_wer": 0.9981681626671551,
      "step": 7200
    },
    {
      "epoch": 14.119922630560929,
      "eval_cer": 0.20263407518003965,
      "eval_loss": 0.4274175465106964,
      "eval_runtime": 237.1451,
      "eval_samples_per_second": 23.02,
      "eval_steps_per_second": 2.88,
      "eval_wer": 0.9985345301337242,
      "step": 7300
    },
    {
      "epoch": 14.313346228239846,
      "eval_cer": 0.20056229217961846,
      "eval_loss": 0.42421066761016846,
      "eval_runtime": 244.6511,
      "eval_samples_per_second": 22.313,
      "eval_steps_per_second": 2.792,
      "eval_wer": 0.9983513464004397,
      "step": 7400
    },
    {
      "epoch": 14.506769825918763,
      "grad_norm": 1.9870613813400269,
      "learning_rate": 1.79904e-05,
      "loss": 0.3394,
      "step": 7500
    },
    {
      "epoch": 14.506769825918763,
      "eval_cer": 0.20014108671011133,
      "eval_loss": 0.4253482520580292,
      "eval_runtime": 244.8721,
      "eval_samples_per_second": 22.293,
      "eval_steps_per_second": 2.789,
      "eval_wer": 0.9970690602674482,
      "step": 7500
    },
    {
      "epoch": 14.700193423597678,
      "eval_cer": 0.19957263054801228,
      "eval_loss": 0.4177948236465454,
      "eval_runtime": 234.464,
      "eval_samples_per_second": 23.283,
      "eval_steps_per_second": 2.913,
      "eval_wer": 0.9974354277340172,
      "step": 7600
    },
    {
      "epoch": 14.893617021276595,
      "eval_cer": 0.20044928583414093,
      "eval_loss": 0.41819530725479126,
      "eval_runtime": 240.7737,
      "eval_samples_per_second": 22.673,
      "eval_steps_per_second": 2.837,
      "eval_wer": 0.9983513464004397,
      "step": 7700
    },
    {
      "epoch": 15.087040618955513,
      "eval_cer": 0.1979117797129639,
      "eval_loss": 0.41940802335739136,
      "eval_runtime": 239.6423,
      "eval_samples_per_second": 22.78,
      "eval_steps_per_second": 2.85,
      "eval_wer": 0.9970690602674482,
      "step": 7800
    },
    {
      "epoch": 15.28046421663443,
      "eval_cer": 0.19965481698108684,
      "eval_loss": 0.416002482175827,
      "eval_runtime": 233.6247,
      "eval_samples_per_second": 23.367,
      "eval_steps_per_second": 2.923,
      "eval_wer": 0.9978017952005862,
      "step": 7900
    },
    {
      "epoch": 15.473887814313347,
      "grad_norm": 3.6397812366485596,
      "learning_rate": 1.9190400000000002e-05,
      "loss": 0.3157,
      "step": 8000
    },
    {
      "epoch": 15.473887814313347,
      "eval_cer": 0.20096637547556837,
      "eval_loss": 0.40957844257354736,
      "eval_runtime": 233.3898,
      "eval_samples_per_second": 23.39,
      "eval_steps_per_second": 2.926,
      "eval_wer": 0.9974354277340172,
      "step": 8000
    },
    {
      "epoch": 15.667311411992262,
      "eval_cer": 0.19801108831959563,
      "eval_loss": 0.40878182649612427,
      "eval_runtime": 233.2723,
      "eval_samples_per_second": 23.402,
      "eval_steps_per_second": 2.928,
      "eval_wer": 0.9978017952005862,
      "step": 8100
    },
    {
      "epoch": 15.86073500967118,
      "eval_cer": 0.19737414346326781,
      "eval_loss": 0.4118936359882355,
      "eval_runtime": 235.4949,
      "eval_samples_per_second": 23.181,
      "eval_steps_per_second": 2.9,
      "eval_wer": 0.9983513464004397,
      "step": 8200
    },
    {
      "epoch": 16.054158607350097,
      "eval_cer": 0.1965180347854078,
      "eval_loss": 0.40991291403770447,
      "eval_runtime": 243.05,
      "eval_samples_per_second": 22.46,
      "eval_steps_per_second": 2.81,
      "eval_wer": 0.9983513464004397,
      "step": 8300
    },
    {
      "epoch": 16.247582205029012,
      "eval_cer": 0.19766864484845165,
      "eval_loss": 0.4085560739040375,
      "eval_runtime": 232.1832,
      "eval_samples_per_second": 23.512,
      "eval_steps_per_second": 2.942,
      "eval_wer": 0.9985345301337242,
      "step": 8400
    },
    {
      "epoch": 16.44100580270793,
      "grad_norm": 2.502861976623535,
      "learning_rate": 2.03904e-05,
      "loss": 0.2917,
      "step": 8500
    },
    {
      "epoch": 16.44100580270793,
      "eval_cer": 0.19676116964992005,
      "eval_loss": 0.40965768694877625,
      "eval_runtime": 233.1901,
      "eval_samples_per_second": 23.41,
      "eval_steps_per_second": 2.929,
      "eval_wer": 0.9983513464004397,
      "step": 8500
    },
    {
      "epoch": 16.634429400386846,
      "eval_cer": 0.19489485273218524,
      "eval_loss": 0.41127797961235046,
      "eval_runtime": 235.9189,
      "eval_samples_per_second": 23.139,
      "eval_steps_per_second": 2.895,
      "eval_wer": 0.9979849789338707,
      "step": 8600
    },
    {
      "epoch": 16.827852998065765,
      "eval_cer": 0.19560713515216477,
      "eval_loss": 0.40175729990005493,
      "eval_runtime": 236.2968,
      "eval_samples_per_second": 23.102,
      "eval_steps_per_second": 2.89,
      "eval_wer": 0.9983513464004397,
      "step": 8700
    },
    {
      "epoch": 17.02127659574468,
      "eval_cer": 0.19336413041617154,
      "eval_loss": 0.4042558968067169,
      "eval_runtime": 232.2491,
      "eval_samples_per_second": 23.505,
      "eval_steps_per_second": 2.941,
      "eval_wer": 0.9983513464004397,
      "step": 8800
    },
    {
      "epoch": 17.214700193423596,
      "eval_cer": 0.19456953143459843,
      "eval_loss": 0.4046263098716736,
      "eval_runtime": 233.5316,
      "eval_samples_per_second": 23.376,
      "eval_steps_per_second": 2.925,
      "eval_wer": 0.9979849789338707,
      "step": 8900
    },
    {
      "epoch": 17.408123791102515,
      "grad_norm": 1.7895680665969849,
      "learning_rate": 2.15904e-05,
      "loss": 0.2785,
      "step": 9000
    },
    {
      "epoch": 17.408123791102515,
      "eval_cer": 0.19269636564744075,
      "eval_loss": 0.4045611321926117,
      "eval_runtime": 235.9629,
      "eval_samples_per_second": 23.135,
      "eval_steps_per_second": 2.895,
      "eval_wer": 0.9981681626671551,
      "step": 9000
    },
    {
      "epoch": 17.60154738878143,
      "eval_cer": 0.19476814864786196,
      "eval_loss": 0.40159401297569275,
      "eval_runtime": 235.3013,
      "eval_samples_per_second": 23.2,
      "eval_steps_per_second": 2.903,
      "eval_wer": 0.9989008976002931,
      "step": 9100
    },
    {
      "epoch": 17.79497098646035,
      "eval_cer": 0.19216557826716754,
      "eval_loss": 0.40133848786354065,
      "eval_runtime": 232.8231,
      "eval_samples_per_second": 23.447,
      "eval_steps_per_second": 2.934,
      "eval_wer": 0.9983513464004397,
      "step": 9200
    },
    {
      "epoch": 17.988394584139265,
      "eval_cer": 0.19296004712022163,
      "eval_loss": 0.3879222273826599,
      "eval_runtime": 258.2005,
      "eval_samples_per_second": 21.142,
      "eval_steps_per_second": 2.645,
      "eval_wer": 0.9989008976002931,
      "step": 9300
    },
    {
      "epoch": 18.181818181818183,
      "eval_cer": 0.19279224981936108,
      "eval_loss": 0.40087568759918213,
      "eval_runtime": 233.7931,
      "eval_samples_per_second": 23.35,
      "eval_steps_per_second": 2.921,
      "eval_wer": 0.9979849789338707,
      "step": 9400
    },
    {
      "epoch": 18.3752417794971,
      "grad_norm": 2.837678909301758,
      "learning_rate": 2.27904e-05,
      "loss": 0.2647,
      "step": 9500
    },
    {
      "epoch": 18.3752417794971,
      "eval_cer": 0.19262787695321196,
      "eval_loss": 0.39038729667663574,
      "eval_runtime": 234.0768,
      "eval_samples_per_second": 23.321,
      "eval_steps_per_second": 2.918,
      "eval_wer": 0.9985345301337242,
      "step": 9500
    },
    {
      "epoch": 18.568665377176014,
      "eval_cer": 0.19593930531917444,
      "eval_loss": 0.3944104015827179,
      "eval_runtime": 237.4314,
      "eval_samples_per_second": 22.992,
      "eval_steps_per_second": 2.877,
      "eval_wer": 0.9983513464004397,
      "step": 9600
    },
    {
      "epoch": 18.762088974854933,
      "eval_cer": 0.19592218314561724,
      "eval_loss": 0.39569512009620667,
      "eval_runtime": 246.3149,
      "eval_samples_per_second": 22.163,
      "eval_steps_per_second": 2.773,
      "eval_wer": 0.9989008976002931,
      "step": 9700
    },
    {
      "epoch": 18.95551257253385,
      "eval_cer": 0.1937613648426986,
      "eval_loss": 0.39492446184158325,
      "eval_runtime": 243.7942,
      "eval_samples_per_second": 22.392,
      "eval_steps_per_second": 2.802,
      "eval_wer": 0.9981681626671551,
      "step": 9800
    },
    {
      "epoch": 19.148936170212767,
      "eval_cer": 0.1932579729401169,
      "eval_loss": 0.40386101603507996,
      "eval_runtime": 249.0686,
      "eval_samples_per_second": 21.918,
      "eval_steps_per_second": 2.742,
      "eval_wer": 0.9972522440007328,
      "step": 9900
    },
    {
      "epoch": 19.342359767891683,
      "grad_norm": 3.1278374195098877,
      "learning_rate": 2.39904e-05,
      "loss": 0.248,
      "step": 10000
    },
    {
      "epoch": 19.342359767891683,
      "eval_cer": 0.19337097928559444,
      "eval_loss": 0.40820688009262085,
      "eval_runtime": 231.3525,
      "eval_samples_per_second": 23.596,
      "eval_steps_per_second": 2.952,
      "eval_wer": 0.9990840813335776,
      "step": 10000
    },
    {
      "epoch": 19.535783365570598,
      "eval_cer": 0.19222379365726203,
      "eval_loss": 0.4074006974697113,
      "eval_runtime": 253.2468,
      "eval_samples_per_second": 21.556,
      "eval_steps_per_second": 2.697,
      "eval_wer": 0.999267265066862,
      "step": 10100
    },
    {
      "epoch": 19.729206963249517,
      "eval_cer": 0.19064170482057674,
      "eval_loss": 0.39546000957489014,
      "eval_runtime": 235.3118,
      "eval_samples_per_second": 23.199,
      "eval_steps_per_second": 2.903,
      "eval_wer": 0.9989008976002931,
      "step": 10200
    },
    {
      "epoch": 19.922630560928432,
      "eval_cer": 0.19093620620576057,
      "eval_loss": 0.38558459281921387,
      "eval_runtime": 246.5841,
      "eval_samples_per_second": 22.138,
      "eval_steps_per_second": 2.77,
      "eval_wer": 0.9979849789338707,
      "step": 10300
    },
    {
      "epoch": 20.0,
      "step": 10340,
      "total_flos": 1.4614133359859188e+19,
      "train_loss": 1.9748668191050192,
      "train_runtime": 66473.8913,
      "train_samples_per_second": 4.974,
      "train_steps_per_second": 0.156
    }
  ],
  "logging_steps": 500,
  "max_steps": 10340,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4614133359859188e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}