|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"global_step": 10440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 1.0, |
|
"eval_loss": 2.2818498611450195, |
|
"eval_runtime": 85.7153, |
|
"eval_samples_per_second": 22.925, |
|
"eval_steps_per_second": 2.87, |
|
"eval_wer": 1.0, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6692, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.029949254143222897, |
|
"eval_loss": 0.2044876664876938, |
|
"eval_runtime": 85.7259, |
|
"eval_samples_per_second": 22.922, |
|
"eval_steps_per_second": 2.87, |
|
"eval_wer": 0.052662850639351944, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2225, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_cer": 0.018884051083140417, |
|
"eval_loss": 0.11616221815347672, |
|
"eval_runtime": 85.4863, |
|
"eval_samples_per_second": 22.986, |
|
"eval_steps_per_second": 2.878, |
|
"eval_wer": 0.031858806989355296, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.014687169423704908, |
|
"eval_loss": 0.09268919378519058, |
|
"eval_runtime": 85.2634, |
|
"eval_samples_per_second": 23.046, |
|
"eval_steps_per_second": 2.885, |
|
"eval_wer": 0.023540536921737965, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 9.470338983050848e-05, |
|
"loss": 0.0868, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_cer": 0.014284728716635749, |
|
"eval_loss": 0.07971413433551788, |
|
"eval_runtime": 85.4679, |
|
"eval_samples_per_second": 22.991, |
|
"eval_steps_per_second": 2.878, |
|
"eval_wer": 0.021841735288210484, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 8.940677966101694e-05, |
|
"loss": 0.0598, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_cer": 0.012847440477103041, |
|
"eval_loss": 0.07152710855007172, |
|
"eval_runtime": 85.5884, |
|
"eval_samples_per_second": 22.959, |
|
"eval_steps_per_second": 2.874, |
|
"eval_wer": 0.01967429872129611, |
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_cer": 0.010252656108666656, |
|
"eval_loss": 0.06518065184354782, |
|
"eval_runtime": 85.121, |
|
"eval_samples_per_second": 23.085, |
|
"eval_steps_per_second": 2.89, |
|
"eval_wer": 0.015975430139921, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 8.411016949152542e-05, |
|
"loss": 0.0447, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.00946693853772211, |
|
"eval_loss": 0.057057663798332214, |
|
"eval_runtime": 84.3173, |
|
"eval_samples_per_second": 23.305, |
|
"eval_steps_per_second": 2.918, |
|
"eval_wer": 0.015230635335073977, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 7.88135593220339e-05, |
|
"loss": 0.0368, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_cer": 0.011180186119245098, |
|
"eval_loss": 0.060811206698417664, |
|
"eval_runtime": 84.3248, |
|
"eval_samples_per_second": 23.303, |
|
"eval_steps_per_second": 2.917, |
|
"eval_wer": 0.01630180089710116, |
|
"step": 3132 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_cer": 0.008297944102902173, |
|
"eval_loss": 0.058583296835422516, |
|
"eval_runtime": 84.6721, |
|
"eval_samples_per_second": 23.207, |
|
"eval_steps_per_second": 2.905, |
|
"eval_wer": 0.013657360915846555, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 7.351694915254238e-05, |
|
"loss": 0.0303, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_cer": 0.008535575758504913, |
|
"eval_loss": 0.06412886828184128, |
|
"eval_runtime": 87.3267, |
|
"eval_samples_per_second": 22.502, |
|
"eval_steps_per_second": 2.817, |
|
"eval_wer": 0.014125995849233446, |
|
"step": 3828 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 6.822033898305085e-05, |
|
"loss": 0.0273, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.007933831082220552, |
|
"eval_loss": 0.06564020365476608, |
|
"eval_runtime": 84.2667, |
|
"eval_samples_per_second": 23.319, |
|
"eval_steps_per_second": 2.919, |
|
"eval_wer": 0.013071567249112941, |
|
"step": 4176 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 6.29343220338983e-05, |
|
"loss": 0.0232, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_cer": 0.008225121498765848, |
|
"eval_loss": 0.06898853182792664, |
|
"eval_runtime": 84.1359, |
|
"eval_samples_per_second": 23.355, |
|
"eval_steps_per_second": 2.924, |
|
"eval_wer": 0.0132640423110397, |
|
"step": 4524 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_cer": 0.00787250678400049, |
|
"eval_loss": 0.05983823910355568, |
|
"eval_runtime": 84.1873, |
|
"eval_samples_per_second": 23.341, |
|
"eval_steps_per_second": 2.922, |
|
"eval_wer": 0.012803775858606146, |
|
"step": 4872 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 5.763771186440679e-05, |
|
"loss": 0.0189, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_cer": 0.007420240084627531, |
|
"eval_loss": 0.06711488217115402, |
|
"eval_runtime": 84.8039, |
|
"eval_samples_per_second": 23.171, |
|
"eval_steps_per_second": 2.901, |
|
"eval_wer": 0.012100823458525808, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 5.2341101694915265e-05, |
|
"loss": 0.017, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 0.006906649087034511, |
|
"eval_loss": 0.06541039049625397, |
|
"eval_runtime": 84.2563, |
|
"eval_samples_per_second": 23.322, |
|
"eval_steps_per_second": 2.92, |
|
"eval_wer": 0.011364397134632121, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_cer": 0.007335919174574946, |
|
"eval_loss": 0.07511687278747559, |
|
"eval_runtime": 84.0992, |
|
"eval_samples_per_second": 23.365, |
|
"eval_steps_per_second": 2.925, |
|
"eval_wer": 0.011807926625159, |
|
"step": 5916 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 4.705508474576271e-05, |
|
"loss": 0.0146, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_cer": 0.006753338341484355, |
|
"eval_loss": 0.06527850776910782, |
|
"eval_runtime": 83.7988, |
|
"eval_samples_per_second": 23.449, |
|
"eval_steps_per_second": 2.936, |
|
"eval_wer": 0.011171922072705363, |
|
"step": 6264 |
|
}, |
|
{ |
|
"epoch": 18.68, |
|
"learning_rate": 4.175847457627119e-05, |
|
"loss": 0.0127, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_cer": 0.006921980161589526, |
|
"eval_loss": 0.06817645579576492, |
|
"eval_runtime": 84.0515, |
|
"eval_samples_per_second": 23.379, |
|
"eval_steps_per_second": 2.927, |
|
"eval_wer": 0.01123886992033206, |
|
"step": 6612 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 0.006814662639704417, |
|
"eval_loss": 0.06784532964229584, |
|
"eval_runtime": 83.9653, |
|
"eval_samples_per_second": 23.403, |
|
"eval_steps_per_second": 2.93, |
|
"eval_wer": 0.01137276561558546, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 3.6461864406779664e-05, |
|
"loss": 0.0114, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_cer": 0.006584696521379184, |
|
"eval_loss": 0.06555593758821487, |
|
"eval_runtime": 83.8204, |
|
"eval_samples_per_second": 23.443, |
|
"eval_steps_per_second": 2.935, |
|
"eval_wer": 0.011113342706032, |
|
"step": 7308 |
|
}, |
|
{ |
|
"epoch": 21.55, |
|
"learning_rate": 3.117584745762712e-05, |
|
"loss": 0.0101, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_cer": 0.006596194827295445, |
|
"eval_loss": 0.06685744225978851, |
|
"eval_runtime": 84.0101, |
|
"eval_samples_per_second": 23.39, |
|
"eval_steps_per_second": 2.928, |
|
"eval_wer": 0.010920867644105242, |
|
"step": 7656 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"learning_rate": 2.5879237288135593e-05, |
|
"loss": 0.0092, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_cer": 0.006477378999494075, |
|
"eval_loss": 0.06765928864479065, |
|
"eval_runtime": 84.2885, |
|
"eval_samples_per_second": 23.313, |
|
"eval_steps_per_second": 2.919, |
|
"eval_wer": 0.010778603467898508, |
|
"step": 8004 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 0.006331733791221427, |
|
"eval_loss": 0.0652570053935051, |
|
"eval_runtime": 84.1568, |
|
"eval_samples_per_second": 23.349, |
|
"eval_steps_per_second": 2.923, |
|
"eval_wer": 0.010402021824998326, |
|
"step": 8352 |
|
}, |
|
{ |
|
"epoch": 24.43, |
|
"learning_rate": 2.058262711864407e-05, |
|
"loss": 0.0088, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_cer": 0.006266576724362611, |
|
"eval_loss": 0.0673212930560112, |
|
"eval_runtime": 83.9435, |
|
"eval_samples_per_second": 23.409, |
|
"eval_steps_per_second": 2.931, |
|
"eval_wer": 0.01020117828211823, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 25.86, |
|
"learning_rate": 1.5286016949152543e-05, |
|
"loss": 0.0074, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_cer": 0.006350897634415196, |
|
"eval_loss": 0.06691750884056091, |
|
"eval_runtime": 84.013, |
|
"eval_samples_per_second": 23.389, |
|
"eval_steps_per_second": 2.928, |
|
"eval_wer": 0.0104857066345317, |
|
"step": 9048 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_cer": 0.006113265978812455, |
|
"eval_loss": 0.0707407295703888, |
|
"eval_runtime": 84.4435, |
|
"eval_samples_per_second": 23.27, |
|
"eval_steps_per_second": 2.913, |
|
"eval_wer": 0.01013423043449153, |
|
"step": 9396 |
|
}, |
|
{ |
|
"epoch": 27.3, |
|
"learning_rate": 9.989406779661017e-06, |
|
"loss": 0.0066, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 0.0059829518450948225, |
|
"eval_loss": 0.06726762652397156, |
|
"eval_runtime": 84.279, |
|
"eval_samples_per_second": 23.315, |
|
"eval_steps_per_second": 2.919, |
|
"eval_wer": 0.009966860815424784, |
|
"step": 9744 |
|
}, |
|
{ |
|
"epoch": 28.74, |
|
"learning_rate": 4.692796610169492e-06, |
|
"loss": 0.0058, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_cer": 0.005867968785932206, |
|
"eval_loss": 0.06885003298521042, |
|
"eval_runtime": 84.0405, |
|
"eval_samples_per_second": 23.382, |
|
"eval_steps_per_second": 2.927, |
|
"eval_wer": 0.010000334739238134, |
|
"step": 10092 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_cer": 0.0058143100249896515, |
|
"eval_loss": 0.06832413375377655, |
|
"eval_runtime": 84.1565, |
|
"eval_samples_per_second": 23.349, |
|
"eval_steps_per_second": 2.923, |
|
"eval_wer": 0.009874807524938073, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 10440, |
|
"total_flos": 2.004174730615405e+19, |
|
"train_loss": 0.15939651108792915, |
|
"train_runtime": 19958.548, |
|
"train_samples_per_second": 16.737, |
|
"train_steps_per_second": 0.523 |
|
} |
|
], |
|
"max_steps": 10440, |
|
"num_train_epochs": 30, |
|
"total_flos": 2.004174730615405e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|