|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.953846153846154, |
|
"global_step": 6800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 6.331, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 3.29716157913208, |
|
"eval_runtime": 392.7938, |
|
"eval_samples_per_second": 8.236, |
|
"eval_wer": 0.9908326967150497, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 0.00028573692551505547, |
|
"loss": 3.3384, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 3.2769880294799805, |
|
"eval_runtime": 403.0436, |
|
"eval_samples_per_second": 8.026, |
|
"eval_wer": 0.9908326967150497, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.00026671949286846274, |
|
"loss": 2.8767, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_loss": 0.987671434879303, |
|
"eval_runtime": 414.5553, |
|
"eval_samples_per_second": 7.804, |
|
"eval_wer": 0.9542438985163444, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00024770206022187, |
|
"loss": 0.7362, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 0.5294700860977173, |
|
"eval_runtime": 412.2788, |
|
"eval_samples_per_second": 7.847, |
|
"eval_wer": 0.7132805275220135, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 0.0002286846275752773, |
|
"loss": 0.4225, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"eval_loss": 0.47939780354499817, |
|
"eval_runtime": 412.9001, |
|
"eval_samples_per_second": 7.835, |
|
"eval_wer": 0.6393791966547384, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 0.0002096671949286846, |
|
"loss": 0.2997, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"eval_loss": 0.47680211067199707, |
|
"eval_runtime": 406.5095, |
|
"eval_samples_per_second": 7.958, |
|
"eval_wer": 0.5926983233484782, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 0.00019064976228209192, |
|
"loss": 0.2349, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"eval_loss": 0.46566906571388245, |
|
"eval_runtime": 410.6338, |
|
"eval_samples_per_second": 7.878, |
|
"eval_wer": 0.5762936753648827, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 0.00017163232963549917, |
|
"loss": 0.1947, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"eval_loss": 0.5099472999572754, |
|
"eval_runtime": 412.6367, |
|
"eval_samples_per_second": 7.84, |
|
"eval_wer": 0.5637087370833501, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 0.00015261489698890646, |
|
"loss": 0.1652, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"eval_loss": 0.4866821765899658, |
|
"eval_runtime": 413.7594, |
|
"eval_samples_per_second": 7.819, |
|
"eval_wer": 0.5551445458566202, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"learning_rate": 0.0001335974643423138, |
|
"loss": 0.1397, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"eval_loss": 0.510926365852356, |
|
"eval_runtime": 416.443, |
|
"eval_samples_per_second": 7.768, |
|
"eval_wer": 0.5460576575127659, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"learning_rate": 0.00011458003169572107, |
|
"loss": 0.1251, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 19.38, |
|
"eval_loss": 0.5401586294174194, |
|
"eval_runtime": 410.6668, |
|
"eval_samples_per_second": 7.877, |
|
"eval_wer": 0.5362470346990471, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"learning_rate": 9.556259904912835e-05, |
|
"loss": 0.1105, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"eval_loss": 0.5472865104675293, |
|
"eval_runtime": 415.7439, |
|
"eval_samples_per_second": 7.781, |
|
"eval_wer": 0.530416951469583, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 22.91, |
|
"learning_rate": 7.654516640253565e-05, |
|
"loss": 0.0932, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 22.91, |
|
"eval_loss": 0.5360305309295654, |
|
"eval_runtime": 420.1088, |
|
"eval_samples_per_second": 7.7, |
|
"eval_wer": 0.5289694825298541, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"learning_rate": 5.752773375594294e-05, |
|
"loss": 0.0893, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 24.67, |
|
"eval_loss": 0.5389866232872009, |
|
"eval_runtime": 429.4038, |
|
"eval_samples_per_second": 7.534, |
|
"eval_wer": 0.5169072413654457, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 26.43, |
|
"learning_rate": 3.851030110935023e-05, |
|
"loss": 0.0863, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 26.43, |
|
"eval_loss": 0.5444660186767578, |
|
"eval_runtime": 423.7501, |
|
"eval_samples_per_second": 7.634, |
|
"eval_wer": 0.5146958304853042, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 28.19, |
|
"learning_rate": 1.9492868462757525e-05, |
|
"loss": 0.0778, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 28.19, |
|
"eval_loss": 0.5570007562637329, |
|
"eval_runtime": 420.4258, |
|
"eval_samples_per_second": 7.695, |
|
"eval_wer": 0.5113988179003659, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 29.95, |
|
"learning_rate": 4.754358161648177e-07, |
|
"loss": 0.0732, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 29.95, |
|
"eval_loss": 0.557680606842041, |
|
"eval_runtime": 427.3219, |
|
"eval_samples_per_second": 7.57, |
|
"eval_wer": 0.5118411000763942, |
|
"step": 6800 |
|
} |
|
], |
|
"max_steps": 6810, |
|
"num_train_epochs": 30, |
|
"total_flos": 3.375783462381318e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|