{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 27.0042194092827,
  "global_step": 6400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.69,
      "learning_rate": 7.920000000000001e-05,
      "loss": 9.7705,
      "step": 400
    },
    {
      "epoch": 1.69,
      "eval_loss": 3.045895576477051,
      "eval_runtime": 105.6261,
      "eval_samples_per_second": 16.047,
      "eval_steps_per_second": 2.007,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 3.38,
      "learning_rate": 9.517601043024771e-05,
      "loss": 2.7947,
      "step": 800
    },
    {
      "epoch": 3.38,
      "eval_loss": 1.6579201221466064,
      "eval_runtime": 102.9312,
      "eval_samples_per_second": 16.467,
      "eval_steps_per_second": 2.06,
      "eval_wer": 1.0785922937821673,
      "step": 800
    },
    {
      "epoch": 5.06,
      "learning_rate": 8.865710560625816e-05,
      "loss": 1.0354,
      "step": 1200
    },
    {
      "epoch": 5.06,
      "eval_loss": 0.6200010776519775,
      "eval_runtime": 102.9566,
      "eval_samples_per_second": 16.463,
      "eval_steps_per_second": 2.059,
      "eval_wer": 0.7493392683266101,
      "step": 1200
    },
    {
      "epoch": 6.75,
      "learning_rate": 8.213820078226858e-05,
      "loss": 0.5782,
      "step": 1600
    },
    {
      "epoch": 6.75,
      "eval_loss": 0.5652060508728027,
      "eval_runtime": 104.7594,
      "eval_samples_per_second": 16.18,
      "eval_steps_per_second": 2.024,
      "eval_wer": 0.7191542634580609,
      "step": 1600
    },
    {
      "epoch": 8.44,
      "learning_rate": 7.561929595827901e-05,
      "loss": 0.4526,
      "step": 2000
    },
    {
      "epoch": 8.44,
      "eval_loss": 0.5032265782356262,
      "eval_runtime": 118.2197,
      "eval_samples_per_second": 14.338,
      "eval_steps_per_second": 1.793,
      "eval_wer": 0.6482125469467241,
      "step": 2000
    },
    {
      "epoch": 10.13,
      "learning_rate": 6.910039113428944e-05,
      "loss": 0.3766,
      "step": 2400
    },
    {
      "epoch": 10.13,
      "eval_loss": 0.4892590045928955,
      "eval_runtime": 106.8164,
      "eval_samples_per_second": 15.868,
      "eval_steps_per_second": 1.985,
      "eval_wer": 0.6219223814160523,
      "step": 2400
    },
    {
      "epoch": 11.81,
      "learning_rate": 6.258148631029988e-05,
      "loss": 0.3112,
      "step": 2800
    },
    {
      "epoch": 11.81,
      "eval_loss": 0.46891558170318604,
      "eval_runtime": 105.9027,
      "eval_samples_per_second": 16.005,
      "eval_steps_per_second": 2.002,
      "eval_wer": 0.6139936013353735,
      "step": 2800
    },
    {
      "epoch": 13.5,
      "learning_rate": 5.606258148631031e-05,
      "loss": 0.2825,
      "step": 3200
    },
    {
      "epoch": 13.5,
      "eval_loss": 0.48954522609710693,
      "eval_runtime": 108.9558,
      "eval_samples_per_second": 15.557,
      "eval_steps_per_second": 1.946,
      "eval_wer": 0.6003616636528029,
      "step": 3200
    },
    {
      "epoch": 15.19,
      "learning_rate": 4.954367666232073e-05,
      "loss": 0.253,
      "step": 3600
    },
    {
      "epoch": 15.19,
      "eval_loss": 0.494277685880661,
      "eval_runtime": 107.9999,
      "eval_samples_per_second": 15.694,
      "eval_steps_per_second": 1.963,
      "eval_wer": 0.5914591737376548,
      "step": 3600
    },
    {
      "epoch": 16.88,
      "learning_rate": 4.3024771838331163e-05,
      "loss": 0.2359,
      "step": 4000
    },
    {
      "epoch": 16.88,
      "eval_loss": 0.47655120491981506,
      "eval_runtime": 110.482,
      "eval_samples_per_second": 15.342,
      "eval_steps_per_second": 1.919,
      "eval_wer": 0.5760189177910697,
      "step": 4000
    },
    {
      "epoch": 18.57,
      "learning_rate": 3.650586701434159e-05,
      "loss": 0.2086,
      "step": 4400
    },
    {
      "epoch": 18.57,
      "eval_loss": 0.48324596881866455,
      "eval_runtime": 105.3701,
      "eval_samples_per_second": 16.086,
      "eval_steps_per_second": 2.012,
      "eval_wer": 0.57768813465016,
      "step": 4400
    },
    {
      "epoch": 20.25,
      "learning_rate": 2.9986962190352026e-05,
      "loss": 0.1987,
      "step": 4800
    },
    {
      "epoch": 20.25,
      "eval_loss": 0.4831419885158539,
      "eval_runtime": 102.4502,
      "eval_samples_per_second": 16.545,
      "eval_steps_per_second": 2.069,
      "eval_wer": 0.5722631798581166,
      "step": 4800
    },
    {
      "epoch": 21.94,
      "learning_rate": 2.3468057366362453e-05,
      "loss": 0.1812,
      "step": 5200
    },
    {
      "epoch": 21.94,
      "eval_loss": 0.47938045859336853,
      "eval_runtime": 110.2812,
      "eval_samples_per_second": 15.37,
      "eval_steps_per_second": 1.922,
      "eval_wer": 0.5669773264709974,
      "step": 5200
    },
    {
      "epoch": 23.63,
      "learning_rate": 1.694915254237288e-05,
      "loss": 0.1759,
      "step": 5600
    },
    {
      "epoch": 23.63,
      "eval_loss": 0.4787568747997284,
      "eval_runtime": 111.7955,
      "eval_samples_per_second": 15.162,
      "eval_steps_per_second": 1.896,
      "eval_wer": 0.564473501182362,
      "step": 5600
    },
    {
      "epoch": 25.32,
      "learning_rate": 1.0430247718383312e-05,
      "loss": 0.1695,
      "step": 6000
    },
    {
      "epoch": 25.32,
      "eval_loss": 0.471430242061615,
      "eval_runtime": 115.4715,
      "eval_samples_per_second": 14.679,
      "eval_steps_per_second": 1.836,
      "eval_wer": 0.5600222562247879,
      "step": 6000
    },
    {
      "epoch": 27.0,
      "learning_rate": 3.9113428943937425e-06,
      "loss": 0.1595,
      "step": 6400
    },
    {
      "epoch": 27.0,
      "eval_loss": 0.4793846905231476,
      "eval_runtime": 107.2446,
      "eval_samples_per_second": 15.805,
      "eval_steps_per_second": 1.977,
      "eval_wer": 0.5586312421755459,
      "step": 6400
    }
  ],
  "max_steps": 6636,
  "num_train_epochs": 28,
  "total_flos": 1.1173889195953228e+19,
  "trial_name": null,
  "trial_params": null
}