|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 2180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 3.6019930839538574, |
|
"eval_runtime": 69.4999, |
|
"eval_samples_per_second": 23.698, |
|
"eval_steps_per_second": 2.964, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 2.9971394538879395, |
|
"eval_runtime": 69.2726, |
|
"eval_samples_per_second": 23.776, |
|
"eval_steps_per_second": 2.974, |
|
"eval_wer": 0.9998978653865795, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 0.9173915386199951, |
|
"eval_runtime": 69.6556, |
|
"eval_samples_per_second": 23.645, |
|
"eval_steps_per_second": 2.957, |
|
"eval_wer": 0.7772444081299152, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 0.5667781829833984, |
|
"eval_runtime": 68.9782, |
|
"eval_samples_per_second": 23.877, |
|
"eval_steps_per_second": 2.986, |
|
"eval_wer": 0.6355836993156981, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.0002988, |
|
"loss": 3.1619, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 0.4949137270450592, |
|
"eval_runtime": 69.6168, |
|
"eval_samples_per_second": 23.658, |
|
"eval_steps_per_second": 2.959, |
|
"eval_wer": 0.5255847206618323, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_loss": 0.451631098985672, |
|
"eval_runtime": 69.6073, |
|
"eval_samples_per_second": 23.661, |
|
"eval_steps_per_second": 2.959, |
|
"eval_wer": 0.4744152793381677, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_loss": 0.42906084656715393, |
|
"eval_runtime": 68.1794, |
|
"eval_samples_per_second": 24.157, |
|
"eval_steps_per_second": 3.021, |
|
"eval_wer": 0.45746093351036665, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 0.4329916536808014, |
|
"eval_runtime": 69.7025, |
|
"eval_samples_per_second": 23.629, |
|
"eval_steps_per_second": 2.955, |
|
"eval_wer": 0.42733122255132266, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"eval_loss": 0.401591032743454, |
|
"eval_runtime": 69.0593, |
|
"eval_samples_per_second": 23.849, |
|
"eval_steps_per_second": 2.983, |
|
"eval_wer": 0.41446226126034114, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 0.00021107142857142858, |
|
"loss": 0.2261, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"eval_loss": 0.4213583171367645, |
|
"eval_runtime": 69.3263, |
|
"eval_samples_per_second": 23.757, |
|
"eval_steps_per_second": 2.971, |
|
"eval_wer": 0.40046981922173425, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"eval_loss": 0.4093494117259979, |
|
"eval_runtime": 69.6468, |
|
"eval_samples_per_second": 23.648, |
|
"eval_steps_per_second": 2.958, |
|
"eval_wer": 0.3946481462567664, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"eval_loss": 0.40514957904815674, |
|
"eval_runtime": 68.9496, |
|
"eval_samples_per_second": 23.887, |
|
"eval_steps_per_second": 2.988, |
|
"eval_wer": 0.3916862424675723, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"eval_loss": 0.3908359408378601, |
|
"eval_runtime": 68.6938, |
|
"eval_samples_per_second": 23.976, |
|
"eval_steps_per_second": 2.999, |
|
"eval_wer": 0.37187212746399756, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"eval_loss": 0.38503608107566833, |
|
"eval_runtime": 68.9973, |
|
"eval_samples_per_second": 23.871, |
|
"eval_steps_per_second": 2.986, |
|
"eval_wer": 0.36033091614748236, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"learning_rate": 0.00012178571428571428, |
|
"loss": 0.1119, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 13.76, |
|
"eval_loss": 0.3967060148715973, |
|
"eval_runtime": 69.4393, |
|
"eval_samples_per_second": 23.719, |
|
"eval_steps_per_second": 2.967, |
|
"eval_wer": 0.3645184352977224, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"eval_loss": 0.3821273744106293, |
|
"eval_runtime": 69.0903, |
|
"eval_samples_per_second": 23.838, |
|
"eval_steps_per_second": 2.982, |
|
"eval_wer": 0.3525686855275253, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"eval_loss": 0.39191436767578125, |
|
"eval_runtime": 69.7445, |
|
"eval_samples_per_second": 23.615, |
|
"eval_steps_per_second": 2.954, |
|
"eval_wer": 0.3518537432335819, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"eval_loss": 0.37627777457237244, |
|
"eval_runtime": 70.1892, |
|
"eval_samples_per_second": 23.465, |
|
"eval_steps_per_second": 2.935, |
|
"eval_wer": 0.3366356858339291, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.43, |
|
"eval_loss": 0.3681693971157074, |
|
"eval_runtime": 69.0258, |
|
"eval_samples_per_second": 23.861, |
|
"eval_steps_per_second": 2.984, |
|
"eval_wer": 0.33489939740578084, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 18.35, |
|
"learning_rate": 3.25e-05, |
|
"loss": 0.074, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.35, |
|
"eval_loss": 0.37525057792663574, |
|
"eval_runtime": 69.5184, |
|
"eval_samples_per_second": 23.692, |
|
"eval_steps_per_second": 2.963, |
|
"eval_wer": 0.33234603207026864, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"eval_loss": 0.3752918243408203, |
|
"eval_runtime": 68.5179, |
|
"eval_samples_per_second": 24.038, |
|
"eval_steps_per_second": 3.007, |
|
"eval_wer": 0.3267286283321418, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2180, |
|
"total_flos": 8.620382802507182e+18, |
|
"train_loss": 0.8245810762457891, |
|
"train_runtime": 5142.237, |
|
"train_samples_per_second": 13.527, |
|
"train_steps_per_second": 0.424 |
|
} |
|
], |
|
"max_steps": 2180, |
|
"num_train_epochs": 20, |
|
"total_flos": 8.620382802507182e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|