|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 2220, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.6e-06, |
|
"loss": 38.5727, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.35e-06, |
|
"loss": 24.0751, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.1099999999999999e-05, |
|
"loss": 15.8855, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.485e-05, |
|
"loss": 12.8886, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.8599999999999998e-05, |
|
"loss": 9.9218, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.2349999999999998e-05, |
|
"loss": 7.1493, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.6099999999999997e-05, |
|
"loss": 5.3703, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.985e-05, |
|
"loss": 4.6384, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.36e-05, |
|
"loss": 4.3898, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 3.735e-05, |
|
"loss": 4.2279, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 4.11016845703125, |
|
"eval_runtime": 236.4257, |
|
"eval_samples_per_second": 18.962, |
|
"eval_steps_per_second": 2.373, |
|
"eval_wer": 2.082517793594306, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 4.11e-05, |
|
"loss": 4.1565, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 4.484999999999999e-05, |
|
"loss": 4.1112, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 4.8599999999999995e-05, |
|
"loss": 4.0744, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 5.234999999999999e-05, |
|
"loss": 4.0272, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 5.6099999999999995e-05, |
|
"loss": 3.9966, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 5.985e-05, |
|
"loss": 3.8571, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 6.359999999999999e-05, |
|
"loss": 3.4211, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 6.735e-05, |
|
"loss": 2.833, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.11e-05, |
|
"loss": 2.5621, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 7.484999999999999e-05, |
|
"loss": 2.4119, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"eval_loss": 1.173694133758545, |
|
"eval_runtime": 237.1778, |
|
"eval_samples_per_second": 18.901, |
|
"eval_steps_per_second": 2.365, |
|
"eval_wer": 2.3580960854092528, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 4.227272727272727e-05, |
|
"loss": 2.2872, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 8.181818181818181e-06, |
|
"loss": 2.186, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2220, |
|
"total_flos": 1.6498328689753115e+19, |
|
"train_loss": 7.544306694924295, |
|
"train_runtime": 6173.2652, |
|
"train_samples_per_second": 17.208, |
|
"train_steps_per_second": 0.36 |
|
} |
|
], |
|
"max_steps": 2220, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.6498328689753115e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|