|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 12500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00028807199999999995, |
|
"loss": 3.1701, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 2.8104631900787354, |
|
"eval_runtime": 249.0567, |
|
"eval_samples_per_second": 20.076, |
|
"eval_steps_per_second": 2.509, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.000276072, |
|
"loss": 1.9143, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.5976669192314148, |
|
"eval_runtime": 247.4077, |
|
"eval_samples_per_second": 20.21, |
|
"eval_steps_per_second": 2.526, |
|
"eval_wer": 0.7001713772748293, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00026407199999999996, |
|
"loss": 1.1259, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.5062595009803772, |
|
"eval_runtime": 241.7474, |
|
"eval_samples_per_second": 20.683, |
|
"eval_steps_per_second": 2.585, |
|
"eval_wer": 0.6156796605097794, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.000252072, |
|
"loss": 0.9732, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 0.42641571164131165, |
|
"eval_runtime": 234.1368, |
|
"eval_samples_per_second": 21.355, |
|
"eval_steps_per_second": 2.669, |
|
"eval_wer": 0.5673403879111014, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.000240096, |
|
"loss": 0.8983, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.42490196228027344, |
|
"eval_runtime": 237.618, |
|
"eval_samples_per_second": 21.042, |
|
"eval_steps_per_second": 2.63, |
|
"eval_wer": 0.49024781697995157, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.00022809599999999998, |
|
"loss": 0.8507, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 0.38109204173088074, |
|
"eval_runtime": 243.3726, |
|
"eval_samples_per_second": 20.545, |
|
"eval_steps_per_second": 2.568, |
|
"eval_wer": 0.4535785207148881, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.00021609599999999996, |
|
"loss": 0.8064, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_loss": 0.3643375337123871, |
|
"eval_runtime": 241.9721, |
|
"eval_samples_per_second": 20.664, |
|
"eval_steps_per_second": 2.583, |
|
"eval_wer": 0.44669622697967953, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.000204096, |
|
"loss": 0.7866, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 0.36003848910331726, |
|
"eval_runtime": 240.8042, |
|
"eval_samples_per_second": 20.764, |
|
"eval_steps_per_second": 2.595, |
|
"eval_wer": 0.44533608987785966, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.00019209599999999997, |
|
"loss": 0.7773, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 0.37241309881210327, |
|
"eval_runtime": 234.1677, |
|
"eval_samples_per_second": 21.352, |
|
"eval_steps_per_second": 2.669, |
|
"eval_wer": 0.4470226598841163, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.00018009599999999998, |
|
"loss": 0.747, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.3500821888446808, |
|
"eval_runtime": 239.967, |
|
"eval_samples_per_second": 20.836, |
|
"eval_steps_per_second": 2.605, |
|
"eval_wer": 0.41892222736051793, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.00016809599999999998, |
|
"loss": 0.7279, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_loss": 0.3500120937824249, |
|
"eval_runtime": 236.3527, |
|
"eval_samples_per_second": 21.155, |
|
"eval_steps_per_second": 2.644, |
|
"eval_wer": 0.42607654851609045, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.00015612, |
|
"loss": 0.7153, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 0.33280453085899353, |
|
"eval_runtime": 237.5821, |
|
"eval_samples_per_second": 21.045, |
|
"eval_steps_per_second": 2.631, |
|
"eval_wer": 0.3965887761486358, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 0.000144144, |
|
"loss": 0.7, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"eval_loss": 0.33142638206481934, |
|
"eval_runtime": 238.2506, |
|
"eval_samples_per_second": 20.986, |
|
"eval_steps_per_second": 2.623, |
|
"eval_wer": 0.3869045999836784, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 0.000132168, |
|
"loss": 0.6784, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"eval_loss": 0.33956480026245117, |
|
"eval_runtime": 241.1503, |
|
"eval_samples_per_second": 20.734, |
|
"eval_steps_per_second": 2.592, |
|
"eval_wer": 0.40510323440602813, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.00012016799999999999, |
|
"loss": 0.6582, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.3236238658428192, |
|
"eval_runtime": 237.147, |
|
"eval_samples_per_second": 21.084, |
|
"eval_steps_per_second": 2.635, |
|
"eval_wer": 0.38992410434971847, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 0.00010816799999999998, |
|
"loss": 0.6478, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_loss": 0.3263108730316162, |
|
"eval_runtime": 239.702, |
|
"eval_samples_per_second": 20.859, |
|
"eval_steps_per_second": 2.607, |
|
"eval_wer": 0.3832050270667283, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 9.616799999999999e-05, |
|
"loss": 0.6277, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_loss": 0.3138865828514099, |
|
"eval_runtime": 241.2261, |
|
"eval_samples_per_second": 20.727, |
|
"eval_steps_per_second": 2.591, |
|
"eval_wer": 0.37692119365632054, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 8.416799999999998e-05, |
|
"loss": 0.6053, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"eval_loss": 0.2955300807952881, |
|
"eval_runtime": 239.5196, |
|
"eval_samples_per_second": 20.875, |
|
"eval_steps_per_second": 2.609, |
|
"eval_wer": 0.3535812409890917, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 7.216799999999999e-05, |
|
"loss": 0.5777, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"eval_loss": 0.27926793694496155, |
|
"eval_runtime": 243.0206, |
|
"eval_samples_per_second": 20.574, |
|
"eval_steps_per_second": 2.572, |
|
"eval_wer": 0.3413128043306765, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 6.016799999999999e-05, |
|
"loss": 0.5631, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.2789314091205597, |
|
"eval_runtime": 235.0589, |
|
"eval_samples_per_second": 21.271, |
|
"eval_steps_per_second": 2.659, |
|
"eval_wer": 0.33532820108266914, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 4.8168e-05, |
|
"loss": 0.5446, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"eval_loss": 0.27094796299934387, |
|
"eval_runtime": 244.8594, |
|
"eval_samples_per_second": 20.42, |
|
"eval_steps_per_second": 2.552, |
|
"eval_wer": 0.32637849895269444, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 3.6167999999999997e-05, |
|
"loss": 0.528, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"eval_loss": 0.26928168535232544, |
|
"eval_runtime": 237.8703, |
|
"eval_samples_per_second": 21.02, |
|
"eval_steps_per_second": 2.627, |
|
"eval_wer": 0.3233861973286907, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 2.4191999999999998e-05, |
|
"loss": 0.5169, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_loss": 0.265558123588562, |
|
"eval_runtime": 240.7332, |
|
"eval_samples_per_second": 20.77, |
|
"eval_steps_per_second": 2.596, |
|
"eval_wer": 0.31925138053915836, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 1.2215999999999998e-05, |
|
"loss": 0.5041, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_loss": 0.2575398087501526, |
|
"eval_runtime": 242.0688, |
|
"eval_samples_per_second": 20.655, |
|
"eval_steps_per_second": 2.582, |
|
"eval_wer": 0.31024727292511084, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2.16e-07, |
|
"loss": 0.4971, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.2584246098995209, |
|
"eval_runtime": 247.3331, |
|
"eval_samples_per_second": 20.216, |
|
"eval_steps_per_second": 2.527, |
|
"eval_wer": 0.31141699083267593, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 12500, |
|
"total_flos": 3.751048304986411e+19, |
|
"train_loss": 0.8456691796875, |
|
"train_runtime": 20148.7553, |
|
"train_samples_per_second": 9.926, |
|
"train_steps_per_second": 0.62 |
|
} |
|
], |
|
"max_steps": 12500, |
|
"num_train_epochs": 20, |
|
"total_flos": 3.751048304986411e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|