{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "learning_rate": 0.00028807199999999995, "loss": 3.1701, "step": 500 }, { "epoch": 0.8, "eval_loss": 2.8104631900787354, "eval_runtime": 249.0567, "eval_samples_per_second": 20.076, "eval_steps_per_second": 2.509, "eval_wer": 1.0, "step": 500 }, { "epoch": 1.6, "learning_rate": 0.000276072, "loss": 1.9143, "step": 1000 }, { "epoch": 1.6, "eval_loss": 0.5976669192314148, "eval_runtime": 247.4077, "eval_samples_per_second": 20.21, "eval_steps_per_second": 2.526, "eval_wer": 0.7001713772748293, "step": 1000 }, { "epoch": 2.4, "learning_rate": 0.00026407199999999996, "loss": 1.1259, "step": 1500 }, { "epoch": 2.4, "eval_loss": 0.5062595009803772, "eval_runtime": 241.7474, "eval_samples_per_second": 20.683, "eval_steps_per_second": 2.585, "eval_wer": 0.6156796605097794, "step": 1500 }, { "epoch": 3.2, "learning_rate": 0.000252072, "loss": 0.9732, "step": 2000 }, { "epoch": 3.2, "eval_loss": 0.42641571164131165, "eval_runtime": 234.1368, "eval_samples_per_second": 21.355, "eval_steps_per_second": 2.669, "eval_wer": 0.5673403879111014, "step": 2000 }, { "epoch": 4.0, "learning_rate": 0.000240096, "loss": 0.8983, "step": 2500 }, { "epoch": 4.0, "eval_loss": 0.42490196228027344, "eval_runtime": 237.618, "eval_samples_per_second": 21.042, "eval_steps_per_second": 2.63, "eval_wer": 0.49024781697995157, "step": 2500 }, { "epoch": 4.8, "learning_rate": 0.00022809599999999998, "loss": 0.8507, "step": 3000 }, { "epoch": 4.8, "eval_loss": 0.38109204173088074, "eval_runtime": 243.3726, "eval_samples_per_second": 20.545, "eval_steps_per_second": 2.568, "eval_wer": 0.4535785207148881, "step": 3000 }, { "epoch": 5.6, "learning_rate": 0.00021609599999999996, "loss": 0.8064, "step": 3500 }, { "epoch": 5.6, "eval_loss": 0.3643375337123871, "eval_runtime": 241.9721, "eval_samples_per_second": 20.664, "eval_steps_per_second": 2.583, "eval_wer": 0.44669622697967953, "step": 3500 }, { "epoch": 6.4, "learning_rate": 0.000204096, "loss": 0.7866, "step": 4000 }, { "epoch": 6.4, "eval_loss": 0.36003848910331726, "eval_runtime": 240.8042, "eval_samples_per_second": 20.764, "eval_steps_per_second": 2.595, "eval_wer": 0.44533608987785966, "step": 4000 }, { "epoch": 7.2, "learning_rate": 0.00019209599999999997, "loss": 0.7773, "step": 4500 }, { "epoch": 7.2, "eval_loss": 0.37241309881210327, "eval_runtime": 234.1677, "eval_samples_per_second": 21.352, "eval_steps_per_second": 2.669, "eval_wer": 0.4470226598841163, "step": 4500 }, { "epoch": 8.0, "learning_rate": 0.00018009599999999998, "loss": 0.747, "step": 5000 }, { "epoch": 8.0, "eval_loss": 0.3500821888446808, "eval_runtime": 239.967, "eval_samples_per_second": 20.836, "eval_steps_per_second": 2.605, "eval_wer": 0.41892222736051793, "step": 5000 }, { "epoch": 8.8, "learning_rate": 0.00016809599999999998, "loss": 0.7279, "step": 5500 }, { "epoch": 8.8, "eval_loss": 0.3500120937824249, "eval_runtime": 236.3527, "eval_samples_per_second": 21.155, "eval_steps_per_second": 2.644, "eval_wer": 0.42607654851609045, "step": 5500 }, { "epoch": 9.6, "learning_rate": 0.00015612, "loss": 0.7153, "step": 6000 }, { "epoch": 9.6, "eval_loss": 0.33280453085899353, "eval_runtime": 237.5821, "eval_samples_per_second": 21.045, "eval_steps_per_second": 2.631, "eval_wer": 0.3965887761486358, "step": 6000 }, { "epoch": 10.4, "learning_rate": 0.000144144, "loss": 0.7, "step": 6500 }, { "epoch": 10.4, "eval_loss": 0.33142638206481934, "eval_runtime": 238.2506, "eval_samples_per_second": 20.986, "eval_steps_per_second": 2.623, "eval_wer": 0.3869045999836784, "step": 6500 }, { "epoch": 11.2, "learning_rate": 0.000132168, "loss": 0.6784, "step": 7000 }, { "epoch": 11.2, "eval_loss": 0.33956480026245117, "eval_runtime": 241.1503, "eval_samples_per_second": 20.734, "eval_steps_per_second": 2.592, "eval_wer": 0.40510323440602813, "step": 7000 }, { "epoch": 12.0, "learning_rate": 0.00012016799999999999, "loss": 0.6582, "step": 7500 }, { "epoch": 12.0, "eval_loss": 0.3236238658428192, "eval_runtime": 237.147, "eval_samples_per_second": 21.084, "eval_steps_per_second": 2.635, "eval_wer": 0.38992410434971847, "step": 7500 }, { "epoch": 12.8, "learning_rate": 0.00010816799999999998, "loss": 0.6478, "step": 8000 }, { "epoch": 12.8, "eval_loss": 0.3263108730316162, "eval_runtime": 239.702, "eval_samples_per_second": 20.859, "eval_steps_per_second": 2.607, "eval_wer": 0.3832050270667283, "step": 8000 }, { "epoch": 13.6, "learning_rate": 9.616799999999999e-05, "loss": 0.6277, "step": 8500 }, { "epoch": 13.6, "eval_loss": 0.3138865828514099, "eval_runtime": 241.2261, "eval_samples_per_second": 20.727, "eval_steps_per_second": 2.591, "eval_wer": 0.37692119365632054, "step": 8500 }, { "epoch": 14.4, "learning_rate": 8.416799999999998e-05, "loss": 0.6053, "step": 9000 }, { "epoch": 14.4, "eval_loss": 0.2955300807952881, "eval_runtime": 239.5196, "eval_samples_per_second": 20.875, "eval_steps_per_second": 2.609, "eval_wer": 0.3535812409890917, "step": 9000 }, { "epoch": 15.2, "learning_rate": 7.216799999999999e-05, "loss": 0.5777, "step": 9500 }, { "epoch": 15.2, "eval_loss": 0.27926793694496155, "eval_runtime": 243.0206, "eval_samples_per_second": 20.574, "eval_steps_per_second": 2.572, "eval_wer": 0.3413128043306765, "step": 9500 }, { "epoch": 16.0, "learning_rate": 6.016799999999999e-05, "loss": 0.5631, "step": 10000 }, { "epoch": 16.0, "eval_loss": 0.2789314091205597, "eval_runtime": 235.0589, "eval_samples_per_second": 21.271, "eval_steps_per_second": 2.659, "eval_wer": 0.33532820108266914, "step": 10000 }, { "epoch": 16.8, "learning_rate": 4.8168e-05, "loss": 0.5446, "step": 10500 }, { "epoch": 16.8, "eval_loss": 0.27094796299934387, "eval_runtime": 244.8594, "eval_samples_per_second": 20.42, "eval_steps_per_second": 2.552, "eval_wer": 0.32637849895269444, "step": 10500 }, { "epoch": 17.6, "learning_rate": 3.6167999999999997e-05, "loss": 0.528, "step": 11000 }, { "epoch": 17.6, "eval_loss": 0.26928168535232544, "eval_runtime": 237.8703, "eval_samples_per_second": 21.02, "eval_steps_per_second": 2.627, "eval_wer": 0.3233861973286907, "step": 11000 }, { "epoch": 18.4, "learning_rate": 2.4191999999999998e-05, "loss": 0.5169, "step": 11500 }, { "epoch": 18.4, "eval_loss": 0.265558123588562, "eval_runtime": 240.7332, "eval_samples_per_second": 20.77, "eval_steps_per_second": 2.596, "eval_wer": 0.31925138053915836, "step": 11500 }, { "epoch": 19.2, "learning_rate": 1.2215999999999998e-05, "loss": 0.5041, "step": 12000 }, { "epoch": 19.2, "eval_loss": 0.2575398087501526, "eval_runtime": 242.0688, "eval_samples_per_second": 20.655, "eval_steps_per_second": 2.582, "eval_wer": 0.31024727292511084, "step": 12000 }, { "epoch": 20.0, "learning_rate": 2.16e-07, "loss": 0.4971, "step": 12500 }, { "epoch": 20.0, "eval_loss": 0.2584246098995209, "eval_runtime": 247.3331, "eval_samples_per_second": 20.216, "eval_steps_per_second": 2.527, "eval_wer": 0.31141699083267593, "step": 12500 }, { "epoch": 20.0, "step": 12500, "total_flos": 3.751048304986411e+19, "train_loss": 0.8456691796875, "train_runtime": 20148.7553, "train_samples_per_second": 9.926, "train_steps_per_second": 0.62 } ], "max_steps": 12500, "num_train_epochs": 20, "total_flos": 3.751048304986411e+19, "trial_name": null, "trial_params": null }