{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.994535519125684, "global_step": 4550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.1, "learning_rate": 3.7499999999999997e-06, "loss": 14.8405, "step": 100 }, { "epoch": 2.2, "learning_rate": 7.499999999999999e-06, "loss": 6.8597, "step": 200 }, { "epoch": 3.3, "learning_rate": 1.1249999999999999e-05, "loss": 4.4132, "step": 300 }, { "epoch": 4.39, "learning_rate": 1.4999999999999999e-05, "loss": 3.7795, "step": 400 }, { "epoch": 5.49, "learning_rate": 1.875e-05, "loss": 3.4129, "step": 500 }, { "epoch": 5.49, "eval_loss": 3.3224499225616455, "eval_runtime": 168.3653, "eval_samples_per_second": 28.765, "eval_steps_per_second": 0.903, "eval_wer": 1.0, "step": 500 }, { "epoch": 6.59, "learning_rate": 2.2499999999999998e-05, "loss": 3.2045, "step": 600 }, { "epoch": 7.69, "learning_rate": 2.6249999999999998e-05, "loss": 3.142, "step": 700 }, { "epoch": 8.79, "learning_rate": 2.9999999999999997e-05, "loss": 3.0872, "step": 800 }, { "epoch": 9.89, "learning_rate": 3.375e-05, "loss": 2.9848, "step": 900 }, { "epoch": 10.98, "learning_rate": 3.75e-05, "loss": 2.9323, "step": 1000 }, { "epoch": 10.98, "eval_loss": 2.9127891063690186, "eval_runtime": 166.7051, "eval_samples_per_second": 29.051, "eval_steps_per_second": 0.912, "eval_wer": 1.0000283848992335, "step": 1000 }, { "epoch": 12.09, "learning_rate": 4.125e-05, "loss": 2.9013, "step": 1100 }, { "epoch": 13.19, "learning_rate": 4.4999999999999996e-05, "loss": 2.729, "step": 1200 }, { "epoch": 14.28, "learning_rate": 4.875e-05, "loss": 2.1939, "step": 1300 }, { "epoch": 15.38, "learning_rate": 5.2499999999999995e-05, "loss": 1.8125, "step": 1400 }, { "epoch": 16.48, "learning_rate": 5.625e-05, "loss": 1.6839, "step": 1500 }, { "epoch": 16.48, "eval_loss": 0.7740097641944885, "eval_runtime": 167.3266, "eval_samples_per_second": 28.943, "eval_steps_per_second": 0.908, "eval_wer": 0.685381776894692, "step": 1500 }, { "epoch": 17.58, "learning_rate": 5.9999999999999995e-05, "loss": 1.6194, "step": 1600 }, { "epoch": 18.68, "learning_rate": 6.374999999999999e-05, "loss": 1.5613, "step": 1700 }, { "epoch": 19.78, "learning_rate": 6.75e-05, "loss": 1.5197, "step": 1800 }, { "epoch": 20.87, "learning_rate": 7.1175e-05, "loss": 1.5009, "step": 1900 }, { "epoch": 21.97, "learning_rate": 7.492499999999999e-05, "loss": 1.485, "step": 2000 }, { "epoch": 21.97, "eval_loss": 0.5829736590385437, "eval_runtime": 166.3956, "eval_samples_per_second": 29.105, "eval_steps_per_second": 0.913, "eval_wer": 0.597615668464377, "step": 2000 }, { "epoch": 23.08, "learning_rate": 7.211764705882351e-05, "loss": 1.4606, "step": 2100 }, { "epoch": 24.17, "learning_rate": 6.920588235294117e-05, "loss": 1.4257, "step": 2200 }, { "epoch": 25.27, "learning_rate": 6.626470588235294e-05, "loss": 1.4002, "step": 2300 }, { "epoch": 26.37, "learning_rate": 6.33235294117647e-05, "loss": 1.3856, "step": 2400 }, { "epoch": 27.47, "learning_rate": 6.038235294117646e-05, "loss": 1.362, "step": 2500 }, { "epoch": 27.47, "eval_loss": 0.48657190799713135, "eval_runtime": 166.2387, "eval_samples_per_second": 29.133, "eval_steps_per_second": 0.914, "eval_wer": 0.490519443655975, "step": 2500 }, { "epoch": 28.57, "learning_rate": 5.744117647058823e-05, "loss": 1.3342, "step": 2600 }, { "epoch": 29.67, "learning_rate": 5.4499999999999997e-05, "loss": 1.318, "step": 2700 }, { "epoch": 30.77, "learning_rate": 5.155882352941176e-05, "loss": 1.3013, "step": 2800 }, { "epoch": 31.86, "learning_rate": 4.861764705882352e-05, "loss": 1.2849, "step": 2900 }, { "epoch": 32.96, "learning_rate": 4.567647058823529e-05, "loss": 1.2752, "step": 3000 }, { "epoch": 32.96, "eval_loss": 0.42398178577423096, "eval_runtime": 167.0219, "eval_samples_per_second": 28.996, "eval_steps_per_second": 0.91, "eval_wer": 0.4966789667896679, "step": 3000 }, { "epoch": 34.07, "learning_rate": 4.2735294117647056e-05, "loss": 1.2682, "step": 3100 }, { "epoch": 35.16, "learning_rate": 3.979411764705882e-05, "loss": 1.2428, "step": 3200 }, { "epoch": 36.26, "learning_rate": 3.6852941176470586e-05, "loss": 1.2282, "step": 3300 }, { "epoch": 37.36, "learning_rate": 3.391176470588235e-05, "loss": 1.2198, "step": 3400 }, { "epoch": 38.46, "learning_rate": 3.0970588235294116e-05, "loss": 1.1957, "step": 3500 }, { "epoch": 38.46, "eval_loss": 0.38985687494277954, "eval_runtime": 166.2154, "eval_samples_per_second": 29.137, "eval_steps_per_second": 0.914, "eval_wer": 0.42577348850411584, "step": 3500 }, { "epoch": 39.56, "learning_rate": 2.8029411764705878e-05, "loss": 1.1891, "step": 3600 }, { "epoch": 40.66, "learning_rate": 2.5088235294117646e-05, "loss": 1.1847, "step": 3700 }, { "epoch": 41.75, "learning_rate": 2.2147058823529408e-05, "loss": 1.1779, "step": 3800 }, { "epoch": 42.85, "learning_rate": 1.9205882352941176e-05, "loss": 1.1654, "step": 3900 }, { "epoch": 43.95, "learning_rate": 1.626470588235294e-05, "loss": 1.1646, "step": 4000 }, { "epoch": 43.95, "eval_loss": 0.3597247898578644, "eval_runtime": 165.7161, "eval_samples_per_second": 29.225, "eval_steps_per_second": 0.917, "eval_wer": 0.401447629860914, "step": 4000 }, { "epoch": 45.05, "learning_rate": 1.3323529411764704e-05, "loss": 1.1574, "step": 4100 }, { "epoch": 46.15, "learning_rate": 1.0411764705882353e-05, "loss": 1.1513, "step": 4200 }, { "epoch": 47.25, "learning_rate": 7.470588235294117e-06, "loss": 1.1412, "step": 4300 }, { "epoch": 48.35, "learning_rate": 4.529411764705882e-06, "loss": 1.1383, "step": 4400 }, { "epoch": 49.45, "learning_rate": 1.5882352941176468e-06, "loss": 1.1265, "step": 4500 }, { "epoch": 49.45, "eval_loss": 0.35590532422065735, "eval_runtime": 165.6811, "eval_samples_per_second": 29.231, "eval_steps_per_second": 0.917, "eval_wer": 0.38288390576213455, "step": 4500 }, { "epoch": 49.99, "step": 4550, "total_flos": 7.032770514539837e+19, "train_loss": 2.1873197584885817, "train_runtime": 245048.5131, "train_samples_per_second": 2.385, "train_steps_per_second": 0.019 } ], "max_steps": 4550, "num_train_epochs": 50, "total_flos": 7.032770514539837e+19, "trial_name": null, "trial_params": null }