{ "best_metric": null, "best_model_checkpoint": null, "epoch": 11.389521640091116, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 5e-05, "loss": 467.0219, "step": 100 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 446.5955, "step": 200 }, { "epoch": 0.34, "learning_rate": 5e-05, "loss": 393.423, "step": 300 }, { "epoch": 0.46, "learning_rate": 5e-05, "loss": 360.4873, "step": 400 }, { "epoch": 0.57, "learning_rate": 5e-05, "loss": 369.4461, "step": 500 }, { "epoch": 0.68, "learning_rate": 5e-05, "loss": 364.8889, "step": 600 }, { "epoch": 0.8, "learning_rate": 5e-05, "loss": 362.8324, "step": 700 }, { "epoch": 0.91, "learning_rate": 5e-05, "loss": 345.4677, "step": 800 }, { "epoch": 1.03, "learning_rate": 5e-05, "loss": 377.1422, "step": 900 }, { "epoch": 1.14, "learning_rate": 5e-05, "loss": 319.0323, "step": 1000 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 298.1518, "step": 1100 }, { "epoch": 1.37, "learning_rate": 5e-05, "loss": 335.3308, "step": 1200 }, { "epoch": 1.48, "learning_rate": 5e-05, "loss": 290.9471, "step": 1300 }, { "epoch": 1.59, "learning_rate": 5e-05, "loss": 310.6187, "step": 1400 }, { "epoch": 1.71, "learning_rate": 5e-05, "loss": 333.6563, "step": 1500 }, { "epoch": 1.82, "learning_rate": 5e-05, "loss": 287.3142, "step": 1600 }, { "epoch": 1.94, "learning_rate": 5e-05, "loss": 304.1946, "step": 1700 }, { "epoch": 2.05, "learning_rate": 5e-05, "loss": 295.4456, "step": 1800 }, { "epoch": 2.16, "learning_rate": 5e-05, "loss": 288.6632, "step": 1900 }, { "epoch": 2.28, "learning_rate": 5e-05, "loss": 268.8452, "step": 2000 }, { "epoch": 2.28, "eval_cer": 0.1040043410507569, "eval_loss": 262.4808654785156, "eval_runtime": 146.2892, "eval_samples_per_second": 12.783, "eval_steps_per_second": 1.6, "eval_wer": 0.22893966022261278, "step": 2000 }, { "epoch": 2.39, "learning_rate": 5e-05, "loss": 279.8674, "step": 2100 }, { "epoch": 2.51, "learning_rate": 5e-05, "loss": 288.3673, "step": 2200 }, { "epoch": 2.62, "learning_rate": 5e-05, "loss": 266.2094, "step": 2300 }, { "epoch": 2.73, "learning_rate": 5e-05, "loss": 266.3993, "step": 2400 }, { "epoch": 2.85, "learning_rate": 5e-05, "loss": 265.0983, "step": 2500 }, { "epoch": 2.96, "learning_rate": 5e-05, "loss": 280.5026, "step": 2600 }, { "epoch": 3.08, "learning_rate": 5e-05, "loss": 253.5466, "step": 2700 }, { "epoch": 3.19, "learning_rate": 5e-05, "loss": 259.7672, "step": 2800 }, { "epoch": 3.3, "learning_rate": 5e-05, "loss": 241.1983, "step": 2900 }, { "epoch": 3.42, "learning_rate": 5e-05, "loss": 234.3184, "step": 3000 }, { "epoch": 3.53, "learning_rate": 5e-05, "loss": 241.2133, "step": 3100 }, { "epoch": 3.64, "learning_rate": 5e-05, "loss": 264.1351, "step": 3200 }, { "epoch": 3.76, "learning_rate": 5e-05, "loss": 247.2135, "step": 3300 }, { "epoch": 3.87, "learning_rate": 5e-05, "loss": 227.8616, "step": 3400 }, { "epoch": 3.99, "learning_rate": 5e-05, "loss": 236.7091, "step": 3500 }, { "epoch": 4.1, "learning_rate": 5e-05, "loss": 234.9673, "step": 3600 }, { "epoch": 4.21, "learning_rate": 5e-05, "loss": 211.579, "step": 3700 }, { "epoch": 4.33, "learning_rate": 5e-05, "loss": 226.7139, "step": 3800 }, { "epoch": 4.44, "learning_rate": 5e-05, "loss": 239.6923, "step": 3900 }, { "epoch": 4.56, "learning_rate": 5e-05, "loss": 212.4134, "step": 4000 }, { "epoch": 4.56, "eval_cer": 0.09499526936776491, "eval_loss": 273.2842712402344, "eval_runtime": 145.12, "eval_samples_per_second": 12.886, "eval_steps_per_second": 1.612, "eval_wer": 0.20917789494239405, "step": 4000 }, { "epoch": 4.67, "learning_rate": 5e-05, "loss": 222.3978, "step": 4100 }, { "epoch": 4.78, "learning_rate": 5e-05, "loss": 220.115, "step": 4200 }, { "epoch": 4.9, "learning_rate": 5e-05, "loss": 214.0393, "step": 4300 }, { "epoch": 5.01, "learning_rate": 5e-05, "loss": 217.6124, "step": 4400 }, { "epoch": 5.13, "learning_rate": 5e-05, "loss": 207.3773, "step": 4500 }, { "epoch": 5.24, "learning_rate": 5e-05, "loss": 210.2402, "step": 4600 }, { "epoch": 5.35, "learning_rate": 5e-05, "loss": 183.2794, "step": 4700 }, { "epoch": 5.47, "learning_rate": 5e-05, "loss": 212.013, "step": 4800 }, { "epoch": 5.58, "learning_rate": 5e-05, "loss": 201.9256, "step": 4900 }, { "epoch": 5.69, "learning_rate": 5e-05, "loss": 204.919, "step": 5000 }, { "epoch": 5.81, "learning_rate": 5e-05, "loss": 200.8947, "step": 5100 }, { "epoch": 5.92, "learning_rate": 5e-05, "loss": 205.1183, "step": 5200 }, { "epoch": 6.04, "learning_rate": 5e-05, "loss": 183.53, "step": 5300 }, { "epoch": 6.15, "learning_rate": 5e-05, "loss": 176.2721, "step": 5400 }, { "epoch": 6.26, "learning_rate": 5e-05, "loss": 182.444, "step": 5500 }, { "epoch": 6.38, "learning_rate": 5e-05, "loss": 179.9177, "step": 5600 }, { "epoch": 6.49, "learning_rate": 5e-05, "loss": 184.7364, "step": 5700 }, { "epoch": 6.61, "learning_rate": 5e-05, "loss": 170.2941, "step": 5800 }, { "epoch": 6.72, "learning_rate": 5e-05, "loss": 184.6765, "step": 5900 }, { "epoch": 6.83, "learning_rate": 5e-05, "loss": 200.3965, "step": 6000 }, { "epoch": 6.83, "eval_cer": 0.09548224621549421, "eval_loss": 279.6430358886719, "eval_runtime": 147.4008, "eval_samples_per_second": 12.687, "eval_steps_per_second": 1.588, "eval_wer": 0.2134348760007811, "step": 6000 }, { "epoch": 6.95, "learning_rate": 5e-05, "loss": 203.8473, "step": 6100 }, { "epoch": 7.06, "learning_rate": 5e-05, "loss": 180.4022, "step": 6200 }, { "epoch": 7.18, "learning_rate": 5e-05, "loss": 167.2582, "step": 6300 }, { "epoch": 7.29, "learning_rate": 5e-05, "loss": 171.7429, "step": 6400 }, { "epoch": 7.4, "learning_rate": 5e-05, "loss": 161.9806, "step": 6500 }, { "epoch": 7.52, "learning_rate": 5e-05, "loss": 181.7546, "step": 6600 }, { "epoch": 7.63, "learning_rate": 5e-05, "loss": 166.6461, "step": 6700 }, { "epoch": 7.74, "learning_rate": 5e-05, "loss": 156.8281, "step": 6800 }, { "epoch": 7.86, "learning_rate": 5e-05, "loss": 162.8639, "step": 6900 }, { "epoch": 7.97, "learning_rate": 5e-05, "loss": 177.6029, "step": 7000 }, { "epoch": 8.09, "learning_rate": 5e-05, "loss": 162.6876, "step": 7100 }, { "epoch": 8.2, "learning_rate": 5e-05, "loss": 154.6953, "step": 7200 }, { "epoch": 8.31, "learning_rate": 5e-05, "loss": 157.7532, "step": 7300 }, { "epoch": 8.43, "learning_rate": 5e-05, "loss": 155.4144, "step": 7400 }, { "epoch": 8.54, "learning_rate": 5e-05, "loss": 160.9859, "step": 7500 }, { "epoch": 8.66, "learning_rate": 5e-05, "loss": 157.5087, "step": 7600 }, { "epoch": 8.77, "learning_rate": 5e-05, "loss": 158.3729, "step": 7700 }, { "epoch": 8.88, "learning_rate": 5e-05, "loss": 161.7107, "step": 7800 }, { "epoch": 9.0, "learning_rate": 5e-05, "loss": 158.3218, "step": 7900 }, { "epoch": 9.11, "learning_rate": 5e-05, "loss": 130.7524, "step": 8000 }, { "epoch": 9.11, "eval_cer": 0.09586487088156723, "eval_loss": 320.47552490234375, "eval_runtime": 144.7814, "eval_samples_per_second": 12.916, "eval_steps_per_second": 1.616, "eval_wer": 0.20667838312829526, "step": 8000 }, { "epoch": 9.23, "learning_rate": 5e-05, "loss": 142.4579, "step": 8100 }, { "epoch": 9.34, "learning_rate": 5e-05, "loss": 147.8841, "step": 8200 }, { "epoch": 9.45, "learning_rate": 5e-05, "loss": 158.0805, "step": 8300 }, { "epoch": 9.57, "learning_rate": 5e-05, "loss": 145.2056, "step": 8400 }, { "epoch": 9.68, "learning_rate": 5e-05, "loss": 144.9573, "step": 8500 }, { "epoch": 9.79, "learning_rate": 5e-05, "loss": 148.1379, "step": 8600 }, { "epoch": 9.91, "learning_rate": 5e-05, "loss": 148.8498, "step": 8700 }, { "epoch": 10.02, "learning_rate": 5e-05, "loss": 160.1041, "step": 8800 }, { "epoch": 10.14, "learning_rate": 5e-05, "loss": 141.2527, "step": 8900 }, { "epoch": 10.25, "learning_rate": 5e-05, "loss": 127.7468, "step": 9000 }, { "epoch": 10.36, "learning_rate": 5e-05, "loss": 136.7523, "step": 9100 }, { "epoch": 10.48, "learning_rate": 5e-05, "loss": 134.6625, "step": 9200 }, { "epoch": 10.59, "learning_rate": 5e-05, "loss": 138.4917, "step": 9300 }, { "epoch": 10.71, "learning_rate": 5e-05, "loss": 135.3308, "step": 9400 }, { "epoch": 10.82, "learning_rate": 5e-05, "loss": 143.3156, "step": 9500 }, { "epoch": 10.93, "learning_rate": 5e-05, "loss": 130.6054, "step": 9600 }, { "epoch": 11.05, "learning_rate": 5e-05, "loss": 138.9151, "step": 9700 }, { "epoch": 11.16, "learning_rate": 5e-05, "loss": 125.203, "step": 9800 }, { "epoch": 11.28, "learning_rate": 5e-05, "loss": 131.7745, "step": 9900 }, { "epoch": 11.39, "learning_rate": 5e-05, "loss": 119.4062, "step": 10000 }, { "epoch": 11.39, "eval_cer": 0.09683186776491541, "eval_loss": 327.7448425292969, "eval_runtime": 145.6399, "eval_samples_per_second": 12.84, "eval_steps_per_second": 1.607, "eval_wer": 0.20937316930287053, "step": 10000 } ], "max_steps": 10000, "num_train_epochs": 12, "total_flos": 1.2933525176475093e+19, "trial_name": null, "trial_params": null }