{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 6.000000000000001e-07, "loss": 16.3378, "step": 10 }, { "epoch": 0.17, "learning_rate": 1.2000000000000002e-06, "loss": 16.7146, "step": 20 }, { "epoch": 0.25, "learning_rate": 1.8e-06, "loss": 16.2989, "step": 30 }, { "epoch": 0.33, "learning_rate": 2.4000000000000003e-06, "loss": 15.6823, "step": 40 }, { "epoch": 0.42, "learning_rate": 3e-06, "loss": 15.2213, "step": 50 }, { "epoch": 0.5, "learning_rate": 3.6e-06, "loss": 13.8045, "step": 60 }, { "epoch": 0.58, "learning_rate": 4.2000000000000004e-06, "loss": 12.3665, "step": 70 }, { "epoch": 0.67, "learning_rate": 4.800000000000001e-06, "loss": 9.9596, "step": 80 }, { "epoch": 0.75, "learning_rate": 5.4e-06, "loss": 7.1678, "step": 90 }, { "epoch": 0.83, "learning_rate": 6e-06, "loss": 5.6617, "step": 100 }, { "epoch": 0.92, "learning_rate": 6.6e-06, "loss": 4.5458, "step": 110 }, { "epoch": 1.0, "learning_rate": 7.2e-06, "loss": 3.4904, "step": 120 }, { "epoch": 1.0, "eval_cer": 0.999086184248101, "eval_loss": 3.4430177211761475, "eval_runtime": 13.8851, "eval_samples_per_second": 69.139, "eval_steps_per_second": 2.161, "eval_wer": 0.996996996996997, "step": 120 }, { "epoch": 1.08, "learning_rate": 7.8e-06, "loss": 3.2384, "step": 130 }, { "epoch": 1.17, "learning_rate": 8.400000000000001e-06, "loss": 2.8932, "step": 140 }, { "epoch": 1.25, "learning_rate": 9e-06, "loss": 2.7276, "step": 150 }, { "epoch": 1.33, "learning_rate": 9.600000000000001e-06, "loss": 2.6587, "step": 160 }, { "epoch": 1.42, "learning_rate": 1.02e-05, "loss": 2.3907, "step": 170 }, { "epoch": 1.5, "learning_rate": 1.08e-05, "loss": 2.22, "step": 180 }, { "epoch": 1.58, "learning_rate": 1.1400000000000001e-05, "loss": 2.0003, "step": 190 }, { "epoch": 1.67, "learning_rate": 1.2e-05, "loss": 1.8618, "step": 200 }, { "epoch": 1.75, "learning_rate": 1.26e-05, "loss": 1.6286, "step": 210 }, { "epoch": 1.83, "learning_rate": 1.32e-05, "loss": 1.4244, "step": 220 }, { "epoch": 1.92, "learning_rate": 1.3800000000000002e-05, "loss": 1.3615, "step": 230 }, { "epoch": 2.0, "learning_rate": 1.44e-05, "loss": 1.1939, "step": 240 }, { "epoch": 2.0, "eval_cer": 0.6264778114112742, "eval_loss": 1.006431221961975, "eval_runtime": 15.0117, "eval_samples_per_second": 63.95, "eval_steps_per_second": 1.998, "eval_wer": 0.8269519519519519, "step": 240 }, { "epoch": 2.08, "learning_rate": 1.5e-05, "loss": 1.3824, "step": 250 }, { "epoch": 2.17, "learning_rate": 1.56e-05, "loss": 1.0939, "step": 260 }, { "epoch": 2.25, "learning_rate": 1.62e-05, "loss": 1.0596, "step": 270 }, { "epoch": 2.33, "learning_rate": 1.6800000000000002e-05, "loss": 1.0087, "step": 280 }, { "epoch": 2.42, "learning_rate": 1.74e-05, "loss": 0.9328, "step": 290 }, { "epoch": 2.5, "learning_rate": 1.8e-05, "loss": 0.9045, "step": 300 }, { "epoch": 2.58, "learning_rate": 1.86e-05, "loss": 0.8645, "step": 310 }, { "epoch": 2.67, "learning_rate": 1.9200000000000003e-05, "loss": 0.8474, "step": 320 }, { "epoch": 2.75, "learning_rate": 1.98e-05, "loss": 0.7985, "step": 330 }, { "epoch": 2.83, "learning_rate": 2.04e-05, "loss": 0.7874, "step": 340 }, { "epoch": 2.92, "learning_rate": 2.1e-05, "loss": 0.8, "step": 350 }, { "epoch": 3.0, "learning_rate": 2.16e-05, "loss": 0.7726, "step": 360 }, { "epoch": 3.0, "eval_cer": 0.5705065966074591, "eval_loss": 0.6256773471832275, "eval_runtime": 13.9464, "eval_samples_per_second": 68.835, "eval_steps_per_second": 2.151, "eval_wer": 0.8198198198198198, "step": 360 }, { "epoch": 3.08, "learning_rate": 2.22e-05, "loss": 0.7963, "step": 370 }, { "epoch": 3.17, "learning_rate": 2.2800000000000002e-05, "loss": 0.7342, "step": 380 }, { "epoch": 3.25, "learning_rate": 2.3400000000000003e-05, "loss": 0.7324, "step": 390 }, { "epoch": 3.33, "learning_rate": 2.4e-05, "loss": 0.6865, "step": 400 }, { "epoch": 3.42, "learning_rate": 2.4599999999999998e-05, "loss": 0.6731, "step": 410 }, { "epoch": 3.5, "learning_rate": 2.52e-05, "loss": 0.6683, "step": 420 }, { "epoch": 3.58, "learning_rate": 2.58e-05, "loss": 0.6583, "step": 430 }, { "epoch": 3.67, "learning_rate": 2.64e-05, "loss": 0.6218, "step": 440 }, { "epoch": 3.75, "learning_rate": 2.7000000000000002e-05, "loss": 0.5825, "step": 450 }, { "epoch": 3.83, "learning_rate": 2.7600000000000003e-05, "loss": 0.5552, "step": 460 }, { "epoch": 3.92, "learning_rate": 2.8199999999999998e-05, "loss": 0.6122, "step": 470 }, { "epoch": 4.0, "learning_rate": 2.88e-05, "loss": 0.5502, "step": 480 }, { "epoch": 4.0, "eval_cer": 0.34153863727225997, "eval_loss": 0.41475021839141846, "eval_runtime": 13.9543, "eval_samples_per_second": 68.796, "eval_steps_per_second": 2.15, "eval_wer": 0.5910285285285285, "step": 480 }, { "epoch": 4.08, "learning_rate": 2.94e-05, "loss": 0.5787, "step": 490 }, { "epoch": 4.17, "learning_rate": 3e-05, "loss": 0.4964, "step": 500 }, { "epoch": 4.25, "learning_rate": 3.06e-05, "loss": 0.5245, "step": 510 }, { "epoch": 4.33, "learning_rate": 3.12e-05, "loss": 0.4688, "step": 520 }, { "epoch": 4.42, "learning_rate": 3.18e-05, "loss": 0.5043, "step": 530 }, { "epoch": 4.5, "learning_rate": 3.24e-05, "loss": 0.4769, "step": 540 }, { "epoch": 4.58, "learning_rate": 3.3e-05, "loss": 0.4966, "step": 550 }, { "epoch": 4.67, "learning_rate": 3.3600000000000004e-05, "loss": 0.4772, "step": 560 }, { "epoch": 4.75, "learning_rate": 3.42e-05, "loss": 0.4364, "step": 570 }, { "epoch": 4.83, "learning_rate": 3.48e-05, "loss": 0.417, "step": 580 }, { "epoch": 4.92, "learning_rate": 3.54e-05, "loss": 0.4407, "step": 590 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 0.4152, "step": 600 }, { "epoch": 5.0, "eval_cer": 0.21817351076589184, "eval_loss": 0.24392470717430115, "eval_runtime": 13.9841, "eval_samples_per_second": 68.65, "eval_steps_per_second": 2.145, "eval_wer": 0.4166666666666667, "step": 600 }, { "epoch": 5.08, "learning_rate": 3.66e-05, "loss": 0.364, "step": 610 }, { "epoch": 5.17, "learning_rate": 3.72e-05, "loss": 0.4115, "step": 620 }, { "epoch": 5.25, "learning_rate": 3.7800000000000004e-05, "loss": 0.3769, "step": 630 }, { "epoch": 5.33, "learning_rate": 3.8400000000000005e-05, "loss": 0.3635, "step": 640 }, { "epoch": 5.42, "learning_rate": 3.9e-05, "loss": 0.3743, "step": 650 }, { "epoch": 5.5, "learning_rate": 3.96e-05, "loss": 0.3188, "step": 660 }, { "epoch": 5.58, "learning_rate": 4.02e-05, "loss": 0.3608, "step": 670 }, { "epoch": 5.67, "learning_rate": 4.08e-05, "loss": 0.3285, "step": 680 }, { "epoch": 5.75, "learning_rate": 4.14e-05, "loss": 0.2964, "step": 690 }, { "epoch": 5.83, "learning_rate": 4.2e-05, "loss": 0.2799, "step": 700 }, { "epoch": 5.92, "learning_rate": 4.26e-05, "loss": 0.3272, "step": 710 }, { "epoch": 6.0, "learning_rate": 4.32e-05, "loss": 0.3159, "step": 720 }, { "epoch": 6.0, "eval_cer": 0.17619509966303043, "eval_loss": 0.13585154712200165, "eval_runtime": 13.8057, "eval_samples_per_second": 69.536, "eval_steps_per_second": 2.173, "eval_wer": 0.3083708708708709, "step": 720 }, { "epoch": 6.08, "learning_rate": 4.38e-05, "loss": 0.3256, "step": 730 }, { "epoch": 6.17, "learning_rate": 4.44e-05, "loss": 0.2651, "step": 740 }, { "epoch": 6.25, "learning_rate": 4.5e-05, "loss": 0.2502, "step": 750 }, { "epoch": 6.33, "learning_rate": 4.5600000000000004e-05, "loss": 0.2632, "step": 760 }, { "epoch": 6.42, "learning_rate": 4.6200000000000005e-05, "loss": 0.2412, "step": 770 }, { "epoch": 6.5, "learning_rate": 4.6800000000000006e-05, "loss": 0.2871, "step": 780 }, { "epoch": 6.58, "learning_rate": 4.74e-05, "loss": 0.2409, "step": 790 }, { "epoch": 6.67, "learning_rate": 4.8e-05, "loss": 0.2091, "step": 800 }, { "epoch": 6.75, "learning_rate": 4.86e-05, "loss": 0.2677, "step": 810 }, { "epoch": 6.83, "learning_rate": 4.9199999999999997e-05, "loss": 0.2109, "step": 820 }, { "epoch": 6.92, "learning_rate": 4.98e-05, "loss": 0.1886, "step": 830 }, { "epoch": 7.0, "learning_rate": 5.04e-05, "loss": 0.2425, "step": 840 }, { "epoch": 7.0, "eval_cer": 0.15089382603232623, "eval_loss": 0.07371211796998978, "eval_runtime": 13.9128, "eval_samples_per_second": 69.001, "eval_steps_per_second": 2.156, "eval_wer": 0.25225225225225223, "step": 840 }, { "epoch": 7.08, "learning_rate": 5.1e-05, "loss": 0.2069, "step": 850 }, { "epoch": 7.17, "learning_rate": 5.16e-05, "loss": 0.1888, "step": 860 }, { "epoch": 7.25, "learning_rate": 5.22e-05, "loss": 0.1926, "step": 870 }, { "epoch": 7.33, "learning_rate": 5.28e-05, "loss": 0.1794, "step": 880 }, { "epoch": 7.42, "learning_rate": 5.3400000000000004e-05, "loss": 0.2078, "step": 890 }, { "epoch": 7.5, "learning_rate": 5.4000000000000005e-05, "loss": 0.1646, "step": 900 }, { "epoch": 7.58, "learning_rate": 5.4600000000000006e-05, "loss": 0.1417, "step": 910 }, { "epoch": 7.67, "learning_rate": 5.520000000000001e-05, "loss": 0.1454, "step": 920 }, { "epoch": 7.75, "learning_rate": 5.58e-05, "loss": 0.173, "step": 930 }, { "epoch": 7.83, "learning_rate": 5.6399999999999995e-05, "loss": 0.1449, "step": 940 }, { "epoch": 7.92, "learning_rate": 5.6999999999999996e-05, "loss": 0.1359, "step": 950 }, { "epoch": 8.0, "learning_rate": 5.76e-05, "loss": 0.1921, "step": 960 }, { "epoch": 8.0, "eval_cer": 0.16083157233422812, "eval_loss": 0.045883145183324814, "eval_runtime": 13.9631, "eval_samples_per_second": 68.753, "eval_steps_per_second": 2.149, "eval_wer": 0.22128378378378377, "step": 960 }, { "epoch": 8.0, "step": 960, "total_flos": 2.451994748193635e+18, "train_loss": 2.0509253946443398, "train_runtime": 1630.9039, "train_samples_per_second": 18.836, "train_steps_per_second": 0.589 } ], "logging_steps": 10, "max_steps": 960, "num_train_epochs": 8, "save_steps": 500, "total_flos": 2.451994748193635e+18, "trial_name": null, "trial_params": null }