{ "best_metric": 9.288054935192335, "best_model_checkpoint": "./checkpoint-900", "epoch": 13.01525, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.974489795918367e-07, "loss": 0.1595, "step": 25 }, { "epoch": 0.05, "learning_rate": 4.846938775510204e-07, "loss": 0.1463, "step": 50 }, { "epoch": 0.07, "learning_rate": 4.719387755102041e-07, "loss": 0.1424, "step": 75 }, { "epoch": 1.02, "learning_rate": 4.5918367346938775e-07, "loss": 0.1368, "step": 100 }, { "epoch": 1.02, "eval_loss": 0.16647553443908691, "eval_runtime": 1871.163, "eval_samples_per_second": 1.66, "eval_steps_per_second": 0.104, "eval_wer": 9.69505958965582, "step": 100 }, { "epoch": 1.05, "learning_rate": 4.464285714285714e-07, "loss": 0.1338, "step": 125 }, { "epoch": 1.07, "learning_rate": 4.336734693877551e-07, "loss": 0.1281, "step": 150 }, { "epoch": 2.02, "learning_rate": 4.2091836734693876e-07, "loss": 0.1275, "step": 175 }, { "epoch": 2.05, "learning_rate": 4.0816326530612243e-07, "loss": 0.1196, "step": 200 }, { "epoch": 2.05, "eval_loss": 0.15860922634601593, "eval_runtime": 1883.9234, "eval_samples_per_second": 1.649, "eval_steps_per_second": 0.104, "eval_wer": 9.601135438625786, "step": 200 }, { "epoch": 2.07, "learning_rate": 3.9540816326530615e-07, "loss": 0.1189, "step": 225 }, { "epoch": 3.02, "learning_rate": 3.8265306122448977e-07, "loss": 0.1189, "step": 250 }, { "epoch": 3.05, "learning_rate": 3.6989795918367343e-07, "loss": 0.1136, "step": 275 }, { "epoch": 3.07, "learning_rate": 3.5714285714285716e-07, "loss": 0.107, "step": 300 }, { "epoch": 3.07, "eval_loss": 0.15376655757427216, "eval_runtime": 1823.9097, "eval_samples_per_second": 1.703, "eval_steps_per_second": 0.107, "eval_wer": 9.400763916428376, "step": 300 }, { "epoch": 4.02, "learning_rate": 3.443877551020408e-07, "loss": 0.1128, "step": 325 }, { "epoch": 4.05, "learning_rate": 3.3163265306122444e-07, "loss": 0.1085, "step": 350 }, { "epoch": 4.07, "learning_rate": 3.1887755102040816e-07, "loss": 0.1035, "step": 375 }, { "epoch": 5.02, "learning_rate": 3.0612244897959183e-07, "loss": 0.1051, "step": 400 }, { "epoch": 5.02, "eval_loss": 0.15036460757255554, "eval_runtime": 1824.8717, "eval_samples_per_second": 1.703, "eval_steps_per_second": 0.107, "eval_wer": 9.323537392248127, "step": 400 }, { "epoch": 5.05, "learning_rate": 2.933673469387755e-07, "loss": 0.102, "step": 425 }, { "epoch": 5.07, "learning_rate": 2.8061224489795917e-07, "loss": 0.101, "step": 450 }, { "epoch": 6.02, "learning_rate": 2.6785714285714284e-07, "loss": 0.0995, "step": 475 }, { "epoch": 6.05, "learning_rate": 2.551020408163265e-07, "loss": 0.0988, "step": 500 }, { "epoch": 6.05, "eval_loss": 0.14860908687114716, "eval_runtime": 1843.7431, "eval_samples_per_second": 1.685, "eval_steps_per_second": 0.106, "eval_wer": 9.446682390265284, "step": 500 }, { "epoch": 6.07, "learning_rate": 2.423469387755102e-07, "loss": 0.0955, "step": 525 }, { "epoch": 7.02, "learning_rate": 2.2959183673469388e-07, "loss": 0.0952, "step": 550 }, { "epoch": 7.04, "learning_rate": 2.1683673469387754e-07, "loss": 0.0973, "step": 575 }, { "epoch": 7.07, "learning_rate": 2.0408163265306121e-07, "loss": 0.0939, "step": 600 }, { "epoch": 7.07, "eval_loss": 0.14743566513061523, "eval_runtime": 1839.7788, "eval_samples_per_second": 1.689, "eval_steps_per_second": 0.106, "eval_wer": 9.442507983552838, "step": 600 }, { "epoch": 8.02, "learning_rate": 1.9132653061224488e-07, "loss": 0.0923, "step": 625 }, { "epoch": 8.04, "learning_rate": 1.7857142857142858e-07, "loss": 0.0933, "step": 650 }, { "epoch": 8.07, "learning_rate": 1.6581632653061222e-07, "loss": 0.0904, "step": 675 }, { "epoch": 9.02, "learning_rate": 1.5306122448979592e-07, "loss": 0.0901, "step": 700 }, { "epoch": 9.02, "eval_loss": 0.14642581343650818, "eval_runtime": 1823.9218, "eval_samples_per_second": 1.703, "eval_steps_per_second": 0.107, "eval_wer": 9.300578155329674, "step": 700 }, { "epoch": 9.04, "learning_rate": 1.4030612244897959e-07, "loss": 0.0894, "step": 725 }, { "epoch": 9.07, "learning_rate": 1.2755102040816326e-07, "loss": 0.0898, "step": 750 }, { "epoch": 10.02, "learning_rate": 1.1479591836734694e-07, "loss": 0.0891, "step": 775 }, { "epoch": 10.04, "learning_rate": 1.0204081632653061e-07, "loss": 0.0859, "step": 800 }, { "epoch": 10.04, "eval_loss": 0.14592841267585754, "eval_runtime": 1840.3966, "eval_samples_per_second": 1.688, "eval_steps_per_second": 0.106, "eval_wer": 9.436246373484169, "step": 800 }, { "epoch": 10.07, "learning_rate": 8.928571428571429e-08, "loss": 0.0884, "step": 825 }, { "epoch": 11.02, "learning_rate": 7.653061224489796e-08, "loss": 0.0881, "step": 850 }, { "epoch": 11.04, "learning_rate": 6.377551020408163e-08, "loss": 0.0877, "step": 875 }, { "epoch": 11.07, "learning_rate": 5.1020408163265303e-08, "loss": 0.0859, "step": 900 }, { "epoch": 11.07, "eval_loss": 0.14575552940368652, "eval_runtime": 1821.0167, "eval_samples_per_second": 1.706, "eval_steps_per_second": 0.107, "eval_wer": 9.288054935192335, "step": 900 }, { "epoch": 12.02, "learning_rate": 3.826530612244898e-08, "loss": 0.0864, "step": 925 }, { "epoch": 12.04, "learning_rate": 2.5510204081632652e-08, "loss": 0.0869, "step": 950 }, { "epoch": 12.07, "learning_rate": 1.2755102040816326e-08, "loss": 0.0876, "step": 975 }, { "epoch": 13.02, "learning_rate": 0.0, "loss": 0.0839, "step": 1000 }, { "epoch": 13.02, "eval_loss": 0.14563478529453278, "eval_runtime": 1822.7536, "eval_samples_per_second": 1.705, "eval_steps_per_second": 0.107, "eval_wer": 9.290142138548559, "step": 1000 }, { "epoch": 13.02, "step": 1000, "total_flos": 1.846196240007168e+19, "train_loss": 0.10451480412483215, "train_runtime": 27555.4596, "train_samples_per_second": 2.323, "train_steps_per_second": 0.036 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "total_flos": 1.846196240007168e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }