{ "best_metric": 35.60262364321316, "best_model_checkpoint": "./checkpoint-1000", "epoch": 3.4305317324185247, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 5e-07, "loss": 0.25, "step": 25 }, { "epoch": 0.17, "learning_rate": 1e-06, "loss": 0.2435, "step": 50 }, { "epoch": 0.26, "learning_rate": 9.736842105263158e-07, "loss": 0.2513, "step": 75 }, { "epoch": 0.34, "learning_rate": 9.473684210526315e-07, "loss": 0.2497, "step": 100 }, { "epoch": 0.43, "learning_rate": 9.210526315789473e-07, "loss": 0.2489, "step": 125 }, { "epoch": 0.51, "learning_rate": 8.947368421052631e-07, "loss": 0.2486, "step": 150 }, { "epoch": 0.6, "learning_rate": 8.684210526315789e-07, "loss": 0.2449, "step": 175 }, { "epoch": 0.69, "learning_rate": 8.421052631578947e-07, "loss": 0.2439, "step": 200 }, { "epoch": 0.77, "learning_rate": 8.157894736842105e-07, "loss": 0.2467, "step": 225 }, { "epoch": 0.86, "learning_rate": 7.894736842105263e-07, "loss": 0.2441, "step": 250 }, { "epoch": 0.94, "learning_rate": 7.631578947368421e-07, "loss": 0.2415, "step": 275 }, { "epoch": 1.03, "learning_rate": 7.368421052631578e-07, "loss": 0.2432, "step": 300 }, { "epoch": 1.11, "learning_rate": 7.105263157894736e-07, "loss": 0.2397, "step": 325 }, { "epoch": 1.2, "learning_rate": 6.842105263157895e-07, "loss": 0.2384, "step": 350 }, { "epoch": 1.29, "learning_rate": 6.578947368421053e-07, "loss": 0.2367, "step": 375 }, { "epoch": 1.37, "learning_rate": 6.31578947368421e-07, "loss": 0.2365, "step": 400 }, { "epoch": 1.46, "learning_rate": 6.052631578947368e-07, "loss": 0.2389, "step": 425 }, { "epoch": 1.54, "learning_rate": 5.789473684210526e-07, "loss": 0.2394, "step": 450 }, { "epoch": 1.63, "learning_rate": 5.526315789473684e-07, "loss": 0.2389, "step": 475 }, { "epoch": 1.72, "learning_rate": 5.263157894736842e-07, "loss": 0.2423, "step": 500 }, { "epoch": 1.72, "eval_loss": 0.2709830701351166, "eval_runtime": 1895.8676, "eval_samples_per_second": 4.913, "eval_steps_per_second": 0.154, "eval_wer": 35.95697654649092, "step": 500 }, { "epoch": 1.8, "learning_rate": 5e-07, "loss": 0.2376, "step": 525 }, { "epoch": 1.89, "learning_rate": 4.7368421052631574e-07, "loss": 0.2327, "step": 550 }, { "epoch": 1.97, "learning_rate": 4.4736842105263156e-07, "loss": 0.2357, "step": 575 }, { "epoch": 2.06, "learning_rate": 4.2105263157894733e-07, "loss": 0.234, "step": 600 }, { "epoch": 2.14, "learning_rate": 3.9473684210526315e-07, "loss": 0.2314, "step": 625 }, { "epoch": 2.23, "learning_rate": 3.684210526315789e-07, "loss": 0.2357, "step": 650 }, { "epoch": 2.32, "learning_rate": 3.4210526315789473e-07, "loss": 0.2358, "step": 675 }, { "epoch": 2.4, "learning_rate": 3.157894736842105e-07, "loss": 0.2383, "step": 700 }, { "epoch": 2.49, "learning_rate": 2.894736842105263e-07, "loss": 0.2293, "step": 725 }, { "epoch": 2.57, "learning_rate": 2.631578947368421e-07, "loss": 0.2321, "step": 750 }, { "epoch": 2.66, "learning_rate": 2.3684210526315787e-07, "loss": 0.2302, "step": 775 }, { "epoch": 2.74, "learning_rate": 2.1052631578947366e-07, "loss": 0.2254, "step": 800 }, { "epoch": 2.83, "learning_rate": 1.8421052631578946e-07, "loss": 0.234, "step": 825 }, { "epoch": 2.92, "learning_rate": 1.5789473684210525e-07, "loss": 0.2358, "step": 850 }, { "epoch": 3.0, "learning_rate": 1.3157894736842104e-07, "loss": 0.2321, "step": 875 }, { "epoch": 3.09, "learning_rate": 1.0526315789473683e-07, "loss": 0.2375, "step": 900 }, { "epoch": 3.17, "learning_rate": 7.894736842105262e-08, "loss": 0.2258, "step": 925 }, { "epoch": 3.26, "learning_rate": 5.2631578947368416e-08, "loss": 0.2254, "step": 950 }, { "epoch": 3.34, "learning_rate": 2.6315789473684208e-08, "loss": 0.234, "step": 975 }, { "epoch": 3.43, "learning_rate": 0.0, "loss": 0.2329, "step": 1000 }, { "epoch": 3.43, "eval_loss": 0.267061322927475, "eval_runtime": 1899.4594, "eval_samples_per_second": 4.904, "eval_steps_per_second": 0.154, "eval_wer": 35.60262364321316, "step": 1000 }, { "epoch": 3.43, "step": 1000, "total_flos": 4.14559151456256e+18, "train_loss": 0.23807335948944092, "train_runtime": 7036.0339, "train_samples_per_second": 9.096, "train_steps_per_second": 0.142 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 4.14559151456256e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }