{ "best_metric": null, "best_model_checkpoint": null, "epoch": 197.5483870967742, "global_step": 6124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.23, "learning_rate": 1.2639705882352941e-05, "loss": 14.7415, "step": 100 }, { "epoch": 6.45, "learning_rate": 2.477205882352941e-05, "loss": 6.4415, "step": 200 }, { "epoch": 9.68, "learning_rate": 3.690441176470588e-05, "loss": 4.4273, "step": 300 }, { "epoch": 12.9, "learning_rate": 4.9036764705882346e-05, "loss": 3.5184, "step": 400 }, { "epoch": 12.9, "eval_cer": 1.0, "eval_loss": 3.4210262298583984, "eval_runtime": 96.9515, "eval_samples_per_second": 18.628, "eval_steps_per_second": 0.299, "eval_wer": 1.0, "step": 400 }, { "epoch": 16.13, "learning_rate": 6.116911764705881e-05, "loss": 3.3554, "step": 500 }, { "epoch": 19.35, "learning_rate": 7.330147058823529e-05, "loss": 3.2617, "step": 600 }, { "epoch": 22.58, "learning_rate": 7.5e-05, "loss": 3.0887, "step": 700 }, { "epoch": 25.81, "learning_rate": 7.5e-05, "loss": 2.3797, "step": 800 }, { "epoch": 25.81, "eval_cer": 0.2583848190644307, "eval_loss": 1.1068178415298462, "eval_runtime": 99.1553, "eval_samples_per_second": 18.214, "eval_steps_per_second": 0.292, "eval_wer": 0.8389036136789708, "step": 800 }, { "epoch": 29.03, "learning_rate": 7.5e-05, "loss": 1.8558, "step": 900 }, { "epoch": 32.26, "learning_rate": 7.5e-05, "loss": 1.6732, "step": 1000 }, { "epoch": 35.48, "learning_rate": 7.5e-05, "loss": 1.5624, "step": 1100 }, { "epoch": 38.71, "learning_rate": 7.5e-05, "loss": 1.5022, "step": 1200 }, { "epoch": 38.71, "eval_cer": 0.1516846387044783, "eval_loss": 0.5277877449989319, "eval_runtime": 102.5824, "eval_samples_per_second": 17.605, "eval_steps_per_second": 0.283, "eval_wer": 0.6279656086366281, "step": 1200 }, { "epoch": 41.94, "learning_rate": 7.5e-05, "loss": 1.4358, "step": 1300 }, { "epoch": 45.16, "learning_rate": 7.5e-05, "loss": 1.388, "step": 1400 }, { "epoch": 48.39, "learning_rate": 7.5e-05, "loss": 1.3493, "step": 1500 }, { "epoch": 51.61, "learning_rate": 7.5e-05, "loss": 1.3181, "step": 1600 }, { "epoch": 51.61, "eval_cer": 0.12968648067846042, "eval_loss": 0.4253957271575928, "eval_runtime": 98.985, "eval_samples_per_second": 18.245, "eval_steps_per_second": 0.293, "eval_wer": 0.5587303639537139, "step": 1600 }, { "epoch": 54.84, "learning_rate": 7.5e-05, "loss": 1.2814, "step": 1700 }, { "epoch": 58.06, "learning_rate": 7.5e-05, "loss": 1.2547, "step": 1800 }, { "epoch": 61.29, "learning_rate": 7.5e-05, "loss": 1.2244, "step": 1900 }, { "epoch": 64.52, "learning_rate": 7.5e-05, "loss": 1.2037, "step": 2000 }, { "epoch": 64.52, "eval_cer": 0.11762730726428489, "eval_loss": 0.3835846781730652, "eval_runtime": 99.4181, "eval_samples_per_second": 18.166, "eval_steps_per_second": 0.292, "eval_wer": 0.5142543150817764, "step": 2000 }, { "epoch": 67.74, "learning_rate": 7.5e-05, "loss": 1.1751, "step": 2100 }, { "epoch": 70.97, "learning_rate": 7.5e-05, "loss": 1.1642, "step": 2200 }, { "epoch": 74.19, "learning_rate": 7.5e-05, "loss": 1.1397, "step": 2300 }, { "epoch": 77.42, "learning_rate": 7.5e-05, "loss": 1.1245, "step": 2400 }, { "epoch": 77.42, "eval_cer": 0.11111324302544226, "eval_loss": 0.36429011821746826, "eval_runtime": 98.0055, "eval_samples_per_second": 18.428, "eval_steps_per_second": 0.296, "eval_wer": 0.48710323873553557, "step": 2400 }, { "epoch": 80.65, "learning_rate": 7.5e-05, "loss": 1.1057, "step": 2500 }, { "epoch": 83.87, "learning_rate": 7.5e-05, "loss": 1.0852, "step": 2600 }, { "epoch": 87.1, "learning_rate": 7.5e-05, "loss": 1.073, "step": 2700 }, { "epoch": 90.32, "learning_rate": 7.5e-05, "loss": 1.0582, "step": 2800 }, { "epoch": 90.32, "eval_cer": 0.10623009324993285, "eval_loss": 0.3561805486679077, "eval_runtime": 94.8015, "eval_samples_per_second": 19.05, "eval_steps_per_second": 0.306, "eval_wer": 0.46758032193419097, "step": 2800 }, { "epoch": 93.55, "learning_rate": 7.5e-05, "loss": 1.0376, "step": 2900 }, { "epoch": 96.77, "learning_rate": 7.5e-05, "loss": 1.0341, "step": 3000 }, { "epoch": 100.0, "learning_rate": 7.416258570029382e-05, "loss": 1.017, "step": 3100 }, { "epoch": 103.23, "learning_rate": 7.183643486777668e-05, "loss": 1.0027, "step": 3200 }, { "epoch": 103.23, "eval_cer": 0.10575041252542308, "eval_loss": 0.3529968559741974, "eval_runtime": 98.9163, "eval_samples_per_second": 18.258, "eval_steps_per_second": 0.293, "eval_wer": 0.4625379791841748, "step": 3200 }, { "epoch": 106.45, "learning_rate": 6.951028403525955e-05, "loss": 0.9864, "step": 3300 }, { "epoch": 109.68, "learning_rate": 6.71841332027424e-05, "loss": 0.9769, "step": 3400 }, { "epoch": 112.9, "learning_rate": 6.485798237022525e-05, "loss": 0.9587, "step": 3500 }, { "epoch": 116.13, "learning_rate": 6.253183153770813e-05, "loss": 0.9382, "step": 3600 }, { "epoch": 116.13, "eval_cer": 0.10022449057907057, "eval_loss": 0.3388434052467346, "eval_runtime": 98.6206, "eval_samples_per_second": 18.313, "eval_steps_per_second": 0.294, "eval_wer": 0.44424332536039823, "step": 3600 }, { "epoch": 119.35, "learning_rate": 6.020568070519098e-05, "loss": 0.9296, "step": 3700 }, { "epoch": 122.58, "learning_rate": 5.787952987267385e-05, "loss": 0.9145, "step": 3800 }, { "epoch": 125.81, "learning_rate": 5.5553379040156705e-05, "loss": 0.9043, "step": 3900 }, { "epoch": 129.03, "learning_rate": 5.3227228207639564e-05, "loss": 0.8915, "step": 4000 }, { "epoch": 129.03, "eval_cer": 0.10004221190375685, "eval_loss": 0.3429908752441406, "eval_runtime": 98.3419, "eval_samples_per_second": 18.364, "eval_steps_per_second": 0.295, "eval_wer": 0.44269183528347017, "step": 4000 }, { "epoch": 132.26, "learning_rate": 5.090107737512243e-05, "loss": 0.8768, "step": 4100 }, { "epoch": 135.48, "learning_rate": 4.857492654260528e-05, "loss": 0.8697, "step": 4200 }, { "epoch": 138.71, "learning_rate": 4.6248775710088145e-05, "loss": 0.8604, "step": 4300 }, { "epoch": 141.94, "learning_rate": 4.394588638589617e-05, "loss": 0.853, "step": 4400 }, { "epoch": 141.94, "eval_cer": 0.09997505660232549, "eval_loss": 0.35362082719802856, "eval_runtime": 98.9414, "eval_samples_per_second": 18.253, "eval_steps_per_second": 0.293, "eval_wer": 0.437455556273838, "step": 4400 }, { "epoch": 145.16, "learning_rate": 4.161973555337904e-05, "loss": 0.8447, "step": 4500 }, { "epoch": 148.39, "learning_rate": 3.9293584720861896e-05, "loss": 0.8309, "step": 4600 }, { "epoch": 151.61, "learning_rate": 3.6967433888344755e-05, "loss": 0.827, "step": 4700 }, { "epoch": 154.84, "learning_rate": 3.464128305582762e-05, "loss": 0.8127, "step": 4800 }, { "epoch": 154.84, "eval_cer": 0.09857438888675697, "eval_loss": 0.3511004149913788, "eval_runtime": 97.5708, "eval_samples_per_second": 18.51, "eval_steps_per_second": 0.297, "eval_wer": 0.4343525761199819, "step": 4800 }, { "epoch": 158.06, "learning_rate": 3.231513222331048e-05, "loss": 0.8123, "step": 4900 }, { "epoch": 161.29, "learning_rate": 2.9988981390793336e-05, "loss": 0.8063, "step": 5000 }, { "epoch": 164.52, "learning_rate": 2.76628305582762e-05, "loss": 0.8046, "step": 5100 }, { "epoch": 167.74, "learning_rate": 2.5336679725759056e-05, "loss": 0.7861, "step": 5200 }, { "epoch": 167.74, "eval_cer": 0.09927472274454123, "eval_loss": 0.3595349192619324, "eval_runtime": 97.9334, "eval_samples_per_second": 18.441, "eval_steps_per_second": 0.296, "eval_wer": 0.43719697459435, "step": 5200 }, { "epoch": 170.97, "learning_rate": 2.301052889324192e-05, "loss": 0.7904, "step": 5300 }, { "epoch": 174.19, "learning_rate": 2.068437806072478e-05, "loss": 0.775, "step": 5400 }, { "epoch": 177.42, "learning_rate": 1.8358227228207637e-05, "loss": 0.7737, "step": 5500 }, { "epoch": 180.65, "learning_rate": 1.6032076395690502e-05, "loss": 0.7619, "step": 5600 }, { "epoch": 180.65, "eval_cer": 0.0985456080432864, "eval_loss": 0.3628048300743103, "eval_runtime": 97.525, "eval_samples_per_second": 18.518, "eval_steps_per_second": 0.297, "eval_wer": 0.43157282306548583, "step": 5600 }, { "epoch": 183.87, "learning_rate": 1.3705925563173359e-05, "loss": 0.7654, "step": 5700 }, { "epoch": 187.1, "learning_rate": 1.1379774730656215e-05, "loss": 0.7586, "step": 5800 }, { "epoch": 190.32, "learning_rate": 9.053623898139082e-06, "loss": 0.7537, "step": 5900 }, { "epoch": 193.55, "learning_rate": 6.727473065621939e-06, "loss": 0.7537, "step": 6000 }, { "epoch": 193.55, "eval_cer": 0.09426685598065927, "eval_loss": 0.3633102476596832, "eval_runtime": 94.1997, "eval_samples_per_second": 19.172, "eval_steps_per_second": 0.308, "eval_wer": 0.41735083069364537, "step": 6000 }, { "epoch": 196.77, "learning_rate": 4.4245837414299736e-06, "loss": 0.7486, "step": 6100 }, { "epoch": 197.55, "step": 6124, "total_flos": 1.4975736611439493e+20, "train_loss": 1.5647427196365646, "train_runtime": 50885.6899, "train_samples_per_second": 15.405, "train_steps_per_second": 0.12 } ], "max_steps": 6124, "num_train_epochs": 198, "total_flos": 1.4975736611439493e+20, "trial_name": null, "trial_params": null }