|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 79.98765432098766, |
|
"global_step": 4800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.752e-06, |
|
"loss": 14.1408, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 9.552000000000001e-06, |
|
"loss": 6.1022, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.4351999999999999e-05, |
|
"loss": 4.037, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 1.9152000000000002e-05, |
|
"loss": 3.3885, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 2.3952e-05, |
|
"loss": 3.0021, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_loss": 2.90588116645813, |
|
"eval_runtime": 32.387, |
|
"eval_samples_per_second": 27.542, |
|
"eval_steps_per_second": 3.458, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 2.8752e-05, |
|
"loss": 2.8318, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 3.3552e-05, |
|
"loss": 2.7909, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 3.8352e-05, |
|
"loss": 2.7355, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 4.3152e-05, |
|
"loss": 2.6796, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 4.7952000000000004e-05, |
|
"loss": 2.6604, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"eval_loss": 2.640192985534668, |
|
"eval_runtime": 31.7049, |
|
"eval_samples_per_second": 28.134, |
|
"eval_steps_per_second": 3.533, |
|
"eval_wer": 0.9891806116560877, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 5.2752e-05, |
|
"loss": 2.4114, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 5.755200000000001e-05, |
|
"loss": 1.675, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.66, |
|
"learning_rate": 6.2352e-05, |
|
"loss": 1.4118, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 23.33, |
|
"learning_rate": 6.7152e-05, |
|
"loss": 1.291, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"learning_rate": 7.1952e-05, |
|
"loss": 1.2216, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_loss": 0.6051220893859863, |
|
"eval_runtime": 31.8239, |
|
"eval_samples_per_second": 28.029, |
|
"eval_steps_per_second": 3.519, |
|
"eval_wer": 0.6850836699365263, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.66, |
|
"learning_rate": 7.6752e-05, |
|
"loss": 1.2115, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"learning_rate": 8.1504e-05, |
|
"loss": 1.1555, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"learning_rate": 8.6304e-05, |
|
"loss": 1.1142, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 31.66, |
|
"learning_rate": 9.1104e-05, |
|
"loss": 1.1029, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 9.590400000000001e-05, |
|
"loss": 1.0754, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"eval_loss": 0.5407921075820923, |
|
"eval_runtime": 31.9875, |
|
"eval_samples_per_second": 27.886, |
|
"eval_steps_per_second": 3.501, |
|
"eval_wer": 0.6464223889209464, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"learning_rate": 9.264e-05, |
|
"loss": 1.05, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 36.66, |
|
"learning_rate": 8.924571428571428e-05, |
|
"loss": 1.0239, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 38.33, |
|
"learning_rate": 8.581714285714286e-05, |
|
"loss": 1.0023, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 39.99, |
|
"learning_rate": 8.238857142857142e-05, |
|
"loss": 0.9695, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 41.66, |
|
"learning_rate": 7.896e-05, |
|
"loss": 0.9582, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 41.66, |
|
"eval_loss": 0.5521320700645447, |
|
"eval_runtime": 31.6386, |
|
"eval_samples_per_second": 28.193, |
|
"eval_steps_per_second": 3.54, |
|
"eval_wer": 0.5934795152914022, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 43.33, |
|
"learning_rate": 7.553142857142858e-05, |
|
"loss": 0.9312, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"learning_rate": 7.210285714285715e-05, |
|
"loss": 0.9136, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 46.66, |
|
"learning_rate": 6.867428571428571e-05, |
|
"loss": 0.9065, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 48.33, |
|
"learning_rate": 6.524571428571428e-05, |
|
"loss": 0.8843, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"learning_rate": 6.181714285714286e-05, |
|
"loss": 0.8653, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"eval_loss": 0.5156339406967163, |
|
"eval_runtime": 31.9186, |
|
"eval_samples_per_second": 27.946, |
|
"eval_steps_per_second": 3.509, |
|
"eval_wer": 0.5549624927870744, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 51.66, |
|
"learning_rate": 5.842285714285714e-05, |
|
"loss": 0.8526, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"learning_rate": 5.4994285714285715e-05, |
|
"loss": 0.8323, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 54.99, |
|
"learning_rate": 5.156571428571429e-05, |
|
"loss": 0.8089, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 56.66, |
|
"learning_rate": 4.813714285714286e-05, |
|
"loss": 0.8016, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"learning_rate": 4.4708571428571425e-05, |
|
"loss": 0.7867, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 58.33, |
|
"eval_loss": 0.543922483921051, |
|
"eval_runtime": 31.6944, |
|
"eval_samples_per_second": 28.144, |
|
"eval_steps_per_second": 3.534, |
|
"eval_wer": 0.5605885747259088, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 59.99, |
|
"learning_rate": 4.128e-05, |
|
"loss": 0.7598, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 61.66, |
|
"learning_rate": 3.785142857142857e-05, |
|
"loss": 0.7582, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 63.33, |
|
"learning_rate": 3.442285714285715e-05, |
|
"loss": 0.7423, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"learning_rate": 3.0994285714285715e-05, |
|
"loss": 0.7313, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 66.66, |
|
"learning_rate": 2.7565714285714287e-05, |
|
"loss": 0.7265, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 66.66, |
|
"eval_loss": 0.48627379536628723, |
|
"eval_runtime": 31.7498, |
|
"eval_samples_per_second": 28.095, |
|
"eval_steps_per_second": 3.528, |
|
"eval_wer": 0.525533756491633, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 68.33, |
|
"learning_rate": 2.413714285714286e-05, |
|
"loss": 0.7134, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 69.99, |
|
"learning_rate": 2.0708571428571428e-05, |
|
"loss": 0.6867, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 71.66, |
|
"learning_rate": 1.728e-05, |
|
"loss": 0.6987, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 73.33, |
|
"learning_rate": 1.3851428571428573e-05, |
|
"loss": 0.6976, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"learning_rate": 1.0422857142857143e-05, |
|
"loss": 0.6699, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"eval_loss": 0.5050373673439026, |
|
"eval_runtime": 31.7366, |
|
"eval_samples_per_second": 28.106, |
|
"eval_steps_per_second": 3.529, |
|
"eval_wer": 0.5168782458165032, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 76.66, |
|
"learning_rate": 6.994285714285714e-06, |
|
"loss": 0.6777, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 78.33, |
|
"learning_rate": 3.565714285714286e-06, |
|
"loss": 0.6669, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 79.99, |
|
"learning_rate": 1.3714285714285715e-07, |
|
"loss": 0.6602, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 79.99, |
|
"step": 4800, |
|
"total_flos": 2.017023736432276e+19, |
|
"train_loss": 1.6836539268493653, |
|
"train_runtime": 8656.8344, |
|
"train_samples_per_second": 17.937, |
|
"train_steps_per_second": 0.554 |
|
} |
|
], |
|
"max_steps": 4800, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.017023736432276e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|