|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.909466234149218, |
|
"global_step": 6700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"eval_cer": 0.06875412712023102, |
|
"eval_loss": 0.2844763696193695, |
|
"eval_runtime": 65.9715, |
|
"eval_samples_per_second": 7.579, |
|
"eval_steps_per_second": 0.955, |
|
"eval_wer": 0.25628282600101654, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002394, |
|
"loss": 3.0984, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_cer": 0.04085233040615791, |
|
"eval_loss": 0.15484967827796936, |
|
"eval_runtime": 65.7396, |
|
"eval_samples_per_second": 7.606, |
|
"eval_steps_per_second": 0.958, |
|
"eval_wer": 0.1411306263059807, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002857074569789675, |
|
"loss": 0.2022, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_cer": 0.03730423898366892, |
|
"eval_loss": 0.14234833419322968, |
|
"eval_runtime": 65.8678, |
|
"eval_samples_per_second": 7.591, |
|
"eval_steps_per_second": 0.956, |
|
"eval_wer": 0.11916191336759474, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002665869980879541, |
|
"loss": 0.1458, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_cer": 0.03469244946433676, |
|
"eval_loss": 0.12395954132080078, |
|
"eval_runtime": 66.8459, |
|
"eval_samples_per_second": 7.48, |
|
"eval_steps_per_second": 0.942, |
|
"eval_wer": 0.11165075958660417, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002474665391969407, |
|
"loss": 0.1354, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_cer": 0.03423908222701871, |
|
"eval_loss": 0.12080004811286926, |
|
"eval_runtime": 65.8681, |
|
"eval_samples_per_second": 7.591, |
|
"eval_steps_per_second": 0.956, |
|
"eval_wer": 0.10814931947817247, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00022834608030592734, |
|
"loss": 0.1169, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_cer": 0.03338162679991721, |
|
"eval_loss": 0.11872279644012451, |
|
"eval_runtime": 66.8663, |
|
"eval_samples_per_second": 7.478, |
|
"eval_steps_per_second": 0.942, |
|
"eval_wer": 0.10735867171175241, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_cer": 0.03327321289534116, |
|
"eval_loss": 0.12243428081274033, |
|
"eval_runtime": 67.2919, |
|
"eval_samples_per_second": 7.43, |
|
"eval_steps_per_second": 0.936, |
|
"eval_wer": 0.10199356186818773, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.00020922562141491394, |
|
"loss": 0.1107, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_cer": 0.0313808974700137, |
|
"eval_loss": 0.11623063683509827, |
|
"eval_runtime": 66.9398, |
|
"eval_samples_per_second": 7.469, |
|
"eval_steps_per_second": 0.941, |
|
"eval_wer": 0.09724967526966737, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.00019010516252390057, |
|
"loss": 0.0976, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_cer": 0.03366744527561771, |
|
"eval_loss": 0.11647358536720276, |
|
"eval_runtime": 67.2006, |
|
"eval_samples_per_second": 7.44, |
|
"eval_steps_per_second": 0.937, |
|
"eval_wer": 0.10052521601626475, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.0001709847036328872, |
|
"loss": 0.0914, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_cer": 0.03200181346894927, |
|
"eval_loss": 0.1139749139547348, |
|
"eval_runtime": 67.5106, |
|
"eval_samples_per_second": 7.406, |
|
"eval_steps_per_second": 0.933, |
|
"eval_wer": 0.09871802112159034, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.00015186424474187378, |
|
"loss": 0.0852, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_cer": 0.03156815785064506, |
|
"eval_loss": 0.12490493804216385, |
|
"eval_runtime": 67.225, |
|
"eval_samples_per_second": 7.438, |
|
"eval_steps_per_second": 0.937, |
|
"eval_wer": 0.09561190489636867, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 0.0001327437858508604, |
|
"loss": 0.0777, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_cer": 0.03226792032563595, |
|
"eval_loss": 0.11327209323644638, |
|
"eval_runtime": 67.468, |
|
"eval_samples_per_second": 7.411, |
|
"eval_steps_per_second": 0.934, |
|
"eval_wer": 0.09532953069407579, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"eval_cer": 0.03157801366015198, |
|
"eval_loss": 0.1231861561536789, |
|
"eval_runtime": 67.6103, |
|
"eval_samples_per_second": 7.395, |
|
"eval_steps_per_second": 0.932, |
|
"eval_wer": 0.09521658101315864, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.00011362332695984703, |
|
"loss": 0.0714, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_cer": 0.032208785468594466, |
|
"eval_loss": 0.13348866999149323, |
|
"eval_runtime": 67.4714, |
|
"eval_samples_per_second": 7.411, |
|
"eval_steps_per_second": 0.934, |
|
"eval_wer": 0.09555543005591009, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 9.450286806883364e-05, |
|
"loss": 0.0631, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_cer": 0.03205109251648384, |
|
"eval_loss": 0.12228710949420929, |
|
"eval_runtime": 67.5997, |
|
"eval_samples_per_second": 7.396, |
|
"eval_steps_per_second": 0.932, |
|
"eval_wer": 0.09521658101315864, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 7.543021032504778e-05, |
|
"loss": 0.0621, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_cer": 0.032080659945004586, |
|
"eval_loss": 0.1337049901485443, |
|
"eval_runtime": 67.7409, |
|
"eval_samples_per_second": 7.381, |
|
"eval_steps_per_second": 0.93, |
|
"eval_wer": 0.09504715649178291, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 5.6309751434034414e-05, |
|
"loss": 0.0538, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_cer": 0.03255373880133645, |
|
"eval_loss": 0.13896578550338745, |
|
"eval_runtime": 67.8406, |
|
"eval_samples_per_second": 7.37, |
|
"eval_steps_per_second": 0.929, |
|
"eval_wer": 0.09431298356582142, |
|
"step": 5695 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 3.718929254302103e-05, |
|
"loss": 0.0513, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_cer": 0.03282970146753004, |
|
"eval_loss": 0.1389976292848587, |
|
"eval_runtime": 68.1201, |
|
"eval_samples_per_second": 7.34, |
|
"eval_steps_per_second": 0.925, |
|
"eval_wer": 0.09549895521545151, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_cer": 0.03255373880133645, |
|
"eval_loss": 0.1410442739725113, |
|
"eval_runtime": 67.6146, |
|
"eval_samples_per_second": 7.395, |
|
"eval_steps_per_second": 0.932, |
|
"eval_wer": 0.09470830744903146, |
|
"step": 6365 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 1.8068833652007646e-05, |
|
"loss": 0.0442, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_cer": 0.03214965061155298, |
|
"eval_loss": 0.1414366066455841, |
|
"eval_runtime": 68.0581, |
|
"eval_samples_per_second": 7.347, |
|
"eval_steps_per_second": 0.926, |
|
"eval_wer": 0.09453888292765573, |
|
"step": 6700 |
|
} |
|
], |
|
"max_steps": 6776, |
|
"num_train_epochs": 8, |
|
"total_flos": 2.1224029808181802e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|