|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.079754601226995, |
|
"global_step": 16000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0002376, |
|
"loss": 6.548, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.476283073425293, |
|
"eval_runtime": 60.3486, |
|
"eval_samples_per_second": 26.513, |
|
"eval_steps_per_second": 1.657, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.000294379746835443, |
|
"loss": 3.42, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.3155558109283447, |
|
"eval_runtime": 60.3859, |
|
"eval_samples_per_second": 26.496, |
|
"eval_steps_per_second": 1.656, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.00028678481012658224, |
|
"loss": 3.291, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.2396233081817627, |
|
"eval_runtime": 60.8505, |
|
"eval_samples_per_second": 26.294, |
|
"eval_steps_per_second": 1.643, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 0.0002791898734177215, |
|
"loss": 2.6515, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"eval_cer": 0.5835108495504928, |
|
"eval_loss": 2.042246103286743, |
|
"eval_runtime": 60.4335, |
|
"eval_samples_per_second": 26.475, |
|
"eval_steps_per_second": 1.655, |
|
"eval_wer": 0.999671772428884, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.00027159493670886076, |
|
"loss": 1.7019, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_cer": 0.4797450987471567, |
|
"eval_loss": 1.6337121725082397, |
|
"eval_runtime": 60.2878, |
|
"eval_samples_per_second": 26.539, |
|
"eval_steps_per_second": 1.659, |
|
"eval_wer": 0.9892778993435448, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 0.00026399999999999997, |
|
"loss": 1.3604, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"eval_cer": 0.44627576993898255, |
|
"eval_loss": 1.5220645666122437, |
|
"eval_runtime": 60.8256, |
|
"eval_samples_per_second": 26.305, |
|
"eval_steps_per_second": 1.644, |
|
"eval_wer": 0.987527352297593, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 0.0002564050632911392, |
|
"loss": 1.1965, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_cer": 0.4246669314366177, |
|
"eval_loss": 1.528436303138733, |
|
"eval_runtime": 60.5379, |
|
"eval_samples_per_second": 26.43, |
|
"eval_steps_per_second": 1.652, |
|
"eval_wer": 0.9765864332603938, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 0.00024881012658227843, |
|
"loss": 1.069, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"eval_cer": 0.41239123370762176, |
|
"eval_loss": 1.5227978229522705, |
|
"eval_runtime": 60.2014, |
|
"eval_samples_per_second": 26.577, |
|
"eval_steps_per_second": 1.661, |
|
"eval_wer": 0.9671772428884027, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 0.00024121518987341772, |
|
"loss": 0.9536, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"eval_cer": 0.38684695093331406, |
|
"eval_loss": 1.4059038162231445, |
|
"eval_runtime": 60.2947, |
|
"eval_samples_per_second": 26.536, |
|
"eval_steps_per_second": 1.659, |
|
"eval_wer": 0.9599562363238512, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 0.00023362025316455695, |
|
"loss": 0.8487, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"eval_cer": 0.37390331082788747, |
|
"eval_loss": 1.4082870483398438, |
|
"eval_runtime": 60.1938, |
|
"eval_samples_per_second": 26.581, |
|
"eval_steps_per_second": 1.661, |
|
"eval_wer": 0.950109409190372, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 0.00022602531645569618, |
|
"loss": 0.7655, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"eval_cer": 0.36121240567570495, |
|
"eval_loss": 1.40787935256958, |
|
"eval_runtime": 60.2423, |
|
"eval_samples_per_second": 26.559, |
|
"eval_steps_per_second": 1.66, |
|
"eval_wer": 0.9368708971553611, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 0.0002184303797468354, |
|
"loss": 0.6956, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"eval_cer": 0.34590388850778064, |
|
"eval_loss": 1.417035698890686, |
|
"eval_runtime": 60.1538, |
|
"eval_samples_per_second": 26.598, |
|
"eval_steps_per_second": 1.662, |
|
"eval_wer": 0.9411378555798687, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 0.00021083544303797464, |
|
"loss": 0.6287, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"eval_cer": 0.3383579449037802, |
|
"eval_loss": 1.3999766111373901, |
|
"eval_runtime": 60.2038, |
|
"eval_samples_per_second": 26.576, |
|
"eval_steps_per_second": 1.661, |
|
"eval_wer": 0.9235229759299781, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"learning_rate": 0.00020324050632911393, |
|
"loss": 0.561, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 17.18, |
|
"eval_cer": 0.32953027403689933, |
|
"eval_loss": 1.4734982252120972, |
|
"eval_runtime": 60.3387, |
|
"eval_samples_per_second": 26.517, |
|
"eval_steps_per_second": 1.657, |
|
"eval_wer": 0.9022975929978119, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 0.00019564556962025316, |
|
"loss": 0.5155, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_cer": 0.32232732786944435, |
|
"eval_loss": 1.538634181022644, |
|
"eval_runtime": 69.1066, |
|
"eval_samples_per_second": 23.153, |
|
"eval_steps_per_second": 1.447, |
|
"eval_wer": 0.9202407002188184, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"learning_rate": 0.0001880506329113924, |
|
"loss": 0.4864, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"eval_cer": 0.32590172220818137, |
|
"eval_loss": 1.618619680404663, |
|
"eval_runtime": 69.0557, |
|
"eval_samples_per_second": 23.17, |
|
"eval_steps_per_second": 1.448, |
|
"eval_wer": 0.9073304157549235, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 20.86, |
|
"learning_rate": 0.00018045569620253163, |
|
"loss": 0.4261, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 20.86, |
|
"eval_cer": 0.313030292089396, |
|
"eval_loss": 1.6417021751403809, |
|
"eval_runtime": 68.875, |
|
"eval_samples_per_second": 23.23, |
|
"eval_steps_per_second": 1.452, |
|
"eval_wer": 0.9216630196936543, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 22.09, |
|
"learning_rate": 0.00017286075949367088, |
|
"loss": 0.4051, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 22.09, |
|
"eval_cer": 0.30263205401307003, |
|
"eval_loss": 1.6295086145401, |
|
"eval_runtime": 69.3362, |
|
"eval_samples_per_second": 23.076, |
|
"eval_steps_per_second": 1.442, |
|
"eval_wer": 0.8954048140043763, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 23.31, |
|
"learning_rate": 0.00016526582278481012, |
|
"loss": 0.3779, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 23.31, |
|
"eval_cer": 0.31534101166191286, |
|
"eval_loss": 1.8218317031860352, |
|
"eval_runtime": 68.8091, |
|
"eval_samples_per_second": 23.253, |
|
"eval_steps_per_second": 1.453, |
|
"eval_wer": 0.8979212253829322, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 24.54, |
|
"learning_rate": 0.00015767088607594935, |
|
"loss": 0.35, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 24.54, |
|
"eval_cer": 0.303570783839405, |
|
"eval_loss": 1.779032588005066, |
|
"eval_runtime": 68.3457, |
|
"eval_samples_per_second": 23.41, |
|
"eval_steps_per_second": 1.463, |
|
"eval_wer": 0.8921225382932166, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 25.77, |
|
"learning_rate": 0.00015007594936708858, |
|
"loss": 0.3343, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 25.77, |
|
"eval_cer": 0.3071812831714626, |
|
"eval_loss": 1.8588141202926636, |
|
"eval_runtime": 60.0463, |
|
"eval_samples_per_second": 26.646, |
|
"eval_steps_per_second": 1.665, |
|
"eval_wer": 0.9113785557986871, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"learning_rate": 0.00014248101265822784, |
|
"loss": 0.3137, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_cer": 0.29351554319962453, |
|
"eval_loss": 1.8095606565475464, |
|
"eval_runtime": 59.7272, |
|
"eval_samples_per_second": 26.788, |
|
"eval_steps_per_second": 1.674, |
|
"eval_wer": 0.8756017505470459, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 28.22, |
|
"learning_rate": 0.00013488607594936707, |
|
"loss": 0.299, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 28.22, |
|
"eval_cer": 0.3023251615698451, |
|
"eval_loss": 1.9720637798309326, |
|
"eval_runtime": 60.067, |
|
"eval_samples_per_second": 26.637, |
|
"eval_steps_per_second": 1.665, |
|
"eval_wer": 0.8863238512035011, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 29.45, |
|
"learning_rate": 0.00012729113924050633, |
|
"loss": 0.2894, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 29.45, |
|
"eval_cer": 0.29584431526880167, |
|
"eval_loss": 1.9907439947128296, |
|
"eval_runtime": 68.8361, |
|
"eval_samples_per_second": 23.244, |
|
"eval_steps_per_second": 1.453, |
|
"eval_wer": 0.887199124726477, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 30.67, |
|
"learning_rate": 0.00011969620253164556, |
|
"loss": 0.2784, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 30.67, |
|
"eval_cer": 0.2944542730259595, |
|
"eval_loss": 1.9494301080703735, |
|
"eval_runtime": 68.9865, |
|
"eval_samples_per_second": 23.193, |
|
"eval_steps_per_second": 1.45, |
|
"eval_wer": 0.9089715536105033, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 31.9, |
|
"learning_rate": 0.00011210126582278481, |
|
"loss": 0.2662, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 31.9, |
|
"eval_cer": 0.29346138570964364, |
|
"eval_loss": 1.995172142982483, |
|
"eval_runtime": 68.9796, |
|
"eval_samples_per_second": 23.195, |
|
"eval_steps_per_second": 1.45, |
|
"eval_wer": 0.8978118161925602, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 33.13, |
|
"learning_rate": 0.00010450632911392404, |
|
"loss": 0.2614, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 33.13, |
|
"eval_cer": 0.29790229988807454, |
|
"eval_loss": 2.0600392818450928, |
|
"eval_runtime": 68.6491, |
|
"eval_samples_per_second": 23.307, |
|
"eval_steps_per_second": 1.457, |
|
"eval_wer": 0.8948577680525164, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 34.36, |
|
"learning_rate": 9.691139240506327e-05, |
|
"loss": 0.2401, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 34.36, |
|
"eval_cer": 0.29495974293244753, |
|
"eval_loss": 2.118035316467285, |
|
"eval_runtime": 68.8384, |
|
"eval_samples_per_second": 23.243, |
|
"eval_steps_per_second": 1.453, |
|
"eval_wer": 0.8913566739606127, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 35.58, |
|
"learning_rate": 8.933544303797467e-05, |
|
"loss": 0.2392, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 35.58, |
|
"eval_cer": 0.28950788894104057, |
|
"eval_loss": 2.1196768283843994, |
|
"eval_runtime": 68.573, |
|
"eval_samples_per_second": 23.333, |
|
"eval_steps_per_second": 1.458, |
|
"eval_wer": 0.8713347921225383, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 36.81, |
|
"learning_rate": 8.175949367088606e-05, |
|
"loss": 0.23, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 36.81, |
|
"eval_cer": 0.2940932230927537, |
|
"eval_loss": 2.168043851852417, |
|
"eval_runtime": 68.6136, |
|
"eval_samples_per_second": 23.319, |
|
"eval_steps_per_second": 1.457, |
|
"eval_wer": 0.8713347921225383, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 38.04, |
|
"learning_rate": 7.416455696202532e-05, |
|
"loss": 0.2246, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 38.04, |
|
"eval_cer": 0.2879192692349352, |
|
"eval_loss": 2.1525843143463135, |
|
"eval_runtime": 59.7644, |
|
"eval_samples_per_second": 26.772, |
|
"eval_steps_per_second": 1.673, |
|
"eval_wer": 0.874070021881838, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 39.26, |
|
"learning_rate": 6.656962025316455e-05, |
|
"loss": 0.2152, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 39.26, |
|
"eval_cer": 0.28893020904791133, |
|
"eval_loss": 2.263143301010132, |
|
"eval_runtime": 60.4724, |
|
"eval_samples_per_second": 26.458, |
|
"eval_steps_per_second": 1.654, |
|
"eval_wer": 0.8789934354485777, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 40.49, |
|
"learning_rate": 5.897468354430379e-05, |
|
"loss": 0.212, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 40.49, |
|
"eval_cer": 0.28430876990287757, |
|
"eval_loss": 2.2723913192749023, |
|
"eval_runtime": 60.2908, |
|
"eval_samples_per_second": 26.538, |
|
"eval_steps_per_second": 1.659, |
|
"eval_wer": 0.8660831509846827, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 41.72, |
|
"learning_rate": 5.1379746835443034e-05, |
|
"loss": 0.2044, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 41.72, |
|
"eval_cer": 0.2877929017583132, |
|
"eval_loss": 2.2438297271728516, |
|
"eval_runtime": 70.0914, |
|
"eval_samples_per_second": 22.827, |
|
"eval_steps_per_second": 1.427, |
|
"eval_wer": 0.8691466083150985, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"learning_rate": 4.380379746835442e-05, |
|
"loss": 0.2029, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"eval_cer": 0.28327977759324113, |
|
"eval_loss": 2.2518999576568604, |
|
"eval_runtime": 69.2505, |
|
"eval_samples_per_second": 23.105, |
|
"eval_steps_per_second": 1.444, |
|
"eval_wer": 0.8576586433260394, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 44.17, |
|
"learning_rate": 3.6208860759493666e-05, |
|
"loss": 0.1972, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 44.17, |
|
"eval_cer": 0.28133010795393004, |
|
"eval_loss": 2.2697150707244873, |
|
"eval_runtime": 68.5533, |
|
"eval_samples_per_second": 23.34, |
|
"eval_steps_per_second": 1.459, |
|
"eval_wer": 0.8603938730853392, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"learning_rate": 2.861392405063291e-05, |
|
"loss": 0.1884, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 45.4, |
|
"eval_cer": 0.2846878723327436, |
|
"eval_loss": 2.3294308185577393, |
|
"eval_runtime": 60.5028, |
|
"eval_samples_per_second": 26.445, |
|
"eval_steps_per_second": 1.653, |
|
"eval_wer": 0.8661925601750547, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 46.63, |
|
"learning_rate": 2.101898734177215e-05, |
|
"loss": 0.1877, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 46.63, |
|
"eval_cer": 0.2792721233346572, |
|
"eval_loss": 2.3077094554901123, |
|
"eval_runtime": 60.8923, |
|
"eval_samples_per_second": 26.276, |
|
"eval_steps_per_second": 1.642, |
|
"eval_wer": 0.8561269146608315, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 47.85, |
|
"learning_rate": 1.3424050632911391e-05, |
|
"loss": 0.1871, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 47.85, |
|
"eval_cer": 0.2801025381810304, |
|
"eval_loss": 2.351794481277466, |
|
"eval_runtime": 60.3104, |
|
"eval_samples_per_second": 26.529, |
|
"eval_steps_per_second": 1.658, |
|
"eval_wer": 0.8563457330415755, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 49.08, |
|
"learning_rate": 5.829113924050632e-06, |
|
"loss": 0.1838, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 49.08, |
|
"eval_cer": 0.27992201321442756, |
|
"eval_loss": 2.3462095260620117, |
|
"eval_runtime": 60.2933, |
|
"eval_samples_per_second": 26.537, |
|
"eval_steps_per_second": 1.659, |
|
"eval_wer": 0.8555798687089715, |
|
"step": 16000 |
|
} |
|
], |
|
"max_steps": 16300, |
|
"num_train_epochs": 50, |
|
"total_flos": 6.654066355887293e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|