|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 200.3030303030303, |
|
"global_step": 9815, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 8.16743119266055e-06, |
|
"loss": 15.5589, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.573623853211009e-05, |
|
"loss": 6.7324, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 2.330504587155963e-05, |
|
"loss": 4.9014, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 3.087385321100917e-05, |
|
"loss": 3.9471, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"eval_cer": 1.0, |
|
"eval_loss": 3.7109458446502686, |
|
"eval_runtime": 120.434, |
|
"eval_samples_per_second": 23.573, |
|
"eval_steps_per_second": 0.374, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 3.8442660550458714e-05, |
|
"loss": 3.5201, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 4.601146788990825e-05, |
|
"loss": 3.4075, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 5.35802752293578e-05, |
|
"loss": 3.3368, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 6.114908256880733e-05, |
|
"loss": 3.274, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"eval_cer": 0.9572537581332735, |
|
"eval_loss": 3.1581757068634033, |
|
"eval_runtime": 118.5811, |
|
"eval_samples_per_second": 23.941, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 0.9917300840188272, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"learning_rate": 6.871788990825688e-05, |
|
"loss": 2.8673, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 7.5e-05, |
|
"loss": 2.0372, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 22.44, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.7262, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 24.48, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.5889, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 24.48, |
|
"eval_cer": 0.19903073816468478, |
|
"eval_loss": 0.7762717604637146, |
|
"eval_runtime": 118.6201, |
|
"eval_samples_per_second": 23.934, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 0.6030440329037082, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 26.53, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.5113, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.4529, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 30.61, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.4042, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 32.65, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.3647, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 32.65, |
|
"eval_cer": 0.16868745793134396, |
|
"eval_loss": 0.605066180229187, |
|
"eval_runtime": 119.2577, |
|
"eval_samples_per_second": 23.806, |
|
"eval_steps_per_second": 0.377, |
|
"eval_wer": 0.5134826023841992, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 34.69, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.3249, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 36.73, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.3121, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 38.77, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.2899, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 40.81, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.2532, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 40.81, |
|
"eval_cer": 0.15389724029616333, |
|
"eval_loss": 0.5423398017883301, |
|
"eval_runtime": 118.7297, |
|
"eval_samples_per_second": 23.911, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 0.4711652663528791, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 42.85, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.2254, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 44.89, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.2028, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 46.93, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.1903, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 48.97, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.1905, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 48.97, |
|
"eval_cer": 0.1490329818263406, |
|
"eval_loss": 0.5179790258407593, |
|
"eval_runtime": 119.6696, |
|
"eval_samples_per_second": 23.724, |
|
"eval_steps_per_second": 0.376, |
|
"eval_wer": 0.4532177891171425, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 51.02, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.1671, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 53.06, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.1381, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 55.1, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.131, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.1193, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"eval_cer": 0.13925959165357865, |
|
"eval_loss": 0.4906373620033264, |
|
"eval_runtime": 119.1486, |
|
"eval_samples_per_second": 23.827, |
|
"eval_steps_per_second": 0.378, |
|
"eval_wer": 0.4248009501605595, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 59.18, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.1008, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 61.22, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.0784, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 63.26, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.0678, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 65.3, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.0584, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 65.3, |
|
"eval_cer": 0.13315683194974198, |
|
"eval_loss": 0.48543456196784973, |
|
"eval_runtime": 119.1946, |
|
"eval_samples_per_second": 23.818, |
|
"eval_steps_per_second": 0.378, |
|
"eval_wer": 0.40685347292482293, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 67.34, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.0434, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 69.38, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.0332, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 71.42, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.0198, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 73.46, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.0095, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 73.46, |
|
"eval_cer": 0.12868745793134395, |
|
"eval_loss": 0.4780372381210327, |
|
"eval_runtime": 118.9725, |
|
"eval_samples_per_second": 23.863, |
|
"eval_steps_per_second": 0.378, |
|
"eval_wer": 0.39255707561694453, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 75.51, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9989, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 77.55, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9924, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 79.59, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9878, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 81.63, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9759, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 81.63, |
|
"eval_cer": 0.12686560466681623, |
|
"eval_loss": 0.46656644344329834, |
|
"eval_runtime": 116.6705, |
|
"eval_samples_per_second": 24.333, |
|
"eval_steps_per_second": 0.386, |
|
"eval_wer": 0.39251308670215107, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 83.67, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9659, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 85.71, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9531, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 87.75, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9452, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 89.79, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9593, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 89.79, |
|
"eval_cer": 0.1247386134170967, |
|
"eval_loss": 0.4808085262775421, |
|
"eval_runtime": 119.5296, |
|
"eval_samples_per_second": 23.751, |
|
"eval_steps_per_second": 0.376, |
|
"eval_wer": 0.38296749219196763, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 91.83, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.928, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 93.87, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9251, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 95.91, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.9098, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 97.95, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.909, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 97.95, |
|
"eval_cer": 0.1212385012340139, |
|
"eval_loss": 0.4797753393650055, |
|
"eval_runtime": 116.9311, |
|
"eval_samples_per_second": 24.279, |
|
"eval_steps_per_second": 0.385, |
|
"eval_wer": 0.37645713280253373, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.8979, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 102.04, |
|
"learning_rate": 7.370797677261614e-05, |
|
"loss": 0.8993, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 104.08, |
|
"learning_rate": 7.22562652811736e-05, |
|
"loss": 0.8888, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 106.12, |
|
"learning_rate": 7.080455378973104e-05, |
|
"loss": 0.8788, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 106.12, |
|
"eval_cer": 0.11615885124523222, |
|
"eval_loss": 0.4906252324581146, |
|
"eval_runtime": 119.2237, |
|
"eval_samples_per_second": 23.812, |
|
"eval_steps_per_second": 0.377, |
|
"eval_wer": 0.3608410680508512, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 108.16, |
|
"learning_rate": 6.93528422982885e-05, |
|
"loss": 0.8701, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 110.2, |
|
"learning_rate": 6.790113080684596e-05, |
|
"loss": 0.8621, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 112.24, |
|
"learning_rate": 6.644941931540342e-05, |
|
"loss": 0.8525, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 114.28, |
|
"learning_rate": 6.499770782396088e-05, |
|
"loss": 0.8471, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 114.28, |
|
"eval_cer": 0.11655373569665695, |
|
"eval_loss": 0.4758923053741455, |
|
"eval_runtime": 117.4104, |
|
"eval_samples_per_second": 24.18, |
|
"eval_steps_per_second": 0.383, |
|
"eval_wer": 0.360357189988123, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 116.32, |
|
"learning_rate": 6.354599633251834e-05, |
|
"loss": 0.8442, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 118.36, |
|
"learning_rate": 6.209428484107578e-05, |
|
"loss": 0.8244, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 120.4, |
|
"learning_rate": 6.0642573349633246e-05, |
|
"loss": 0.8224, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 122.44, |
|
"learning_rate": 5.9190861858190705e-05, |
|
"loss": 0.8116, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 122.44, |
|
"eval_cer": 0.11757684541171191, |
|
"eval_loss": 0.5079970359802246, |
|
"eval_runtime": 118.6326, |
|
"eval_samples_per_second": 23.931, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 0.3627325913869705, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 124.48, |
|
"learning_rate": 5.773915036674816e-05, |
|
"loss": 0.807, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 126.53, |
|
"learning_rate": 5.6287438875305616e-05, |
|
"loss": 0.8011, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 128.57, |
|
"learning_rate": 5.4835727383863075e-05, |
|
"loss": 0.799, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 130.61, |
|
"learning_rate": 5.3384015892420534e-05, |
|
"loss": 0.7881, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 130.61, |
|
"eval_cer": 0.11347543190486875, |
|
"eval_loss": 0.4867645502090454, |
|
"eval_runtime": 118.7534, |
|
"eval_samples_per_second": 23.907, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 0.34892007214182025, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 132.65, |
|
"learning_rate": 5.193230440097799e-05, |
|
"loss": 0.7781, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 134.69, |
|
"learning_rate": 5.048059290953545e-05, |
|
"loss": 0.7685, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 136.73, |
|
"learning_rate": 4.902888141809291e-05, |
|
"loss": 0.7654, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 138.77, |
|
"learning_rate": 4.757716992665037e-05, |
|
"loss": 0.766, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 138.77, |
|
"eval_cer": 0.11357415301772493, |
|
"eval_loss": 0.49552106857299805, |
|
"eval_runtime": 117.4435, |
|
"eval_samples_per_second": 24.173, |
|
"eval_steps_per_second": 0.383, |
|
"eval_wer": 0.34922799454537456, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 140.81, |
|
"learning_rate": 4.6139975550122245e-05, |
|
"loss": 0.752, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 142.85, |
|
"learning_rate": 4.4688264058679704e-05, |
|
"loss": 0.7451, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 144.89, |
|
"learning_rate": 4.323655256723716e-05, |
|
"loss": 0.7398, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 146.93, |
|
"learning_rate": 4.178484107579462e-05, |
|
"loss": 0.7333, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 146.93, |
|
"eval_cer": 0.11247027148306035, |
|
"eval_loss": 0.5019353032112122, |
|
"eval_runtime": 118.907, |
|
"eval_samples_per_second": 23.876, |
|
"eval_steps_per_second": 0.378, |
|
"eval_wer": 0.3461487705098315, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 148.97, |
|
"learning_rate": 4.033312958435208e-05, |
|
"loss": 0.7255, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 151.02, |
|
"learning_rate": 3.888141809290954e-05, |
|
"loss": 0.7283, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 153.06, |
|
"learning_rate": 3.742970660146699e-05, |
|
"loss": 0.7149, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 155.1, |
|
"learning_rate": 3.597799511002445e-05, |
|
"loss": 0.709, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 155.1, |
|
"eval_cer": 0.11168050258021091, |
|
"eval_loss": 0.5083914995193481, |
|
"eval_runtime": 120.383, |
|
"eval_samples_per_second": 23.583, |
|
"eval_steps_per_second": 0.374, |
|
"eval_wer": 0.3468086042317336, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 157.14, |
|
"learning_rate": 3.452628361858191e-05, |
|
"loss": 0.7084, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 159.18, |
|
"learning_rate": 3.307457212713937e-05, |
|
"loss": 0.704, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 161.22, |
|
"learning_rate": 3.1622860635696827e-05, |
|
"loss": 0.698, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 163.26, |
|
"learning_rate": 3.0171149144254275e-05, |
|
"loss": 0.6911, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 163.26, |
|
"eval_cer": 0.11064841821853265, |
|
"eval_loss": 0.5144167542457581, |
|
"eval_runtime": 120.6954, |
|
"eval_samples_per_second": 23.522, |
|
"eval_steps_per_second": 0.373, |
|
"eval_wer": 0.34122201205296265, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 165.3, |
|
"learning_rate": 2.8719437652811734e-05, |
|
"loss": 0.6889, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 167.34, |
|
"learning_rate": 2.726772616136919e-05, |
|
"loss": 0.6863, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 169.38, |
|
"learning_rate": 2.581601466992665e-05, |
|
"loss": 0.6825, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 171.42, |
|
"learning_rate": 2.4364303178484107e-05, |
|
"loss": 0.6683, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 171.42, |
|
"eval_cer": 0.11165357864034103, |
|
"eval_loss": 0.5218909382820129, |
|
"eval_runtime": 119.5764, |
|
"eval_samples_per_second": 23.742, |
|
"eval_steps_per_second": 0.376, |
|
"eval_wer": 0.34091408964940834, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 173.46, |
|
"learning_rate": 2.2912591687041566e-05, |
|
"loss": 0.6627, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 175.51, |
|
"learning_rate": 2.1460880195599022e-05, |
|
"loss": 0.6655, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 177.55, |
|
"learning_rate": 2.000916870415648e-05, |
|
"loss": 0.6662, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 179.59, |
|
"learning_rate": 1.855745721271394e-05, |
|
"loss": 0.659, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 179.59, |
|
"eval_cer": 0.1095983845636078, |
|
"eval_loss": 0.5229596495628357, |
|
"eval_runtime": 119.167, |
|
"eval_samples_per_second": 23.824, |
|
"eval_steps_per_second": 0.378, |
|
"eval_wer": 0.3375709321251045, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 181.63, |
|
"learning_rate": 1.7105745721271395e-05, |
|
"loss": 0.6526, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 183.67, |
|
"learning_rate": 1.5654034229828854e-05, |
|
"loss": 0.654, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 185.71, |
|
"learning_rate": 1.4202322738386313e-05, |
|
"loss": 0.6481, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 187.75, |
|
"learning_rate": 1.275061124694377e-05, |
|
"loss": 0.6475, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 187.75, |
|
"eval_cer": 0.10967915638321742, |
|
"eval_loss": 0.5228886604309082, |
|
"eval_runtime": 118.4312, |
|
"eval_samples_per_second": 23.972, |
|
"eval_steps_per_second": 0.38, |
|
"eval_wer": 0.3397703778647781, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 189.79, |
|
"learning_rate": 1.1298899755501229e-05, |
|
"loss": 0.6476, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 191.83, |
|
"learning_rate": 9.847188264058686e-06, |
|
"loss": 0.6485, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 193.87, |
|
"learning_rate": 8.395476772616145e-06, |
|
"loss": 0.6449, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 195.91, |
|
"learning_rate": 6.943765281173602e-06, |
|
"loss": 0.6419, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 195.91, |
|
"eval_cer": 0.10843168050258022, |
|
"eval_loss": 0.520015299320221, |
|
"eval_runtime": 118.8519, |
|
"eval_samples_per_second": 23.887, |
|
"eval_steps_per_second": 0.379, |
|
"eval_wer": 0.33369990762327895, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 197.95, |
|
"learning_rate": 5.4920537897310516e-06, |
|
"loss": 0.627, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 199.99, |
|
"learning_rate": 4.054859413202941e-06, |
|
"loss": 0.6301, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 200.3, |
|
"step": 9815, |
|
"total_flos": 1.72528931544575e+20, |
|
"train_loss": 1.3263086397673363, |
|
"train_runtime": 56983.3895, |
|
"train_samples_per_second": 22.047, |
|
"train_steps_per_second": 0.172 |
|
} |
|
], |
|
"max_steps": 9815, |
|
"num_train_epochs": 201, |
|
"total_flos": 1.72528931544575e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|