{
  "best_metric": 0.10924588888883591,
  "best_model_checkpoint": "./checkpoint-12000",
  "epoch": 38.58520900321543,
  "global_step": 12000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
"log_history": [ |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.50125e-06, |
|
"loss": 8.0884, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.62625e-06, |
|
"loss": 3.2246, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2751250000000001e-05, |
|
"loss": 3.1607, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.6876250000000003e-05, |
|
"loss": 2.3964, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.100125e-05, |
|
"loss": 1.7005, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_cer": 0.11636638864978778, |
|
"eval_loss": 0.4082379639148712, |
|
"eval_runtime": 199.3377, |
|
"eval_samples_per_second": 29.106, |
|
"eval_steps_per_second": 0.457, |
|
"eval_wer": 0.5583793477747888, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.512625e-05, |
|
"loss": 1.4874, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.9251250000000002e-05, |
|
"loss": 1.3431, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.3376250000000004e-05, |
|
"loss": 1.2316, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.750125e-05, |
|
"loss": 1.187, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 4.162625e-05, |
|
"loss": 1.1555, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_cer": 0.05566026535276483, |
|
"eval_loss": 0.2020130306482315, |
|
"eval_runtime": 199.2116, |
|
"eval_samples_per_second": 29.125, |
|
"eval_steps_per_second": 0.457, |
|
"eval_wer": 0.29534192269573833, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 4.575125e-05, |
|
"loss": 1.1286, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 4.9876250000000005e-05, |
|
"loss": 1.1143, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1067, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0992, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0927, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"eval_cer": 0.04799130331542548, |
|
"eval_loss": 0.1707664430141449, |
|
"eval_runtime": 197.8453, |
|
"eval_samples_per_second": 29.326, |
|
"eval_steps_per_second": 0.46, |
|
"eval_wer": 0.25843598093350323, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0907, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0765, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0693, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0547, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0707, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_cer": 0.04497120892820041, |
|
"eval_loss": 0.15630319714546204, |
|
"eval_runtime": 197.159, |
|
"eval_samples_per_second": 29.428, |
|
"eval_steps_per_second": 0.462, |
|
"eval_wer": 0.24054934163952996, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0647, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 5e-05, |
|
"loss": 1.054, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0478, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0611, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0728, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"eval_cer": 0.04629574663856816, |
|
"eval_loss": 0.16203930974006653, |
|
"eval_runtime": 196.804, |
|
"eval_samples_per_second": 29.481, |
|
"eval_steps_per_second": 0.462, |
|
"eval_wer": 0.2442304969559677, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0563, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0404, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0791, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0535, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0268, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"eval_cer": 0.04575776913715829, |
|
"eval_loss": 0.15875375270843506, |
|
"eval_runtime": 201.4769, |
|
"eval_samples_per_second": 28.797, |
|
"eval_steps_per_second": 0.452, |
|
"eval_wer": 0.2377884751522016, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0322, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0208, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0172, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 5e-05, |
|
"loss": 1.019, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0328, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"eval_cer": 0.04419206909857232, |
|
"eval_loss": 0.14661966264247894, |
|
"eval_runtime": 196.9894, |
|
"eval_samples_per_second": 29.453, |
|
"eval_steps_per_second": 0.462, |
|
"eval_wer": 0.23516919156165936, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0153, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0206, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0168, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0269, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0249, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"eval_cer": 0.04486361342791843, |
|
"eval_loss": 0.15519459545612335, |
|
"eval_runtime": 197.5966, |
|
"eval_samples_per_second": 29.363, |
|
"eval_steps_per_second": 0.461, |
|
"eval_wer": 0.23413091698522817, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 5e-05, |
|
"loss": 1.022, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0219, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0203, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0171, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 5e-05, |
|
"loss": 1.016, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"eval_cer": 0.047286367279095305, |
|
"eval_loss": 0.16016805171966553, |
|
"eval_runtime": 197.4133, |
|
"eval_samples_per_second": 29.39, |
|
"eval_steps_per_second": 0.461, |
|
"eval_wer": 0.2435461796215017, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0233, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0139, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0252, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 4.936666666666667e-05, |
|
"loss": 1.0305, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 16.08, |
|
"learning_rate": 4.870694444444445e-05, |
|
"loss": 1.0164, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.08, |
|
"eval_cer": 0.044392419340476684, |
|
"eval_loss": 0.14910832047462463, |
|
"eval_runtime": 205.8325, |
|
"eval_samples_per_second": 28.188, |
|
"eval_steps_per_second": 0.442, |
|
"eval_wer": 0.23372976544433433, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 4.804722222222223e-05, |
|
"loss": 1.0029, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 4.73875e-05, |
|
"loss": 0.9924, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 17.04, |
|
"learning_rate": 4.672777777777778e-05, |
|
"loss": 1.0058, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 17.36, |
|
"learning_rate": 4.606805555555556e-05, |
|
"loss": 0.996, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"learning_rate": 4.540833333333334e-05, |
|
"loss": 0.9935, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"eval_cer": 0.045754058947493396, |
|
"eval_loss": 0.15390604734420776, |
|
"eval_runtime": 206.7044, |
|
"eval_samples_per_second": 28.069, |
|
"eval_steps_per_second": 0.44, |
|
"eval_wer": 0.23729293501345036, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 4.4748611111111116e-05, |
|
"loss": 0.9993, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 4.408888888888889e-05, |
|
"loss": 0.983, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"learning_rate": 4.342916666666667e-05, |
|
"loss": 0.9794, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 4.2769444444444447e-05, |
|
"loss": 0.9719, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 19.29, |
|
"learning_rate": 4.2109722222222226e-05, |
|
"loss": 0.9626, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.29, |
|
"eval_cer": 0.04342777002760381, |
|
"eval_loss": 0.1458132266998291, |
|
"eval_runtime": 201.2355, |
|
"eval_samples_per_second": 28.832, |
|
"eval_steps_per_second": 0.452, |
|
"eval_wer": 0.2305441502666478, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 4.145e-05, |
|
"loss": 0.9542, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"learning_rate": 4.079027777777778e-05, |
|
"loss": 0.978, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 20.26, |
|
"learning_rate": 4.013055555555556e-05, |
|
"loss": 0.9536, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 20.58, |
|
"learning_rate": 3.9470833333333335e-05, |
|
"loss": 0.9627, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"learning_rate": 3.8811111111111114e-05, |
|
"loss": 0.9505, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_cer": 0.04073046214122466, |
|
"eval_loss": 0.13684287667274475, |
|
"eval_runtime": 202.0319, |
|
"eval_samples_per_second": 28.718, |
|
"eval_steps_per_second": 0.45, |
|
"eval_wer": 0.21565434895464627, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 21.22, |
|
"learning_rate": 3.815138888888889e-05, |
|
"loss": 0.9395, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 21.54, |
|
"learning_rate": 3.749166666666667e-05, |
|
"loss": 0.9393, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"learning_rate": 3.6831944444444444e-05, |
|
"loss": 0.9541, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 22.19, |
|
"learning_rate": 3.6172222222222224e-05, |
|
"loss": 0.9538, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 22.51, |
|
"learning_rate": 3.55125e-05, |
|
"loss": 0.9389, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 22.51, |
|
"eval_cer": 0.042626369059986347, |
|
"eval_loss": 0.14371351897716522, |
|
"eval_runtime": 197.7954, |
|
"eval_samples_per_second": 29.333, |
|
"eval_steps_per_second": 0.46, |
|
"eval_wer": 0.22306385388645053, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 22.83, |
|
"learning_rate": 3.485277777777778e-05, |
|
"loss": 0.9429, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 23.15, |
|
"learning_rate": 3.419965277777778e-05, |
|
"loss": 0.9407, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 23.47, |
|
"learning_rate": 3.353993055555556e-05, |
|
"loss": 0.9224, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 23.79, |
|
"learning_rate": 3.288020833333334e-05, |
|
"loss": 0.9197, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 24.12, |
|
"learning_rate": 3.2220486111111115e-05, |
|
"loss": 0.9129, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 24.12, |
|
"eval_cer": 0.039372532723872845, |
|
"eval_loss": 0.13133755326271057, |
|
"eval_runtime": 209.4773, |
|
"eval_samples_per_second": 27.698, |
|
"eval_steps_per_second": 0.434, |
|
"eval_wer": 0.20760772098730473, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 24.44, |
|
"learning_rate": 3.156076388888889e-05, |
|
"loss": 0.9169, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 24.76, |
|
"learning_rate": 3.090763888888889e-05, |
|
"loss": 0.9133, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 25.08, |
|
"learning_rate": 3.024791666666667e-05, |
|
"loss": 0.9068, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 25.4, |
|
"learning_rate": 2.958819444444445e-05, |
|
"loss": 0.9137, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 25.72, |
|
"learning_rate": 2.8928472222222224e-05, |
|
"loss": 0.9118, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 25.72, |
|
"eval_cer": 0.03844869549731382, |
|
"eval_loss": 0.12918178737163544, |
|
"eval_runtime": 197.6149, |
|
"eval_samples_per_second": 29.36, |
|
"eval_steps_per_second": 0.46, |
|
"eval_wer": 0.2040445514181887, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 26.05, |
|
"learning_rate": 2.826875e-05, |
|
"loss": 0.9057, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 26.37, |
|
"learning_rate": 2.7609027777777785e-05, |
|
"loss": 0.8956, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 26.69, |
|
"learning_rate": 2.694930555555556e-05, |
|
"loss": 0.9088, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 2.6289583333333333e-05, |
|
"loss": 0.8997, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 2.5629861111111116e-05, |
|
"loss": 0.8848, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"eval_cer": 0.03840788341099997, |
|
"eval_loss": 0.1298777312040329, |
|
"eval_runtime": 197.318, |
|
"eval_samples_per_second": 29.404, |
|
"eval_steps_per_second": 0.461, |
|
"eval_wer": 0.20281749964604276, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 27.65, |
|
"learning_rate": 2.4970138888888895e-05, |
|
"loss": 0.8926, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 27.97, |
|
"learning_rate": 2.431041666666667e-05, |
|
"loss": 0.8802, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 2.365069444444445e-05, |
|
"loss": 0.8784, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 28.62, |
|
"learning_rate": 2.2990972222222225e-05, |
|
"loss": 0.8749, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 28.94, |
|
"learning_rate": 2.2331250000000004e-05, |
|
"loss": 0.8667, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 28.94, |
|
"eval_cer": 0.03673829806179692, |
|
"eval_loss": 0.12283530086278915, |
|
"eval_runtime": 199.3855, |
|
"eval_samples_per_second": 29.099, |
|
"eval_steps_per_second": 0.456, |
|
"eval_wer": 0.1945113030345934, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 29.26, |
|
"learning_rate": 2.1671527777777783e-05, |
|
"loss": 0.8628, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 29.58, |
|
"learning_rate": 2.101180555555556e-05, |
|
"loss": 0.8775, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 29.9, |
|
"learning_rate": 2.0352083333333338e-05, |
|
"loss": 0.8661, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 30.23, |
|
"learning_rate": 1.9692361111111114e-05, |
|
"loss": 0.8624, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 30.55, |
|
"learning_rate": 1.9032638888888893e-05, |
|
"loss": 0.8641, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 30.55, |
|
"eval_cer": 0.036352438336647766, |
|
"eval_loss": 0.12234856933355331, |
|
"eval_runtime": 202.2537, |
|
"eval_samples_per_second": 28.687, |
|
"eval_steps_per_second": 0.45, |
|
"eval_wer": 0.19385058284959178, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 30.87, |
|
"learning_rate": 1.837291666666667e-05, |
|
"loss": 0.8637, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 31.19, |
|
"learning_rate": 1.7713194444444447e-05, |
|
"loss": 0.8608, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 31.51, |
|
"learning_rate": 1.7053472222222226e-05, |
|
"loss": 0.8556, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 31.83, |
|
"learning_rate": 1.6393750000000002e-05, |
|
"loss": 0.854, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 32.15, |
|
"learning_rate": 1.573402777777778e-05, |
|
"loss": 0.8516, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 32.15, |
|
"eval_cer": 0.03494627645365231, |
|
"eval_loss": 0.11841931194067001, |
|
"eval_runtime": 199.2371, |
|
"eval_samples_per_second": 29.121, |
|
"eval_steps_per_second": 0.457, |
|
"eval_wer": 0.18762093539100477, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 32.48, |
|
"learning_rate": 1.5074305555555557e-05, |
|
"loss": 0.8433, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 1.4414583333333338e-05, |
|
"loss": 0.8507, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 33.12, |
|
"learning_rate": 1.3754861111111117e-05, |
|
"loss": 0.8419, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 33.44, |
|
"learning_rate": 1.3095138888888892e-05, |
|
"loss": 0.8344, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"learning_rate": 1.2435416666666671e-05, |
|
"loss": 0.8379, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 33.76, |
|
"eval_cer": 0.03375159538155591, |
|
"eval_loss": 0.11372008919715881, |
|
"eval_runtime": 199.4785, |
|
"eval_samples_per_second": 29.086, |
|
"eval_steps_per_second": 0.456, |
|
"eval_wer": 0.18207560526688377, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 34.08, |
|
"learning_rate": 1.1782291666666672e-05, |
|
"loss": 0.8302, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 34.41, |
|
"learning_rate": 1.1122569444444448e-05, |
|
"loss": 0.8294, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 34.73, |
|
"learning_rate": 1.0462847222222227e-05, |
|
"loss": 0.8225, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 35.05, |
|
"learning_rate": 9.803125000000001e-06, |
|
"loss": 0.8237, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 35.37, |
|
"learning_rate": 9.143402777777782e-06, |
|
"loss": 0.8235, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 35.37, |
|
"eval_cer": 0.03308005105220979, |
|
"eval_loss": 0.11269930005073547, |
|
"eval_runtime": 198.8276, |
|
"eval_samples_per_second": 29.181, |
|
"eval_steps_per_second": 0.458, |
|
"eval_wer": 0.1778753126622304, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 35.69, |
|
"learning_rate": 8.483680555555563e-06, |
|
"loss": 0.8205, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 7.823958333333337e-06, |
|
"loss": 0.826, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 36.33, |
|
"learning_rate": 7.1642361111111165e-06, |
|
"loss": 0.8207, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 36.66, |
|
"learning_rate": 6.504513888888891e-06, |
|
"loss": 0.8129, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 36.98, |
|
"learning_rate": 5.844791666666671e-06, |
|
"loss": 0.8112, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 36.98, |
|
"eval_cer": 0.03268677094773085, |
|
"eval_loss": 0.11033473163843155, |
|
"eval_runtime": 201.8103, |
|
"eval_samples_per_second": 28.75, |
|
"eval_steps_per_second": 0.451, |
|
"eval_wer": 0.17662466374062014, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 37.3, |
|
"learning_rate": 5.185069444444451e-06, |
|
"loss": 0.805, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 37.62, |
|
"learning_rate": 4.525347222222226e-06, |
|
"loss": 0.8108, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"learning_rate": 3.865625000000006e-06, |
|
"loss": 0.8025, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 38.26, |
|
"learning_rate": 3.2059027777777807e-06, |
|
"loss": 0.8018, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 38.59, |
|
"learning_rate": 2.5461805555555606e-06, |
|
"loss": 0.8069, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 38.59, |
|
"eval_cer": 0.032260099136267845, |
|
"eval_loss": 0.10924588888883591, |
|
"eval_runtime": 199.7196, |
|
"eval_samples_per_second": 29.051, |
|
"eval_steps_per_second": 0.456, |
|
"eval_wer": 0.17520883477275945, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 38.59, |
|
"step": 12000, |
|
"total_flos": 1.0363087195555613e+21, |
|
"train_loss": 1.0786900800069172, |
|
"train_runtime": 133237.4383, |
|
"train_samples_per_second": 11.528, |
|
"train_steps_per_second": 0.09 |
|
} |
|
], |
  "max_steps": 12000,
  "num_train_epochs": 39,
  "total_flos": 1.0363087195555613e+21,
  "trial_name": null,
  "trial_params": null
}