|
{ |
|
"best_metric": 21.95886308189698, |
|
"best_model_checkpoint": "./whisper-tiny-lv/checkpoint-91000", |
|
"epoch": 41.91616766467066, |
|
"eval_steps": 1000, |
|
"global_step": 91000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 6.921171188354492, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 1.8377, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 6.424713611602783, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.7472, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.457923889160156, |
|
"learning_rate": 9.988688827331488e-06, |
|
"loss": 0.5694, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 5.4181013107299805, |
|
"learning_rate": 9.977146814404432e-06, |
|
"loss": 0.481, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 0.511210560798645, |
|
"eval_runtime": 658.2418, |
|
"eval_samples_per_second": 10.258, |
|
"eval_steps_per_second": 0.321, |
|
"eval_wer": 51.379495011603225, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 5.981955528259277, |
|
"learning_rate": 9.965604801477378e-06, |
|
"loss": 0.4294, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 6.043950080871582, |
|
"learning_rate": 9.954062788550324e-06, |
|
"loss": 0.3919, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 5.050214767456055, |
|
"learning_rate": 9.94252077562327e-06, |
|
"loss": 0.3599, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 5.506693363189697, |
|
"learning_rate": 9.930978762696215e-06, |
|
"loss": 0.3399, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 0.39190948009490967, |
|
"eval_runtime": 666.4927, |
|
"eval_samples_per_second": 10.131, |
|
"eval_steps_per_second": 0.317, |
|
"eval_wer": 42.10881250371899, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 4.966064929962158, |
|
"learning_rate": 9.91943674976916e-06, |
|
"loss": 0.3048, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 4.654673099517822, |
|
"learning_rate": 9.907894736842107e-06, |
|
"loss": 0.2753, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 4.946408271789551, |
|
"learning_rate": 9.896352723915051e-06, |
|
"loss": 0.2609, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 4.584148406982422, |
|
"learning_rate": 9.884810710987997e-06, |
|
"loss": 0.2539, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 0.33731845021247864, |
|
"eval_runtime": 657.5879, |
|
"eval_samples_per_second": 10.268, |
|
"eval_steps_per_second": 0.321, |
|
"eval_wer": 38.419580696987126, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 4.824137210845947, |
|
"learning_rate": 9.873268698060943e-06, |
|
"loss": 0.2474, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 4.751330852508545, |
|
"learning_rate": 9.861726685133887e-06, |
|
"loss": 0.2374, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 5.100882530212402, |
|
"learning_rate": 9.850184672206833e-06, |
|
"loss": 0.2293, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 4.561028480529785, |
|
"learning_rate": 9.83864265927978e-06, |
|
"loss": 0.2252, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 0.30130764842033386, |
|
"eval_runtime": 668.7558, |
|
"eval_samples_per_second": 10.096, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 35.210345716722536, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 4.661614418029785, |
|
"learning_rate": 9.827100646352725e-06, |
|
"loss": 0.2127, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 4.2613959312438965, |
|
"learning_rate": 9.81555863342567e-06, |
|
"loss": 0.1886, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 4.164435386657715, |
|
"learning_rate": 9.804016620498615e-06, |
|
"loss": 0.1725, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 4.097248077392578, |
|
"learning_rate": 9.792474607571561e-06, |
|
"loss": 0.1715, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_loss": 0.283372163772583, |
|
"eval_runtime": 661.6835, |
|
"eval_samples_per_second": 10.204, |
|
"eval_steps_per_second": 0.319, |
|
"eval_wer": 33.31415990638079, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 4.298040866851807, |
|
"learning_rate": 9.780932594644506e-06, |
|
"loss": 0.1657, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 4.593989849090576, |
|
"learning_rate": 9.769390581717453e-06, |
|
"loss": 0.1637, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 4.4265055656433105, |
|
"learning_rate": 9.757848568790398e-06, |
|
"loss": 0.1619, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 3.670001983642578, |
|
"learning_rate": 9.746306555863344e-06, |
|
"loss": 0.1562, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 0.2656751573085785, |
|
"eval_runtime": 654.0062, |
|
"eval_samples_per_second": 10.324, |
|
"eval_steps_per_second": 0.323, |
|
"eval_wer": 31.9693754090882, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 4.299009799957275, |
|
"learning_rate": 9.73476454293629e-06, |
|
"loss": 0.1547, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 4.178277492523193, |
|
"learning_rate": 9.723222530009234e-06, |
|
"loss": 0.1523, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 3.7556698322296143, |
|
"learning_rate": 9.71168051708218e-06, |
|
"loss": 0.1205, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 4.128946304321289, |
|
"learning_rate": 9.700138504155126e-06, |
|
"loss": 0.1177, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_loss": 0.25489723682403564, |
|
"eval_runtime": 667.3883, |
|
"eval_samples_per_second": 10.117, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 30.705912688180575, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"grad_norm": 3.462374448776245, |
|
"learning_rate": 9.68859649122807e-06, |
|
"loss": 0.1173, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 4.692279815673828, |
|
"learning_rate": 9.677054478301016e-06, |
|
"loss": 0.117, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"grad_norm": 3.5876481533050537, |
|
"learning_rate": 9.665512465373962e-06, |
|
"loss": 0.1147, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"grad_norm": 4.460909843444824, |
|
"learning_rate": 9.653970452446908e-06, |
|
"loss": 0.1149, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_loss": 0.24221286177635193, |
|
"eval_runtime": 671.3545, |
|
"eval_samples_per_second": 10.057, |
|
"eval_steps_per_second": 0.314, |
|
"eval_wer": 29.97004978479481, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 3.741210460662842, |
|
"learning_rate": 9.642428439519853e-06, |
|
"loss": 0.1124, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 4.417487144470215, |
|
"learning_rate": 9.630886426592799e-06, |
|
"loss": 0.1124, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 3.8332269191741943, |
|
"learning_rate": 9.619344413665745e-06, |
|
"loss": 0.1028, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"grad_norm": 3.3890438079833984, |
|
"learning_rate": 9.607802400738689e-06, |
|
"loss": 0.0834, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_loss": 0.23551978170871735, |
|
"eval_runtime": 665.3587, |
|
"eval_samples_per_second": 10.148, |
|
"eval_steps_per_second": 0.317, |
|
"eval_wer": 29.347243985163736, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"grad_norm": 3.3677661418914795, |
|
"learning_rate": 9.596260387811635e-06, |
|
"loss": 0.0835, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 3.392284631729126, |
|
"learning_rate": 9.584718374884581e-06, |
|
"loss": 0.0835, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"grad_norm": 3.0857667922973633, |
|
"learning_rate": 9.573176361957525e-06, |
|
"loss": 0.0813, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"grad_norm": 3.726276159286499, |
|
"learning_rate": 9.561634349030471e-06, |
|
"loss": 0.0825, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_loss": 0.229040265083313, |
|
"eval_runtime": 671.1646, |
|
"eval_samples_per_second": 10.06, |
|
"eval_steps_per_second": 0.314, |
|
"eval_wer": 28.81567725172065, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 3.28595232963562, |
|
"learning_rate": 9.550092336103417e-06, |
|
"loss": 0.0816, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 3.427420139312744, |
|
"learning_rate": 9.538550323176363e-06, |
|
"loss": 0.0814, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 3.8041698932647705, |
|
"learning_rate": 9.527008310249308e-06, |
|
"loss": 0.0802, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 2.9603447914123535, |
|
"learning_rate": 9.515466297322253e-06, |
|
"loss": 0.0669, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_loss": 0.22645771503448486, |
|
"eval_runtime": 667.9924, |
|
"eval_samples_per_second": 10.108, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 28.53402622131424, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"grad_norm": 3.0245766639709473, |
|
"learning_rate": 9.5039242843952e-06, |
|
"loss": 0.057, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"grad_norm": 2.435096502304077, |
|
"learning_rate": 9.492382271468144e-06, |
|
"loss": 0.0588, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"grad_norm": 2.9605906009674072, |
|
"learning_rate": 9.480840258541091e-06, |
|
"loss": 0.0583, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"grad_norm": 2.877732515335083, |
|
"learning_rate": 9.469298245614036e-06, |
|
"loss": 0.0567, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"eval_loss": 0.2239210605621338, |
|
"eval_runtime": 671.9438, |
|
"eval_samples_per_second": 10.048, |
|
"eval_steps_per_second": 0.314, |
|
"eval_wer": 27.875518178392213, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"grad_norm": 3.2738921642303467, |
|
"learning_rate": 9.457848568790397e-06, |
|
"loss": 0.0589, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"grad_norm": 2.727008104324341, |
|
"learning_rate": 9.446306555863343e-06, |
|
"loss": 0.0586, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"grad_norm": 3.6808159351348877, |
|
"learning_rate": 9.43476454293629e-06, |
|
"loss": 0.0602, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"grad_norm": 3.2749545574188232, |
|
"learning_rate": 9.423222530009234e-06, |
|
"loss": 0.0589, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_loss": 0.21996423602104187, |
|
"eval_runtime": 674.0776, |
|
"eval_samples_per_second": 10.017, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 27.879485094313427, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"grad_norm": 2.8687331676483154, |
|
"learning_rate": 9.41168051708218e-06, |
|
"loss": 0.0406, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"grad_norm": 2.4594838619232178, |
|
"learning_rate": 9.400138504155126e-06, |
|
"loss": 0.0405, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"grad_norm": 2.6956489086151123, |
|
"learning_rate": 9.388596491228072e-06, |
|
"loss": 0.0398, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"grad_norm": 2.6623504161834717, |
|
"learning_rate": 9.377054478301016e-06, |
|
"loss": 0.0415, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_loss": 0.22312334179878235, |
|
"eval_runtime": 673.796, |
|
"eval_samples_per_second": 10.021, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 26.97701172223655, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"grad_norm": 3.0151000022888184, |
|
"learning_rate": 9.365512465373962e-06, |
|
"loss": 0.0422, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"grad_norm": 3.232048749923706, |
|
"learning_rate": 9.353970452446908e-06, |
|
"loss": 0.0412, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"grad_norm": 2.809514284133911, |
|
"learning_rate": 9.342428439519852e-06, |
|
"loss": 0.0411, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"grad_norm": 2.0289547443389893, |
|
"learning_rate": 9.3308864265928e-06, |
|
"loss": 0.0423, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 0.2184455841779709, |
|
"eval_runtime": 688.9862, |
|
"eval_samples_per_second": 9.8, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 27.084118452109408, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"grad_norm": 3.098123788833618, |
|
"learning_rate": 9.319344413665744e-06, |
|
"loss": 0.0375, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"grad_norm": 2.8762073516845703, |
|
"learning_rate": 9.307802400738688e-06, |
|
"loss": 0.0266, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"grad_norm": 4.211934566497803, |
|
"learning_rate": 9.296260387811636e-06, |
|
"loss": 0.0278, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 2.860619306564331, |
|
"learning_rate": 9.28471837488458e-06, |
|
"loss": 0.0281, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"eval_loss": 0.22083307802677155, |
|
"eval_runtime": 665.4984, |
|
"eval_samples_per_second": 10.146, |
|
"eval_steps_per_second": 0.317, |
|
"eval_wer": 27.224943967312615, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"grad_norm": 2.3288867473602295, |
|
"learning_rate": 9.273176361957526e-06, |
|
"loss": 0.0278, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": 2.9130966663360596, |
|
"learning_rate": 9.261634349030472e-06, |
|
"loss": 0.0281, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"grad_norm": 1.9348437786102295, |
|
"learning_rate": 9.250092336103417e-06, |
|
"loss": 0.0294, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"grad_norm": 3.404127836227417, |
|
"learning_rate": 9.238550323176363e-06, |
|
"loss": 0.0296, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"eval_loss": 0.2222394496202469, |
|
"eval_runtime": 668.3229, |
|
"eval_samples_per_second": 10.103, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 26.802467421702996, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"grad_norm": 2.1417627334594727, |
|
"learning_rate": 9.227008310249309e-06, |
|
"loss": 0.0287, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"grad_norm": 1.783292531967163, |
|
"learning_rate": 9.215512465373963e-06, |
|
"loss": 0.023, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"grad_norm": 1.875301480293274, |
|
"learning_rate": 9.203970452446908e-06, |
|
"loss": 0.0185, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"grad_norm": 1.9140523672103882, |
|
"learning_rate": 9.192428439519852e-06, |
|
"loss": 0.0186, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"eval_loss": 0.2229994833469391, |
|
"eval_runtime": 670.5805, |
|
"eval_samples_per_second": 10.069, |
|
"eval_steps_per_second": 0.315, |
|
"eval_wer": 26.498998353729892, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"grad_norm": 2.4175968170166016, |
|
"learning_rate": 9.1808864265928e-06, |
|
"loss": 0.0192, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"grad_norm": 2.8320376873016357, |
|
"learning_rate": 9.169344413665744e-06, |
|
"loss": 0.0198, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"grad_norm": 2.431974172592163, |
|
"learning_rate": 9.15780240073869e-06, |
|
"loss": 0.0196, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"grad_norm": 2.0679523944854736, |
|
"learning_rate": 9.146260387811636e-06, |
|
"loss": 0.0201, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"eval_loss": 0.22365085780620575, |
|
"eval_runtime": 657.293, |
|
"eval_samples_per_second": 10.272, |
|
"eval_steps_per_second": 0.321, |
|
"eval_wer": 26.042803022789933, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"grad_norm": 2.1873040199279785, |
|
"learning_rate": 9.13471837488458e-06, |
|
"loss": 0.0202, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"grad_norm": 2.5239417552948, |
|
"learning_rate": 9.123222530009235e-06, |
|
"loss": 0.0201, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 2.638354778289795, |
|
"learning_rate": 9.111680517082179e-06, |
|
"loss": 0.0134, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"grad_norm": 1.6381027698516846, |
|
"learning_rate": 9.100138504155125e-06, |
|
"loss": 0.0127, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"eval_loss": 0.22688329219818115, |
|
"eval_runtime": 665.6628, |
|
"eval_samples_per_second": 10.143, |
|
"eval_steps_per_second": 0.317, |
|
"eval_wer": 26.134042088977928, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"grad_norm": 1.1191093921661377, |
|
"learning_rate": 9.088596491228071e-06, |
|
"loss": 0.0133, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"grad_norm": 2.025820016860962, |
|
"learning_rate": 9.077054478301015e-06, |
|
"loss": 0.0133, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"grad_norm": 1.148484468460083, |
|
"learning_rate": 9.065512465373963e-06, |
|
"loss": 0.0136, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"grad_norm": 2.849606513977051, |
|
"learning_rate": 9.053970452446907e-06, |
|
"loss": 0.0135, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_loss": 0.22943329811096191, |
|
"eval_runtime": 665.6291, |
|
"eval_samples_per_second": 10.144, |
|
"eval_steps_per_second": 0.317, |
|
"eval_wer": 26.330404427078168, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"grad_norm": 2.421963691711426, |
|
"learning_rate": 9.042428439519853e-06, |
|
"loss": 0.014, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"grad_norm": 1.5668810606002808, |
|
"learning_rate": 9.0308864265928e-06, |
|
"loss": 0.0148, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"grad_norm": 0.8170527815818787, |
|
"learning_rate": 9.019390581717452e-06, |
|
"loss": 0.0135, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"grad_norm": 1.8164241313934326, |
|
"learning_rate": 9.007848568790398e-06, |
|
"loss": 0.0086, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"eval_loss": 0.23072278499603271, |
|
"eval_runtime": 665.1317, |
|
"eval_samples_per_second": 10.151, |
|
"eval_steps_per_second": 0.317, |
|
"eval_wer": 26.07057143423845, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"grad_norm": 1.6295300722122192, |
|
"learning_rate": 8.996306555863344e-06, |
|
"loss": 0.0091, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"grad_norm": 1.5732313394546509, |
|
"learning_rate": 8.984764542936288e-06, |
|
"loss": 0.0093, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"grad_norm": 1.6755157709121704, |
|
"learning_rate": 8.973222530009234e-06, |
|
"loss": 0.0095, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"grad_norm": 1.9076685905456543, |
|
"learning_rate": 8.96168051708218e-06, |
|
"loss": 0.0097, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"eval_loss": 0.2347114235162735, |
|
"eval_runtime": 674.6572, |
|
"eval_samples_per_second": 10.008, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 25.425947597040683, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"grad_norm": 1.4558827877044678, |
|
"learning_rate": 8.950138504155126e-06, |
|
"loss": 0.0097, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"grad_norm": 2.6334474086761475, |
|
"learning_rate": 8.93859649122807e-06, |
|
"loss": 0.0103, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"grad_norm": 1.4682759046554565, |
|
"learning_rate": 8.927100646352724e-06, |
|
"loss": 0.0104, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"grad_norm": 1.105055332183838, |
|
"learning_rate": 8.915558633425671e-06, |
|
"loss": 0.0082, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"eval_loss": 0.23529361188411713, |
|
"eval_runtime": 677.9021, |
|
"eval_samples_per_second": 9.96, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 25.18594918380705, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"grad_norm": 0.9997087717056274, |
|
"learning_rate": 8.904016620498616e-06, |
|
"loss": 0.0062, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"grad_norm": 1.274902582168579, |
|
"learning_rate": 8.89247460757156e-06, |
|
"loss": 0.0067, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"grad_norm": 1.1603277921676636, |
|
"learning_rate": 8.880932594644508e-06, |
|
"loss": 0.0068, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"grad_norm": 1.450378179550171, |
|
"learning_rate": 8.869390581717452e-06, |
|
"loss": 0.0069, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"eval_loss": 0.23996803164482117, |
|
"eval_runtime": 672.8137, |
|
"eval_samples_per_second": 10.035, |
|
"eval_steps_per_second": 0.314, |
|
"eval_wer": 26.251066108653827, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"grad_norm": 1.0134578943252563, |
|
"learning_rate": 8.857848568790398e-06, |
|
"loss": 0.0072, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"grad_norm": 1.8789595365524292, |
|
"learning_rate": 8.846306555863344e-06, |
|
"loss": 0.0072, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"grad_norm": 0.972827136516571, |
|
"learning_rate": 8.834764542936288e-06, |
|
"loss": 0.0076, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"grad_norm": 2.2902746200561523, |
|
"learning_rate": 8.823268698060943e-06, |
|
"loss": 0.0075, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"eval_loss": 0.23599743843078613, |
|
"eval_runtime": 667.2943, |
|
"eval_samples_per_second": 10.118, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 25.400162643552772, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"grad_norm": 1.1216572523117065, |
|
"learning_rate": 8.811726685133887e-06, |
|
"loss": 0.0054, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"grad_norm": 1.168253779411316, |
|
"learning_rate": 8.800184672206835e-06, |
|
"loss": 0.0048, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"grad_norm": 0.6528610587120056, |
|
"learning_rate": 8.788642659279779e-06, |
|
"loss": 0.0049, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"grad_norm": 1.3029311895370483, |
|
"learning_rate": 8.777100646352723e-06, |
|
"loss": 0.0052, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"eval_loss": 0.24270391464233398, |
|
"eval_runtime": 668.33, |
|
"eval_samples_per_second": 10.103, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 24.73967114267013, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"grad_norm": 1.0006558895111084, |
|
"learning_rate": 8.765558633425671e-06, |
|
"loss": 0.0053, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"grad_norm": 1.4560002088546753, |
|
"learning_rate": 8.754016620498615e-06, |
|
"loss": 0.0052, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"grad_norm": 1.9307841062545776, |
|
"learning_rate": 8.742474607571561e-06, |
|
"loss": 0.0053, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"grad_norm": 1.977569580078125, |
|
"learning_rate": 8.730932594644507e-06, |
|
"loss": 0.0056, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_loss": 0.24133123457431793, |
|
"eval_runtime": 682.6727, |
|
"eval_samples_per_second": 9.891, |
|
"eval_steps_per_second": 0.309, |
|
"eval_wer": 25.247436380585913, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"grad_norm": 0.9914600849151611, |
|
"learning_rate": 8.719390581717452e-06, |
|
"loss": 0.0056, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"grad_norm": 0.8464221358299255, |
|
"learning_rate": 8.707848568790398e-06, |
|
"loss": 0.0037, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"grad_norm": 0.6425495147705078, |
|
"learning_rate": 8.69635272391505e-06, |
|
"loss": 0.0038, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 13.36, |
|
"grad_norm": 2.0510671138763428, |
|
"learning_rate": 8.684810710987998e-06, |
|
"loss": 0.0041, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 13.36, |
|
"eval_loss": 0.24362993240356445, |
|
"eval_runtime": 666.1767, |
|
"eval_samples_per_second": 10.135, |
|
"eval_steps_per_second": 0.317, |
|
"eval_wer": 25.074875538012968, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"grad_norm": 1.125919222831726, |
|
"learning_rate": 8.673268698060943e-06, |
|
"loss": 0.0041, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"grad_norm": 1.247779369354248, |
|
"learning_rate": 8.661726685133889e-06, |
|
"loss": 0.0044, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"grad_norm": 1.009018063545227, |
|
"learning_rate": 8.650184672206835e-06, |
|
"loss": 0.0046, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"grad_norm": 1.848225712776184, |
|
"learning_rate": 8.638642659279779e-06, |
|
"loss": 0.0048, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"eval_loss": 0.24545399844646454, |
|
"eval_runtime": 667.6447, |
|
"eval_samples_per_second": 10.113, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 25.29900628756174, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"grad_norm": 0.4666302502155304, |
|
"learning_rate": 8.627100646352725e-06, |
|
"loss": 0.0046, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"grad_norm": 0.6961706280708313, |
|
"learning_rate": 8.61555863342567e-06, |
|
"loss": 0.004, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 14.16, |
|
"grad_norm": 0.9278767108917236, |
|
"learning_rate": 8.604016620498615e-06, |
|
"loss": 0.003, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"grad_norm": 0.9175160527229309, |
|
"learning_rate": 8.59252077562327e-06, |
|
"loss": 0.0033, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"eval_loss": 0.247360959649086, |
|
"eval_runtime": 674.4213, |
|
"eval_samples_per_second": 10.012, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 25.13041236091001, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 14.39, |
|
"grad_norm": 0.6542864441871643, |
|
"learning_rate": 8.580978762696216e-06, |
|
"loss": 0.0034, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"grad_norm": 14.155986785888672, |
|
"learning_rate": 8.56943674976916e-06, |
|
"loss": 0.0037, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"grad_norm": 0.7710313200950623, |
|
"learning_rate": 8.557940904893815e-06, |
|
"loss": 0.0039, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"grad_norm": 1.6279186010360718, |
|
"learning_rate": 8.546398891966759e-06, |
|
"loss": 0.0038, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"eval_loss": 0.25091758370399475, |
|
"eval_runtime": 686.3757, |
|
"eval_samples_per_second": 9.837, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 25.287105539798084, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"grad_norm": 0.883815586566925, |
|
"learning_rate": 8.534856879039707e-06, |
|
"loss": 0.0038, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"grad_norm": 0.6397805213928223, |
|
"learning_rate": 8.523314866112651e-06, |
|
"loss": 0.0037, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"grad_norm": 0.40614956617355347, |
|
"learning_rate": 8.511772853185595e-06, |
|
"loss": 0.003, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"grad_norm": 0.3507106602191925, |
|
"learning_rate": 8.500230840258543e-06, |
|
"loss": 0.0026, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"eval_loss": 0.2525634467601776, |
|
"eval_runtime": 668.1892, |
|
"eval_samples_per_second": 10.105, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 25.26925441815261, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"grad_norm": 0.236125648021698, |
|
"learning_rate": 8.488688827331487e-06, |
|
"loss": 0.0028, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"grad_norm": 1.2400788068771362, |
|
"learning_rate": 8.477146814404433e-06, |
|
"loss": 0.0028, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 15.55, |
|
"grad_norm": 1.4743680953979492, |
|
"learning_rate": 8.465604801477379e-06, |
|
"loss": 0.0032, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"grad_norm": 1.9355671405792236, |
|
"learning_rate": 8.454062788550323e-06, |
|
"loss": 0.0035, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 15.66, |
|
"eval_loss": 0.2545054256916046, |
|
"eval_runtime": 681.2267, |
|
"eval_samples_per_second": 9.912, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 25.43784834480433, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"grad_norm": 0.8273574709892273, |
|
"learning_rate": 8.442566943674978e-06, |
|
"loss": 0.0033, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"grad_norm": 1.760032057762146, |
|
"learning_rate": 8.431024930747922e-06, |
|
"loss": 0.0035, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"grad_norm": 1.946081280708313, |
|
"learning_rate": 8.419482917820868e-06, |
|
"loss": 0.0033, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"grad_norm": 0.564914345741272, |
|
"learning_rate": 8.407940904893814e-06, |
|
"loss": 0.0024, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"eval_loss": 0.2501762807369232, |
|
"eval_runtime": 670.7755, |
|
"eval_samples_per_second": 10.066, |
|
"eval_steps_per_second": 0.315, |
|
"eval_wer": 24.212071325148262, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"grad_norm": 1.6005988121032715, |
|
"learning_rate": 8.396398891966759e-06, |
|
"loss": 0.0024, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"grad_norm": 0.8374671339988708, |
|
"learning_rate": 8.384856879039706e-06, |
|
"loss": 0.0028, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"grad_norm": 1.0652960538864136, |
|
"learning_rate": 8.37331486611265e-06, |
|
"loss": 0.003, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"grad_norm": 1.4033843278884888, |
|
"learning_rate": 8.361772853185595e-06, |
|
"loss": 0.0027, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"eval_loss": 0.2562379240989685, |
|
"eval_runtime": 678.952, |
|
"eval_samples_per_second": 9.945, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 24.701985441418568, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"grad_norm": 0.6179186701774597, |
|
"learning_rate": 8.350230840258543e-06, |
|
"loss": 0.0029, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"grad_norm": 1.806768774986267, |
|
"learning_rate": 8.338688827331487e-06, |
|
"loss": 0.0031, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"grad_norm": 0.528777003288269, |
|
"learning_rate": 8.327146814404433e-06, |
|
"loss": 0.0031, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 17.04, |
|
"grad_norm": 0.18216899037361145, |
|
"learning_rate": 8.315604801477379e-06, |
|
"loss": 0.0029, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 17.04, |
|
"eval_loss": 0.25483274459838867, |
|
"eval_runtime": 673.8208, |
|
"eval_samples_per_second": 10.02, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 24.735704226748915, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"grad_norm": 0.3349086046218872, |
|
"learning_rate": 8.304062788550323e-06, |
|
"loss": 0.0019, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"grad_norm": 1.109415888786316, |
|
"learning_rate": 8.29252077562327e-06, |
|
"loss": 0.0023, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"grad_norm": 0.5039780735969543, |
|
"learning_rate": 8.280978762696215e-06, |
|
"loss": 0.0024, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"grad_norm": 1.2728774547576904, |
|
"learning_rate": 8.269436749769161e-06, |
|
"loss": 0.0026, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"eval_loss": 0.25514695048332214, |
|
"eval_runtime": 676.5647, |
|
"eval_samples_per_second": 9.98, |
|
"eval_steps_per_second": 0.312, |
|
"eval_wer": 24.102981137314792, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"grad_norm": 0.5640347003936768, |
|
"learning_rate": 8.257894736842105e-06, |
|
"loss": 0.0027, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 17.73, |
|
"grad_norm": 0.6144846677780151, |
|
"learning_rate": 8.24639889196676e-06, |
|
"loss": 0.0027, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"grad_norm": 0.42385855317115784, |
|
"learning_rate": 8.234856879039706e-06, |
|
"loss": 0.0027, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"grad_norm": 0.5083030462265015, |
|
"learning_rate": 8.22331486611265e-06, |
|
"loss": 0.0026, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"eval_loss": 0.2563398778438568, |
|
"eval_runtime": 693.9966, |
|
"eval_samples_per_second": 9.729, |
|
"eval_steps_per_second": 0.304, |
|
"eval_wer": 24.52942459884563, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 18.08, |
|
"grad_norm": 0.7999371290206909, |
|
"learning_rate": 8.211772853185596e-06, |
|
"loss": 0.0022, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"grad_norm": 0.47253143787384033, |
|
"learning_rate": 8.200230840258542e-06, |
|
"loss": 0.0019, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"grad_norm": 0.9011787176132202, |
|
"learning_rate": 8.188688827331487e-06, |
|
"loss": 0.0019, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"grad_norm": 1.1234568357467651, |
|
"learning_rate": 8.177146814404433e-06, |
|
"loss": 0.002, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"eval_loss": 0.2568005323410034, |
|
"eval_runtime": 670.3005, |
|
"eval_samples_per_second": 10.073, |
|
"eval_steps_per_second": 0.315, |
|
"eval_wer": 24.36479758811512, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 18.54, |
|
"grad_norm": 0.7889108657836914, |
|
"learning_rate": 8.165604801477379e-06, |
|
"loss": 0.002, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"grad_norm": 1.1920995712280273, |
|
"learning_rate": 8.154062788550325e-06, |
|
"loss": 0.0021, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 18.77, |
|
"grad_norm": 0.8429755568504333, |
|
"learning_rate": 8.142520775623269e-06, |
|
"loss": 0.0022, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"grad_norm": 3.1110446453094482, |
|
"learning_rate": 8.131024930747923e-06, |
|
"loss": 0.0028, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"eval_loss": 0.2559947073459625, |
|
"eval_runtime": 691.8949, |
|
"eval_samples_per_second": 9.759, |
|
"eval_steps_per_second": 0.305, |
|
"eval_wer": 24.575044131939624, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.3139660656452179, |
|
"learning_rate": 8.11948291782087e-06, |
|
"loss": 0.0026, |
|
"step": 41250 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"grad_norm": 0.21102827787399292, |
|
"learning_rate": 8.107940904893814e-06, |
|
"loss": 0.0017, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"grad_norm": 0.2366773635149002, |
|
"learning_rate": 8.09639889196676e-06, |
|
"loss": 0.0015, |
|
"step": 41750 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"grad_norm": 0.5561370253562927, |
|
"learning_rate": 8.084903047091414e-06, |
|
"loss": 0.0019, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"eval_loss": 0.26248979568481445, |
|
"eval_runtime": 678.0519, |
|
"eval_samples_per_second": 9.958, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 24.596862169506316, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 19.46, |
|
"grad_norm": 0.6396375894546509, |
|
"learning_rate": 8.073361034164359e-06, |
|
"loss": 0.0019, |
|
"step": 42250 |
|
}, |
|
{ |
|
"epoch": 19.58, |
|
"grad_norm": 0.4240398406982422, |
|
"learning_rate": 8.061819021237305e-06, |
|
"loss": 0.0021, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"grad_norm": 2.101404905319214, |
|
"learning_rate": 8.05027700831025e-06, |
|
"loss": 0.002, |
|
"step": 42750 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"grad_norm": 0.4409444034099579, |
|
"learning_rate": 8.038734995383195e-06, |
|
"loss": 0.0021, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_loss": 0.26329436898231506, |
|
"eval_runtime": 674.3068, |
|
"eval_samples_per_second": 10.013, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 24.102981137314792, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"grad_norm": 1.5778237581253052, |
|
"learning_rate": 8.027192982456141e-06, |
|
"loss": 0.0021, |
|
"step": 43250 |
|
}, |
|
{ |
|
"epoch": 20.04, |
|
"grad_norm": 0.34445250034332275, |
|
"learning_rate": 8.015650969529087e-06, |
|
"loss": 0.002, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 20.15, |
|
"grad_norm": 0.8328190445899963, |
|
"learning_rate": 8.004108956602033e-06, |
|
"loss": 0.0015, |
|
"step": 43750 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"grad_norm": 1.4873714447021484, |
|
"learning_rate": 7.992566943674977e-06, |
|
"loss": 0.0015, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"eval_loss": 0.2650669813156128, |
|
"eval_runtime": 678.5481, |
|
"eval_samples_per_second": 9.951, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 24.23983973659678, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 20.38, |
|
"grad_norm": 2.7649030685424805, |
|
"learning_rate": 7.981024930747923e-06, |
|
"loss": 0.0016, |
|
"step": 44250 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"grad_norm": 0.20013980567455292, |
|
"learning_rate": 7.969529085872578e-06, |
|
"loss": 0.0019, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 20.61, |
|
"grad_norm": 1.8381603956222534, |
|
"learning_rate": 7.957987072945522e-06, |
|
"loss": 0.0021, |
|
"step": 44750 |
|
}, |
|
{ |
|
"epoch": 20.73, |
|
"grad_norm": 1.9389904737472534, |
|
"learning_rate": 7.946445060018468e-06, |
|
"loss": 0.0018, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 20.73, |
|
"eval_loss": 0.26352566480636597, |
|
"eval_runtime": 684.4278, |
|
"eval_samples_per_second": 9.865, |
|
"eval_steps_per_second": 0.308, |
|
"eval_wer": 24.100997679354187, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 20.84, |
|
"grad_norm": 0.1582639217376709, |
|
"learning_rate": 7.934903047091414e-06, |
|
"loss": 0.0022, |
|
"step": 45250 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"grad_norm": 1.1058118343353271, |
|
"learning_rate": 7.923361034164358e-06, |
|
"loss": 0.0021, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 21.07, |
|
"grad_norm": 0.4075948894023895, |
|
"learning_rate": 7.911819021237304e-06, |
|
"loss": 0.0018, |
|
"step": 45750 |
|
}, |
|
{ |
|
"epoch": 21.19, |
|
"grad_norm": 0.894478440284729, |
|
"learning_rate": 7.90027700831025e-06, |
|
"loss": 0.0013, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 21.19, |
|
"eval_loss": 0.2585604190826416, |
|
"eval_runtime": 674.5545, |
|
"eval_samples_per_second": 10.01, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 23.849098518356904, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 21.3, |
|
"grad_norm": 0.8086264729499817, |
|
"learning_rate": 7.888734995383196e-06, |
|
"loss": 0.0016, |
|
"step": 46250 |
|
}, |
|
{ |
|
"epoch": 21.42, |
|
"grad_norm": 0.5767725706100464, |
|
"learning_rate": 7.87719298245614e-06, |
|
"loss": 0.0016, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 21.53, |
|
"grad_norm": 1.622611403465271, |
|
"learning_rate": 7.865650969529087e-06, |
|
"loss": 0.0019, |
|
"step": 46750 |
|
}, |
|
{ |
|
"epoch": 21.65, |
|
"grad_norm": 2.0076467990875244, |
|
"learning_rate": 7.854108956602033e-06, |
|
"loss": 0.0018, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 21.65, |
|
"eval_loss": 0.2612689435482025, |
|
"eval_runtime": 680.9382, |
|
"eval_samples_per_second": 9.916, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 23.940337584544896, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"grad_norm": 0.3226953446865082, |
|
"learning_rate": 7.842566943674977e-06, |
|
"loss": 0.0018, |
|
"step": 47250 |
|
}, |
|
{ |
|
"epoch": 21.88, |
|
"grad_norm": 0.5793449878692627, |
|
"learning_rate": 7.831024930747923e-06, |
|
"loss": 0.0019, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"grad_norm": 0.3840419352054596, |
|
"learning_rate": 7.819482917820869e-06, |
|
"loss": 0.0019, |
|
"step": 47750 |
|
}, |
|
{ |
|
"epoch": 22.11, |
|
"grad_norm": 2.221217155456543, |
|
"learning_rate": 7.807940904893813e-06, |
|
"loss": 0.0014, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 22.11, |
|
"eval_loss": 0.26184049248695374, |
|
"eval_runtime": 679.8585, |
|
"eval_samples_per_second": 9.931, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 23.591248983477794, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"grad_norm": 0.1267741620540619, |
|
"learning_rate": 7.79639889196676e-06, |
|
"loss": 0.0013, |
|
"step": 48250 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"grad_norm": 0.3871385157108307, |
|
"learning_rate": 7.784856879039705e-06, |
|
"loss": 0.0016, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 22.46, |
|
"grad_norm": 0.319933146238327, |
|
"learning_rate": 7.773361034164358e-06, |
|
"loss": 0.0016, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"grad_norm": 0.21517297625541687, |
|
"learning_rate": 7.761819021237304e-06, |
|
"loss": 0.0017, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"eval_loss": 0.2654561698436737, |
|
"eval_runtime": 676.7053, |
|
"eval_samples_per_second": 9.978, |
|
"eval_steps_per_second": 0.312, |
|
"eval_wer": 23.755875994208303, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 22.69, |
|
"grad_norm": 0.17343254387378693, |
|
"learning_rate": 7.75027700831025e-06, |
|
"loss": 0.0017, |
|
"step": 49250 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"grad_norm": 0.31837859749794006, |
|
"learning_rate": 7.738734995383196e-06, |
|
"loss": 0.0015, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 22.92, |
|
"grad_norm": 1.0666159391403198, |
|
"learning_rate": 7.72719298245614e-06, |
|
"loss": 0.0015, |
|
"step": 49750 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"grad_norm": 0.5933089852333069, |
|
"learning_rate": 7.715650969529086e-06, |
|
"loss": 0.0016, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"eval_loss": 0.2641240656375885, |
|
"eval_runtime": 701.7818, |
|
"eval_samples_per_second": 9.621, |
|
"eval_steps_per_second": 0.301, |
|
"eval_wer": 23.57141440387171, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 23.15, |
|
"grad_norm": 4.56223201751709, |
|
"learning_rate": 7.704108956602032e-06, |
|
"loss": 0.0012, |
|
"step": 50250 |
|
}, |
|
{ |
|
"epoch": 23.26, |
|
"grad_norm": 0.13189882040023804, |
|
"learning_rate": 7.692566943674977e-06, |
|
"loss": 0.0013, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 23.38, |
|
"grad_norm": 0.3501899242401123, |
|
"learning_rate": 7.681024930747923e-06, |
|
"loss": 0.0014, |
|
"step": 50750 |
|
}, |
|
{ |
|
"epoch": 23.49, |
|
"grad_norm": 0.3943934738636017, |
|
"learning_rate": 7.669529085872577e-06, |
|
"loss": 0.0014, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 23.49, |
|
"eval_loss": 0.26596611738204956, |
|
"eval_runtime": 667.3547, |
|
"eval_samples_per_second": 10.118, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 23.60910010512327, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 23.61, |
|
"grad_norm": 0.6058038473129272, |
|
"learning_rate": 7.657987072945522e-06, |
|
"loss": 0.0014, |
|
"step": 51250 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"grad_norm": 0.18997740745544434, |
|
"learning_rate": 7.646445060018468e-06, |
|
"loss": 0.0018, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 23.84, |
|
"grad_norm": 0.4755234122276306, |
|
"learning_rate": 7.634903047091414e-06, |
|
"loss": 0.0017, |
|
"step": 51750 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"grad_norm": 0.6140190362930298, |
|
"learning_rate": 7.62336103416436e-06, |
|
"loss": 0.0018, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"eval_loss": 0.2636994421482086, |
|
"eval_runtime": 678.6943, |
|
"eval_samples_per_second": 9.949, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 23.870916555923596, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 24.07, |
|
"grad_norm": 0.6709560751914978, |
|
"learning_rate": 7.611819021237305e-06, |
|
"loss": 0.0013, |
|
"step": 52250 |
|
}, |
|
{ |
|
"epoch": 24.18, |
|
"grad_norm": 0.40510040521621704, |
|
"learning_rate": 7.60027700831025e-06, |
|
"loss": 0.0011, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"grad_norm": 0.464121550321579, |
|
"learning_rate": 7.588734995383196e-06, |
|
"loss": 0.0011, |
|
"step": 52750 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"grad_norm": 0.415995329618454, |
|
"learning_rate": 7.577192982456141e-06, |
|
"loss": 0.0012, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"eval_loss": 0.2662787139415741, |
|
"eval_runtime": 679.0891, |
|
"eval_samples_per_second": 9.943, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 23.48017533768372, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"grad_norm": 0.6342004537582397, |
|
"learning_rate": 7.565650969529087e-06, |
|
"loss": 0.0014, |
|
"step": 53250 |
|
}, |
|
{ |
|
"epoch": 24.64, |
|
"grad_norm": 0.36562052369117737, |
|
"learning_rate": 7.554155124653741e-06, |
|
"loss": 0.0014, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 24.76, |
|
"grad_norm": 0.47582271695137024, |
|
"learning_rate": 7.542613111726685e-06, |
|
"loss": 0.0015, |
|
"step": 53750 |
|
}, |
|
{ |
|
"epoch": 24.87, |
|
"grad_norm": 0.7419930696487427, |
|
"learning_rate": 7.531071098799632e-06, |
|
"loss": 0.0015, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 24.87, |
|
"eval_loss": 0.2703973352909088, |
|
"eval_runtime": 665.4932, |
|
"eval_samples_per_second": 10.146, |
|
"eval_steps_per_second": 0.317, |
|
"eval_wer": 23.75190907828709, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"grad_norm": 0.7274812459945679, |
|
"learning_rate": 7.519529085872577e-06, |
|
"loss": 0.0016, |
|
"step": 54250 |
|
}, |
|
{ |
|
"epoch": 25.1, |
|
"grad_norm": 1.2647254467010498, |
|
"learning_rate": 7.507987072945521e-06, |
|
"loss": 0.0012, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 25.22, |
|
"grad_norm": 0.27594470977783203, |
|
"learning_rate": 7.496445060018468e-06, |
|
"loss": 0.0011, |
|
"step": 54750 |
|
}, |
|
{ |
|
"epoch": 25.33, |
|
"grad_norm": 0.14190466701984406, |
|
"learning_rate": 7.484903047091413e-06, |
|
"loss": 0.0012, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 25.33, |
|
"eval_loss": 0.2656785249710083, |
|
"eval_runtime": 668.1719, |
|
"eval_samples_per_second": 10.105, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 24.233889362714958, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 25.45, |
|
"grad_norm": 0.559374213218689, |
|
"learning_rate": 7.473361034164359e-06, |
|
"loss": 0.0013, |
|
"step": 55250 |
|
}, |
|
{ |
|
"epoch": 25.56, |
|
"grad_norm": 3.8242385387420654, |
|
"learning_rate": 7.461865189289012e-06, |
|
"loss": 0.0012, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 25.68, |
|
"grad_norm": 0.23001307249069214, |
|
"learning_rate": 7.450323176361957e-06, |
|
"loss": 0.0014, |
|
"step": 55750 |
|
}, |
|
{ |
|
"epoch": 25.79, |
|
"grad_norm": 0.45375123620033264, |
|
"learning_rate": 7.438781163434904e-06, |
|
"loss": 0.0013, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 25.79, |
|
"eval_loss": 0.2668148875236511, |
|
"eval_runtime": 675.9749, |
|
"eval_samples_per_second": 9.989, |
|
"eval_steps_per_second": 0.312, |
|
"eval_wer": 23.287779915504693, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 25.91, |
|
"grad_norm": 0.11875366419553757, |
|
"learning_rate": 7.427239150507849e-06, |
|
"loss": 0.0014, |
|
"step": 56250 |
|
}, |
|
{ |
|
"epoch": 26.02, |
|
"grad_norm": 0.21367508172988892, |
|
"learning_rate": 7.415697137580795e-06, |
|
"loss": 0.0011, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 26.14, |
|
"grad_norm": 0.2883310317993164, |
|
"learning_rate": 7.4041551246537405e-06, |
|
"loss": 0.001, |
|
"step": 56750 |
|
}, |
|
{ |
|
"epoch": 26.26, |
|
"grad_norm": 0.4850456118583679, |
|
"learning_rate": 7.392613111726686e-06, |
|
"loss": 0.001, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 26.26, |
|
"eval_loss": 0.26939988136291504, |
|
"eval_runtime": 668.5911, |
|
"eval_samples_per_second": 10.099, |
|
"eval_steps_per_second": 0.316, |
|
"eval_wer": 23.285796457544084, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 26.37, |
|
"grad_norm": 0.21186549961566925, |
|
"learning_rate": 7.381071098799632e-06, |
|
"loss": 0.0014, |
|
"step": 57250 |
|
}, |
|
{ |
|
"epoch": 26.49, |
|
"grad_norm": 1.3765850067138672, |
|
"learning_rate": 7.369529085872577e-06, |
|
"loss": 0.0014, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 26.6, |
|
"grad_norm": 1.716868281364441, |
|
"learning_rate": 7.358033240997231e-06, |
|
"loss": 0.0012, |
|
"step": 57750 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"grad_norm": 1.3002432584762573, |
|
"learning_rate": 7.3464912280701765e-06, |
|
"loss": 0.0013, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"eval_loss": 0.2650892734527588, |
|
"eval_runtime": 675.4132, |
|
"eval_samples_per_second": 9.997, |
|
"eval_steps_per_second": 0.312, |
|
"eval_wer": 23.279846083662257, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 26.83, |
|
"grad_norm": 0.3222731053829193, |
|
"learning_rate": 7.334949215143121e-06, |
|
"loss": 0.0013, |
|
"step": 58250 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"grad_norm": 0.376442015171051, |
|
"learning_rate": 7.323407202216068e-06, |
|
"loss": 0.0013, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 27.06, |
|
"grad_norm": 0.1525341123342514, |
|
"learning_rate": 7.311865189289013e-06, |
|
"loss": 0.001, |
|
"step": 58750 |
|
}, |
|
{ |
|
"epoch": 27.18, |
|
"grad_norm": 0.3236662745475769, |
|
"learning_rate": 7.300323176361959e-06, |
|
"loss": 0.0009, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 27.18, |
|
"eval_loss": 0.26985055208206177, |
|
"eval_runtime": 680.6209, |
|
"eval_samples_per_second": 9.92, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 23.258028046095564, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 27.29, |
|
"grad_norm": 0.12151502072811127, |
|
"learning_rate": 7.288827331486612e-06, |
|
"loss": 0.0012, |
|
"step": 59250 |
|
}, |
|
{ |
|
"epoch": 27.41, |
|
"grad_norm": 1.5825998783111572, |
|
"learning_rate": 7.277285318559557e-06, |
|
"loss": 0.0012, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 27.52, |
|
"grad_norm": 0.1484094262123108, |
|
"learning_rate": 7.265743305632504e-06, |
|
"loss": 0.0012, |
|
"step": 59750 |
|
}, |
|
{ |
|
"epoch": 27.64, |
|
"grad_norm": 0.6981366872787476, |
|
"learning_rate": 7.254201292705448e-06, |
|
"loss": 0.001, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 27.64, |
|
"eval_loss": 0.2713184356689453, |
|
"eval_runtime": 670.5479, |
|
"eval_samples_per_second": 10.069, |
|
"eval_steps_per_second": 0.315, |
|
"eval_wer": 23.236210008528868, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 27.75, |
|
"grad_norm": 0.1501929610967636, |
|
"learning_rate": 7.242659279778393e-06, |
|
"loss": 0.0011, |
|
"step": 60250 |
|
}, |
|
{ |
|
"epoch": 27.87, |
|
"grad_norm": 3.85675048828125, |
|
"learning_rate": 7.23111726685134e-06, |
|
"loss": 0.0014, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 27.98, |
|
"grad_norm": 0.3616078197956085, |
|
"learning_rate": 7.219575253924285e-06, |
|
"loss": 0.0013, |
|
"step": 60750 |
|
}, |
|
{ |
|
"epoch": 28.1, |
|
"grad_norm": 0.20366336405277252, |
|
"learning_rate": 7.208033240997231e-06, |
|
"loss": 0.0011, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 28.1, |
|
"eval_loss": 0.2708372175693512, |
|
"eval_runtime": 673.0742, |
|
"eval_samples_per_second": 10.032, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 23.390919729456332, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"grad_norm": 0.40390634536743164, |
|
"learning_rate": 7.196491228070176e-06, |
|
"loss": 0.001, |
|
"step": 61250 |
|
}, |
|
{ |
|
"epoch": 28.33, |
|
"grad_norm": 1.1840142011642456, |
|
"learning_rate": 7.184949215143121e-06, |
|
"loss": 0.0009, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 28.44, |
|
"grad_norm": 1.5266140699386597, |
|
"learning_rate": 7.173407202216067e-06, |
|
"loss": 0.0009, |
|
"step": 61750 |
|
}, |
|
{ |
|
"epoch": 28.56, |
|
"grad_norm": 0.1660241335630417, |
|
"learning_rate": 7.1618651892890125e-06, |
|
"loss": 0.0012, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 28.56, |
|
"eval_loss": 0.27276286482810974, |
|
"eval_runtime": 683.6949, |
|
"eval_samples_per_second": 9.876, |
|
"eval_steps_per_second": 0.309, |
|
"eval_wer": 22.956542436083065, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 28.67, |
|
"grad_norm": 1.5870364904403687, |
|
"learning_rate": 7.1503231763619585e-06, |
|
"loss": 0.0011, |
|
"step": 62250 |
|
}, |
|
{ |
|
"epoch": 28.79, |
|
"grad_norm": 0.21234387159347534, |
|
"learning_rate": 7.138781163434904e-06, |
|
"loss": 0.0014, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"grad_norm": 0.25426217913627625, |
|
"learning_rate": 7.127239150507849e-06, |
|
"loss": 0.0012, |
|
"step": 62750 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"grad_norm": 0.1948922723531723, |
|
"learning_rate": 7.115697137580795e-06, |
|
"loss": 0.0013, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"eval_loss": 0.27328047156333923, |
|
"eval_runtime": 662.6991, |
|
"eval_samples_per_second": 10.189, |
|
"eval_steps_per_second": 0.318, |
|
"eval_wer": 22.79389888331317, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 29.13, |
|
"grad_norm": 0.1855873465538025, |
|
"learning_rate": 7.10415512465374e-06, |
|
"loss": 0.0008, |
|
"step": 63250 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"grad_norm": 1.3260753154754639, |
|
"learning_rate": 7.092613111726686e-06, |
|
"loss": 0.0007, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 29.36, |
|
"grad_norm": 0.13366416096687317, |
|
"learning_rate": 7.08111726685134e-06, |
|
"loss": 0.0009, |
|
"step": 63750 |
|
}, |
|
{ |
|
"epoch": 29.48, |
|
"grad_norm": 0.8121051788330078, |
|
"learning_rate": 7.069575253924285e-06, |
|
"loss": 0.0009, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 29.48, |
|
"eval_loss": 0.272777795791626, |
|
"eval_runtime": 672.0892, |
|
"eval_samples_per_second": 10.046, |
|
"eval_steps_per_second": 0.314, |
|
"eval_wer": 22.861336453973855, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 29.59, |
|
"grad_norm": 0.20612682402133942, |
|
"learning_rate": 7.058033240997231e-06, |
|
"loss": 0.0011, |
|
"step": 64250 |
|
}, |
|
{ |
|
"epoch": 29.71, |
|
"grad_norm": 0.37823590636253357, |
|
"learning_rate": 7.046491228070176e-06, |
|
"loss": 0.0011, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 29.82, |
|
"grad_norm": 2.2586910724639893, |
|
"learning_rate": 7.034949215143122e-06, |
|
"loss": 0.0011, |
|
"step": 64750 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"grad_norm": 0.2618952989578247, |
|
"learning_rate": 7.023407202216067e-06, |
|
"loss": 0.001, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_loss": 0.27243489027023315, |
|
"eval_runtime": 683.4018, |
|
"eval_samples_per_second": 9.88, |
|
"eval_steps_per_second": 0.309, |
|
"eval_wer": 22.87522065969812, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 30.06, |
|
"grad_norm": 0.1083679348230362, |
|
"learning_rate": 7.011865189289012e-06, |
|
"loss": 0.0009, |
|
"step": 65250 |
|
}, |
|
{ |
|
"epoch": 30.17, |
|
"grad_norm": 0.12795807421207428, |
|
"learning_rate": 7.000323176361958e-06, |
|
"loss": 0.0006, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 30.29, |
|
"grad_norm": 1.4113242626190186, |
|
"learning_rate": 6.988781163434903e-06, |
|
"loss": 0.0005, |
|
"step": 65750 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"grad_norm": 0.4899054765701294, |
|
"learning_rate": 6.977285318559557e-06, |
|
"loss": 0.0009, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"eval_loss": 0.2714119553565979, |
|
"eval_runtime": 679.0994, |
|
"eval_samples_per_second": 9.943, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 23.03588075450741, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 30.52, |
|
"grad_norm": 0.13397559523582458, |
|
"learning_rate": 6.965743305632503e-06, |
|
"loss": 0.0008, |
|
"step": 66250 |
|
}, |
|
{ |
|
"epoch": 30.63, |
|
"grad_norm": 0.22414207458496094, |
|
"learning_rate": 6.954201292705448e-06, |
|
"loss": 0.0008, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 30.75, |
|
"grad_norm": 0.2454010248184204, |
|
"learning_rate": 6.942659279778394e-06, |
|
"loss": 0.0011, |
|
"step": 66750 |
|
}, |
|
{ |
|
"epoch": 30.86, |
|
"grad_norm": 0.5344116687774658, |
|
"learning_rate": 6.9311172668513394e-06, |
|
"loss": 0.0014, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 30.86, |
|
"eval_loss": 0.27881717681884766, |
|
"eval_runtime": 673.8635, |
|
"eval_samples_per_second": 10.02, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 23.321498700835036, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 30.98, |
|
"grad_norm": 1.2090712785720825, |
|
"learning_rate": 6.919575253924285e-06, |
|
"loss": 0.0012, |
|
"step": 67250 |
|
}, |
|
{ |
|
"epoch": 31.09, |
|
"grad_norm": 0.17009182274341583, |
|
"learning_rate": 6.908033240997231e-06, |
|
"loss": 0.0009, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 31.21, |
|
"grad_norm": 0.47179415822029114, |
|
"learning_rate": 6.896491228070176e-06, |
|
"loss": 0.0007, |
|
"step": 67750 |
|
}, |
|
{ |
|
"epoch": 31.32, |
|
"grad_norm": 0.2590140402317047, |
|
"learning_rate": 6.884949215143122e-06, |
|
"loss": 0.0007, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 31.32, |
|
"eval_loss": 0.278424471616745, |
|
"eval_runtime": 660.7801, |
|
"eval_samples_per_second": 10.218, |
|
"eval_steps_per_second": 0.319, |
|
"eval_wer": 23.246127298331913, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 31.44, |
|
"grad_norm": 0.6639719009399414, |
|
"learning_rate": 6.873407202216067e-06, |
|
"loss": 0.0009, |
|
"step": 68250 |
|
}, |
|
{ |
|
"epoch": 31.55, |
|
"grad_norm": 0.8088191151618958, |
|
"learning_rate": 6.861911357340721e-06, |
|
"loss": 0.001, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 31.67, |
|
"grad_norm": 0.9694509506225586, |
|
"learning_rate": 6.850369344413667e-06, |
|
"loss": 0.001, |
|
"step": 68750 |
|
}, |
|
{ |
|
"epoch": 31.78, |
|
"grad_norm": 1.2024418115615845, |
|
"learning_rate": 6.838827331486612e-06, |
|
"loss": 0.0009, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 31.78, |
|
"eval_loss": 0.27510857582092285, |
|
"eval_runtime": 673.7472, |
|
"eval_samples_per_second": 10.022, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 23.137037110498444, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 31.9, |
|
"grad_norm": 0.2124684602022171, |
|
"learning_rate": 6.827285318559558e-06, |
|
"loss": 0.0012, |
|
"step": 69250 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"grad_norm": 0.1560162901878357, |
|
"learning_rate": 6.815743305632503e-06, |
|
"loss": 0.001, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"grad_norm": 0.1794072687625885, |
|
"learning_rate": 6.804201292705448e-06, |
|
"loss": 0.0006, |
|
"step": 69750 |
|
}, |
|
{ |
|
"epoch": 32.24, |
|
"grad_norm": 0.2194598764181137, |
|
"learning_rate": 6.792659279778394e-06, |
|
"loss": 0.0005, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 32.24, |
|
"eval_loss": 0.27563953399658203, |
|
"eval_runtime": 687.9789, |
|
"eval_samples_per_second": 9.814, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 22.787948509431345, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 32.36, |
|
"grad_norm": 0.11972519010305405, |
|
"learning_rate": 6.781117266851339e-06, |
|
"loss": 0.0007, |
|
"step": 70250 |
|
}, |
|
{ |
|
"epoch": 32.47, |
|
"grad_norm": 1.2118364572525024, |
|
"learning_rate": 6.769575253924285e-06, |
|
"loss": 0.0008, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 32.59, |
|
"grad_norm": 0.159651979804039, |
|
"learning_rate": 6.75803324099723e-06, |
|
"loss": 0.0009, |
|
"step": 70750 |
|
}, |
|
{ |
|
"epoch": 32.7, |
|
"grad_norm": 1.5151838064193726, |
|
"learning_rate": 6.7464912280701755e-06, |
|
"loss": 0.0009, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 32.7, |
|
"eval_loss": 0.27915722131729126, |
|
"eval_runtime": 689.7748, |
|
"eval_samples_per_second": 9.789, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 22.797865799234383, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 32.82, |
|
"grad_norm": 0.10878114402294159, |
|
"learning_rate": 6.73499538319483e-06, |
|
"loss": 0.0008, |
|
"step": 71250 |
|
}, |
|
{ |
|
"epoch": 32.93, |
|
"grad_norm": 0.13962584733963013, |
|
"learning_rate": 6.723453370267775e-06, |
|
"loss": 0.001, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 33.05, |
|
"grad_norm": 2.416551113128662, |
|
"learning_rate": 6.711911357340721e-06, |
|
"loss": 0.0009, |
|
"step": 71750 |
|
}, |
|
{ |
|
"epoch": 33.16, |
|
"grad_norm": 0.14477728307247162, |
|
"learning_rate": 6.700369344413666e-06, |
|
"loss": 0.0007, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 33.16, |
|
"eval_loss": 0.2731185853481293, |
|
"eval_runtime": 673.8757, |
|
"eval_samples_per_second": 10.02, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 23.30364757918956, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 33.28, |
|
"grad_norm": 0.0625206008553505, |
|
"learning_rate": 6.6888273314866115e-06, |
|
"loss": 0.0007, |
|
"step": 72250 |
|
}, |
|
{ |
|
"epoch": 33.39, |
|
"grad_norm": 0.1424214392900467, |
|
"learning_rate": 6.6772853185595575e-06, |
|
"loss": 0.0008, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 33.51, |
|
"grad_norm": 0.3345101773738861, |
|
"learning_rate": 6.665743305632503e-06, |
|
"loss": 0.0008, |
|
"step": 72750 |
|
}, |
|
{ |
|
"epoch": 33.63, |
|
"grad_norm": 1.2112958431243896, |
|
"learning_rate": 6.654201292705449e-06, |
|
"loss": 0.0009, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 33.63, |
|
"eval_loss": 0.2806909680366516, |
|
"eval_runtime": 699.4048, |
|
"eval_samples_per_second": 9.654, |
|
"eval_steps_per_second": 0.302, |
|
"eval_wer": 22.60348691909475, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 33.74, |
|
"grad_norm": 0.6432926058769226, |
|
"learning_rate": 6.642659279778394e-06, |
|
"loss": 0.001, |
|
"step": 73250 |
|
}, |
|
{ |
|
"epoch": 33.86, |
|
"grad_norm": 0.5472640991210938, |
|
"learning_rate": 6.6311634349030475e-06, |
|
"loss": 0.0008, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"grad_norm": 0.05136106163263321, |
|
"learning_rate": 6.6196214219759935e-06, |
|
"loss": 0.0008, |
|
"step": 73750 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"grad_norm": 0.16334278881549835, |
|
"learning_rate": 6.608079409048939e-06, |
|
"loss": 0.0008, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 34.09, |
|
"eval_loss": 0.2772423326969147, |
|
"eval_runtime": 671.9826, |
|
"eval_samples_per_second": 10.048, |
|
"eval_steps_per_second": 0.314, |
|
"eval_wer": 22.41307495487633, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 34.2, |
|
"grad_norm": 0.17405687272548676, |
|
"learning_rate": 6.596537396121884e-06, |
|
"loss": 0.0008, |
|
"step": 74250 |
|
}, |
|
{ |
|
"epoch": 34.32, |
|
"grad_norm": 1.0651663541793823, |
|
"learning_rate": 6.58499538319483e-06, |
|
"loss": 0.0008, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 34.43, |
|
"grad_norm": 0.22232329845428467, |
|
"learning_rate": 6.573453370267775e-06, |
|
"loss": 0.0007, |
|
"step": 74750 |
|
}, |
|
{ |
|
"epoch": 34.55, |
|
"grad_norm": 0.10098864883184433, |
|
"learning_rate": 6.561911357340721e-06, |
|
"loss": 0.0007, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 34.55, |
|
"eval_loss": 0.2794438600540161, |
|
"eval_runtime": 681.119, |
|
"eval_samples_per_second": 9.913, |
|
"eval_steps_per_second": 0.31, |
|
"eval_wer": 22.53604934843406, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 34.66, |
|
"grad_norm": 0.09176724404096603, |
|
"learning_rate": 6.550369344413666e-06, |
|
"loss": 0.0008, |
|
"step": 75250 |
|
}, |
|
{ |
|
"epoch": 34.78, |
|
"grad_norm": 0.3179700970649719, |
|
"learning_rate": 6.538827331486611e-06, |
|
"loss": 0.0008, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 34.89, |
|
"grad_norm": 0.38459789752960205, |
|
"learning_rate": 6.527285318559557e-06, |
|
"loss": 0.0008, |
|
"step": 75750 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"grad_norm": 0.2603273391723633, |
|
"learning_rate": 6.515743305632502e-06, |
|
"loss": 0.0008, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"eval_loss": 0.2777673900127411, |
|
"eval_runtime": 692.7589, |
|
"eval_samples_per_second": 9.747, |
|
"eval_steps_per_second": 0.305, |
|
"eval_wer": 22.811750004958643, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 35.12, |
|
"grad_norm": 0.051916543394327164, |
|
"learning_rate": 6.504247460757157e-06, |
|
"loss": 0.0007, |
|
"step": 76250 |
|
}, |
|
{ |
|
"epoch": 35.24, |
|
"grad_norm": 0.30884623527526855, |
|
"learning_rate": 6.492705447830102e-06, |
|
"loss": 0.0006, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 35.35, |
|
"grad_norm": 0.1257990300655365, |
|
"learning_rate": 6.481163434903047e-06, |
|
"loss": 0.0007, |
|
"step": 76750 |
|
}, |
|
{ |
|
"epoch": 35.47, |
|
"grad_norm": 0.08370446413755417, |
|
"learning_rate": 6.469621421975993e-06, |
|
"loss": 0.0008, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 35.47, |
|
"eval_loss": 0.2764694094657898, |
|
"eval_runtime": 692.6631, |
|
"eval_samples_per_second": 9.748, |
|
"eval_steps_per_second": 0.305, |
|
"eval_wer": 22.82960112660412, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 35.58, |
|
"grad_norm": 0.16313733160495758, |
|
"learning_rate": 6.458079409048938e-06, |
|
"loss": 0.0007, |
|
"step": 77250 |
|
}, |
|
{ |
|
"epoch": 35.7, |
|
"grad_norm": 1.0557291507720947, |
|
"learning_rate": 6.446537396121884e-06, |
|
"loss": 0.0007, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 35.81, |
|
"grad_norm": 0.2264009267091751, |
|
"learning_rate": 6.4349953831948295e-06, |
|
"loss": 0.0008, |
|
"step": 77750 |
|
}, |
|
{ |
|
"epoch": 35.93, |
|
"grad_norm": 0.2705702781677246, |
|
"learning_rate": 6.423453370267775e-06, |
|
"loss": 0.0009, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 35.93, |
|
"eval_loss": 0.27600711584091187, |
|
"eval_runtime": 689.6167, |
|
"eval_samples_per_second": 9.791, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 22.551917012118928, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"grad_norm": 0.2169518917798996, |
|
"learning_rate": 6.411911357340721e-06, |
|
"loss": 0.0006, |
|
"step": 78250 |
|
}, |
|
{ |
|
"epoch": 36.16, |
|
"grad_norm": 0.19748559594154358, |
|
"learning_rate": 6.400415512465374e-06, |
|
"loss": 0.0006, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 36.27, |
|
"grad_norm": 1.7767668962478638, |
|
"learning_rate": 6.3888734995383196e-06, |
|
"loss": 0.0006, |
|
"step": 78750 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"grad_norm": 0.2516990303993225, |
|
"learning_rate": 6.3773314866112655e-06, |
|
"loss": 0.0005, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"eval_loss": 0.2752860188484192, |
|
"eval_runtime": 664.3231, |
|
"eval_samples_per_second": 10.164, |
|
"eval_steps_per_second": 0.318, |
|
"eval_wer": 22.64315607830692, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 36.5, |
|
"grad_norm": 0.05742982402443886, |
|
"learning_rate": 6.365789473684211e-06, |
|
"loss": 0.0005, |
|
"step": 79250 |
|
}, |
|
{ |
|
"epoch": 36.62, |
|
"grad_norm": 5.542628765106201, |
|
"learning_rate": 6.354247460757157e-06, |
|
"loss": 0.0009, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 36.73, |
|
"grad_norm": 0.12612101435661316, |
|
"learning_rate": 6.342705447830102e-06, |
|
"loss": 0.0009, |
|
"step": 79750 |
|
}, |
|
{ |
|
"epoch": 36.85, |
|
"grad_norm": 1.6482515335083008, |
|
"learning_rate": 6.331163434903047e-06, |
|
"loss": 0.0007, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 36.85, |
|
"eval_loss": 0.2798755466938019, |
|
"eval_runtime": 679.0069, |
|
"eval_samples_per_second": 9.944, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 22.450760656127894, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"grad_norm": 0.1331368237733841, |
|
"learning_rate": 6.319621421975993e-06, |
|
"loss": 0.0007, |
|
"step": 80250 |
|
}, |
|
{ |
|
"epoch": 37.08, |
|
"grad_norm": 0.097502700984478, |
|
"learning_rate": 6.308079409048938e-06, |
|
"loss": 0.0006, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 37.19, |
|
"grad_norm": 0.05282368138432503, |
|
"learning_rate": 6.296537396121884e-06, |
|
"loss": 0.0005, |
|
"step": 80750 |
|
}, |
|
{ |
|
"epoch": 37.31, |
|
"grad_norm": 0.7441471815109253, |
|
"learning_rate": 6.284995383194829e-06, |
|
"loss": 0.0006, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 37.31, |
|
"eval_loss": 0.2776803970336914, |
|
"eval_runtime": 685.1247, |
|
"eval_samples_per_second": 9.855, |
|
"eval_steps_per_second": 0.308, |
|
"eval_wer": 22.208778784933653, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 37.43, |
|
"grad_norm": 0.12682919204235077, |
|
"learning_rate": 6.273499538319483e-06, |
|
"loss": 0.0006, |
|
"step": 81250 |
|
}, |
|
{ |
|
"epoch": 37.54, |
|
"grad_norm": 0.14379066228866577, |
|
"learning_rate": 6.261957525392429e-06, |
|
"loss": 0.0006, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 37.66, |
|
"grad_norm": 0.23371708393096924, |
|
"learning_rate": 6.250415512465374e-06, |
|
"loss": 0.0006, |
|
"step": 81750 |
|
}, |
|
{ |
|
"epoch": 37.77, |
|
"grad_norm": 0.21299830079078674, |
|
"learning_rate": 6.238919667590029e-06, |
|
"loss": 0.0008, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 37.77, |
|
"eval_loss": 0.27769771218299866, |
|
"eval_runtime": 686.8168, |
|
"eval_samples_per_second": 9.831, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 22.746295892258566, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 37.89, |
|
"grad_norm": 0.1677282303571701, |
|
"learning_rate": 6.227377654662974e-06, |
|
"loss": 0.0008, |
|
"step": 82250 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"grad_norm": 1.1451334953308105, |
|
"learning_rate": 6.215835641735919e-06, |
|
"loss": 0.0009, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 38.12, |
|
"grad_norm": 0.19697508215904236, |
|
"learning_rate": 6.204293628808865e-06, |
|
"loss": 0.0006, |
|
"step": 82750 |
|
}, |
|
{ |
|
"epoch": 38.23, |
|
"grad_norm": 0.07527792453765869, |
|
"learning_rate": 6.19275161588181e-06, |
|
"loss": 0.0007, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 38.23, |
|
"eval_loss": 0.2804949879646301, |
|
"eval_runtime": 675.5771, |
|
"eval_samples_per_second": 9.994, |
|
"eval_steps_per_second": 0.312, |
|
"eval_wer": 22.71257710692822, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 38.35, |
|
"grad_norm": 1.9222028255462646, |
|
"learning_rate": 6.181209602954756e-06, |
|
"loss": 0.0007, |
|
"step": 83250 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"grad_norm": 0.09556487202644348, |
|
"learning_rate": 6.169667590027701e-06, |
|
"loss": 0.0007, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 38.58, |
|
"grad_norm": 0.21437525749206543, |
|
"learning_rate": 6.1581255771006465e-06, |
|
"loss": 0.0006, |
|
"step": 83750 |
|
}, |
|
{ |
|
"epoch": 38.69, |
|
"grad_norm": 0.1807592660188675, |
|
"learning_rate": 6.1465835641735925e-06, |
|
"loss": 0.0007, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 38.69, |
|
"eval_loss": 0.2847980260848999, |
|
"eval_runtime": 672.9632, |
|
"eval_samples_per_second": 10.033, |
|
"eval_steps_per_second": 0.314, |
|
"eval_wer": 22.424975702639983, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 38.81, |
|
"grad_norm": 0.16911369562149048, |
|
"learning_rate": 6.135041551246538e-06, |
|
"loss": 0.0007, |
|
"step": 84250 |
|
}, |
|
{ |
|
"epoch": 38.92, |
|
"grad_norm": 1.3031611442565918, |
|
"learning_rate": 6.123545706371191e-06, |
|
"loss": 0.0008, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 39.04, |
|
"grad_norm": 0.04561692103743553, |
|
"learning_rate": 6.112003693444137e-06, |
|
"loss": 0.0006, |
|
"step": 84750 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"grad_norm": 0.061062462627887726, |
|
"learning_rate": 6.1004616805170825e-06, |
|
"loss": 0.0003, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"eval_loss": 0.2790899872779846, |
|
"eval_runtime": 674.5949, |
|
"eval_samples_per_second": 10.009, |
|
"eval_steps_per_second": 0.313, |
|
"eval_wer": 22.01439990479402, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 39.27, |
|
"grad_norm": 0.07837537676095963, |
|
"learning_rate": 6.0889196675900285e-06, |
|
"loss": 0.0004, |
|
"step": 85250 |
|
}, |
|
{ |
|
"epoch": 39.38, |
|
"grad_norm": 0.05536266788840294, |
|
"learning_rate": 6.077377654662974e-06, |
|
"loss": 0.0006, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 39.5, |
|
"grad_norm": 5.097941875457764, |
|
"learning_rate": 6.065835641735919e-06, |
|
"loss": 0.0009, |
|
"step": 85750 |
|
}, |
|
{ |
|
"epoch": 39.61, |
|
"grad_norm": 0.07944060117006302, |
|
"learning_rate": 6.054293628808865e-06, |
|
"loss": 0.0006, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 39.61, |
|
"eval_loss": 0.2777423858642578, |
|
"eval_runtime": 687.4077, |
|
"eval_samples_per_second": 9.822, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 22.262332149870083, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 39.73, |
|
"grad_norm": 1.522270917892456, |
|
"learning_rate": 6.04275161588181e-06, |
|
"loss": 0.0006, |
|
"step": 86250 |
|
}, |
|
{ |
|
"epoch": 39.84, |
|
"grad_norm": 0.05595465004444122, |
|
"learning_rate": 6.031209602954756e-06, |
|
"loss": 0.0006, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 39.96, |
|
"grad_norm": 0.5327405333518982, |
|
"learning_rate": 6.019667590027701e-06, |
|
"loss": 0.0005, |
|
"step": 86750 |
|
}, |
|
{ |
|
"epoch": 40.07, |
|
"grad_norm": 0.18009261786937714, |
|
"learning_rate": 6.008125577100646e-06, |
|
"loss": 0.0003, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 40.07, |
|
"eval_loss": 0.2798568308353424, |
|
"eval_runtime": 666.6019, |
|
"eval_samples_per_second": 10.129, |
|
"eval_steps_per_second": 0.317, |
|
"eval_wer": 22.0798540174941, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 40.19, |
|
"grad_norm": 0.15836778283119202, |
|
"learning_rate": 5.996583564173592e-06, |
|
"loss": 0.0005, |
|
"step": 87250 |
|
}, |
|
{ |
|
"epoch": 40.3, |
|
"grad_norm": 0.06244779750704765, |
|
"learning_rate": 5.985041551246537e-06, |
|
"loss": 0.0005, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 40.42, |
|
"grad_norm": 0.47360849380493164, |
|
"learning_rate": 5.973499538319484e-06, |
|
"loss": 0.0005, |
|
"step": 87750 |
|
}, |
|
{ |
|
"epoch": 40.53, |
|
"grad_norm": 0.12535277009010315, |
|
"learning_rate": 5.9619575253924285e-06, |
|
"loss": 0.0005, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 40.53, |
|
"eval_loss": 0.2800135612487793, |
|
"eval_runtime": 683.9129, |
|
"eval_samples_per_second": 9.873, |
|
"eval_steps_per_second": 0.309, |
|
"eval_wer": 22.23059682250035, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"grad_norm": 0.06063379347324371, |
|
"learning_rate": 5.950415512465374e-06, |
|
"loss": 0.0007, |
|
"step": 88250 |
|
}, |
|
{ |
|
"epoch": 40.76, |
|
"grad_norm": 0.14233584702014923, |
|
"learning_rate": 5.9388734995383205e-06, |
|
"loss": 0.0005, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 40.88, |
|
"grad_norm": 2.8523402214050293, |
|
"learning_rate": 5.927331486611266e-06, |
|
"loss": 0.0006, |
|
"step": 88750 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"grad_norm": 0.20620940625667572, |
|
"learning_rate": 5.915789473684212e-06, |
|
"loss": 0.0007, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_loss": 0.2811349332332611, |
|
"eval_runtime": 676.2894, |
|
"eval_samples_per_second": 9.984, |
|
"eval_steps_per_second": 0.312, |
|
"eval_wer": 22.298034393161036, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 41.11, |
|
"grad_norm": 0.5996530652046204, |
|
"learning_rate": 5.904247460757157e-06, |
|
"loss": 0.0004, |
|
"step": 89250 |
|
}, |
|
{ |
|
"epoch": 41.23, |
|
"grad_norm": 0.19429056346416473, |
|
"learning_rate": 5.892705447830102e-06, |
|
"loss": 0.0004, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 41.34, |
|
"grad_norm": 0.07230094075202942, |
|
"learning_rate": 5.881163434903048e-06, |
|
"loss": 0.0005, |
|
"step": 89750 |
|
}, |
|
{ |
|
"epoch": 41.46, |
|
"grad_norm": 1.4591439962387085, |
|
"learning_rate": 5.869621421975993e-06, |
|
"loss": 0.0004, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 41.46, |
|
"eval_loss": 0.2819642424583435, |
|
"eval_runtime": 679.526, |
|
"eval_samples_per_second": 9.936, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 22.71257710692822, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 41.57, |
|
"grad_norm": 0.07382282614707947, |
|
"learning_rate": 5.858079409048939e-06, |
|
"loss": 0.0005, |
|
"step": 90250 |
|
}, |
|
{ |
|
"epoch": 41.69, |
|
"grad_norm": 0.9907983541488647, |
|
"learning_rate": 5.846583564173593e-06, |
|
"loss": 0.0006, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 41.8, |
|
"grad_norm": 0.11259205639362335, |
|
"learning_rate": 5.835041551246537e-06, |
|
"loss": 0.0006, |
|
"step": 90750 |
|
}, |
|
{ |
|
"epoch": 41.92, |
|
"grad_norm": 0.06520923972129822, |
|
"learning_rate": 5.823499538319484e-06, |
|
"loss": 0.0006, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 41.92, |
|
"eval_loss": 0.28300294280052185, |
|
"eval_runtime": 684.3308, |
|
"eval_samples_per_second": 9.867, |
|
"eval_steps_per_second": 0.308, |
|
"eval_wer": 21.95886308189698, |
|
"step": 91000 |
|
} |
|
], |
|
"logging_steps": 250, |
|
"max_steps": 217100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 1000, |
|
"total_flos": 1.4334909633589248e+20, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|