|
{ |
|
"best_metric": 0.47799625992774963, |
|
"best_model_checkpoint": "../checkpoints/Wav2Vec/NURC-SP/final-version/train/checkpoint-5207", |
|
"epoch": 16.0, |
|
"eval_steps": 500, |
|
"global_step": 13886, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.48270344734191895, |
|
"learning_rate": 1.3837638376383764e-08, |
|
"loss": 0.4969, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.3699261992619927e-06, |
|
"loss": 1.0935, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.196231842041016, |
|
"learning_rate": 2.753690036900369e-06, |
|
"loss": 1.0424, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 7.084147930145264, |
|
"learning_rate": 4.137453874538745e-06, |
|
"loss": 0.9727, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 4.268230438232422, |
|
"learning_rate": 5.521217712177122e-06, |
|
"loss": 0.9538, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 7.1523942947387695, |
|
"learning_rate": 6.904981549815498e-06, |
|
"loss": 0.941, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 7.721753120422363, |
|
"learning_rate": 8.288745387453875e-06, |
|
"loss": 0.9294, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 6.676694393157959, |
|
"learning_rate": 9.67250922509225e-06, |
|
"loss": 0.9104, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 13.917472839355469, |
|
"learning_rate": 1.1056273062730627e-05, |
|
"loss": 0.9171, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.5722900629043579, |
|
"eval_runtime": 102.9565, |
|
"eval_samples_per_second": 30.45, |
|
"eval_steps_per_second": 3.807, |
|
"eval_wer": 0.25805475722281046, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.1242845058441162, |
|
"learning_rate": 1.2440036900369004e-05, |
|
"loss": 0.8226, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 1.4485478401184082, |
|
"learning_rate": 1.382380073800738e-05, |
|
"loss": 0.8966, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 1.5878037214279175, |
|
"learning_rate": 1.5207564575645757e-05, |
|
"loss": 0.881, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.9301995635032654, |
|
"learning_rate": 1.6591328413284134e-05, |
|
"loss": 0.8691, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.2811486721038818, |
|
"learning_rate": 1.797509225092251e-05, |
|
"loss": 0.8756, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.5193356275558472, |
|
"learning_rate": 1.9358856088560884e-05, |
|
"loss": 0.8687, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 1.1962685585021973, |
|
"learning_rate": 2.072878228782288e-05, |
|
"loss": 0.8588, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 1.906406044960022, |
|
"learning_rate": 2.2112546125461254e-05, |
|
"loss": 0.8614, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 2.207412004470825, |
|
"learning_rate": 2.349630996309963e-05, |
|
"loss": 0.8574, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5288712978363037, |
|
"eval_runtime": 107.9908, |
|
"eval_samples_per_second": 29.03, |
|
"eval_steps_per_second": 3.63, |
|
"eval_wer": 0.254853022739878, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 1.1061471700668335, |
|
"learning_rate": 2.4880073800738008e-05, |
|
"loss": 0.8829, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.760769784450531, |
|
"learning_rate": 2.6263837638376385e-05, |
|
"loss": 0.8459, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 1.408493161201477, |
|
"learning_rate": 2.764760147601476e-05, |
|
"loss": 0.8378, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 0.9650698304176331, |
|
"learning_rate": 2.9031365313653138e-05, |
|
"loss": 0.8415, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 0.9561858177185059, |
|
"learning_rate": 2.9978145791850807e-05, |
|
"loss": 0.8364, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.7619096040725708, |
|
"learning_rate": 2.9905298431353505e-05, |
|
"loss": 0.8364, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 1.0944970846176147, |
|
"learning_rate": 2.9833179544461174e-05, |
|
"loss": 0.8256, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 0.7943573594093323, |
|
"learning_rate": 2.9760332183963868e-05, |
|
"loss": 0.8175, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.035665988922119, |
|
"learning_rate": 2.9688213297071538e-05, |
|
"loss": 0.8533, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.5230820178985596, |
|
"eval_runtime": 99.7653, |
|
"eval_samples_per_second": 31.424, |
|
"eval_steps_per_second": 3.929, |
|
"eval_wer": 0.2511722886098926, |
|
"step": 2603 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 1.9500775337219238, |
|
"learning_rate": 2.9615365936574232e-05, |
|
"loss": 0.8022, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"grad_norm": 2.2833547592163086, |
|
"learning_rate": 2.954251857607693e-05, |
|
"loss": 0.8143, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"grad_norm": 1.7430369853973389, |
|
"learning_rate": 2.9469671215579623e-05, |
|
"loss": 0.8134, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"grad_norm": 1.7504712343215942, |
|
"learning_rate": 2.9396823855082317e-05, |
|
"loss": 0.8026, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"grad_norm": 1.6993814706802368, |
|
"learning_rate": 2.9323976494585015e-05, |
|
"loss": 0.8098, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"grad_norm": 3.484405279159546, |
|
"learning_rate": 2.925112913408771e-05, |
|
"loss": 0.812, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 2.0759809017181396, |
|
"learning_rate": 2.917901024719538e-05, |
|
"loss": 0.8121, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 2.325293779373169, |
|
"learning_rate": 2.9106162886698072e-05, |
|
"loss": 0.8076, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.5064914226531982, |
|
"eval_runtime": 117.608, |
|
"eval_samples_per_second": 26.656, |
|
"eval_steps_per_second": 3.333, |
|
"eval_wer": 0.23836535067816267, |
|
"step": 3471 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"grad_norm": 1.8136922121047974, |
|
"learning_rate": 2.9033315526200766e-05, |
|
"loss": 0.7724, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"grad_norm": 7.821990013122559, |
|
"learning_rate": 2.8961196639308436e-05, |
|
"loss": 0.7879, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"grad_norm": 0.916205108165741, |
|
"learning_rate": 2.888834927881113e-05, |
|
"loss": 0.7907, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 1.157679557800293, |
|
"learning_rate": 2.8815501918313824e-05, |
|
"loss": 0.7859, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"grad_norm": 1.1060450077056885, |
|
"learning_rate": 2.874265455781652e-05, |
|
"loss": 0.7934, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"grad_norm": 1.0524017810821533, |
|
"learning_rate": 2.8669807197319215e-05, |
|
"loss": 0.7887, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 0.9493302702903748, |
|
"learning_rate": 2.8596959836821913e-05, |
|
"loss": 0.7962, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 1.216361403465271, |
|
"learning_rate": 2.8524840949929582e-05, |
|
"loss": 0.7902, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"grad_norm": 0.858220100402832, |
|
"learning_rate": 2.8451993589432277e-05, |
|
"loss": 0.7875, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.4950461685657501, |
|
"eval_runtime": 107.5304, |
|
"eval_samples_per_second": 29.155, |
|
"eval_steps_per_second": 3.645, |
|
"eval_wer": 0.23864266626329855, |
|
"step": 4339 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"grad_norm": 0.6915091872215271, |
|
"learning_rate": 2.8379146228934974e-05, |
|
"loss": 0.8172, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"grad_norm": 0.8395456671714783, |
|
"learning_rate": 2.8306298868437668e-05, |
|
"loss": 0.7719, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"grad_norm": 0.5675917863845825, |
|
"learning_rate": 2.8233451507940365e-05, |
|
"loss": 0.7725, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"grad_norm": 0.7588133215904236, |
|
"learning_rate": 2.816060414744306e-05, |
|
"loss": 0.7735, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"grad_norm": 0.7658157348632812, |
|
"learning_rate": 2.8087756786945757e-05, |
|
"loss": 0.7713, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"grad_norm": 0.6394296288490295, |
|
"learning_rate": 2.801490942644845e-05, |
|
"loss": 0.7683, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"grad_norm": 1.0770230293273926, |
|
"learning_rate": 2.794206206595114e-05, |
|
"loss": 0.784, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"grad_norm": 0.537818193435669, |
|
"learning_rate": 2.786921470545384e-05, |
|
"loss": 0.7705, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"grad_norm": 1.1256853342056274, |
|
"learning_rate": 2.7796367344956533e-05, |
|
"loss": 0.7947, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.47799625992774963, |
|
"eval_runtime": 102.2073, |
|
"eval_samples_per_second": 30.673, |
|
"eval_steps_per_second": 3.835, |
|
"eval_wer": 0.23347451217667523, |
|
"step": 5207 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"grad_norm": 2.259537935256958, |
|
"learning_rate": 2.772351998445923e-05, |
|
"loss": 0.7386, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"grad_norm": 1.8576114177703857, |
|
"learning_rate": 2.7650672623961924e-05, |
|
"loss": 0.7591, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"grad_norm": 3.029550075531006, |
|
"learning_rate": 2.7578553737069594e-05, |
|
"loss": 0.765, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"grad_norm": 1.3059985637664795, |
|
"learning_rate": 2.750570637657229e-05, |
|
"loss": 0.7484, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"grad_norm": 2.597036600112915, |
|
"learning_rate": 2.7432859016074985e-05, |
|
"loss": 0.7696, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"grad_norm": 1.8154231309890747, |
|
"learning_rate": 2.736074012918265e-05, |
|
"loss": 0.7642, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"grad_norm": 1.9362813234329224, |
|
"learning_rate": 2.728789276868535e-05, |
|
"loss": 0.7703, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"grad_norm": 2.5824599266052246, |
|
"learning_rate": 2.7215045408188043e-05, |
|
"loss": 0.7621, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.4984392821788788, |
|
"eval_runtime": 96.472, |
|
"eval_samples_per_second": 32.496, |
|
"eval_steps_per_second": 4.063, |
|
"eval_wer": 0.23700398326022287, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 1.1438063383102417, |
|
"learning_rate": 2.714219804769074e-05, |
|
"loss": 0.7505, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"grad_norm": 0.8449379205703735, |
|
"learning_rate": 2.7069350687193434e-05, |
|
"loss": 0.7626, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"grad_norm": 1.075359582901001, |
|
"learning_rate": 2.699650332669613e-05, |
|
"loss": 0.7503, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 14440011566940.16, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 97.0769, |
|
"eval_samples_per_second": 32.294, |
|
"eval_steps_per_second": 4.038, |
|
"eval_wer": 1.0, |
|
"step": 6943 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 108.3909, |
|
"eval_samples_per_second": 28.923, |
|
"eval_steps_per_second": 3.617, |
|
"eval_wer": 1.0, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 107.8221, |
|
"eval_samples_per_second": 29.076, |
|
"eval_steps_per_second": 3.636, |
|
"eval_wer": 1.0, |
|
"step": 8678 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 116.4634, |
|
"eval_samples_per_second": 26.918, |
|
"eval_steps_per_second": 3.366, |
|
"eval_wer": 1.0, |
|
"step": 9546 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 11.29, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 114.432, |
|
"eval_samples_per_second": 27.396, |
|
"eval_steps_per_second": 3.426, |
|
"eval_wer": 1.0, |
|
"step": 10414 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 107.4788, |
|
"eval_samples_per_second": 29.169, |
|
"eval_steps_per_second": 3.647, |
|
"eval_wer": 1.0, |
|
"step": 11282 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 106.6864, |
|
"eval_samples_per_second": 29.385, |
|
"eval_steps_per_second": 3.674, |
|
"eval_wer": 1.0, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 14.29, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 109.7539, |
|
"eval_samples_per_second": 28.564, |
|
"eval_steps_per_second": 3.572, |
|
"eval_wer": 1.0, |
|
"step": 13018 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 15.44, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.697756301296683e-05, |
|
"loss": 0.0, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": NaN, |
|
"eval_runtime": 108.5954, |
|
"eval_samples_per_second": 28.869, |
|
"eval_steps_per_second": 3.61, |
|
"eval_wer": 1.0, |
|
"step": 13886 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 13886, |
|
"total_flos": 6.139205851819624e+20, |
|
"train_loss": 103989713142.67763, |
|
"train_runtime": 155022.8715, |
|
"train_samples_per_second": 53.744, |
|
"train_steps_per_second": 0.28 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 43350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 6.139205851819624e+20, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|