{
  "best_metric": 0.09467801134467801,
  "best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.75_g1.0-0.05_10_0.004_40/checkpoint-3150",
  "epoch": 100.0,
  "eval_steps": 50,
  "global_step": 5300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.94,
      "learning_rate": 7.547169811320755e-07,
      "loss": 1628.3266,
      "step": 50
    },
    {
      "epoch": 0.94,
      "eval_loss": 795.486328125,
      "eval_runtime": 3.3117,
      "eval_samples_per_second": 216.507,
      "eval_steps_per_second": 6.945,
      "eval_wer": 15.705538872205539,
      "step": 50
    },
    {
      "epoch": 1.89,
      "learning_rate": 2.5660377358490568e-06,
      "loss": 1167.4639,
      "step": 100
    },
    {
      "epoch": 1.89,
      "eval_loss": 276.5289001464844,
      "eval_runtime": 2.3138,
      "eval_samples_per_second": 309.881,
      "eval_steps_per_second": 9.94,
      "eval_wer": 0.995412078745412,
      "step": 100
    },
    {
      "epoch": 2.83,
      "learning_rate": 4.452830188679246e-06,
      "loss": 193.5269,
      "step": 150
    },
    {
      "epoch": 2.83,
      "eval_loss": 67.26387786865234,
      "eval_runtime": 2.2447,
      "eval_samples_per_second": 319.421,
      "eval_steps_per_second": 10.246,
      "eval_wer": 1.0,
      "step": 150
    },
    {
      "epoch": 3.77,
      "learning_rate": 6.339622641509434e-06,
      "loss": 86.2802,
      "step": 200
    },
    {
      "epoch": 3.77,
      "eval_loss": 63.62594985961914,
      "eval_runtime": 2.2662,
      "eval_samples_per_second": 316.387,
      "eval_steps_per_second": 10.149,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 4.72,
      "learning_rate": 8.226415094339623e-06,
      "loss": 82.8944,
      "step": 250
    },
    {
      "epoch": 4.72,
      "eval_loss": 61.89385986328125,
      "eval_runtime": 2.2689,
      "eval_samples_per_second": 316.007,
      "eval_steps_per_second": 10.137,
      "eval_wer": 1.0,
      "step": 250
    },
    {
      "epoch": 5.66,
      "learning_rate": 1.0113207547169812e-05,
      "loss": 79.9575,
      "step": 300
    },
    {
      "epoch": 5.66,
      "eval_loss": 59.79137420654297,
      "eval_runtime": 2.2608,
      "eval_samples_per_second": 317.142,
      "eval_steps_per_second": 10.173,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 6.6,
      "learning_rate": 1.2e-05,
      "loss": 77.3429,
      "step": 350
    },
    {
      "epoch": 6.6,
      "eval_loss": 58.15074157714844,
      "eval_runtime": 2.2539,
      "eval_samples_per_second": 318.116,
      "eval_steps_per_second": 10.205,
      "eval_wer": 1.0,
      "step": 350
    },
    {
      "epoch": 7.55,
      "learning_rate": 1.3886792452830189e-05,
      "loss": 74.0584,
      "step": 400
    },
    {
      "epoch": 7.55,
      "eval_loss": 57.29766082763672,
      "eval_runtime": 2.2566,
      "eval_samples_per_second": 317.741,
      "eval_steps_per_second": 10.193,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 8.49,
      "learning_rate": 1.577358490566038e-05,
      "loss": 72.6694,
      "step": 450
    },
    {
      "epoch": 8.49,
      "eval_loss": 56.71151351928711,
      "eval_runtime": 2.3841,
      "eval_samples_per_second": 300.746,
      "eval_steps_per_second": 9.647,
      "eval_wer": 1.0,
      "step": 450
    },
    {
      "epoch": 9.43,
      "learning_rate": 1.766037735849057e-05,
      "loss": 73.417,
      "step": 500
    },
    {
      "epoch": 9.43,
      "eval_loss": 56.60741424560547,
      "eval_runtime": 2.3092,
      "eval_samples_per_second": 310.501,
      "eval_steps_per_second": 9.96,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 10.38,
      "learning_rate": 1.9547169811320757e-05,
      "loss": 72.4291,
      "step": 550
    },
    {
      "epoch": 10.38,
      "eval_loss": 56.47547912597656,
      "eval_runtime": 2.313,
      "eval_samples_per_second": 309.989,
      "eval_steps_per_second": 9.944,
      "eval_wer": 1.0,
      "step": 550
    },
    {
      "epoch": 11.32,
      "learning_rate": 1.9996868319012422e-05,
      "loss": 72.7847,
      "step": 600
    },
    {
      "epoch": 11.32,
      "eval_loss": 56.86225128173828,
      "eval_runtime": 2.2819,
      "eval_samples_per_second": 314.217,
      "eval_steps_per_second": 10.079,
      "eval_wer": 1.0,
      "step": 600
    },
    {
      "epoch": 12.26,
      "learning_rate": 1.9983208992285993e-05,
      "loss": 69.3297,
      "step": 650
    },
    {
      "epoch": 12.26,
      "eval_loss": 49.65397644042969,
      "eval_runtime": 2.2885,
      "eval_samples_per_second": 313.303,
      "eval_steps_per_second": 10.05,
      "eval_wer": 0.9637137137137137,
      "step": 650
    },
    {
      "epoch": 13.21,
      "learning_rate": 1.9958724515842856e-05,
      "loss": 54.2644,
      "step": 700
    },
    {
      "epoch": 13.21,
      "eval_loss": 29.155927658081055,
      "eval_runtime": 2.3174,
      "eval_samples_per_second": 309.402,
      "eval_steps_per_second": 9.925,
      "eval_wer": 0.5630630630630631,
      "step": 700
    },
    {
      "epoch": 14.15,
      "learning_rate": 1.9923441439074434e-05,
      "loss": 31.2303,
      "step": 750
    },
    {
      "epoch": 14.15,
      "eval_loss": 13.895669937133789,
      "eval_runtime": 2.2929,
      "eval_samples_per_second": 312.699,
      "eval_steps_per_second": 10.031,
      "eval_wer": 0.24140807474140807,
      "step": 750
    },
    {
      "epoch": 15.09,
      "learning_rate": 1.9877398020679796e-05,
      "loss": 19.4522,
      "step": 800
    },
    {
      "epoch": 15.09,
      "eval_loss": 9.745962142944336,
      "eval_runtime": 2.3244,
      "eval_samples_per_second": 308.464,
      "eval_steps_per_second": 9.895,
      "eval_wer": 0.19494494494494494,
      "step": 800
    },
    {
      "epoch": 16.04,
      "learning_rate": 1.9820644187180354e-05,
      "loss": 15.0046,
      "step": 850
    },
    {
      "epoch": 16.04,
      "eval_loss": 7.673460483551025,
      "eval_runtime": 2.357,
      "eval_samples_per_second": 304.198,
      "eval_steps_per_second": 9.758,
      "eval_wer": 0.16216216216216217,
      "step": 850
    },
    {
      "epoch": 16.98,
      "learning_rate": 1.975324147878278e-05,
      "loss": 12.3783,
      "step": 900
    },
    {
      "epoch": 16.98,
      "eval_loss": 6.555938720703125,
      "eval_runtime": 2.2666,
      "eval_samples_per_second": 316.327,
      "eval_steps_per_second": 10.147,
      "eval_wer": 0.1519853186519853,
      "step": 900
    },
    {
      "epoch": 17.92,
      "learning_rate": 1.9675262982648757e-05,
      "loss": 10.7256,
      "step": 950
    },
    {
      "epoch": 17.92,
      "eval_loss": 5.785167217254639,
      "eval_runtime": 2.3021,
      "eval_samples_per_second": 311.456,
      "eval_steps_per_second": 9.991,
      "eval_wer": 0.142308975642309,
      "step": 950
    },
    {
      "epoch": 18.87,
      "learning_rate": 1.958679325364396e-05,
      "loss": 9.8218,
      "step": 1000
    },
    {
      "epoch": 18.87,
      "eval_loss": 5.4473090171813965,
      "eval_runtime": 2.3201,
      "eval_samples_per_second": 309.032,
      "eval_steps_per_second": 9.913,
      "eval_wer": 0.1394728061394728,
      "step": 1000
    },
    {
      "epoch": 19.81,
      "learning_rate": 1.9487928222652195e-05,
      "loss": 9.0115,
      "step": 1050
    },
    {
      "epoch": 19.81,
      "eval_loss": 5.125023365020752,
      "eval_runtime": 2.3061,
      "eval_samples_per_second": 310.909,
      "eval_steps_per_second": 9.973,
      "eval_wer": 0.13563563563563563,
      "step": 1050
    },
    {
      "epoch": 20.75,
      "learning_rate": 1.9378775092554124e-05,
      "loss": 8.1076,
      "step": 1100
    },
    {
      "epoch": 20.75,
      "eval_loss": 4.797976016998291,
      "eval_runtime": 2.2582,
      "eval_samples_per_second": 317.516,
      "eval_steps_per_second": 10.185,
      "eval_wer": 0.12328995662328995,
      "step": 1100
    },
    {
      "epoch": 21.7,
      "learning_rate": 1.925945222198336e-05,
      "loss": 7.9779,
      "step": 1150
    },
    {
      "epoch": 21.7,
      "eval_loss": 4.615002632141113,
      "eval_runtime": 2.3017,
      "eval_samples_per_second": 311.506,
      "eval_steps_per_second": 9.993,
      "eval_wer": 0.12112112112112113,
      "step": 1150
    },
    {
      "epoch": 22.64,
      "learning_rate": 1.9130088996985967e-05,
      "loss": 7.6027,
      "step": 1200
    },
    {
      "epoch": 22.64,
      "eval_loss": 4.650717258453369,
      "eval_runtime": 2.245,
      "eval_samples_per_second": 319.376,
      "eval_steps_per_second": 10.245,
      "eval_wer": 0.12512512512512514,
      "step": 1200
    },
    {
      "epoch": 23.58,
      "learning_rate": 1.8990825690722557e-05,
      "loss": 7.4535,
      "step": 1250
    },
    {
      "epoch": 23.58,
      "eval_loss": 4.481402397155762,
      "eval_runtime": 2.3288,
      "eval_samples_per_second": 307.882,
      "eval_steps_per_second": 9.876,
      "eval_wer": 0.12095428762095428,
      "step": 1250
    },
    {
      "epoch": 24.53,
      "learning_rate": 1.8841813311365105e-05,
      "loss": 6.946,
      "step": 1300
    },
    {
      "epoch": 24.53,
      "eval_loss": 4.436919212341309,
      "eval_runtime": 2.254,
      "eval_samples_per_second": 318.102,
      "eval_steps_per_second": 10.204,
      "eval_wer": 0.11494828161494829,
      "step": 1300
    },
    {
      "epoch": 25.47,
      "learning_rate": 1.868321343835339e-05,
      "loss": 7.0627,
      "step": 1350
    },
    {
      "epoch": 25.47,
      "eval_loss": 4.11531400680542,
      "eval_runtime": 2.2753,
      "eval_samples_per_second": 315.126,
      "eval_steps_per_second": 10.109,
      "eval_wer": 0.11394728061394728,
      "step": 1350
    },
    {
      "epoch": 26.42,
      "learning_rate": 1.8515198047188652e-05,
      "loss": 6.2482,
      "step": 1400
    },
    {
      "epoch": 26.42,
      "eval_loss": 4.004451274871826,
      "eval_runtime": 2.232,
      "eval_samples_per_second": 321.232,
      "eval_steps_per_second": 10.305,
      "eval_wer": 0.11011011011011011,
      "step": 1400
    },
    {
      "epoch": 27.36,
      "learning_rate": 1.833794932295441e-05,
      "loss": 6.2238,
      "step": 1450
    },
    {
      "epoch": 27.36,
      "eval_loss": 4.035464286804199,
      "eval_runtime": 2.4345,
      "eval_samples_per_second": 294.516,
      "eval_steps_per_second": 9.448,
      "eval_wer": 0.11578244911578245,
      "step": 1450
    },
    {
      "epoch": 28.3,
      "learning_rate": 1.8151659462766685e-05,
      "loss": 5.8919,
      "step": 1500
    },
    {
      "epoch": 28.3,
      "eval_loss": 3.9624619483947754,
      "eval_runtime": 2.2497,
      "eval_samples_per_second": 318.712,
      "eval_steps_per_second": 10.224,
      "eval_wer": 0.11544878211544878,
      "step": 1500
    },
    {
      "epoch": 29.25,
      "learning_rate": 1.7956530467367805e-05,
      "loss": 5.7955,
      "step": 1550
    },
    {
      "epoch": 29.25,
      "eval_loss": 3.7957444190979004,
      "eval_runtime": 2.3687,
      "eval_samples_per_second": 302.701,
      "eval_steps_per_second": 9.71,
      "eval_wer": 0.11269602936269603,
      "step": 1550
    },
    {
      "epoch": 30.19,
      "learning_rate": 1.7752773922089784e-05,
      "loss": 5.4849,
      "step": 1600
    },
    {
      "epoch": 30.19,
      "eval_loss": 3.7985599040985107,
      "eval_runtime": 2.3776,
      "eval_samples_per_second": 301.568,
      "eval_steps_per_second": 9.674,
      "eval_wer": 0.10577243910577244,
      "step": 1600
    },
    {
      "epoch": 31.13,
      "learning_rate": 1.7540610767424813e-05,
      "loss": 5.1108,
      "step": 1650
    },
    {
      "epoch": 31.13,
      "eval_loss": 3.818775177001953,
      "eval_runtime": 2.2702,
      "eval_samples_per_second": 315.836,
      "eval_steps_per_second": 10.131,
      "eval_wer": 0.10702369035702369,
      "step": 1650
    },
    {
      "epoch": 32.08,
      "learning_rate": 1.7320271059451597e-05,
      "loss": 5.3354,
      "step": 1700
    },
    {
      "epoch": 32.08,
      "eval_loss": 3.69089674949646,
      "eval_runtime": 2.3064,
      "eval_samples_per_second": 310.88,
      "eval_steps_per_second": 9.972,
      "eval_wer": 0.10235235235235235,
      "step": 1700
    },
    {
      "epoch": 33.02,
      "learning_rate": 1.7091993720377336e-05,
      "loss": 5.1149,
      "step": 1750
    },
    {
      "epoch": 33.02,
      "eval_loss": 3.62273907661438,
      "eval_runtime": 2.3105,
      "eval_samples_per_second": 310.319,
      "eval_steps_per_second": 9.954,
      "eval_wer": 0.10226893560226893,
      "step": 1750
    },
    {
      "epoch": 33.96,
      "learning_rate": 1.685602627946584e-05,
      "loss": 4.976,
      "step": 1800
    },
    {
      "epoch": 33.96,
      "eval_loss": 3.6176178455352783,
      "eval_runtime": 2.4073,
      "eval_samples_per_second": 297.846,
      "eval_steps_per_second": 9.554,
      "eval_wer": 0.1016016016016016,
      "step": 1800
    },
    {
      "epoch": 34.91,
      "learning_rate": 1.661262460463274e-05,
      "loss": 4.5904,
      "step": 1850
    },
    {
      "epoch": 34.91,
      "eval_loss": 3.5958945751190186,
      "eval_runtime": 2.3015,
      "eval_samples_per_second": 311.541,
      "eval_steps_per_second": 9.994,
      "eval_wer": 0.10785785785785786,
      "step": 1850
    },
    {
      "epoch": 35.85,
      "learning_rate": 1.6362052624998767e-05,
      "loss": 4.6613,
      "step": 1900
    },
    {
      "epoch": 35.85,
      "eval_loss": 3.4999887943267822,
      "eval_runtime": 2.265,
      "eval_samples_per_second": 316.562,
      "eval_steps_per_second": 10.155,
      "eval_wer": 0.10685685685685686,
      "step": 1900
    },
    {
      "epoch": 36.79,
      "learning_rate": 1.6104582044701983e-05,
      "loss": 4.7697,
      "step": 1950
    },
    {
      "epoch": 36.79,
      "eval_loss": 3.5211145877838135,
      "eval_runtime": 2.326,
      "eval_samples_per_second": 308.248,
      "eval_steps_per_second": 9.888,
      "eval_wer": 0.10143476810143477,
      "step": 1950
    },
    {
      "epoch": 37.74,
      "learning_rate": 1.584049204827929e-05,
      "loss": 4.4224,
      "step": 2000
    },
    {
      "epoch": 37.74,
      "eval_loss": 3.4720044136047363,
      "eval_runtime": 2.25,
      "eval_samples_per_second": 318.672,
      "eval_steps_per_second": 10.222,
      "eval_wer": 0.1001001001001001,
      "step": 2000
    },
    {
      "epoch": 38.68,
      "learning_rate": 1.5570068997936686e-05,
      "loss": 4.5255,
      "step": 2050
    },
    {
      "epoch": 38.68,
      "eval_loss": 3.417844533920288,
      "eval_runtime": 2.2907,
      "eval_samples_per_second": 313.008,
      "eval_steps_per_second": 10.041,
      "eval_wer": 0.09826493159826494,
      "step": 2050
    },
    {
      "epoch": 39.62,
      "learning_rate": 1.5293606123036508e-05,
      "loss": 4.2808,
      "step": 2100
    },
    {
      "epoch": 39.62,
      "eval_loss": 3.4800703525543213,
      "eval_runtime": 2.3337,
      "eval_samples_per_second": 307.243,
      "eval_steps_per_second": 9.856,
      "eval_wer": 0.10435435435435435,
      "step": 2100
    },
    {
      "epoch": 40.57,
      "learning_rate": 1.5011403202138346e-05,
      "loss": 4.2407,
      "step": 2150
    },
    {
      "epoch": 40.57,
      "eval_loss": 3.4080042839050293,
      "eval_runtime": 2.2974,
      "eval_samples_per_second": 312.092,
      "eval_steps_per_second": 10.011,
      "eval_wer": 0.10001668335001668,
      "step": 2150
    },
    {
      "epoch": 41.51,
      "learning_rate": 1.4723766237938495e-05,
      "loss": 3.9611,
      "step": 2200
    },
    {
      "epoch": 41.51,
      "eval_loss": 3.451436758041382,
      "eval_runtime": 2.3789,
      "eval_samples_per_second": 301.4,
      "eval_steps_per_second": 9.668,
      "eval_wer": 0.10485485485485485,
      "step": 2200
    },
    {
      "epoch": 42.45,
      "learning_rate": 1.4431007125460274e-05,
      "loss": 4.014,
      "step": 2250
    },
    {
      "epoch": 42.45,
      "eval_loss": 3.3982768058776855,
      "eval_runtime": 2.2505,
      "eval_samples_per_second": 318.595,
      "eval_steps_per_second": 10.22,
      "eval_wer": 0.10885885885885886,
      "step": 2250
    },
    {
      "epoch": 43.4,
      "learning_rate": 1.4133443313855155e-05,
      "loss": 3.8487,
      "step": 2300
    },
    {
      "epoch": 43.4,
      "eval_loss": 3.4163670539855957,
      "eval_runtime": 2.4151,
      "eval_samples_per_second": 296.887,
      "eval_steps_per_second": 9.524,
      "eval_wer": 0.10418752085418752,
      "step": 2300
    },
    {
      "epoch": 44.34,
      "learning_rate": 1.3831397462181298e-05,
      "loss": 3.8132,
      "step": 2350
    },
    {
      "epoch": 44.34,
      "eval_loss": 3.356208086013794,
      "eval_runtime": 2.3331,
      "eval_samples_per_second": 307.322,
      "eval_steps_per_second": 9.858,
      "eval_wer": 0.09584584584584585,
      "step": 2350
    },
    {
      "epoch": 45.28,
      "learning_rate": 1.3525197089532833e-05,
      "loss": 3.6973,
      "step": 2400
    },
    {
      "epoch": 45.28,
      "eval_loss": 3.283907890319824,
      "eval_runtime": 2.2596,
      "eval_samples_per_second": 317.308,
      "eval_steps_per_second": 10.179,
      "eval_wer": 0.09776443109776443,
      "step": 2400
    },
    {
      "epoch": 46.23,
      "learning_rate": 1.3215174219899224e-05,
      "loss": 3.606,
      "step": 2450
    },
    {
      "epoch": 46.23,
      "eval_loss": 3.3125367164611816,
      "eval_runtime": 2.2757,
      "eval_samples_per_second": 315.07,
      "eval_steps_per_second": 10.107,
      "eval_wer": 0.10093426760093427,
      "step": 2450
    },
    {
      "epoch": 47.17,
      "learning_rate": 1.2901665022139796e-05,
      "loss": 3.5412,
      "step": 2500
    },
    {
      "epoch": 47.17,
      "eval_loss": 3.2580113410949707,
      "eval_runtime": 2.2944,
      "eval_samples_per_second": 312.502,
      "eval_steps_per_second": 10.024,
      "eval_wer": 0.09768101434768102,
      "step": 2500
    },
    {
      "epoch": 48.11,
      "learning_rate": 1.2591371175914464e-05,
      "loss": 3.3971,
      "step": 2550
    },
    {
      "epoch": 48.11,
      "eval_loss": 3.306504487991333,
      "eval_runtime": 2.3284,
      "eval_samples_per_second": 307.938,
      "eval_steps_per_second": 9.878,
      "eval_wer": 0.09843176509843177,
      "step": 2550
    },
    {
      "epoch": 49.06,
      "learning_rate": 1.2271965256408201e-05,
      "loss": 3.4795,
      "step": 2600
    },
    {
      "epoch": 49.06,
      "eval_loss": 3.3312034606933594,
      "eval_runtime": 2.3017,
      "eval_samples_per_second": 311.51,
      "eval_steps_per_second": 9.993,
      "eval_wer": 0.10368702035368701,
      "step": 2600
    },
    {
      "epoch": 50.0,
      "learning_rate": 1.1950095763909446e-05,
      "loss": 3.302,
      "step": 2650
    },
    {
      "epoch": 50.0,
      "eval_loss": 3.3014612197875977,
      "eval_runtime": 2.4125,
      "eval_samples_per_second": 297.198,
      "eval_steps_per_second": 9.534,
      "eval_wer": 0.0985985985985986,
      "step": 2650
    },
    {
      "epoch": 50.94,
      "learning_rate": 1.1626111712993397e-05,
      "loss": 3.2486,
      "step": 2700
    },
    {
      "epoch": 50.94,
      "eval_loss": 3.2506401538848877,
      "eval_runtime": 2.2778,
      "eval_samples_per_second": 314.775,
      "eval_steps_per_second": 10.097,
      "eval_wer": 0.09768101434768102,
      "step": 2700
    },
    {
      "epoch": 51.89,
      "learning_rate": 1.1300364411126395e-05,
      "loss": 3.3977,
      "step": 2750
    },
    {
      "epoch": 51.89,
      "eval_loss": 3.240551471710205,
      "eval_runtime": 2.4135,
      "eval_samples_per_second": 297.074,
      "eval_steps_per_second": 9.53,
      "eval_wer": 0.09517851184517852,
      "step": 2750
    },
    {
      "epoch": 52.83,
      "learning_rate": 1.0973207077730688e-05,
      "loss": 3.0229,
      "step": 2800
    },
    {
      "epoch": 52.83,
      "eval_loss": 3.287993907928467,
      "eval_runtime": 2.3151,
      "eval_samples_per_second": 309.7,
      "eval_steps_per_second": 9.935,
      "eval_wer": 0.09893226559893227,
      "step": 2800
    },
    {
      "epoch": 53.77,
      "learning_rate": 1.0644994461176039e-05,
      "loss": 3.2615,
      "step": 2850
    },
    {
      "epoch": 53.77,
      "eval_loss": 3.3112218379974365,
      "eval_runtime": 2.2897,
      "eval_samples_per_second": 313.137,
      "eval_steps_per_second": 10.045,
      "eval_wer": 0.09984984984984985,
      "step": 2850
    },
    {
      "epoch": 54.72,
      "learning_rate": 1.0316082454113379e-05,
      "loss": 3.2023,
      "step": 2900
    },
    {
      "epoch": 54.72,
      "eval_loss": 3.289529323577881,
      "eval_runtime": 2.3096,
      "eval_samples_per_second": 310.45,
      "eval_steps_per_second": 9.959,
      "eval_wer": 0.10368702035368701,
      "step": 2900
    },
    {
      "epoch": 55.66,
      "learning_rate": 9.98682770756773e-06,
      "loss": 3.0037,
      "step": 2950
    },
    {
      "epoch": 55.66,
      "eval_loss": 3.33939266204834,
      "eval_runtime": 2.3223,
      "eval_samples_per_second": 308.74,
      "eval_steps_per_second": 9.904,
      "eval_wer": 0.10176843510176843,
      "step": 2950
    },
    {
      "epoch": 56.6,
      "learning_rate": 9.657587244208758e-06,
      "loss": 2.9249,
      "step": 3000
    },
    {
      "epoch": 56.6,
      "eval_loss": 3.2351438999176025,
      "eval_runtime": 2.3103,
      "eval_samples_per_second": 310.353,
      "eval_steps_per_second": 9.956,
      "eval_wer": 0.09743076409743076,
      "step": 3000
    },
    {
      "epoch": 57.55,
      "learning_rate": 9.328718071218366e-06,
      "loss": 3.112,
      "step": 3050
    },
    {
      "epoch": 57.55,
      "eval_loss": 3.2868096828460693,
      "eval_runtime": 2.2583,
      "eval_samples_per_second": 317.501,
      "eval_steps_per_second": 10.185,
      "eval_wer": 0.10185185185185185,
      "step": 3050
    },
    {
      "epoch": 58.49,
      "learning_rate": 9.000576793175061e-06,
      "loss": 3.0261,
      "step": 3100
    },
    {
      "epoch": 58.49,
      "eval_loss": 3.324059009552002,
      "eval_runtime": 2.3538,
      "eval_samples_per_second": 304.612,
      "eval_steps_per_second": 9.771,
      "eval_wer": 0.10385385385385386,
      "step": 3100
    },
    {
      "epoch": 59.43,
      "learning_rate": 8.673519225374882e-06,
      "loss": 2.8959,
      "step": 3150
    },
    {
      "epoch": 59.43,
      "eval_loss": 3.225084066390991,
      "eval_runtime": 2.3969,
      "eval_samples_per_second": 299.14,
      "eval_steps_per_second": 9.596,
      "eval_wer": 0.09467801134467801,
      "step": 3150
    },
    {
      "epoch": 60.38,
      "learning_rate": 8.347900008008194e-06,
      "loss": 2.946,
      "step": 3200
    },
    {
      "epoch": 60.38,
      "eval_loss": 3.28798508644104,
      "eval_runtime": 2.4104,
      "eval_samples_per_second": 297.466,
      "eval_steps_per_second": 9.542,
      "eval_wer": 0.10118451785118451,
      "step": 3200
    },
    {
      "epoch": 61.32,
      "learning_rate": 8.024072221610653e-06,
      "loss": 2.6933,
      "step": 3250
    },
    {
      "epoch": 61.32,
      "eval_loss": 3.259455680847168,
      "eval_runtime": 2.3334,
      "eval_samples_per_second": 307.279,
      "eval_steps_per_second": 9.857,
      "eval_wer": 0.1031031031031031,
      "step": 3250
    },
    {
      "epoch": 62.26,
      "learning_rate": 7.702387004205407e-06,
      "loss": 2.8755,
      "step": 3300
    },
    {
      "epoch": 62.26,
      "eval_loss": 3.214036464691162,
      "eval_runtime": 2.2931,
      "eval_samples_per_second": 312.677,
      "eval_steps_per_second": 10.03,
      "eval_wer": 0.10477143810477144,
      "step": 3300
    },
    {
      "epoch": 63.21,
      "learning_rate": 7.383193170551595e-06,
      "loss": 2.606,
      "step": 3350
    },
    {
      "epoch": 63.21,
      "eval_loss": 3.2743003368377686,
      "eval_runtime": 2.33,
      "eval_samples_per_second": 307.725,
      "eval_steps_per_second": 9.871,
      "eval_wer": 0.1075241908575242,
      "step": 3350
    },
    {
      "epoch": 64.15,
      "learning_rate": 7.066836833912053e-06,
      "loss": 2.7607,
      "step": 3400
    },
    {
      "epoch": 64.15,
      "eval_loss": 3.2454757690429688,
      "eval_runtime": 2.3209,
      "eval_samples_per_second": 308.936,
      "eval_steps_per_second": 9.91,
      "eval_wer": 0.10527193860527194,
      "step": 3400
    },
    {
      "epoch": 65.09,
      "learning_rate": 6.7536610307503735e-06,
      "loss": 2.6394,
      "step": 3450
    },
    {
      "epoch": 65.09,
      "eval_loss": 3.2334530353546143,
      "eval_runtime": 2.3834,
      "eval_samples_per_second": 300.837,
      "eval_steps_per_second": 9.65,
      "eval_wer": 0.09943276609943276,
      "step": 3450
    },
    {
      "epoch": 66.04,
      "learning_rate": 6.444005348764207e-06,
      "loss": 2.6899,
      "step": 3500
    },
    {
      "epoch": 66.04,
      "eval_loss": 3.227813482284546,
      "eval_runtime": 2.2591,
      "eval_samples_per_second": 317.385,
      "eval_steps_per_second": 10.181,
      "eval_wer": 0.10043376710043377,
      "step": 3500
    },
    {
      "epoch": 66.98,
      "learning_rate": 6.138205558658212e-06,
      "loss": 2.719,
      "step": 3550
    },
    {
      "epoch": 66.98,
      "eval_loss": 3.201153516769409,
      "eval_runtime": 2.3345,
      "eval_samples_per_second": 307.127,
      "eval_steps_per_second": 9.852,
      "eval_wer": 0.09793126459793126,
      "step": 3550
    },
    {
      "epoch": 67.92,
      "learning_rate": 5.83659325005591e-06,
      "loss": 2.6997,
      "step": 3600
    },
    {
      "epoch": 67.92,
      "eval_loss": 3.2009031772613525,
      "eval_runtime": 2.2428,
      "eval_samples_per_second": 319.684,
      "eval_steps_per_second": 10.255,
      "eval_wer": 0.09793126459793126,
      "step": 3600
    },
    {
      "epoch": 68.87,
      "learning_rate": 5.53949547194521e-06,
      "loss": 2.5935,
      "step": 3650
    },
    {
      "epoch": 68.87,
      "eval_loss": 3.2141449451446533,
      "eval_runtime": 2.306,
      "eval_samples_per_second": 310.924,
      "eval_steps_per_second": 9.974,
      "eval_wer": 0.09784784784784785,
      "step": 3650
    },
    {
      "epoch": 69.81,
      "learning_rate": 5.247234378047524e-06,
      "loss": 2.6115,
      "step": 3700
    },
    {
      "epoch": 69.81,
      "eval_loss": 3.1760427951812744,
      "eval_runtime": 2.254,
      "eval_samples_per_second": 318.096,
      "eval_steps_per_second": 10.204,
      "eval_wer": 0.09467801134467801,
      "step": 3700
    },
    {
      "epoch": 70.75,
      "learning_rate": 4.960126877495005e-06,
      "loss": 2.5713,
      "step": 3750
    },
    {
      "epoch": 70.75,
      "eval_loss": 3.1937472820281982,
      "eval_runtime": 2.3527,
      "eval_samples_per_second": 304.754,
      "eval_steps_per_second": 9.776,
      "eval_wer": 0.09768101434768102,
      "step": 3750
    },
    {
      "epoch": 71.7,
      "learning_rate": 4.67848429119466e-06,
      "loss": 2.6647,
      "step": 3800
    },
    {
      "epoch": 71.7,
      "eval_loss": 3.162875175476074,
      "eval_runtime": 2.3463,
      "eval_samples_per_second": 305.584,
      "eval_steps_per_second": 9.803,
      "eval_wer": 0.0985985985985986,
      "step": 3800
    },
    {
      "epoch": 72.64,
      "learning_rate": 4.402612014251967e-06,
      "loss": 2.4878,
      "step": 3850
    },
    {
      "epoch": 72.64,
      "eval_loss": 3.1674914360046387,
      "eval_runtime": 2.32,
      "eval_samples_per_second": 309.054,
      "eval_steps_per_second": 9.914,
      "eval_wer": 0.09517851184517852,
      "step": 3850
    },
    {
      "epoch": 73.58,
      "learning_rate": 4.132809184820095e-06,
      "loss": 2.4761,
      "step": 3900
    },
    {
      "epoch": 73.58,
      "eval_loss": 3.1950597763061523,
      "eval_runtime": 2.2542,
      "eval_samples_per_second": 318.073,
      "eval_steps_per_second": 10.203,
      "eval_wer": 0.09759759759759759,
      "step": 3900
    },
    {
      "epoch": 74.53,
      "learning_rate": 3.869368359733711e-06,
      "loss": 2.3124,
      "step": 3950
    },
    {
      "epoch": 74.53,
      "eval_loss": 3.162883758544922,
      "eval_runtime": 2.3024,
      "eval_samples_per_second": 311.42,
      "eval_steps_per_second": 9.99,
      "eval_wer": 0.09542876209542876,
      "step": 3950
    },
    {
      "epoch": 75.47,
      "learning_rate": 3.6125751972791635e-06,
      "loss": 2.5718,
      "step": 4000
    },
    {
      "epoch": 75.47,
      "eval_loss": 3.1576590538024902,
      "eval_runtime": 2.2697,
      "eval_samples_per_second": 315.908,
      "eval_steps_per_second": 10.134,
      "eval_wer": 0.09784784784784785,
      "step": 4000
    },
    {
      "epoch": 76.42,
      "learning_rate": 3.3627081474450273e-06,
      "loss": 2.4606,
      "step": 4050
    },
    {
      "epoch": 76.42,
      "eval_loss": 3.1632273197174072,
      "eval_runtime": 2.4587,
      "eval_samples_per_second": 291.613,
      "eval_steps_per_second": 9.354,
      "eval_wer": 0.09734734734734735,
      "step": 4050
    },
    {
      "epoch": 77.36,
      "learning_rate": 3.120038149988832e-06,
      "loss": 2.5313,
      "step": 4100
    },
    {
      "epoch": 77.36,
      "eval_loss": 3.1841392517089844,
      "eval_runtime": 2.3574,
      "eval_samples_per_second": 304.154,
      "eval_steps_per_second": 9.757,
      "eval_wer": 0.09884884884884886,
      "step": 4100
    },
    {
      "epoch": 78.3,
      "learning_rate": 2.889457763733774e-06,
      "loss": 2.5124,
      "step": 4150
    },
    {
      "epoch": 78.3,
      "eval_loss": 3.1893506050109863,
      "eval_runtime": 2.2547,
      "eval_samples_per_second": 318.009,
      "eval_steps_per_second": 10.201,
      "eval_wer": 0.09868201534868201,
      "step": 4150
    },
    {
      "epoch": 79.25,
      "learning_rate": 2.6618064384144925e-06,
      "loss": 2.3324,
      "step": 4200
    },
    {
      "epoch": 79.25,
      "eval_loss": 3.1718995571136475,
      "eval_runtime": 2.3078,
      "eval_samples_per_second": 310.686,
      "eval_steps_per_second": 9.966,
      "eval_wer": 0.0965965965965966,
      "step": 4200
    },
    {
      "epoch": 80.19,
      "learning_rate": 2.44211217820481e-06,
      "loss": 2.4468,
      "step": 4250
    },
    {
      "epoch": 80.19,
      "eval_loss": 3.175983190536499,
      "eval_runtime": 2.483,
      "eval_samples_per_second": 288.768,
      "eval_steps_per_second": 9.263,
      "eval_wer": 0.09642976309642977,
      "step": 4250
    },
    {
      "epoch": 81.13,
      "learning_rate": 2.2306132054298847e-06,
      "loss": 2.4035,
      "step": 4300
    },
    {
      "epoch": 81.13,
      "eval_loss": 3.201446294784546,
      "eval_runtime": 2.2646,
      "eval_samples_per_second": 316.615,
      "eval_steps_per_second": 10.156,
      "eval_wer": 0.09826493159826494,
      "step": 4300
    },
    {
      "epoch": 82.08,
      "learning_rate": 2.027538855972291e-06,
      "loss": 2.3834,
      "step": 4350
    },
    {
      "epoch": 82.08,
      "eval_loss": 3.1823039054870605,
      "eval_runtime": 2.3457,
      "eval_samples_per_second": 305.661,
      "eval_steps_per_second": 9.805,
      "eval_wer": 0.0965965965965966,
      "step": 4350
    },
    {
      "epoch": 83.02,
      "learning_rate": 1.8331093305949532e-06,
      "loss": 2.3655,
      "step": 4400
    },
    {
      "epoch": 83.02,
      "eval_loss": 3.1758322715759277,
      "eval_runtime": 2.3111,
      "eval_samples_per_second": 310.239,
      "eval_steps_per_second": 9.952,
      "eval_wer": 0.09476142809476143,
      "step": 4400
    },
    {
      "epoch": 83.96,
      "learning_rate": 1.647535456169591e-06,
      "loss": 2.3525,
      "step": 4450
    },
    {
      "epoch": 83.96,
      "eval_loss": 3.192059278488159,
      "eval_runtime": 2.397,
      "eval_samples_per_second": 299.127,
      "eval_steps_per_second": 9.595,
      "eval_wer": 0.09801468134801468,
      "step": 4450
    },
    {
      "epoch": 84.91,
      "learning_rate": 1.4710184570696184e-06,
      "loss": 2.4428,
      "step": 4500
    },
    {
      "epoch": 84.91,
      "eval_loss": 3.1989715099334717,
      "eval_runtime": 2.305,
      "eval_samples_per_second": 311.067,
      "eval_steps_per_second": 9.978,
      "eval_wer": 0.09701368034701369,
      "step": 4500
    },
    {
      "epoch": 85.85,
      "learning_rate": 1.3037497369753871e-06,
      "loss": 2.3276,
      "step": 4550
    },
    {
      "epoch": 85.85,
      "eval_loss": 3.1906816959381104,
      "eval_runtime": 2.3224,
      "eval_samples_per_second": 308.734,
      "eval_steps_per_second": 9.904,
      "eval_wer": 0.09843176509843177,
      "step": 4550
    },
    {
      "epoch": 86.79,
      "learning_rate": 1.1459106713283286e-06,
      "loss": 2.4423,
      "step": 4600
    },
    {
      "epoch": 86.79,
      "eval_loss": 3.189251661300659,
      "eval_runtime": 2.2565,
      "eval_samples_per_second": 317.748,
      "eval_steps_per_second": 10.193,
      "eval_wer": 0.09768101434768102,
      "step": 4600
    },
    {
      "epoch": 87.74,
      "learning_rate": 9.976724106591128e-07,
      "loss": 2.3457,
      "step": 4650
    },
    {
      "epoch": 87.74,
      "eval_loss": 3.2001450061798096,
      "eval_runtime": 2.4103,
      "eval_samples_per_second": 297.477,
      "eval_steps_per_second": 9.542,
      "eval_wer": 0.10051718385051718,
      "step": 4650
    },
    {
      "epoch": 88.68,
      "learning_rate": 8.591956950030067e-07,
      "loss": 2.4146,
      "step": 4700
    },
    {
      "epoch": 88.68,
      "eval_loss": 3.1883089542388916,
      "eval_runtime": 2.2661,
      "eval_samples_per_second": 316.404,
      "eval_steps_per_second": 10.15,
      "eval_wer": 0.09851518184851518,
      "step": 4700
    },
    {
      "epoch": 89.62,
      "learning_rate": 7.306306796037188e-07,
      "loss": 2.3415,
      "step": 4750
    },
    {
      "epoch": 89.62,
      "eval_loss": 3.193378210067749,
      "eval_runtime": 2.3137,
      "eval_samples_per_second": 309.9,
      "eval_steps_per_second": 9.941,
      "eval_wer": 0.09968301634968302,
      "step": 4750
    },
    {
      "epoch": 90.57,
      "learning_rate": 6.121167720947174e-07,
      "loss": 2.2057,
      "step": 4800
    },
    {
      "epoch": 90.57,
      "eval_loss": 3.193852663040161,
      "eval_runtime": 2.2411,
      "eval_samples_per_second": 319.931,
      "eval_steps_per_second": 10.263,
      "eval_wer": 0.09951618284951619,
      "step": 4800
    },
    {
      "epoch": 91.51,
      "learning_rate": 5.037824813345571e-07,
      "loss": 2.5141,
      "step": 4850
    },
    {
      "epoch": 91.51,
      "eval_loss": 3.1944401264190674,
      "eval_runtime": 2.2989,
      "eval_samples_per_second": 311.892,
      "eval_steps_per_second": 10.005,
      "eval_wer": 0.1006006006006006,
      "step": 4850
    },
    {
      "epoch": 92.45,
      "learning_rate": 4.057452780601334e-07,
      "loss": 2.175,
      "step": 4900
    },
    {
      "epoch": 92.45,
      "eval_loss": 3.1808109283447266,
      "eval_runtime": 2.257,
      "eval_samples_per_second": 317.681,
      "eval_steps_per_second": 10.191,
      "eval_wer": 0.0985985985985986,
      "step": 4900
    },
    {
      "epoch": 93.4,
      "learning_rate": 3.1811146750898025e-07,
      "loss": 2.4668,
      "step": 4950
    },
    {
      "epoch": 93.4,
      "eval_loss": 3.1884679794311523,
      "eval_runtime": 2.3016,
      "eval_samples_per_second": 311.519,
      "eval_steps_per_second": 9.993,
      "eval_wer": 0.09943276609943276,
      "step": 4950
    },
    {
      "epoch": 94.34,
      "learning_rate": 2.4097607414869995e-07,
      "loss": 2.2732,
      "step": 5000
    },
    {
      "epoch": 94.34,
      "eval_loss": 3.1876654624938965,
      "eval_runtime": 2.3135,
      "eval_samples_per_second": 309.92,
      "eval_steps_per_second": 9.942,
      "eval_wer": 0.09984984984984985,
      "step": 5000
    },
    {
      "epoch": 95.28,
      "learning_rate": 1.7442273863854553e-07,
      "loss": 2.2636,
      "step": 5050
    },
    {
      "epoch": 95.28,
      "eval_loss": 3.187673568725586,
      "eval_runtime": 2.2902,
      "eval_samples_per_second": 313.071,
      "eval_steps_per_second": 10.043,
      "eval_wer": 0.09893226559893227,
      "step": 5050
    },
    {
      "epoch": 96.23,
      "learning_rate": 1.185236271348722e-07,
      "loss": 2.3504,
      "step": 5100
    },
    {
      "epoch": 96.23,
      "eval_loss": 3.1904258728027344,
      "eval_runtime": 2.2424,
      "eval_samples_per_second": 319.748,
      "eval_steps_per_second": 10.257,
      "eval_wer": 0.10001668335001668,
      "step": 5100
    },
    {
      "epoch": 97.17,
      "learning_rate": 7.33393530387927e-08,
      "loss": 2.2721,
      "step": 5150
    },
    {
      "epoch": 97.17,
      "eval_loss": 3.1916568279266357,
      "eval_runtime": 2.3214,
      "eval_samples_per_second": 308.871,
      "eval_steps_per_second": 9.908,
      "eval_wer": 0.10051718385051718,
      "step": 5150
    },
    {
      "epoch": 98.11,
      "learning_rate": 3.8918911270908745e-08,
      "loss": 2.4014,
      "step": 5200
    },
    {
      "epoch": 98.11,
      "eval_loss": 3.1922268867492676,
      "eval_runtime": 2.3615,
      "eval_samples_per_second": 303.621,
      "eval_steps_per_second": 9.74,
      "eval_wer": 0.10026693360026694,
      "step": 5200
    },
    {
      "epoch": 99.06,
      "learning_rate": 1.5299625144370444e-08,
      "loss": 2.3263,
      "step": 5250
    },
    {
      "epoch": 99.06,
      "eval_loss": 3.189690351486206,
      "eval_runtime": 2.305,
      "eval_samples_per_second": 311.061,
      "eval_steps_per_second": 9.978,
      "eval_wer": 0.09984984984984985,
      "step": 5250
    },
    {
      "epoch": 100.0,
      "learning_rate": 2.6567448613734612e-09,
      "loss": 2.3731,
      "step": 5300
    },
    {
      "epoch": 100.0,
      "eval_loss": 3.1906578540802,
      "eval_runtime": 2.2424,
      "eval_samples_per_second": 319.749,
      "eval_steps_per_second": 10.257,
      "eval_wer": 0.1001001001001001,
      "step": 5300
    },
    {
      "epoch": 100.0,
      "step": 5300,
      "total_flos": 8.033701259431117e+16,
      "train_loss": 39.75085498593888,
      "train_runtime": 4049.5415,
      "train_samples_per_second": 83.565,
      "train_steps_per_second": 1.309
    }
  ],
  "logging_steps": 50,
  "max_steps": 5300,
  "num_train_epochs": 100,
  "save_steps": 50,
  "total_flos": 8.033701259431117e+16,
  "trial_name": null,
  "trial_params": null
}