{ "best_metric": 0.3294712007045746, "best_model_checkpoint": "/content/drive/MyDrive/Development/whisperfinetune/whisper-medium-20241208122604/20241208122604/checkpoint-100", "epoch": 100.0, "eval_steps": 1, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.1317138671875, "learning_rate": 1e-05, "loss": 0.0573, "step": 1 }, { "epoch": 1.0, "eval_loss": 2.347386121749878, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.1236, "eval_samples_per_second": 1.524, "eval_steps_per_second": 0.229, "eval_wer": 11.728395061728394, "step": 1 }, { "epoch": 2.0, "grad_norm": 0.5305148959159851, "learning_rate": 1e-05, "loss": 0.0595, "step": 2 }, { "epoch": 2.0, "eval_loss": 2.04854416847229, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.3393, "eval_samples_per_second": 1.499, "eval_steps_per_second": 0.225, "eval_wer": 8.641975308641975, "step": 2 }, { "epoch": 3.0, "grad_norm": 0.47034594416618347, "learning_rate": 1e-05, "loss": 0.0515, "step": 3 }, { "epoch": 3.0, "eval_loss": 1.7598810195922852, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.2129, "eval_samples_per_second": 1.514, "eval_steps_per_second": 0.227, "eval_wer": 5.555555555555555, "step": 3 }, { "epoch": 4.0, "grad_norm": 0.8446077108383179, "learning_rate": 1e-05, "loss": 0.044, "step": 4 }, { "epoch": 4.0, "eval_loss": 1.1538535356521606, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.3081, "eval_samples_per_second": 1.503, "eval_steps_per_second": 0.225, "eval_wer": 5.246913580246913, "step": 4 }, { "epoch": 5.0, "grad_norm": 0.1741047352552414, "learning_rate": 1e-05, "loss": 0.0282, "step": 5 }, { "epoch": 5.0, "eval_loss": 1.1221383810043335, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.1632, "eval_samples_per_second": 1.519, "eval_steps_per_second": 0.228, "eval_wer": 4.938271604938271, "step": 5 }, { "epoch": 6.0, "grad_norm": 0.16934086382389069, "learning_rate": 1e-05, "loss": 0.0276, "step": 6 }, { "epoch": 6.0, "eval_loss": 1.0995134115219116, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.2456, "eval_samples_per_second": 1.51, "eval_steps_per_second": 0.226, "eval_wer": 4.320987654320987, "step": 6 }, { "epoch": 7.0, "grad_norm": 0.166069895029068, "learning_rate": 1e-05, "loss": 0.0271, "step": 7 }, { "epoch": 7.0, "eval_loss": 1.0805295705795288, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0555, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.23, "eval_wer": 4.320987654320987, "step": 7 }, { "epoch": 8.0, "grad_norm": 0.1637350171804428, "learning_rate": 1e-05, "loss": 0.0266, "step": 8 }, { "epoch": 8.0, "eval_loss": 1.0605648756027222, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.2511, "eval_samples_per_second": 1.509, "eval_steps_per_second": 0.226, "eval_wer": 4.320987654320987, "step": 8 }, { "epoch": 9.0, "grad_norm": 0.16157877445220947, "learning_rate": 1e-05, "loss": 0.026, "step": 9 }, { "epoch": 9.0, "eval_loss": 1.0405693054199219, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.1332, "eval_samples_per_second": 1.523, "eval_steps_per_second": 0.228, "eval_wer": 4.62962962962963, "step": 9 }, { "epoch": 10.0, "grad_norm": 0.1597001850605011, "learning_rate": 1e-05, "loss": 0.0255, "step": 10 }, { "epoch": 10.0, "eval_loss": 1.021639347076416, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0982, "eval_samples_per_second": 1.527, "eval_steps_per_second": 0.229, "eval_wer": 4.62962962962963, "step": 10 }, { "epoch": 11.0, "grad_norm": 0.15809614956378937, "learning_rate": 1e-05, "loss": 0.025, "step": 11 }, { "epoch": 11.0, "eval_loss": 1.0027821063995361, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.2797, "eval_samples_per_second": 1.506, "eval_steps_per_second": 0.226, "eval_wer": 4.62962962962963, "step": 11 }, { "epoch": 12.0, "grad_norm": 0.15653067827224731, "learning_rate": 1e-05, "loss": 0.0244, "step": 12 }, { "epoch": 12.0, "eval_loss": 0.9836124777793884, "eval_model_preparation_time": 0.0121, "eval_runtime": 18.954, "eval_samples_per_second": 1.055, "eval_steps_per_second": 0.158, "eval_wer": 32.407407407407405, "step": 12 }, { "epoch": 13.0, "grad_norm": 0.15485768020153046, "learning_rate": 1e-05, "loss": 0.0239, "step": 13 }, { "epoch": 13.0, "eval_loss": 0.9660758376121521, "eval_model_preparation_time": 0.0121, "eval_runtime": 19.023, "eval_samples_per_second": 1.051, "eval_steps_per_second": 0.158, "eval_wer": 32.407407407407405, "step": 13 }, { "epoch": 14.0, "grad_norm": 0.1532512605190277, "learning_rate": 1e-05, "loss": 0.0234, "step": 14 }, { "epoch": 14.0, "eval_loss": 0.9473101496696472, "eval_model_preparation_time": 0.0121, "eval_runtime": 19.1218, "eval_samples_per_second": 1.046, "eval_steps_per_second": 0.157, "eval_wer": 32.407407407407405, "step": 14 }, { "epoch": 15.0, "grad_norm": 0.15129534900188446, "learning_rate": 1e-05, "loss": 0.0229, "step": 15 }, { "epoch": 15.0, "eval_loss": 0.9287378191947937, "eval_model_preparation_time": 0.0121, "eval_runtime": 18.9602, "eval_samples_per_second": 1.055, "eval_steps_per_second": 0.158, "eval_wer": 32.407407407407405, "step": 15 }, { "epoch": 16.0, "grad_norm": 0.1490040421485901, "learning_rate": 1e-05, "loss": 0.0224, "step": 16 }, { "epoch": 16.0, "eval_loss": 0.9106081128120422, "eval_model_preparation_time": 0.0121, "eval_runtime": 19.0132, "eval_samples_per_second": 1.052, "eval_steps_per_second": 0.158, "eval_wer": 30.555555555555557, "step": 16 }, { "epoch": 17.0, "grad_norm": 0.14625835418701172, "learning_rate": 1e-05, "loss": 0.0218, "step": 17 }, { "epoch": 17.0, "eval_loss": 0.8923694491386414, "eval_model_preparation_time": 0.0121, "eval_runtime": 19.1496, "eval_samples_per_second": 1.044, "eval_steps_per_second": 0.157, "eval_wer": 32.098765432098766, "step": 17 }, { "epoch": 18.0, "grad_norm": 0.1428905576467514, "learning_rate": 1e-05, "loss": 0.0213, "step": 18 }, { "epoch": 18.0, "eval_loss": 0.8747221827507019, "eval_model_preparation_time": 0.0121, "eval_runtime": 18.9828, "eval_samples_per_second": 1.054, "eval_steps_per_second": 0.158, "eval_wer": 30.555555555555557, "step": 18 }, { "epoch": 19.0, "grad_norm": 0.13874377310276031, "learning_rate": 1e-05, "loss": 0.0209, "step": 19 }, { "epoch": 19.0, "eval_loss": 0.8583929538726807, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0296, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.320987654320987, "step": 19 }, { "epoch": 20.0, "grad_norm": 0.13400524854660034, "learning_rate": 1e-05, "loss": 0.0204, "step": 20 }, { "epoch": 20.0, "eval_loss": 0.842006504535675, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0362, "eval_samples_per_second": 1.534, "eval_steps_per_second": 0.23, "eval_wer": 4.320987654320987, "step": 20 }, { "epoch": 21.0, "grad_norm": 0.128588005900383, "learning_rate": 1e-05, "loss": 0.02, "step": 21 }, { "epoch": 21.0, "eval_loss": 0.8266332745552063, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.1349, "eval_samples_per_second": 1.523, "eval_steps_per_second": 0.228, "eval_wer": 4.320987654320987, "step": 21 }, { "epoch": 22.0, "grad_norm": 0.12290169298648834, "learning_rate": 1e-05, "loss": 0.0196, "step": 22 }, { "epoch": 22.0, "eval_loss": 0.812411367893219, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0331, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.320987654320987, "step": 22 }, { "epoch": 23.0, "grad_norm": 0.11754843592643738, "learning_rate": 1e-05, "loss": 0.0193, "step": 23 }, { "epoch": 23.0, "eval_loss": 0.7992361187934875, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0451, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.320987654320987, "step": 23 }, { "epoch": 24.0, "grad_norm": 0.11315960437059402, "learning_rate": 1e-05, "loss": 0.019, "step": 24 }, { "epoch": 24.0, "eval_loss": 0.7874162197113037, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0108, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.320987654320987, "step": 24 }, { "epoch": 25.0, "grad_norm": 0.10973851382732391, "learning_rate": 1e-05, "loss": 0.0187, "step": 25 }, { "epoch": 25.0, "eval_loss": 0.7770275473594666, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0113, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.320987654320987, "step": 25 }, { "epoch": 26.0, "grad_norm": 0.10730645060539246, "learning_rate": 1e-05, "loss": 0.0185, "step": 26 }, { "epoch": 26.0, "eval_loss": 0.7670324444770813, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0264, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.320987654320987, "step": 26 }, { "epoch": 27.0, "grad_norm": 0.10558220744132996, "learning_rate": 1e-05, "loss": 0.0183, "step": 27 }, { "epoch": 27.0, "eval_loss": 0.7583534121513367, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0554, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.23, "eval_wer": 4.320987654320987, "step": 27 }, { "epoch": 28.0, "grad_norm": 0.10448750853538513, "learning_rate": 1e-05, "loss": 0.0181, "step": 28 }, { "epoch": 28.0, "eval_loss": 0.7502322793006897, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0142, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.320987654320987, "step": 28 }, { "epoch": 29.0, "grad_norm": 0.10369043797254562, "learning_rate": 1e-05, "loss": 0.0179, "step": 29 }, { "epoch": 29.0, "eval_loss": 0.7425273060798645, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.024, "eval_samples_per_second": 1.536, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 29 }, { "epoch": 30.0, "grad_norm": 0.10316835343837738, "learning_rate": 1e-05, "loss": 0.0177, "step": 30 }, { "epoch": 30.0, "eval_loss": 0.7353512644767761, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0504, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 30 }, { "epoch": 31.0, "grad_norm": 0.10271348059177399, "learning_rate": 1e-05, "loss": 0.0175, "step": 31 }, { "epoch": 31.0, "eval_loss": 0.7285407185554504, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0146, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 31 }, { "epoch": 32.0, "grad_norm": 0.10236351937055588, "learning_rate": 1e-05, "loss": 0.0174, "step": 32 }, { "epoch": 32.0, "eval_loss": 0.7218928337097168, "eval_model_preparation_time": 0.0121, "eval_runtime": 12.9915, "eval_samples_per_second": 1.539, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 32 }, { "epoch": 33.0, "grad_norm": 0.1020871251821518, "learning_rate": 1e-05, "loss": 0.0172, "step": 33 }, { "epoch": 33.0, "eval_loss": 0.7155935764312744, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.03, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 33 }, { "epoch": 34.0, "grad_norm": 0.10186725854873657, "learning_rate": 1e-05, "loss": 0.0171, "step": 34 }, { "epoch": 34.0, "eval_loss": 0.7092947959899902, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0366, "eval_samples_per_second": 1.534, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 34 }, { "epoch": 35.0, "grad_norm": 0.10165821760892868, "learning_rate": 1e-05, "loss": 0.0169, "step": 35 }, { "epoch": 35.0, "eval_loss": 0.703009307384491, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0457, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 35 }, { "epoch": 36.0, "grad_norm": 0.10144059360027313, "learning_rate": 1e-05, "loss": 0.0167, "step": 36 }, { "epoch": 36.0, "eval_loss": 0.6968026161193848, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0191, "eval_samples_per_second": 1.536, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 36 }, { "epoch": 37.0, "grad_norm": 0.1012805923819542, "learning_rate": 1e-05, "loss": 0.0166, "step": 37 }, { "epoch": 37.0, "eval_loss": 0.6906681060791016, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0242, "eval_samples_per_second": 1.536, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 37 }, { "epoch": 38.0, "grad_norm": 0.10113769769668579, "learning_rate": 1e-05, "loss": 0.0164, "step": 38 }, { "epoch": 38.0, "eval_loss": 0.6843755841255188, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0667, "eval_samples_per_second": 1.531, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 38 }, { "epoch": 39.0, "grad_norm": 0.10098178684711456, "learning_rate": 1e-05, "loss": 0.0162, "step": 39 }, { "epoch": 39.0, "eval_loss": 0.6783390045166016, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0465, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 39 }, { "epoch": 40.0, "grad_norm": 0.1008835956454277, "learning_rate": 1e-05, "loss": 0.0161, "step": 40 }, { "epoch": 40.0, "eval_loss": 0.6723348498344421, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0472, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 40 }, { "epoch": 41.0, "grad_norm": 0.10077005624771118, "learning_rate": 1e-05, "loss": 0.0159, "step": 41 }, { "epoch": 41.0, "eval_loss": 0.6662711501121521, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0732, "eval_samples_per_second": 1.53, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 41 }, { "epoch": 42.0, "grad_norm": 0.1006632074713707, "learning_rate": 1e-05, "loss": 0.0158, "step": 42 }, { "epoch": 42.0, "eval_loss": 0.6601687073707581, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.104, "eval_samples_per_second": 1.526, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 42 }, { "epoch": 43.0, "grad_norm": 0.10058455169200897, "learning_rate": 1e-05, "loss": 0.0156, "step": 43 }, { "epoch": 43.0, "eval_loss": 0.6541077494621277, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0457, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 43 }, { "epoch": 44.0, "grad_norm": 0.10048093646764755, "learning_rate": 1e-05, "loss": 0.0154, "step": 44 }, { "epoch": 44.0, "eval_loss": 0.6480630040168762, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0213, "eval_samples_per_second": 1.536, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 44 }, { "epoch": 45.0, "grad_norm": 0.10042457282543182, "learning_rate": 1e-05, "loss": 0.0153, "step": 45 }, { "epoch": 45.0, "eval_loss": 0.6420673131942749, "eval_model_preparation_time": 0.0121, "eval_runtime": 12.994, "eval_samples_per_second": 1.539, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 45 }, { "epoch": 46.0, "grad_norm": 0.1003439873456955, "learning_rate": 1e-05, "loss": 0.0151, "step": 46 }, { "epoch": 46.0, "eval_loss": 0.6359073519706726, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0936, "eval_samples_per_second": 1.527, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 46 }, { "epoch": 47.0, "grad_norm": 0.10026626288890839, "learning_rate": 1e-05, "loss": 0.0149, "step": 47 }, { "epoch": 47.0, "eval_loss": 0.6298264861106873, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0519, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 47 }, { "epoch": 48.0, "grad_norm": 0.10018378496170044, "learning_rate": 1e-05, "loss": 0.0148, "step": 48 }, { "epoch": 48.0, "eval_loss": 0.6237847208976746, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.1033, "eval_samples_per_second": 1.526, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 48 }, { "epoch": 49.0, "grad_norm": 0.10017245262861252, "learning_rate": 1e-05, "loss": 0.0146, "step": 49 }, { "epoch": 49.0, "eval_loss": 0.617721438407898, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0808, "eval_samples_per_second": 1.529, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 49 }, { "epoch": 50.0, "grad_norm": 0.10012275725603104, "learning_rate": 1e-05, "loss": 0.0144, "step": 50 }, { "epoch": 50.0, "eval_loss": 0.6115495562553406, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0424, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 50 }, { "epoch": 51.0, "grad_norm": 0.10007242113351822, "learning_rate": 1e-05, "loss": 0.0142, "step": 51 }, { "epoch": 51.0, "eval_loss": 0.6055248379707336, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.1155, "eval_samples_per_second": 1.525, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 51 }, { "epoch": 52.0, "grad_norm": 0.1000462993979454, "learning_rate": 1e-05, "loss": 0.0141, "step": 52 }, { "epoch": 52.0, "eval_loss": 0.5995514988899231, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0374, "eval_samples_per_second": 1.534, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 52 }, { "epoch": 53.0, "grad_norm": 0.09999983012676239, "learning_rate": 1e-05, "loss": 0.0139, "step": 53 }, { "epoch": 53.0, "eval_loss": 0.5934532284736633, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0708, "eval_samples_per_second": 1.53, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 53 }, { "epoch": 54.0, "grad_norm": 0.10001446306705475, "learning_rate": 1e-05, "loss": 0.0137, "step": 54 }, { "epoch": 54.0, "eval_loss": 0.5873024463653564, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0551, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 54 }, { "epoch": 55.0, "grad_norm": 0.09999886900186539, "learning_rate": 1e-05, "loss": 0.0136, "step": 55 }, { "epoch": 55.0, "eval_loss": 0.5811672210693359, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0575, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 55 }, { "epoch": 56.0, "grad_norm": 0.09996996074914932, "learning_rate": 1e-05, "loss": 0.0134, "step": 56 }, { "epoch": 56.0, "eval_loss": 0.5749332308769226, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0147, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 56 }, { "epoch": 57.0, "grad_norm": 0.09995821118354797, "learning_rate": 1e-05, "loss": 0.0132, "step": 57 }, { "epoch": 57.0, "eval_loss": 0.5688786506652832, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0009, "eval_samples_per_second": 1.538, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 57 }, { "epoch": 58.0, "grad_norm": 0.09996328502893448, "learning_rate": 1e-05, "loss": 0.013, "step": 58 }, { "epoch": 58.0, "eval_loss": 0.5627100467681885, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0166, "eval_samples_per_second": 1.536, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 58 }, { "epoch": 59.0, "grad_norm": 0.09995493292808533, "learning_rate": 1e-05, "loss": 0.0128, "step": 59 }, { "epoch": 59.0, "eval_loss": 0.5565866827964783, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0199, "eval_samples_per_second": 1.536, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 59 }, { "epoch": 60.0, "grad_norm": 0.09997319430112839, "learning_rate": 1e-05, "loss": 0.0127, "step": 60 }, { "epoch": 60.0, "eval_loss": 0.5505630373954773, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0285, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 60 }, { "epoch": 61.0, "grad_norm": 0.0999804213643074, "learning_rate": 1e-05, "loss": 0.0125, "step": 61 }, { "epoch": 61.0, "eval_loss": 0.5444908738136292, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0398, "eval_samples_per_second": 1.534, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 61 }, { "epoch": 62.0, "grad_norm": 0.10000340640544891, "learning_rate": 1e-05, "loss": 0.0123, "step": 62 }, { "epoch": 62.0, "eval_loss": 0.5383059978485107, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0216, "eval_samples_per_second": 1.536, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 62 }, { "epoch": 63.0, "grad_norm": 0.10002919286489487, "learning_rate": 1e-05, "loss": 0.0121, "step": 63 }, { "epoch": 63.0, "eval_loss": 0.5322217345237732, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.027, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 63 }, { "epoch": 64.0, "grad_norm": 0.10005642473697662, "learning_rate": 1e-05, "loss": 0.0119, "step": 64 }, { "epoch": 64.0, "eval_loss": 0.5260897278785706, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0762, "eval_samples_per_second": 1.529, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 64 }, { "epoch": 65.0, "grad_norm": 0.1000811904668808, "learning_rate": 1e-05, "loss": 0.0117, "step": 65 }, { "epoch": 65.0, "eval_loss": 0.5198546648025513, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0323, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 65 }, { "epoch": 66.0, "grad_norm": 0.10011852532625198, "learning_rate": 1e-05, "loss": 0.0115, "step": 66 }, { "epoch": 66.0, "eval_loss": 0.513763964176178, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0426, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 66 }, { "epoch": 67.0, "grad_norm": 0.10016665607690811, "learning_rate": 1e-05, "loss": 0.0114, "step": 67 }, { "epoch": 67.0, "eval_loss": 0.5077674388885498, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0559, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 67 }, { "epoch": 68.0, "grad_norm": 0.10022170841693878, "learning_rate": 1e-05, "loss": 0.0112, "step": 68 }, { "epoch": 68.0, "eval_loss": 0.5016047358512878, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0439, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 68 }, { "epoch": 69.0, "grad_norm": 0.10022887587547302, "learning_rate": 1e-05, "loss": 0.011, "step": 69 }, { "epoch": 69.0, "eval_loss": 0.49558281898498535, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0145, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 69 }, { "epoch": 70.0, "grad_norm": 0.10028593987226486, "learning_rate": 1e-05, "loss": 0.0108, "step": 70 }, { "epoch": 70.0, "eval_loss": 0.4895244538784027, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0373, "eval_samples_per_second": 1.534, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 70 }, { "epoch": 71.0, "grad_norm": 0.10034992545843124, "learning_rate": 1e-05, "loss": 0.0106, "step": 71 }, { "epoch": 71.0, "eval_loss": 0.4835710823535919, "eval_model_preparation_time": 0.0121, "eval_runtime": 12.9895, "eval_samples_per_second": 1.54, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 71 }, { "epoch": 72.0, "grad_norm": 0.10041330754756927, "learning_rate": 1e-05, "loss": 0.0104, "step": 72 }, { "epoch": 72.0, "eval_loss": 0.4775284230709076, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0996, "eval_samples_per_second": 1.527, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 72 }, { "epoch": 73.0, "grad_norm": 0.10046479851007462, "learning_rate": 1e-05, "loss": 0.0102, "step": 73 }, { "epoch": 73.0, "eval_loss": 0.47157832980155945, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0539, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 73 }, { "epoch": 74.0, "grad_norm": 0.10052894800901413, "learning_rate": 1e-05, "loss": 0.01, "step": 74 }, { "epoch": 74.0, "eval_loss": 0.4655916392803192, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0131, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 74 }, { "epoch": 75.0, "grad_norm": 0.10059099644422531, "learning_rate": 1e-05, "loss": 0.0098, "step": 75 }, { "epoch": 75.0, "eval_loss": 0.4597587287425995, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0097, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 75 }, { "epoch": 76.0, "grad_norm": 0.10066922754049301, "learning_rate": 1e-05, "loss": 0.0096, "step": 76 }, { "epoch": 76.0, "eval_loss": 0.4539148807525635, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.1072, "eval_samples_per_second": 1.526, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 76 }, { "epoch": 77.0, "grad_norm": 0.10073923319578171, "learning_rate": 1e-05, "loss": 0.0094, "step": 77 }, { "epoch": 77.0, "eval_loss": 0.44823265075683594, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0093, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 77 }, { "epoch": 78.0, "grad_norm": 0.1008358970284462, "learning_rate": 1e-05, "loss": 0.0091, "step": 78 }, { "epoch": 78.0, "eval_loss": 0.442667692899704, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0167, "eval_samples_per_second": 1.536, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 78 }, { "epoch": 79.0, "grad_norm": 0.10090507566928864, "learning_rate": 1e-05, "loss": 0.0089, "step": 79 }, { "epoch": 79.0, "eval_loss": 0.43717432022094727, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0261, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 79 }, { "epoch": 80.0, "grad_norm": 0.10097309201955795, "learning_rate": 1e-05, "loss": 0.0087, "step": 80 }, { "epoch": 80.0, "eval_loss": 0.43188872933387756, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0257, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 80 }, { "epoch": 81.0, "grad_norm": 0.10104646533727646, "learning_rate": 1e-05, "loss": 0.0085, "step": 81 }, { "epoch": 81.0, "eval_loss": 0.4267992079257965, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0831, "eval_samples_per_second": 1.529, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 81 }, { "epoch": 82.0, "grad_norm": 0.10112679749727249, "learning_rate": 1e-05, "loss": 0.0083, "step": 82 }, { "epoch": 82.0, "eval_loss": 0.4219636917114258, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0252, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 82 }, { "epoch": 83.0, "grad_norm": 0.10117711871862411, "learning_rate": 1e-05, "loss": 0.008, "step": 83 }, { "epoch": 83.0, "eval_loss": 0.4172472655773163, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0112, "eval_samples_per_second": 1.537, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 83 }, { "epoch": 84.0, "grad_norm": 0.1012597382068634, "learning_rate": 1e-05, "loss": 0.0078, "step": 84 }, { "epoch": 84.0, "eval_loss": 0.41286230087280273, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0229, "eval_samples_per_second": 1.536, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 84 }, { "epoch": 85.0, "grad_norm": 0.101369209587574, "learning_rate": 1e-05, "loss": 0.0076, "step": 85 }, { "epoch": 85.0, "eval_loss": 0.40860816836357117, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0992, "eval_samples_per_second": 1.527, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 85 }, { "epoch": 86.0, "grad_norm": 0.10140149295330048, "learning_rate": 1e-05, "loss": 0.0073, "step": 86 }, { "epoch": 86.0, "eval_loss": 0.4042428433895111, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.03, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.23, "eval_wer": 4.012345679012346, "step": 86 }, { "epoch": 87.0, "grad_norm": 0.10143295675516129, "learning_rate": 1e-05, "loss": 0.0071, "step": 87 }, { "epoch": 87.0, "eval_loss": 0.3998439311981201, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.005, "eval_samples_per_second": 1.538, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 87 }, { "epoch": 88.0, "grad_norm": 0.1014971062541008, "learning_rate": 1e-05, "loss": 0.0069, "step": 88 }, { "epoch": 88.0, "eval_loss": 0.39532041549682617, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0078, "eval_samples_per_second": 1.538, "eval_steps_per_second": 0.231, "eval_wer": 4.012345679012346, "step": 88 }, { "epoch": 89.0, "grad_norm": 0.10153479129076004, "learning_rate": 1e-05, "loss": 0.0066, "step": 89 }, { "epoch": 89.0, "eval_loss": 0.3908376693725586, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0794, "eval_samples_per_second": 1.529, "eval_steps_per_second": 0.229, "eval_wer": 4.012345679012346, "step": 89 }, { "epoch": 90.0, "grad_norm": 0.10166627168655396, "learning_rate": 1e-05, "loss": 0.0064, "step": 90 }, { "epoch": 90.0, "eval_loss": 0.3862927258014679, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0683, "eval_samples_per_second": 1.53, "eval_steps_per_second": 0.23, "eval_wer": 4.320987654320987, "step": 90 }, { "epoch": 91.0, "grad_norm": 0.10168776661157608, "learning_rate": 1e-05, "loss": 0.0062, "step": 91 }, { "epoch": 91.0, "eval_loss": 0.38115009665489197, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0067, "eval_samples_per_second": 1.538, "eval_steps_per_second": 0.231, "eval_wer": 4.320987654320987, "step": 91 }, { "epoch": 92.0, "grad_norm": 0.10173481702804565, "learning_rate": 1e-05, "loss": 0.0059, "step": 92 }, { "epoch": 92.0, "eval_loss": 0.37574759125709534, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.045, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 4.320987654320987, "step": 92 }, { "epoch": 93.0, "grad_norm": 0.1017962321639061, "learning_rate": 1e-05, "loss": 0.0057, "step": 93 }, { "epoch": 93.0, "eval_loss": 0.3700896203517914, "eval_model_preparation_time": 0.0121, "eval_runtime": 12.9624, "eval_samples_per_second": 1.543, "eval_steps_per_second": 0.231, "eval_wer": 4.320987654320987, "step": 93 }, { "epoch": 94.0, "grad_norm": 0.10191880166530609, "learning_rate": 1e-05, "loss": 0.0055, "step": 94 }, { "epoch": 94.0, "eval_loss": 0.3639739453792572, "eval_model_preparation_time": 0.0121, "eval_runtime": 12.9926, "eval_samples_per_second": 1.539, "eval_steps_per_second": 0.231, "eval_wer": 4.320987654320987, "step": 94 }, { "epoch": 95.0, "grad_norm": 0.10210127383470535, "learning_rate": 1e-05, "loss": 0.0053, "step": 95 }, { "epoch": 95.0, "eval_loss": 0.3575873374938965, "eval_model_preparation_time": 0.0121, "eval_runtime": 12.9753, "eval_samples_per_second": 1.541, "eval_steps_per_second": 0.231, "eval_wer": 5.246913580246913, "step": 95 }, { "epoch": 96.0, "grad_norm": 0.10229586809873581, "learning_rate": 1e-05, "loss": 0.005, "step": 96 }, { "epoch": 96.0, "eval_loss": 0.351242333650589, "eval_model_preparation_time": 0.0121, "eval_runtime": 12.9627, "eval_samples_per_second": 1.543, "eval_steps_per_second": 0.231, "eval_wer": 5.246913580246913, "step": 96 }, { "epoch": 97.0, "grad_norm": 0.10258200764656067, "learning_rate": 1e-05, "loss": 0.0048, "step": 97 }, { "epoch": 97.0, "eval_loss": 0.34528884291648865, "eval_model_preparation_time": 0.0121, "eval_runtime": 12.9959, "eval_samples_per_second": 1.539, "eval_steps_per_second": 0.231, "eval_wer": 5.246913580246913, "step": 97 }, { "epoch": 98.0, "grad_norm": 0.1028934046626091, "learning_rate": 1e-05, "loss": 0.0046, "step": 98 }, { "epoch": 98.0, "eval_loss": 0.3395444452762604, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0387, "eval_samples_per_second": 1.534, "eval_steps_per_second": 0.23, "eval_wer": 5.246913580246913, "step": 98 }, { "epoch": 99.0, "grad_norm": 0.10328880697488785, "learning_rate": 1e-05, "loss": 0.0044, "step": 99 }, { "epoch": 99.0, "eval_loss": 0.33414900302886963, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0468, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.23, "eval_wer": 5.864197530864197, "step": 99 }, { "epoch": 100.0, "grad_norm": 0.10364117473363876, "learning_rate": 1e-05, "loss": 0.0042, "step": 100 }, { "epoch": 100.0, "eval_loss": 0.3294712007045746, "eval_model_preparation_time": 0.0121, "eval_runtime": 13.0722, "eval_samples_per_second": 1.53, "eval_steps_per_second": 0.229, "eval_wer": 5.864197530864197, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 1000, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.12362944512e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }