{ "best_metric": 58.90729282066525, "best_model_checkpoint": "./checkpoint-3500", "epoch": 15.337423312883436, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 4.6e-07, "loss": 1.0735, "step": 25 }, { "epoch": 0.15, "learning_rate": 9.6e-07, "loss": 0.9014, "step": 50 }, { "epoch": 0.23, "learning_rate": 9.953535353535352e-07, "loss": 0.651, "step": 75 }, { "epoch": 0.31, "learning_rate": 9.903030303030303e-07, "loss": 0.4902, "step": 100 }, { "epoch": 0.38, "learning_rate": 9.852525252525253e-07, "loss": 0.4484, "step": 125 }, { "epoch": 0.46, "learning_rate": 9.802020202020202e-07, "loss": 0.4164, "step": 150 }, { "epoch": 0.54, "learning_rate": 9.751515151515151e-07, "loss": 0.4303, "step": 175 }, { "epoch": 0.61, "learning_rate": 9.7010101010101e-07, "loss": 0.4073, "step": 200 }, { "epoch": 0.69, "learning_rate": 9.65050505050505e-07, "loss": 0.3977, "step": 225 }, { "epoch": 0.77, "learning_rate": 9.6e-07, "loss": 0.3906, "step": 250 }, { "epoch": 0.84, "learning_rate": 9.549494949494948e-07, "loss": 0.3687, "step": 275 }, { "epoch": 0.92, "learning_rate": 9.498989898989899e-07, "loss": 0.3817, "step": 300 }, { "epoch": 1.0, "learning_rate": 9.448484848484848e-07, "loss": 0.3733, "step": 325 }, { "epoch": 1.07, "learning_rate": 9.397979797979797e-07, "loss": 0.3608, "step": 350 }, { "epoch": 1.15, "learning_rate": 9.347474747474747e-07, "loss": 0.3519, "step": 375 }, { "epoch": 1.23, "learning_rate": 9.296969696969696e-07, "loss": 0.3286, "step": 400 }, { "epoch": 1.3, "learning_rate": 9.246464646464645e-07, "loss": 0.3468, "step": 425 }, { "epoch": 1.38, "learning_rate": 9.195959595959596e-07, "loss": 0.3386, "step": 450 }, { "epoch": 1.46, "learning_rate": 9.145454545454545e-07, "loss": 0.3356, "step": 475 }, { "epoch": 1.53, "learning_rate": 9.094949494949494e-07, "loss": 0.3404, "step": 500 }, { "epoch": 1.53, "eval_loss": 0.46061962842941284, "eval_runtime": 2718.8588, "eval_samples_per_second": 3.849, "eval_steps_per_second": 0.121, "eval_wer": 66.62163257585992, "step": 500 }, { "epoch": 1.61, "learning_rate": 9.044444444444445e-07, "loss": 0.3266, "step": 525 }, { "epoch": 1.69, "learning_rate": 8.993939393939394e-07, "loss": 0.3369, "step": 550 }, { "epoch": 1.76, "learning_rate": 8.943434343434342e-07, "loss": 0.3154, "step": 575 }, { "epoch": 1.84, "learning_rate": 8.892929292929292e-07, "loss": 0.3285, "step": 600 }, { "epoch": 1.92, "learning_rate": 8.842424242424242e-07, "loss": 0.3154, "step": 625 }, { "epoch": 1.99, "learning_rate": 8.791919191919191e-07, "loss": 0.3243, "step": 650 }, { "epoch": 2.07, "learning_rate": 8.741414141414141e-07, "loss": 0.3045, "step": 675 }, { "epoch": 2.15, "learning_rate": 8.690909090909091e-07, "loss": 0.2976, "step": 700 }, { "epoch": 2.22, "learning_rate": 8.64040404040404e-07, "loss": 0.2947, "step": 725 }, { "epoch": 2.3, "learning_rate": 8.58989898989899e-07, "loss": 0.2951, "step": 750 }, { "epoch": 2.38, "learning_rate": 8.539393939393939e-07, "loss": 0.2865, "step": 775 }, { "epoch": 2.45, "learning_rate": 8.488888888888888e-07, "loss": 0.3027, "step": 800 }, { "epoch": 2.53, "learning_rate": 8.438383838383838e-07, "loss": 0.2963, "step": 825 }, { "epoch": 2.61, "learning_rate": 8.387878787878787e-07, "loss": 0.2903, "step": 850 }, { "epoch": 2.68, "learning_rate": 8.337373737373737e-07, "loss": 0.2887, "step": 875 }, { "epoch": 2.76, "learning_rate": 8.286868686868687e-07, "loss": 0.2903, "step": 900 }, { "epoch": 2.84, "learning_rate": 8.236363636363636e-07, "loss": 0.2836, "step": 925 }, { "epoch": 2.91, "learning_rate": 8.185858585858586e-07, "loss": 0.2832, "step": 950 }, { "epoch": 2.99, "learning_rate": 8.135353535353536e-07, "loss": 0.2984, "step": 975 }, { "epoch": 3.07, "learning_rate": 8.084848484848484e-07, "loss": 0.2707, "step": 1000 }, { "epoch": 3.07, "eval_loss": 0.42951369285583496, "eval_runtime": 2739.5863, "eval_samples_per_second": 3.82, "eval_steps_per_second": 0.12, "eval_wer": 66.84995541625011, "step": 1000 }, { "epoch": 3.14, "learning_rate": 8.034343434343433e-07, "loss": 0.2778, "step": 1025 }, { "epoch": 3.22, "learning_rate": 7.983838383838384e-07, "loss": 0.2742, "step": 1050 }, { "epoch": 3.3, "learning_rate": 7.933333333333333e-07, "loss": 0.2626, "step": 1075 }, { "epoch": 3.37, "learning_rate": 7.882828282828282e-07, "loss": 0.2658, "step": 1100 }, { "epoch": 3.45, "learning_rate": 7.832323232323233e-07, "loss": 0.2654, "step": 1125 }, { "epoch": 3.53, "learning_rate": 7.781818181818182e-07, "loss": 0.2634, "step": 1150 }, { "epoch": 3.6, "learning_rate": 7.731313131313131e-07, "loss": 0.2609, "step": 1175 }, { "epoch": 3.68, "learning_rate": 7.68080808080808e-07, "loss": 0.262, "step": 1200 }, { "epoch": 3.76, "learning_rate": 7.63030303030303e-07, "loss": 0.2701, "step": 1225 }, { "epoch": 3.83, "learning_rate": 7.579797979797979e-07, "loss": 0.2646, "step": 1250 }, { "epoch": 3.91, "learning_rate": 7.529292929292928e-07, "loss": 0.2692, "step": 1275 }, { "epoch": 3.99, "learning_rate": 7.478787878787879e-07, "loss": 0.2588, "step": 1300 }, { "epoch": 4.06, "learning_rate": 7.428282828282828e-07, "loss": 0.2549, "step": 1325 }, { "epoch": 4.14, "learning_rate": 7.377777777777777e-07, "loss": 0.2376, "step": 1350 }, { "epoch": 4.22, "learning_rate": 7.327272727272728e-07, "loss": 0.2542, "step": 1375 }, { "epoch": 4.29, "learning_rate": 7.276767676767677e-07, "loss": 0.2634, "step": 1400 }, { "epoch": 4.37, "learning_rate": 7.226262626262625e-07, "loss": 0.2453, "step": 1425 }, { "epoch": 4.45, "learning_rate": 7.175757575757575e-07, "loss": 0.2393, "step": 1450 }, { "epoch": 4.52, "learning_rate": 7.125252525252525e-07, "loss": 0.2466, "step": 1475 }, { "epoch": 4.6, "learning_rate": 7.074747474747474e-07, "loss": 0.2427, "step": 1500 }, { "epoch": 4.6, "eval_loss": 0.4123845398426056, "eval_runtime": 2708.1735, "eval_samples_per_second": 3.865, "eval_steps_per_second": 0.121, "eval_wer": 61.166202815531356, "step": 1500 }, { "epoch": 4.68, "learning_rate": 7.024242424242424e-07, "loss": 0.247, "step": 1525 }, { "epoch": 4.75, "learning_rate": 6.973737373737374e-07, "loss": 0.2531, "step": 1550 }, { "epoch": 4.83, "learning_rate": 6.923232323232323e-07, "loss": 0.2496, "step": 1575 }, { "epoch": 4.91, "learning_rate": 6.872727272727273e-07, "loss": 0.2366, "step": 1600 }, { "epoch": 4.98, "learning_rate": 6.822222222222221e-07, "loss": 0.2287, "step": 1625 }, { "epoch": 5.06, "learning_rate": 6.771717171717171e-07, "loss": 0.2352, "step": 1650 }, { "epoch": 5.14, "learning_rate": 6.721212121212121e-07, "loss": 0.2242, "step": 1675 }, { "epoch": 5.21, "learning_rate": 6.67070707070707e-07, "loss": 0.2288, "step": 1700 }, { "epoch": 5.29, "learning_rate": 6.62020202020202e-07, "loss": 0.2374, "step": 1725 }, { "epoch": 5.37, "learning_rate": 6.56969696969697e-07, "loss": 0.2312, "step": 1750 }, { "epoch": 5.44, "learning_rate": 6.519191919191919e-07, "loss": 0.2279, "step": 1775 }, { "epoch": 5.52, "learning_rate": 6.468686868686868e-07, "loss": 0.2263, "step": 1800 }, { "epoch": 5.6, "learning_rate": 6.418181818181818e-07, "loss": 0.2305, "step": 1825 }, { "epoch": 5.67, "learning_rate": 6.367676767676767e-07, "loss": 0.2251, "step": 1850 }, { "epoch": 5.75, "learning_rate": 6.317171717171716e-07, "loss": 0.2285, "step": 1875 }, { "epoch": 5.83, "learning_rate": 6.266666666666667e-07, "loss": 0.2308, "step": 1900 }, { "epoch": 5.9, "learning_rate": 6.216161616161616e-07, "loss": 0.2343, "step": 1925 }, { "epoch": 5.98, "learning_rate": 6.165656565656565e-07, "loss": 0.2335, "step": 1950 }, { "epoch": 6.06, "learning_rate": 6.115151515151516e-07, "loss": 0.2246, "step": 1975 }, { "epoch": 6.13, "learning_rate": 6.064646464646465e-07, "loss": 0.2131, "step": 2000 }, { "epoch": 6.13, "eval_loss": 0.4056357443332672, "eval_runtime": 2709.0159, "eval_samples_per_second": 3.863, "eval_steps_per_second": 0.121, "eval_wer": 62.303763949309634, "step": 2000 }, { "epoch": 6.21, "learning_rate": 6.014141414141413e-07, "loss": 0.2113, "step": 2025 }, { "epoch": 6.29, "learning_rate": 5.963636363636363e-07, "loss": 0.2207, "step": 2050 }, { "epoch": 6.37, "learning_rate": 5.913131313131313e-07, "loss": 0.2171, "step": 2075 }, { "epoch": 6.44, "learning_rate": 5.862626262626262e-07, "loss": 0.2112, "step": 2100 }, { "epoch": 6.52, "learning_rate": 5.812121212121212e-07, "loss": 0.2098, "step": 2125 }, { "epoch": 6.6, "learning_rate": 5.761616161616162e-07, "loss": 0.2094, "step": 2150 }, { "epoch": 6.67, "learning_rate": 5.711111111111111e-07, "loss": 0.2149, "step": 2175 }, { "epoch": 6.75, "learning_rate": 5.660606060606061e-07, "loss": 0.2162, "step": 2200 }, { "epoch": 6.83, "learning_rate": 5.610101010101009e-07, "loss": 0.2216, "step": 2225 }, { "epoch": 6.9, "learning_rate": 5.559595959595959e-07, "loss": 0.2169, "step": 2250 }, { "epoch": 6.98, "learning_rate": 5.509090909090909e-07, "loss": 0.2209, "step": 2275 }, { "epoch": 7.06, "learning_rate": 5.458585858585858e-07, "loss": 0.2108, "step": 2300 }, { "epoch": 7.13, "learning_rate": 5.408080808080808e-07, "loss": 0.198, "step": 2325 }, { "epoch": 7.21, "learning_rate": 5.357575757575758e-07, "loss": 0.2075, "step": 2350 }, { "epoch": 7.29, "learning_rate": 5.307070707070707e-07, "loss": 0.2067, "step": 2375 }, { "epoch": 7.36, "learning_rate": 5.256565656565657e-07, "loss": 0.2193, "step": 2400 }, { "epoch": 7.44, "learning_rate": 5.206060606060607e-07, "loss": 0.1996, "step": 2425 }, { "epoch": 7.52, "learning_rate": 5.155555555555555e-07, "loss": 0.2118, "step": 2450 }, { "epoch": 7.59, "learning_rate": 5.105050505050504e-07, "loss": 0.1938, "step": 2475 }, { "epoch": 7.67, "learning_rate": 5.054545454545454e-07, "loss": 0.2085, "step": 2500 }, { "epoch": 7.67, "eval_loss": 0.401197612285614, "eval_runtime": 2721.3063, "eval_samples_per_second": 3.846, "eval_steps_per_second": 0.121, "eval_wer": 62.27539247210137, "step": 2500 }, { "epoch": 7.75, "learning_rate": 5.004040404040404e-07, "loss": 0.1983, "step": 2525 }, { "epoch": 7.82, "learning_rate": 4.953535353535353e-07, "loss": 0.2098, "step": 2550 }, { "epoch": 7.9, "learning_rate": 4.903030303030302e-07, "loss": 0.2036, "step": 2575 }, { "epoch": 7.98, "learning_rate": 4.852525252525253e-07, "loss": 0.2024, "step": 2600 }, { "epoch": 8.05, "learning_rate": 4.802020202020202e-07, "loss": 0.2074, "step": 2625 }, { "epoch": 8.13, "learning_rate": 4.7515151515151514e-07, "loss": 0.1989, "step": 2650 }, { "epoch": 8.21, "learning_rate": 4.701010101010101e-07, "loss": 0.1904, "step": 2675 }, { "epoch": 8.28, "learning_rate": 4.65050505050505e-07, "loss": 0.1921, "step": 2700 }, { "epoch": 8.36, "learning_rate": 4.6e-07, "loss": 0.1993, "step": 2725 }, { "epoch": 8.44, "learning_rate": 4.549494949494949e-07, "loss": 0.1966, "step": 2750 }, { "epoch": 8.51, "learning_rate": 4.498989898989899e-07, "loss": 0.1912, "step": 2775 }, { "epoch": 8.59, "learning_rate": 4.448484848484848e-07, "loss": 0.2001, "step": 2800 }, { "epoch": 8.67, "learning_rate": 4.3979797979797977e-07, "loss": 0.2025, "step": 2825 }, { "epoch": 8.74, "learning_rate": 4.3474747474747475e-07, "loss": 0.192, "step": 2850 }, { "epoch": 8.82, "learning_rate": 4.296969696969697e-07, "loss": 0.1979, "step": 2875 }, { "epoch": 8.9, "learning_rate": 4.246464646464646e-07, "loss": 0.1928, "step": 2900 }, { "epoch": 8.97, "learning_rate": 4.1959595959595955e-07, "loss": 0.1845, "step": 2925 }, { "epoch": 9.05, "learning_rate": 4.1454545454545453e-07, "loss": 0.1846, "step": 2950 }, { "epoch": 9.13, "learning_rate": 4.0949494949494946e-07, "loss": 0.1926, "step": 2975 }, { "epoch": 9.2, "learning_rate": 4.044444444444444e-07, "loss": 0.1904, "step": 3000 }, { "epoch": 9.2, "eval_loss": 0.3976161777973175, "eval_runtime": 2684.5933, "eval_samples_per_second": 3.899, "eval_steps_per_second": 0.122, "eval_wer": 59.734118727877004, "step": 3000 }, { "epoch": 9.28, "learning_rate": 3.993939393939394e-07, "loss": 0.1972, "step": 3025 }, { "epoch": 9.36, "learning_rate": 3.943434343434343e-07, "loss": 0.1881, "step": 3050 }, { "epoch": 9.43, "learning_rate": 3.892929292929293e-07, "loss": 0.1935, "step": 3075 }, { "epoch": 9.51, "learning_rate": 3.8424242424242423e-07, "loss": 0.1911, "step": 3100 }, { "epoch": 9.59, "learning_rate": 3.7919191919191916e-07, "loss": 0.1852, "step": 3125 }, { "epoch": 9.66, "learning_rate": 3.7414141414141414e-07, "loss": 0.1939, "step": 3150 }, { "epoch": 9.74, "learning_rate": 3.690909090909091e-07, "loss": 0.1834, "step": 3175 }, { "epoch": 9.82, "learning_rate": 3.6404040404040406e-07, "loss": 0.1794, "step": 3200 }, { "epoch": 9.89, "learning_rate": 3.5898989898989894e-07, "loss": 0.1878, "step": 3225 }, { "epoch": 9.97, "learning_rate": 3.539393939393939e-07, "loss": 0.1884, "step": 3250 }, { "epoch": 10.05, "learning_rate": 3.488888888888889e-07, "loss": 0.1844, "step": 3275 }, { "epoch": 10.12, "learning_rate": 3.4383838383838384e-07, "loss": 0.1744, "step": 3300 }, { "epoch": 10.2, "learning_rate": 3.3878787878787877e-07, "loss": 0.1848, "step": 3325 }, { "epoch": 10.28, "learning_rate": 3.337373737373737e-07, "loss": 0.1792, "step": 3350 }, { "epoch": 10.35, "learning_rate": 3.286868686868687e-07, "loss": 0.1757, "step": 3375 }, { "epoch": 10.43, "learning_rate": 3.236363636363636e-07, "loss": 0.1815, "step": 3400 }, { "epoch": 10.51, "learning_rate": 3.1858585858585855e-07, "loss": 0.1758, "step": 3425 }, { "epoch": 10.58, "learning_rate": 3.1353535353535353e-07, "loss": 0.1895, "step": 3450 }, { "epoch": 10.66, "learning_rate": 3.0848484848484847e-07, "loss": 0.177, "step": 3475 }, { "epoch": 10.74, "learning_rate": 3.0343434343434345e-07, "loss": 0.1836, "step": 3500 }, { "epoch": 10.74, "eval_loss": 0.40053287148475647, "eval_runtime": 2681.3744, "eval_samples_per_second": 3.903, "eval_steps_per_second": 0.122, "eval_wer": 58.90729282066525, "step": 3500 }, { "epoch": 10.81, "learning_rate": 2.9838383838383833e-07, "loss": 0.1861, "step": 3525 }, { "epoch": 10.89, "learning_rate": 2.933333333333333e-07, "loss": 0.1817, "step": 3550 }, { "epoch": 10.97, "learning_rate": 2.882828282828283e-07, "loss": 0.1854, "step": 3575 }, { "epoch": 11.04, "learning_rate": 2.8323232323232323e-07, "loss": 0.1796, "step": 3600 }, { "epoch": 11.12, "learning_rate": 2.7818181818181816e-07, "loss": 0.1719, "step": 3625 }, { "epoch": 11.2, "learning_rate": 2.731313131313131e-07, "loss": 0.1718, "step": 3650 }, { "epoch": 11.27, "learning_rate": 2.680808080808081e-07, "loss": 0.1774, "step": 3675 }, { "epoch": 11.35, "learning_rate": 2.63030303030303e-07, "loss": 0.1795, "step": 3700 }, { "epoch": 11.43, "learning_rate": 2.5797979797979794e-07, "loss": 0.1784, "step": 3725 }, { "epoch": 11.5, "learning_rate": 2.529292929292929e-07, "loss": 0.1689, "step": 3750 }, { "epoch": 11.58, "learning_rate": 2.4787878787878786e-07, "loss": 0.1792, "step": 3775 }, { "epoch": 11.66, "learning_rate": 2.4282828282828284e-07, "loss": 0.1749, "step": 3800 }, { "epoch": 11.73, "learning_rate": 2.3777777777777777e-07, "loss": 0.1787, "step": 3825 }, { "epoch": 11.81, "learning_rate": 2.3272727272727273e-07, "loss": 0.1816, "step": 3850 }, { "epoch": 11.89, "learning_rate": 2.2767676767676766e-07, "loss": 0.171, "step": 3875 }, { "epoch": 11.96, "learning_rate": 2.2262626262626262e-07, "loss": 0.1794, "step": 3900 }, { "epoch": 12.04, "learning_rate": 2.1757575757575758e-07, "loss": 0.1651, "step": 3925 }, { "epoch": 12.12, "learning_rate": 2.1252525252525254e-07, "loss": 0.1689, "step": 3950 }, { "epoch": 12.19, "learning_rate": 2.0747474747474747e-07, "loss": 0.1747, "step": 3975 }, { "epoch": 12.27, "learning_rate": 2.0242424242424242e-07, "loss": 0.1653, "step": 4000 }, { "epoch": 12.27, "eval_loss": 0.3988991677761078, "eval_runtime": 2674.7246, "eval_samples_per_second": 3.913, "eval_steps_per_second": 0.123, "eval_wer": 59.77735145505147, "step": 4000 }, { "epoch": 12.35, "learning_rate": 1.9737373737373736e-07, "loss": 0.1748, "step": 4025 }, { "epoch": 12.42, "learning_rate": 1.9232323232323231e-07, "loss": 0.1722, "step": 4050 }, { "epoch": 12.5, "learning_rate": 1.8727272727272727e-07, "loss": 0.1678, "step": 4075 }, { "epoch": 12.58, "learning_rate": 1.8222222222222223e-07, "loss": 0.1784, "step": 4100 }, { "epoch": 12.65, "learning_rate": 1.7717171717171716e-07, "loss": 0.1795, "step": 4125 }, { "epoch": 12.73, "learning_rate": 1.7212121212121212e-07, "loss": 0.1752, "step": 4150 }, { "epoch": 12.81, "learning_rate": 1.6707070707070705e-07, "loss": 0.1725, "step": 4175 }, { "epoch": 12.88, "learning_rate": 1.62020202020202e-07, "loss": 0.1792, "step": 4200 }, { "epoch": 12.96, "learning_rate": 1.5696969696969697e-07, "loss": 0.169, "step": 4225 }, { "epoch": 13.04, "learning_rate": 1.5191919191919193e-07, "loss": 0.1643, "step": 4250 }, { "epoch": 13.11, "learning_rate": 1.4686868686868686e-07, "loss": 0.1653, "step": 4275 }, { "epoch": 13.19, "learning_rate": 1.4181818181818182e-07, "loss": 0.1743, "step": 4300 }, { "epoch": 13.27, "learning_rate": 1.3676767676767675e-07, "loss": 0.1677, "step": 4325 }, { "epoch": 13.34, "learning_rate": 1.317171717171717e-07, "loss": 0.1724, "step": 4350 }, { "epoch": 13.42, "learning_rate": 1.2666666666666666e-07, "loss": 0.17, "step": 4375 }, { "epoch": 13.5, "learning_rate": 1.2161616161616162e-07, "loss": 0.1665, "step": 4400 }, { "epoch": 13.57, "learning_rate": 1.1656565656565657e-07, "loss": 0.1733, "step": 4425 }, { "epoch": 13.65, "learning_rate": 1.1151515151515151e-07, "loss": 0.1655, "step": 4450 }, { "epoch": 13.73, "learning_rate": 1.0646464646464647e-07, "loss": 0.1642, "step": 4475 }, { "epoch": 13.8, "learning_rate": 1.0141414141414141e-07, "loss": 0.1693, "step": 4500 }, { "epoch": 13.8, "eval_loss": 0.3983347713947296, "eval_runtime": 2686.1318, "eval_samples_per_second": 3.896, "eval_steps_per_second": 0.122, "eval_wer": 59.94622929557675, "step": 4500 }, { "epoch": 13.88, "learning_rate": 9.636363636363636e-08, "loss": 0.1635, "step": 4525 }, { "epoch": 13.96, "learning_rate": 9.131313131313132e-08, "loss": 0.1769, "step": 4550 }, { "epoch": 14.03, "learning_rate": 8.626262626262626e-08, "loss": 0.1747, "step": 4575 }, { "epoch": 14.11, "learning_rate": 8.12121212121212e-08, "loss": 0.1684, "step": 4600 }, { "epoch": 14.19, "learning_rate": 7.616161616161616e-08, "loss": 0.1664, "step": 4625 }, { "epoch": 14.26, "learning_rate": 7.111111111111111e-08, "loss": 0.1736, "step": 4650 }, { "epoch": 14.34, "learning_rate": 6.606060606060605e-08, "loss": 0.1609, "step": 4675 }, { "epoch": 14.42, "learning_rate": 6.101010101010101e-08, "loss": 0.1662, "step": 4700 }, { "epoch": 14.49, "learning_rate": 5.5959595959595956e-08, "loss": 0.1746, "step": 4725 }, { "epoch": 14.57, "learning_rate": 5.090909090909091e-08, "loss": 0.1677, "step": 4750 }, { "epoch": 14.65, "learning_rate": 4.585858585858585e-08, "loss": 0.1674, "step": 4775 }, { "epoch": 14.72, "learning_rate": 4.0808080808080803e-08, "loss": 0.168, "step": 4800 }, { "epoch": 14.8, "learning_rate": 3.5757575757575755e-08, "loss": 0.1691, "step": 4825 }, { "epoch": 14.88, "learning_rate": 3.0707070707070706e-08, "loss": 0.1641, "step": 4850 }, { "epoch": 14.95, "learning_rate": 2.5656565656565654e-08, "loss": 0.1678, "step": 4875 }, { "epoch": 15.03, "learning_rate": 2.0606060606060606e-08, "loss": 0.1672, "step": 4900 }, { "epoch": 15.11, "learning_rate": 1.5555555555555554e-08, "loss": 0.1656, "step": 4925 }, { "epoch": 15.18, "learning_rate": 1.0505050505050505e-08, "loss": 0.1637, "step": 4950 }, { "epoch": 15.26, "learning_rate": 5.454545454545455e-09, "loss": 0.1614, "step": 4975 }, { "epoch": 15.34, "learning_rate": 4.0404040404040403e-10, "loss": 0.1616, "step": 5000 }, { "epoch": 15.34, "eval_loss": 0.3983960449695587, "eval_runtime": 2696.9238, "eval_samples_per_second": 3.881, "eval_steps_per_second": 0.122, "eval_wer": 59.830041341295356, "step": 5000 }, { "epoch": 15.34, "step": 5000, "total_flos": 9.22520949202944e+19, "train_loss": 0.23562390689849855, "train_runtime": 67707.284, "train_samples_per_second": 4.726, "train_steps_per_second": 0.074 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "total_flos": 9.22520949202944e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }