|
{ |
|
"best_metric": 21.170638221105975, |
|
"best_model_checkpoint": "./whisper-medium-hi/checkpoint-13000", |
|
"epoch": 2.5942580421999306, |
|
"eval_steps": 1000, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004323763403666551, |
|
"grad_norm": 7.115922451019287, |
|
"learning_rate": 4.800000000000001e-07, |
|
"loss": 0.7628, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.008647526807333102, |
|
"grad_norm": 5.3593902587890625, |
|
"learning_rate": 9.800000000000001e-07, |
|
"loss": 0.6367, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.012971290210999653, |
|
"grad_norm": 12.570939064025879, |
|
"learning_rate": 1.48e-06, |
|
"loss": 0.5008, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.017295053614666205, |
|
"grad_norm": 4.155623912811279, |
|
"learning_rate": 1.98e-06, |
|
"loss": 0.4183, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.021618817018332757, |
|
"grad_norm": 3.200021982192993, |
|
"learning_rate": 2.4800000000000004e-06, |
|
"loss": 0.3569, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.025942580421999307, |
|
"grad_norm": 4.024964809417725, |
|
"learning_rate": 2.9800000000000003e-06, |
|
"loss": 0.379, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03026634382566586, |
|
"grad_norm": 5.151554107666016, |
|
"learning_rate": 3.48e-06, |
|
"loss": 0.3407, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03459010722933241, |
|
"grad_norm": 3.2938315868377686, |
|
"learning_rate": 3.980000000000001e-06, |
|
"loss": 0.3222, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03891387063299896, |
|
"grad_norm": 4.498330593109131, |
|
"learning_rate": 4.48e-06, |
|
"loss": 0.3147, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.043237634036665515, |
|
"grad_norm": 3.980656385421753, |
|
"learning_rate": 4.980000000000001e-06, |
|
"loss": 0.2994, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04756139744033207, |
|
"grad_norm": 4.725027084350586, |
|
"learning_rate": 5.480000000000001e-06, |
|
"loss": 0.2797, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.051885160843998614, |
|
"grad_norm": 4.188190937042236, |
|
"learning_rate": 5.98e-06, |
|
"loss": 0.2981, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05620892424766517, |
|
"grad_norm": 3.800278425216675, |
|
"learning_rate": 6.480000000000001e-06, |
|
"loss": 0.285, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.06053268765133172, |
|
"grad_norm": 3.427295207977295, |
|
"learning_rate": 6.98e-06, |
|
"loss": 0.2853, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06485645105499827, |
|
"grad_norm": 3.704080104827881, |
|
"learning_rate": 7.48e-06, |
|
"loss": 0.2714, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.06918021445866482, |
|
"grad_norm": 4.518213272094727, |
|
"learning_rate": 7.980000000000002e-06, |
|
"loss": 0.2637, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07350397786233137, |
|
"grad_norm": 3.9395949840545654, |
|
"learning_rate": 8.48e-06, |
|
"loss": 0.2698, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.07782774126599792, |
|
"grad_norm": 3.8880250453948975, |
|
"learning_rate": 8.98e-06, |
|
"loss": 0.28, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08215150466966448, |
|
"grad_norm": 3.307865858078003, |
|
"learning_rate": 9.48e-06, |
|
"loss": 0.259, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.08647526807333103, |
|
"grad_norm": 3.322256565093994, |
|
"learning_rate": 9.980000000000001e-06, |
|
"loss": 0.2505, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09079903147699758, |
|
"grad_norm": 4.282881736755371, |
|
"learning_rate": 9.98344827586207e-06, |
|
"loss": 0.249, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.09512279488066414, |
|
"grad_norm": 2.616466522216797, |
|
"learning_rate": 9.966206896551724e-06, |
|
"loss": 0.2422, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09944655828433069, |
|
"grad_norm": 3.2629432678222656, |
|
"learning_rate": 9.94896551724138e-06, |
|
"loss": 0.2617, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.10377032168799723, |
|
"grad_norm": 3.622666597366333, |
|
"learning_rate": 9.931724137931036e-06, |
|
"loss": 0.2627, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.10809408509166378, |
|
"grad_norm": 3.1837680339813232, |
|
"learning_rate": 9.91448275862069e-06, |
|
"loss": 0.2418, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.11241784849533033, |
|
"grad_norm": 4.119836807250977, |
|
"learning_rate": 9.897241379310345e-06, |
|
"loss": 0.2588, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11674161189899689, |
|
"grad_norm": 3.3661813735961914, |
|
"learning_rate": 9.88e-06, |
|
"loss": 0.2401, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.12106537530266344, |
|
"grad_norm": 3.551511526107788, |
|
"learning_rate": 9.862758620689657e-06, |
|
"loss": 0.2249, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12538913870632998, |
|
"grad_norm": 3.5980288982391357, |
|
"learning_rate": 9.845517241379312e-06, |
|
"loss": 0.2356, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.12971290210999653, |
|
"grad_norm": 3.171095132827759, |
|
"learning_rate": 9.828275862068967e-06, |
|
"loss": 0.2315, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.13403666551366308, |
|
"grad_norm": 3.0761499404907227, |
|
"learning_rate": 9.811034482758621e-06, |
|
"loss": 0.2238, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.13836042891732964, |
|
"grad_norm": 2.9867141246795654, |
|
"learning_rate": 9.793793103448276e-06, |
|
"loss": 0.2273, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1426841923209962, |
|
"grad_norm": 3.2623560428619385, |
|
"learning_rate": 9.776551724137931e-06, |
|
"loss": 0.2361, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.14700795572466274, |
|
"grad_norm": 3.183367967605591, |
|
"learning_rate": 9.759310344827588e-06, |
|
"loss": 0.2301, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1513317191283293, |
|
"grad_norm": 3.199496030807495, |
|
"learning_rate": 9.742068965517243e-06, |
|
"loss": 0.2433, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.15565548253199585, |
|
"grad_norm": 2.4695870876312256, |
|
"learning_rate": 9.724827586206897e-06, |
|
"loss": 0.222, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1599792459356624, |
|
"grad_norm": 3.613236427307129, |
|
"learning_rate": 9.707586206896552e-06, |
|
"loss": 0.2337, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.16430300933932895, |
|
"grad_norm": 3.4775917530059814, |
|
"learning_rate": 9.690344827586207e-06, |
|
"loss": 0.2243, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.1686267727429955, |
|
"grad_norm": 4.184005260467529, |
|
"learning_rate": 9.673103448275862e-06, |
|
"loss": 0.2253, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.17295053614666206, |
|
"grad_norm": 2.5935354232788086, |
|
"learning_rate": 9.655862068965519e-06, |
|
"loss": 0.2142, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17295053614666206, |
|
"eval_loss": 0.21374455094337463, |
|
"eval_runtime": 18340.9505, |
|
"eval_samples_per_second": 1.261, |
|
"eval_steps_per_second": 0.158, |
|
"eval_wer": 25.915886295852886, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1772742995503286, |
|
"grad_norm": 3.235563039779663, |
|
"learning_rate": 9.638620689655174e-06, |
|
"loss": 0.2267, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.18159806295399517, |
|
"grad_norm": 3.3286187648773193, |
|
"learning_rate": 9.621379310344828e-06, |
|
"loss": 0.2325, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.18592182635766172, |
|
"grad_norm": 3.00522518157959, |
|
"learning_rate": 9.604137931034483e-06, |
|
"loss": 0.2203, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.19024558976132827, |
|
"grad_norm": 3.3868496417999268, |
|
"learning_rate": 9.586896551724138e-06, |
|
"loss": 0.2015, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.19456935316499482, |
|
"grad_norm": 2.8398849964141846, |
|
"learning_rate": 9.569655172413795e-06, |
|
"loss": 0.2255, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.19889311656866138, |
|
"grad_norm": 2.5562820434570312, |
|
"learning_rate": 9.55241379310345e-06, |
|
"loss": 0.2164, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2032168799723279, |
|
"grad_norm": 2.275176525115967, |
|
"learning_rate": 9.535172413793104e-06, |
|
"loss": 0.2104, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.20754064337599445, |
|
"grad_norm": 3.1678836345672607, |
|
"learning_rate": 9.51793103448276e-06, |
|
"loss": 0.2109, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.211864406779661, |
|
"grad_norm": 3.5133020877838135, |
|
"learning_rate": 9.500689655172414e-06, |
|
"loss": 0.2218, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.21618817018332756, |
|
"grad_norm": 2.210845470428467, |
|
"learning_rate": 9.483448275862069e-06, |
|
"loss": 0.2113, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2205119335869941, |
|
"grad_norm": 2.5255446434020996, |
|
"learning_rate": 9.466206896551726e-06, |
|
"loss": 0.2115, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.22483569699066067, |
|
"grad_norm": 3.083555221557617, |
|
"learning_rate": 9.44896551724138e-06, |
|
"loss": 0.208, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.22915946039432722, |
|
"grad_norm": 2.392900228500366, |
|
"learning_rate": 9.431724137931035e-06, |
|
"loss": 0.2258, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.23348322379799377, |
|
"grad_norm": 2.5552825927734375, |
|
"learning_rate": 9.41448275862069e-06, |
|
"loss": 0.2023, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.23780698720166032, |
|
"grad_norm": 2.8265864849090576, |
|
"learning_rate": 9.397241379310345e-06, |
|
"loss": 0.2214, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.24213075060532688, |
|
"grad_norm": 3.0209717750549316, |
|
"learning_rate": 9.38e-06, |
|
"loss": 0.217, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.24645451400899343, |
|
"grad_norm": 3.3972790241241455, |
|
"learning_rate": 9.362758620689657e-06, |
|
"loss": 0.2176, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.25077827741265996, |
|
"grad_norm": 2.645206928253174, |
|
"learning_rate": 9.345517241379311e-06, |
|
"loss": 0.2186, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.25510204081632654, |
|
"grad_norm": 2.4541475772857666, |
|
"learning_rate": 9.328275862068966e-06, |
|
"loss": 0.2089, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.25942580421999306, |
|
"grad_norm": 3.789954900741577, |
|
"learning_rate": 9.311034482758621e-06, |
|
"loss": 0.2117, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.26374956762365964, |
|
"grad_norm": 2.3939154148101807, |
|
"learning_rate": 9.293793103448276e-06, |
|
"loss": 0.207, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.26807333102732617, |
|
"grad_norm": 3.0324156284332275, |
|
"learning_rate": 9.276551724137933e-06, |
|
"loss": 0.2276, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.27239709443099275, |
|
"grad_norm": 2.5523455142974854, |
|
"learning_rate": 9.259310344827587e-06, |
|
"loss": 0.2061, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.2767208578346593, |
|
"grad_norm": 2.8267321586608887, |
|
"learning_rate": 9.242068965517242e-06, |
|
"loss": 0.2054, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.28104462123832585, |
|
"grad_norm": 1.9460331201553345, |
|
"learning_rate": 9.224827586206897e-06, |
|
"loss": 0.1974, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.2853683846419924, |
|
"grad_norm": 2.280379056930542, |
|
"learning_rate": 9.207586206896552e-06, |
|
"loss": 0.1978, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.28969214804565896, |
|
"grad_norm": 2.5089499950408936, |
|
"learning_rate": 9.190344827586207e-06, |
|
"loss": 0.2081, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.2940159114493255, |
|
"grad_norm": 2.703047037124634, |
|
"learning_rate": 9.173103448275864e-06, |
|
"loss": 0.2001, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.29833967485299207, |
|
"grad_norm": 2.437891960144043, |
|
"learning_rate": 9.155862068965518e-06, |
|
"loss": 0.2085, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.3026634382566586, |
|
"grad_norm": 2.1032724380493164, |
|
"learning_rate": 9.138620689655173e-06, |
|
"loss": 0.2081, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.30698720166032517, |
|
"grad_norm": 2.795346260070801, |
|
"learning_rate": 9.121379310344828e-06, |
|
"loss": 0.2031, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.3113109650639917, |
|
"grad_norm": 2.2833924293518066, |
|
"learning_rate": 9.104137931034483e-06, |
|
"loss": 0.1887, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3156347284676583, |
|
"grad_norm": 2.965100049972534, |
|
"learning_rate": 9.08689655172414e-06, |
|
"loss": 0.2158, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.3199584918713248, |
|
"grad_norm": 2.2826411724090576, |
|
"learning_rate": 9.069655172413794e-06, |
|
"loss": 0.2018, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3242822552749913, |
|
"grad_norm": 2.4495162963867188, |
|
"learning_rate": 9.05241379310345e-06, |
|
"loss": 0.2164, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.3286060186786579, |
|
"grad_norm": 2.1806020736694336, |
|
"learning_rate": 9.035172413793104e-06, |
|
"loss": 0.2039, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.33292978208232443, |
|
"grad_norm": 2.480971574783325, |
|
"learning_rate": 9.017931034482759e-06, |
|
"loss": 0.1978, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.337253545485991, |
|
"grad_norm": 2.4935293197631836, |
|
"learning_rate": 9.000689655172414e-06, |
|
"loss": 0.1989, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.34157730888965754, |
|
"grad_norm": 2.6331419944763184, |
|
"learning_rate": 8.98344827586207e-06, |
|
"loss": 0.2118, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.3459010722933241, |
|
"grad_norm": 2.9479784965515137, |
|
"learning_rate": 8.966206896551725e-06, |
|
"loss": 0.2071, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3459010722933241, |
|
"eval_loss": 0.18826113641262054, |
|
"eval_runtime": 18564.0039, |
|
"eval_samples_per_second": 1.246, |
|
"eval_steps_per_second": 0.156, |
|
"eval_wer": 24.743280064288342, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.35022483569699064, |
|
"grad_norm": 1.9392203092575073, |
|
"learning_rate": 8.94896551724138e-06, |
|
"loss": 0.1949, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.3545485991006572, |
|
"grad_norm": 2.540531873703003, |
|
"learning_rate": 8.931724137931035e-06, |
|
"loss": 0.1977, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.35887236250432375, |
|
"grad_norm": 2.9909920692443848, |
|
"learning_rate": 8.91448275862069e-06, |
|
"loss": 0.2044, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.36319612590799033, |
|
"grad_norm": 2.2228713035583496, |
|
"learning_rate": 8.897241379310345e-06, |
|
"loss": 0.2024, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.36751988931165686, |
|
"grad_norm": 1.7810205221176147, |
|
"learning_rate": 8.880000000000001e-06, |
|
"loss": 0.1884, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.37184365271532344, |
|
"grad_norm": 1.9215483665466309, |
|
"learning_rate": 8.862758620689656e-06, |
|
"loss": 0.2031, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.37616741611898996, |
|
"grad_norm": 2.5977275371551514, |
|
"learning_rate": 8.845517241379311e-06, |
|
"loss": 0.1986, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.38049117952265654, |
|
"grad_norm": 2.242727518081665, |
|
"learning_rate": 8.828275862068966e-06, |
|
"loss": 0.204, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.38481494292632307, |
|
"grad_norm": 2.6293296813964844, |
|
"learning_rate": 8.811034482758621e-06, |
|
"loss": 0.1922, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.38913870632998965, |
|
"grad_norm": 2.4864461421966553, |
|
"learning_rate": 8.793793103448277e-06, |
|
"loss": 0.2096, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3934624697336562, |
|
"grad_norm": 2.586041212081909, |
|
"learning_rate": 8.776551724137932e-06, |
|
"loss": 0.2092, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.39778623313732275, |
|
"grad_norm": 2.194061040878296, |
|
"learning_rate": 8.759310344827587e-06, |
|
"loss": 0.1966, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4021099965409893, |
|
"grad_norm": 2.837115526199341, |
|
"learning_rate": 8.742068965517242e-06, |
|
"loss": 0.1884, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.4064337599446558, |
|
"grad_norm": 2.554459810256958, |
|
"learning_rate": 8.724827586206897e-06, |
|
"loss": 0.1889, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.4107575233483224, |
|
"grad_norm": 2.0888259410858154, |
|
"learning_rate": 8.707586206896552e-06, |
|
"loss": 0.1871, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.4150812867519889, |
|
"grad_norm": 3.076122999191284, |
|
"learning_rate": 8.690344827586208e-06, |
|
"loss": 0.1945, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4194050501556555, |
|
"grad_norm": 2.9319562911987305, |
|
"learning_rate": 8.673103448275863e-06, |
|
"loss": 0.2083, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.423728813559322, |
|
"grad_norm": 1.6421335935592651, |
|
"learning_rate": 8.655862068965518e-06, |
|
"loss": 0.1914, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.4280525769629886, |
|
"grad_norm": 2.8207969665527344, |
|
"learning_rate": 8.638620689655173e-06, |
|
"loss": 0.1956, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.4323763403666551, |
|
"grad_norm": 2.3561465740203857, |
|
"learning_rate": 8.621379310344828e-06, |
|
"loss": 0.1931, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4367001037703217, |
|
"grad_norm": 2.1428236961364746, |
|
"learning_rate": 8.604137931034483e-06, |
|
"loss": 0.196, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.4410238671739882, |
|
"grad_norm": 2.463754653930664, |
|
"learning_rate": 8.58689655172414e-06, |
|
"loss": 0.2049, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.4453476305776548, |
|
"grad_norm": 3.0367329120635986, |
|
"learning_rate": 8.569655172413794e-06, |
|
"loss": 0.2048, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.44967139398132133, |
|
"grad_norm": 1.86019766330719, |
|
"learning_rate": 8.552413793103449e-06, |
|
"loss": 0.1966, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.4539951573849879, |
|
"grad_norm": 2.3381924629211426, |
|
"learning_rate": 8.535172413793104e-06, |
|
"loss": 0.1907, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.45831892078865444, |
|
"grad_norm": 1.926013469696045, |
|
"learning_rate": 8.517931034482759e-06, |
|
"loss": 0.1922, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.462642684192321, |
|
"grad_norm": 3.239078998565674, |
|
"learning_rate": 8.500689655172415e-06, |
|
"loss": 0.2048, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.46696644759598754, |
|
"grad_norm": 2.443401336669922, |
|
"learning_rate": 8.48344827586207e-06, |
|
"loss": 0.1868, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.4712902109996541, |
|
"grad_norm": 2.624505043029785, |
|
"learning_rate": 8.466206896551725e-06, |
|
"loss": 0.1953, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.47561397440332065, |
|
"grad_norm": 2.1747653484344482, |
|
"learning_rate": 8.44896551724138e-06, |
|
"loss": 0.2068, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.4799377378069872, |
|
"grad_norm": 2.4387295246124268, |
|
"learning_rate": 8.431724137931035e-06, |
|
"loss": 0.1831, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.48426150121065376, |
|
"grad_norm": 1.7778469324111938, |
|
"learning_rate": 8.41448275862069e-06, |
|
"loss": 0.19, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.4885852646143203, |
|
"grad_norm": 2.475799560546875, |
|
"learning_rate": 8.397241379310346e-06, |
|
"loss": 0.1957, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.49290902801798686, |
|
"grad_norm": 1.8860992193222046, |
|
"learning_rate": 8.380000000000001e-06, |
|
"loss": 0.1963, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.4972327914216534, |
|
"grad_norm": 2.0776500701904297, |
|
"learning_rate": 8.362758620689656e-06, |
|
"loss": 0.1861, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.5015565548253199, |
|
"grad_norm": 2.2878217697143555, |
|
"learning_rate": 8.34551724137931e-06, |
|
"loss": 0.1851, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5058803182289865, |
|
"grad_norm": 2.424276351928711, |
|
"learning_rate": 8.328275862068966e-06, |
|
"loss": 0.1884, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 2.194385290145874, |
|
"learning_rate": 8.31103448275862e-06, |
|
"loss": 0.1914, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.5145278450363197, |
|
"grad_norm": 2.2288753986358643, |
|
"learning_rate": 8.293793103448277e-06, |
|
"loss": 0.1961, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.5188516084399861, |
|
"grad_norm": 3.0630691051483154, |
|
"learning_rate": 8.276551724137932e-06, |
|
"loss": 0.1941, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5188516084399861, |
|
"eval_loss": 0.17789211869239807, |
|
"eval_runtime": 18421.853, |
|
"eval_samples_per_second": 1.255, |
|
"eval_steps_per_second": 0.157, |
|
"eval_wer": 23.17027741918857, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5231753718436527, |
|
"grad_norm": 2.5339276790618896, |
|
"learning_rate": 8.259310344827587e-06, |
|
"loss": 0.1845, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.5274991352473193, |
|
"grad_norm": 2.1724178791046143, |
|
"learning_rate": 8.242068965517242e-06, |
|
"loss": 0.1775, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.5318228986509859, |
|
"grad_norm": 2.3713173866271973, |
|
"learning_rate": 8.224827586206897e-06, |
|
"loss": 0.1783, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.5361466620546523, |
|
"grad_norm": 2.005528450012207, |
|
"learning_rate": 8.207586206896553e-06, |
|
"loss": 0.1986, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5404704254583189, |
|
"grad_norm": 1.9392186403274536, |
|
"learning_rate": 8.190344827586208e-06, |
|
"loss": 0.1897, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.5447941888619855, |
|
"grad_norm": 1.6991775035858154, |
|
"learning_rate": 8.173103448275863e-06, |
|
"loss": 0.1992, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.5491179522656521, |
|
"grad_norm": 2.995962381362915, |
|
"learning_rate": 8.155862068965518e-06, |
|
"loss": 0.1867, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.5534417156693185, |
|
"grad_norm": 2.4674553871154785, |
|
"learning_rate": 8.138620689655173e-06, |
|
"loss": 0.1825, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5577654790729851, |
|
"grad_norm": 1.9179223775863647, |
|
"learning_rate": 8.121379310344828e-06, |
|
"loss": 0.1942, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.5620892424766517, |
|
"grad_norm": 1.9651795625686646, |
|
"learning_rate": 8.104137931034484e-06, |
|
"loss": 0.1836, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.5664130058803182, |
|
"grad_norm": 2.7088942527770996, |
|
"learning_rate": 8.086896551724139e-06, |
|
"loss": 0.1894, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.5707367692839848, |
|
"grad_norm": 2.2452046871185303, |
|
"learning_rate": 8.069655172413794e-06, |
|
"loss": 0.1753, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.5750605326876513, |
|
"grad_norm": 2.9989702701568604, |
|
"learning_rate": 8.052413793103449e-06, |
|
"loss": 0.1837, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.5793842960913179, |
|
"grad_norm": 2.451460599899292, |
|
"learning_rate": 8.035172413793104e-06, |
|
"loss": 0.1856, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.5837080594949844, |
|
"grad_norm": 2.1404428482055664, |
|
"learning_rate": 8.017931034482758e-06, |
|
"loss": 0.1796, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.588031822898651, |
|
"grad_norm": 1.7960869073867798, |
|
"learning_rate": 8.000689655172415e-06, |
|
"loss": 0.181, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.5923555863023175, |
|
"grad_norm": 2.464204788208008, |
|
"learning_rate": 7.98344827586207e-06, |
|
"loss": 0.1784, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.5966793497059841, |
|
"grad_norm": 2.2879879474639893, |
|
"learning_rate": 7.966206896551725e-06, |
|
"loss": 0.1874, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.6010031131096506, |
|
"grad_norm": 2.815308094024658, |
|
"learning_rate": 7.94896551724138e-06, |
|
"loss": 0.19, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.6053268765133172, |
|
"grad_norm": 2.048867702484131, |
|
"learning_rate": 7.931724137931034e-06, |
|
"loss": 0.1803, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6096506399169838, |
|
"grad_norm": 1.690579891204834, |
|
"learning_rate": 7.914482758620691e-06, |
|
"loss": 0.1857, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.6139744033206503, |
|
"grad_norm": 2.5136778354644775, |
|
"learning_rate": 7.897241379310346e-06, |
|
"loss": 0.1835, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.6182981667243168, |
|
"grad_norm": 1.5882636308670044, |
|
"learning_rate": 7.88e-06, |
|
"loss": 0.1858, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.6226219301279834, |
|
"grad_norm": 1.7922052145004272, |
|
"learning_rate": 7.862758620689656e-06, |
|
"loss": 0.175, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.62694569353165, |
|
"grad_norm": 2.2753283977508545, |
|
"learning_rate": 7.84551724137931e-06, |
|
"loss": 0.1844, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.6312694569353166, |
|
"grad_norm": 2.2595114707946777, |
|
"learning_rate": 7.828275862068965e-06, |
|
"loss": 0.1863, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.635593220338983, |
|
"grad_norm": 2.442291498184204, |
|
"learning_rate": 7.811034482758622e-06, |
|
"loss": 0.1703, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.6399169837426496, |
|
"grad_norm": 2.492649555206299, |
|
"learning_rate": 7.793793103448277e-06, |
|
"loss": 0.1717, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.6442407471463162, |
|
"grad_norm": 2.1932718753814697, |
|
"learning_rate": 7.776551724137932e-06, |
|
"loss": 0.1821, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.6485645105499827, |
|
"grad_norm": 2.30263090133667, |
|
"learning_rate": 7.759310344827587e-06, |
|
"loss": 0.1837, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.6528882739536492, |
|
"grad_norm": 1.7888277769088745, |
|
"learning_rate": 7.742068965517241e-06, |
|
"loss": 0.1788, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.6572120373573158, |
|
"grad_norm": 2.500505208969116, |
|
"learning_rate": 7.724827586206896e-06, |
|
"loss": 0.1901, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.6615358007609824, |
|
"grad_norm": 1.7561728954315186, |
|
"learning_rate": 7.707586206896553e-06, |
|
"loss": 0.1718, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.6658595641646489, |
|
"grad_norm": 2.2992682456970215, |
|
"learning_rate": 7.690344827586208e-06, |
|
"loss": 0.1831, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.6701833275683154, |
|
"grad_norm": 2.2881760597229004, |
|
"learning_rate": 7.673103448275863e-06, |
|
"loss": 0.1848, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.674507090971982, |
|
"grad_norm": 2.236409902572632, |
|
"learning_rate": 7.655862068965517e-06, |
|
"loss": 0.1916, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.6788308543756486, |
|
"grad_norm": 2.074676513671875, |
|
"learning_rate": 7.638620689655172e-06, |
|
"loss": 0.1783, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.6831546177793151, |
|
"grad_norm": 1.740821361541748, |
|
"learning_rate": 7.621379310344829e-06, |
|
"loss": 0.1695, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.6874783811829817, |
|
"grad_norm": 1.9029902219772339, |
|
"learning_rate": 7.604137931034483e-06, |
|
"loss": 0.194, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.6918021445866482, |
|
"grad_norm": 2.5968997478485107, |
|
"learning_rate": 7.586896551724139e-06, |
|
"loss": 0.1883, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6918021445866482, |
|
"eval_loss": 0.17140337824821472, |
|
"eval_runtime": 18369.4452, |
|
"eval_samples_per_second": 1.259, |
|
"eval_steps_per_second": 0.157, |
|
"eval_wer": 23.089682705170947, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6961259079903148, |
|
"grad_norm": 1.9076557159423828, |
|
"learning_rate": 7.5696551724137935e-06, |
|
"loss": 0.1955, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.7004496713939813, |
|
"grad_norm": 1.8503438234329224, |
|
"learning_rate": 7.552413793103449e-06, |
|
"loss": 0.1626, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.7047734347976479, |
|
"grad_norm": 2.7024850845336914, |
|
"learning_rate": 7.535172413793104e-06, |
|
"loss": 0.1704, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.7090971982013144, |
|
"grad_norm": 1.7515462636947632, |
|
"learning_rate": 7.517931034482759e-06, |
|
"loss": 0.1721, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.713420961604981, |
|
"grad_norm": 1.8947151899337769, |
|
"learning_rate": 7.500689655172414e-06, |
|
"loss": 0.1727, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.7177447250086475, |
|
"grad_norm": 1.7755450010299683, |
|
"learning_rate": 7.48344827586207e-06, |
|
"loss": 0.184, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.7220684884123141, |
|
"grad_norm": 2.1575655937194824, |
|
"learning_rate": 7.4662068965517244e-06, |
|
"loss": 0.1812, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.7263922518159807, |
|
"grad_norm": 2.710113048553467, |
|
"learning_rate": 7.44896551724138e-06, |
|
"loss": 0.1828, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.7307160152196471, |
|
"grad_norm": 2.426133155822754, |
|
"learning_rate": 7.431724137931036e-06, |
|
"loss": 0.186, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.7350397786233137, |
|
"grad_norm": 2.672506332397461, |
|
"learning_rate": 7.41448275862069e-06, |
|
"loss": 0.1896, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.7393635420269803, |
|
"grad_norm": 1.9614557027816772, |
|
"learning_rate": 7.397241379310346e-06, |
|
"loss": 0.1918, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.7436873054306469, |
|
"grad_norm": 2.511770486831665, |
|
"learning_rate": 7.3800000000000005e-06, |
|
"loss": 0.1976, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.7480110688343133, |
|
"grad_norm": 2.0811338424682617, |
|
"learning_rate": 7.362758620689656e-06, |
|
"loss": 0.1893, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.7523348322379799, |
|
"grad_norm": 2.0760035514831543, |
|
"learning_rate": 7.345517241379311e-06, |
|
"loss": 0.1761, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.7566585956416465, |
|
"grad_norm": 2.1201930046081543, |
|
"learning_rate": 7.328275862068967e-06, |
|
"loss": 0.1898, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.7609823590453131, |
|
"grad_norm": 1.741500735282898, |
|
"learning_rate": 7.311034482758621e-06, |
|
"loss": 0.176, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.7653061224489796, |
|
"grad_norm": 2.043604612350464, |
|
"learning_rate": 7.2937931034482765e-06, |
|
"loss": 0.1718, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.7696298858526461, |
|
"grad_norm": Infinity, |
|
"learning_rate": 7.277241379310346e-06, |
|
"loss": 0.1929, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.7739536492563127, |
|
"grad_norm": 2.5208499431610107, |
|
"learning_rate": 7.260000000000001e-06, |
|
"loss": 0.2019, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.7782774126599793, |
|
"grad_norm": 1.9209998846054077, |
|
"learning_rate": 7.242758620689656e-06, |
|
"loss": 0.1739, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.7826011760636458, |
|
"grad_norm": 1.8282277584075928, |
|
"learning_rate": 7.2255172413793105e-06, |
|
"loss": 0.1733, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.7869249394673123, |
|
"grad_norm": 2.2943856716156006, |
|
"learning_rate": 7.208275862068966e-06, |
|
"loss": 0.1831, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.7912487028709789, |
|
"grad_norm": 1.9153530597686768, |
|
"learning_rate": 7.191034482758621e-06, |
|
"loss": 0.1868, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.7955724662746455, |
|
"grad_norm": 2.1466894149780273, |
|
"learning_rate": 7.173793103448277e-06, |
|
"loss": 0.1727, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.799896229678312, |
|
"grad_norm": 1.8446885347366333, |
|
"learning_rate": 7.156551724137931e-06, |
|
"loss": 0.1801, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.8042199930819786, |
|
"grad_norm": 1.8949089050292969, |
|
"learning_rate": 7.139310344827587e-06, |
|
"loss": 0.1825, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.8085437564856451, |
|
"grad_norm": 1.7999627590179443, |
|
"learning_rate": 7.1220689655172414e-06, |
|
"loss": 0.1801, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.8128675198893116, |
|
"grad_norm": 2.10203218460083, |
|
"learning_rate": 7.104827586206897e-06, |
|
"loss": 0.1696, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.8171912832929782, |
|
"grad_norm": 1.9392683506011963, |
|
"learning_rate": 7.087586206896553e-06, |
|
"loss": 0.1779, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.8215150466966448, |
|
"grad_norm": 2.000790596008301, |
|
"learning_rate": 7.070344827586208e-06, |
|
"loss": 0.179, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.8258388101003113, |
|
"grad_norm": 2.079643964767456, |
|
"learning_rate": 7.0531034482758635e-06, |
|
"loss": 0.184, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.8301625735039778, |
|
"grad_norm": 1.8971096277236938, |
|
"learning_rate": 7.0358620689655175e-06, |
|
"loss": 0.1638, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.8344863369076444, |
|
"grad_norm": 1.8511512279510498, |
|
"learning_rate": 7.018620689655173e-06, |
|
"loss": 0.1696, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.838810100311311, |
|
"grad_norm": 2.439633369445801, |
|
"learning_rate": 7.001379310344828e-06, |
|
"loss": 0.187, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.8431338637149776, |
|
"grad_norm": 1.5714433193206787, |
|
"learning_rate": 6.984137931034484e-06, |
|
"loss": 0.1722, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 1.7673485279083252, |
|
"learning_rate": 6.966896551724139e-06, |
|
"loss": 0.1702, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.8517813905223106, |
|
"grad_norm": 1.4075311422348022, |
|
"learning_rate": 6.9496551724137935e-06, |
|
"loss": 0.1726, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.8561051539259772, |
|
"grad_norm": 1.8928003311157227, |
|
"learning_rate": 6.932413793103448e-06, |
|
"loss": 0.1798, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.8604289173296438, |
|
"grad_norm": 2.118661403656006, |
|
"learning_rate": 6.915172413793104e-06, |
|
"loss": 0.1812, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.8647526807333102, |
|
"grad_norm": 2.0651979446411133, |
|
"learning_rate": 6.897931034482759e-06, |
|
"loss": 0.1929, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8647526807333102, |
|
"eval_loss": 0.1684178113937378, |
|
"eval_runtime": 18447.2626, |
|
"eval_samples_per_second": 1.254, |
|
"eval_steps_per_second": 0.157, |
|
"eval_wer": 22.593462925260116, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8690764441369768, |
|
"grad_norm": 1.4254800081253052, |
|
"learning_rate": 6.880689655172415e-06, |
|
"loss": 0.1829, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.8734002075406434, |
|
"grad_norm": 1.8314497470855713, |
|
"learning_rate": 6.863448275862069e-06, |
|
"loss": 0.1804, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.8777239709443099, |
|
"grad_norm": 1.9438308477401733, |
|
"learning_rate": 6.8462068965517245e-06, |
|
"loss": 0.1889, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.8820477343479765, |
|
"grad_norm": 2.4102623462677, |
|
"learning_rate": 6.828965517241379e-06, |
|
"loss": 0.1725, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.886371497751643, |
|
"grad_norm": 2.181542158126831, |
|
"learning_rate": 6.811724137931035e-06, |
|
"loss": 0.1953, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.8906952611553096, |
|
"grad_norm": 1.6767735481262207, |
|
"learning_rate": 6.794482758620691e-06, |
|
"loss": 0.1654, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.8950190245589761, |
|
"grad_norm": 2.3569998741149902, |
|
"learning_rate": 6.777241379310346e-06, |
|
"loss": 0.1765, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.8993427879626427, |
|
"grad_norm": 2.1700599193573, |
|
"learning_rate": 6.760000000000001e-06, |
|
"loss": 0.1845, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.9036665513663092, |
|
"grad_norm": 1.6862359046936035, |
|
"learning_rate": 6.742758620689655e-06, |
|
"loss": 0.1714, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.9079903147699758, |
|
"grad_norm": 1.861148715019226, |
|
"learning_rate": 6.725517241379311e-06, |
|
"loss": 0.1718, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.9123140781736423, |
|
"grad_norm": 1.8183972835540771, |
|
"learning_rate": 6.708275862068966e-06, |
|
"loss": 0.1694, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.9166378415773089, |
|
"grad_norm": 1.633947730064392, |
|
"learning_rate": 6.691034482758622e-06, |
|
"loss": 0.1756, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.9209616049809755, |
|
"grad_norm": 1.9196300506591797, |
|
"learning_rate": 6.6737931034482765e-06, |
|
"loss": 0.1809, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.925285368384642, |
|
"grad_norm": 2.4457297325134277, |
|
"learning_rate": 6.656551724137931e-06, |
|
"loss": 0.1838, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.9296091317883085, |
|
"grad_norm": 1.6058560609817505, |
|
"learning_rate": 6.639310344827586e-06, |
|
"loss": 0.1795, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.9339328951919751, |
|
"grad_norm": 2.3014094829559326, |
|
"learning_rate": 6.622068965517242e-06, |
|
"loss": 0.1791, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.9382566585956417, |
|
"grad_norm": 2.237236738204956, |
|
"learning_rate": 6.604827586206897e-06, |
|
"loss": 0.1675, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.9425804219993082, |
|
"grad_norm": 1.8356267213821411, |
|
"learning_rate": 6.587586206896553e-06, |
|
"loss": 0.1753, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.9469041854029747, |
|
"grad_norm": 1.816933274269104, |
|
"learning_rate": 6.570344827586207e-06, |
|
"loss": 0.1797, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.9512279488066413, |
|
"grad_norm": 1.9767167568206787, |
|
"learning_rate": 6.553103448275862e-06, |
|
"loss": 0.1787, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.9555517122103079, |
|
"grad_norm": 2.3487281799316406, |
|
"learning_rate": 6.535862068965517e-06, |
|
"loss": 0.1713, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.9598754756139743, |
|
"grad_norm": 2.3690738677978516, |
|
"learning_rate": 6.518620689655173e-06, |
|
"loss": 0.175, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.9641992390176409, |
|
"grad_norm": 1.8204621076583862, |
|
"learning_rate": 6.501379310344829e-06, |
|
"loss": 0.1795, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.9685230024213075, |
|
"grad_norm": 1.9036630392074585, |
|
"learning_rate": 6.4841379310344835e-06, |
|
"loss": 0.1846, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.9728467658249741, |
|
"grad_norm": 2.118363618850708, |
|
"learning_rate": 6.466896551724139e-06, |
|
"loss": 0.1742, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.9771705292286406, |
|
"grad_norm": 1.9643069505691528, |
|
"learning_rate": 6.449655172413793e-06, |
|
"loss": 0.1789, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.9814942926323071, |
|
"grad_norm": 1.637279748916626, |
|
"learning_rate": 6.432413793103449e-06, |
|
"loss": 0.1848, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.9858180560359737, |
|
"grad_norm": 1.6916080713272095, |
|
"learning_rate": 6.415172413793104e-06, |
|
"loss": 0.1744, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.9901418194396403, |
|
"grad_norm": 1.6688181161880493, |
|
"learning_rate": 6.3979310344827595e-06, |
|
"loss": 0.1761, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.9944655828433068, |
|
"grad_norm": 2.0284881591796875, |
|
"learning_rate": 6.380689655172414e-06, |
|
"loss": 0.1809, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.9987893462469734, |
|
"grad_norm": 1.8513811826705933, |
|
"learning_rate": 6.363448275862069e-06, |
|
"loss": 0.1787, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 1.0031131096506398, |
|
"grad_norm": 1.592392921447754, |
|
"learning_rate": 6.346206896551724e-06, |
|
"loss": 0.1542, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.0074368730543064, |
|
"grad_norm": 1.8996076583862305, |
|
"learning_rate": 6.32896551724138e-06, |
|
"loss": 0.1502, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 1.011760636457973, |
|
"grad_norm": 2.029642343521118, |
|
"learning_rate": 6.311724137931035e-06, |
|
"loss": 0.1436, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.0160843998616396, |
|
"grad_norm": 2.350755453109741, |
|
"learning_rate": 6.2944827586206905e-06, |
|
"loss": 0.1423, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 2.1733169555664062, |
|
"learning_rate": 6.2772413793103445e-06, |
|
"loss": 0.1515, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.0247319266689727, |
|
"grad_norm": 1.7886191606521606, |
|
"learning_rate": 6.26e-06, |
|
"loss": 0.141, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.0290556900726393, |
|
"grad_norm": 1.4512900114059448, |
|
"learning_rate": 6.242758620689656e-06, |
|
"loss": 0.1419, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.0333794534763059, |
|
"grad_norm": 1.4808529615402222, |
|
"learning_rate": 6.225517241379311e-06, |
|
"loss": 0.1426, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 1.0377032168799722, |
|
"grad_norm": 1.99927818775177, |
|
"learning_rate": 6.2082758620689665e-06, |
|
"loss": 0.1434, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0377032168799722, |
|
"eval_loss": 0.1640278398990631, |
|
"eval_runtime": 18500.4166, |
|
"eval_samples_per_second": 1.25, |
|
"eval_steps_per_second": 0.156, |
|
"eval_wer": 22.07428302333264, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0420269802836388, |
|
"grad_norm": 1.9692049026489258, |
|
"learning_rate": 6.191034482758621e-06, |
|
"loss": 0.1518, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 1.0463507436873054, |
|
"grad_norm": 1.943925380706787, |
|
"learning_rate": 6.173793103448277e-06, |
|
"loss": 0.1486, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.050674507090972, |
|
"grad_norm": 1.8784852027893066, |
|
"learning_rate": 6.156551724137931e-06, |
|
"loss": 0.1516, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 1.0549982704946386, |
|
"grad_norm": 1.8840405941009521, |
|
"learning_rate": 6.139310344827587e-06, |
|
"loss": 0.1458, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.0593220338983051, |
|
"grad_norm": 1.711713194847107, |
|
"learning_rate": 6.122068965517242e-06, |
|
"loss": 0.1518, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 1.0636457973019717, |
|
"grad_norm": 2.1738007068634033, |
|
"learning_rate": 6.104827586206897e-06, |
|
"loss": 0.1578, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.067969560705638, |
|
"grad_norm": 1.4103723764419556, |
|
"learning_rate": 6.087586206896552e-06, |
|
"loss": 0.1433, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 1.0722933241093047, |
|
"grad_norm": 1.4984797239303589, |
|
"learning_rate": 6.070344827586207e-06, |
|
"loss": 0.1536, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.0766170875129712, |
|
"grad_norm": 1.7718677520751953, |
|
"learning_rate": 6.053103448275862e-06, |
|
"loss": 0.1356, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 1.0809408509166378, |
|
"grad_norm": 1.9681774377822876, |
|
"learning_rate": 6.035862068965518e-06, |
|
"loss": 0.1312, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.0852646143203044, |
|
"grad_norm": 2.0595345497131348, |
|
"learning_rate": 6.018620689655173e-06, |
|
"loss": 0.1372, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 1.089588377723971, |
|
"grad_norm": 1.863364338874817, |
|
"learning_rate": 6.001379310344828e-06, |
|
"loss": 0.1355, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.0939121411276376, |
|
"grad_norm": 1.5299562215805054, |
|
"learning_rate": 5.984137931034482e-06, |
|
"loss": 0.1471, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 1.098235904531304, |
|
"grad_norm": 2.049438238143921, |
|
"learning_rate": 5.966896551724138e-06, |
|
"loss": 0.1508, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.1025596679349705, |
|
"grad_norm": 1.7478573322296143, |
|
"learning_rate": 5.949655172413794e-06, |
|
"loss": 0.1442, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 1.106883431338637, |
|
"grad_norm": 1.8549025058746338, |
|
"learning_rate": 5.932413793103449e-06, |
|
"loss": 0.1523, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.1112071947423037, |
|
"grad_norm": 1.9961248636245728, |
|
"learning_rate": 5.915172413793104e-06, |
|
"loss": 0.1464, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 1.1155309581459703, |
|
"grad_norm": 1.5598413944244385, |
|
"learning_rate": 5.897931034482759e-06, |
|
"loss": 0.1478, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.1198547215496368, |
|
"grad_norm": 1.586255669593811, |
|
"learning_rate": 5.880689655172415e-06, |
|
"loss": 0.1429, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 1.1241784849533034, |
|
"grad_norm": 1.8345210552215576, |
|
"learning_rate": 5.863448275862069e-06, |
|
"loss": 0.1421, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.12850224835697, |
|
"grad_norm": 2.151012897491455, |
|
"learning_rate": 5.846206896551725e-06, |
|
"loss": 0.146, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 1.1328260117606366, |
|
"grad_norm": 1.7682011127471924, |
|
"learning_rate": 5.82896551724138e-06, |
|
"loss": 0.1422, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.137149775164303, |
|
"grad_norm": 1.4105076789855957, |
|
"learning_rate": 5.811724137931035e-06, |
|
"loss": 0.1593, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 1.1414735385679695, |
|
"grad_norm": 2.0739102363586426, |
|
"learning_rate": 5.79448275862069e-06, |
|
"loss": 0.1469, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.145797301971636, |
|
"grad_norm": 1.3610910177230835, |
|
"learning_rate": 5.777241379310345e-06, |
|
"loss": 0.1602, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 1.1501210653753027, |
|
"grad_norm": 2.168071746826172, |
|
"learning_rate": 5.76e-06, |
|
"loss": 0.151, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.1544448287789693, |
|
"grad_norm": 1.9631882905960083, |
|
"learning_rate": 5.742758620689656e-06, |
|
"loss": 0.1474, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 1.1587685921826358, |
|
"grad_norm": 1.6678909063339233, |
|
"learning_rate": 5.7255172413793105e-06, |
|
"loss": 0.1447, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.1630923555863024, |
|
"grad_norm": 1.8679783344268799, |
|
"learning_rate": 5.708275862068966e-06, |
|
"loss": 0.1553, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 1.1674161189899688, |
|
"grad_norm": 1.428916573524475, |
|
"learning_rate": 5.691034482758622e-06, |
|
"loss": 0.1371, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.1717398823936354, |
|
"grad_norm": 1.7976248264312744, |
|
"learning_rate": 5.673793103448276e-06, |
|
"loss": 0.1476, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 1.176063645797302, |
|
"grad_norm": 1.8509339094161987, |
|
"learning_rate": 5.656551724137932e-06, |
|
"loss": 0.132, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.1803874092009685, |
|
"grad_norm": 2.185060739517212, |
|
"learning_rate": 5.6393103448275865e-06, |
|
"loss": 0.1526, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 1.184711172604635, |
|
"grad_norm": 1.3566219806671143, |
|
"learning_rate": 5.622068965517242e-06, |
|
"loss": 0.1445, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.1890349360083017, |
|
"grad_norm": 1.539624810218811, |
|
"learning_rate": 5.604827586206897e-06, |
|
"loss": 0.1415, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 1.1933586994119683, |
|
"grad_norm": 1.6758310794830322, |
|
"learning_rate": 5.587586206896553e-06, |
|
"loss": 0.15, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.1976824628156346, |
|
"grad_norm": 1.9276903867721558, |
|
"learning_rate": 5.570344827586207e-06, |
|
"loss": 0.1496, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 1.2020062262193012, |
|
"grad_norm": 2.1183435916900635, |
|
"learning_rate": 5.553103448275863e-06, |
|
"loss": 0.1379, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.2063299896229678, |
|
"grad_norm": 1.862641453742981, |
|
"learning_rate": 5.5358620689655175e-06, |
|
"loss": 0.1455, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 1.2106537530266344, |
|
"grad_norm": 1.83571195602417, |
|
"learning_rate": 5.518620689655173e-06, |
|
"loss": 0.1442, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.2106537530266344, |
|
"eval_loss": 0.1629372239112854, |
|
"eval_runtime": 18325.4837, |
|
"eval_samples_per_second": 1.262, |
|
"eval_steps_per_second": 0.158, |
|
"eval_wer": 21.968151030745478, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.214977516430301, |
|
"grad_norm": 1.160624623298645, |
|
"learning_rate": 5.501379310344828e-06, |
|
"loss": 0.1396, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 1.2193012798339675, |
|
"grad_norm": 2.0968518257141113, |
|
"learning_rate": 5.484137931034483e-06, |
|
"loss": 0.153, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.223625043237634, |
|
"grad_norm": 1.8394380807876587, |
|
"learning_rate": 5.466896551724138e-06, |
|
"loss": 0.1411, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 1.2279488066413007, |
|
"grad_norm": 2.051907539367676, |
|
"learning_rate": 5.4496551724137935e-06, |
|
"loss": 0.1517, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.2322725700449673, |
|
"grad_norm": 1.548345923423767, |
|
"learning_rate": 5.432413793103448e-06, |
|
"loss": 0.1433, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 1.2365963334486336, |
|
"grad_norm": 1.661149024963379, |
|
"learning_rate": 5.415172413793104e-06, |
|
"loss": 0.1415, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.2409200968523002, |
|
"grad_norm": 1.7700337171554565, |
|
"learning_rate": 5.39793103448276e-06, |
|
"loss": 0.1415, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 1.2452438602559668, |
|
"grad_norm": 1.8730343580245972, |
|
"learning_rate": 5.380689655172414e-06, |
|
"loss": 0.1502, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.2495676236596334, |
|
"grad_norm": 1.8379130363464355, |
|
"learning_rate": 5.3634482758620695e-06, |
|
"loss": 0.1364, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 1.2538913870633, |
|
"grad_norm": 1.5410575866699219, |
|
"learning_rate": 5.346206896551724e-06, |
|
"loss": 0.1471, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.2582151504669665, |
|
"grad_norm": 1.5988543033599854, |
|
"learning_rate": 5.32896551724138e-06, |
|
"loss": 0.1406, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 1.262538913870633, |
|
"grad_norm": 1.7049413919448853, |
|
"learning_rate": 5.311724137931035e-06, |
|
"loss": 0.1552, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.2668626772742995, |
|
"grad_norm": 1.604293704032898, |
|
"learning_rate": 5.294482758620691e-06, |
|
"loss": 0.1462, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 1.271186440677966, |
|
"grad_norm": 1.8004859685897827, |
|
"learning_rate": 5.277241379310345e-06, |
|
"loss": 0.1426, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.2755102040816326, |
|
"grad_norm": 1.3034578561782837, |
|
"learning_rate": 5.2600000000000005e-06, |
|
"loss": 0.1324, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 1.2798339674852992, |
|
"grad_norm": 2.3090808391571045, |
|
"learning_rate": 5.242758620689655e-06, |
|
"loss": 0.1403, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.2841577308889658, |
|
"grad_norm": 1.5892201662063599, |
|
"learning_rate": 5.225517241379311e-06, |
|
"loss": 0.141, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 1.2884814942926324, |
|
"grad_norm": 1.6891568899154663, |
|
"learning_rate": 5.208275862068966e-06, |
|
"loss": 0.1521, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.292805257696299, |
|
"grad_norm": 2.103891134262085, |
|
"learning_rate": 5.191034482758621e-06, |
|
"loss": 0.1409, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 1.2971290210999653, |
|
"grad_norm": 1.9179925918579102, |
|
"learning_rate": 5.173793103448276e-06, |
|
"loss": 0.1413, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.3014527845036319, |
|
"grad_norm": 2.1257266998291016, |
|
"learning_rate": 5.156551724137931e-06, |
|
"loss": 0.1383, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 1.3057765479072985, |
|
"grad_norm": 1.5119720697402954, |
|
"learning_rate": 5.139310344827587e-06, |
|
"loss": 0.1411, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.310100311310965, |
|
"grad_norm": 1.8337161540985107, |
|
"learning_rate": 5.122068965517242e-06, |
|
"loss": 0.1397, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 1.3144240747146316, |
|
"grad_norm": 1.5722588300704956, |
|
"learning_rate": 5.104827586206898e-06, |
|
"loss": 0.1395, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.3187478381182982, |
|
"grad_norm": 1.2701337337493896, |
|
"learning_rate": 5.088275862068965e-06, |
|
"loss": 0.1481, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 1.3230716015219648, |
|
"grad_norm": 1.6665078401565552, |
|
"learning_rate": 5.071034482758621e-06, |
|
"loss": 0.1369, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.3273953649256311, |
|
"grad_norm": 2.2191314697265625, |
|
"learning_rate": 5.053793103448277e-06, |
|
"loss": 0.1356, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 1.331719128329298, |
|
"grad_norm": 1.6571834087371826, |
|
"learning_rate": 5.036551724137932e-06, |
|
"loss": 0.1443, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.3360428917329643, |
|
"grad_norm": 1.906327724456787, |
|
"learning_rate": 5.019310344827587e-06, |
|
"loss": 0.1386, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 1.340366655136631, |
|
"grad_norm": 2.03704571723938, |
|
"learning_rate": 5.002068965517241e-06, |
|
"loss": 0.1465, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.3446904185402975, |
|
"grad_norm": 1.2263180017471313, |
|
"learning_rate": 4.984827586206897e-06, |
|
"loss": 0.1491, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 1.349014181943964, |
|
"grad_norm": 1.9075984954833984, |
|
"learning_rate": 4.967586206896552e-06, |
|
"loss": 0.146, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.3533379453476306, |
|
"grad_norm": 1.754158616065979, |
|
"learning_rate": 4.950344827586207e-06, |
|
"loss": 0.1499, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 1.357661708751297, |
|
"grad_norm": 1.525426983833313, |
|
"learning_rate": 4.933103448275863e-06, |
|
"loss": 0.1372, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.3619854721549638, |
|
"grad_norm": 1.7515060901641846, |
|
"learning_rate": 4.9158620689655175e-06, |
|
"loss": 0.1423, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 1.3663092355586302, |
|
"grad_norm": 1.675724983215332, |
|
"learning_rate": 4.898620689655173e-06, |
|
"loss": 0.1372, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.3706329989622967, |
|
"grad_norm": 1.8081847429275513, |
|
"learning_rate": 4.881379310344828e-06, |
|
"loss": 0.15, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 1.3749567623659633, |
|
"grad_norm": 2.3439128398895264, |
|
"learning_rate": 4.864137931034483e-06, |
|
"loss": 0.1512, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.37928052576963, |
|
"grad_norm": 1.5367904901504517, |
|
"learning_rate": 4.846896551724139e-06, |
|
"loss": 0.1343, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 1.3836042891732965, |
|
"grad_norm": 1.7868484258651733, |
|
"learning_rate": 4.8296551724137935e-06, |
|
"loss": 0.1521, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.3836042891732965, |
|
"eval_loss": 0.16171179711818695, |
|
"eval_runtime": 18373.8485, |
|
"eval_samples_per_second": 1.259, |
|
"eval_steps_per_second": 0.157, |
|
"eval_wer": 22.230552425221457, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.387928052576963, |
|
"grad_norm": 2.016526699066162, |
|
"learning_rate": 4.812413793103448e-06, |
|
"loss": 0.1319, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 1.3922518159806296, |
|
"grad_norm": 1.9844586849212646, |
|
"learning_rate": 4.795172413793104e-06, |
|
"loss": 0.1467, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.396575579384296, |
|
"grad_norm": 1.9286713600158691, |
|
"learning_rate": 4.777931034482759e-06, |
|
"loss": 0.1409, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 1.4008993427879626, |
|
"grad_norm": 1.6599113941192627, |
|
"learning_rate": 4.760689655172414e-06, |
|
"loss": 0.1451, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.4052231061916292, |
|
"grad_norm": 1.4539234638214111, |
|
"learning_rate": 4.7434482758620696e-06, |
|
"loss": 0.1381, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 1.4095468695952957, |
|
"grad_norm": 1.7355366945266724, |
|
"learning_rate": 4.726206896551724e-06, |
|
"loss": 0.1468, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.4138706329989623, |
|
"grad_norm": 1.8770480155944824, |
|
"learning_rate": 4.708965517241379e-06, |
|
"loss": 0.1434, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 1.418194396402629, |
|
"grad_norm": 1.5144529342651367, |
|
"learning_rate": 4.691724137931035e-06, |
|
"loss": 0.1342, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.4225181598062955, |
|
"grad_norm": 1.4679632186889648, |
|
"learning_rate": 4.67448275862069e-06, |
|
"loss": 0.1417, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 1.4268419232099618, |
|
"grad_norm": 1.7771397829055786, |
|
"learning_rate": 4.657241379310346e-06, |
|
"loss": 0.1406, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.4311656866136286, |
|
"grad_norm": 2.567934989929199, |
|
"learning_rate": 4.6400000000000005e-06, |
|
"loss": 0.1457, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 1.435489450017295, |
|
"grad_norm": 1.4760676622390747, |
|
"learning_rate": 4.622758620689655e-06, |
|
"loss": 0.1496, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.4398132134209616, |
|
"grad_norm": 1.6052241325378418, |
|
"learning_rate": 4.605517241379311e-06, |
|
"loss": 0.1409, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 1.4441369768246282, |
|
"grad_norm": 1.5678763389587402, |
|
"learning_rate": 4.588275862068966e-06, |
|
"loss": 0.1347, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.4484607402282947, |
|
"grad_norm": 1.3863381147384644, |
|
"learning_rate": 4.571034482758621e-06, |
|
"loss": 0.1374, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 1.4527845036319613, |
|
"grad_norm": 2.31950306892395, |
|
"learning_rate": 4.5537931034482765e-06, |
|
"loss": 0.147, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.4571082670356277, |
|
"grad_norm": 2.1464667320251465, |
|
"learning_rate": 4.536551724137931e-06, |
|
"loss": 0.1434, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 1.4614320304392945, |
|
"grad_norm": 1.6955801248550415, |
|
"learning_rate": 4.519310344827586e-06, |
|
"loss": 0.1463, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.4657557938429608, |
|
"grad_norm": 1.6823946237564087, |
|
"learning_rate": 4.502068965517242e-06, |
|
"loss": 0.1458, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 1.4700795572466274, |
|
"grad_norm": 1.947957992553711, |
|
"learning_rate": 4.484827586206897e-06, |
|
"loss": 0.1342, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.474403320650294, |
|
"grad_norm": 1.645462989807129, |
|
"learning_rate": 4.467586206896552e-06, |
|
"loss": 0.1379, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 1.4787270840539606, |
|
"grad_norm": 1.5544453859329224, |
|
"learning_rate": 4.4503448275862074e-06, |
|
"loss": 0.151, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.4830508474576272, |
|
"grad_norm": 1.968682050704956, |
|
"learning_rate": 4.433103448275862e-06, |
|
"loss": 0.143, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 1.4873746108612937, |
|
"grad_norm": 1.315320372581482, |
|
"learning_rate": 4.415862068965517e-06, |
|
"loss": 0.1493, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.4916983742649603, |
|
"grad_norm": 1.9570213556289673, |
|
"learning_rate": 4.398620689655173e-06, |
|
"loss": 0.1431, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 1.4960221376686267, |
|
"grad_norm": 1.6048710346221924, |
|
"learning_rate": 4.381379310344829e-06, |
|
"loss": 0.1446, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.5003459010722935, |
|
"grad_norm": 1.7305105924606323, |
|
"learning_rate": 4.3641379310344835e-06, |
|
"loss": 0.142, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 1.5046696644759598, |
|
"grad_norm": 1.682835340499878, |
|
"learning_rate": 4.346896551724138e-06, |
|
"loss": 0.1497, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.5089934278796264, |
|
"grad_norm": 1.6621623039245605, |
|
"learning_rate": 4.329655172413793e-06, |
|
"loss": 0.138, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 1.513317191283293, |
|
"grad_norm": 1.8892101049423218, |
|
"learning_rate": 4.312413793103449e-06, |
|
"loss": 0.1322, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.5176409546869594, |
|
"grad_norm": 1.942769169807434, |
|
"learning_rate": 4.295172413793104e-06, |
|
"loss": 0.1521, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 1.5219647180906262, |
|
"grad_norm": 2.032823085784912, |
|
"learning_rate": 4.277931034482759e-06, |
|
"loss": 0.151, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.5262884814942925, |
|
"grad_norm": 2.2061808109283447, |
|
"learning_rate": 4.260689655172414e-06, |
|
"loss": 0.1523, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 1.5306122448979593, |
|
"grad_norm": 1.5324435234069824, |
|
"learning_rate": 4.243448275862069e-06, |
|
"loss": 0.1422, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.5349360083016257, |
|
"grad_norm": 2.379418134689331, |
|
"learning_rate": 4.226206896551724e-06, |
|
"loss": 0.1376, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 1.5392597717052923, |
|
"grad_norm": 2.124983072280884, |
|
"learning_rate": 4.20896551724138e-06, |
|
"loss": 0.1449, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.5435835351089588, |
|
"grad_norm": 1.7968913316726685, |
|
"learning_rate": 4.191724137931035e-06, |
|
"loss": 0.1469, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 1.5479072985126254, |
|
"grad_norm": 1.7448081970214844, |
|
"learning_rate": 4.17448275862069e-06, |
|
"loss": 0.137, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.552231061916292, |
|
"grad_norm": 1.6165127754211426, |
|
"learning_rate": 4.157241379310345e-06, |
|
"loss": 0.1435, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 1.5565548253199584, |
|
"grad_norm": 1.9623734951019287, |
|
"learning_rate": 4.14e-06, |
|
"loss": 0.1411, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.5565548253199584, |
|
"eval_loss": 0.16016647219657898, |
|
"eval_runtime": 18381.9716, |
|
"eval_samples_per_second": 1.258, |
|
"eval_steps_per_second": 0.157, |
|
"eval_wer": 21.488565624948748, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.5608785887236252, |
|
"grad_norm": 2.1674628257751465, |
|
"learning_rate": 4.122758620689655e-06, |
|
"loss": 0.133, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 1.5652023521272915, |
|
"grad_norm": 2.140751600265503, |
|
"learning_rate": 4.105517241379311e-06, |
|
"loss": 0.138, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.569526115530958, |
|
"grad_norm": 2.360790252685547, |
|
"learning_rate": 4.0882758620689665e-06, |
|
"loss": 0.1556, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 1.5738498789346247, |
|
"grad_norm": 2.2368650436401367, |
|
"learning_rate": 4.071034482758621e-06, |
|
"loss": 0.1361, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.5781736423382913, |
|
"grad_norm": 1.6915383338928223, |
|
"learning_rate": 4.053793103448276e-06, |
|
"loss": 0.1355, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 1.5824974057419579, |
|
"grad_norm": 1.6061946153640747, |
|
"learning_rate": 4.036551724137931e-06, |
|
"loss": 0.1429, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.5868211691456242, |
|
"grad_norm": 2.3573153018951416, |
|
"learning_rate": 4.019310344827587e-06, |
|
"loss": 0.1514, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 1.591144932549291, |
|
"grad_norm": 1.658203363418579, |
|
"learning_rate": 4.002068965517242e-06, |
|
"loss": 0.1253, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.5954686959529574, |
|
"grad_norm": 1.5806010961532593, |
|
"learning_rate": 3.9848275862068965e-06, |
|
"loss": 0.135, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 1.599792459356624, |
|
"grad_norm": 1.6775792837142944, |
|
"learning_rate": 3.967586206896552e-06, |
|
"loss": 0.1393, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.6041162227602905, |
|
"grad_norm": 1.632627010345459, |
|
"learning_rate": 3.950344827586207e-06, |
|
"loss": 0.1441, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 1.6084399861639571, |
|
"grad_norm": 1.3760111331939697, |
|
"learning_rate": 3.933103448275862e-06, |
|
"loss": 0.1344, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.6127637495676237, |
|
"grad_norm": 1.8090389966964722, |
|
"learning_rate": 3.915862068965518e-06, |
|
"loss": 0.1469, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 1.61708751297129, |
|
"grad_norm": 1.6850309371948242, |
|
"learning_rate": 3.898620689655173e-06, |
|
"loss": 0.1361, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.6214112763749569, |
|
"grad_norm": 1.616658329963684, |
|
"learning_rate": 3.8813793103448275e-06, |
|
"loss": 0.143, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 1.6257350397786232, |
|
"grad_norm": 1.5689523220062256, |
|
"learning_rate": 3.864137931034483e-06, |
|
"loss": 0.1452, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.63005880318229, |
|
"grad_norm": 1.563894510269165, |
|
"learning_rate": 3.846896551724138e-06, |
|
"loss": 0.1326, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 1.6343825665859564, |
|
"grad_norm": 1.7194911241531372, |
|
"learning_rate": 3.829655172413793e-06, |
|
"loss": 0.1446, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.638706329989623, |
|
"grad_norm": 1.3777263164520264, |
|
"learning_rate": 3.8124137931034486e-06, |
|
"loss": 0.1438, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 1.6430300933932895, |
|
"grad_norm": 1.9295512437820435, |
|
"learning_rate": 3.795172413793104e-06, |
|
"loss": 0.1358, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.6473538567969561, |
|
"grad_norm": 1.6866202354431152, |
|
"learning_rate": 3.7779310344827592e-06, |
|
"loss": 0.1422, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 1.6516776202006227, |
|
"grad_norm": 1.8246145248413086, |
|
"learning_rate": 3.760689655172414e-06, |
|
"loss": 0.1366, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.656001383604289, |
|
"grad_norm": 1.933508276939392, |
|
"learning_rate": 3.7434482758620694e-06, |
|
"loss": 0.1384, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 1.6603251470079559, |
|
"grad_norm": 1.7554436922073364, |
|
"learning_rate": 3.7262068965517247e-06, |
|
"loss": 0.1409, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.6646489104116222, |
|
"grad_norm": 1.4804601669311523, |
|
"learning_rate": 3.7089655172413795e-06, |
|
"loss": 0.1372, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 1.6689726738152888, |
|
"grad_norm": 1.9584331512451172, |
|
"learning_rate": 3.691724137931035e-06, |
|
"loss": 0.1435, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.6732964372189554, |
|
"grad_norm": 1.8187692165374756, |
|
"learning_rate": 3.67448275862069e-06, |
|
"loss": 0.1403, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 1.677620200622622, |
|
"grad_norm": 2.358635187149048, |
|
"learning_rate": 3.657241379310345e-06, |
|
"loss": 0.1438, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.6819439640262885, |
|
"grad_norm": 2.2827279567718506, |
|
"learning_rate": 3.6400000000000003e-06, |
|
"loss": 0.1335, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 1.686267727429955, |
|
"grad_norm": 1.7171095609664917, |
|
"learning_rate": 3.622758620689655e-06, |
|
"loss": 0.1406, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.6905914908336217, |
|
"grad_norm": 1.9003287553787231, |
|
"learning_rate": 3.6055172413793105e-06, |
|
"loss": 0.1428, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 1.6332672834396362, |
|
"learning_rate": 3.5882758620689658e-06, |
|
"loss": 0.1487, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.6992390176409546, |
|
"grad_norm": 1.782727599143982, |
|
"learning_rate": 3.5710344827586206e-06, |
|
"loss": 0.1341, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 1.7035627810446212, |
|
"grad_norm": 1.5744534730911255, |
|
"learning_rate": 3.553793103448276e-06, |
|
"loss": 0.1447, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.7078865444482878, |
|
"grad_norm": 1.8412572145462036, |
|
"learning_rate": 3.5365517241379316e-06, |
|
"loss": 0.1382, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 1.7122103078519544, |
|
"grad_norm": 1.9548972845077515, |
|
"learning_rate": 3.5193103448275865e-06, |
|
"loss": 0.1312, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.7165340712556207, |
|
"grad_norm": 1.7109405994415283, |
|
"learning_rate": 3.502068965517242e-06, |
|
"loss": 0.1421, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 1.7208578346592875, |
|
"grad_norm": 1.8661736249923706, |
|
"learning_rate": 3.484827586206897e-06, |
|
"loss": 0.1429, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.725181598062954, |
|
"grad_norm": 1.750954508781433, |
|
"learning_rate": 3.467586206896552e-06, |
|
"loss": 0.1445, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 1.7295053614666207, |
|
"grad_norm": 1.9543379545211792, |
|
"learning_rate": 3.4503448275862073e-06, |
|
"loss": 0.1401, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.7295053614666207, |
|
"eval_loss": 0.15686531364917755, |
|
"eval_runtime": 18349.489, |
|
"eval_samples_per_second": 1.26, |
|
"eval_steps_per_second": 0.158, |
|
"eval_wer": 21.496062807648066, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.733829124870287, |
|
"grad_norm": 1.7155358791351318, |
|
"learning_rate": 3.4331034482758626e-06, |
|
"loss": 0.1358, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 1.7381528882739536, |
|
"grad_norm": 1.5235016345977783, |
|
"learning_rate": 3.4158620689655174e-06, |
|
"loss": 0.144, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.7424766516776202, |
|
"grad_norm": 1.7004233598709106, |
|
"learning_rate": 3.3986206896551727e-06, |
|
"loss": 0.1422, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 1.7468004150812868, |
|
"grad_norm": 1.533207654953003, |
|
"learning_rate": 3.381379310344828e-06, |
|
"loss": 0.1431, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.7511241784849534, |
|
"grad_norm": 1.442386507987976, |
|
"learning_rate": 3.364827586206897e-06, |
|
"loss": 0.1435, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 1.7554479418886197, |
|
"grad_norm": 1.8642257452011108, |
|
"learning_rate": 3.347586206896552e-06, |
|
"loss": 0.1433, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.7597717052922865, |
|
"grad_norm": 2.0965137481689453, |
|
"learning_rate": 3.330344827586207e-06, |
|
"loss": 0.1455, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 1.764095468695953, |
|
"grad_norm": 1.3066208362579346, |
|
"learning_rate": 3.3131034482758624e-06, |
|
"loss": 0.141, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.7684192320996195, |
|
"grad_norm": 1.3160147666931152, |
|
"learning_rate": 3.2958620689655173e-06, |
|
"loss": 0.1357, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 1.772742995503286, |
|
"grad_norm": 1.9913936853408813, |
|
"learning_rate": 3.2786206896551726e-06, |
|
"loss": 0.1466, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.7770667589069526, |
|
"grad_norm": 1.4349515438079834, |
|
"learning_rate": 3.261379310344828e-06, |
|
"loss": 0.1408, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 1.7813905223106192, |
|
"grad_norm": 2.0493996143341064, |
|
"learning_rate": 3.2441379310344828e-06, |
|
"loss": 0.1454, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 2.072068214416504, |
|
"learning_rate": 3.226896551724138e-06, |
|
"loss": 0.1307, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 1.7900380491179524, |
|
"grad_norm": 1.3982826471328735, |
|
"learning_rate": 3.209655172413793e-06, |
|
"loss": 0.1376, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.7943618125216187, |
|
"grad_norm": 2.1408965587615967, |
|
"learning_rate": 3.1924137931034486e-06, |
|
"loss": 0.1453, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 1.7986855759252853, |
|
"grad_norm": 1.740058183670044, |
|
"learning_rate": 3.175172413793104e-06, |
|
"loss": 0.1467, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.803009339328952, |
|
"grad_norm": 1.626065969467163, |
|
"learning_rate": 3.1579310344827592e-06, |
|
"loss": 0.1426, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 1.8073331027326185, |
|
"grad_norm": 1.758325219154358, |
|
"learning_rate": 3.140689655172414e-06, |
|
"loss": 0.1421, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 1.811656866136285, |
|
"grad_norm": 3.0554895401000977, |
|
"learning_rate": 3.1234482758620694e-06, |
|
"loss": 0.1512, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 1.8159806295399514, |
|
"grad_norm": 2.0043108463287354, |
|
"learning_rate": 3.1062068965517243e-06, |
|
"loss": 0.1477, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.8203043929436182, |
|
"grad_norm": 1.9781348705291748, |
|
"learning_rate": 3.0889655172413796e-06, |
|
"loss": 0.1434, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 1.8246281563472846, |
|
"grad_norm": 1.9845682382583618, |
|
"learning_rate": 3.071724137931035e-06, |
|
"loss": 0.1371, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 1.8289519197509514, |
|
"grad_norm": 1.7266494035720825, |
|
"learning_rate": 3.0544827586206897e-06, |
|
"loss": 0.1414, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 1.8332756831546178, |
|
"grad_norm": 1.6237975358963013, |
|
"learning_rate": 3.037241379310345e-06, |
|
"loss": 0.1451, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.8375994465582843, |
|
"grad_norm": 1.3192580938339233, |
|
"learning_rate": 3.0200000000000003e-06, |
|
"loss": 0.1335, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 1.841923209961951, |
|
"grad_norm": 1.5643882751464844, |
|
"learning_rate": 3.002758620689655e-06, |
|
"loss": 0.14, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 1.8462469733656173, |
|
"grad_norm": 2.649275779724121, |
|
"learning_rate": 2.9855172413793105e-06, |
|
"loss": 0.1412, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 1.850570736769284, |
|
"grad_norm": 2.3274197578430176, |
|
"learning_rate": 2.9682758620689658e-06, |
|
"loss": 0.1486, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.8548945001729504, |
|
"grad_norm": 2.0510830879211426, |
|
"learning_rate": 2.9510344827586206e-06, |
|
"loss": 0.1295, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 1.8592182635766172, |
|
"grad_norm": 1.8213071823120117, |
|
"learning_rate": 2.933793103448276e-06, |
|
"loss": 0.1326, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 1.8635420269802836, |
|
"grad_norm": 1.182237148284912, |
|
"learning_rate": 2.9165517241379316e-06, |
|
"loss": 0.1402, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 1.8678657903839502, |
|
"grad_norm": 1.5509198904037476, |
|
"learning_rate": 2.8993103448275865e-06, |
|
"loss": 0.1299, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.8721895537876168, |
|
"grad_norm": 1.480782151222229, |
|
"learning_rate": 2.882068965517242e-06, |
|
"loss": 0.1445, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 1.8765133171912833, |
|
"grad_norm": 1.7314891815185547, |
|
"learning_rate": 2.864827586206897e-06, |
|
"loss": 0.1332, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 1.88083708059495, |
|
"grad_norm": 1.5692538022994995, |
|
"learning_rate": 2.847586206896552e-06, |
|
"loss": 0.1425, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 1.8851608439986163, |
|
"grad_norm": 1.927616000175476, |
|
"learning_rate": 2.8303448275862073e-06, |
|
"loss": 0.1386, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.889484607402283, |
|
"grad_norm": 1.9353783130645752, |
|
"learning_rate": 2.813103448275862e-06, |
|
"loss": 0.1418, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 1.8938083708059494, |
|
"grad_norm": 1.5706393718719482, |
|
"learning_rate": 2.7958620689655174e-06, |
|
"loss": 0.1316, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 1.898132134209616, |
|
"grad_norm": 1.642783522605896, |
|
"learning_rate": 2.7786206896551727e-06, |
|
"loss": 0.1392, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 1.9024558976132826, |
|
"grad_norm": 1.7343003749847412, |
|
"learning_rate": 2.7613793103448276e-06, |
|
"loss": 0.148, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.9024558976132826, |
|
"eval_loss": 0.1548272669315338, |
|
"eval_runtime": 18698.2371, |
|
"eval_samples_per_second": 1.237, |
|
"eval_steps_per_second": 0.155, |
|
"eval_wer": 21.28684455294534, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.9067796610169492, |
|
"grad_norm": 1.9196803569793701, |
|
"learning_rate": 2.744137931034483e-06, |
|
"loss": 0.1343, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 1.9111034244206158, |
|
"grad_norm": 2.108469009399414, |
|
"learning_rate": 2.726896551724138e-06, |
|
"loss": 0.146, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 1.9154271878242821, |
|
"grad_norm": 1.4290471076965332, |
|
"learning_rate": 2.709655172413793e-06, |
|
"loss": 0.1428, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 1.919750951227949, |
|
"grad_norm": 2.097700357437134, |
|
"learning_rate": 2.6924137931034483e-06, |
|
"loss": 0.137, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.9240747146316153, |
|
"grad_norm": 1.7768304347991943, |
|
"learning_rate": 2.6751724137931036e-06, |
|
"loss": 0.1339, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 1.9283984780352819, |
|
"grad_norm": 2.3340935707092285, |
|
"learning_rate": 2.6579310344827585e-06, |
|
"loss": 0.1499, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 1.9327222414389484, |
|
"grad_norm": 1.390179991722107, |
|
"learning_rate": 2.6406896551724142e-06, |
|
"loss": 0.1332, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 1.937046004842615, |
|
"grad_norm": 1.4288243055343628, |
|
"learning_rate": 2.6234482758620695e-06, |
|
"loss": 0.1378, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.9413697682462816, |
|
"grad_norm": 1.7294844388961792, |
|
"learning_rate": 2.6062068965517244e-06, |
|
"loss": 0.1456, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 1.945693531649948, |
|
"grad_norm": 2.5070643424987793, |
|
"learning_rate": 2.5889655172413797e-06, |
|
"loss": 0.1351, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 1.9500172950536148, |
|
"grad_norm": 1.9430601596832275, |
|
"learning_rate": 2.571724137931035e-06, |
|
"loss": 0.1373, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 1.9543410584572811, |
|
"grad_norm": 1.5814850330352783, |
|
"learning_rate": 2.55448275862069e-06, |
|
"loss": 0.1423, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.958664821860948, |
|
"grad_norm": 2.2843282222747803, |
|
"learning_rate": 2.537241379310345e-06, |
|
"loss": 0.1426, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 1.9629885852646143, |
|
"grad_norm": 1.528397798538208, |
|
"learning_rate": 2.52e-06, |
|
"loss": 0.1362, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 1.9673123486682809, |
|
"grad_norm": 1.5325922966003418, |
|
"learning_rate": 2.5027586206896553e-06, |
|
"loss": 0.1348, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 1.9716361120719474, |
|
"grad_norm": 1.3210896253585815, |
|
"learning_rate": 2.4855172413793106e-06, |
|
"loss": 0.1399, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.975959875475614, |
|
"grad_norm": 1.7425906658172607, |
|
"learning_rate": 2.4682758620689655e-06, |
|
"loss": 0.1347, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 1.9802836388792806, |
|
"grad_norm": 1.4508990049362183, |
|
"learning_rate": 2.4510344827586208e-06, |
|
"loss": 0.1361, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 1.984607402282947, |
|
"grad_norm": 1.4730141162872314, |
|
"learning_rate": 2.433793103448276e-06, |
|
"loss": 0.147, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 1.9889311656866138, |
|
"grad_norm": 1.602019190788269, |
|
"learning_rate": 2.4165517241379314e-06, |
|
"loss": 0.1363, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.9932549290902801, |
|
"grad_norm": 1.9487652778625488, |
|
"learning_rate": 2.3993103448275866e-06, |
|
"loss": 0.1371, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 1.9975786924939467, |
|
"grad_norm": 1.735405445098877, |
|
"learning_rate": 2.3820689655172415e-06, |
|
"loss": 0.142, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.0019024558976133, |
|
"grad_norm": 1.195746898651123, |
|
"learning_rate": 2.364827586206897e-06, |
|
"loss": 0.134, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 2.0062262193012796, |
|
"grad_norm": 1.7679691314697266, |
|
"learning_rate": 2.3475862068965517e-06, |
|
"loss": 0.1181, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.0105499827049464, |
|
"grad_norm": 1.606373906135559, |
|
"learning_rate": 2.330344827586207e-06, |
|
"loss": 0.1136, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 2.014873746108613, |
|
"grad_norm": 1.4573360681533813, |
|
"learning_rate": 2.3131034482758623e-06, |
|
"loss": 0.1146, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.0191975095122796, |
|
"grad_norm": 1.7319608926773071, |
|
"learning_rate": 2.2958620689655176e-06, |
|
"loss": 0.1073, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 2.023521272915946, |
|
"grad_norm": 1.566738247871399, |
|
"learning_rate": 2.278620689655173e-06, |
|
"loss": 0.1185, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.0278450363196128, |
|
"grad_norm": 1.3787413835525513, |
|
"learning_rate": 2.2613793103448277e-06, |
|
"loss": 0.1142, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 2.032168799723279, |
|
"grad_norm": 1.364129662513733, |
|
"learning_rate": 2.244137931034483e-06, |
|
"loss": 0.1221, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.0364925631269455, |
|
"grad_norm": 1.4195500612258911, |
|
"learning_rate": 2.2268965517241383e-06, |
|
"loss": 0.1086, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 1.7290923595428467, |
|
"learning_rate": 2.209655172413793e-06, |
|
"loss": 0.1236, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.0451400899342786, |
|
"grad_norm": 1.8055263757705688, |
|
"learning_rate": 2.1924137931034485e-06, |
|
"loss": 0.1052, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 2.0494638533379455, |
|
"grad_norm": 1.3701517581939697, |
|
"learning_rate": 2.1751724137931033e-06, |
|
"loss": 0.1104, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.053787616741612, |
|
"grad_norm": 1.7283467054367065, |
|
"learning_rate": 2.157931034482759e-06, |
|
"loss": 0.119, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 2.0581113801452786, |
|
"grad_norm": 1.6004694700241089, |
|
"learning_rate": 2.140689655172414e-06, |
|
"loss": 0.1173, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.062435143548945, |
|
"grad_norm": 2.361544609069824, |
|
"learning_rate": 2.1234482758620692e-06, |
|
"loss": 0.1154, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 2.0667589069526118, |
|
"grad_norm": 1.6328535079956055, |
|
"learning_rate": 2.1062068965517245e-06, |
|
"loss": 0.1128, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.071082670356278, |
|
"grad_norm": 1.546014666557312, |
|
"learning_rate": 2.0889655172413794e-06, |
|
"loss": 0.1102, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 2.0754064337599445, |
|
"grad_norm": 1.43386709690094, |
|
"learning_rate": 2.0717241379310347e-06, |
|
"loss": 0.1024, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.0754064337599445, |
|
"eval_loss": 0.16041299700737, |
|
"eval_runtime": 18346.2572, |
|
"eval_samples_per_second": 1.261, |
|
"eval_steps_per_second": 0.158, |
|
"eval_wer": 21.351273466767566, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.0797301971636113, |
|
"grad_norm": 1.259810209274292, |
|
"learning_rate": 2.0544827586206896e-06, |
|
"loss": 0.1054, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 2.0840539605672777, |
|
"grad_norm": 1.7551687955856323, |
|
"learning_rate": 2.037241379310345e-06, |
|
"loss": 0.1084, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.0883777239709445, |
|
"grad_norm": 1.4865624904632568, |
|
"learning_rate": 2.02e-06, |
|
"loss": 0.1148, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 2.092701487374611, |
|
"grad_norm": 1.7182704210281372, |
|
"learning_rate": 2.0027586206896554e-06, |
|
"loss": 0.1089, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.0970252507782776, |
|
"grad_norm": 1.8586798906326294, |
|
"learning_rate": 1.9855172413793107e-06, |
|
"loss": 0.1058, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 2.101349014181944, |
|
"grad_norm": 1.1890976428985596, |
|
"learning_rate": 1.9682758620689656e-06, |
|
"loss": 0.1118, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.1056727775856103, |
|
"grad_norm": 1.7096284627914429, |
|
"learning_rate": 1.951034482758621e-06, |
|
"loss": 0.1178, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 2.109996540989277, |
|
"grad_norm": 1.6265876293182373, |
|
"learning_rate": 1.933793103448276e-06, |
|
"loss": 0.11, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.1143203043929435, |
|
"grad_norm": 2.068101406097412, |
|
"learning_rate": 1.916551724137931e-06, |
|
"loss": 0.1129, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 2.1186440677966103, |
|
"grad_norm": 1.4093377590179443, |
|
"learning_rate": 1.8993103448275864e-06, |
|
"loss": 0.1033, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.1229678312002767, |
|
"grad_norm": 1.716834306716919, |
|
"learning_rate": 1.8820689655172416e-06, |
|
"loss": 0.1118, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 2.1272915946039435, |
|
"grad_norm": 1.614517331123352, |
|
"learning_rate": 1.8648275862068967e-06, |
|
"loss": 0.1123, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.13161535800761, |
|
"grad_norm": 1.609824299812317, |
|
"learning_rate": 1.847586206896552e-06, |
|
"loss": 0.1135, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 2.135939121411276, |
|
"grad_norm": 1.4824578762054443, |
|
"learning_rate": 1.830344827586207e-06, |
|
"loss": 0.1154, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.140262884814943, |
|
"grad_norm": 2.31526255607605, |
|
"learning_rate": 1.8131034482758622e-06, |
|
"loss": 0.1102, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 2.1445866482186093, |
|
"grad_norm": 1.5895624160766602, |
|
"learning_rate": 1.7958620689655173e-06, |
|
"loss": 0.1114, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.148910411622276, |
|
"grad_norm": 1.8020554780960083, |
|
"learning_rate": 1.7786206896551726e-06, |
|
"loss": 0.114, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 2.1532341750259425, |
|
"grad_norm": 1.7379206418991089, |
|
"learning_rate": 1.7613793103448276e-06, |
|
"loss": 0.1074, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.1575579384296093, |
|
"grad_norm": 1.5234774351119995, |
|
"learning_rate": 1.744137931034483e-06, |
|
"loss": 0.1049, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 2.1618817018332757, |
|
"grad_norm": 2.315284252166748, |
|
"learning_rate": 1.7268965517241382e-06, |
|
"loss": 0.1187, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.166205465236942, |
|
"grad_norm": 1.5843135118484497, |
|
"learning_rate": 1.7096551724137933e-06, |
|
"loss": 0.1099, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 2.170529228640609, |
|
"grad_norm": 1.4398502111434937, |
|
"learning_rate": 1.6924137931034484e-06, |
|
"loss": 0.1179, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.174852992044275, |
|
"grad_norm": 1.6261566877365112, |
|
"learning_rate": 1.6751724137931037e-06, |
|
"loss": 0.1109, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 2.179176755447942, |
|
"grad_norm": 1.493456244468689, |
|
"learning_rate": 1.6579310344827588e-06, |
|
"loss": 0.1087, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.1835005188516083, |
|
"grad_norm": 2.314145565032959, |
|
"learning_rate": 1.6406896551724138e-06, |
|
"loss": 0.1092, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 2.187824282255275, |
|
"grad_norm": 1.5329914093017578, |
|
"learning_rate": 1.623448275862069e-06, |
|
"loss": 0.1179, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.1921480456589415, |
|
"grad_norm": 1.8789763450622559, |
|
"learning_rate": 1.6062068965517244e-06, |
|
"loss": 0.1146, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 2.196471809062608, |
|
"grad_norm": 1.6287965774536133, |
|
"learning_rate": 1.5889655172413795e-06, |
|
"loss": 0.1076, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.2007955724662747, |
|
"grad_norm": 1.3174418210983276, |
|
"learning_rate": 1.5717241379310346e-06, |
|
"loss": 0.1042, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 2.205119335869941, |
|
"grad_norm": 1.481208086013794, |
|
"learning_rate": 1.55448275862069e-06, |
|
"loss": 0.1113, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.209443099273608, |
|
"grad_norm": 1.5273900032043457, |
|
"learning_rate": 1.537241379310345e-06, |
|
"loss": 0.1101, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 2.213766862677274, |
|
"grad_norm": 1.648422122001648, |
|
"learning_rate": 1.52e-06, |
|
"loss": 0.1079, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.218090626080941, |
|
"grad_norm": 1.5516725778579712, |
|
"learning_rate": 1.5027586206896551e-06, |
|
"loss": 0.1147, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 2.2224143894846073, |
|
"grad_norm": 1.68115234375, |
|
"learning_rate": 1.4855172413793104e-06, |
|
"loss": 0.1066, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.226738152888274, |
|
"grad_norm": 1.784679889678955, |
|
"learning_rate": 1.4682758620689657e-06, |
|
"loss": 0.1118, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 2.2310619162919405, |
|
"grad_norm": 1.5086143016815186, |
|
"learning_rate": 1.4510344827586208e-06, |
|
"loss": 0.1175, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.235385679695607, |
|
"grad_norm": 1.4072906970977783, |
|
"learning_rate": 1.433793103448276e-06, |
|
"loss": 0.111, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 2.2397094430992737, |
|
"grad_norm": 1.4439597129821777, |
|
"learning_rate": 1.4165517241379312e-06, |
|
"loss": 0.111, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.24403320650294, |
|
"grad_norm": 1.474888563156128, |
|
"learning_rate": 1.3993103448275863e-06, |
|
"loss": 0.1121, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 2.248356969906607, |
|
"grad_norm": 2.2483158111572266, |
|
"learning_rate": 1.3820689655172416e-06, |
|
"loss": 0.1145, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.248356969906607, |
|
"eval_loss": 0.16119933128356934, |
|
"eval_runtime": 18308.4763, |
|
"eval_samples_per_second": 1.263, |
|
"eval_steps_per_second": 0.158, |
|
"eval_wer": 21.170638221105975, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.252680733310273, |
|
"grad_norm": 1.6092015504837036, |
|
"learning_rate": 1.3648275862068966e-06, |
|
"loss": 0.1097, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 2.25700449671394, |
|
"grad_norm": 1.7606579065322876, |
|
"learning_rate": 1.3475862068965517e-06, |
|
"loss": 0.1175, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.2613282601176063, |
|
"grad_norm": 1.5152302980422974, |
|
"learning_rate": 1.3303448275862072e-06, |
|
"loss": 0.1093, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 2.265652023521273, |
|
"grad_norm": 1.5567739009857178, |
|
"learning_rate": 1.3131034482758623e-06, |
|
"loss": 0.1033, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.2699757869249395, |
|
"grad_norm": 1.738932728767395, |
|
"learning_rate": 1.2958620689655174e-06, |
|
"loss": 0.115, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 2.274299550328606, |
|
"grad_norm": 1.2959973812103271, |
|
"learning_rate": 1.2786206896551725e-06, |
|
"loss": 0.1073, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.2786233137322727, |
|
"grad_norm": 1.391763687133789, |
|
"learning_rate": 1.2613793103448278e-06, |
|
"loss": 0.1045, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 2.282947077135939, |
|
"grad_norm": 1.8664268255233765, |
|
"learning_rate": 1.2441379310344829e-06, |
|
"loss": 0.1146, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.287270840539606, |
|
"grad_norm": 1.708479642868042, |
|
"learning_rate": 1.226896551724138e-06, |
|
"loss": 0.1067, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 2.291594603943272, |
|
"grad_norm": 1.6440658569335938, |
|
"learning_rate": 1.2096551724137932e-06, |
|
"loss": 0.1202, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.295918367346939, |
|
"grad_norm": 1.3780643939971924, |
|
"learning_rate": 1.1924137931034483e-06, |
|
"loss": 0.1183, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 2.3002421307506054, |
|
"grad_norm": 2.0071065425872803, |
|
"learning_rate": 1.1751724137931036e-06, |
|
"loss": 0.1133, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.3045658941542717, |
|
"grad_norm": 1.6017673015594482, |
|
"learning_rate": 1.1579310344827587e-06, |
|
"loss": 0.1087, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 2.3088896575579385, |
|
"grad_norm": 1.8712215423583984, |
|
"learning_rate": 1.140689655172414e-06, |
|
"loss": 0.1165, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.313213420961605, |
|
"grad_norm": 2.241171360015869, |
|
"learning_rate": 1.123448275862069e-06, |
|
"loss": 0.1041, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 2.3175371843652717, |
|
"grad_norm": 1.7374755144119263, |
|
"learning_rate": 1.1062068965517241e-06, |
|
"loss": 0.1088, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.321860947768938, |
|
"grad_norm": 1.387485384941101, |
|
"learning_rate": 1.0889655172413794e-06, |
|
"loss": 0.1124, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 2.326184711172605, |
|
"grad_norm": 1.7821033000946045, |
|
"learning_rate": 1.0717241379310345e-06, |
|
"loss": 0.1129, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.330508474576271, |
|
"grad_norm": 1.8013248443603516, |
|
"learning_rate": 1.0544827586206898e-06, |
|
"loss": 0.1113, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 2.3348322379799376, |
|
"grad_norm": 1.7627577781677246, |
|
"learning_rate": 1.0372413793103449e-06, |
|
"loss": 0.106, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.3391560013836044, |
|
"grad_norm": 1.4727102518081665, |
|
"learning_rate": 1.02e-06, |
|
"loss": 0.119, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 2.3434797647872707, |
|
"grad_norm": 2.0880470275878906, |
|
"learning_rate": 1.0027586206896553e-06, |
|
"loss": 0.1099, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.3478035281909375, |
|
"grad_norm": 1.6722257137298584, |
|
"learning_rate": 9.855172413793104e-07, |
|
"loss": 0.1176, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 2.352127291594604, |
|
"grad_norm": 1.5856317281723022, |
|
"learning_rate": 9.682758620689656e-07, |
|
"loss": 0.1108, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.3564510549982707, |
|
"grad_norm": 1.4923266172409058, |
|
"learning_rate": 9.510344827586207e-07, |
|
"loss": 0.1098, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 2.360774818401937, |
|
"grad_norm": 1.6731258630752563, |
|
"learning_rate": 9.33793103448276e-07, |
|
"loss": 0.1129, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.3650985818056034, |
|
"grad_norm": 1.5794440507888794, |
|
"learning_rate": 9.165517241379311e-07, |
|
"loss": 0.1132, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 2.36942234520927, |
|
"grad_norm": 1.9833779335021973, |
|
"learning_rate": 8.993103448275863e-07, |
|
"loss": 0.1104, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.3737461086129366, |
|
"grad_norm": 1.929689645767212, |
|
"learning_rate": 8.820689655172414e-07, |
|
"loss": 0.1113, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 2.3780698720166034, |
|
"grad_norm": 1.2566806077957153, |
|
"learning_rate": 8.648275862068967e-07, |
|
"loss": 0.1086, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.3823936354202697, |
|
"grad_norm": 2.132885694503784, |
|
"learning_rate": 8.475862068965517e-07, |
|
"loss": 0.1077, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 2.3867173988239365, |
|
"grad_norm": 1.7326260805130005, |
|
"learning_rate": 8.303448275862069e-07, |
|
"loss": 0.1083, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.391041162227603, |
|
"grad_norm": 1.7849270105361938, |
|
"learning_rate": 8.131034482758621e-07, |
|
"loss": 0.1059, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 2.3953649256312692, |
|
"grad_norm": 1.3285590410232544, |
|
"learning_rate": 7.958620689655173e-07, |
|
"loss": 0.0987, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.399688689034936, |
|
"grad_norm": 1.6088476181030273, |
|
"learning_rate": 7.786206896551725e-07, |
|
"loss": 0.1097, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 2.4040124524386024, |
|
"grad_norm": 1.5900739431381226, |
|
"learning_rate": 7.613793103448276e-07, |
|
"loss": 0.107, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.408336215842269, |
|
"grad_norm": 1.4366368055343628, |
|
"learning_rate": 7.441379310344828e-07, |
|
"loss": 0.1087, |
|
"step": 13925 |
|
}, |
|
{ |
|
"epoch": 2.4126599792459356, |
|
"grad_norm": 1.583687424659729, |
|
"learning_rate": 7.268965517241381e-07, |
|
"loss": 0.109, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 2.4169837426496024, |
|
"grad_norm": 1.6272701025009155, |
|
"learning_rate": 7.096551724137931e-07, |
|
"loss": 0.1087, |
|
"step": 13975 |
|
}, |
|
{ |
|
"epoch": 2.4213075060532687, |
|
"grad_norm": 1.6933766603469849, |
|
"learning_rate": 6.924137931034483e-07, |
|
"loss": 0.1051, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.4213075060532687, |
|
"eval_loss": 0.16103602945804596, |
|
"eval_runtime": 18381.9917, |
|
"eval_samples_per_second": 1.258, |
|
"eval_steps_per_second": 0.157, |
|
"eval_wer": 21.26997589187188, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.425631269456935, |
|
"grad_norm": 1.5084308385849, |
|
"learning_rate": 6.751724137931034e-07, |
|
"loss": 0.1108, |
|
"step": 14025 |
|
}, |
|
{ |
|
"epoch": 2.429955032860602, |
|
"grad_norm": 1.3734257221221924, |
|
"learning_rate": 6.579310344827587e-07, |
|
"loss": 0.107, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 2.4342787962642682, |
|
"grad_norm": 2.01283597946167, |
|
"learning_rate": 6.406896551724139e-07, |
|
"loss": 0.1044, |
|
"step": 14075 |
|
}, |
|
{ |
|
"epoch": 2.438602559667935, |
|
"grad_norm": 1.3206173181533813, |
|
"learning_rate": 6.23448275862069e-07, |
|
"loss": 0.1031, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.4429263230716014, |
|
"grad_norm": 1.6226475238800049, |
|
"learning_rate": 6.062068965517242e-07, |
|
"loss": 0.1068, |
|
"step": 14125 |
|
}, |
|
{ |
|
"epoch": 2.447250086475268, |
|
"grad_norm": 1.8544663190841675, |
|
"learning_rate": 5.896551724137931e-07, |
|
"loss": 0.1134, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 2.4515738498789346, |
|
"grad_norm": 1.9135141372680664, |
|
"learning_rate": 5.724137931034483e-07, |
|
"loss": 0.1076, |
|
"step": 14175 |
|
}, |
|
{ |
|
"epoch": 2.4558976132826014, |
|
"grad_norm": 1.9522300958633423, |
|
"learning_rate": 5.551724137931035e-07, |
|
"loss": 0.1028, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.4602213766862677, |
|
"grad_norm": 1.5151312351226807, |
|
"learning_rate": 5.379310344827587e-07, |
|
"loss": 0.1047, |
|
"step": 14225 |
|
}, |
|
{ |
|
"epoch": 2.4645451400899345, |
|
"grad_norm": 1.2128667831420898, |
|
"learning_rate": 5.206896551724138e-07, |
|
"loss": 0.1055, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 2.468868903493601, |
|
"grad_norm": 1.6889070272445679, |
|
"learning_rate": 5.03448275862069e-07, |
|
"loss": 0.1086, |
|
"step": 14275 |
|
}, |
|
{ |
|
"epoch": 2.4731926668972672, |
|
"grad_norm": 1.5473207235336304, |
|
"learning_rate": 4.862068965517241e-07, |
|
"loss": 0.1132, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.477516430300934, |
|
"grad_norm": 1.570099949836731, |
|
"learning_rate": 4.6896551724137934e-07, |
|
"loss": 0.108, |
|
"step": 14325 |
|
}, |
|
{ |
|
"epoch": 2.4818401937046004, |
|
"grad_norm": 1.4723212718963623, |
|
"learning_rate": 4.5172413793103447e-07, |
|
"loss": 0.1115, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 2.486163957108267, |
|
"grad_norm": 1.4830899238586426, |
|
"learning_rate": 4.344827586206897e-07, |
|
"loss": 0.1164, |
|
"step": 14375 |
|
}, |
|
{ |
|
"epoch": 2.4904877205119336, |
|
"grad_norm": 1.7449450492858887, |
|
"learning_rate": 4.1724137931034485e-07, |
|
"loss": 0.109, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.4948114839156004, |
|
"grad_norm": 1.74076247215271, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.1015, |
|
"step": 14425 |
|
}, |
|
{ |
|
"epoch": 2.4991352473192667, |
|
"grad_norm": 1.4947025775909424, |
|
"learning_rate": 3.8275862068965517e-07, |
|
"loss": 0.1071, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 2.503459010722933, |
|
"grad_norm": 1.2934141159057617, |
|
"learning_rate": 3.6551724137931036e-07, |
|
"loss": 0.1041, |
|
"step": 14475 |
|
}, |
|
{ |
|
"epoch": 2.5077827741266, |
|
"grad_norm": 2.003370761871338, |
|
"learning_rate": 3.4827586206896555e-07, |
|
"loss": 0.106, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.5121065375302662, |
|
"grad_norm": 1.6628522872924805, |
|
"learning_rate": 3.3103448275862073e-07, |
|
"loss": 0.1109, |
|
"step": 14525 |
|
}, |
|
{ |
|
"epoch": 2.516430300933933, |
|
"grad_norm": 2.323209762573242, |
|
"learning_rate": 3.1379310344827587e-07, |
|
"loss": 0.1114, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 2.5207540643375994, |
|
"grad_norm": 1.4119327068328857, |
|
"learning_rate": 2.9655172413793106e-07, |
|
"loss": 0.107, |
|
"step": 14575 |
|
}, |
|
{ |
|
"epoch": 2.525077827741266, |
|
"grad_norm": 1.6572582721710205, |
|
"learning_rate": 2.7931034482758624e-07, |
|
"loss": 0.111, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.5294015911449326, |
|
"grad_norm": 1.4858754873275757, |
|
"learning_rate": 2.6206896551724143e-07, |
|
"loss": 0.0951, |
|
"step": 14625 |
|
}, |
|
{ |
|
"epoch": 2.533725354548599, |
|
"grad_norm": 1.6584004163742065, |
|
"learning_rate": 2.4482758620689657e-07, |
|
"loss": 0.1018, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 2.5380491179522657, |
|
"grad_norm": 1.3879257440567017, |
|
"learning_rate": 2.2758620689655175e-07, |
|
"loss": 0.1113, |
|
"step": 14675 |
|
}, |
|
{ |
|
"epoch": 2.542372881355932, |
|
"grad_norm": 1.7336797714233398, |
|
"learning_rate": 2.1034482758620692e-07, |
|
"loss": 0.1016, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.546696644759599, |
|
"grad_norm": 1.9225796461105347, |
|
"learning_rate": 1.931034482758621e-07, |
|
"loss": 0.1199, |
|
"step": 14725 |
|
}, |
|
{ |
|
"epoch": 2.5510204081632653, |
|
"grad_norm": 1.6256892681121826, |
|
"learning_rate": 1.7586206896551726e-07, |
|
"loss": 0.1047, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 2.555344171566932, |
|
"grad_norm": 1.2848526239395142, |
|
"learning_rate": 1.5862068965517243e-07, |
|
"loss": 0.1045, |
|
"step": 14775 |
|
}, |
|
{ |
|
"epoch": 2.5596679349705984, |
|
"grad_norm": 1.3589074611663818, |
|
"learning_rate": 1.413793103448276e-07, |
|
"loss": 0.1063, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.5639916983742648, |
|
"grad_norm": 1.3591971397399902, |
|
"learning_rate": 1.2413793103448277e-07, |
|
"loss": 0.11, |
|
"step": 14825 |
|
}, |
|
{ |
|
"epoch": 2.5683154617779316, |
|
"grad_norm": 1.975306510925293, |
|
"learning_rate": 1.0689655172413794e-07, |
|
"loss": 0.1082, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 2.572639225181598, |
|
"grad_norm": 1.4814668893814087, |
|
"learning_rate": 8.965517241379311e-08, |
|
"loss": 0.1126, |
|
"step": 14875 |
|
}, |
|
{ |
|
"epoch": 2.5769629885852647, |
|
"grad_norm": 1.5567821264266968, |
|
"learning_rate": 7.241379310344829e-08, |
|
"loss": 0.1082, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.581286751988931, |
|
"grad_norm": 1.6796619892120361, |
|
"learning_rate": 5.517241379310345e-08, |
|
"loss": 0.1098, |
|
"step": 14925 |
|
}, |
|
{ |
|
"epoch": 2.585610515392598, |
|
"grad_norm": 1.6661121845245361, |
|
"learning_rate": 3.793103448275862e-08, |
|
"loss": 0.1075, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 2.5899342787962643, |
|
"grad_norm": 1.9763529300689697, |
|
"learning_rate": 2.0689655172413796e-08, |
|
"loss": 0.1095, |
|
"step": 14975 |
|
}, |
|
{ |
|
"epoch": 2.5942580421999306, |
|
"grad_norm": 1.9784560203552246, |
|
"learning_rate": 3.4482758620689654e-09, |
|
"loss": 0.1159, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.5942580421999306, |
|
"eval_loss": 0.16035248339176178, |
|
"eval_runtime": 18431.1546, |
|
"eval_samples_per_second": 1.255, |
|
"eval_steps_per_second": 0.157, |
|
"eval_wer": 21.29949604875043, |
|
"step": 15000 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 15000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.4492272449683456e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|