whisper-medium-vaani-hindi / trainer_state.json
SujithPulikodan's picture
Upload 11 files
41b6e8a verified
{
"best_metric": 21.170638221105975,
"best_model_checkpoint": "./whisper-medium-hi/checkpoint-13000",
"epoch": 2.5942580421999306,
"eval_steps": 1000,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004323763403666551,
"grad_norm": 7.115922451019287,
"learning_rate": 4.800000000000001e-07,
"loss": 0.7628,
"step": 25
},
{
"epoch": 0.008647526807333102,
"grad_norm": 5.3593902587890625,
"learning_rate": 9.800000000000001e-07,
"loss": 0.6367,
"step": 50
},
{
"epoch": 0.012971290210999653,
"grad_norm": 12.570939064025879,
"learning_rate": 1.48e-06,
"loss": 0.5008,
"step": 75
},
{
"epoch": 0.017295053614666205,
"grad_norm": 4.155623912811279,
"learning_rate": 1.98e-06,
"loss": 0.4183,
"step": 100
},
{
"epoch": 0.021618817018332757,
"grad_norm": 3.200021982192993,
"learning_rate": 2.4800000000000004e-06,
"loss": 0.3569,
"step": 125
},
{
"epoch": 0.025942580421999307,
"grad_norm": 4.024964809417725,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.379,
"step": 150
},
{
"epoch": 0.03026634382566586,
"grad_norm": 5.151554107666016,
"learning_rate": 3.48e-06,
"loss": 0.3407,
"step": 175
},
{
"epoch": 0.03459010722933241,
"grad_norm": 3.2938315868377686,
"learning_rate": 3.980000000000001e-06,
"loss": 0.3222,
"step": 200
},
{
"epoch": 0.03891387063299896,
"grad_norm": 4.498330593109131,
"learning_rate": 4.48e-06,
"loss": 0.3147,
"step": 225
},
{
"epoch": 0.043237634036665515,
"grad_norm": 3.980656385421753,
"learning_rate": 4.980000000000001e-06,
"loss": 0.2994,
"step": 250
},
{
"epoch": 0.04756139744033207,
"grad_norm": 4.725027084350586,
"learning_rate": 5.480000000000001e-06,
"loss": 0.2797,
"step": 275
},
{
"epoch": 0.051885160843998614,
"grad_norm": 4.188190937042236,
"learning_rate": 5.98e-06,
"loss": 0.2981,
"step": 300
},
{
"epoch": 0.05620892424766517,
"grad_norm": 3.800278425216675,
"learning_rate": 6.480000000000001e-06,
"loss": 0.285,
"step": 325
},
{
"epoch": 0.06053268765133172,
"grad_norm": 3.427295207977295,
"learning_rate": 6.98e-06,
"loss": 0.2853,
"step": 350
},
{
"epoch": 0.06485645105499827,
"grad_norm": 3.704080104827881,
"learning_rate": 7.48e-06,
"loss": 0.2714,
"step": 375
},
{
"epoch": 0.06918021445866482,
"grad_norm": 4.518213272094727,
"learning_rate": 7.980000000000002e-06,
"loss": 0.2637,
"step": 400
},
{
"epoch": 0.07350397786233137,
"grad_norm": 3.9395949840545654,
"learning_rate": 8.48e-06,
"loss": 0.2698,
"step": 425
},
{
"epoch": 0.07782774126599792,
"grad_norm": 3.8880250453948975,
"learning_rate": 8.98e-06,
"loss": 0.28,
"step": 450
},
{
"epoch": 0.08215150466966448,
"grad_norm": 3.307865858078003,
"learning_rate": 9.48e-06,
"loss": 0.259,
"step": 475
},
{
"epoch": 0.08647526807333103,
"grad_norm": 3.322256565093994,
"learning_rate": 9.980000000000001e-06,
"loss": 0.2505,
"step": 500
},
{
"epoch": 0.09079903147699758,
"grad_norm": 4.282881736755371,
"learning_rate": 9.98344827586207e-06,
"loss": 0.249,
"step": 525
},
{
"epoch": 0.09512279488066414,
"grad_norm": 2.616466522216797,
"learning_rate": 9.966206896551724e-06,
"loss": 0.2422,
"step": 550
},
{
"epoch": 0.09944655828433069,
"grad_norm": 3.2629432678222656,
"learning_rate": 9.94896551724138e-06,
"loss": 0.2617,
"step": 575
},
{
"epoch": 0.10377032168799723,
"grad_norm": 3.622666597366333,
"learning_rate": 9.931724137931036e-06,
"loss": 0.2627,
"step": 600
},
{
"epoch": 0.10809408509166378,
"grad_norm": 3.1837680339813232,
"learning_rate": 9.91448275862069e-06,
"loss": 0.2418,
"step": 625
},
{
"epoch": 0.11241784849533033,
"grad_norm": 4.119836807250977,
"learning_rate": 9.897241379310345e-06,
"loss": 0.2588,
"step": 650
},
{
"epoch": 0.11674161189899689,
"grad_norm": 3.3661813735961914,
"learning_rate": 9.88e-06,
"loss": 0.2401,
"step": 675
},
{
"epoch": 0.12106537530266344,
"grad_norm": 3.551511526107788,
"learning_rate": 9.862758620689657e-06,
"loss": 0.2249,
"step": 700
},
{
"epoch": 0.12538913870632998,
"grad_norm": 3.5980288982391357,
"learning_rate": 9.845517241379312e-06,
"loss": 0.2356,
"step": 725
},
{
"epoch": 0.12971290210999653,
"grad_norm": 3.171095132827759,
"learning_rate": 9.828275862068967e-06,
"loss": 0.2315,
"step": 750
},
{
"epoch": 0.13403666551366308,
"grad_norm": 3.0761499404907227,
"learning_rate": 9.811034482758621e-06,
"loss": 0.2238,
"step": 775
},
{
"epoch": 0.13836042891732964,
"grad_norm": 2.9867141246795654,
"learning_rate": 9.793793103448276e-06,
"loss": 0.2273,
"step": 800
},
{
"epoch": 0.1426841923209962,
"grad_norm": 3.2623560428619385,
"learning_rate": 9.776551724137931e-06,
"loss": 0.2361,
"step": 825
},
{
"epoch": 0.14700795572466274,
"grad_norm": 3.183367967605591,
"learning_rate": 9.759310344827588e-06,
"loss": 0.2301,
"step": 850
},
{
"epoch": 0.1513317191283293,
"grad_norm": 3.199496030807495,
"learning_rate": 9.742068965517243e-06,
"loss": 0.2433,
"step": 875
},
{
"epoch": 0.15565548253199585,
"grad_norm": 2.4695870876312256,
"learning_rate": 9.724827586206897e-06,
"loss": 0.222,
"step": 900
},
{
"epoch": 0.1599792459356624,
"grad_norm": 3.613236427307129,
"learning_rate": 9.707586206896552e-06,
"loss": 0.2337,
"step": 925
},
{
"epoch": 0.16430300933932895,
"grad_norm": 3.4775917530059814,
"learning_rate": 9.690344827586207e-06,
"loss": 0.2243,
"step": 950
},
{
"epoch": 0.1686267727429955,
"grad_norm": 4.184005260467529,
"learning_rate": 9.673103448275862e-06,
"loss": 0.2253,
"step": 975
},
{
"epoch": 0.17295053614666206,
"grad_norm": 2.5935354232788086,
"learning_rate": 9.655862068965519e-06,
"loss": 0.2142,
"step": 1000
},
{
"epoch": 0.17295053614666206,
"eval_loss": 0.21374455094337463,
"eval_runtime": 18340.9505,
"eval_samples_per_second": 1.261,
"eval_steps_per_second": 0.158,
"eval_wer": 25.915886295852886,
"step": 1000
},
{
"epoch": 0.1772742995503286,
"grad_norm": 3.235563039779663,
"learning_rate": 9.638620689655174e-06,
"loss": 0.2267,
"step": 1025
},
{
"epoch": 0.18159806295399517,
"grad_norm": 3.3286187648773193,
"learning_rate": 9.621379310344828e-06,
"loss": 0.2325,
"step": 1050
},
{
"epoch": 0.18592182635766172,
"grad_norm": 3.00522518157959,
"learning_rate": 9.604137931034483e-06,
"loss": 0.2203,
"step": 1075
},
{
"epoch": 0.19024558976132827,
"grad_norm": 3.3868496417999268,
"learning_rate": 9.586896551724138e-06,
"loss": 0.2015,
"step": 1100
},
{
"epoch": 0.19456935316499482,
"grad_norm": 2.8398849964141846,
"learning_rate": 9.569655172413795e-06,
"loss": 0.2255,
"step": 1125
},
{
"epoch": 0.19889311656866138,
"grad_norm": 2.5562820434570312,
"learning_rate": 9.55241379310345e-06,
"loss": 0.2164,
"step": 1150
},
{
"epoch": 0.2032168799723279,
"grad_norm": 2.275176525115967,
"learning_rate": 9.535172413793104e-06,
"loss": 0.2104,
"step": 1175
},
{
"epoch": 0.20754064337599445,
"grad_norm": 3.1678836345672607,
"learning_rate": 9.51793103448276e-06,
"loss": 0.2109,
"step": 1200
},
{
"epoch": 0.211864406779661,
"grad_norm": 3.5133020877838135,
"learning_rate": 9.500689655172414e-06,
"loss": 0.2218,
"step": 1225
},
{
"epoch": 0.21618817018332756,
"grad_norm": 2.210845470428467,
"learning_rate": 9.483448275862069e-06,
"loss": 0.2113,
"step": 1250
},
{
"epoch": 0.2205119335869941,
"grad_norm": 2.5255446434020996,
"learning_rate": 9.466206896551726e-06,
"loss": 0.2115,
"step": 1275
},
{
"epoch": 0.22483569699066067,
"grad_norm": 3.083555221557617,
"learning_rate": 9.44896551724138e-06,
"loss": 0.208,
"step": 1300
},
{
"epoch": 0.22915946039432722,
"grad_norm": 2.392900228500366,
"learning_rate": 9.431724137931035e-06,
"loss": 0.2258,
"step": 1325
},
{
"epoch": 0.23348322379799377,
"grad_norm": 2.5552825927734375,
"learning_rate": 9.41448275862069e-06,
"loss": 0.2023,
"step": 1350
},
{
"epoch": 0.23780698720166032,
"grad_norm": 2.8265864849090576,
"learning_rate": 9.397241379310345e-06,
"loss": 0.2214,
"step": 1375
},
{
"epoch": 0.24213075060532688,
"grad_norm": 3.0209717750549316,
"learning_rate": 9.38e-06,
"loss": 0.217,
"step": 1400
},
{
"epoch": 0.24645451400899343,
"grad_norm": 3.3972790241241455,
"learning_rate": 9.362758620689657e-06,
"loss": 0.2176,
"step": 1425
},
{
"epoch": 0.25077827741265996,
"grad_norm": 2.645206928253174,
"learning_rate": 9.345517241379311e-06,
"loss": 0.2186,
"step": 1450
},
{
"epoch": 0.25510204081632654,
"grad_norm": 2.4541475772857666,
"learning_rate": 9.328275862068966e-06,
"loss": 0.2089,
"step": 1475
},
{
"epoch": 0.25942580421999306,
"grad_norm": 3.789954900741577,
"learning_rate": 9.311034482758621e-06,
"loss": 0.2117,
"step": 1500
},
{
"epoch": 0.26374956762365964,
"grad_norm": 2.3939154148101807,
"learning_rate": 9.293793103448276e-06,
"loss": 0.207,
"step": 1525
},
{
"epoch": 0.26807333102732617,
"grad_norm": 3.0324156284332275,
"learning_rate": 9.276551724137933e-06,
"loss": 0.2276,
"step": 1550
},
{
"epoch": 0.27239709443099275,
"grad_norm": 2.5523455142974854,
"learning_rate": 9.259310344827587e-06,
"loss": 0.2061,
"step": 1575
},
{
"epoch": 0.2767208578346593,
"grad_norm": 2.8267321586608887,
"learning_rate": 9.242068965517242e-06,
"loss": 0.2054,
"step": 1600
},
{
"epoch": 0.28104462123832585,
"grad_norm": 1.9460331201553345,
"learning_rate": 9.224827586206897e-06,
"loss": 0.1974,
"step": 1625
},
{
"epoch": 0.2853683846419924,
"grad_norm": 2.280379056930542,
"learning_rate": 9.207586206896552e-06,
"loss": 0.1978,
"step": 1650
},
{
"epoch": 0.28969214804565896,
"grad_norm": 2.5089499950408936,
"learning_rate": 9.190344827586207e-06,
"loss": 0.2081,
"step": 1675
},
{
"epoch": 0.2940159114493255,
"grad_norm": 2.703047037124634,
"learning_rate": 9.173103448275864e-06,
"loss": 0.2001,
"step": 1700
},
{
"epoch": 0.29833967485299207,
"grad_norm": 2.437891960144043,
"learning_rate": 9.155862068965518e-06,
"loss": 0.2085,
"step": 1725
},
{
"epoch": 0.3026634382566586,
"grad_norm": 2.1032724380493164,
"learning_rate": 9.138620689655173e-06,
"loss": 0.2081,
"step": 1750
},
{
"epoch": 0.30698720166032517,
"grad_norm": 2.795346260070801,
"learning_rate": 9.121379310344828e-06,
"loss": 0.2031,
"step": 1775
},
{
"epoch": 0.3113109650639917,
"grad_norm": 2.2833924293518066,
"learning_rate": 9.104137931034483e-06,
"loss": 0.1887,
"step": 1800
},
{
"epoch": 0.3156347284676583,
"grad_norm": 2.965100049972534,
"learning_rate": 9.08689655172414e-06,
"loss": 0.2158,
"step": 1825
},
{
"epoch": 0.3199584918713248,
"grad_norm": 2.2826411724090576,
"learning_rate": 9.069655172413794e-06,
"loss": 0.2018,
"step": 1850
},
{
"epoch": 0.3242822552749913,
"grad_norm": 2.4495162963867188,
"learning_rate": 9.05241379310345e-06,
"loss": 0.2164,
"step": 1875
},
{
"epoch": 0.3286060186786579,
"grad_norm": 2.1806020736694336,
"learning_rate": 9.035172413793104e-06,
"loss": 0.2039,
"step": 1900
},
{
"epoch": 0.33292978208232443,
"grad_norm": 2.480971574783325,
"learning_rate": 9.017931034482759e-06,
"loss": 0.1978,
"step": 1925
},
{
"epoch": 0.337253545485991,
"grad_norm": 2.4935293197631836,
"learning_rate": 9.000689655172414e-06,
"loss": 0.1989,
"step": 1950
},
{
"epoch": 0.34157730888965754,
"grad_norm": 2.6331419944763184,
"learning_rate": 8.98344827586207e-06,
"loss": 0.2118,
"step": 1975
},
{
"epoch": 0.3459010722933241,
"grad_norm": 2.9479784965515137,
"learning_rate": 8.966206896551725e-06,
"loss": 0.2071,
"step": 2000
},
{
"epoch": 0.3459010722933241,
"eval_loss": 0.18826113641262054,
"eval_runtime": 18564.0039,
"eval_samples_per_second": 1.246,
"eval_steps_per_second": 0.156,
"eval_wer": 24.743280064288342,
"step": 2000
},
{
"epoch": 0.35022483569699064,
"grad_norm": 1.9392203092575073,
"learning_rate": 8.94896551724138e-06,
"loss": 0.1949,
"step": 2025
},
{
"epoch": 0.3545485991006572,
"grad_norm": 2.540531873703003,
"learning_rate": 8.931724137931035e-06,
"loss": 0.1977,
"step": 2050
},
{
"epoch": 0.35887236250432375,
"grad_norm": 2.9909920692443848,
"learning_rate": 8.91448275862069e-06,
"loss": 0.2044,
"step": 2075
},
{
"epoch": 0.36319612590799033,
"grad_norm": 2.2228713035583496,
"learning_rate": 8.897241379310345e-06,
"loss": 0.2024,
"step": 2100
},
{
"epoch": 0.36751988931165686,
"grad_norm": 1.7810205221176147,
"learning_rate": 8.880000000000001e-06,
"loss": 0.1884,
"step": 2125
},
{
"epoch": 0.37184365271532344,
"grad_norm": 1.9215483665466309,
"learning_rate": 8.862758620689656e-06,
"loss": 0.2031,
"step": 2150
},
{
"epoch": 0.37616741611898996,
"grad_norm": 2.5977275371551514,
"learning_rate": 8.845517241379311e-06,
"loss": 0.1986,
"step": 2175
},
{
"epoch": 0.38049117952265654,
"grad_norm": 2.242727518081665,
"learning_rate": 8.828275862068966e-06,
"loss": 0.204,
"step": 2200
},
{
"epoch": 0.38481494292632307,
"grad_norm": 2.6293296813964844,
"learning_rate": 8.811034482758621e-06,
"loss": 0.1922,
"step": 2225
},
{
"epoch": 0.38913870632998965,
"grad_norm": 2.4864461421966553,
"learning_rate": 8.793793103448277e-06,
"loss": 0.2096,
"step": 2250
},
{
"epoch": 0.3934624697336562,
"grad_norm": 2.586041212081909,
"learning_rate": 8.776551724137932e-06,
"loss": 0.2092,
"step": 2275
},
{
"epoch": 0.39778623313732275,
"grad_norm": 2.194061040878296,
"learning_rate": 8.759310344827587e-06,
"loss": 0.1966,
"step": 2300
},
{
"epoch": 0.4021099965409893,
"grad_norm": 2.837115526199341,
"learning_rate": 8.742068965517242e-06,
"loss": 0.1884,
"step": 2325
},
{
"epoch": 0.4064337599446558,
"grad_norm": 2.554459810256958,
"learning_rate": 8.724827586206897e-06,
"loss": 0.1889,
"step": 2350
},
{
"epoch": 0.4107575233483224,
"grad_norm": 2.0888259410858154,
"learning_rate": 8.707586206896552e-06,
"loss": 0.1871,
"step": 2375
},
{
"epoch": 0.4150812867519889,
"grad_norm": 3.076122999191284,
"learning_rate": 8.690344827586208e-06,
"loss": 0.1945,
"step": 2400
},
{
"epoch": 0.4194050501556555,
"grad_norm": 2.9319562911987305,
"learning_rate": 8.673103448275863e-06,
"loss": 0.2083,
"step": 2425
},
{
"epoch": 0.423728813559322,
"grad_norm": 1.6421335935592651,
"learning_rate": 8.655862068965518e-06,
"loss": 0.1914,
"step": 2450
},
{
"epoch": 0.4280525769629886,
"grad_norm": 2.8207969665527344,
"learning_rate": 8.638620689655173e-06,
"loss": 0.1956,
"step": 2475
},
{
"epoch": 0.4323763403666551,
"grad_norm": 2.3561465740203857,
"learning_rate": 8.621379310344828e-06,
"loss": 0.1931,
"step": 2500
},
{
"epoch": 0.4367001037703217,
"grad_norm": 2.1428236961364746,
"learning_rate": 8.604137931034483e-06,
"loss": 0.196,
"step": 2525
},
{
"epoch": 0.4410238671739882,
"grad_norm": 2.463754653930664,
"learning_rate": 8.58689655172414e-06,
"loss": 0.2049,
"step": 2550
},
{
"epoch": 0.4453476305776548,
"grad_norm": 3.0367329120635986,
"learning_rate": 8.569655172413794e-06,
"loss": 0.2048,
"step": 2575
},
{
"epoch": 0.44967139398132133,
"grad_norm": 1.86019766330719,
"learning_rate": 8.552413793103449e-06,
"loss": 0.1966,
"step": 2600
},
{
"epoch": 0.4539951573849879,
"grad_norm": 2.3381924629211426,
"learning_rate": 8.535172413793104e-06,
"loss": 0.1907,
"step": 2625
},
{
"epoch": 0.45831892078865444,
"grad_norm": 1.926013469696045,
"learning_rate": 8.517931034482759e-06,
"loss": 0.1922,
"step": 2650
},
{
"epoch": 0.462642684192321,
"grad_norm": 3.239078998565674,
"learning_rate": 8.500689655172415e-06,
"loss": 0.2048,
"step": 2675
},
{
"epoch": 0.46696644759598754,
"grad_norm": 2.443401336669922,
"learning_rate": 8.48344827586207e-06,
"loss": 0.1868,
"step": 2700
},
{
"epoch": 0.4712902109996541,
"grad_norm": 2.624505043029785,
"learning_rate": 8.466206896551725e-06,
"loss": 0.1953,
"step": 2725
},
{
"epoch": 0.47561397440332065,
"grad_norm": 2.1747653484344482,
"learning_rate": 8.44896551724138e-06,
"loss": 0.2068,
"step": 2750
},
{
"epoch": 0.4799377378069872,
"grad_norm": 2.4387295246124268,
"learning_rate": 8.431724137931035e-06,
"loss": 0.1831,
"step": 2775
},
{
"epoch": 0.48426150121065376,
"grad_norm": 1.7778469324111938,
"learning_rate": 8.41448275862069e-06,
"loss": 0.19,
"step": 2800
},
{
"epoch": 0.4885852646143203,
"grad_norm": 2.475799560546875,
"learning_rate": 8.397241379310346e-06,
"loss": 0.1957,
"step": 2825
},
{
"epoch": 0.49290902801798686,
"grad_norm": 1.8860992193222046,
"learning_rate": 8.380000000000001e-06,
"loss": 0.1963,
"step": 2850
},
{
"epoch": 0.4972327914216534,
"grad_norm": 2.0776500701904297,
"learning_rate": 8.362758620689656e-06,
"loss": 0.1861,
"step": 2875
},
{
"epoch": 0.5015565548253199,
"grad_norm": 2.2878217697143555,
"learning_rate": 8.34551724137931e-06,
"loss": 0.1851,
"step": 2900
},
{
"epoch": 0.5058803182289865,
"grad_norm": 2.424276351928711,
"learning_rate": 8.328275862068966e-06,
"loss": 0.1884,
"step": 2925
},
{
"epoch": 0.5102040816326531,
"grad_norm": 2.194385290145874,
"learning_rate": 8.31103448275862e-06,
"loss": 0.1914,
"step": 2950
},
{
"epoch": 0.5145278450363197,
"grad_norm": 2.2288753986358643,
"learning_rate": 8.293793103448277e-06,
"loss": 0.1961,
"step": 2975
},
{
"epoch": 0.5188516084399861,
"grad_norm": 3.0630691051483154,
"learning_rate": 8.276551724137932e-06,
"loss": 0.1941,
"step": 3000
},
{
"epoch": 0.5188516084399861,
"eval_loss": 0.17789211869239807,
"eval_runtime": 18421.853,
"eval_samples_per_second": 1.255,
"eval_steps_per_second": 0.157,
"eval_wer": 23.17027741918857,
"step": 3000
},
{
"epoch": 0.5231753718436527,
"grad_norm": 2.5339276790618896,
"learning_rate": 8.259310344827587e-06,
"loss": 0.1845,
"step": 3025
},
{
"epoch": 0.5274991352473193,
"grad_norm": 2.1724178791046143,
"learning_rate": 8.242068965517242e-06,
"loss": 0.1775,
"step": 3050
},
{
"epoch": 0.5318228986509859,
"grad_norm": 2.3713173866271973,
"learning_rate": 8.224827586206897e-06,
"loss": 0.1783,
"step": 3075
},
{
"epoch": 0.5361466620546523,
"grad_norm": 2.005528450012207,
"learning_rate": 8.207586206896553e-06,
"loss": 0.1986,
"step": 3100
},
{
"epoch": 0.5404704254583189,
"grad_norm": 1.9392186403274536,
"learning_rate": 8.190344827586208e-06,
"loss": 0.1897,
"step": 3125
},
{
"epoch": 0.5447941888619855,
"grad_norm": 1.6991775035858154,
"learning_rate": 8.173103448275863e-06,
"loss": 0.1992,
"step": 3150
},
{
"epoch": 0.5491179522656521,
"grad_norm": 2.995962381362915,
"learning_rate": 8.155862068965518e-06,
"loss": 0.1867,
"step": 3175
},
{
"epoch": 0.5534417156693185,
"grad_norm": 2.4674553871154785,
"learning_rate": 8.138620689655173e-06,
"loss": 0.1825,
"step": 3200
},
{
"epoch": 0.5577654790729851,
"grad_norm": 1.9179223775863647,
"learning_rate": 8.121379310344828e-06,
"loss": 0.1942,
"step": 3225
},
{
"epoch": 0.5620892424766517,
"grad_norm": 1.9651795625686646,
"learning_rate": 8.104137931034484e-06,
"loss": 0.1836,
"step": 3250
},
{
"epoch": 0.5664130058803182,
"grad_norm": 2.7088942527770996,
"learning_rate": 8.086896551724139e-06,
"loss": 0.1894,
"step": 3275
},
{
"epoch": 0.5707367692839848,
"grad_norm": 2.2452046871185303,
"learning_rate": 8.069655172413794e-06,
"loss": 0.1753,
"step": 3300
},
{
"epoch": 0.5750605326876513,
"grad_norm": 2.9989702701568604,
"learning_rate": 8.052413793103449e-06,
"loss": 0.1837,
"step": 3325
},
{
"epoch": 0.5793842960913179,
"grad_norm": 2.451460599899292,
"learning_rate": 8.035172413793104e-06,
"loss": 0.1856,
"step": 3350
},
{
"epoch": 0.5837080594949844,
"grad_norm": 2.1404428482055664,
"learning_rate": 8.017931034482758e-06,
"loss": 0.1796,
"step": 3375
},
{
"epoch": 0.588031822898651,
"grad_norm": 1.7960869073867798,
"learning_rate": 8.000689655172415e-06,
"loss": 0.181,
"step": 3400
},
{
"epoch": 0.5923555863023175,
"grad_norm": 2.464204788208008,
"learning_rate": 7.98344827586207e-06,
"loss": 0.1784,
"step": 3425
},
{
"epoch": 0.5966793497059841,
"grad_norm": 2.2879879474639893,
"learning_rate": 7.966206896551725e-06,
"loss": 0.1874,
"step": 3450
},
{
"epoch": 0.6010031131096506,
"grad_norm": 2.815308094024658,
"learning_rate": 7.94896551724138e-06,
"loss": 0.19,
"step": 3475
},
{
"epoch": 0.6053268765133172,
"grad_norm": 2.048867702484131,
"learning_rate": 7.931724137931034e-06,
"loss": 0.1803,
"step": 3500
},
{
"epoch": 0.6096506399169838,
"grad_norm": 1.690579891204834,
"learning_rate": 7.914482758620691e-06,
"loss": 0.1857,
"step": 3525
},
{
"epoch": 0.6139744033206503,
"grad_norm": 2.5136778354644775,
"learning_rate": 7.897241379310346e-06,
"loss": 0.1835,
"step": 3550
},
{
"epoch": 0.6182981667243168,
"grad_norm": 1.5882636308670044,
"learning_rate": 7.88e-06,
"loss": 0.1858,
"step": 3575
},
{
"epoch": 0.6226219301279834,
"grad_norm": 1.7922052145004272,
"learning_rate": 7.862758620689656e-06,
"loss": 0.175,
"step": 3600
},
{
"epoch": 0.62694569353165,
"grad_norm": 2.2753283977508545,
"learning_rate": 7.84551724137931e-06,
"loss": 0.1844,
"step": 3625
},
{
"epoch": 0.6312694569353166,
"grad_norm": 2.2595114707946777,
"learning_rate": 7.828275862068965e-06,
"loss": 0.1863,
"step": 3650
},
{
"epoch": 0.635593220338983,
"grad_norm": 2.442291498184204,
"learning_rate": 7.811034482758622e-06,
"loss": 0.1703,
"step": 3675
},
{
"epoch": 0.6399169837426496,
"grad_norm": 2.492649555206299,
"learning_rate": 7.793793103448277e-06,
"loss": 0.1717,
"step": 3700
},
{
"epoch": 0.6442407471463162,
"grad_norm": 2.1932718753814697,
"learning_rate": 7.776551724137932e-06,
"loss": 0.1821,
"step": 3725
},
{
"epoch": 0.6485645105499827,
"grad_norm": 2.30263090133667,
"learning_rate": 7.759310344827587e-06,
"loss": 0.1837,
"step": 3750
},
{
"epoch": 0.6528882739536492,
"grad_norm": 1.7888277769088745,
"learning_rate": 7.742068965517241e-06,
"loss": 0.1788,
"step": 3775
},
{
"epoch": 0.6572120373573158,
"grad_norm": 2.500505208969116,
"learning_rate": 7.724827586206896e-06,
"loss": 0.1901,
"step": 3800
},
{
"epoch": 0.6615358007609824,
"grad_norm": 1.7561728954315186,
"learning_rate": 7.707586206896553e-06,
"loss": 0.1718,
"step": 3825
},
{
"epoch": 0.6658595641646489,
"grad_norm": 2.2992682456970215,
"learning_rate": 7.690344827586208e-06,
"loss": 0.1831,
"step": 3850
},
{
"epoch": 0.6701833275683154,
"grad_norm": 2.2881760597229004,
"learning_rate": 7.673103448275863e-06,
"loss": 0.1848,
"step": 3875
},
{
"epoch": 0.674507090971982,
"grad_norm": 2.236409902572632,
"learning_rate": 7.655862068965517e-06,
"loss": 0.1916,
"step": 3900
},
{
"epoch": 0.6788308543756486,
"grad_norm": 2.074676513671875,
"learning_rate": 7.638620689655172e-06,
"loss": 0.1783,
"step": 3925
},
{
"epoch": 0.6831546177793151,
"grad_norm": 1.740821361541748,
"learning_rate": 7.621379310344829e-06,
"loss": 0.1695,
"step": 3950
},
{
"epoch": 0.6874783811829817,
"grad_norm": 1.9029902219772339,
"learning_rate": 7.604137931034483e-06,
"loss": 0.194,
"step": 3975
},
{
"epoch": 0.6918021445866482,
"grad_norm": 2.5968997478485107,
"learning_rate": 7.586896551724139e-06,
"loss": 0.1883,
"step": 4000
},
{
"epoch": 0.6918021445866482,
"eval_loss": 0.17140337824821472,
"eval_runtime": 18369.4452,
"eval_samples_per_second": 1.259,
"eval_steps_per_second": 0.157,
"eval_wer": 23.089682705170947,
"step": 4000
},
{
"epoch": 0.6961259079903148,
"grad_norm": 1.9076557159423828,
"learning_rate": 7.5696551724137935e-06,
"loss": 0.1955,
"step": 4025
},
{
"epoch": 0.7004496713939813,
"grad_norm": 1.8503438234329224,
"learning_rate": 7.552413793103449e-06,
"loss": 0.1626,
"step": 4050
},
{
"epoch": 0.7047734347976479,
"grad_norm": 2.7024850845336914,
"learning_rate": 7.535172413793104e-06,
"loss": 0.1704,
"step": 4075
},
{
"epoch": 0.7090971982013144,
"grad_norm": 1.7515462636947632,
"learning_rate": 7.517931034482759e-06,
"loss": 0.1721,
"step": 4100
},
{
"epoch": 0.713420961604981,
"grad_norm": 1.8947151899337769,
"learning_rate": 7.500689655172414e-06,
"loss": 0.1727,
"step": 4125
},
{
"epoch": 0.7177447250086475,
"grad_norm": 1.7755450010299683,
"learning_rate": 7.48344827586207e-06,
"loss": 0.184,
"step": 4150
},
{
"epoch": 0.7220684884123141,
"grad_norm": 2.1575655937194824,
"learning_rate": 7.4662068965517244e-06,
"loss": 0.1812,
"step": 4175
},
{
"epoch": 0.7263922518159807,
"grad_norm": 2.710113048553467,
"learning_rate": 7.44896551724138e-06,
"loss": 0.1828,
"step": 4200
},
{
"epoch": 0.7307160152196471,
"grad_norm": 2.426133155822754,
"learning_rate": 7.431724137931036e-06,
"loss": 0.186,
"step": 4225
},
{
"epoch": 0.7350397786233137,
"grad_norm": 2.672506332397461,
"learning_rate": 7.41448275862069e-06,
"loss": 0.1896,
"step": 4250
},
{
"epoch": 0.7393635420269803,
"grad_norm": 1.9614557027816772,
"learning_rate": 7.397241379310346e-06,
"loss": 0.1918,
"step": 4275
},
{
"epoch": 0.7436873054306469,
"grad_norm": 2.511770486831665,
"learning_rate": 7.3800000000000005e-06,
"loss": 0.1976,
"step": 4300
},
{
"epoch": 0.7480110688343133,
"grad_norm": 2.0811338424682617,
"learning_rate": 7.362758620689656e-06,
"loss": 0.1893,
"step": 4325
},
{
"epoch": 0.7523348322379799,
"grad_norm": 2.0760035514831543,
"learning_rate": 7.345517241379311e-06,
"loss": 0.1761,
"step": 4350
},
{
"epoch": 0.7566585956416465,
"grad_norm": 2.1201930046081543,
"learning_rate": 7.328275862068967e-06,
"loss": 0.1898,
"step": 4375
},
{
"epoch": 0.7609823590453131,
"grad_norm": 1.741500735282898,
"learning_rate": 7.311034482758621e-06,
"loss": 0.176,
"step": 4400
},
{
"epoch": 0.7653061224489796,
"grad_norm": 2.043604612350464,
"learning_rate": 7.2937931034482765e-06,
"loss": 0.1718,
"step": 4425
},
{
"epoch": 0.7696298858526461,
"grad_norm": Infinity,
"learning_rate": 7.277241379310346e-06,
"loss": 0.1929,
"step": 4450
},
{
"epoch": 0.7739536492563127,
"grad_norm": 2.5208499431610107,
"learning_rate": 7.260000000000001e-06,
"loss": 0.2019,
"step": 4475
},
{
"epoch": 0.7782774126599793,
"grad_norm": 1.9209998846054077,
"learning_rate": 7.242758620689656e-06,
"loss": 0.1739,
"step": 4500
},
{
"epoch": 0.7826011760636458,
"grad_norm": 1.8282277584075928,
"learning_rate": 7.2255172413793105e-06,
"loss": 0.1733,
"step": 4525
},
{
"epoch": 0.7869249394673123,
"grad_norm": 2.2943856716156006,
"learning_rate": 7.208275862068966e-06,
"loss": 0.1831,
"step": 4550
},
{
"epoch": 0.7912487028709789,
"grad_norm": 1.9153530597686768,
"learning_rate": 7.191034482758621e-06,
"loss": 0.1868,
"step": 4575
},
{
"epoch": 0.7955724662746455,
"grad_norm": 2.1466894149780273,
"learning_rate": 7.173793103448277e-06,
"loss": 0.1727,
"step": 4600
},
{
"epoch": 0.799896229678312,
"grad_norm": 1.8446885347366333,
"learning_rate": 7.156551724137931e-06,
"loss": 0.1801,
"step": 4625
},
{
"epoch": 0.8042199930819786,
"grad_norm": 1.8949089050292969,
"learning_rate": 7.139310344827587e-06,
"loss": 0.1825,
"step": 4650
},
{
"epoch": 0.8085437564856451,
"grad_norm": 1.7999627590179443,
"learning_rate": 7.1220689655172414e-06,
"loss": 0.1801,
"step": 4675
},
{
"epoch": 0.8128675198893116,
"grad_norm": 2.10203218460083,
"learning_rate": 7.104827586206897e-06,
"loss": 0.1696,
"step": 4700
},
{
"epoch": 0.8171912832929782,
"grad_norm": 1.9392683506011963,
"learning_rate": 7.087586206896553e-06,
"loss": 0.1779,
"step": 4725
},
{
"epoch": 0.8215150466966448,
"grad_norm": 2.000790596008301,
"learning_rate": 7.070344827586208e-06,
"loss": 0.179,
"step": 4750
},
{
"epoch": 0.8258388101003113,
"grad_norm": 2.079643964767456,
"learning_rate": 7.0531034482758635e-06,
"loss": 0.184,
"step": 4775
},
{
"epoch": 0.8301625735039778,
"grad_norm": 1.8971096277236938,
"learning_rate": 7.0358620689655175e-06,
"loss": 0.1638,
"step": 4800
},
{
"epoch": 0.8344863369076444,
"grad_norm": 1.8511512279510498,
"learning_rate": 7.018620689655173e-06,
"loss": 0.1696,
"step": 4825
},
{
"epoch": 0.838810100311311,
"grad_norm": 2.439633369445801,
"learning_rate": 7.001379310344828e-06,
"loss": 0.187,
"step": 4850
},
{
"epoch": 0.8431338637149776,
"grad_norm": 1.5714433193206787,
"learning_rate": 6.984137931034484e-06,
"loss": 0.1722,
"step": 4875
},
{
"epoch": 0.847457627118644,
"grad_norm": 1.7673485279083252,
"learning_rate": 6.966896551724139e-06,
"loss": 0.1702,
"step": 4900
},
{
"epoch": 0.8517813905223106,
"grad_norm": 1.4075311422348022,
"learning_rate": 6.9496551724137935e-06,
"loss": 0.1726,
"step": 4925
},
{
"epoch": 0.8561051539259772,
"grad_norm": 1.8928003311157227,
"learning_rate": 6.932413793103448e-06,
"loss": 0.1798,
"step": 4950
},
{
"epoch": 0.8604289173296438,
"grad_norm": 2.118661403656006,
"learning_rate": 6.915172413793104e-06,
"loss": 0.1812,
"step": 4975
},
{
"epoch": 0.8647526807333102,
"grad_norm": 2.0651979446411133,
"learning_rate": 6.897931034482759e-06,
"loss": 0.1929,
"step": 5000
},
{
"epoch": 0.8647526807333102,
"eval_loss": 0.1684178113937378,
"eval_runtime": 18447.2626,
"eval_samples_per_second": 1.254,
"eval_steps_per_second": 0.157,
"eval_wer": 22.593462925260116,
"step": 5000
},
{
"epoch": 0.8690764441369768,
"grad_norm": 1.4254800081253052,
"learning_rate": 6.880689655172415e-06,
"loss": 0.1829,
"step": 5025
},
{
"epoch": 0.8734002075406434,
"grad_norm": 1.8314497470855713,
"learning_rate": 6.863448275862069e-06,
"loss": 0.1804,
"step": 5050
},
{
"epoch": 0.8777239709443099,
"grad_norm": 1.9438308477401733,
"learning_rate": 6.8462068965517245e-06,
"loss": 0.1889,
"step": 5075
},
{
"epoch": 0.8820477343479765,
"grad_norm": 2.4102623462677,
"learning_rate": 6.828965517241379e-06,
"loss": 0.1725,
"step": 5100
},
{
"epoch": 0.886371497751643,
"grad_norm": 2.181542158126831,
"learning_rate": 6.811724137931035e-06,
"loss": 0.1953,
"step": 5125
},
{
"epoch": 0.8906952611553096,
"grad_norm": 1.6767735481262207,
"learning_rate": 6.794482758620691e-06,
"loss": 0.1654,
"step": 5150
},
{
"epoch": 0.8950190245589761,
"grad_norm": 2.3569998741149902,
"learning_rate": 6.777241379310346e-06,
"loss": 0.1765,
"step": 5175
},
{
"epoch": 0.8993427879626427,
"grad_norm": 2.1700599193573,
"learning_rate": 6.760000000000001e-06,
"loss": 0.1845,
"step": 5200
},
{
"epoch": 0.9036665513663092,
"grad_norm": 1.6862359046936035,
"learning_rate": 6.742758620689655e-06,
"loss": 0.1714,
"step": 5225
},
{
"epoch": 0.9079903147699758,
"grad_norm": 1.861148715019226,
"learning_rate": 6.725517241379311e-06,
"loss": 0.1718,
"step": 5250
},
{
"epoch": 0.9123140781736423,
"grad_norm": 1.8183972835540771,
"learning_rate": 6.708275862068966e-06,
"loss": 0.1694,
"step": 5275
},
{
"epoch": 0.9166378415773089,
"grad_norm": 1.633947730064392,
"learning_rate": 6.691034482758622e-06,
"loss": 0.1756,
"step": 5300
},
{
"epoch": 0.9209616049809755,
"grad_norm": 1.9196300506591797,
"learning_rate": 6.6737931034482765e-06,
"loss": 0.1809,
"step": 5325
},
{
"epoch": 0.925285368384642,
"grad_norm": 2.4457297325134277,
"learning_rate": 6.656551724137931e-06,
"loss": 0.1838,
"step": 5350
},
{
"epoch": 0.9296091317883085,
"grad_norm": 1.6058560609817505,
"learning_rate": 6.639310344827586e-06,
"loss": 0.1795,
"step": 5375
},
{
"epoch": 0.9339328951919751,
"grad_norm": 2.3014094829559326,
"learning_rate": 6.622068965517242e-06,
"loss": 0.1791,
"step": 5400
},
{
"epoch": 0.9382566585956417,
"grad_norm": 2.237236738204956,
"learning_rate": 6.604827586206897e-06,
"loss": 0.1675,
"step": 5425
},
{
"epoch": 0.9425804219993082,
"grad_norm": 1.8356267213821411,
"learning_rate": 6.587586206896553e-06,
"loss": 0.1753,
"step": 5450
},
{
"epoch": 0.9469041854029747,
"grad_norm": 1.816933274269104,
"learning_rate": 6.570344827586207e-06,
"loss": 0.1797,
"step": 5475
},
{
"epoch": 0.9512279488066413,
"grad_norm": 1.9767167568206787,
"learning_rate": 6.553103448275862e-06,
"loss": 0.1787,
"step": 5500
},
{
"epoch": 0.9555517122103079,
"grad_norm": 2.3487281799316406,
"learning_rate": 6.535862068965517e-06,
"loss": 0.1713,
"step": 5525
},
{
"epoch": 0.9598754756139743,
"grad_norm": 2.3690738677978516,
"learning_rate": 6.518620689655173e-06,
"loss": 0.175,
"step": 5550
},
{
"epoch": 0.9641992390176409,
"grad_norm": 1.8204621076583862,
"learning_rate": 6.501379310344829e-06,
"loss": 0.1795,
"step": 5575
},
{
"epoch": 0.9685230024213075,
"grad_norm": 1.9036630392074585,
"learning_rate": 6.4841379310344835e-06,
"loss": 0.1846,
"step": 5600
},
{
"epoch": 0.9728467658249741,
"grad_norm": 2.118363618850708,
"learning_rate": 6.466896551724139e-06,
"loss": 0.1742,
"step": 5625
},
{
"epoch": 0.9771705292286406,
"grad_norm": 1.9643069505691528,
"learning_rate": 6.449655172413793e-06,
"loss": 0.1789,
"step": 5650
},
{
"epoch": 0.9814942926323071,
"grad_norm": 1.637279748916626,
"learning_rate": 6.432413793103449e-06,
"loss": 0.1848,
"step": 5675
},
{
"epoch": 0.9858180560359737,
"grad_norm": 1.6916080713272095,
"learning_rate": 6.415172413793104e-06,
"loss": 0.1744,
"step": 5700
},
{
"epoch": 0.9901418194396403,
"grad_norm": 1.6688181161880493,
"learning_rate": 6.3979310344827595e-06,
"loss": 0.1761,
"step": 5725
},
{
"epoch": 0.9944655828433068,
"grad_norm": 2.0284881591796875,
"learning_rate": 6.380689655172414e-06,
"loss": 0.1809,
"step": 5750
},
{
"epoch": 0.9987893462469734,
"grad_norm": 1.8513811826705933,
"learning_rate": 6.363448275862069e-06,
"loss": 0.1787,
"step": 5775
},
{
"epoch": 1.0031131096506398,
"grad_norm": 1.592392921447754,
"learning_rate": 6.346206896551724e-06,
"loss": 0.1542,
"step": 5800
},
{
"epoch": 1.0074368730543064,
"grad_norm": 1.8996076583862305,
"learning_rate": 6.32896551724138e-06,
"loss": 0.1502,
"step": 5825
},
{
"epoch": 1.011760636457973,
"grad_norm": 2.029642343521118,
"learning_rate": 6.311724137931035e-06,
"loss": 0.1436,
"step": 5850
},
{
"epoch": 1.0160843998616396,
"grad_norm": 2.350755453109741,
"learning_rate": 6.2944827586206905e-06,
"loss": 0.1423,
"step": 5875
},
{
"epoch": 1.0204081632653061,
"grad_norm": 2.1733169555664062,
"learning_rate": 6.2772413793103445e-06,
"loss": 0.1515,
"step": 5900
},
{
"epoch": 1.0247319266689727,
"grad_norm": 1.7886191606521606,
"learning_rate": 6.26e-06,
"loss": 0.141,
"step": 5925
},
{
"epoch": 1.0290556900726393,
"grad_norm": 1.4512900114059448,
"learning_rate": 6.242758620689656e-06,
"loss": 0.1419,
"step": 5950
},
{
"epoch": 1.0333794534763059,
"grad_norm": 1.4808529615402222,
"learning_rate": 6.225517241379311e-06,
"loss": 0.1426,
"step": 5975
},
{
"epoch": 1.0377032168799722,
"grad_norm": 1.99927818775177,
"learning_rate": 6.2082758620689665e-06,
"loss": 0.1434,
"step": 6000
},
{
"epoch": 1.0377032168799722,
"eval_loss": 0.1640278398990631,
"eval_runtime": 18500.4166,
"eval_samples_per_second": 1.25,
"eval_steps_per_second": 0.156,
"eval_wer": 22.07428302333264,
"step": 6000
},
{
"epoch": 1.0420269802836388,
"grad_norm": 1.9692049026489258,
"learning_rate": 6.191034482758621e-06,
"loss": 0.1518,
"step": 6025
},
{
"epoch": 1.0463507436873054,
"grad_norm": 1.943925380706787,
"learning_rate": 6.173793103448277e-06,
"loss": 0.1486,
"step": 6050
},
{
"epoch": 1.050674507090972,
"grad_norm": 1.8784852027893066,
"learning_rate": 6.156551724137931e-06,
"loss": 0.1516,
"step": 6075
},
{
"epoch": 1.0549982704946386,
"grad_norm": 1.8840405941009521,
"learning_rate": 6.139310344827587e-06,
"loss": 0.1458,
"step": 6100
},
{
"epoch": 1.0593220338983051,
"grad_norm": 1.711713194847107,
"learning_rate": 6.122068965517242e-06,
"loss": 0.1518,
"step": 6125
},
{
"epoch": 1.0636457973019717,
"grad_norm": 2.1738007068634033,
"learning_rate": 6.104827586206897e-06,
"loss": 0.1578,
"step": 6150
},
{
"epoch": 1.067969560705638,
"grad_norm": 1.4103723764419556,
"learning_rate": 6.087586206896552e-06,
"loss": 0.1433,
"step": 6175
},
{
"epoch": 1.0722933241093047,
"grad_norm": 1.4984797239303589,
"learning_rate": 6.070344827586207e-06,
"loss": 0.1536,
"step": 6200
},
{
"epoch": 1.0766170875129712,
"grad_norm": 1.7718677520751953,
"learning_rate": 6.053103448275862e-06,
"loss": 0.1356,
"step": 6225
},
{
"epoch": 1.0809408509166378,
"grad_norm": 1.9681774377822876,
"learning_rate": 6.035862068965518e-06,
"loss": 0.1312,
"step": 6250
},
{
"epoch": 1.0852646143203044,
"grad_norm": 2.0595345497131348,
"learning_rate": 6.018620689655173e-06,
"loss": 0.1372,
"step": 6275
},
{
"epoch": 1.089588377723971,
"grad_norm": 1.863364338874817,
"learning_rate": 6.001379310344828e-06,
"loss": 0.1355,
"step": 6300
},
{
"epoch": 1.0939121411276376,
"grad_norm": 1.5299562215805054,
"learning_rate": 5.984137931034482e-06,
"loss": 0.1471,
"step": 6325
},
{
"epoch": 1.098235904531304,
"grad_norm": 2.049438238143921,
"learning_rate": 5.966896551724138e-06,
"loss": 0.1508,
"step": 6350
},
{
"epoch": 1.1025596679349705,
"grad_norm": 1.7478573322296143,
"learning_rate": 5.949655172413794e-06,
"loss": 0.1442,
"step": 6375
},
{
"epoch": 1.106883431338637,
"grad_norm": 1.8549025058746338,
"learning_rate": 5.932413793103449e-06,
"loss": 0.1523,
"step": 6400
},
{
"epoch": 1.1112071947423037,
"grad_norm": 1.9961248636245728,
"learning_rate": 5.915172413793104e-06,
"loss": 0.1464,
"step": 6425
},
{
"epoch": 1.1155309581459703,
"grad_norm": 1.5598413944244385,
"learning_rate": 5.897931034482759e-06,
"loss": 0.1478,
"step": 6450
},
{
"epoch": 1.1198547215496368,
"grad_norm": 1.586255669593811,
"learning_rate": 5.880689655172415e-06,
"loss": 0.1429,
"step": 6475
},
{
"epoch": 1.1241784849533034,
"grad_norm": 1.8345210552215576,
"learning_rate": 5.863448275862069e-06,
"loss": 0.1421,
"step": 6500
},
{
"epoch": 1.12850224835697,
"grad_norm": 2.151012897491455,
"learning_rate": 5.846206896551725e-06,
"loss": 0.146,
"step": 6525
},
{
"epoch": 1.1328260117606366,
"grad_norm": 1.7682011127471924,
"learning_rate": 5.82896551724138e-06,
"loss": 0.1422,
"step": 6550
},
{
"epoch": 1.137149775164303,
"grad_norm": 1.4105076789855957,
"learning_rate": 5.811724137931035e-06,
"loss": 0.1593,
"step": 6575
},
{
"epoch": 1.1414735385679695,
"grad_norm": 2.0739102363586426,
"learning_rate": 5.79448275862069e-06,
"loss": 0.1469,
"step": 6600
},
{
"epoch": 1.145797301971636,
"grad_norm": 1.3610910177230835,
"learning_rate": 5.777241379310345e-06,
"loss": 0.1602,
"step": 6625
},
{
"epoch": 1.1501210653753027,
"grad_norm": 2.168071746826172,
"learning_rate": 5.76e-06,
"loss": 0.151,
"step": 6650
},
{
"epoch": 1.1544448287789693,
"grad_norm": 1.9631882905960083,
"learning_rate": 5.742758620689656e-06,
"loss": 0.1474,
"step": 6675
},
{
"epoch": 1.1587685921826358,
"grad_norm": 1.6678909063339233,
"learning_rate": 5.7255172413793105e-06,
"loss": 0.1447,
"step": 6700
},
{
"epoch": 1.1630923555863024,
"grad_norm": 1.8679783344268799,
"learning_rate": 5.708275862068966e-06,
"loss": 0.1553,
"step": 6725
},
{
"epoch": 1.1674161189899688,
"grad_norm": 1.428916573524475,
"learning_rate": 5.691034482758622e-06,
"loss": 0.1371,
"step": 6750
},
{
"epoch": 1.1717398823936354,
"grad_norm": 1.7976248264312744,
"learning_rate": 5.673793103448276e-06,
"loss": 0.1476,
"step": 6775
},
{
"epoch": 1.176063645797302,
"grad_norm": 1.8509339094161987,
"learning_rate": 5.656551724137932e-06,
"loss": 0.132,
"step": 6800
},
{
"epoch": 1.1803874092009685,
"grad_norm": 2.185060739517212,
"learning_rate": 5.6393103448275865e-06,
"loss": 0.1526,
"step": 6825
},
{
"epoch": 1.184711172604635,
"grad_norm": 1.3566219806671143,
"learning_rate": 5.622068965517242e-06,
"loss": 0.1445,
"step": 6850
},
{
"epoch": 1.1890349360083017,
"grad_norm": 1.539624810218811,
"learning_rate": 5.604827586206897e-06,
"loss": 0.1415,
"step": 6875
},
{
"epoch": 1.1933586994119683,
"grad_norm": 1.6758310794830322,
"learning_rate": 5.587586206896553e-06,
"loss": 0.15,
"step": 6900
},
{
"epoch": 1.1976824628156346,
"grad_norm": 1.9276903867721558,
"learning_rate": 5.570344827586207e-06,
"loss": 0.1496,
"step": 6925
},
{
"epoch": 1.2020062262193012,
"grad_norm": 2.1183435916900635,
"learning_rate": 5.553103448275863e-06,
"loss": 0.1379,
"step": 6950
},
{
"epoch": 1.2063299896229678,
"grad_norm": 1.862641453742981,
"learning_rate": 5.5358620689655175e-06,
"loss": 0.1455,
"step": 6975
},
{
"epoch": 1.2106537530266344,
"grad_norm": 1.83571195602417,
"learning_rate": 5.518620689655173e-06,
"loss": 0.1442,
"step": 7000
},
{
"epoch": 1.2106537530266344,
"eval_loss": 0.1629372239112854,
"eval_runtime": 18325.4837,
"eval_samples_per_second": 1.262,
"eval_steps_per_second": 0.158,
"eval_wer": 21.968151030745478,
"step": 7000
},
{
"epoch": 1.214977516430301,
"grad_norm": 1.160624623298645,
"learning_rate": 5.501379310344828e-06,
"loss": 0.1396,
"step": 7025
},
{
"epoch": 1.2193012798339675,
"grad_norm": 2.0968518257141113,
"learning_rate": 5.484137931034483e-06,
"loss": 0.153,
"step": 7050
},
{
"epoch": 1.223625043237634,
"grad_norm": 1.8394380807876587,
"learning_rate": 5.466896551724138e-06,
"loss": 0.1411,
"step": 7075
},
{
"epoch": 1.2279488066413007,
"grad_norm": 2.051907539367676,
"learning_rate": 5.4496551724137935e-06,
"loss": 0.1517,
"step": 7100
},
{
"epoch": 1.2322725700449673,
"grad_norm": 1.548345923423767,
"learning_rate": 5.432413793103448e-06,
"loss": 0.1433,
"step": 7125
},
{
"epoch": 1.2365963334486336,
"grad_norm": 1.661149024963379,
"learning_rate": 5.415172413793104e-06,
"loss": 0.1415,
"step": 7150
},
{
"epoch": 1.2409200968523002,
"grad_norm": 1.7700337171554565,
"learning_rate": 5.39793103448276e-06,
"loss": 0.1415,
"step": 7175
},
{
"epoch": 1.2452438602559668,
"grad_norm": 1.8730343580245972,
"learning_rate": 5.380689655172414e-06,
"loss": 0.1502,
"step": 7200
},
{
"epoch": 1.2495676236596334,
"grad_norm": 1.8379130363464355,
"learning_rate": 5.3634482758620695e-06,
"loss": 0.1364,
"step": 7225
},
{
"epoch": 1.2538913870633,
"grad_norm": 1.5410575866699219,
"learning_rate": 5.346206896551724e-06,
"loss": 0.1471,
"step": 7250
},
{
"epoch": 1.2582151504669665,
"grad_norm": 1.5988543033599854,
"learning_rate": 5.32896551724138e-06,
"loss": 0.1406,
"step": 7275
},
{
"epoch": 1.262538913870633,
"grad_norm": 1.7049413919448853,
"learning_rate": 5.311724137931035e-06,
"loss": 0.1552,
"step": 7300
},
{
"epoch": 1.2668626772742995,
"grad_norm": 1.604293704032898,
"learning_rate": 5.294482758620691e-06,
"loss": 0.1462,
"step": 7325
},
{
"epoch": 1.271186440677966,
"grad_norm": 1.8004859685897827,
"learning_rate": 5.277241379310345e-06,
"loss": 0.1426,
"step": 7350
},
{
"epoch": 1.2755102040816326,
"grad_norm": 1.3034578561782837,
"learning_rate": 5.2600000000000005e-06,
"loss": 0.1324,
"step": 7375
},
{
"epoch": 1.2798339674852992,
"grad_norm": 2.3090808391571045,
"learning_rate": 5.242758620689655e-06,
"loss": 0.1403,
"step": 7400
},
{
"epoch": 1.2841577308889658,
"grad_norm": 1.5892201662063599,
"learning_rate": 5.225517241379311e-06,
"loss": 0.141,
"step": 7425
},
{
"epoch": 1.2884814942926324,
"grad_norm": 1.6891568899154663,
"learning_rate": 5.208275862068966e-06,
"loss": 0.1521,
"step": 7450
},
{
"epoch": 1.292805257696299,
"grad_norm": 2.103891134262085,
"learning_rate": 5.191034482758621e-06,
"loss": 0.1409,
"step": 7475
},
{
"epoch": 1.2971290210999653,
"grad_norm": 1.9179925918579102,
"learning_rate": 5.173793103448276e-06,
"loss": 0.1413,
"step": 7500
},
{
"epoch": 1.3014527845036319,
"grad_norm": 2.1257266998291016,
"learning_rate": 5.156551724137931e-06,
"loss": 0.1383,
"step": 7525
},
{
"epoch": 1.3057765479072985,
"grad_norm": 1.5119720697402954,
"learning_rate": 5.139310344827587e-06,
"loss": 0.1411,
"step": 7550
},
{
"epoch": 1.310100311310965,
"grad_norm": 1.8337161540985107,
"learning_rate": 5.122068965517242e-06,
"loss": 0.1397,
"step": 7575
},
{
"epoch": 1.3144240747146316,
"grad_norm": 1.5722588300704956,
"learning_rate": 5.104827586206898e-06,
"loss": 0.1395,
"step": 7600
},
{
"epoch": 1.3187478381182982,
"grad_norm": 1.2701337337493896,
"learning_rate": 5.088275862068965e-06,
"loss": 0.1481,
"step": 7625
},
{
"epoch": 1.3230716015219648,
"grad_norm": 1.6665078401565552,
"learning_rate": 5.071034482758621e-06,
"loss": 0.1369,
"step": 7650
},
{
"epoch": 1.3273953649256311,
"grad_norm": 2.2191314697265625,
"learning_rate": 5.053793103448277e-06,
"loss": 0.1356,
"step": 7675
},
{
"epoch": 1.331719128329298,
"grad_norm": 1.6571834087371826,
"learning_rate": 5.036551724137932e-06,
"loss": 0.1443,
"step": 7700
},
{
"epoch": 1.3360428917329643,
"grad_norm": 1.906327724456787,
"learning_rate": 5.019310344827587e-06,
"loss": 0.1386,
"step": 7725
},
{
"epoch": 1.340366655136631,
"grad_norm": 2.03704571723938,
"learning_rate": 5.002068965517241e-06,
"loss": 0.1465,
"step": 7750
},
{
"epoch": 1.3446904185402975,
"grad_norm": 1.2263180017471313,
"learning_rate": 4.984827586206897e-06,
"loss": 0.1491,
"step": 7775
},
{
"epoch": 1.349014181943964,
"grad_norm": 1.9075984954833984,
"learning_rate": 4.967586206896552e-06,
"loss": 0.146,
"step": 7800
},
{
"epoch": 1.3533379453476306,
"grad_norm": 1.754158616065979,
"learning_rate": 4.950344827586207e-06,
"loss": 0.1499,
"step": 7825
},
{
"epoch": 1.357661708751297,
"grad_norm": 1.525426983833313,
"learning_rate": 4.933103448275863e-06,
"loss": 0.1372,
"step": 7850
},
{
"epoch": 1.3619854721549638,
"grad_norm": 1.7515060901641846,
"learning_rate": 4.9158620689655175e-06,
"loss": 0.1423,
"step": 7875
},
{
"epoch": 1.3663092355586302,
"grad_norm": 1.675724983215332,
"learning_rate": 4.898620689655173e-06,
"loss": 0.1372,
"step": 7900
},
{
"epoch": 1.3706329989622967,
"grad_norm": 1.8081847429275513,
"learning_rate": 4.881379310344828e-06,
"loss": 0.15,
"step": 7925
},
{
"epoch": 1.3749567623659633,
"grad_norm": 2.3439128398895264,
"learning_rate": 4.864137931034483e-06,
"loss": 0.1512,
"step": 7950
},
{
"epoch": 1.37928052576963,
"grad_norm": 1.5367904901504517,
"learning_rate": 4.846896551724139e-06,
"loss": 0.1343,
"step": 7975
},
{
"epoch": 1.3836042891732965,
"grad_norm": 1.7868484258651733,
"learning_rate": 4.8296551724137935e-06,
"loss": 0.1521,
"step": 8000
},
{
"epoch": 1.3836042891732965,
"eval_loss": 0.16171179711818695,
"eval_runtime": 18373.8485,
"eval_samples_per_second": 1.259,
"eval_steps_per_second": 0.157,
"eval_wer": 22.230552425221457,
"step": 8000
},
{
"epoch": 1.387928052576963,
"grad_norm": 2.016526699066162,
"learning_rate": 4.812413793103448e-06,
"loss": 0.1319,
"step": 8025
},
{
"epoch": 1.3922518159806296,
"grad_norm": 1.9844586849212646,
"learning_rate": 4.795172413793104e-06,
"loss": 0.1467,
"step": 8050
},
{
"epoch": 1.396575579384296,
"grad_norm": 1.9286713600158691,
"learning_rate": 4.777931034482759e-06,
"loss": 0.1409,
"step": 8075
},
{
"epoch": 1.4008993427879626,
"grad_norm": 1.6599113941192627,
"learning_rate": 4.760689655172414e-06,
"loss": 0.1451,
"step": 8100
},
{
"epoch": 1.4052231061916292,
"grad_norm": 1.4539234638214111,
"learning_rate": 4.7434482758620696e-06,
"loss": 0.1381,
"step": 8125
},
{
"epoch": 1.4095468695952957,
"grad_norm": 1.7355366945266724,
"learning_rate": 4.726206896551724e-06,
"loss": 0.1468,
"step": 8150
},
{
"epoch": 1.4138706329989623,
"grad_norm": 1.8770480155944824,
"learning_rate": 4.708965517241379e-06,
"loss": 0.1434,
"step": 8175
},
{
"epoch": 1.418194396402629,
"grad_norm": 1.5144529342651367,
"learning_rate": 4.691724137931035e-06,
"loss": 0.1342,
"step": 8200
},
{
"epoch": 1.4225181598062955,
"grad_norm": 1.4679632186889648,
"learning_rate": 4.67448275862069e-06,
"loss": 0.1417,
"step": 8225
},
{
"epoch": 1.4268419232099618,
"grad_norm": 1.7771397829055786,
"learning_rate": 4.657241379310346e-06,
"loss": 0.1406,
"step": 8250
},
{
"epoch": 1.4311656866136286,
"grad_norm": 2.567934989929199,
"learning_rate": 4.6400000000000005e-06,
"loss": 0.1457,
"step": 8275
},
{
"epoch": 1.435489450017295,
"grad_norm": 1.4760676622390747,
"learning_rate": 4.622758620689655e-06,
"loss": 0.1496,
"step": 8300
},
{
"epoch": 1.4398132134209616,
"grad_norm": 1.6052241325378418,
"learning_rate": 4.605517241379311e-06,
"loss": 0.1409,
"step": 8325
},
{
"epoch": 1.4441369768246282,
"grad_norm": 1.5678763389587402,
"learning_rate": 4.588275862068966e-06,
"loss": 0.1347,
"step": 8350
},
{
"epoch": 1.4484607402282947,
"grad_norm": 1.3863381147384644,
"learning_rate": 4.571034482758621e-06,
"loss": 0.1374,
"step": 8375
},
{
"epoch": 1.4527845036319613,
"grad_norm": 2.31950306892395,
"learning_rate": 4.5537931034482765e-06,
"loss": 0.147,
"step": 8400
},
{
"epoch": 1.4571082670356277,
"grad_norm": 2.1464667320251465,
"learning_rate": 4.536551724137931e-06,
"loss": 0.1434,
"step": 8425
},
{
"epoch": 1.4614320304392945,
"grad_norm": 1.6955801248550415,
"learning_rate": 4.519310344827586e-06,
"loss": 0.1463,
"step": 8450
},
{
"epoch": 1.4657557938429608,
"grad_norm": 1.6823946237564087,
"learning_rate": 4.502068965517242e-06,
"loss": 0.1458,
"step": 8475
},
{
"epoch": 1.4700795572466274,
"grad_norm": 1.947957992553711,
"learning_rate": 4.484827586206897e-06,
"loss": 0.1342,
"step": 8500
},
{
"epoch": 1.474403320650294,
"grad_norm": 1.645462989807129,
"learning_rate": 4.467586206896552e-06,
"loss": 0.1379,
"step": 8525
},
{
"epoch": 1.4787270840539606,
"grad_norm": 1.5544453859329224,
"learning_rate": 4.4503448275862074e-06,
"loss": 0.151,
"step": 8550
},
{
"epoch": 1.4830508474576272,
"grad_norm": 1.968682050704956,
"learning_rate": 4.433103448275862e-06,
"loss": 0.143,
"step": 8575
},
{
"epoch": 1.4873746108612937,
"grad_norm": 1.315320372581482,
"learning_rate": 4.415862068965517e-06,
"loss": 0.1493,
"step": 8600
},
{
"epoch": 1.4916983742649603,
"grad_norm": 1.9570213556289673,
"learning_rate": 4.398620689655173e-06,
"loss": 0.1431,
"step": 8625
},
{
"epoch": 1.4960221376686267,
"grad_norm": 1.6048710346221924,
"learning_rate": 4.381379310344829e-06,
"loss": 0.1446,
"step": 8650
},
{
"epoch": 1.5003459010722935,
"grad_norm": 1.7305105924606323,
"learning_rate": 4.3641379310344835e-06,
"loss": 0.142,
"step": 8675
},
{
"epoch": 1.5046696644759598,
"grad_norm": 1.682835340499878,
"learning_rate": 4.346896551724138e-06,
"loss": 0.1497,
"step": 8700
},
{
"epoch": 1.5089934278796264,
"grad_norm": 1.6621623039245605,
"learning_rate": 4.329655172413793e-06,
"loss": 0.138,
"step": 8725
},
{
"epoch": 1.513317191283293,
"grad_norm": 1.8892101049423218,
"learning_rate": 4.312413793103449e-06,
"loss": 0.1322,
"step": 8750
},
{
"epoch": 1.5176409546869594,
"grad_norm": 1.942769169807434,
"learning_rate": 4.295172413793104e-06,
"loss": 0.1521,
"step": 8775
},
{
"epoch": 1.5219647180906262,
"grad_norm": 2.032823085784912,
"learning_rate": 4.277931034482759e-06,
"loss": 0.151,
"step": 8800
},
{
"epoch": 1.5262884814942925,
"grad_norm": 2.2061808109283447,
"learning_rate": 4.260689655172414e-06,
"loss": 0.1523,
"step": 8825
},
{
"epoch": 1.5306122448979593,
"grad_norm": 1.5324435234069824,
"learning_rate": 4.243448275862069e-06,
"loss": 0.1422,
"step": 8850
},
{
"epoch": 1.5349360083016257,
"grad_norm": 2.379418134689331,
"learning_rate": 4.226206896551724e-06,
"loss": 0.1376,
"step": 8875
},
{
"epoch": 1.5392597717052923,
"grad_norm": 2.124983072280884,
"learning_rate": 4.20896551724138e-06,
"loss": 0.1449,
"step": 8900
},
{
"epoch": 1.5435835351089588,
"grad_norm": 1.7968913316726685,
"learning_rate": 4.191724137931035e-06,
"loss": 0.1469,
"step": 8925
},
{
"epoch": 1.5479072985126254,
"grad_norm": 1.7448081970214844,
"learning_rate": 4.17448275862069e-06,
"loss": 0.137,
"step": 8950
},
{
"epoch": 1.552231061916292,
"grad_norm": 1.6165127754211426,
"learning_rate": 4.157241379310345e-06,
"loss": 0.1435,
"step": 8975
},
{
"epoch": 1.5565548253199584,
"grad_norm": 1.9623734951019287,
"learning_rate": 4.14e-06,
"loss": 0.1411,
"step": 9000
},
{
"epoch": 1.5565548253199584,
"eval_loss": 0.16016647219657898,
"eval_runtime": 18381.9716,
"eval_samples_per_second": 1.258,
"eval_steps_per_second": 0.157,
"eval_wer": 21.488565624948748,
"step": 9000
},
{
"epoch": 1.5608785887236252,
"grad_norm": 2.1674628257751465,
"learning_rate": 4.122758620689655e-06,
"loss": 0.133,
"step": 9025
},
{
"epoch": 1.5652023521272915,
"grad_norm": 2.140751600265503,
"learning_rate": 4.105517241379311e-06,
"loss": 0.138,
"step": 9050
},
{
"epoch": 1.569526115530958,
"grad_norm": 2.360790252685547,
"learning_rate": 4.0882758620689665e-06,
"loss": 0.1556,
"step": 9075
},
{
"epoch": 1.5738498789346247,
"grad_norm": 2.2368650436401367,
"learning_rate": 4.071034482758621e-06,
"loss": 0.1361,
"step": 9100
},
{
"epoch": 1.5781736423382913,
"grad_norm": 1.6915383338928223,
"learning_rate": 4.053793103448276e-06,
"loss": 0.1355,
"step": 9125
},
{
"epoch": 1.5824974057419579,
"grad_norm": 1.6061946153640747,
"learning_rate": 4.036551724137931e-06,
"loss": 0.1429,
"step": 9150
},
{
"epoch": 1.5868211691456242,
"grad_norm": 2.3573153018951416,
"learning_rate": 4.019310344827587e-06,
"loss": 0.1514,
"step": 9175
},
{
"epoch": 1.591144932549291,
"grad_norm": 1.658203363418579,
"learning_rate": 4.002068965517242e-06,
"loss": 0.1253,
"step": 9200
},
{
"epoch": 1.5954686959529574,
"grad_norm": 1.5806010961532593,
"learning_rate": 3.9848275862068965e-06,
"loss": 0.135,
"step": 9225
},
{
"epoch": 1.599792459356624,
"grad_norm": 1.6775792837142944,
"learning_rate": 3.967586206896552e-06,
"loss": 0.1393,
"step": 9250
},
{
"epoch": 1.6041162227602905,
"grad_norm": 1.632627010345459,
"learning_rate": 3.950344827586207e-06,
"loss": 0.1441,
"step": 9275
},
{
"epoch": 1.6084399861639571,
"grad_norm": 1.3760111331939697,
"learning_rate": 3.933103448275862e-06,
"loss": 0.1344,
"step": 9300
},
{
"epoch": 1.6127637495676237,
"grad_norm": 1.8090389966964722,
"learning_rate": 3.915862068965518e-06,
"loss": 0.1469,
"step": 9325
},
{
"epoch": 1.61708751297129,
"grad_norm": 1.6850309371948242,
"learning_rate": 3.898620689655173e-06,
"loss": 0.1361,
"step": 9350
},
{
"epoch": 1.6214112763749569,
"grad_norm": 1.616658329963684,
"learning_rate": 3.8813793103448275e-06,
"loss": 0.143,
"step": 9375
},
{
"epoch": 1.6257350397786232,
"grad_norm": 1.5689523220062256,
"learning_rate": 3.864137931034483e-06,
"loss": 0.1452,
"step": 9400
},
{
"epoch": 1.63005880318229,
"grad_norm": 1.563894510269165,
"learning_rate": 3.846896551724138e-06,
"loss": 0.1326,
"step": 9425
},
{
"epoch": 1.6343825665859564,
"grad_norm": 1.7194911241531372,
"learning_rate": 3.829655172413793e-06,
"loss": 0.1446,
"step": 9450
},
{
"epoch": 1.638706329989623,
"grad_norm": 1.3777263164520264,
"learning_rate": 3.8124137931034486e-06,
"loss": 0.1438,
"step": 9475
},
{
"epoch": 1.6430300933932895,
"grad_norm": 1.9295512437820435,
"learning_rate": 3.795172413793104e-06,
"loss": 0.1358,
"step": 9500
},
{
"epoch": 1.6473538567969561,
"grad_norm": 1.6866202354431152,
"learning_rate": 3.7779310344827592e-06,
"loss": 0.1422,
"step": 9525
},
{
"epoch": 1.6516776202006227,
"grad_norm": 1.8246145248413086,
"learning_rate": 3.760689655172414e-06,
"loss": 0.1366,
"step": 9550
},
{
"epoch": 1.656001383604289,
"grad_norm": 1.933508276939392,
"learning_rate": 3.7434482758620694e-06,
"loss": 0.1384,
"step": 9575
},
{
"epoch": 1.6603251470079559,
"grad_norm": 1.7554436922073364,
"learning_rate": 3.7262068965517247e-06,
"loss": 0.1409,
"step": 9600
},
{
"epoch": 1.6646489104116222,
"grad_norm": 1.4804601669311523,
"learning_rate": 3.7089655172413795e-06,
"loss": 0.1372,
"step": 9625
},
{
"epoch": 1.6689726738152888,
"grad_norm": 1.9584331512451172,
"learning_rate": 3.691724137931035e-06,
"loss": 0.1435,
"step": 9650
},
{
"epoch": 1.6732964372189554,
"grad_norm": 1.8187692165374756,
"learning_rate": 3.67448275862069e-06,
"loss": 0.1403,
"step": 9675
},
{
"epoch": 1.677620200622622,
"grad_norm": 2.358635187149048,
"learning_rate": 3.657241379310345e-06,
"loss": 0.1438,
"step": 9700
},
{
"epoch": 1.6819439640262885,
"grad_norm": 2.2827279567718506,
"learning_rate": 3.6400000000000003e-06,
"loss": 0.1335,
"step": 9725
},
{
"epoch": 1.686267727429955,
"grad_norm": 1.7171095609664917,
"learning_rate": 3.622758620689655e-06,
"loss": 0.1406,
"step": 9750
},
{
"epoch": 1.6905914908336217,
"grad_norm": 1.9003287553787231,
"learning_rate": 3.6055172413793105e-06,
"loss": 0.1428,
"step": 9775
},
{
"epoch": 1.694915254237288,
"grad_norm": 1.6332672834396362,
"learning_rate": 3.5882758620689658e-06,
"loss": 0.1487,
"step": 9800
},
{
"epoch": 1.6992390176409546,
"grad_norm": 1.782727599143982,
"learning_rate": 3.5710344827586206e-06,
"loss": 0.1341,
"step": 9825
},
{
"epoch": 1.7035627810446212,
"grad_norm": 1.5744534730911255,
"learning_rate": 3.553793103448276e-06,
"loss": 0.1447,
"step": 9850
},
{
"epoch": 1.7078865444482878,
"grad_norm": 1.8412572145462036,
"learning_rate": 3.5365517241379316e-06,
"loss": 0.1382,
"step": 9875
},
{
"epoch": 1.7122103078519544,
"grad_norm": 1.9548972845077515,
"learning_rate": 3.5193103448275865e-06,
"loss": 0.1312,
"step": 9900
},
{
"epoch": 1.7165340712556207,
"grad_norm": 1.7109405994415283,
"learning_rate": 3.502068965517242e-06,
"loss": 0.1421,
"step": 9925
},
{
"epoch": 1.7208578346592875,
"grad_norm": 1.8661736249923706,
"learning_rate": 3.484827586206897e-06,
"loss": 0.1429,
"step": 9950
},
{
"epoch": 1.725181598062954,
"grad_norm": 1.750954508781433,
"learning_rate": 3.467586206896552e-06,
"loss": 0.1445,
"step": 9975
},
{
"epoch": 1.7295053614666207,
"grad_norm": 1.9543379545211792,
"learning_rate": 3.4503448275862073e-06,
"loss": 0.1401,
"step": 10000
},
{
"epoch": 1.7295053614666207,
"eval_loss": 0.15686531364917755,
"eval_runtime": 18349.489,
"eval_samples_per_second": 1.26,
"eval_steps_per_second": 0.158,
"eval_wer": 21.496062807648066,
"step": 10000
},
{
"epoch": 1.733829124870287,
"grad_norm": 1.7155358791351318,
"learning_rate": 3.4331034482758626e-06,
"loss": 0.1358,
"step": 10025
},
{
"epoch": 1.7381528882739536,
"grad_norm": 1.5235016345977783,
"learning_rate": 3.4158620689655174e-06,
"loss": 0.144,
"step": 10050
},
{
"epoch": 1.7424766516776202,
"grad_norm": 1.7004233598709106,
"learning_rate": 3.3986206896551727e-06,
"loss": 0.1422,
"step": 10075
},
{
"epoch": 1.7468004150812868,
"grad_norm": 1.533207654953003,
"learning_rate": 3.381379310344828e-06,
"loss": 0.1431,
"step": 10100
},
{
"epoch": 1.7511241784849534,
"grad_norm": 1.442386507987976,
"learning_rate": 3.364827586206897e-06,
"loss": 0.1435,
"step": 10125
},
{
"epoch": 1.7554479418886197,
"grad_norm": 1.8642257452011108,
"learning_rate": 3.347586206896552e-06,
"loss": 0.1433,
"step": 10150
},
{
"epoch": 1.7597717052922865,
"grad_norm": 2.0965137481689453,
"learning_rate": 3.330344827586207e-06,
"loss": 0.1455,
"step": 10175
},
{
"epoch": 1.764095468695953,
"grad_norm": 1.3066208362579346,
"learning_rate": 3.3131034482758624e-06,
"loss": 0.141,
"step": 10200
},
{
"epoch": 1.7684192320996195,
"grad_norm": 1.3160147666931152,
"learning_rate": 3.2958620689655173e-06,
"loss": 0.1357,
"step": 10225
},
{
"epoch": 1.772742995503286,
"grad_norm": 1.9913936853408813,
"learning_rate": 3.2786206896551726e-06,
"loss": 0.1466,
"step": 10250
},
{
"epoch": 1.7770667589069526,
"grad_norm": 1.4349515438079834,
"learning_rate": 3.261379310344828e-06,
"loss": 0.1408,
"step": 10275
},
{
"epoch": 1.7813905223106192,
"grad_norm": 2.0493996143341064,
"learning_rate": 3.2441379310344828e-06,
"loss": 0.1454,
"step": 10300
},
{
"epoch": 1.7857142857142856,
"grad_norm": 2.072068214416504,
"learning_rate": 3.226896551724138e-06,
"loss": 0.1307,
"step": 10325
},
{
"epoch": 1.7900380491179524,
"grad_norm": 1.3982826471328735,
"learning_rate": 3.209655172413793e-06,
"loss": 0.1376,
"step": 10350
},
{
"epoch": 1.7943618125216187,
"grad_norm": 2.1408965587615967,
"learning_rate": 3.1924137931034486e-06,
"loss": 0.1453,
"step": 10375
},
{
"epoch": 1.7986855759252853,
"grad_norm": 1.740058183670044,
"learning_rate": 3.175172413793104e-06,
"loss": 0.1467,
"step": 10400
},
{
"epoch": 1.803009339328952,
"grad_norm": 1.626065969467163,
"learning_rate": 3.1579310344827592e-06,
"loss": 0.1426,
"step": 10425
},
{
"epoch": 1.8073331027326185,
"grad_norm": 1.758325219154358,
"learning_rate": 3.140689655172414e-06,
"loss": 0.1421,
"step": 10450
},
{
"epoch": 1.811656866136285,
"grad_norm": 3.0554895401000977,
"learning_rate": 3.1234482758620694e-06,
"loss": 0.1512,
"step": 10475
},
{
"epoch": 1.8159806295399514,
"grad_norm": 2.0043108463287354,
"learning_rate": 3.1062068965517243e-06,
"loss": 0.1477,
"step": 10500
},
{
"epoch": 1.8203043929436182,
"grad_norm": 1.9781348705291748,
"learning_rate": 3.0889655172413796e-06,
"loss": 0.1434,
"step": 10525
},
{
"epoch": 1.8246281563472846,
"grad_norm": 1.9845682382583618,
"learning_rate": 3.071724137931035e-06,
"loss": 0.1371,
"step": 10550
},
{
"epoch": 1.8289519197509514,
"grad_norm": 1.7266494035720825,
"learning_rate": 3.0544827586206897e-06,
"loss": 0.1414,
"step": 10575
},
{
"epoch": 1.8332756831546178,
"grad_norm": 1.6237975358963013,
"learning_rate": 3.037241379310345e-06,
"loss": 0.1451,
"step": 10600
},
{
"epoch": 1.8375994465582843,
"grad_norm": 1.3192580938339233,
"learning_rate": 3.0200000000000003e-06,
"loss": 0.1335,
"step": 10625
},
{
"epoch": 1.841923209961951,
"grad_norm": 1.5643882751464844,
"learning_rate": 3.002758620689655e-06,
"loss": 0.14,
"step": 10650
},
{
"epoch": 1.8462469733656173,
"grad_norm": 2.649275779724121,
"learning_rate": 2.9855172413793105e-06,
"loss": 0.1412,
"step": 10675
},
{
"epoch": 1.850570736769284,
"grad_norm": 2.3274197578430176,
"learning_rate": 2.9682758620689658e-06,
"loss": 0.1486,
"step": 10700
},
{
"epoch": 1.8548945001729504,
"grad_norm": 2.0510830879211426,
"learning_rate": 2.9510344827586206e-06,
"loss": 0.1295,
"step": 10725
},
{
"epoch": 1.8592182635766172,
"grad_norm": 1.8213071823120117,
"learning_rate": 2.933793103448276e-06,
"loss": 0.1326,
"step": 10750
},
{
"epoch": 1.8635420269802836,
"grad_norm": 1.182237148284912,
"learning_rate": 2.9165517241379316e-06,
"loss": 0.1402,
"step": 10775
},
{
"epoch": 1.8678657903839502,
"grad_norm": 1.5509198904037476,
"learning_rate": 2.8993103448275865e-06,
"loss": 0.1299,
"step": 10800
},
{
"epoch": 1.8721895537876168,
"grad_norm": 1.480782151222229,
"learning_rate": 2.882068965517242e-06,
"loss": 0.1445,
"step": 10825
},
{
"epoch": 1.8765133171912833,
"grad_norm": 1.7314891815185547,
"learning_rate": 2.864827586206897e-06,
"loss": 0.1332,
"step": 10850
},
{
"epoch": 1.88083708059495,
"grad_norm": 1.5692538022994995,
"learning_rate": 2.847586206896552e-06,
"loss": 0.1425,
"step": 10875
},
{
"epoch": 1.8851608439986163,
"grad_norm": 1.927616000175476,
"learning_rate": 2.8303448275862073e-06,
"loss": 0.1386,
"step": 10900
},
{
"epoch": 1.889484607402283,
"grad_norm": 1.9353783130645752,
"learning_rate": 2.813103448275862e-06,
"loss": 0.1418,
"step": 10925
},
{
"epoch": 1.8938083708059494,
"grad_norm": 1.5706393718719482,
"learning_rate": 2.7958620689655174e-06,
"loss": 0.1316,
"step": 10950
},
{
"epoch": 1.898132134209616,
"grad_norm": 1.642783522605896,
"learning_rate": 2.7786206896551727e-06,
"loss": 0.1392,
"step": 10975
},
{
"epoch": 1.9024558976132826,
"grad_norm": 1.7343003749847412,
"learning_rate": 2.7613793103448276e-06,
"loss": 0.148,
"step": 11000
},
{
"epoch": 1.9024558976132826,
"eval_loss": 0.1548272669315338,
"eval_runtime": 18698.2371,
"eval_samples_per_second": 1.237,
"eval_steps_per_second": 0.155,
"eval_wer": 21.28684455294534,
"step": 11000
},
{
"epoch": 1.9067796610169492,
"grad_norm": 1.9196803569793701,
"learning_rate": 2.744137931034483e-06,
"loss": 0.1343,
"step": 11025
},
{
"epoch": 1.9111034244206158,
"grad_norm": 2.108469009399414,
"learning_rate": 2.726896551724138e-06,
"loss": 0.146,
"step": 11050
},
{
"epoch": 1.9154271878242821,
"grad_norm": 1.4290471076965332,
"learning_rate": 2.709655172413793e-06,
"loss": 0.1428,
"step": 11075
},
{
"epoch": 1.919750951227949,
"grad_norm": 2.097700357437134,
"learning_rate": 2.6924137931034483e-06,
"loss": 0.137,
"step": 11100
},
{
"epoch": 1.9240747146316153,
"grad_norm": 1.7768304347991943,
"learning_rate": 2.6751724137931036e-06,
"loss": 0.1339,
"step": 11125
},
{
"epoch": 1.9283984780352819,
"grad_norm": 2.3340935707092285,
"learning_rate": 2.6579310344827585e-06,
"loss": 0.1499,
"step": 11150
},
{
"epoch": 1.9327222414389484,
"grad_norm": 1.390179991722107,
"learning_rate": 2.6406896551724142e-06,
"loss": 0.1332,
"step": 11175
},
{
"epoch": 1.937046004842615,
"grad_norm": 1.4288243055343628,
"learning_rate": 2.6234482758620695e-06,
"loss": 0.1378,
"step": 11200
},
{
"epoch": 1.9413697682462816,
"grad_norm": 1.7294844388961792,
"learning_rate": 2.6062068965517244e-06,
"loss": 0.1456,
"step": 11225
},
{
"epoch": 1.945693531649948,
"grad_norm": 2.5070643424987793,
"learning_rate": 2.5889655172413797e-06,
"loss": 0.1351,
"step": 11250
},
{
"epoch": 1.9500172950536148,
"grad_norm": 1.9430601596832275,
"learning_rate": 2.571724137931035e-06,
"loss": 0.1373,
"step": 11275
},
{
"epoch": 1.9543410584572811,
"grad_norm": 1.5814850330352783,
"learning_rate": 2.55448275862069e-06,
"loss": 0.1423,
"step": 11300
},
{
"epoch": 1.958664821860948,
"grad_norm": 2.2843282222747803,
"learning_rate": 2.537241379310345e-06,
"loss": 0.1426,
"step": 11325
},
{
"epoch": 1.9629885852646143,
"grad_norm": 1.528397798538208,
"learning_rate": 2.52e-06,
"loss": 0.1362,
"step": 11350
},
{
"epoch": 1.9673123486682809,
"grad_norm": 1.5325922966003418,
"learning_rate": 2.5027586206896553e-06,
"loss": 0.1348,
"step": 11375
},
{
"epoch": 1.9716361120719474,
"grad_norm": 1.3210896253585815,
"learning_rate": 2.4855172413793106e-06,
"loss": 0.1399,
"step": 11400
},
{
"epoch": 1.975959875475614,
"grad_norm": 1.7425906658172607,
"learning_rate": 2.4682758620689655e-06,
"loss": 0.1347,
"step": 11425
},
{
"epoch": 1.9802836388792806,
"grad_norm": 1.4508990049362183,
"learning_rate": 2.4510344827586208e-06,
"loss": 0.1361,
"step": 11450
},
{
"epoch": 1.984607402282947,
"grad_norm": 1.4730141162872314,
"learning_rate": 2.433793103448276e-06,
"loss": 0.147,
"step": 11475
},
{
"epoch": 1.9889311656866138,
"grad_norm": 1.602019190788269,
"learning_rate": 2.4165517241379314e-06,
"loss": 0.1363,
"step": 11500
},
{
"epoch": 1.9932549290902801,
"grad_norm": 1.9487652778625488,
"learning_rate": 2.3993103448275866e-06,
"loss": 0.1371,
"step": 11525
},
{
"epoch": 1.9975786924939467,
"grad_norm": 1.735405445098877,
"learning_rate": 2.3820689655172415e-06,
"loss": 0.142,
"step": 11550
},
{
"epoch": 2.0019024558976133,
"grad_norm": 1.195746898651123,
"learning_rate": 2.364827586206897e-06,
"loss": 0.134,
"step": 11575
},
{
"epoch": 2.0062262193012796,
"grad_norm": 1.7679691314697266,
"learning_rate": 2.3475862068965517e-06,
"loss": 0.1181,
"step": 11600
},
{
"epoch": 2.0105499827049464,
"grad_norm": 1.606373906135559,
"learning_rate": 2.330344827586207e-06,
"loss": 0.1136,
"step": 11625
},
{
"epoch": 2.014873746108613,
"grad_norm": 1.4573360681533813,
"learning_rate": 2.3131034482758623e-06,
"loss": 0.1146,
"step": 11650
},
{
"epoch": 2.0191975095122796,
"grad_norm": 1.7319608926773071,
"learning_rate": 2.2958620689655176e-06,
"loss": 0.1073,
"step": 11675
},
{
"epoch": 2.023521272915946,
"grad_norm": 1.566738247871399,
"learning_rate": 2.278620689655173e-06,
"loss": 0.1185,
"step": 11700
},
{
"epoch": 2.0278450363196128,
"grad_norm": 1.3787413835525513,
"learning_rate": 2.2613793103448277e-06,
"loss": 0.1142,
"step": 11725
},
{
"epoch": 2.032168799723279,
"grad_norm": 1.364129662513733,
"learning_rate": 2.244137931034483e-06,
"loss": 0.1221,
"step": 11750
},
{
"epoch": 2.0364925631269455,
"grad_norm": 1.4195500612258911,
"learning_rate": 2.2268965517241383e-06,
"loss": 0.1086,
"step": 11775
},
{
"epoch": 2.0408163265306123,
"grad_norm": 1.7290923595428467,
"learning_rate": 2.209655172413793e-06,
"loss": 0.1236,
"step": 11800
},
{
"epoch": 2.0451400899342786,
"grad_norm": 1.8055263757705688,
"learning_rate": 2.1924137931034485e-06,
"loss": 0.1052,
"step": 11825
},
{
"epoch": 2.0494638533379455,
"grad_norm": 1.3701517581939697,
"learning_rate": 2.1751724137931033e-06,
"loss": 0.1104,
"step": 11850
},
{
"epoch": 2.053787616741612,
"grad_norm": 1.7283467054367065,
"learning_rate": 2.157931034482759e-06,
"loss": 0.119,
"step": 11875
},
{
"epoch": 2.0581113801452786,
"grad_norm": 1.6004694700241089,
"learning_rate": 2.140689655172414e-06,
"loss": 0.1173,
"step": 11900
},
{
"epoch": 2.062435143548945,
"grad_norm": 2.361544609069824,
"learning_rate": 2.1234482758620692e-06,
"loss": 0.1154,
"step": 11925
},
{
"epoch": 2.0667589069526118,
"grad_norm": 1.6328535079956055,
"learning_rate": 2.1062068965517245e-06,
"loss": 0.1128,
"step": 11950
},
{
"epoch": 2.071082670356278,
"grad_norm": 1.546014666557312,
"learning_rate": 2.0889655172413794e-06,
"loss": 0.1102,
"step": 11975
},
{
"epoch": 2.0754064337599445,
"grad_norm": 1.43386709690094,
"learning_rate": 2.0717241379310347e-06,
"loss": 0.1024,
"step": 12000
},
{
"epoch": 2.0754064337599445,
"eval_loss": 0.16041299700737,
"eval_runtime": 18346.2572,
"eval_samples_per_second": 1.261,
"eval_steps_per_second": 0.158,
"eval_wer": 21.351273466767566,
"step": 12000
},
{
"epoch": 2.0797301971636113,
"grad_norm": 1.259810209274292,
"learning_rate": 2.0544827586206896e-06,
"loss": 0.1054,
"step": 12025
},
{
"epoch": 2.0840539605672777,
"grad_norm": 1.7551687955856323,
"learning_rate": 2.037241379310345e-06,
"loss": 0.1084,
"step": 12050
},
{
"epoch": 2.0883777239709445,
"grad_norm": 1.4865624904632568,
"learning_rate": 2.02e-06,
"loss": 0.1148,
"step": 12075
},
{
"epoch": 2.092701487374611,
"grad_norm": 1.7182704210281372,
"learning_rate": 2.0027586206896554e-06,
"loss": 0.1089,
"step": 12100
},
{
"epoch": 2.0970252507782776,
"grad_norm": 1.8586798906326294,
"learning_rate": 1.9855172413793107e-06,
"loss": 0.1058,
"step": 12125
},
{
"epoch": 2.101349014181944,
"grad_norm": 1.1890976428985596,
"learning_rate": 1.9682758620689656e-06,
"loss": 0.1118,
"step": 12150
},
{
"epoch": 2.1056727775856103,
"grad_norm": 1.7096284627914429,
"learning_rate": 1.951034482758621e-06,
"loss": 0.1178,
"step": 12175
},
{
"epoch": 2.109996540989277,
"grad_norm": 1.6265876293182373,
"learning_rate": 1.933793103448276e-06,
"loss": 0.11,
"step": 12200
},
{
"epoch": 2.1143203043929435,
"grad_norm": 2.068101406097412,
"learning_rate": 1.916551724137931e-06,
"loss": 0.1129,
"step": 12225
},
{
"epoch": 2.1186440677966103,
"grad_norm": 1.4093377590179443,
"learning_rate": 1.8993103448275864e-06,
"loss": 0.1033,
"step": 12250
},
{
"epoch": 2.1229678312002767,
"grad_norm": 1.716834306716919,
"learning_rate": 1.8820689655172416e-06,
"loss": 0.1118,
"step": 12275
},
{
"epoch": 2.1272915946039435,
"grad_norm": 1.614517331123352,
"learning_rate": 1.8648275862068967e-06,
"loss": 0.1123,
"step": 12300
},
{
"epoch": 2.13161535800761,
"grad_norm": 1.609824299812317,
"learning_rate": 1.847586206896552e-06,
"loss": 0.1135,
"step": 12325
},
{
"epoch": 2.135939121411276,
"grad_norm": 1.4824578762054443,
"learning_rate": 1.830344827586207e-06,
"loss": 0.1154,
"step": 12350
},
{
"epoch": 2.140262884814943,
"grad_norm": 2.31526255607605,
"learning_rate": 1.8131034482758622e-06,
"loss": 0.1102,
"step": 12375
},
{
"epoch": 2.1445866482186093,
"grad_norm": 1.5895624160766602,
"learning_rate": 1.7958620689655173e-06,
"loss": 0.1114,
"step": 12400
},
{
"epoch": 2.148910411622276,
"grad_norm": 1.8020554780960083,
"learning_rate": 1.7786206896551726e-06,
"loss": 0.114,
"step": 12425
},
{
"epoch": 2.1532341750259425,
"grad_norm": 1.7379206418991089,
"learning_rate": 1.7613793103448276e-06,
"loss": 0.1074,
"step": 12450
},
{
"epoch": 2.1575579384296093,
"grad_norm": 1.5234774351119995,
"learning_rate": 1.744137931034483e-06,
"loss": 0.1049,
"step": 12475
},
{
"epoch": 2.1618817018332757,
"grad_norm": 2.315284252166748,
"learning_rate": 1.7268965517241382e-06,
"loss": 0.1187,
"step": 12500
},
{
"epoch": 2.166205465236942,
"grad_norm": 1.5843135118484497,
"learning_rate": 1.7096551724137933e-06,
"loss": 0.1099,
"step": 12525
},
{
"epoch": 2.170529228640609,
"grad_norm": 1.4398502111434937,
"learning_rate": 1.6924137931034484e-06,
"loss": 0.1179,
"step": 12550
},
{
"epoch": 2.174852992044275,
"grad_norm": 1.6261566877365112,
"learning_rate": 1.6751724137931037e-06,
"loss": 0.1109,
"step": 12575
},
{
"epoch": 2.179176755447942,
"grad_norm": 1.493456244468689,
"learning_rate": 1.6579310344827588e-06,
"loss": 0.1087,
"step": 12600
},
{
"epoch": 2.1835005188516083,
"grad_norm": 2.314145565032959,
"learning_rate": 1.6406896551724138e-06,
"loss": 0.1092,
"step": 12625
},
{
"epoch": 2.187824282255275,
"grad_norm": 1.5329914093017578,
"learning_rate": 1.623448275862069e-06,
"loss": 0.1179,
"step": 12650
},
{
"epoch": 2.1921480456589415,
"grad_norm": 1.8789763450622559,
"learning_rate": 1.6062068965517244e-06,
"loss": 0.1146,
"step": 12675
},
{
"epoch": 2.196471809062608,
"grad_norm": 1.6287965774536133,
"learning_rate": 1.5889655172413795e-06,
"loss": 0.1076,
"step": 12700
},
{
"epoch": 2.2007955724662747,
"grad_norm": 1.3174418210983276,
"learning_rate": 1.5717241379310346e-06,
"loss": 0.1042,
"step": 12725
},
{
"epoch": 2.205119335869941,
"grad_norm": 1.481208086013794,
"learning_rate": 1.55448275862069e-06,
"loss": 0.1113,
"step": 12750
},
{
"epoch": 2.209443099273608,
"grad_norm": 1.5273900032043457,
"learning_rate": 1.537241379310345e-06,
"loss": 0.1101,
"step": 12775
},
{
"epoch": 2.213766862677274,
"grad_norm": 1.648422122001648,
"learning_rate": 1.52e-06,
"loss": 0.1079,
"step": 12800
},
{
"epoch": 2.218090626080941,
"grad_norm": 1.5516725778579712,
"learning_rate": 1.5027586206896551e-06,
"loss": 0.1147,
"step": 12825
},
{
"epoch": 2.2224143894846073,
"grad_norm": 1.68115234375,
"learning_rate": 1.4855172413793104e-06,
"loss": 0.1066,
"step": 12850
},
{
"epoch": 2.226738152888274,
"grad_norm": 1.784679889678955,
"learning_rate": 1.4682758620689657e-06,
"loss": 0.1118,
"step": 12875
},
{
"epoch": 2.2310619162919405,
"grad_norm": 1.5086143016815186,
"learning_rate": 1.4510344827586208e-06,
"loss": 0.1175,
"step": 12900
},
{
"epoch": 2.235385679695607,
"grad_norm": 1.4072906970977783,
"learning_rate": 1.433793103448276e-06,
"loss": 0.111,
"step": 12925
},
{
"epoch": 2.2397094430992737,
"grad_norm": 1.4439597129821777,
"learning_rate": 1.4165517241379312e-06,
"loss": 0.111,
"step": 12950
},
{
"epoch": 2.24403320650294,
"grad_norm": 1.474888563156128,
"learning_rate": 1.3993103448275863e-06,
"loss": 0.1121,
"step": 12975
},
{
"epoch": 2.248356969906607,
"grad_norm": 2.2483158111572266,
"learning_rate": 1.3820689655172416e-06,
"loss": 0.1145,
"step": 13000
},
{
"epoch": 2.248356969906607,
"eval_loss": 0.16119933128356934,
"eval_runtime": 18308.4763,
"eval_samples_per_second": 1.263,
"eval_steps_per_second": 0.158,
"eval_wer": 21.170638221105975,
"step": 13000
},
{
"epoch": 2.252680733310273,
"grad_norm": 1.6092015504837036,
"learning_rate": 1.3648275862068966e-06,
"loss": 0.1097,
"step": 13025
},
{
"epoch": 2.25700449671394,
"grad_norm": 1.7606579065322876,
"learning_rate": 1.3475862068965517e-06,
"loss": 0.1175,
"step": 13050
},
{
"epoch": 2.2613282601176063,
"grad_norm": 1.5152302980422974,
"learning_rate": 1.3303448275862072e-06,
"loss": 0.1093,
"step": 13075
},
{
"epoch": 2.265652023521273,
"grad_norm": 1.5567739009857178,
"learning_rate": 1.3131034482758623e-06,
"loss": 0.1033,
"step": 13100
},
{
"epoch": 2.2699757869249395,
"grad_norm": 1.738932728767395,
"learning_rate": 1.2958620689655174e-06,
"loss": 0.115,
"step": 13125
},
{
"epoch": 2.274299550328606,
"grad_norm": 1.2959973812103271,
"learning_rate": 1.2786206896551725e-06,
"loss": 0.1073,
"step": 13150
},
{
"epoch": 2.2786233137322727,
"grad_norm": 1.391763687133789,
"learning_rate": 1.2613793103448278e-06,
"loss": 0.1045,
"step": 13175
},
{
"epoch": 2.282947077135939,
"grad_norm": 1.8664268255233765,
"learning_rate": 1.2441379310344829e-06,
"loss": 0.1146,
"step": 13200
},
{
"epoch": 2.287270840539606,
"grad_norm": 1.708479642868042,
"learning_rate": 1.226896551724138e-06,
"loss": 0.1067,
"step": 13225
},
{
"epoch": 2.291594603943272,
"grad_norm": 1.6440658569335938,
"learning_rate": 1.2096551724137932e-06,
"loss": 0.1202,
"step": 13250
},
{
"epoch": 2.295918367346939,
"grad_norm": 1.3780643939971924,
"learning_rate": 1.1924137931034483e-06,
"loss": 0.1183,
"step": 13275
},
{
"epoch": 2.3002421307506054,
"grad_norm": 2.0071065425872803,
"learning_rate": 1.1751724137931036e-06,
"loss": 0.1133,
"step": 13300
},
{
"epoch": 2.3045658941542717,
"grad_norm": 1.6017673015594482,
"learning_rate": 1.1579310344827587e-06,
"loss": 0.1087,
"step": 13325
},
{
"epoch": 2.3088896575579385,
"grad_norm": 1.8712215423583984,
"learning_rate": 1.140689655172414e-06,
"loss": 0.1165,
"step": 13350
},
{
"epoch": 2.313213420961605,
"grad_norm": 2.241171360015869,
"learning_rate": 1.123448275862069e-06,
"loss": 0.1041,
"step": 13375
},
{
"epoch": 2.3175371843652717,
"grad_norm": 1.7374755144119263,
"learning_rate": 1.1062068965517241e-06,
"loss": 0.1088,
"step": 13400
},
{
"epoch": 2.321860947768938,
"grad_norm": 1.387485384941101,
"learning_rate": 1.0889655172413794e-06,
"loss": 0.1124,
"step": 13425
},
{
"epoch": 2.326184711172605,
"grad_norm": 1.7821033000946045,
"learning_rate": 1.0717241379310345e-06,
"loss": 0.1129,
"step": 13450
},
{
"epoch": 2.330508474576271,
"grad_norm": 1.8013248443603516,
"learning_rate": 1.0544827586206898e-06,
"loss": 0.1113,
"step": 13475
},
{
"epoch": 2.3348322379799376,
"grad_norm": 1.7627577781677246,
"learning_rate": 1.0372413793103449e-06,
"loss": 0.106,
"step": 13500
},
{
"epoch": 2.3391560013836044,
"grad_norm": 1.4727102518081665,
"learning_rate": 1.02e-06,
"loss": 0.119,
"step": 13525
},
{
"epoch": 2.3434797647872707,
"grad_norm": 2.0880470275878906,
"learning_rate": 1.0027586206896553e-06,
"loss": 0.1099,
"step": 13550
},
{
"epoch": 2.3478035281909375,
"grad_norm": 1.6722257137298584,
"learning_rate": 9.855172413793104e-07,
"loss": 0.1176,
"step": 13575
},
{
"epoch": 2.352127291594604,
"grad_norm": 1.5856317281723022,
"learning_rate": 9.682758620689656e-07,
"loss": 0.1108,
"step": 13600
},
{
"epoch": 2.3564510549982707,
"grad_norm": 1.4923266172409058,
"learning_rate": 9.510344827586207e-07,
"loss": 0.1098,
"step": 13625
},
{
"epoch": 2.360774818401937,
"grad_norm": 1.6731258630752563,
"learning_rate": 9.33793103448276e-07,
"loss": 0.1129,
"step": 13650
},
{
"epoch": 2.3650985818056034,
"grad_norm": 1.5794440507888794,
"learning_rate": 9.165517241379311e-07,
"loss": 0.1132,
"step": 13675
},
{
"epoch": 2.36942234520927,
"grad_norm": 1.9833779335021973,
"learning_rate": 8.993103448275863e-07,
"loss": 0.1104,
"step": 13700
},
{
"epoch": 2.3737461086129366,
"grad_norm": 1.929689645767212,
"learning_rate": 8.820689655172414e-07,
"loss": 0.1113,
"step": 13725
},
{
"epoch": 2.3780698720166034,
"grad_norm": 1.2566806077957153,
"learning_rate": 8.648275862068967e-07,
"loss": 0.1086,
"step": 13750
},
{
"epoch": 2.3823936354202697,
"grad_norm": 2.132885694503784,
"learning_rate": 8.475862068965517e-07,
"loss": 0.1077,
"step": 13775
},
{
"epoch": 2.3867173988239365,
"grad_norm": 1.7326260805130005,
"learning_rate": 8.303448275862069e-07,
"loss": 0.1083,
"step": 13800
},
{
"epoch": 2.391041162227603,
"grad_norm": 1.7849270105361938,
"learning_rate": 8.131034482758621e-07,
"loss": 0.1059,
"step": 13825
},
{
"epoch": 2.3953649256312692,
"grad_norm": 1.3285590410232544,
"learning_rate": 7.958620689655173e-07,
"loss": 0.0987,
"step": 13850
},
{
"epoch": 2.399688689034936,
"grad_norm": 1.6088476181030273,
"learning_rate": 7.786206896551725e-07,
"loss": 0.1097,
"step": 13875
},
{
"epoch": 2.4040124524386024,
"grad_norm": 1.5900739431381226,
"learning_rate": 7.613793103448276e-07,
"loss": 0.107,
"step": 13900
},
{
"epoch": 2.408336215842269,
"grad_norm": 1.4366368055343628,
"learning_rate": 7.441379310344828e-07,
"loss": 0.1087,
"step": 13925
},
{
"epoch": 2.4126599792459356,
"grad_norm": 1.583687424659729,
"learning_rate": 7.268965517241381e-07,
"loss": 0.109,
"step": 13950
},
{
"epoch": 2.4169837426496024,
"grad_norm": 1.6272701025009155,
"learning_rate": 7.096551724137931e-07,
"loss": 0.1087,
"step": 13975
},
{
"epoch": 2.4213075060532687,
"grad_norm": 1.6933766603469849,
"learning_rate": 6.924137931034483e-07,
"loss": 0.1051,
"step": 14000
},
{
"epoch": 2.4213075060532687,
"eval_loss": 0.16103602945804596,
"eval_runtime": 18381.9917,
"eval_samples_per_second": 1.258,
"eval_steps_per_second": 0.157,
"eval_wer": 21.26997589187188,
"step": 14000
},
{
"epoch": 2.425631269456935,
"grad_norm": 1.5084308385849,
"learning_rate": 6.751724137931034e-07,
"loss": 0.1108,
"step": 14025
},
{
"epoch": 2.429955032860602,
"grad_norm": 1.3734257221221924,
"learning_rate": 6.579310344827587e-07,
"loss": 0.107,
"step": 14050
},
{
"epoch": 2.4342787962642682,
"grad_norm": 2.01283597946167,
"learning_rate": 6.406896551724139e-07,
"loss": 0.1044,
"step": 14075
},
{
"epoch": 2.438602559667935,
"grad_norm": 1.3206173181533813,
"learning_rate": 6.23448275862069e-07,
"loss": 0.1031,
"step": 14100
},
{
"epoch": 2.4429263230716014,
"grad_norm": 1.6226475238800049,
"learning_rate": 6.062068965517242e-07,
"loss": 0.1068,
"step": 14125
},
{
"epoch": 2.447250086475268,
"grad_norm": 1.8544663190841675,
"learning_rate": 5.896551724137931e-07,
"loss": 0.1134,
"step": 14150
},
{
"epoch": 2.4515738498789346,
"grad_norm": 1.9135141372680664,
"learning_rate": 5.724137931034483e-07,
"loss": 0.1076,
"step": 14175
},
{
"epoch": 2.4558976132826014,
"grad_norm": 1.9522300958633423,
"learning_rate": 5.551724137931035e-07,
"loss": 0.1028,
"step": 14200
},
{
"epoch": 2.4602213766862677,
"grad_norm": 1.5151312351226807,
"learning_rate": 5.379310344827587e-07,
"loss": 0.1047,
"step": 14225
},
{
"epoch": 2.4645451400899345,
"grad_norm": 1.2128667831420898,
"learning_rate": 5.206896551724138e-07,
"loss": 0.1055,
"step": 14250
},
{
"epoch": 2.468868903493601,
"grad_norm": 1.6889070272445679,
"learning_rate": 5.03448275862069e-07,
"loss": 0.1086,
"step": 14275
},
{
"epoch": 2.4731926668972672,
"grad_norm": 1.5473207235336304,
"learning_rate": 4.862068965517241e-07,
"loss": 0.1132,
"step": 14300
},
{
"epoch": 2.477516430300934,
"grad_norm": 1.570099949836731,
"learning_rate": 4.6896551724137934e-07,
"loss": 0.108,
"step": 14325
},
{
"epoch": 2.4818401937046004,
"grad_norm": 1.4723212718963623,
"learning_rate": 4.5172413793103447e-07,
"loss": 0.1115,
"step": 14350
},
{
"epoch": 2.486163957108267,
"grad_norm": 1.4830899238586426,
"learning_rate": 4.344827586206897e-07,
"loss": 0.1164,
"step": 14375
},
{
"epoch": 2.4904877205119336,
"grad_norm": 1.7449450492858887,
"learning_rate": 4.1724137931034485e-07,
"loss": 0.109,
"step": 14400
},
{
"epoch": 2.4948114839156004,
"grad_norm": 1.74076247215271,
"learning_rate": 4.0000000000000003e-07,
"loss": 0.1015,
"step": 14425
},
{
"epoch": 2.4991352473192667,
"grad_norm": 1.4947025775909424,
"learning_rate": 3.8275862068965517e-07,
"loss": 0.1071,
"step": 14450
},
{
"epoch": 2.503459010722933,
"grad_norm": 1.2934141159057617,
"learning_rate": 3.6551724137931036e-07,
"loss": 0.1041,
"step": 14475
},
{
"epoch": 2.5077827741266,
"grad_norm": 2.003370761871338,
"learning_rate": 3.4827586206896555e-07,
"loss": 0.106,
"step": 14500
},
{
"epoch": 2.5121065375302662,
"grad_norm": 1.6628522872924805,
"learning_rate": 3.3103448275862073e-07,
"loss": 0.1109,
"step": 14525
},
{
"epoch": 2.516430300933933,
"grad_norm": 2.323209762573242,
"learning_rate": 3.1379310344827587e-07,
"loss": 0.1114,
"step": 14550
},
{
"epoch": 2.5207540643375994,
"grad_norm": 1.4119327068328857,
"learning_rate": 2.9655172413793106e-07,
"loss": 0.107,
"step": 14575
},
{
"epoch": 2.525077827741266,
"grad_norm": 1.6572582721710205,
"learning_rate": 2.7931034482758624e-07,
"loss": 0.111,
"step": 14600
},
{
"epoch": 2.5294015911449326,
"grad_norm": 1.4858754873275757,
"learning_rate": 2.6206896551724143e-07,
"loss": 0.0951,
"step": 14625
},
{
"epoch": 2.533725354548599,
"grad_norm": 1.6584004163742065,
"learning_rate": 2.4482758620689657e-07,
"loss": 0.1018,
"step": 14650
},
{
"epoch": 2.5380491179522657,
"grad_norm": 1.3879257440567017,
"learning_rate": 2.2758620689655175e-07,
"loss": 0.1113,
"step": 14675
},
{
"epoch": 2.542372881355932,
"grad_norm": 1.7336797714233398,
"learning_rate": 2.1034482758620692e-07,
"loss": 0.1016,
"step": 14700
},
{
"epoch": 2.546696644759599,
"grad_norm": 1.9225796461105347,
"learning_rate": 1.931034482758621e-07,
"loss": 0.1199,
"step": 14725
},
{
"epoch": 2.5510204081632653,
"grad_norm": 1.6256892681121826,
"learning_rate": 1.7586206896551726e-07,
"loss": 0.1047,
"step": 14750
},
{
"epoch": 2.555344171566932,
"grad_norm": 1.2848526239395142,
"learning_rate": 1.5862068965517243e-07,
"loss": 0.1045,
"step": 14775
},
{
"epoch": 2.5596679349705984,
"grad_norm": 1.3589074611663818,
"learning_rate": 1.413793103448276e-07,
"loss": 0.1063,
"step": 14800
},
{
"epoch": 2.5639916983742648,
"grad_norm": 1.3591971397399902,
"learning_rate": 1.2413793103448277e-07,
"loss": 0.11,
"step": 14825
},
{
"epoch": 2.5683154617779316,
"grad_norm": 1.975306510925293,
"learning_rate": 1.0689655172413794e-07,
"loss": 0.1082,
"step": 14850
},
{
"epoch": 2.572639225181598,
"grad_norm": 1.4814668893814087,
"learning_rate": 8.965517241379311e-08,
"loss": 0.1126,
"step": 14875
},
{
"epoch": 2.5769629885852647,
"grad_norm": 1.5567821264266968,
"learning_rate": 7.241379310344829e-08,
"loss": 0.1082,
"step": 14900
},
{
"epoch": 2.581286751988931,
"grad_norm": 1.6796619892120361,
"learning_rate": 5.517241379310345e-08,
"loss": 0.1098,
"step": 14925
},
{
"epoch": 2.585610515392598,
"grad_norm": 1.6661121845245361,
"learning_rate": 3.793103448275862e-08,
"loss": 0.1075,
"step": 14950
},
{
"epoch": 2.5899342787962643,
"grad_norm": 1.9763529300689697,
"learning_rate": 2.0689655172413796e-08,
"loss": 0.1095,
"step": 14975
},
{
"epoch": 2.5942580421999306,
"grad_norm": 1.9784560203552246,
"learning_rate": 3.4482758620689654e-09,
"loss": 0.1159,
"step": 15000
},
{
"epoch": 2.5942580421999306,
"eval_loss": 0.16035248339176178,
"eval_runtime": 18431.1546,
"eval_samples_per_second": 1.255,
"eval_steps_per_second": 0.157,
"eval_wer": 21.29949604875043,
"step": 15000
}
],
"logging_steps": 25,
"max_steps": 15000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.4492272449683456e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}