whisper-tiny-vaani-hindi / trainer_state.json
SujithPulikodan's picture
Upload 11 files
5dd6521 verified
{
"best_metric": 32.56401305446938,
"best_model_checkpoint": "./whisper-tiny-hi/checkpoint-15000",
"epoch": 2.5942580421999306,
"eval_steps": 1000,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004323763403666551,
"grad_norm": 16.286670684814453,
"learning_rate": 4.6000000000000004e-07,
"loss": 2.1388,
"step": 25
},
{
"epoch": 0.008647526807333102,
"grad_norm": 12.285518646240234,
"learning_rate": 9.600000000000001e-07,
"loss": 2.0213,
"step": 50
},
{
"epoch": 0.012971290210999653,
"grad_norm": 12.98936653137207,
"learning_rate": 1.46e-06,
"loss": 1.852,
"step": 75
},
{
"epoch": 0.017295053614666205,
"grad_norm": 11.324748992919922,
"learning_rate": 1.9600000000000003e-06,
"loss": 1.6847,
"step": 100
},
{
"epoch": 0.021618817018332757,
"grad_norm": 10.082378387451172,
"learning_rate": 2.46e-06,
"loss": 1.4744,
"step": 125
},
{
"epoch": 0.025942580421999307,
"grad_norm": 10.503169059753418,
"learning_rate": 2.96e-06,
"loss": 1.3716,
"step": 150
},
{
"epoch": 0.03026634382566586,
"grad_norm": 10.596274375915527,
"learning_rate": 3.46e-06,
"loss": 1.2115,
"step": 175
},
{
"epoch": 0.03459010722933241,
"grad_norm": 8.612921714782715,
"learning_rate": 3.96e-06,
"loss": 1.0883,
"step": 200
},
{
"epoch": 0.03891387063299896,
"grad_norm": 9.459671020507812,
"learning_rate": 4.4600000000000005e-06,
"loss": 0.9957,
"step": 225
},
{
"epoch": 0.043237634036665515,
"grad_norm": 9.334480285644531,
"learning_rate": 4.960000000000001e-06,
"loss": 0.8869,
"step": 250
},
{
"epoch": 0.04756139744033207,
"grad_norm": 8.94874095916748,
"learning_rate": 5.460000000000001e-06,
"loss": 0.8336,
"step": 275
},
{
"epoch": 0.051885160843998614,
"grad_norm": 7.839649677276611,
"learning_rate": 5.9600000000000005e-06,
"loss": 0.8094,
"step": 300
},
{
"epoch": 0.05620892424766517,
"grad_norm": 11.369429588317871,
"learning_rate": 6.460000000000001e-06,
"loss": 0.76,
"step": 325
},
{
"epoch": 0.06053268765133172,
"grad_norm": 7.519564628601074,
"learning_rate": 6.96e-06,
"loss": 0.7319,
"step": 350
},
{
"epoch": 0.06485645105499827,
"grad_norm": 10.504302024841309,
"learning_rate": 7.4600000000000006e-06,
"loss": 0.6995,
"step": 375
},
{
"epoch": 0.06918021445866482,
"grad_norm": 8.440424919128418,
"learning_rate": 7.960000000000002e-06,
"loss": 0.6782,
"step": 400
},
{
"epoch": 0.07350397786233137,
"grad_norm": 9.647106170654297,
"learning_rate": 8.46e-06,
"loss": 0.6475,
"step": 425
},
{
"epoch": 0.07782774126599792,
"grad_norm": 9.011175155639648,
"learning_rate": 8.96e-06,
"loss": 0.6515,
"step": 450
},
{
"epoch": 0.08215150466966448,
"grad_norm": 7.202056884765625,
"learning_rate": 9.460000000000001e-06,
"loss": 0.6173,
"step": 475
},
{
"epoch": 0.08647526807333103,
"grad_norm": 8.170417785644531,
"learning_rate": 9.960000000000001e-06,
"loss": 0.5954,
"step": 500
},
{
"epoch": 0.09079903147699758,
"grad_norm": 7.681850910186768,
"learning_rate": 9.984137931034483e-06,
"loss": 0.5821,
"step": 525
},
{
"epoch": 0.09512279488066414,
"grad_norm": 6.318211078643799,
"learning_rate": 9.96689655172414e-06,
"loss": 0.5575,
"step": 550
},
{
"epoch": 0.09944655828433069,
"grad_norm": 7.533044815063477,
"learning_rate": 9.949655172413793e-06,
"loss": 0.5778,
"step": 575
},
{
"epoch": 0.10377032168799723,
"grad_norm": 9.38764476776123,
"learning_rate": 9.93241379310345e-06,
"loss": 0.5735,
"step": 600
},
{
"epoch": 0.10809408509166378,
"grad_norm": 6.581768035888672,
"learning_rate": 9.915172413793104e-06,
"loss": 0.5358,
"step": 625
},
{
"epoch": 0.11241784849533033,
"grad_norm": 8.789082527160645,
"learning_rate": 9.897931034482759e-06,
"loss": 0.5619,
"step": 650
},
{
"epoch": 0.11674161189899689,
"grad_norm": 8.615039825439453,
"learning_rate": 9.880689655172414e-06,
"loss": 0.5211,
"step": 675
},
{
"epoch": 0.12106537530266344,
"grad_norm": 8.99618911743164,
"learning_rate": 9.86344827586207e-06,
"loss": 0.5039,
"step": 700
},
{
"epoch": 0.12538913870632998,
"grad_norm": 7.325024127960205,
"learning_rate": 9.846206896551725e-06,
"loss": 0.4967,
"step": 725
},
{
"epoch": 0.12971290210999653,
"grad_norm": 7.267101287841797,
"learning_rate": 9.82896551724138e-06,
"loss": 0.4913,
"step": 750
},
{
"epoch": 0.13403666551366308,
"grad_norm": 8.017245292663574,
"learning_rate": 9.811724137931035e-06,
"loss": 0.4841,
"step": 775
},
{
"epoch": 0.13836042891732964,
"grad_norm": 6.565851211547852,
"learning_rate": 9.79448275862069e-06,
"loss": 0.4826,
"step": 800
},
{
"epoch": 0.1426841923209962,
"grad_norm": 9.583740234375,
"learning_rate": 9.777241379310347e-06,
"loss": 0.4966,
"step": 825
},
{
"epoch": 0.14700795572466274,
"grad_norm": 6.968817234039307,
"learning_rate": 9.760000000000001e-06,
"loss": 0.4736,
"step": 850
},
{
"epoch": 0.1513317191283293,
"grad_norm": 6.600325107574463,
"learning_rate": 9.742758620689656e-06,
"loss": 0.5066,
"step": 875
},
{
"epoch": 0.15565548253199585,
"grad_norm": 7.23114538192749,
"learning_rate": 9.725517241379311e-06,
"loss": 0.4672,
"step": 900
},
{
"epoch": 0.1599792459356624,
"grad_norm": 9.536112785339355,
"learning_rate": 9.708275862068966e-06,
"loss": 0.4724,
"step": 925
},
{
"epoch": 0.16430300933932895,
"grad_norm": 8.753514289855957,
"learning_rate": 9.691034482758621e-06,
"loss": 0.458,
"step": 950
},
{
"epoch": 0.1686267727429955,
"grad_norm": 9.407349586486816,
"learning_rate": 9.673793103448277e-06,
"loss": 0.4598,
"step": 975
},
{
"epoch": 0.17295053614666206,
"grad_norm": 6.315821647644043,
"learning_rate": 9.65655172413793e-06,
"loss": 0.4332,
"step": 1000
},
{
"epoch": 0.17295053614666206,
"eval_loss": 0.4459797739982605,
"eval_runtime": 5597.1729,
"eval_samples_per_second": 4.132,
"eval_steps_per_second": 0.517,
"eval_wer": 49.49288587647923,
"step": 1000
},
{
"epoch": 0.1772742995503286,
"grad_norm": 6.963139533996582,
"learning_rate": 9.639310344827587e-06,
"loss": 0.4519,
"step": 1025
},
{
"epoch": 0.18159806295399517,
"grad_norm": 6.76003360748291,
"learning_rate": 9.622068965517242e-06,
"loss": 0.4627,
"step": 1050
},
{
"epoch": 0.18592182635766172,
"grad_norm": 6.6258416175842285,
"learning_rate": 9.604827586206897e-06,
"loss": 0.4318,
"step": 1075
},
{
"epoch": 0.19024558976132827,
"grad_norm": 6.607818603515625,
"learning_rate": 9.587586206896554e-06,
"loss": 0.4362,
"step": 1100
},
{
"epoch": 0.19456935316499482,
"grad_norm": 6.816850662231445,
"learning_rate": 9.570344827586208e-06,
"loss": 0.4379,
"step": 1125
},
{
"epoch": 0.19889311656866138,
"grad_norm": 7.295541763305664,
"learning_rate": 9.553103448275863e-06,
"loss": 0.4327,
"step": 1150
},
{
"epoch": 0.2032168799723279,
"grad_norm": 5.751529693603516,
"learning_rate": 9.535862068965518e-06,
"loss": 0.434,
"step": 1175
},
{
"epoch": 0.20754064337599445,
"grad_norm": 7.614989280700684,
"learning_rate": 9.518620689655173e-06,
"loss": 0.4214,
"step": 1200
},
{
"epoch": 0.211864406779661,
"grad_norm": 7.232028961181641,
"learning_rate": 9.501379310344828e-06,
"loss": 0.4246,
"step": 1225
},
{
"epoch": 0.21618817018332756,
"grad_norm": 5.579376697540283,
"learning_rate": 9.484137931034484e-06,
"loss": 0.4118,
"step": 1250
},
{
"epoch": 0.2205119335869941,
"grad_norm": 6.2615580558776855,
"learning_rate": 9.46689655172414e-06,
"loss": 0.4286,
"step": 1275
},
{
"epoch": 0.22483569699066067,
"grad_norm": 6.8341240882873535,
"learning_rate": 9.449655172413794e-06,
"loss": 0.411,
"step": 1300
},
{
"epoch": 0.22915946039432722,
"grad_norm": 5.939593315124512,
"learning_rate": 9.432413793103449e-06,
"loss": 0.4254,
"step": 1325
},
{
"epoch": 0.23348322379799377,
"grad_norm": 6.054545879364014,
"learning_rate": 9.415172413793104e-06,
"loss": 0.4094,
"step": 1350
},
{
"epoch": 0.23780698720166032,
"grad_norm": 6.636862277984619,
"learning_rate": 9.397931034482759e-06,
"loss": 0.4256,
"step": 1375
},
{
"epoch": 0.24213075060532688,
"grad_norm": 6.010420322418213,
"learning_rate": 9.380689655172415e-06,
"loss": 0.4267,
"step": 1400
},
{
"epoch": 0.24645451400899343,
"grad_norm": 8.598302841186523,
"learning_rate": 9.363448275862069e-06,
"loss": 0.4223,
"step": 1425
},
{
"epoch": 0.25077827741265996,
"grad_norm": 6.458983421325684,
"learning_rate": 9.346206896551725e-06,
"loss": 0.4123,
"step": 1450
},
{
"epoch": 0.25510204081632654,
"grad_norm": 6.562103748321533,
"learning_rate": 9.32896551724138e-06,
"loss": 0.4,
"step": 1475
},
{
"epoch": 0.25942580421999306,
"grad_norm": 7.095609188079834,
"learning_rate": 9.311724137931035e-06,
"loss": 0.4121,
"step": 1500
},
{
"epoch": 0.26374956762365964,
"grad_norm": 6.2590789794921875,
"learning_rate": 9.294482758620691e-06,
"loss": 0.395,
"step": 1525
},
{
"epoch": 0.26807333102732617,
"grad_norm": 7.436612606048584,
"learning_rate": 9.277241379310346e-06,
"loss": 0.4271,
"step": 1550
},
{
"epoch": 0.27239709443099275,
"grad_norm": 6.528996467590332,
"learning_rate": 9.260000000000001e-06,
"loss": 0.3954,
"step": 1575
},
{
"epoch": 0.2767208578346593,
"grad_norm": 7.369086265563965,
"learning_rate": 9.242758620689656e-06,
"loss": 0.388,
"step": 1600
},
{
"epoch": 0.28104462123832585,
"grad_norm": 5.275778770446777,
"learning_rate": 9.225517241379311e-06,
"loss": 0.3789,
"step": 1625
},
{
"epoch": 0.2853683846419924,
"grad_norm": 5.707178115844727,
"learning_rate": 9.208275862068966e-06,
"loss": 0.3748,
"step": 1650
},
{
"epoch": 0.28969214804565896,
"grad_norm": 6.311554908752441,
"learning_rate": 9.191034482758622e-06,
"loss": 0.399,
"step": 1675
},
{
"epoch": 0.2940159114493255,
"grad_norm": 5.5442633628845215,
"learning_rate": 9.173793103448277e-06,
"loss": 0.3857,
"step": 1700
},
{
"epoch": 0.29833967485299207,
"grad_norm": 5.790552616119385,
"learning_rate": 9.156551724137932e-06,
"loss": 0.3914,
"step": 1725
},
{
"epoch": 0.3026634382566586,
"grad_norm": 6.97217321395874,
"learning_rate": 9.139310344827587e-06,
"loss": 0.3832,
"step": 1750
},
{
"epoch": 0.30698720166032517,
"grad_norm": 7.6458353996276855,
"learning_rate": 9.122068965517242e-06,
"loss": 0.3863,
"step": 1775
},
{
"epoch": 0.3113109650639917,
"grad_norm": 6.375527381896973,
"learning_rate": 9.104827586206897e-06,
"loss": 0.3717,
"step": 1800
},
{
"epoch": 0.3156347284676583,
"grad_norm": 7.621360778808594,
"learning_rate": 9.087586206896553e-06,
"loss": 0.4053,
"step": 1825
},
{
"epoch": 0.3199584918713248,
"grad_norm": 5.717822551727295,
"learning_rate": 9.070344827586206e-06,
"loss": 0.375,
"step": 1850
},
{
"epoch": 0.3242822552749913,
"grad_norm": 6.005116939544678,
"learning_rate": 9.053103448275863e-06,
"loss": 0.407,
"step": 1875
},
{
"epoch": 0.3286060186786579,
"grad_norm": 6.100674629211426,
"learning_rate": 9.035862068965518e-06,
"loss": 0.383,
"step": 1900
},
{
"epoch": 0.33292978208232443,
"grad_norm": 5.4287495613098145,
"learning_rate": 9.018620689655173e-06,
"loss": 0.3685,
"step": 1925
},
{
"epoch": 0.337253545485991,
"grad_norm": 6.621429920196533,
"learning_rate": 9.00137931034483e-06,
"loss": 0.3858,
"step": 1950
},
{
"epoch": 0.34157730888965754,
"grad_norm": 7.438424110412598,
"learning_rate": 8.984137931034484e-06,
"loss": 0.3874,
"step": 1975
},
{
"epoch": 0.3459010722933241,
"grad_norm": 7.171720027923584,
"learning_rate": 8.966896551724139e-06,
"loss": 0.3796,
"step": 2000
},
{
"epoch": 0.3459010722933241,
"eval_loss": 0.3686262369155884,
"eval_runtime": 5442.7156,
"eval_samples_per_second": 4.249,
"eval_steps_per_second": 0.531,
"eval_wer": 43.32270451494399,
"step": 2000
},
{
"epoch": 0.35022483569699064,
"grad_norm": 5.1768059730529785,
"learning_rate": 8.949655172413794e-06,
"loss": 0.3639,
"step": 2025
},
{
"epoch": 0.3545485991006572,
"grad_norm": 6.131287574768066,
"learning_rate": 8.932413793103449e-06,
"loss": 0.3684,
"step": 2050
},
{
"epoch": 0.35887236250432375,
"grad_norm": 6.503818988800049,
"learning_rate": 8.915172413793104e-06,
"loss": 0.3804,
"step": 2075
},
{
"epoch": 0.36319612590799033,
"grad_norm": 5.307014465332031,
"learning_rate": 8.89793103448276e-06,
"loss": 0.3778,
"step": 2100
},
{
"epoch": 0.36751988931165686,
"grad_norm": 4.741936683654785,
"learning_rate": 8.880689655172415e-06,
"loss": 0.3642,
"step": 2125
},
{
"epoch": 0.37184365271532344,
"grad_norm": 5.130821228027344,
"learning_rate": 8.864137931034484e-06,
"loss": 0.3771,
"step": 2150
},
{
"epoch": 0.37616741611898996,
"grad_norm": 6.177253723144531,
"learning_rate": 8.846896551724138e-06,
"loss": 0.3645,
"step": 2175
},
{
"epoch": 0.38049117952265654,
"grad_norm": 5.380115509033203,
"learning_rate": 8.829655172413793e-06,
"loss": 0.3637,
"step": 2200
},
{
"epoch": 0.38481494292632307,
"grad_norm": 7.195498943328857,
"learning_rate": 8.81241379310345e-06,
"loss": 0.3628,
"step": 2225
},
{
"epoch": 0.38913870632998965,
"grad_norm": 6.12623929977417,
"learning_rate": 8.795172413793103e-06,
"loss": 0.3847,
"step": 2250
},
{
"epoch": 0.3934624697336562,
"grad_norm": 6.666962623596191,
"learning_rate": 8.77793103448276e-06,
"loss": 0.3776,
"step": 2275
},
{
"epoch": 0.39778623313732275,
"grad_norm": 6.3228325843811035,
"learning_rate": 8.760689655172415e-06,
"loss": 0.3603,
"step": 2300
},
{
"epoch": 0.4021099965409893,
"grad_norm": 5.662914276123047,
"learning_rate": 8.74344827586207e-06,
"loss": 0.341,
"step": 2325
},
{
"epoch": 0.4064337599446558,
"grad_norm": 6.619643211364746,
"learning_rate": 8.726206896551724e-06,
"loss": 0.3513,
"step": 2350
},
{
"epoch": 0.4107575233483224,
"grad_norm": 5.331205368041992,
"learning_rate": 8.70896551724138e-06,
"loss": 0.3531,
"step": 2375
},
{
"epoch": 0.4150812867519889,
"grad_norm": 6.8525824546813965,
"learning_rate": 8.691724137931034e-06,
"loss": 0.358,
"step": 2400
},
{
"epoch": 0.4194050501556555,
"grad_norm": 6.635438442230225,
"learning_rate": 8.67448275862069e-06,
"loss": 0.3756,
"step": 2425
},
{
"epoch": 0.423728813559322,
"grad_norm": 5.737441062927246,
"learning_rate": 8.657241379310345e-06,
"loss": 0.3502,
"step": 2450
},
{
"epoch": 0.4280525769629886,
"grad_norm": 6.25765323638916,
"learning_rate": 8.64e-06,
"loss": 0.3632,
"step": 2475
},
{
"epoch": 0.4323763403666551,
"grad_norm": 6.037199020385742,
"learning_rate": 8.622758620689657e-06,
"loss": 0.3622,
"step": 2500
},
{
"epoch": 0.4367001037703217,
"grad_norm": 6.072882175445557,
"learning_rate": 8.605517241379312e-06,
"loss": 0.3559,
"step": 2525
},
{
"epoch": 0.4410238671739882,
"grad_norm": 6.014054298400879,
"learning_rate": 8.588275862068967e-06,
"loss": 0.376,
"step": 2550
},
{
"epoch": 0.4453476305776548,
"grad_norm": 6.040060043334961,
"learning_rate": 8.571034482758621e-06,
"loss": 0.3749,
"step": 2575
},
{
"epoch": 0.44967139398132133,
"grad_norm": 5.293423652648926,
"learning_rate": 8.553793103448276e-06,
"loss": 0.3617,
"step": 2600
},
{
"epoch": 0.4539951573849879,
"grad_norm": 6.393920421600342,
"learning_rate": 8.536551724137931e-06,
"loss": 0.3506,
"step": 2625
},
{
"epoch": 0.45831892078865444,
"grad_norm": 5.98794412612915,
"learning_rate": 8.519310344827588e-06,
"loss": 0.3523,
"step": 2650
},
{
"epoch": 0.462642684192321,
"grad_norm": 5.70527458190918,
"learning_rate": 8.502068965517241e-06,
"loss": 0.3622,
"step": 2675
},
{
"epoch": 0.46696644759598754,
"grad_norm": 6.078751087188721,
"learning_rate": 8.484827586206898e-06,
"loss": 0.3403,
"step": 2700
},
{
"epoch": 0.4712902109996541,
"grad_norm": 6.641058444976807,
"learning_rate": 8.467586206896552e-06,
"loss": 0.3506,
"step": 2725
},
{
"epoch": 0.47561397440332065,
"grad_norm": 6.066605091094971,
"learning_rate": 8.450344827586207e-06,
"loss": 0.3773,
"step": 2750
},
{
"epoch": 0.4799377378069872,
"grad_norm": 6.622874736785889,
"learning_rate": 8.433103448275862e-06,
"loss": 0.3435,
"step": 2775
},
{
"epoch": 0.48426150121065376,
"grad_norm": 5.157226085662842,
"learning_rate": 8.415862068965519e-06,
"loss": 0.3586,
"step": 2800
},
{
"epoch": 0.4885852646143203,
"grad_norm": 5.92050838470459,
"learning_rate": 8.398620689655174e-06,
"loss": 0.352,
"step": 2825
},
{
"epoch": 0.49290902801798686,
"grad_norm": 5.522551536560059,
"learning_rate": 8.381379310344828e-06,
"loss": 0.3447,
"step": 2850
},
{
"epoch": 0.4972327914216534,
"grad_norm": 6.284322738647461,
"learning_rate": 8.364137931034483e-06,
"loss": 0.3477,
"step": 2875
},
{
"epoch": 0.5015565548253199,
"grad_norm": 6.7316179275512695,
"learning_rate": 8.346896551724138e-06,
"loss": 0.3402,
"step": 2900
},
{
"epoch": 0.5058803182289865,
"grad_norm": 5.808598518371582,
"learning_rate": 8.329655172413795e-06,
"loss": 0.3332,
"step": 2925
},
{
"epoch": 0.5102040816326531,
"grad_norm": 5.109462261199951,
"learning_rate": 8.31241379310345e-06,
"loss": 0.3465,
"step": 2950
},
{
"epoch": 0.5145278450363197,
"grad_norm": 6.471071720123291,
"learning_rate": 8.295172413793104e-06,
"loss": 0.3501,
"step": 2975
},
{
"epoch": 0.5188516084399861,
"grad_norm": 7.2887139320373535,
"learning_rate": 8.27793103448276e-06,
"loss": 0.3414,
"step": 3000
},
{
"epoch": 0.5188516084399861,
"eval_loss": 0.33114978671073914,
"eval_runtime": 5410.7097,
"eval_samples_per_second": 4.274,
"eval_steps_per_second": 0.534,
"eval_wer": 39.80441724633165,
"step": 3000
},
{
"epoch": 0.5231753718436527,
"grad_norm": 5.811481475830078,
"learning_rate": 8.260689655172414e-06,
"loss": 0.3286,
"step": 3025
},
{
"epoch": 0.5274991352473193,
"grad_norm": 5.4608869552612305,
"learning_rate": 8.243448275862069e-06,
"loss": 0.3255,
"step": 3050
},
{
"epoch": 0.5318228986509859,
"grad_norm": 6.3298797607421875,
"learning_rate": 8.226206896551726e-06,
"loss": 0.3299,
"step": 3075
},
{
"epoch": 0.5361466620546523,
"grad_norm": 5.198216915130615,
"learning_rate": 8.208965517241379e-06,
"loss": 0.3463,
"step": 3100
},
{
"epoch": 0.5404704254583189,
"grad_norm": 4.894079685211182,
"learning_rate": 8.191724137931035e-06,
"loss": 0.3362,
"step": 3125
},
{
"epoch": 0.5447941888619855,
"grad_norm": 5.518843650817871,
"learning_rate": 8.17448275862069e-06,
"loss": 0.3592,
"step": 3150
},
{
"epoch": 0.5491179522656521,
"grad_norm": 5.926825046539307,
"learning_rate": 8.157241379310345e-06,
"loss": 0.3402,
"step": 3175
},
{
"epoch": 0.5534417156693185,
"grad_norm": 5.385416507720947,
"learning_rate": 8.14e-06,
"loss": 0.3308,
"step": 3200
},
{
"epoch": 0.5577654790729851,
"grad_norm": 4.563920497894287,
"learning_rate": 8.122758620689657e-06,
"loss": 0.3512,
"step": 3225
},
{
"epoch": 0.5620892424766517,
"grad_norm": 5.665447235107422,
"learning_rate": 8.105517241379311e-06,
"loss": 0.3256,
"step": 3250
},
{
"epoch": 0.5664130058803182,
"grad_norm": 6.340756416320801,
"learning_rate": 8.088275862068966e-06,
"loss": 0.3264,
"step": 3275
},
{
"epoch": 0.5707367692839848,
"grad_norm": 5.052750587463379,
"learning_rate": 8.071034482758621e-06,
"loss": 0.3144,
"step": 3300
},
{
"epoch": 0.5750605326876513,
"grad_norm": 6.486083984375,
"learning_rate": 8.053793103448276e-06,
"loss": 0.3306,
"step": 3325
},
{
"epoch": 0.5793842960913179,
"grad_norm": 6.098331451416016,
"learning_rate": 8.036551724137933e-06,
"loss": 0.3363,
"step": 3350
},
{
"epoch": 0.5837080594949844,
"grad_norm": 5.329631328582764,
"learning_rate": 8.019310344827587e-06,
"loss": 0.3245,
"step": 3375
},
{
"epoch": 0.588031822898651,
"grad_norm": 5.355101108551025,
"learning_rate": 8.002068965517242e-06,
"loss": 0.3213,
"step": 3400
},
{
"epoch": 0.5923555863023175,
"grad_norm": 6.330533504486084,
"learning_rate": 7.984827586206897e-06,
"loss": 0.338,
"step": 3425
},
{
"epoch": 0.5966793497059841,
"grad_norm": 5.799704551696777,
"learning_rate": 7.967586206896552e-06,
"loss": 0.3348,
"step": 3450
},
{
"epoch": 0.6010031131096506,
"grad_norm": 6.250985145568848,
"learning_rate": 7.950344827586207e-06,
"loss": 0.3275,
"step": 3475
},
{
"epoch": 0.6053268765133172,
"grad_norm": 5.608683109283447,
"learning_rate": 7.933103448275864e-06,
"loss": 0.3264,
"step": 3500
},
{
"epoch": 0.6096506399169838,
"grad_norm": 4.862841606140137,
"learning_rate": 7.915862068965517e-06,
"loss": 0.3352,
"step": 3525
},
{
"epoch": 0.6139744033206503,
"grad_norm": 6.654427528381348,
"learning_rate": 7.898620689655173e-06,
"loss": 0.337,
"step": 3550
},
{
"epoch": 0.6182981667243168,
"grad_norm": 4.726236820220947,
"learning_rate": 7.881379310344828e-06,
"loss": 0.34,
"step": 3575
},
{
"epoch": 0.6226219301279834,
"grad_norm": 5.243809223175049,
"learning_rate": 7.864137931034483e-06,
"loss": 0.3181,
"step": 3600
},
{
"epoch": 0.62694569353165,
"grad_norm": 5.479759216308594,
"learning_rate": 7.84689655172414e-06,
"loss": 0.3297,
"step": 3625
},
{
"epoch": 0.6312694569353166,
"grad_norm": 5.421146392822266,
"learning_rate": 7.829655172413794e-06,
"loss": 0.3221,
"step": 3650
},
{
"epoch": 0.635593220338983,
"grad_norm": 5.118608474731445,
"learning_rate": 7.81241379310345e-06,
"loss": 0.3081,
"step": 3675
},
{
"epoch": 0.6399169837426496,
"grad_norm": 5.862517356872559,
"learning_rate": 7.795172413793104e-06,
"loss": 0.3128,
"step": 3700
},
{
"epoch": 0.6442407471463162,
"grad_norm": 5.084294319152832,
"learning_rate": 7.777931034482759e-06,
"loss": 0.3245,
"step": 3725
},
{
"epoch": 0.6485645105499827,
"grad_norm": 5.693409442901611,
"learning_rate": 7.760689655172414e-06,
"loss": 0.3167,
"step": 3750
},
{
"epoch": 0.6528882739536492,
"grad_norm": 4.5151286125183105,
"learning_rate": 7.74344827586207e-06,
"loss": 0.3278,
"step": 3775
},
{
"epoch": 0.6572120373573158,
"grad_norm": 6.0459442138671875,
"learning_rate": 7.726206896551725e-06,
"loss": 0.3403,
"step": 3800
},
{
"epoch": 0.6615358007609824,
"grad_norm": 5.351093292236328,
"learning_rate": 7.70896551724138e-06,
"loss": 0.311,
"step": 3825
},
{
"epoch": 0.6658595641646489,
"grad_norm": 5.750779151916504,
"learning_rate": 7.691724137931035e-06,
"loss": 0.3112,
"step": 3850
},
{
"epoch": 0.6701833275683154,
"grad_norm": 5.5272216796875,
"learning_rate": 7.67448275862069e-06,
"loss": 0.32,
"step": 3875
},
{
"epoch": 0.674507090971982,
"grad_norm": 5.890946865081787,
"learning_rate": 7.657241379310345e-06,
"loss": 0.3434,
"step": 3900
},
{
"epoch": 0.6788308543756486,
"grad_norm": 6.403041839599609,
"learning_rate": 7.640000000000001e-06,
"loss": 0.3194,
"step": 3925
},
{
"epoch": 0.6831546177793151,
"grad_norm": 4.705821990966797,
"learning_rate": 7.622758620689655e-06,
"loss": 0.3038,
"step": 3950
},
{
"epoch": 0.6874783811829817,
"grad_norm": 4.945014953613281,
"learning_rate": 7.605517241379311e-06,
"loss": 0.3414,
"step": 3975
},
{
"epoch": 0.6918021445866482,
"grad_norm": 6.610292434692383,
"learning_rate": 7.588275862068966e-06,
"loss": 0.3374,
"step": 4000
},
{
"epoch": 0.6918021445866482,
"eval_loss": 0.312212198972702,
"eval_runtime": 5649.9248,
"eval_samples_per_second": 4.093,
"eval_steps_per_second": 0.512,
"eval_wer": 38.40712982074705,
"step": 4000
},
{
"epoch": 0.6961259079903148,
"grad_norm": 5.513612270355225,
"learning_rate": 7.571034482758622e-06,
"loss": 0.3368,
"step": 4025
},
{
"epoch": 0.7004496713939813,
"grad_norm": 4.718435764312744,
"learning_rate": 7.553793103448277e-06,
"loss": 0.291,
"step": 4050
},
{
"epoch": 0.7047734347976479,
"grad_norm": 5.8622965812683105,
"learning_rate": 7.5365517241379315e-06,
"loss": 0.2975,
"step": 4075
},
{
"epoch": 0.7090971982013144,
"grad_norm": 4.535427570343018,
"learning_rate": 7.519310344827587e-06,
"loss": 0.2925,
"step": 4100
},
{
"epoch": 0.713420961604981,
"grad_norm": 5.352747440338135,
"learning_rate": 7.502068965517242e-06,
"loss": 0.3202,
"step": 4125
},
{
"epoch": 0.7177447250086475,
"grad_norm": 4.6951446533203125,
"learning_rate": 7.484827586206898e-06,
"loss": 0.3239,
"step": 4150
},
{
"epoch": 0.7220684884123141,
"grad_norm": 5.972590923309326,
"learning_rate": 7.467586206896552e-06,
"loss": 0.3203,
"step": 4175
},
{
"epoch": 0.7263922518159807,
"grad_norm": 5.971949577331543,
"learning_rate": 7.4503448275862075e-06,
"loss": 0.3207,
"step": 4200
},
{
"epoch": 0.7307160152196471,
"grad_norm": 5.5973334312438965,
"learning_rate": 7.433103448275862e-06,
"loss": 0.3248,
"step": 4225
},
{
"epoch": 0.7350397786233137,
"grad_norm": 5.686942100524902,
"learning_rate": 7.415862068965518e-06,
"loss": 0.3349,
"step": 4250
},
{
"epoch": 0.7393635420269803,
"grad_norm": 4.615256309509277,
"learning_rate": 7.398620689655173e-06,
"loss": 0.3259,
"step": 4275
},
{
"epoch": 0.7436873054306469,
"grad_norm": 7.120574951171875,
"learning_rate": 7.381379310344829e-06,
"loss": 0.3444,
"step": 4300
},
{
"epoch": 0.7480110688343133,
"grad_norm": 5.114111423492432,
"learning_rate": 7.364137931034483e-06,
"loss": 0.315,
"step": 4325
},
{
"epoch": 0.7523348322379799,
"grad_norm": 5.820385456085205,
"learning_rate": 7.346896551724138e-06,
"loss": 0.3149,
"step": 4350
},
{
"epoch": 0.7566585956416465,
"grad_norm": 5.231871128082275,
"learning_rate": 7.330344827586208e-06,
"loss": 0.3251,
"step": 4375
},
{
"epoch": 0.7609823590453131,
"grad_norm": 5.228817462921143,
"learning_rate": 7.313103448275863e-06,
"loss": 0.3162,
"step": 4400
},
{
"epoch": 0.7653061224489796,
"grad_norm": 4.916074752807617,
"learning_rate": 7.2958620689655175e-06,
"loss": 0.304,
"step": 4425
},
{
"epoch": 0.7696298858526461,
"grad_norm": 5.931058883666992,
"learning_rate": 7.278620689655172e-06,
"loss": 0.3304,
"step": 4450
},
{
"epoch": 0.7739536492563127,
"grad_norm": 5.873823165893555,
"learning_rate": 7.261379310344828e-06,
"loss": 0.3398,
"step": 4475
},
{
"epoch": 0.7782774126599793,
"grad_norm": 5.279876708984375,
"learning_rate": 7.244137931034483e-06,
"loss": 0.3064,
"step": 4500
},
{
"epoch": 0.7826011760636458,
"grad_norm": 4.812004089355469,
"learning_rate": 7.226896551724139e-06,
"loss": 0.3063,
"step": 4525
},
{
"epoch": 0.7869249394673123,
"grad_norm": 6.433504581451416,
"learning_rate": 7.2096551724137944e-06,
"loss": 0.3224,
"step": 4550
},
{
"epoch": 0.7912487028709789,
"grad_norm": 5.206650257110596,
"learning_rate": 7.1924137931034485e-06,
"loss": 0.3247,
"step": 4575
},
{
"epoch": 0.7955724662746455,
"grad_norm": 5.398522853851318,
"learning_rate": 7.175172413793104e-06,
"loss": 0.3017,
"step": 4600
},
{
"epoch": 0.799896229678312,
"grad_norm": 4.88048791885376,
"learning_rate": 7.157931034482759e-06,
"loss": 0.3218,
"step": 4625
},
{
"epoch": 0.8042199930819786,
"grad_norm": 4.6434407234191895,
"learning_rate": 7.140689655172415e-06,
"loss": 0.3134,
"step": 4650
},
{
"epoch": 0.8085437564856451,
"grad_norm": 5.189647674560547,
"learning_rate": 7.12344827586207e-06,
"loss": 0.3068,
"step": 4675
},
{
"epoch": 0.8128675198893116,
"grad_norm": 5.023260116577148,
"learning_rate": 7.106206896551725e-06,
"loss": 0.2977,
"step": 4700
},
{
"epoch": 0.8171912832929782,
"grad_norm": 5.623253345489502,
"learning_rate": 7.088965517241379e-06,
"loss": 0.3083,
"step": 4725
},
{
"epoch": 0.8215150466966448,
"grad_norm": 5.7590789794921875,
"learning_rate": 7.071724137931035e-06,
"loss": 0.3113,
"step": 4750
},
{
"epoch": 0.8258388101003113,
"grad_norm": 5.757692813873291,
"learning_rate": 7.05448275862069e-06,
"loss": 0.3204,
"step": 4775
},
{
"epoch": 0.8301625735039778,
"grad_norm": 4.696317195892334,
"learning_rate": 7.037241379310346e-06,
"loss": 0.2966,
"step": 4800
},
{
"epoch": 0.8344863369076444,
"grad_norm": 5.004659175872803,
"learning_rate": 7.0200000000000006e-06,
"loss": 0.3017,
"step": 4825
},
{
"epoch": 0.838810100311311,
"grad_norm": 5.880341529846191,
"learning_rate": 7.002758620689655e-06,
"loss": 0.3177,
"step": 4850
},
{
"epoch": 0.8431338637149776,
"grad_norm": 5.527527809143066,
"learning_rate": 6.98551724137931e-06,
"loss": 0.3056,
"step": 4875
},
{
"epoch": 0.847457627118644,
"grad_norm": 5.48380708694458,
"learning_rate": 6.968275862068966e-06,
"loss": 0.2952,
"step": 4900
},
{
"epoch": 0.8517813905223106,
"grad_norm": 3.9720652103424072,
"learning_rate": 6.951034482758622e-06,
"loss": 0.2961,
"step": 4925
},
{
"epoch": 0.8561051539259772,
"grad_norm": 5.183377265930176,
"learning_rate": 6.933793103448277e-06,
"loss": 0.3051,
"step": 4950
},
{
"epoch": 0.8604289173296438,
"grad_norm": 5.809779644012451,
"learning_rate": 6.916551724137932e-06,
"loss": 0.3191,
"step": 4975
},
{
"epoch": 0.8647526807333102,
"grad_norm": 5.579798698425293,
"learning_rate": 6.899310344827586e-06,
"loss": 0.3185,
"step": 5000
},
{
"epoch": 0.8647526807333102,
"eval_loss": 0.29861804842948914,
"eval_runtime": 5403.1228,
"eval_samples_per_second": 4.28,
"eval_steps_per_second": 0.535,
"eval_wer": 37.68833742945034,
"step": 5000
},
{
"epoch": 0.8690764441369768,
"grad_norm": 3.995452880859375,
"learning_rate": 6.882068965517242e-06,
"loss": 0.3095,
"step": 5025
},
{
"epoch": 0.8734002075406434,
"grad_norm": 5.617555618286133,
"learning_rate": 6.864827586206897e-06,
"loss": 0.3139,
"step": 5050
},
{
"epoch": 0.8777239709443099,
"grad_norm": 5.809163570404053,
"learning_rate": 6.847586206896553e-06,
"loss": 0.3209,
"step": 5075
},
{
"epoch": 0.8820477343479765,
"grad_norm": 6.078767776489258,
"learning_rate": 6.8303448275862075e-06,
"loss": 0.3019,
"step": 5100
},
{
"epoch": 0.886371497751643,
"grad_norm": 5.000216007232666,
"learning_rate": 6.813103448275863e-06,
"loss": 0.3195,
"step": 5125
},
{
"epoch": 0.8906952611553096,
"grad_norm": 4.086789131164551,
"learning_rate": 6.795862068965517e-06,
"loss": 0.2903,
"step": 5150
},
{
"epoch": 0.8950190245589761,
"grad_norm": 6.727631568908691,
"learning_rate": 6.778620689655173e-06,
"loss": 0.3091,
"step": 5175
},
{
"epoch": 0.8993427879626427,
"grad_norm": 5.265336513519287,
"learning_rate": 6.761379310344828e-06,
"loss": 0.309,
"step": 5200
},
{
"epoch": 0.9036665513663092,
"grad_norm": 4.668389320373535,
"learning_rate": 6.7441379310344836e-06,
"loss": 0.2931,
"step": 5225
},
{
"epoch": 0.9079903147699758,
"grad_norm": 5.374257564544678,
"learning_rate": 6.7268965517241384e-06,
"loss": 0.2978,
"step": 5250
},
{
"epoch": 0.9123140781736423,
"grad_norm": 4.311838150024414,
"learning_rate": 6.709655172413793e-06,
"loss": 0.2889,
"step": 5275
},
{
"epoch": 0.9166378415773089,
"grad_norm": 4.521867275238037,
"learning_rate": 6.692413793103448e-06,
"loss": 0.3059,
"step": 5300
},
{
"epoch": 0.9209616049809755,
"grad_norm": 5.439430236816406,
"learning_rate": 6.675172413793104e-06,
"loss": 0.3191,
"step": 5325
},
{
"epoch": 0.925285368384642,
"grad_norm": 6.107820987701416,
"learning_rate": 6.65793103448276e-06,
"loss": 0.3078,
"step": 5350
},
{
"epoch": 0.9296091317883085,
"grad_norm": 5.370652675628662,
"learning_rate": 6.6406896551724145e-06,
"loss": 0.3103,
"step": 5375
},
{
"epoch": 0.9339328951919751,
"grad_norm": 6.220107555389404,
"learning_rate": 6.62344827586207e-06,
"loss": 0.3074,
"step": 5400
},
{
"epoch": 0.9382566585956417,
"grad_norm": 6.766858100891113,
"learning_rate": 6.606206896551724e-06,
"loss": 0.2855,
"step": 5425
},
{
"epoch": 0.9425804219993082,
"grad_norm": 5.859302043914795,
"learning_rate": 6.58896551724138e-06,
"loss": 0.311,
"step": 5450
},
{
"epoch": 0.9469041854029747,
"grad_norm": 5.242767810821533,
"learning_rate": 6.571724137931035e-06,
"loss": 0.3057,
"step": 5475
},
{
"epoch": 0.9512279488066413,
"grad_norm": 5.731853008270264,
"learning_rate": 6.5544827586206905e-06,
"loss": 0.3083,
"step": 5500
},
{
"epoch": 0.9555517122103079,
"grad_norm": 5.8285040855407715,
"learning_rate": 6.537241379310345e-06,
"loss": 0.2989,
"step": 5525
},
{
"epoch": 0.9598754756139743,
"grad_norm": 6.33150053024292,
"learning_rate": 6.520000000000001e-06,
"loss": 0.2985,
"step": 5550
},
{
"epoch": 0.9641992390176409,
"grad_norm": 4.892938613891602,
"learning_rate": 6.502758620689655e-06,
"loss": 0.3094,
"step": 5575
},
{
"epoch": 0.9685230024213075,
"grad_norm": 4.8297648429870605,
"learning_rate": 6.485517241379311e-06,
"loss": 0.3145,
"step": 5600
},
{
"epoch": 0.9728467658249741,
"grad_norm": 5.7645111083984375,
"learning_rate": 6.468275862068966e-06,
"loss": 0.3013,
"step": 5625
},
{
"epoch": 0.9771705292286406,
"grad_norm": 4.90895414352417,
"learning_rate": 6.4510344827586214e-06,
"loss": 0.3054,
"step": 5650
},
{
"epoch": 0.9814942926323071,
"grad_norm": 5.324610710144043,
"learning_rate": 6.433793103448276e-06,
"loss": 0.3134,
"step": 5675
},
{
"epoch": 0.9858180560359737,
"grad_norm": 4.568467140197754,
"learning_rate": 6.416551724137931e-06,
"loss": 0.2974,
"step": 5700
},
{
"epoch": 0.9901418194396403,
"grad_norm": 5.177453994750977,
"learning_rate": 6.399310344827587e-06,
"loss": 0.3156,
"step": 5725
},
{
"epoch": 0.9944655828433068,
"grad_norm": 6.221437454223633,
"learning_rate": 6.382068965517242e-06,
"loss": 0.3109,
"step": 5750
},
{
"epoch": 0.9987893462469734,
"grad_norm": 5.171840667724609,
"learning_rate": 6.3648275862068975e-06,
"loss": 0.315,
"step": 5775
},
{
"epoch": 1.0031131096506398,
"grad_norm": 5.1609907150268555,
"learning_rate": 6.347586206896552e-06,
"loss": 0.2943,
"step": 5800
},
{
"epoch": 1.0074368730543064,
"grad_norm": 5.415836811065674,
"learning_rate": 6.330344827586208e-06,
"loss": 0.2833,
"step": 5825
},
{
"epoch": 1.011760636457973,
"grad_norm": 5.87399435043335,
"learning_rate": 6.313103448275862e-06,
"loss": 0.2806,
"step": 5850
},
{
"epoch": 1.0160843998616396,
"grad_norm": 6.694667816162109,
"learning_rate": 6.295862068965518e-06,
"loss": 0.2844,
"step": 5875
},
{
"epoch": 1.0204081632653061,
"grad_norm": 6.607420444488525,
"learning_rate": 6.278620689655173e-06,
"loss": 0.3028,
"step": 5900
},
{
"epoch": 1.0247319266689727,
"grad_norm": 5.000158309936523,
"learning_rate": 6.261379310344828e-06,
"loss": 0.287,
"step": 5925
},
{
"epoch": 1.0290556900726393,
"grad_norm": 4.624439239501953,
"learning_rate": 6.244137931034483e-06,
"loss": 0.2848,
"step": 5950
},
{
"epoch": 1.0333794534763059,
"grad_norm": 4.626707553863525,
"learning_rate": 6.226896551724139e-06,
"loss": 0.2767,
"step": 5975
},
{
"epoch": 1.0377032168799722,
"grad_norm": 6.180722236633301,
"learning_rate": 6.209655172413793e-06,
"loss": 0.2898,
"step": 6000
},
{
"epoch": 1.0377032168799722,
"eval_loss": 0.28753867745399475,
"eval_runtime": 5032.2441,
"eval_samples_per_second": 4.596,
"eval_steps_per_second": 0.574,
"eval_wer": 35.51532588144612,
"step": 6000
},
{
"epoch": 1.0420269802836388,
"grad_norm": 5.8476176261901855,
"learning_rate": 6.192413793103449e-06,
"loss": 0.2917,
"step": 6025
},
{
"epoch": 1.0463507436873054,
"grad_norm": 4.507814407348633,
"learning_rate": 6.175172413793104e-06,
"loss": 0.2886,
"step": 6050
},
{
"epoch": 1.050674507090972,
"grad_norm": 5.021352767944336,
"learning_rate": 6.157931034482759e-06,
"loss": 0.2953,
"step": 6075
},
{
"epoch": 1.0549982704946386,
"grad_norm": 4.907651901245117,
"learning_rate": 6.140689655172414e-06,
"loss": 0.28,
"step": 6100
},
{
"epoch": 1.0593220338983051,
"grad_norm": 4.90705680847168,
"learning_rate": 6.123448275862069e-06,
"loss": 0.2906,
"step": 6125
},
{
"epoch": 1.0636457973019717,
"grad_norm": 5.755337715148926,
"learning_rate": 6.106206896551725e-06,
"loss": 0.2886,
"step": 6150
},
{
"epoch": 1.067969560705638,
"grad_norm": 3.9755401611328125,
"learning_rate": 6.08896551724138e-06,
"loss": 0.283,
"step": 6175
},
{
"epoch": 1.0722933241093047,
"grad_norm": 4.78421688079834,
"learning_rate": 6.071724137931035e-06,
"loss": 0.3046,
"step": 6200
},
{
"epoch": 1.0766170875129712,
"grad_norm": 5.140844345092773,
"learning_rate": 6.05448275862069e-06,
"loss": 0.2558,
"step": 6225
},
{
"epoch": 1.0809408509166378,
"grad_norm": 4.85251522064209,
"learning_rate": 6.037241379310346e-06,
"loss": 0.2634,
"step": 6250
},
{
"epoch": 1.0852646143203044,
"grad_norm": 5.412033557891846,
"learning_rate": 6.02e-06,
"loss": 0.267,
"step": 6275
},
{
"epoch": 1.089588377723971,
"grad_norm": 4.728926658630371,
"learning_rate": 6.002758620689656e-06,
"loss": 0.2644,
"step": 6300
},
{
"epoch": 1.0939121411276376,
"grad_norm": 4.522409915924072,
"learning_rate": 5.9855172413793105e-06,
"loss": 0.2853,
"step": 6325
},
{
"epoch": 1.098235904531304,
"grad_norm": 4.903046607971191,
"learning_rate": 5.968275862068966e-06,
"loss": 0.2836,
"step": 6350
},
{
"epoch": 1.1025596679349705,
"grad_norm": 5.107143878936768,
"learning_rate": 5.951724137931036e-06,
"loss": 0.2775,
"step": 6375
},
{
"epoch": 1.106883431338637,
"grad_norm": 4.579159259796143,
"learning_rate": 5.93448275862069e-06,
"loss": 0.2855,
"step": 6400
},
{
"epoch": 1.1112071947423037,
"grad_norm": 5.995217323303223,
"learning_rate": 5.917241379310345e-06,
"loss": 0.2759,
"step": 6425
},
{
"epoch": 1.1155309581459703,
"grad_norm": 4.0890350341796875,
"learning_rate": 5.9e-06,
"loss": 0.2806,
"step": 6450
},
{
"epoch": 1.1198547215496368,
"grad_norm": 4.915022850036621,
"learning_rate": 5.882758620689656e-06,
"loss": 0.2787,
"step": 6475
},
{
"epoch": 1.1241784849533034,
"grad_norm": 5.1101579666137695,
"learning_rate": 5.865517241379311e-06,
"loss": 0.2725,
"step": 6500
},
{
"epoch": 1.12850224835697,
"grad_norm": 5.38516902923584,
"learning_rate": 5.848275862068966e-06,
"loss": 0.2921,
"step": 6525
},
{
"epoch": 1.1328260117606366,
"grad_norm": 5.741593360900879,
"learning_rate": 5.831034482758621e-06,
"loss": 0.2747,
"step": 6550
},
{
"epoch": 1.137149775164303,
"grad_norm": 4.1872053146362305,
"learning_rate": 5.813793103448276e-06,
"loss": 0.2992,
"step": 6575
},
{
"epoch": 1.1414735385679695,
"grad_norm": 5.309129238128662,
"learning_rate": 5.796551724137931e-06,
"loss": 0.2776,
"step": 6600
},
{
"epoch": 1.145797301971636,
"grad_norm": 4.281647682189941,
"learning_rate": 5.779310344827587e-06,
"loss": 0.2961,
"step": 6625
},
{
"epoch": 1.1501210653753027,
"grad_norm": 5.709052085876465,
"learning_rate": 5.762068965517243e-06,
"loss": 0.2934,
"step": 6650
},
{
"epoch": 1.1544448287789693,
"grad_norm": 5.323013782501221,
"learning_rate": 5.744827586206897e-06,
"loss": 0.2814,
"step": 6675
},
{
"epoch": 1.1587685921826358,
"grad_norm": 4.633469104766846,
"learning_rate": 5.727586206896552e-06,
"loss": 0.2805,
"step": 6700
},
{
"epoch": 1.1630923555863024,
"grad_norm": 5.769145488739014,
"learning_rate": 5.710344827586207e-06,
"loss": 0.2909,
"step": 6725
},
{
"epoch": 1.1674161189899688,
"grad_norm": 4.406848907470703,
"learning_rate": 5.693103448275863e-06,
"loss": 0.2717,
"step": 6750
},
{
"epoch": 1.1717398823936354,
"grad_norm": 5.0518269538879395,
"learning_rate": 5.675862068965518e-06,
"loss": 0.2822,
"step": 6775
},
{
"epoch": 1.176063645797302,
"grad_norm": 5.016385078430176,
"learning_rate": 5.6586206896551735e-06,
"loss": 0.2574,
"step": 6800
},
{
"epoch": 1.1803874092009685,
"grad_norm": 6.169381618499756,
"learning_rate": 5.6413793103448275e-06,
"loss": 0.2934,
"step": 6825
},
{
"epoch": 1.184711172604635,
"grad_norm": 4.3018879890441895,
"learning_rate": 5.624137931034483e-06,
"loss": 0.2707,
"step": 6850
},
{
"epoch": 1.1890349360083017,
"grad_norm": 5.160860538482666,
"learning_rate": 5.606896551724138e-06,
"loss": 0.2674,
"step": 6875
},
{
"epoch": 1.1933586994119683,
"grad_norm": 4.624250888824463,
"learning_rate": 5.589655172413794e-06,
"loss": 0.2896,
"step": 6900
},
{
"epoch": 1.1976824628156346,
"grad_norm": 4.681013584136963,
"learning_rate": 5.572413793103449e-06,
"loss": 0.2827,
"step": 6925
},
{
"epoch": 1.2020062262193012,
"grad_norm": 5.956193447113037,
"learning_rate": 5.555172413793104e-06,
"loss": 0.2655,
"step": 6950
},
{
"epoch": 1.2063299896229678,
"grad_norm": 5.1831278800964355,
"learning_rate": 5.5379310344827585e-06,
"loss": 0.2847,
"step": 6975
},
{
"epoch": 1.2106537530266344,
"grad_norm": 5.0179595947265625,
"learning_rate": 5.520689655172414e-06,
"loss": 0.2787,
"step": 7000
},
{
"epoch": 1.2106537530266344,
"eval_loss": 0.28157490491867065,
"eval_runtime": 5064.7694,
"eval_samples_per_second": 4.566,
"eval_steps_per_second": 0.571,
"eval_wer": 35.47549709835601,
"step": 7000
},
{
"epoch": 1.214977516430301,
"grad_norm": 3.484829902648926,
"learning_rate": 5.503448275862069e-06,
"loss": 0.2671,
"step": 7025
},
{
"epoch": 1.2193012798339675,
"grad_norm": 5.8819169998168945,
"learning_rate": 5.486206896551725e-06,
"loss": 0.2879,
"step": 7050
},
{
"epoch": 1.223625043237634,
"grad_norm": 4.751934051513672,
"learning_rate": 5.4689655172413805e-06,
"loss": 0.2732,
"step": 7075
},
{
"epoch": 1.2279488066413007,
"grad_norm": 6.112524032592773,
"learning_rate": 5.4517241379310345e-06,
"loss": 0.281,
"step": 7100
},
{
"epoch": 1.2322725700449673,
"grad_norm": 5.031464099884033,
"learning_rate": 5.43448275862069e-06,
"loss": 0.2796,
"step": 7125
},
{
"epoch": 1.2365963334486336,
"grad_norm": 4.925386905670166,
"learning_rate": 5.417241379310345e-06,
"loss": 0.2772,
"step": 7150
},
{
"epoch": 1.2409200968523002,
"grad_norm": 4.831676483154297,
"learning_rate": 5.400000000000001e-06,
"loss": 0.2754,
"step": 7175
},
{
"epoch": 1.2452438602559668,
"grad_norm": 5.259826183319092,
"learning_rate": 5.382758620689656e-06,
"loss": 0.2807,
"step": 7200
},
{
"epoch": 1.2495676236596334,
"grad_norm": 5.998917102813721,
"learning_rate": 5.365517241379311e-06,
"loss": 0.264,
"step": 7225
},
{
"epoch": 1.2538913870633,
"grad_norm": 4.769485950469971,
"learning_rate": 5.3482758620689654e-06,
"loss": 0.2742,
"step": 7250
},
{
"epoch": 1.2582151504669665,
"grad_norm": 4.706541538238525,
"learning_rate": 5.331034482758621e-06,
"loss": 0.2644,
"step": 7275
},
{
"epoch": 1.262538913870633,
"grad_norm": 4.766952991485596,
"learning_rate": 5.313793103448276e-06,
"loss": 0.2863,
"step": 7300
},
{
"epoch": 1.2668626772742995,
"grad_norm": 4.333870887756348,
"learning_rate": 5.296551724137932e-06,
"loss": 0.275,
"step": 7325
},
{
"epoch": 1.271186440677966,
"grad_norm": 5.625321388244629,
"learning_rate": 5.279310344827587e-06,
"loss": 0.2775,
"step": 7350
},
{
"epoch": 1.2755102040816326,
"grad_norm": 4.249492645263672,
"learning_rate": 5.2620689655172415e-06,
"loss": 0.259,
"step": 7375
},
{
"epoch": 1.2798339674852992,
"grad_norm": 6.244980812072754,
"learning_rate": 5.244827586206896e-06,
"loss": 0.2656,
"step": 7400
},
{
"epoch": 1.2841577308889658,
"grad_norm": 4.6464056968688965,
"learning_rate": 5.227586206896552e-06,
"loss": 0.2644,
"step": 7425
},
{
"epoch": 1.2884814942926324,
"grad_norm": 4.944653511047363,
"learning_rate": 5.210344827586208e-06,
"loss": 0.282,
"step": 7450
},
{
"epoch": 1.292805257696299,
"grad_norm": 6.058406352996826,
"learning_rate": 5.193103448275863e-06,
"loss": 0.2717,
"step": 7475
},
{
"epoch": 1.2971290210999653,
"grad_norm": 5.668537139892578,
"learning_rate": 5.175862068965518e-06,
"loss": 0.2731,
"step": 7500
},
{
"epoch": 1.3014527845036319,
"grad_norm": 4.982054710388184,
"learning_rate": 5.158620689655172e-06,
"loss": 0.2579,
"step": 7525
},
{
"epoch": 1.3057765479072985,
"grad_norm": 4.244757175445557,
"learning_rate": 5.141379310344828e-06,
"loss": 0.2684,
"step": 7550
},
{
"epoch": 1.310100311310965,
"grad_norm": 4.842339515686035,
"learning_rate": 5.124137931034483e-06,
"loss": 0.2795,
"step": 7575
},
{
"epoch": 1.3144240747146316,
"grad_norm": 4.516208171844482,
"learning_rate": 5.106896551724139e-06,
"loss": 0.2724,
"step": 7600
},
{
"epoch": 1.3187478381182982,
"grad_norm": 4.063875675201416,
"learning_rate": 5.0896551724137936e-06,
"loss": 0.2816,
"step": 7625
},
{
"epoch": 1.3230716015219648,
"grad_norm": 4.514278888702393,
"learning_rate": 5.072413793103449e-06,
"loss": 0.2664,
"step": 7650
},
{
"epoch": 1.3273953649256311,
"grad_norm": 5.441287994384766,
"learning_rate": 5.055172413793103e-06,
"loss": 0.2656,
"step": 7675
},
{
"epoch": 1.331719128329298,
"grad_norm": 4.973991870880127,
"learning_rate": 5.037931034482759e-06,
"loss": 0.2751,
"step": 7700
},
{
"epoch": 1.3360428917329643,
"grad_norm": 5.129386901855469,
"learning_rate": 5.020689655172414e-06,
"loss": 0.2644,
"step": 7725
},
{
"epoch": 1.340366655136631,
"grad_norm": 5.174379348754883,
"learning_rate": 5.00344827586207e-06,
"loss": 0.2638,
"step": 7750
},
{
"epoch": 1.3446904185402975,
"grad_norm": 3.6351191997528076,
"learning_rate": 4.9862068965517245e-06,
"loss": 0.2712,
"step": 7775
},
{
"epoch": 1.349014181943964,
"grad_norm": 5.315875053405762,
"learning_rate": 4.968965517241379e-06,
"loss": 0.2663,
"step": 7800
},
{
"epoch": 1.3533379453476306,
"grad_norm": 5.3713202476501465,
"learning_rate": 4.951724137931035e-06,
"loss": 0.2867,
"step": 7825
},
{
"epoch": 1.357661708751297,
"grad_norm": 4.494086265563965,
"learning_rate": 4.93448275862069e-06,
"loss": 0.2633,
"step": 7850
},
{
"epoch": 1.3619854721549638,
"grad_norm": 4.476729869842529,
"learning_rate": 4.917241379310345e-06,
"loss": 0.2646,
"step": 7875
},
{
"epoch": 1.3663092355586302,
"grad_norm": 4.904832363128662,
"learning_rate": 4.9000000000000005e-06,
"loss": 0.2543,
"step": 7900
},
{
"epoch": 1.3706329989622967,
"grad_norm": 6.566470623016357,
"learning_rate": 4.882758620689655e-06,
"loss": 0.2778,
"step": 7925
},
{
"epoch": 1.3749567623659633,
"grad_norm": 7.662662982940674,
"learning_rate": 4.86551724137931e-06,
"loss": 0.2749,
"step": 7950
},
{
"epoch": 1.37928052576963,
"grad_norm": 4.465984344482422,
"learning_rate": 4.848275862068966e-06,
"loss": 0.2497,
"step": 7975
},
{
"epoch": 1.3836042891732965,
"grad_norm": 4.482520580291748,
"learning_rate": 4.831034482758621e-06,
"loss": 0.2856,
"step": 8000
},
{
"epoch": 1.3836042891732965,
"eval_loss": 0.27571046352386475,
"eval_runtime": 5061.2899,
"eval_samples_per_second": 4.569,
"eval_steps_per_second": 0.571,
"eval_wer": 35.15030679877327,
"step": 8000
},
{
"epoch": 1.387928052576963,
"grad_norm": 4.781071662902832,
"learning_rate": 4.813793103448276e-06,
"loss": 0.2509,
"step": 8025
},
{
"epoch": 1.3922518159806296,
"grad_norm": 5.495136260986328,
"learning_rate": 4.7965517241379314e-06,
"loss": 0.2804,
"step": 8050
},
{
"epoch": 1.396575579384296,
"grad_norm": 5.323382377624512,
"learning_rate": 4.779310344827587e-06,
"loss": 0.2662,
"step": 8075
},
{
"epoch": 1.4008993427879626,
"grad_norm": 4.909964084625244,
"learning_rate": 4.762068965517242e-06,
"loss": 0.2815,
"step": 8100
},
{
"epoch": 1.4052231061916292,
"grad_norm": 4.8298420906066895,
"learning_rate": 4.744827586206897e-06,
"loss": 0.2605,
"step": 8125
},
{
"epoch": 1.4095468695952957,
"grad_norm": 5.043326377868652,
"learning_rate": 4.727586206896553e-06,
"loss": 0.2805,
"step": 8150
},
{
"epoch": 1.4138706329989623,
"grad_norm": 4.897740364074707,
"learning_rate": 4.7103448275862075e-06,
"loss": 0.2724,
"step": 8175
},
{
"epoch": 1.418194396402629,
"grad_norm": 4.258267402648926,
"learning_rate": 4.693103448275862e-06,
"loss": 0.2559,
"step": 8200
},
{
"epoch": 1.4225181598062955,
"grad_norm": 4.238452911376953,
"learning_rate": 4.675862068965517e-06,
"loss": 0.2633,
"step": 8225
},
{
"epoch": 1.4268419232099618,
"grad_norm": 5.237506866455078,
"learning_rate": 4.658620689655173e-06,
"loss": 0.2695,
"step": 8250
},
{
"epoch": 1.4311656866136286,
"grad_norm": 7.0735182762146,
"learning_rate": 4.641379310344828e-06,
"loss": 0.2792,
"step": 8275
},
{
"epoch": 1.435489450017295,
"grad_norm": 5.525311470031738,
"learning_rate": 4.624137931034483e-06,
"loss": 0.2861,
"step": 8300
},
{
"epoch": 1.4398132134209616,
"grad_norm": 5.706710338592529,
"learning_rate": 4.606896551724138e-06,
"loss": 0.2616,
"step": 8325
},
{
"epoch": 1.4441369768246282,
"grad_norm": 4.5631794929504395,
"learning_rate": 4.589655172413793e-06,
"loss": 0.2528,
"step": 8350
},
{
"epoch": 1.4484607402282947,
"grad_norm": 4.35201358795166,
"learning_rate": 4.572413793103448e-06,
"loss": 0.2669,
"step": 8375
},
{
"epoch": 1.4527845036319613,
"grad_norm": 5.7140793800354,
"learning_rate": 4.555172413793104e-06,
"loss": 0.275,
"step": 8400
},
{
"epoch": 1.4571082670356277,
"grad_norm": 6.433730602264404,
"learning_rate": 4.537931034482759e-06,
"loss": 0.2718,
"step": 8425
},
{
"epoch": 1.4614320304392945,
"grad_norm": 4.385723114013672,
"learning_rate": 4.521379310344828e-06,
"loss": 0.2751,
"step": 8450
},
{
"epoch": 1.4657557938429608,
"grad_norm": 4.835318565368652,
"learning_rate": 4.504137931034483e-06,
"loss": 0.273,
"step": 8475
},
{
"epoch": 1.4700795572466274,
"grad_norm": 5.070703506469727,
"learning_rate": 4.486896551724138e-06,
"loss": 0.2659,
"step": 8500
},
{
"epoch": 1.474403320650294,
"grad_norm": 4.764357089996338,
"learning_rate": 4.4696551724137936e-06,
"loss": 0.2614,
"step": 8525
},
{
"epoch": 1.4787270840539606,
"grad_norm": 4.433307647705078,
"learning_rate": 4.452413793103449e-06,
"loss": 0.2773,
"step": 8550
},
{
"epoch": 1.4830508474576272,
"grad_norm": 5.220912456512451,
"learning_rate": 4.435172413793104e-06,
"loss": 0.2739,
"step": 8575
},
{
"epoch": 1.4873746108612937,
"grad_norm": 4.9389753341674805,
"learning_rate": 4.417931034482759e-06,
"loss": 0.2801,
"step": 8600
},
{
"epoch": 1.4916983742649603,
"grad_norm": 5.52213191986084,
"learning_rate": 4.400689655172414e-06,
"loss": 0.2713,
"step": 8625
},
{
"epoch": 1.4960221376686267,
"grad_norm": 4.186790943145752,
"learning_rate": 4.38344827586207e-06,
"loss": 0.2702,
"step": 8650
},
{
"epoch": 1.5003459010722935,
"grad_norm": 5.3168535232543945,
"learning_rate": 4.3662068965517245e-06,
"loss": 0.2744,
"step": 8675
},
{
"epoch": 1.5046696644759598,
"grad_norm": 5.070104122161865,
"learning_rate": 4.348965517241379e-06,
"loss": 0.282,
"step": 8700
},
{
"epoch": 1.5089934278796264,
"grad_norm": 4.920200347900391,
"learning_rate": 4.331724137931035e-06,
"loss": 0.262,
"step": 8725
},
{
"epoch": 1.513317191283293,
"grad_norm": 4.704413414001465,
"learning_rate": 4.31448275862069e-06,
"loss": 0.2539,
"step": 8750
},
{
"epoch": 1.5176409546869594,
"grad_norm": 5.695456027984619,
"learning_rate": 4.297241379310345e-06,
"loss": 0.2843,
"step": 8775
},
{
"epoch": 1.5219647180906262,
"grad_norm": 5.497166633605957,
"learning_rate": 4.2800000000000005e-06,
"loss": 0.2813,
"step": 8800
},
{
"epoch": 1.5262884814942925,
"grad_norm": 6.1521100997924805,
"learning_rate": 4.262758620689655e-06,
"loss": 0.2835,
"step": 8825
},
{
"epoch": 1.5306122448979593,
"grad_norm": 4.628076076507568,
"learning_rate": 4.24551724137931e-06,
"loss": 0.2685,
"step": 8850
},
{
"epoch": 1.5349360083016257,
"grad_norm": 6.371509552001953,
"learning_rate": 4.228275862068966e-06,
"loss": 0.2596,
"step": 8875
},
{
"epoch": 1.5392597717052923,
"grad_norm": 6.203851699829102,
"learning_rate": 4.211034482758621e-06,
"loss": 0.2801,
"step": 8900
},
{
"epoch": 1.5435835351089588,
"grad_norm": 5.120136260986328,
"learning_rate": 4.193793103448276e-06,
"loss": 0.2749,
"step": 8925
},
{
"epoch": 1.5479072985126254,
"grad_norm": 4.296701908111572,
"learning_rate": 4.1765517241379314e-06,
"loss": 0.268,
"step": 8950
},
{
"epoch": 1.552231061916292,
"grad_norm": 4.204896926879883,
"learning_rate": 4.159310344827587e-06,
"loss": 0.2731,
"step": 8975
},
{
"epoch": 1.5565548253199584,
"grad_norm": 4.787944793701172,
"learning_rate": 4.142068965517242e-06,
"loss": 0.2672,
"step": 9000
},
{
"epoch": 1.5565548253199584,
"eval_loss": 0.27039873600006104,
"eval_runtime": 5069.0399,
"eval_samples_per_second": 4.562,
"eval_steps_per_second": 0.57,
"eval_wer": 34.11897560369892,
"step": 9000
},
{
"epoch": 1.5608785887236252,
"grad_norm": 5.445296764373779,
"learning_rate": 4.124827586206897e-06,
"loss": 0.256,
"step": 9025
},
{
"epoch": 1.5652023521272915,
"grad_norm": 6.453240871429443,
"learning_rate": 4.107586206896552e-06,
"loss": 0.2559,
"step": 9050
},
{
"epoch": 1.569526115530958,
"grad_norm": 6.632162570953369,
"learning_rate": 4.0903448275862075e-06,
"loss": 0.287,
"step": 9075
},
{
"epoch": 1.5738498789346247,
"grad_norm": 5.117345809936523,
"learning_rate": 4.073103448275862e-06,
"loss": 0.2669,
"step": 9100
},
{
"epoch": 1.5781736423382913,
"grad_norm": 4.2106804847717285,
"learning_rate": 4.055862068965517e-06,
"loss": 0.2566,
"step": 9125
},
{
"epoch": 1.5824974057419579,
"grad_norm": 4.997186660766602,
"learning_rate": 4.038620689655173e-06,
"loss": 0.2718,
"step": 9150
},
{
"epoch": 1.5868211691456242,
"grad_norm": 6.009954452514648,
"learning_rate": 4.021379310344828e-06,
"loss": 0.2759,
"step": 9175
},
{
"epoch": 1.591144932549291,
"grad_norm": 4.32164192199707,
"learning_rate": 4.004137931034483e-06,
"loss": 0.2437,
"step": 9200
},
{
"epoch": 1.5954686959529574,
"grad_norm": 4.823526382446289,
"learning_rate": 3.986896551724138e-06,
"loss": 0.2632,
"step": 9225
},
{
"epoch": 1.599792459356624,
"grad_norm": 4.981450080871582,
"learning_rate": 3.969655172413793e-06,
"loss": 0.263,
"step": 9250
},
{
"epoch": 1.6041162227602905,
"grad_norm": 4.784339427947998,
"learning_rate": 3.952413793103448e-06,
"loss": 0.2763,
"step": 9275
},
{
"epoch": 1.6084399861639571,
"grad_norm": 4.33436393737793,
"learning_rate": 3.935172413793104e-06,
"loss": 0.2595,
"step": 9300
},
{
"epoch": 1.6127637495676237,
"grad_norm": 5.4285688400268555,
"learning_rate": 3.917931034482759e-06,
"loss": 0.2783,
"step": 9325
},
{
"epoch": 1.61708751297129,
"grad_norm": 5.2749786376953125,
"learning_rate": 3.9006896551724144e-06,
"loss": 0.2497,
"step": 9350
},
{
"epoch": 1.6214112763749569,
"grad_norm": 4.453002452850342,
"learning_rate": 3.883448275862069e-06,
"loss": 0.2749,
"step": 9375
},
{
"epoch": 1.6257350397786232,
"grad_norm": 4.48176383972168,
"learning_rate": 3.866206896551725e-06,
"loss": 0.2867,
"step": 9400
},
{
"epoch": 1.63005880318229,
"grad_norm": 4.524690628051758,
"learning_rate": 3.84896551724138e-06,
"loss": 0.2569,
"step": 9425
},
{
"epoch": 1.6343825665859564,
"grad_norm": 5.824161529541016,
"learning_rate": 3.831724137931035e-06,
"loss": 0.2774,
"step": 9450
},
{
"epoch": 1.638706329989623,
"grad_norm": 4.404734134674072,
"learning_rate": 3.81448275862069e-06,
"loss": 0.2761,
"step": 9475
},
{
"epoch": 1.6430300933932895,
"grad_norm": 4.873973369598389,
"learning_rate": 3.7972413793103454e-06,
"loss": 0.2595,
"step": 9500
},
{
"epoch": 1.6473538567969561,
"grad_norm": 5.397193908691406,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.2654,
"step": 9525
},
{
"epoch": 1.6516776202006227,
"grad_norm": 5.546463966369629,
"learning_rate": 3.7627586206896555e-06,
"loss": 0.2579,
"step": 9550
},
{
"epoch": 1.656001383604289,
"grad_norm": 4.908322334289551,
"learning_rate": 3.745517241379311e-06,
"loss": 0.2644,
"step": 9575
},
{
"epoch": 1.6603251470079559,
"grad_norm": 4.3278985023498535,
"learning_rate": 3.7282758620689657e-06,
"loss": 0.2522,
"step": 9600
},
{
"epoch": 1.6646489104116222,
"grad_norm": 3.946744918823242,
"learning_rate": 3.711034482758621e-06,
"loss": 0.2612,
"step": 9625
},
{
"epoch": 1.6689726738152888,
"grad_norm": 4.440819263458252,
"learning_rate": 3.693793103448276e-06,
"loss": 0.2743,
"step": 9650
},
{
"epoch": 1.6732964372189554,
"grad_norm": 4.908570766448975,
"learning_rate": 3.676551724137931e-06,
"loss": 0.2556,
"step": 9675
},
{
"epoch": 1.677620200622622,
"grad_norm": 6.872124195098877,
"learning_rate": 3.6593103448275864e-06,
"loss": 0.2755,
"step": 9700
},
{
"epoch": 1.6819439640262885,
"grad_norm": 5.950398921966553,
"learning_rate": 3.6420689655172413e-06,
"loss": 0.258,
"step": 9725
},
{
"epoch": 1.686267727429955,
"grad_norm": 5.322935581207275,
"learning_rate": 3.624827586206897e-06,
"loss": 0.2603,
"step": 9750
},
{
"epoch": 1.6905914908336217,
"grad_norm": 4.969674587249756,
"learning_rate": 3.6075862068965523e-06,
"loss": 0.2719,
"step": 9775
},
{
"epoch": 1.694915254237288,
"grad_norm": 4.605515003204346,
"learning_rate": 3.590344827586207e-06,
"loss": 0.2731,
"step": 9800
},
{
"epoch": 1.6992390176409546,
"grad_norm": 4.371634006500244,
"learning_rate": 3.5731034482758625e-06,
"loss": 0.2581,
"step": 9825
},
{
"epoch": 1.7035627810446212,
"grad_norm": 5.099359512329102,
"learning_rate": 3.5558620689655178e-06,
"loss": 0.2702,
"step": 9850
},
{
"epoch": 1.7078865444482878,
"grad_norm": 5.02194881439209,
"learning_rate": 3.5386206896551726e-06,
"loss": 0.2575,
"step": 9875
},
{
"epoch": 1.7122103078519544,
"grad_norm": 5.109201431274414,
"learning_rate": 3.521379310344828e-06,
"loss": 0.2452,
"step": 9900
},
{
"epoch": 1.7165340712556207,
"grad_norm": 4.52341890335083,
"learning_rate": 3.5041379310344832e-06,
"loss": 0.2708,
"step": 9925
},
{
"epoch": 1.7208578346592875,
"grad_norm": 4.951146602630615,
"learning_rate": 3.486896551724138e-06,
"loss": 0.2612,
"step": 9950
},
{
"epoch": 1.725181598062954,
"grad_norm": 5.593024730682373,
"learning_rate": 3.4696551724137934e-06,
"loss": 0.2716,
"step": 9975
},
{
"epoch": 1.7295053614666207,
"grad_norm": 5.451072692871094,
"learning_rate": 3.4524137931034487e-06,
"loss": 0.2623,
"step": 10000
},
{
"epoch": 1.7295053614666207,
"eval_loss": 0.26585614681243896,
"eval_runtime": 5142.3561,
"eval_samples_per_second": 4.497,
"eval_steps_per_second": 0.562,
"eval_wer": 33.59979570177145,
"step": 10000
},
{
"epoch": 1.733829124870287,
"grad_norm": 4.803134441375732,
"learning_rate": 3.4351724137931036e-06,
"loss": 0.2573,
"step": 10025
},
{
"epoch": 1.7381528882739536,
"grad_norm": 4.306927680969238,
"learning_rate": 3.417931034482759e-06,
"loss": 0.2617,
"step": 10050
},
{
"epoch": 1.7424766516776202,
"grad_norm": 5.380494594573975,
"learning_rate": 3.4006896551724137e-06,
"loss": 0.2653,
"step": 10075
},
{
"epoch": 1.7468004150812868,
"grad_norm": 4.847281455993652,
"learning_rate": 3.383448275862069e-06,
"loss": 0.267,
"step": 10100
},
{
"epoch": 1.7511241784849534,
"grad_norm": 4.344338893890381,
"learning_rate": 3.3662068965517243e-06,
"loss": 0.2612,
"step": 10125
},
{
"epoch": 1.7554479418886197,
"grad_norm": 5.100605487823486,
"learning_rate": 3.34896551724138e-06,
"loss": 0.2624,
"step": 10150
},
{
"epoch": 1.7597717052922865,
"grad_norm": 6.067707061767578,
"learning_rate": 3.331724137931035e-06,
"loss": 0.2797,
"step": 10175
},
{
"epoch": 1.764095468695953,
"grad_norm": 4.3519392013549805,
"learning_rate": 3.31448275862069e-06,
"loss": 0.2607,
"step": 10200
},
{
"epoch": 1.7684192320996195,
"grad_norm": 4.383536338806152,
"learning_rate": 3.297241379310345e-06,
"loss": 0.2516,
"step": 10225
},
{
"epoch": 1.772742995503286,
"grad_norm": 5.012591361999512,
"learning_rate": 3.2800000000000004e-06,
"loss": 0.2724,
"step": 10250
},
{
"epoch": 1.7770667589069526,
"grad_norm": 4.1687774658203125,
"learning_rate": 3.2627586206896557e-06,
"loss": 0.2698,
"step": 10275
},
{
"epoch": 1.7813905223106192,
"grad_norm": 5.5503034591674805,
"learning_rate": 3.2455172413793105e-06,
"loss": 0.2652,
"step": 10300
},
{
"epoch": 1.7857142857142856,
"grad_norm": 5.016941547393799,
"learning_rate": 3.228275862068966e-06,
"loss": 0.248,
"step": 10325
},
{
"epoch": 1.7900380491179524,
"grad_norm": 4.316959381103516,
"learning_rate": 3.211034482758621e-06,
"loss": 0.2598,
"step": 10350
},
{
"epoch": 1.7943618125216187,
"grad_norm": 6.067855358123779,
"learning_rate": 3.193793103448276e-06,
"loss": 0.2677,
"step": 10375
},
{
"epoch": 1.7986855759252853,
"grad_norm": 5.613093852996826,
"learning_rate": 3.1765517241379313e-06,
"loss": 0.2679,
"step": 10400
},
{
"epoch": 1.803009339328952,
"grad_norm": 4.065937519073486,
"learning_rate": 3.1593103448275866e-06,
"loss": 0.2634,
"step": 10425
},
{
"epoch": 1.8073331027326185,
"grad_norm": 5.0161638259887695,
"learning_rate": 3.1427586206896555e-06,
"loss": 0.2706,
"step": 10450
},
{
"epoch": 1.811656866136285,
"grad_norm": 6.777268886566162,
"learning_rate": 3.1255172413793104e-06,
"loss": 0.2756,
"step": 10475
},
{
"epoch": 1.8159806295399514,
"grad_norm": 6.550757884979248,
"learning_rate": 3.1082758620689657e-06,
"loss": 0.2704,
"step": 10500
},
{
"epoch": 1.8203043929436182,
"grad_norm": 5.1796393394470215,
"learning_rate": 3.091034482758621e-06,
"loss": 0.2707,
"step": 10525
},
{
"epoch": 1.8246281563472846,
"grad_norm": 5.0789570808410645,
"learning_rate": 3.073793103448276e-06,
"loss": 0.2592,
"step": 10550
},
{
"epoch": 1.8289519197509514,
"grad_norm": 4.78272819519043,
"learning_rate": 3.056551724137931e-06,
"loss": 0.2616,
"step": 10575
},
{
"epoch": 1.8332756831546178,
"grad_norm": 4.711808681488037,
"learning_rate": 3.0393103448275864e-06,
"loss": 0.2729,
"step": 10600
},
{
"epoch": 1.8375994465582843,
"grad_norm": 4.821509838104248,
"learning_rate": 3.0220689655172413e-06,
"loss": 0.2522,
"step": 10625
},
{
"epoch": 1.841923209961951,
"grad_norm": 4.584725379943848,
"learning_rate": 3.004827586206897e-06,
"loss": 0.2639,
"step": 10650
},
{
"epoch": 1.8462469733656173,
"grad_norm": 6.850635051727295,
"learning_rate": 2.9875862068965523e-06,
"loss": 0.2658,
"step": 10675
},
{
"epoch": 1.850570736769284,
"grad_norm": 6.138273239135742,
"learning_rate": 2.970344827586207e-06,
"loss": 0.2735,
"step": 10700
},
{
"epoch": 1.8548945001729504,
"grad_norm": 5.5177507400512695,
"learning_rate": 2.9531034482758625e-06,
"loss": 0.2497,
"step": 10725
},
{
"epoch": 1.8592182635766172,
"grad_norm": 5.395568370819092,
"learning_rate": 2.9358620689655178e-06,
"loss": 0.2556,
"step": 10750
},
{
"epoch": 1.8635420269802836,
"grad_norm": 3.9228525161743164,
"learning_rate": 2.9186206896551727e-06,
"loss": 0.2697,
"step": 10775
},
{
"epoch": 1.8678657903839502,
"grad_norm": 3.7661633491516113,
"learning_rate": 2.901379310344828e-06,
"loss": 0.24,
"step": 10800
},
{
"epoch": 1.8721895537876168,
"grad_norm": 3.9159317016601562,
"learning_rate": 2.884137931034483e-06,
"loss": 0.2663,
"step": 10825
},
{
"epoch": 1.8765133171912833,
"grad_norm": 5.218613147735596,
"learning_rate": 2.866896551724138e-06,
"loss": 0.2539,
"step": 10850
},
{
"epoch": 1.88083708059495,
"grad_norm": 4.241786479949951,
"learning_rate": 2.8496551724137934e-06,
"loss": 0.2672,
"step": 10875
},
{
"epoch": 1.8851608439986163,
"grad_norm": 5.012778282165527,
"learning_rate": 2.8324137931034483e-06,
"loss": 0.2624,
"step": 10900
},
{
"epoch": 1.889484607402283,
"grad_norm": 5.514505863189697,
"learning_rate": 2.8151724137931036e-06,
"loss": 0.2635,
"step": 10925
},
{
"epoch": 1.8938083708059494,
"grad_norm": 4.5488667488098145,
"learning_rate": 2.797931034482759e-06,
"loss": 0.2456,
"step": 10950
},
{
"epoch": 1.898132134209616,
"grad_norm": 4.1149516105651855,
"learning_rate": 2.7806896551724137e-06,
"loss": 0.2644,
"step": 10975
},
{
"epoch": 1.9024558976132826,
"grad_norm": 5.599881649017334,
"learning_rate": 2.763448275862069e-06,
"loss": 0.2772,
"step": 11000
},
{
"epoch": 1.9024558976132826,
"eval_loss": 0.263325035572052,
"eval_runtime": 5127.6582,
"eval_samples_per_second": 4.51,
"eval_steps_per_second": 0.564,
"eval_wer": 33.32895997675873,
"step": 11000
},
{
"epoch": 1.9067796610169492,
"grad_norm": 5.128805637359619,
"learning_rate": 2.7462068965517243e-06,
"loss": 0.2599,
"step": 11025
},
{
"epoch": 1.9111034244206158,
"grad_norm": 6.218988418579102,
"learning_rate": 2.728965517241379e-06,
"loss": 0.271,
"step": 11050
},
{
"epoch": 1.9154271878242821,
"grad_norm": 4.934520721435547,
"learning_rate": 2.711724137931035e-06,
"loss": 0.2633,
"step": 11075
},
{
"epoch": 1.919750951227949,
"grad_norm": 5.033214092254639,
"learning_rate": 2.69448275862069e-06,
"loss": 0.2563,
"step": 11100
},
{
"epoch": 1.9240747146316153,
"grad_norm": 5.1451416015625,
"learning_rate": 2.677241379310345e-06,
"loss": 0.2525,
"step": 11125
},
{
"epoch": 1.9283984780352819,
"grad_norm": 5.570995330810547,
"learning_rate": 2.6600000000000004e-06,
"loss": 0.2688,
"step": 11150
},
{
"epoch": 1.9327222414389484,
"grad_norm": 3.9599545001983643,
"learning_rate": 2.6427586206896557e-06,
"loss": 0.2533,
"step": 11175
},
{
"epoch": 1.937046004842615,
"grad_norm": 4.358778476715088,
"learning_rate": 2.6255172413793105e-06,
"loss": 0.2596,
"step": 11200
},
{
"epoch": 1.9413697682462816,
"grad_norm": 4.855038166046143,
"learning_rate": 2.608275862068966e-06,
"loss": 0.2651,
"step": 11225
},
{
"epoch": 1.945693531649948,
"grad_norm": 6.604015350341797,
"learning_rate": 2.5910344827586207e-06,
"loss": 0.2601,
"step": 11250
},
{
"epoch": 1.9500172950536148,
"grad_norm": 5.335809230804443,
"learning_rate": 2.573793103448276e-06,
"loss": 0.2574,
"step": 11275
},
{
"epoch": 1.9543410584572811,
"grad_norm": 4.764793395996094,
"learning_rate": 2.5565517241379313e-06,
"loss": 0.2634,
"step": 11300
},
{
"epoch": 1.958664821860948,
"grad_norm": 5.247544765472412,
"learning_rate": 2.539310344827586e-06,
"loss": 0.2664,
"step": 11325
},
{
"epoch": 1.9629885852646143,
"grad_norm": 4.707311630249023,
"learning_rate": 2.5220689655172414e-06,
"loss": 0.2609,
"step": 11350
},
{
"epoch": 1.9673123486682809,
"grad_norm": 4.89914083480835,
"learning_rate": 2.5048275862068967e-06,
"loss": 0.2578,
"step": 11375
},
{
"epoch": 1.9716361120719474,
"grad_norm": 4.219529151916504,
"learning_rate": 2.487586206896552e-06,
"loss": 0.2609,
"step": 11400
},
{
"epoch": 1.975959875475614,
"grad_norm": 5.0991435050964355,
"learning_rate": 2.4703448275862073e-06,
"loss": 0.2511,
"step": 11425
},
{
"epoch": 1.9802836388792806,
"grad_norm": 4.904223918914795,
"learning_rate": 2.453103448275862e-06,
"loss": 0.257,
"step": 11450
},
{
"epoch": 1.984607402282947,
"grad_norm": 4.135773658752441,
"learning_rate": 2.4358620689655175e-06,
"loss": 0.2673,
"step": 11475
},
{
"epoch": 1.9889311656866138,
"grad_norm": 4.5873332023620605,
"learning_rate": 2.4186206896551724e-06,
"loss": 0.2598,
"step": 11500
},
{
"epoch": 1.9932549290902801,
"grad_norm": 5.754453659057617,
"learning_rate": 2.4013793103448277e-06,
"loss": 0.2602,
"step": 11525
},
{
"epoch": 1.9975786924939467,
"grad_norm": 5.299346923828125,
"learning_rate": 2.384137931034483e-06,
"loss": 0.2629,
"step": 11550
},
{
"epoch": 2.0019024558976133,
"grad_norm": 4.596057415008545,
"learning_rate": 2.3668965517241382e-06,
"loss": 0.2577,
"step": 11575
},
{
"epoch": 2.0062262193012796,
"grad_norm": 4.527500629425049,
"learning_rate": 2.3496551724137935e-06,
"loss": 0.2519,
"step": 11600
},
{
"epoch": 2.0105499827049464,
"grad_norm": 4.149202823638916,
"learning_rate": 2.3324137931034484e-06,
"loss": 0.2485,
"step": 11625
},
{
"epoch": 2.014873746108613,
"grad_norm": 4.757724761962891,
"learning_rate": 2.3151724137931037e-06,
"loss": 0.2445,
"step": 11650
},
{
"epoch": 2.0191975095122796,
"grad_norm": 4.565730571746826,
"learning_rate": 2.297931034482759e-06,
"loss": 0.2376,
"step": 11675
},
{
"epoch": 2.023521272915946,
"grad_norm": 4.340237140655518,
"learning_rate": 2.280689655172414e-06,
"loss": 0.2617,
"step": 11700
},
{
"epoch": 2.0278450363196128,
"grad_norm": 5.368824005126953,
"learning_rate": 2.263448275862069e-06,
"loss": 0.2384,
"step": 11725
},
{
"epoch": 2.032168799723279,
"grad_norm": 4.97426176071167,
"learning_rate": 2.246206896551724e-06,
"loss": 0.2501,
"step": 11750
},
{
"epoch": 2.0364925631269455,
"grad_norm": 6.131598949432373,
"learning_rate": 2.2289655172413797e-06,
"loss": 0.2518,
"step": 11775
},
{
"epoch": 2.0408163265306123,
"grad_norm": 4.28442907333374,
"learning_rate": 2.2117241379310346e-06,
"loss": 0.2653,
"step": 11800
},
{
"epoch": 2.0451400899342786,
"grad_norm": 4.612010478973389,
"learning_rate": 2.19448275862069e-06,
"loss": 0.2447,
"step": 11825
},
{
"epoch": 2.0494638533379455,
"grad_norm": 4.856114387512207,
"learning_rate": 2.177241379310345e-06,
"loss": 0.2456,
"step": 11850
},
{
"epoch": 2.053787616741612,
"grad_norm": 5.475619792938232,
"learning_rate": 2.16e-06,
"loss": 0.2516,
"step": 11875
},
{
"epoch": 2.0581113801452786,
"grad_norm": 4.844070911407471,
"learning_rate": 2.1427586206896554e-06,
"loss": 0.2354,
"step": 11900
},
{
"epoch": 2.062435143548945,
"grad_norm": 4.885989189147949,
"learning_rate": 2.1255172413793102e-06,
"loss": 0.2427,
"step": 11925
},
{
"epoch": 2.0667589069526118,
"grad_norm": 5.934988975524902,
"learning_rate": 2.1082758620689655e-06,
"loss": 0.2599,
"step": 11950
},
{
"epoch": 2.071082670356278,
"grad_norm": 5.128411769866943,
"learning_rate": 2.091034482758621e-06,
"loss": 0.2457,
"step": 11975
},
{
"epoch": 2.0754064337599445,
"grad_norm": 4.521295547485352,
"learning_rate": 2.073793103448276e-06,
"loss": 0.2481,
"step": 12000
},
{
"epoch": 2.0754064337599445,
"eval_loss": 0.2604476511478424,
"eval_runtime": 5099.3278,
"eval_samples_per_second": 4.535,
"eval_steps_per_second": 0.567,
"eval_wer": 32.88803191925535,
"step": 12000
},
{
"epoch": 2.0797301971636113,
"grad_norm": 4.452366828918457,
"learning_rate": 2.0565517241379314e-06,
"loss": 0.2485,
"step": 12025
},
{
"epoch": 2.0840539605672777,
"grad_norm": 4.331884384155273,
"learning_rate": 2.0393103448275863e-06,
"loss": 0.2452,
"step": 12050
},
{
"epoch": 2.0883777239709445,
"grad_norm": 4.478641510009766,
"learning_rate": 2.0220689655172416e-06,
"loss": 0.2485,
"step": 12075
},
{
"epoch": 2.092701487374611,
"grad_norm": 4.945328235626221,
"learning_rate": 2.004827586206897e-06,
"loss": 0.2334,
"step": 12100
},
{
"epoch": 2.0970252507782776,
"grad_norm": 5.300661087036133,
"learning_rate": 1.9875862068965517e-06,
"loss": 0.2457,
"step": 12125
},
{
"epoch": 2.101349014181944,
"grad_norm": 5.748834133148193,
"learning_rate": 1.970344827586207e-06,
"loss": 0.2562,
"step": 12150
},
{
"epoch": 2.1056727775856103,
"grad_norm": 4.078002452850342,
"learning_rate": 1.9531034482758623e-06,
"loss": 0.2496,
"step": 12175
},
{
"epoch": 2.109996540989277,
"grad_norm": 4.0208258628845215,
"learning_rate": 1.9358620689655176e-06,
"loss": 0.2528,
"step": 12200
},
{
"epoch": 2.1143203043929435,
"grad_norm": 5.57705545425415,
"learning_rate": 1.9186206896551725e-06,
"loss": 0.239,
"step": 12225
},
{
"epoch": 2.1186440677966103,
"grad_norm": 4.870556831359863,
"learning_rate": 1.9013793103448278e-06,
"loss": 0.2438,
"step": 12250
},
{
"epoch": 2.1229678312002767,
"grad_norm": 5.422897815704346,
"learning_rate": 1.8841379310344829e-06,
"loss": 0.2332,
"step": 12275
},
{
"epoch": 2.1272915946039435,
"grad_norm": 5.2437052726745605,
"learning_rate": 1.866896551724138e-06,
"loss": 0.2505,
"step": 12300
},
{
"epoch": 2.13161535800761,
"grad_norm": 5.451413631439209,
"learning_rate": 1.8496551724137932e-06,
"loss": 0.249,
"step": 12325
},
{
"epoch": 2.135939121411276,
"grad_norm": 4.326362609863281,
"learning_rate": 1.8324137931034483e-06,
"loss": 0.2428,
"step": 12350
},
{
"epoch": 2.140262884814943,
"grad_norm": 5.135347366333008,
"learning_rate": 1.8151724137931036e-06,
"loss": 0.2514,
"step": 12375
},
{
"epoch": 2.1445866482186093,
"grad_norm": 4.476009368896484,
"learning_rate": 1.797931034482759e-06,
"loss": 0.2474,
"step": 12400
},
{
"epoch": 2.148910411622276,
"grad_norm": 4.706514358520508,
"learning_rate": 1.780689655172414e-06,
"loss": 0.2376,
"step": 12425
},
{
"epoch": 2.1532341750259425,
"grad_norm": 5.396040916442871,
"learning_rate": 1.7641379310344827e-06,
"loss": 0.2595,
"step": 12450
},
{
"epoch": 2.1575579384296093,
"grad_norm": 4.83308744430542,
"learning_rate": 1.7468965517241382e-06,
"loss": 0.2447,
"step": 12475
},
{
"epoch": 2.1618817018332757,
"grad_norm": 4.416563034057617,
"learning_rate": 1.7296551724137933e-06,
"loss": 0.2408,
"step": 12500
},
{
"epoch": 2.166205465236942,
"grad_norm": 4.504754543304443,
"learning_rate": 1.7124137931034484e-06,
"loss": 0.2476,
"step": 12525
},
{
"epoch": 2.170529228640609,
"grad_norm": 5.582792282104492,
"learning_rate": 1.6951724137931035e-06,
"loss": 0.2334,
"step": 12550
},
{
"epoch": 2.174852992044275,
"grad_norm": 5.201076030731201,
"learning_rate": 1.6779310344827588e-06,
"loss": 0.2459,
"step": 12575
},
{
"epoch": 2.179176755447942,
"grad_norm": 4.038005828857422,
"learning_rate": 1.6606896551724139e-06,
"loss": 0.2518,
"step": 12600
},
{
"epoch": 2.1835005188516083,
"grad_norm": 4.951401233673096,
"learning_rate": 1.643448275862069e-06,
"loss": 0.2433,
"step": 12625
},
{
"epoch": 2.187824282255275,
"grad_norm": 4.419375896453857,
"learning_rate": 1.6262068965517242e-06,
"loss": 0.2427,
"step": 12650
},
{
"epoch": 2.1921480456589415,
"grad_norm": 4.835983753204346,
"learning_rate": 1.6089655172413795e-06,
"loss": 0.2348,
"step": 12675
},
{
"epoch": 2.196471809062608,
"grad_norm": 4.035714149475098,
"learning_rate": 1.5917241379310346e-06,
"loss": 0.2481,
"step": 12700
},
{
"epoch": 2.2007955724662747,
"grad_norm": 6.195335388183594,
"learning_rate": 1.57448275862069e-06,
"loss": 0.2559,
"step": 12725
},
{
"epoch": 2.205119335869941,
"grad_norm": 4.89235258102417,
"learning_rate": 1.557241379310345e-06,
"loss": 0.2288,
"step": 12750
},
{
"epoch": 2.209443099273608,
"grad_norm": 4.976417541503906,
"learning_rate": 1.54e-06,
"loss": 0.2412,
"step": 12775
},
{
"epoch": 2.213766862677274,
"grad_norm": 4.204568862915039,
"learning_rate": 1.5227586206896552e-06,
"loss": 0.2607,
"step": 12800
},
{
"epoch": 2.218090626080941,
"grad_norm": 4.752575397491455,
"learning_rate": 1.5055172413793105e-06,
"loss": 0.2328,
"step": 12825
},
{
"epoch": 2.2224143894846073,
"grad_norm": 5.109005451202393,
"learning_rate": 1.4882758620689655e-06,
"loss": 0.2472,
"step": 12850
},
{
"epoch": 2.226738152888274,
"grad_norm": 4.870945453643799,
"learning_rate": 1.4710344827586208e-06,
"loss": 0.2411,
"step": 12875
},
{
"epoch": 2.2310619162919405,
"grad_norm": 4.599150657653809,
"learning_rate": 1.4537931034482761e-06,
"loss": 0.2475,
"step": 12900
},
{
"epoch": 2.235385679695607,
"grad_norm": 5.0102057456970215,
"learning_rate": 1.4365517241379312e-06,
"loss": 0.2466,
"step": 12925
},
{
"epoch": 2.2397094430992737,
"grad_norm": 4.145412921905518,
"learning_rate": 1.4193103448275863e-06,
"loss": 0.2484,
"step": 12950
},
{
"epoch": 2.24403320650294,
"grad_norm": 3.842782735824585,
"learning_rate": 1.4020689655172416e-06,
"loss": 0.2357,
"step": 12975
},
{
"epoch": 2.248356969906607,
"grad_norm": 4.585821151733398,
"learning_rate": 1.3848275862068967e-06,
"loss": 0.2436,
"step": 13000
},
{
"epoch": 2.248356969906607,
"eval_loss": 0.25910165905952454,
"eval_runtime": 5129.3259,
"eval_samples_per_second": 4.509,
"eval_steps_per_second": 0.564,
"eval_wer": 32.73105965648846,
"step": 13000
},
{
"epoch": 2.252680733310273,
"grad_norm": 4.6802568435668945,
"learning_rate": 1.3675862068965517e-06,
"loss": 0.2348,
"step": 13025
},
{
"epoch": 2.25700449671394,
"grad_norm": 5.42781925201416,
"learning_rate": 1.3503448275862068e-06,
"loss": 0.2349,
"step": 13050
},
{
"epoch": 2.2613282601176063,
"grad_norm": 5.2979536056518555,
"learning_rate": 1.3331034482758623e-06,
"loss": 0.2421,
"step": 13075
},
{
"epoch": 2.265652023521273,
"grad_norm": 5.278492450714111,
"learning_rate": 1.3158620689655174e-06,
"loss": 0.2438,
"step": 13100
},
{
"epoch": 2.2699757869249395,
"grad_norm": 4.465871810913086,
"learning_rate": 1.2986206896551725e-06,
"loss": 0.2624,
"step": 13125
},
{
"epoch": 2.274299550328606,
"grad_norm": 4.552392959594727,
"learning_rate": 1.2813793103448278e-06,
"loss": 0.2708,
"step": 13150
},
{
"epoch": 2.2786233137322727,
"grad_norm": 4.50985050201416,
"learning_rate": 1.2641379310344829e-06,
"loss": 0.2437,
"step": 13175
},
{
"epoch": 2.282947077135939,
"grad_norm": 5.000662326812744,
"learning_rate": 1.246896551724138e-06,
"loss": 0.2499,
"step": 13200
},
{
"epoch": 2.287270840539606,
"grad_norm": 4.699297904968262,
"learning_rate": 1.2296551724137932e-06,
"loss": 0.2546,
"step": 13225
},
{
"epoch": 2.291594603943272,
"grad_norm": 4.824137210845947,
"learning_rate": 1.2124137931034483e-06,
"loss": 0.2566,
"step": 13250
},
{
"epoch": 2.295918367346939,
"grad_norm": 5.796444416046143,
"learning_rate": 1.1951724137931036e-06,
"loss": 0.254,
"step": 13275
},
{
"epoch": 2.3002421307506054,
"grad_norm": 4.286495208740234,
"learning_rate": 1.1779310344827587e-06,
"loss": 0.2258,
"step": 13300
},
{
"epoch": 2.3045658941542717,
"grad_norm": 4.737240314483643,
"learning_rate": 1.160689655172414e-06,
"loss": 0.2295,
"step": 13325
},
{
"epoch": 2.3088896575579385,
"grad_norm": 5.5657148361206055,
"learning_rate": 1.143448275862069e-06,
"loss": 0.2516,
"step": 13350
},
{
"epoch": 2.313213420961605,
"grad_norm": 6.122943878173828,
"learning_rate": 1.1262068965517242e-06,
"loss": 0.257,
"step": 13375
},
{
"epoch": 2.3175371843652717,
"grad_norm": 5.030111789703369,
"learning_rate": 1.1089655172413795e-06,
"loss": 0.2409,
"step": 13400
},
{
"epoch": 2.321860947768938,
"grad_norm": 4.784854412078857,
"learning_rate": 1.0917241379310345e-06,
"loss": 0.2371,
"step": 13425
},
{
"epoch": 2.326184711172605,
"grad_norm": 4.606825351715088,
"learning_rate": 1.0744827586206898e-06,
"loss": 0.2496,
"step": 13450
},
{
"epoch": 2.330508474576271,
"grad_norm": 5.06074333190918,
"learning_rate": 1.057241379310345e-06,
"loss": 0.2328,
"step": 13475
},
{
"epoch": 2.3348322379799376,
"grad_norm": 4.836973190307617,
"learning_rate": 1.04e-06,
"loss": 0.2485,
"step": 13500
},
{
"epoch": 2.3391560013836044,
"grad_norm": 4.6793365478515625,
"learning_rate": 1.0227586206896553e-06,
"loss": 0.257,
"step": 13525
},
{
"epoch": 2.3434797647872707,
"grad_norm": 4.693976879119873,
"learning_rate": 1.0055172413793104e-06,
"loss": 0.2445,
"step": 13550
},
{
"epoch": 2.3478035281909375,
"grad_norm": 4.091955184936523,
"learning_rate": 9.882758620689657e-07,
"loss": 0.2425,
"step": 13575
},
{
"epoch": 2.352127291594604,
"grad_norm": 4.664127349853516,
"learning_rate": 9.710344827586207e-07,
"loss": 0.2414,
"step": 13600
},
{
"epoch": 2.3564510549982707,
"grad_norm": 4.556197166442871,
"learning_rate": 9.53793103448276e-07,
"loss": 0.2503,
"step": 13625
},
{
"epoch": 2.360774818401937,
"grad_norm": 4.94804573059082,
"learning_rate": 9.365517241379311e-07,
"loss": 0.2394,
"step": 13650
},
{
"epoch": 2.3650985818056034,
"grad_norm": 5.318421840667725,
"learning_rate": 9.193103448275863e-07,
"loss": 0.2515,
"step": 13675
},
{
"epoch": 2.36942234520927,
"grad_norm": 4.128868579864502,
"learning_rate": 9.020689655172414e-07,
"loss": 0.2494,
"step": 13700
},
{
"epoch": 2.3737461086129366,
"grad_norm": 4.886101245880127,
"learning_rate": 8.848275862068967e-07,
"loss": 0.2567,
"step": 13725
},
{
"epoch": 2.3780698720166034,
"grad_norm": 5.137601852416992,
"learning_rate": 8.675862068965518e-07,
"loss": 0.2529,
"step": 13750
},
{
"epoch": 2.3823936354202697,
"grad_norm": 5.788776397705078,
"learning_rate": 8.50344827586207e-07,
"loss": 0.2369,
"step": 13775
},
{
"epoch": 2.3867173988239365,
"grad_norm": 5.918336868286133,
"learning_rate": 8.331034482758621e-07,
"loss": 0.2505,
"step": 13800
},
{
"epoch": 2.391041162227603,
"grad_norm": 4.928556442260742,
"learning_rate": 8.158620689655173e-07,
"loss": 0.2406,
"step": 13825
},
{
"epoch": 2.3953649256312692,
"grad_norm": 5.406356334686279,
"learning_rate": 7.986206896551725e-07,
"loss": 0.2393,
"step": 13850
},
{
"epoch": 2.399688689034936,
"grad_norm": 4.0312018394470215,
"learning_rate": 7.813793103448276e-07,
"loss": 0.2369,
"step": 13875
},
{
"epoch": 2.4040124524386024,
"grad_norm": 5.446890830993652,
"learning_rate": 7.641379310344828e-07,
"loss": 0.2313,
"step": 13900
},
{
"epoch": 2.408336215842269,
"grad_norm": 5.433596611022949,
"learning_rate": 7.468965517241381e-07,
"loss": 0.2434,
"step": 13925
},
{
"epoch": 2.4126599792459356,
"grad_norm": 3.490582227706909,
"learning_rate": 7.296551724137932e-07,
"loss": 0.2268,
"step": 13950
},
{
"epoch": 2.4169837426496024,
"grad_norm": 4.078389644622803,
"learning_rate": 7.124137931034484e-07,
"loss": 0.2397,
"step": 13975
},
{
"epoch": 2.4213075060532687,
"grad_norm": 5.175432205200195,
"learning_rate": 6.951724137931034e-07,
"loss": 0.243,
"step": 14000
},
{
"epoch": 2.4213075060532687,
"eval_loss": 0.2580419182777405,
"eval_runtime": 5125.701,
"eval_samples_per_second": 4.512,
"eval_steps_per_second": 0.564,
"eval_wer": 32.74277400445614,
"step": 14000
},
{
"epoch": 2.425631269456935,
"grad_norm": 5.08500337600708,
"learning_rate": 6.779310344827587e-07,
"loss": 0.2431,
"step": 14025
},
{
"epoch": 2.429955032860602,
"grad_norm": 4.165167331695557,
"learning_rate": 6.606896551724139e-07,
"loss": 0.2549,
"step": 14050
},
{
"epoch": 2.4342787962642682,
"grad_norm": 4.766578197479248,
"learning_rate": 6.43448275862069e-07,
"loss": 0.2325,
"step": 14075
},
{
"epoch": 2.438602559667935,
"grad_norm": 4.852494716644287,
"learning_rate": 6.262068965517242e-07,
"loss": 0.2471,
"step": 14100
},
{
"epoch": 2.4429263230716014,
"grad_norm": 5.093419551849365,
"learning_rate": 6.089655172413794e-07,
"loss": 0.2543,
"step": 14125
},
{
"epoch": 2.447250086475268,
"grad_norm": 6.8064045906066895,
"learning_rate": 5.917241379310346e-07,
"loss": 0.2446,
"step": 14150
},
{
"epoch": 2.4515738498789346,
"grad_norm": 6.039092063903809,
"learning_rate": 5.744827586206897e-07,
"loss": 0.2437,
"step": 14175
},
{
"epoch": 2.4558976132826014,
"grad_norm": 4.570720672607422,
"learning_rate": 5.572413793103449e-07,
"loss": 0.2357,
"step": 14200
},
{
"epoch": 2.4602213766862677,
"grad_norm": 6.016234397888184,
"learning_rate": 5.4e-07,
"loss": 0.2389,
"step": 14225
},
{
"epoch": 2.4645451400899345,
"grad_norm": 5.1399407386779785,
"learning_rate": 5.227586206896552e-07,
"loss": 0.2551,
"step": 14250
},
{
"epoch": 2.468868903493601,
"grad_norm": 4.784331798553467,
"learning_rate": 5.055172413793104e-07,
"loss": 0.2292,
"step": 14275
},
{
"epoch": 2.4731926668972672,
"grad_norm": 4.694582462310791,
"learning_rate": 4.882758620689656e-07,
"loss": 0.238,
"step": 14300
},
{
"epoch": 2.477516430300934,
"grad_norm": 4.42811393737793,
"learning_rate": 4.710344827586207e-07,
"loss": 0.2471,
"step": 14325
},
{
"epoch": 2.4818401937046004,
"grad_norm": 5.3042731285095215,
"learning_rate": 4.537931034482759e-07,
"loss": 0.2469,
"step": 14350
},
{
"epoch": 2.486163957108267,
"grad_norm": 5.875729560852051,
"learning_rate": 4.365517241379311e-07,
"loss": 0.2438,
"step": 14375
},
{
"epoch": 2.4904877205119336,
"grad_norm": 4.108542442321777,
"learning_rate": 4.193103448275863e-07,
"loss": 0.2482,
"step": 14400
},
{
"epoch": 2.4948114839156004,
"grad_norm": 4.775691986083984,
"learning_rate": 4.020689655172414e-07,
"loss": 0.2374,
"step": 14425
},
{
"epoch": 2.4991352473192667,
"grad_norm": 4.536764621734619,
"learning_rate": 3.848275862068966e-07,
"loss": 0.2399,
"step": 14450
},
{
"epoch": 2.503459010722933,
"grad_norm": 3.924809694290161,
"learning_rate": 3.6758620689655174e-07,
"loss": 0.2518,
"step": 14475
},
{
"epoch": 2.5077827741266,
"grad_norm": 4.728981971740723,
"learning_rate": 3.50344827586207e-07,
"loss": 0.2688,
"step": 14500
},
{
"epoch": 2.5121065375302662,
"grad_norm": 4.350174903869629,
"learning_rate": 3.331034482758621e-07,
"loss": 0.2495,
"step": 14525
},
{
"epoch": 2.516430300933933,
"grad_norm": 5.987773418426514,
"learning_rate": 3.158620689655173e-07,
"loss": 0.2436,
"step": 14550
},
{
"epoch": 2.5207540643375994,
"grad_norm": 3.9468040466308594,
"learning_rate": 2.9862068965517244e-07,
"loss": 0.2241,
"step": 14575
},
{
"epoch": 2.525077827741266,
"grad_norm": 4.972886562347412,
"learning_rate": 2.813793103448276e-07,
"loss": 0.2383,
"step": 14600
},
{
"epoch": 2.5294015911449326,
"grad_norm": 5.958413600921631,
"learning_rate": 2.6413793103448276e-07,
"loss": 0.2435,
"step": 14625
},
{
"epoch": 2.533725354548599,
"grad_norm": 4.814051151275635,
"learning_rate": 2.4689655172413795e-07,
"loss": 0.2485,
"step": 14650
},
{
"epoch": 2.5380491179522657,
"grad_norm": 5.348843574523926,
"learning_rate": 2.2965517241379313e-07,
"loss": 0.2542,
"step": 14675
},
{
"epoch": 2.542372881355932,
"grad_norm": 4.570481300354004,
"learning_rate": 2.124137931034483e-07,
"loss": 0.2415,
"step": 14700
},
{
"epoch": 2.546696644759599,
"grad_norm": 4.454525470733643,
"learning_rate": 1.9517241379310346e-07,
"loss": 0.2356,
"step": 14725
},
{
"epoch": 2.5510204081632653,
"grad_norm": 6.421968460083008,
"learning_rate": 1.7793103448275864e-07,
"loss": 0.2421,
"step": 14750
},
{
"epoch": 2.555344171566932,
"grad_norm": 4.840851306915283,
"learning_rate": 1.606896551724138e-07,
"loss": 0.2446,
"step": 14775
},
{
"epoch": 2.5596679349705984,
"grad_norm": 5.429544925689697,
"learning_rate": 1.4344827586206897e-07,
"loss": 0.2477,
"step": 14800
},
{
"epoch": 2.5639916983742648,
"grad_norm": 5.538628101348877,
"learning_rate": 1.2620689655172415e-07,
"loss": 0.2475,
"step": 14825
},
{
"epoch": 2.5683154617779316,
"grad_norm": 4.517398357391357,
"learning_rate": 1.0896551724137932e-07,
"loss": 0.2448,
"step": 14850
},
{
"epoch": 2.572639225181598,
"grad_norm": 4.563333034515381,
"learning_rate": 9.172413793103449e-08,
"loss": 0.2316,
"step": 14875
},
{
"epoch": 2.5769629885852647,
"grad_norm": 4.876601696014404,
"learning_rate": 7.448275862068966e-08,
"loss": 0.2378,
"step": 14900
},
{
"epoch": 2.581286751988931,
"grad_norm": 4.932113170623779,
"learning_rate": 5.724137931034483e-08,
"loss": 0.2549,
"step": 14925
},
{
"epoch": 2.585610515392598,
"grad_norm": 5.311142921447754,
"learning_rate": 4e-08,
"loss": 0.2477,
"step": 14950
},
{
"epoch": 2.5899342787962643,
"grad_norm": 5.145079135894775,
"learning_rate": 2.2758620689655175e-08,
"loss": 0.2361,
"step": 14975
},
{
"epoch": 2.5942580421999306,
"grad_norm": 4.524856090545654,
"learning_rate": 5.517241379310346e-09,
"loss": 0.2344,
"step": 15000
},
{
"epoch": 2.5942580421999306,
"eval_loss": 0.2568697929382324,
"eval_runtime": 5134.1958,
"eval_samples_per_second": 4.504,
"eval_steps_per_second": 0.563,
"eval_wer": 32.56401305446938,
"step": 15000
}
],
"logging_steps": 25,
"max_steps": 15000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.90799081332736e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}