fsicoli's picture
Upload 9 files
a01fb0b verified
{
"best_metric": 0.10248288219107954,
"best_model_checkpoint": "d:\\\\whisper-medium-pt-cv19-fleurs2-lr-wu\\checkpoint-5000",
"epoch": 11.441647597254004,
"eval_steps": 5000,
"global_step": 25000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011441647597254004,
"grad_norm": 16.260902404785156,
"learning_rate": 2.875e-07,
"loss": 0.7432,
"step": 25
},
{
"epoch": 0.02288329519450801,
"grad_norm": 28.320634841918945,
"learning_rate": 6.000000000000001e-07,
"loss": 1.1561,
"step": 50
},
{
"epoch": 0.034324942791762014,
"grad_norm": 9.76030158996582,
"learning_rate": 9.124999999999999e-07,
"loss": 0.479,
"step": 75
},
{
"epoch": 0.04576659038901602,
"grad_norm": 17.32778549194336,
"learning_rate": 1.2250000000000001e-06,
"loss": 0.3224,
"step": 100
},
{
"epoch": 0.057208237986270026,
"grad_norm": 7.594138145446777,
"learning_rate": 1.5375e-06,
"loss": 0.2161,
"step": 125
},
{
"epoch": 0.06864988558352403,
"grad_norm": 18.392087936401367,
"learning_rate": 1.85e-06,
"loss": 0.262,
"step": 150
},
{
"epoch": 0.08009153318077804,
"grad_norm": 10.459943771362305,
"learning_rate": 2.1625e-06,
"loss": 0.1765,
"step": 175
},
{
"epoch": 0.09153318077803203,
"grad_norm": 17.077810287475586,
"learning_rate": 2.4750000000000004e-06,
"loss": 0.2262,
"step": 200
},
{
"epoch": 0.10297482837528604,
"grad_norm": 6.674950122833252,
"learning_rate": 2.7875e-06,
"loss": 0.1917,
"step": 225
},
{
"epoch": 0.11441647597254005,
"grad_norm": 10.16779613494873,
"learning_rate": 3.1e-06,
"loss": 0.2162,
"step": 250
},
{
"epoch": 0.12585812356979406,
"grad_norm": 6.614413738250732,
"learning_rate": 3.4125000000000004e-06,
"loss": 0.2,
"step": 275
},
{
"epoch": 0.13729977116704806,
"grad_norm": 13.418252944946289,
"learning_rate": 3.725e-06,
"loss": 0.2007,
"step": 300
},
{
"epoch": 0.14874141876430205,
"grad_norm": 6.218944549560547,
"learning_rate": 4.037500000000001e-06,
"loss": 0.1574,
"step": 325
},
{
"epoch": 0.16018306636155608,
"grad_norm": 15.351899147033691,
"learning_rate": 4.35e-06,
"loss": 0.1894,
"step": 350
},
{
"epoch": 0.17162471395881007,
"grad_norm": 7.518232822418213,
"learning_rate": 4.6625e-06,
"loss": 0.1861,
"step": 375
},
{
"epoch": 0.18306636155606407,
"grad_norm": 16.16284942626953,
"learning_rate": 4.975000000000001e-06,
"loss": 0.2085,
"step": 400
},
{
"epoch": 0.1945080091533181,
"grad_norm": 8.712101936340332,
"learning_rate": 5.2875e-06,
"loss": 0.1664,
"step": 425
},
{
"epoch": 0.20594965675057209,
"grad_norm": 15.569042205810547,
"learning_rate": 5.600000000000001e-06,
"loss": 0.2054,
"step": 450
},
{
"epoch": 0.21739130434782608,
"grad_norm": 7.28713846206665,
"learning_rate": 5.9125e-06,
"loss": 0.1822,
"step": 475
},
{
"epoch": 0.2288329519450801,
"grad_norm": 14.67645263671875,
"learning_rate": 6.2250000000000005e-06,
"loss": 0.2018,
"step": 500
},
{
"epoch": 0.2402745995423341,
"grad_norm": 9.035391807556152,
"learning_rate": 6.244132653061225e-06,
"loss": 0.175,
"step": 525
},
{
"epoch": 0.2517162471395881,
"grad_norm": 13.814667701721191,
"learning_rate": 6.237755102040817e-06,
"loss": 0.2023,
"step": 550
},
{
"epoch": 0.2631578947368421,
"grad_norm": 5.595588684082031,
"learning_rate": 6.231377551020408e-06,
"loss": 0.1745,
"step": 575
},
{
"epoch": 0.2745995423340961,
"grad_norm": 14.676183700561523,
"learning_rate": 6.2250000000000005e-06,
"loss": 0.2066,
"step": 600
},
{
"epoch": 0.28604118993135014,
"grad_norm": 7.4100213050842285,
"learning_rate": 6.218622448979592e-06,
"loss": 0.1968,
"step": 625
},
{
"epoch": 0.2974828375286041,
"grad_norm": 17.322240829467773,
"learning_rate": 6.212244897959184e-06,
"loss": 0.1916,
"step": 650
},
{
"epoch": 0.30892448512585813,
"grad_norm": 7.691104412078857,
"learning_rate": 6.205867346938775e-06,
"loss": 0.1725,
"step": 675
},
{
"epoch": 0.32036613272311215,
"grad_norm": 16.282310485839844,
"learning_rate": 6.1994897959183676e-06,
"loss": 0.2122,
"step": 700
},
{
"epoch": 0.3318077803203661,
"grad_norm": 8.981350898742676,
"learning_rate": 6.19311224489796e-06,
"loss": 0.1638,
"step": 725
},
{
"epoch": 0.34324942791762014,
"grad_norm": 16.19715690612793,
"learning_rate": 6.1867346938775515e-06,
"loss": 0.2046,
"step": 750
},
{
"epoch": 0.35469107551487417,
"grad_norm": 5.906866073608398,
"learning_rate": 6.180357142857143e-06,
"loss": 0.1916,
"step": 775
},
{
"epoch": 0.36613272311212813,
"grad_norm": 10.55771255493164,
"learning_rate": 6.1739795918367354e-06,
"loss": 0.2104,
"step": 800
},
{
"epoch": 0.37757437070938216,
"grad_norm": 6.290849685668945,
"learning_rate": 6.167602040816327e-06,
"loss": 0.1618,
"step": 825
},
{
"epoch": 0.3890160183066362,
"grad_norm": 15.78228759765625,
"learning_rate": 6.1612244897959185e-06,
"loss": 0.1897,
"step": 850
},
{
"epoch": 0.40045766590389015,
"grad_norm": 4.934595108032227,
"learning_rate": 6.15484693877551e-06,
"loss": 0.1808,
"step": 875
},
{
"epoch": 0.41189931350114417,
"grad_norm": 8.691235542297363,
"learning_rate": 6.1484693877551024e-06,
"loss": 0.1913,
"step": 900
},
{
"epoch": 0.4233409610983982,
"grad_norm": 9.48690414428711,
"learning_rate": 6.142091836734694e-06,
"loss": 0.167,
"step": 925
},
{
"epoch": 0.43478260869565216,
"grad_norm": 17.154409408569336,
"learning_rate": 6.135714285714286e-06,
"loss": 0.1863,
"step": 950
},
{
"epoch": 0.4462242562929062,
"grad_norm": 5.628190517425537,
"learning_rate": 6.129336734693878e-06,
"loss": 0.1632,
"step": 975
},
{
"epoch": 0.4576659038901602,
"grad_norm": 14.407511711120605,
"learning_rate": 6.1229591836734695e-06,
"loss": 0.1877,
"step": 1000
},
{
"epoch": 0.4691075514874142,
"grad_norm": 4.591071128845215,
"learning_rate": 6.116581632653062e-06,
"loss": 0.1698,
"step": 1025
},
{
"epoch": 0.4805491990846682,
"grad_norm": 10.605140686035156,
"learning_rate": 6.110204081632653e-06,
"loss": 0.1764,
"step": 1050
},
{
"epoch": 0.4919908466819222,
"grad_norm": 9.22734260559082,
"learning_rate": 6.103826530612245e-06,
"loss": 0.1632,
"step": 1075
},
{
"epoch": 0.5034324942791762,
"grad_norm": 13.262040138244629,
"learning_rate": 6.0974489795918365e-06,
"loss": 0.1734,
"step": 1100
},
{
"epoch": 0.5148741418764302,
"grad_norm": 9.931065559387207,
"learning_rate": 6.091071428571429e-06,
"loss": 0.1628,
"step": 1125
},
{
"epoch": 0.5263157894736842,
"grad_norm": 17.013771057128906,
"learning_rate": 6.08469387755102e-06,
"loss": 0.1902,
"step": 1150
},
{
"epoch": 0.5377574370709383,
"grad_norm": 4.440840244293213,
"learning_rate": 6.078316326530613e-06,
"loss": 0.1729,
"step": 1175
},
{
"epoch": 0.5491990846681922,
"grad_norm": 6.86649751663208,
"learning_rate": 6.071938775510204e-06,
"loss": 0.1728,
"step": 1200
},
{
"epoch": 0.5606407322654462,
"grad_norm": 5.384552478790283,
"learning_rate": 6.065561224489797e-06,
"loss": 0.1661,
"step": 1225
},
{
"epoch": 0.5720823798627003,
"grad_norm": 16.237516403198242,
"learning_rate": 6.059183673469388e-06,
"loss": 0.1866,
"step": 1250
},
{
"epoch": 0.5835240274599542,
"grad_norm": 9.271600723266602,
"learning_rate": 6.05280612244898e-06,
"loss": 0.1572,
"step": 1275
},
{
"epoch": 0.5949656750572082,
"grad_norm": 12.505192756652832,
"learning_rate": 6.046428571428571e-06,
"loss": 0.1793,
"step": 1300
},
{
"epoch": 0.6064073226544623,
"grad_norm": 4.431430816650391,
"learning_rate": 6.040051020408164e-06,
"loss": 0.1742,
"step": 1325
},
{
"epoch": 0.6178489702517163,
"grad_norm": 13.353468894958496,
"learning_rate": 6.033673469387755e-06,
"loss": 0.1813,
"step": 1350
},
{
"epoch": 0.6292906178489702,
"grad_norm": 10.686460494995117,
"learning_rate": 6.027295918367347e-06,
"loss": 0.1789,
"step": 1375
},
{
"epoch": 0.6407322654462243,
"grad_norm": 8.10379409790039,
"learning_rate": 6.020918367346939e-06,
"loss": 0.1584,
"step": 1400
},
{
"epoch": 0.6521739130434783,
"grad_norm": 5.418963432312012,
"learning_rate": 6.014540816326531e-06,
"loss": 0.1668,
"step": 1425
},
{
"epoch": 0.6636155606407322,
"grad_norm": 12.503841400146484,
"learning_rate": 6.008163265306123e-06,
"loss": 0.1827,
"step": 1450
},
{
"epoch": 0.6750572082379863,
"grad_norm": 6.517795562744141,
"learning_rate": 6.001785714285715e-06,
"loss": 0.171,
"step": 1475
},
{
"epoch": 0.6864988558352403,
"grad_norm": 13.4345703125,
"learning_rate": 5.995408163265306e-06,
"loss": 0.1854,
"step": 1500
},
{
"epoch": 0.6979405034324943,
"grad_norm": 5.514252185821533,
"learning_rate": 5.989030612244898e-06,
"loss": 0.1751,
"step": 1525
},
{
"epoch": 0.7093821510297483,
"grad_norm": 16.108661651611328,
"learning_rate": 5.98265306122449e-06,
"loss": 0.2066,
"step": 1550
},
{
"epoch": 0.7208237986270023,
"grad_norm": 8.58029842376709,
"learning_rate": 5.976275510204082e-06,
"loss": 0.1787,
"step": 1575
},
{
"epoch": 0.7322654462242563,
"grad_norm": 10.38893985748291,
"learning_rate": 5.969897959183673e-06,
"loss": 0.1831,
"step": 1600
},
{
"epoch": 0.7437070938215103,
"grad_norm": 6.40317964553833,
"learning_rate": 5.963520408163266e-06,
"loss": 0.177,
"step": 1625
},
{
"epoch": 0.7551487414187643,
"grad_norm": 14.541539192199707,
"learning_rate": 5.957142857142858e-06,
"loss": 0.1835,
"step": 1650
},
{
"epoch": 0.7665903890160183,
"grad_norm": 7.053554058074951,
"learning_rate": 5.95076530612245e-06,
"loss": 0.158,
"step": 1675
},
{
"epoch": 0.7780320366132724,
"grad_norm": 12.54846477508545,
"learning_rate": 5.944387755102041e-06,
"loss": 0.1742,
"step": 1700
},
{
"epoch": 0.7894736842105263,
"grad_norm": 5.44398307800293,
"learning_rate": 5.938010204081633e-06,
"loss": 0.15,
"step": 1725
},
{
"epoch": 0.8009153318077803,
"grad_norm": 19.205368041992188,
"learning_rate": 5.931632653061225e-06,
"loss": 0.1578,
"step": 1750
},
{
"epoch": 0.8123569794050344,
"grad_norm": 5.504051685333252,
"learning_rate": 5.925255102040817e-06,
"loss": 0.1675,
"step": 1775
},
{
"epoch": 0.8237986270022883,
"grad_norm": 11.270848274230957,
"learning_rate": 5.918877551020408e-06,
"loss": 0.1861,
"step": 1800
},
{
"epoch": 0.8352402745995423,
"grad_norm": 4.46786642074585,
"learning_rate": 5.9125e-06,
"loss": 0.1471,
"step": 1825
},
{
"epoch": 0.8466819221967964,
"grad_norm": 16.594099044799805,
"learning_rate": 5.906122448979592e-06,
"loss": 0.2044,
"step": 1850
},
{
"epoch": 0.8581235697940504,
"grad_norm": 5.97224760055542,
"learning_rate": 5.8997448979591845e-06,
"loss": 0.1671,
"step": 1875
},
{
"epoch": 0.8695652173913043,
"grad_norm": 18.787992477416992,
"learning_rate": 5.893367346938776e-06,
"loss": 0.1896,
"step": 1900
},
{
"epoch": 0.8810068649885584,
"grad_norm": 5.963818550109863,
"learning_rate": 5.886989795918368e-06,
"loss": 0.1563,
"step": 1925
},
{
"epoch": 0.8924485125858124,
"grad_norm": 10.634485244750977,
"learning_rate": 5.880612244897959e-06,
"loss": 0.1709,
"step": 1950
},
{
"epoch": 0.9038901601830663,
"grad_norm": 3.953845739364624,
"learning_rate": 5.8742346938775515e-06,
"loss": 0.1466,
"step": 1975
},
{
"epoch": 0.9153318077803204,
"grad_norm": 10.291520118713379,
"learning_rate": 5.867857142857143e-06,
"loss": 0.1995,
"step": 2000
},
{
"epoch": 0.9267734553775744,
"grad_norm": 7.01460599899292,
"learning_rate": 5.861479591836735e-06,
"loss": 0.1554,
"step": 2025
},
{
"epoch": 0.9382151029748284,
"grad_norm": 9.408185005187988,
"learning_rate": 5.855357142857143e-06,
"loss": 0.2045,
"step": 2050
},
{
"epoch": 0.9496567505720824,
"grad_norm": 7.104637622833252,
"learning_rate": 5.848979591836735e-06,
"loss": 0.1728,
"step": 2075
},
{
"epoch": 0.9610983981693364,
"grad_norm": 16.595233917236328,
"learning_rate": 5.842602040816327e-06,
"loss": 0.2121,
"step": 2100
},
{
"epoch": 0.9725400457665904,
"grad_norm": 3.5816707611083984,
"learning_rate": 5.836224489795919e-06,
"loss": 0.1576,
"step": 2125
},
{
"epoch": 0.9839816933638444,
"grad_norm": 16.117435455322266,
"learning_rate": 5.829846938775511e-06,
"loss": 0.1948,
"step": 2150
},
{
"epoch": 0.9954233409610984,
"grad_norm": 7.5333251953125,
"learning_rate": 5.823469387755102e-06,
"loss": 0.1679,
"step": 2175
},
{
"epoch": 1.0068649885583525,
"grad_norm": 1.987963080406189,
"learning_rate": 5.8170918367346946e-06,
"loss": 0.1037,
"step": 2200
},
{
"epoch": 1.0183066361556063,
"grad_norm": 5.73256778717041,
"learning_rate": 5.810714285714286e-06,
"loss": 0.0692,
"step": 2225
},
{
"epoch": 1.0297482837528604,
"grad_norm": 3.2069363594055176,
"learning_rate": 5.804336734693878e-06,
"loss": 0.0989,
"step": 2250
},
{
"epoch": 1.0411899313501145,
"grad_norm": 5.0149946212768555,
"learning_rate": 5.797959183673469e-06,
"loss": 0.0731,
"step": 2275
},
{
"epoch": 1.0526315789473684,
"grad_norm": 5.605563163757324,
"learning_rate": 5.791581632653062e-06,
"loss": 0.091,
"step": 2300
},
{
"epoch": 1.0640732265446224,
"grad_norm": 4.094449043273926,
"learning_rate": 5.785204081632653e-06,
"loss": 0.0617,
"step": 2325
},
{
"epoch": 1.0755148741418765,
"grad_norm": 3.5027787685394287,
"learning_rate": 5.7788265306122455e-06,
"loss": 0.0905,
"step": 2350
},
{
"epoch": 1.0869565217391304,
"grad_norm": 3.6455764770507812,
"learning_rate": 5.772448979591837e-06,
"loss": 0.0597,
"step": 2375
},
{
"epoch": 1.0983981693363845,
"grad_norm": 3.261543035507202,
"learning_rate": 5.766071428571429e-06,
"loss": 0.097,
"step": 2400
},
{
"epoch": 1.1098398169336385,
"grad_norm": 5.9047698974609375,
"learning_rate": 5.759693877551021e-06,
"loss": 0.0803,
"step": 2425
},
{
"epoch": 1.1212814645308924,
"grad_norm": 1.9865431785583496,
"learning_rate": 5.7533163265306125e-06,
"loss": 0.0913,
"step": 2450
},
{
"epoch": 1.1327231121281465,
"grad_norm": 5.4243316650390625,
"learning_rate": 5.746938775510204e-06,
"loss": 0.0652,
"step": 2475
},
{
"epoch": 1.1441647597254005,
"grad_norm": 5.0221452713012695,
"learning_rate": 5.7405612244897965e-06,
"loss": 0.0825,
"step": 2500
},
{
"epoch": 1.1556064073226544,
"grad_norm": 6.293609619140625,
"learning_rate": 5.734183673469388e-06,
"loss": 0.0819,
"step": 2525
},
{
"epoch": 1.1670480549199085,
"grad_norm": 4.149010181427002,
"learning_rate": 5.7278061224489796e-06,
"loss": 0.1188,
"step": 2550
},
{
"epoch": 1.1784897025171626,
"grad_norm": 2.6588358879089355,
"learning_rate": 5.721428571428572e-06,
"loss": 0.0723,
"step": 2575
},
{
"epoch": 1.1899313501144164,
"grad_norm": 3.407505750656128,
"learning_rate": 5.7150510204081635e-06,
"loss": 0.0948,
"step": 2600
},
{
"epoch": 1.2013729977116705,
"grad_norm": 6.228948593139648,
"learning_rate": 5.708673469387756e-06,
"loss": 0.0777,
"step": 2625
},
{
"epoch": 1.2128146453089246,
"grad_norm": 2.339284896850586,
"learning_rate": 5.7022959183673474e-06,
"loss": 0.0896,
"step": 2650
},
{
"epoch": 1.2242562929061784,
"grad_norm": 7.423865795135498,
"learning_rate": 5.695918367346939e-06,
"loss": 0.0721,
"step": 2675
},
{
"epoch": 1.2356979405034325,
"grad_norm": 4.202932834625244,
"learning_rate": 5.6895408163265305e-06,
"loss": 0.096,
"step": 2700
},
{
"epoch": 1.2471395881006866,
"grad_norm": 5.0601725578308105,
"learning_rate": 5.683163265306123e-06,
"loss": 0.0829,
"step": 2725
},
{
"epoch": 1.2585812356979404,
"grad_norm": 4.4953460693359375,
"learning_rate": 5.6767857142857144e-06,
"loss": 0.1027,
"step": 2750
},
{
"epoch": 1.2700228832951945,
"grad_norm": 6.2997612953186035,
"learning_rate": 5.670408163265306e-06,
"loss": 0.0789,
"step": 2775
},
{
"epoch": 1.2814645308924484,
"grad_norm": 2.9170215129852295,
"learning_rate": 5.664030612244898e-06,
"loss": 0.1007,
"step": 2800
},
{
"epoch": 1.2929061784897025,
"grad_norm": 6.78530740737915,
"learning_rate": 5.65765306122449e-06,
"loss": 0.0787,
"step": 2825
},
{
"epoch": 1.3043478260869565,
"grad_norm": 3.2991139888763428,
"learning_rate": 5.651275510204082e-06,
"loss": 0.1118,
"step": 2850
},
{
"epoch": 1.3157894736842106,
"grad_norm": 8.449702262878418,
"learning_rate": 5.644897959183674e-06,
"loss": 0.0742,
"step": 2875
},
{
"epoch": 1.3272311212814645,
"grad_norm": 5.295764923095703,
"learning_rate": 5.638520408163265e-06,
"loss": 0.0951,
"step": 2900
},
{
"epoch": 1.3386727688787186,
"grad_norm": 5.989330291748047,
"learning_rate": 5.632142857142857e-06,
"loss": 0.0758,
"step": 2925
},
{
"epoch": 1.3501144164759724,
"grad_norm": 5.153733253479004,
"learning_rate": 5.625765306122449e-06,
"loss": 0.0984,
"step": 2950
},
{
"epoch": 1.3615560640732265,
"grad_norm": 6.334990501403809,
"learning_rate": 5.619387755102041e-06,
"loss": 0.0678,
"step": 2975
},
{
"epoch": 1.3729977116704806,
"grad_norm": 4.231469631195068,
"learning_rate": 5.613010204081632e-06,
"loss": 0.098,
"step": 3000
},
{
"epoch": 1.3844393592677346,
"grad_norm": 4.833641529083252,
"learning_rate": 5.606632653061225e-06,
"loss": 0.0624,
"step": 3025
},
{
"epoch": 1.3958810068649885,
"grad_norm": 3.244565725326538,
"learning_rate": 5.600255102040817e-06,
"loss": 0.0894,
"step": 3050
},
{
"epoch": 1.4073226544622426,
"grad_norm": 1.8014638423919678,
"learning_rate": 5.593877551020409e-06,
"loss": 0.0709,
"step": 3075
},
{
"epoch": 1.4187643020594964,
"grad_norm": 4.825936794281006,
"learning_rate": 5.5875e-06,
"loss": 0.1008,
"step": 3100
},
{
"epoch": 1.4302059496567505,
"grad_norm": 5.295871734619141,
"learning_rate": 5.581122448979592e-06,
"loss": 0.0663,
"step": 3125
},
{
"epoch": 1.4416475972540046,
"grad_norm": 4.34277868270874,
"learning_rate": 5.574744897959184e-06,
"loss": 0.0854,
"step": 3150
},
{
"epoch": 1.4530892448512587,
"grad_norm": 6.447768688201904,
"learning_rate": 5.568367346938776e-06,
"loss": 0.0813,
"step": 3175
},
{
"epoch": 1.4645308924485125,
"grad_norm": 3.3338069915771484,
"learning_rate": 5.561989795918367e-06,
"loss": 0.0921,
"step": 3200
},
{
"epoch": 1.4759725400457666,
"grad_norm": 4.244719505310059,
"learning_rate": 5.555612244897959e-06,
"loss": 0.0683,
"step": 3225
},
{
"epoch": 1.4874141876430205,
"grad_norm": 3.7263808250427246,
"learning_rate": 5.549234693877551e-06,
"loss": 0.095,
"step": 3250
},
{
"epoch": 1.4988558352402745,
"grad_norm": 6.328766822814941,
"learning_rate": 5.542857142857144e-06,
"loss": 0.0837,
"step": 3275
},
{
"epoch": 1.5102974828375286,
"grad_norm": 2.921656370162964,
"learning_rate": 5.536479591836735e-06,
"loss": 0.1021,
"step": 3300
},
{
"epoch": 1.5217391304347827,
"grad_norm": 8.238923072814941,
"learning_rate": 5.530102040816327e-06,
"loss": 0.0697,
"step": 3325
},
{
"epoch": 1.5331807780320366,
"grad_norm": 3.801389455795288,
"learning_rate": 5.523724489795918e-06,
"loss": 0.0995,
"step": 3350
},
{
"epoch": 1.5446224256292906,
"grad_norm": 6.373222351074219,
"learning_rate": 5.517346938775511e-06,
"loss": 0.0693,
"step": 3375
},
{
"epoch": 1.5560640732265445,
"grad_norm": 5.936155319213867,
"learning_rate": 5.510969387755102e-06,
"loss": 0.0989,
"step": 3400
},
{
"epoch": 1.5675057208237986,
"grad_norm": 3.9559526443481445,
"learning_rate": 5.504591836734694e-06,
"loss": 0.0799,
"step": 3425
},
{
"epoch": 1.5789473684210527,
"grad_norm": 2.2364418506622314,
"learning_rate": 5.498214285714285e-06,
"loss": 0.1069,
"step": 3450
},
{
"epoch": 1.5903890160183067,
"grad_norm": 5.343243598937988,
"learning_rate": 5.4918367346938785e-06,
"loss": 0.0711,
"step": 3475
},
{
"epoch": 1.6018306636155606,
"grad_norm": 6.238850116729736,
"learning_rate": 5.48545918367347e-06,
"loss": 0.0833,
"step": 3500
},
{
"epoch": 1.6132723112128147,
"grad_norm": 7.247325897216797,
"learning_rate": 5.479081632653062e-06,
"loss": 0.0772,
"step": 3525
},
{
"epoch": 1.6247139588100685,
"grad_norm": 5.513607025146484,
"learning_rate": 5.472704081632653e-06,
"loss": 0.102,
"step": 3550
},
{
"epoch": 1.6361556064073226,
"grad_norm": 4.2667365074157715,
"learning_rate": 5.4663265306122455e-06,
"loss": 0.084,
"step": 3575
},
{
"epoch": 1.6475972540045767,
"grad_norm": 3.866903066635132,
"learning_rate": 5.459948979591837e-06,
"loss": 0.0984,
"step": 3600
},
{
"epoch": 1.6590389016018308,
"grad_norm": 6.93393611907959,
"learning_rate": 5.453571428571429e-06,
"loss": 0.0793,
"step": 3625
},
{
"epoch": 1.6704805491990846,
"grad_norm": 5.100342750549316,
"learning_rate": 5.44719387755102e-06,
"loss": 0.0999,
"step": 3650
},
{
"epoch": 1.6819221967963387,
"grad_norm": 8.363016128540039,
"learning_rate": 5.4408163265306126e-06,
"loss": 0.0709,
"step": 3675
},
{
"epoch": 1.6933638443935926,
"grad_norm": 4.775864124298096,
"learning_rate": 5.434438775510204e-06,
"loss": 0.1039,
"step": 3700
},
{
"epoch": 1.7048054919908466,
"grad_norm": 6.310733795166016,
"learning_rate": 5.4280612244897965e-06,
"loss": 0.08,
"step": 3725
},
{
"epoch": 1.7162471395881007,
"grad_norm": 3.2022969722747803,
"learning_rate": 5.421683673469388e-06,
"loss": 0.0969,
"step": 3750
},
{
"epoch": 1.7276887871853548,
"grad_norm": 1.5878781080245972,
"learning_rate": 5.41530612244898e-06,
"loss": 0.0807,
"step": 3775
},
{
"epoch": 1.7391304347826086,
"grad_norm": 4.442117691040039,
"learning_rate": 5.408928571428572e-06,
"loss": 0.0978,
"step": 3800
},
{
"epoch": 1.7505720823798627,
"grad_norm": 6.980683326721191,
"learning_rate": 5.4025510204081635e-06,
"loss": 0.0804,
"step": 3825
},
{
"epoch": 1.7620137299771166,
"grad_norm": 1.5422896146774292,
"learning_rate": 5.396173469387755e-06,
"loss": 0.0971,
"step": 3850
},
{
"epoch": 1.7734553775743707,
"grad_norm": 5.739109516143799,
"learning_rate": 5.389795918367347e-06,
"loss": 0.0946,
"step": 3875
},
{
"epoch": 1.7848970251716247,
"grad_norm": 3.076174020767212,
"learning_rate": 5.383418367346939e-06,
"loss": 0.1183,
"step": 3900
},
{
"epoch": 1.7963386727688788,
"grad_norm": 7.785946846008301,
"learning_rate": 5.3770408163265305e-06,
"loss": 0.0806,
"step": 3925
},
{
"epoch": 1.8077803203661327,
"grad_norm": 3.3288745880126953,
"learning_rate": 5.370663265306123e-06,
"loss": 0.0924,
"step": 3950
},
{
"epoch": 1.8192219679633868,
"grad_norm": 3.700629472732544,
"learning_rate": 5.3642857142857145e-06,
"loss": 0.0827,
"step": 3975
},
{
"epoch": 1.8306636155606406,
"grad_norm": 5.729650020599365,
"learning_rate": 5.357908163265307e-06,
"loss": 0.1036,
"step": 4000
},
{
"epoch": 1.8421052631578947,
"grad_norm": 6.0177202224731445,
"learning_rate": 5.351530612244898e-06,
"loss": 0.085,
"step": 4025
},
{
"epoch": 1.8535469107551488,
"grad_norm": 4.52460241317749,
"learning_rate": 5.34515306122449e-06,
"loss": 0.0985,
"step": 4050
},
{
"epoch": 1.8649885583524028,
"grad_norm": 11.662372589111328,
"learning_rate": 5.3387755102040815e-06,
"loss": 0.0845,
"step": 4075
},
{
"epoch": 1.8764302059496567,
"grad_norm": 5.090856552124023,
"learning_rate": 5.332397959183674e-06,
"loss": 0.0884,
"step": 4100
},
{
"epoch": 1.8878718535469108,
"grad_norm": 6.701780319213867,
"learning_rate": 5.3260204081632654e-06,
"loss": 0.0708,
"step": 4125
},
{
"epoch": 1.8993135011441646,
"grad_norm": 2.6517765522003174,
"learning_rate": 5.319897959183674e-06,
"loss": 0.0956,
"step": 4150
},
{
"epoch": 1.9107551487414187,
"grad_norm": 4.654801368713379,
"learning_rate": 5.313520408163265e-06,
"loss": 0.073,
"step": 4175
},
{
"epoch": 1.9221967963386728,
"grad_norm": 4.046535968780518,
"learning_rate": 5.307142857142857e-06,
"loss": 0.0984,
"step": 4200
},
{
"epoch": 1.9336384439359269,
"grad_norm": 5.534332752227783,
"learning_rate": 5.30076530612245e-06,
"loss": 0.0764,
"step": 4225
},
{
"epoch": 1.9450800915331807,
"grad_norm": 4.218522071838379,
"learning_rate": 5.2943877551020414e-06,
"loss": 0.0941,
"step": 4250
},
{
"epoch": 1.9565217391304348,
"grad_norm": 5.872758865356445,
"learning_rate": 5.288010204081633e-06,
"loss": 0.069,
"step": 4275
},
{
"epoch": 1.9679633867276887,
"grad_norm": 4.280802249908447,
"learning_rate": 5.2816326530612245e-06,
"loss": 0.0986,
"step": 4300
},
{
"epoch": 1.9794050343249427,
"grad_norm": 6.042983531951904,
"learning_rate": 5.275255102040817e-06,
"loss": 0.0711,
"step": 4325
},
{
"epoch": 1.9908466819221968,
"grad_norm": 3.573798179626465,
"learning_rate": 5.2688775510204085e-06,
"loss": 0.0891,
"step": 4350
},
{
"epoch": 2.002288329519451,
"grad_norm": 2.0011675357818604,
"learning_rate": 5.2625e-06,
"loss": 0.0834,
"step": 4375
},
{
"epoch": 2.013729977116705,
"grad_norm": 0.759939432144165,
"learning_rate": 5.2561224489795916e-06,
"loss": 0.037,
"step": 4400
},
{
"epoch": 2.0251716247139586,
"grad_norm": 3.735200881958008,
"learning_rate": 5.249744897959184e-06,
"loss": 0.0448,
"step": 4425
},
{
"epoch": 2.0366132723112127,
"grad_norm": 2.4551169872283936,
"learning_rate": 5.243367346938776e-06,
"loss": 0.038,
"step": 4450
},
{
"epoch": 2.0480549199084668,
"grad_norm": 1.7801021337509155,
"learning_rate": 5.236989795918368e-06,
"loss": 0.05,
"step": 4475
},
{
"epoch": 2.059496567505721,
"grad_norm": 2.692507743835449,
"learning_rate": 5.2306122448979594e-06,
"loss": 0.042,
"step": 4500
},
{
"epoch": 2.070938215102975,
"grad_norm": 2.6997761726379395,
"learning_rate": 5.224234693877551e-06,
"loss": 0.0459,
"step": 4525
},
{
"epoch": 2.082379862700229,
"grad_norm": 3.892010450363159,
"learning_rate": 5.217857142857143e-06,
"loss": 0.0364,
"step": 4550
},
{
"epoch": 2.0938215102974826,
"grad_norm": 3.354294776916504,
"learning_rate": 5.211479591836735e-06,
"loss": 0.0584,
"step": 4575
},
{
"epoch": 2.1052631578947367,
"grad_norm": 3.581019163131714,
"learning_rate": 5.2051020408163264e-06,
"loss": 0.035,
"step": 4600
},
{
"epoch": 2.116704805491991,
"grad_norm": 1.7799072265625,
"learning_rate": 5.198724489795918e-06,
"loss": 0.0534,
"step": 4625
},
{
"epoch": 2.128146453089245,
"grad_norm": 4.093613624572754,
"learning_rate": 5.19234693877551e-06,
"loss": 0.0299,
"step": 4650
},
{
"epoch": 2.139588100686499,
"grad_norm": 4.328442573547363,
"learning_rate": 5.185969387755103e-06,
"loss": 0.0504,
"step": 4675
},
{
"epoch": 2.151029748283753,
"grad_norm": 4.18818998336792,
"learning_rate": 5.179591836734694e-06,
"loss": 0.0371,
"step": 4700
},
{
"epoch": 2.1624713958810067,
"grad_norm": 4.5619587898254395,
"learning_rate": 5.173214285714286e-06,
"loss": 0.0514,
"step": 4725
},
{
"epoch": 2.1739130434782608,
"grad_norm": 2.6793291568756104,
"learning_rate": 5.166836734693878e-06,
"loss": 0.0415,
"step": 4750
},
{
"epoch": 2.185354691075515,
"grad_norm": 3.8424887657165527,
"learning_rate": 5.16045918367347e-06,
"loss": 0.0584,
"step": 4775
},
{
"epoch": 2.196796338672769,
"grad_norm": 1.9429773092269897,
"learning_rate": 5.154081632653061e-06,
"loss": 0.0361,
"step": 4800
},
{
"epoch": 2.208237986270023,
"grad_norm": 2.7046594619750977,
"learning_rate": 5.147704081632653e-06,
"loss": 0.0501,
"step": 4825
},
{
"epoch": 2.219679633867277,
"grad_norm": 0.6767886281013489,
"learning_rate": 5.141326530612245e-06,
"loss": 0.0284,
"step": 4850
},
{
"epoch": 2.2311212814645307,
"grad_norm": 1.749976634979248,
"learning_rate": 5.134948979591837e-06,
"loss": 0.0641,
"step": 4875
},
{
"epoch": 2.242562929061785,
"grad_norm": 5.310193061828613,
"learning_rate": 5.128571428571429e-06,
"loss": 0.0392,
"step": 4900
},
{
"epoch": 2.254004576659039,
"grad_norm": 3.5149447917938232,
"learning_rate": 5.122193877551021e-06,
"loss": 0.064,
"step": 4925
},
{
"epoch": 2.265446224256293,
"grad_norm": 4.282719135284424,
"learning_rate": 5.115816326530612e-06,
"loss": 0.0433,
"step": 4950
},
{
"epoch": 2.276887871853547,
"grad_norm": 2.836113929748535,
"learning_rate": 5.109438775510205e-06,
"loss": 0.0547,
"step": 4975
},
{
"epoch": 2.288329519450801,
"grad_norm": 3.2092835903167725,
"learning_rate": 5.103061224489796e-06,
"loss": 0.0339,
"step": 5000
},
{
"epoch": 2.288329519450801,
"eval_loss": 0.16944891214370728,
"eval_runtime": 5496.3202,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.217,
"eval_wer": 0.10248288219107954,
"step": 5000
},
{
"epoch": 2.2997711670480547,
"grad_norm": 2.923719882965088,
"learning_rate": 5.096683673469388e-06,
"loss": 0.063,
"step": 5025
},
{
"epoch": 2.311212814645309,
"grad_norm": 3.116055727005005,
"learning_rate": 5.090306122448979e-06,
"loss": 0.0394,
"step": 5050
},
{
"epoch": 2.322654462242563,
"grad_norm": 2.1266860961914062,
"learning_rate": 5.083928571428572e-06,
"loss": 0.0497,
"step": 5075
},
{
"epoch": 2.334096109839817,
"grad_norm": 3.964972972869873,
"learning_rate": 5.077551020408163e-06,
"loss": 0.0355,
"step": 5100
},
{
"epoch": 2.345537757437071,
"grad_norm": 3.5802409648895264,
"learning_rate": 5.071173469387756e-06,
"loss": 0.0483,
"step": 5125
},
{
"epoch": 2.356979405034325,
"grad_norm": 4.559951305389404,
"learning_rate": 5.064795918367347e-06,
"loss": 0.0365,
"step": 5150
},
{
"epoch": 2.3684210526315788,
"grad_norm": 2.1596357822418213,
"learning_rate": 5.0584183673469396e-06,
"loss": 0.0507,
"step": 5175
},
{
"epoch": 2.379862700228833,
"grad_norm": 6.24343204498291,
"learning_rate": 5.052040816326531e-06,
"loss": 0.0394,
"step": 5200
},
{
"epoch": 2.391304347826087,
"grad_norm": 3.8776490688323975,
"learning_rate": 5.045663265306123e-06,
"loss": 0.0519,
"step": 5225
},
{
"epoch": 2.402745995423341,
"grad_norm": 5.080620288848877,
"learning_rate": 5.039285714285714e-06,
"loss": 0.0372,
"step": 5250
},
{
"epoch": 2.414187643020595,
"grad_norm": 2.12448787689209,
"learning_rate": 5.032908163265307e-06,
"loss": 0.0578,
"step": 5275
},
{
"epoch": 2.425629290617849,
"grad_norm": 2.113422393798828,
"learning_rate": 5.026530612244898e-06,
"loss": 0.0381,
"step": 5300
},
{
"epoch": 2.437070938215103,
"grad_norm": 3.075899124145508,
"learning_rate": 5.02015306122449e-06,
"loss": 0.051,
"step": 5325
},
{
"epoch": 2.448512585812357,
"grad_norm": 4.633288860321045,
"learning_rate": 5.013775510204082e-06,
"loss": 0.0477,
"step": 5350
},
{
"epoch": 2.459954233409611,
"grad_norm": 3.1297614574432373,
"learning_rate": 5.007397959183674e-06,
"loss": 0.0518,
"step": 5375
},
{
"epoch": 2.471395881006865,
"grad_norm": 1.0485554933547974,
"learning_rate": 5.001020408163266e-06,
"loss": 0.047,
"step": 5400
},
{
"epoch": 2.482837528604119,
"grad_norm": 2.6167232990264893,
"learning_rate": 4.9946428571428575e-06,
"loss": 0.0515,
"step": 5425
},
{
"epoch": 2.494279176201373,
"grad_norm": 3.787649393081665,
"learning_rate": 4.988265306122449e-06,
"loss": 0.0406,
"step": 5450
},
{
"epoch": 2.505720823798627,
"grad_norm": 3.5073530673980713,
"learning_rate": 4.981887755102041e-06,
"loss": 0.0451,
"step": 5475
},
{
"epoch": 2.517162471395881,
"grad_norm": 3.0327556133270264,
"learning_rate": 4.975510204081633e-06,
"loss": 0.0381,
"step": 5500
},
{
"epoch": 2.528604118993135,
"grad_norm": 4.393261432647705,
"learning_rate": 4.9691326530612246e-06,
"loss": 0.0505,
"step": 5525
},
{
"epoch": 2.540045766590389,
"grad_norm": 1.906299114227295,
"learning_rate": 4.962755102040816e-06,
"loss": 0.0406,
"step": 5550
},
{
"epoch": 2.551487414187643,
"grad_norm": 3.095064401626587,
"learning_rate": 4.9563775510204085e-06,
"loss": 0.0545,
"step": 5575
},
{
"epoch": 2.5629290617848968,
"grad_norm": 2.1432371139526367,
"learning_rate": 4.950000000000001e-06,
"loss": 0.038,
"step": 5600
},
{
"epoch": 2.5743707093821513,
"grad_norm": 2.254021167755127,
"learning_rate": 4.9436224489795924e-06,
"loss": 0.05,
"step": 5625
},
{
"epoch": 2.585812356979405,
"grad_norm": 4.11069917678833,
"learning_rate": 4.937244897959184e-06,
"loss": 0.0484,
"step": 5650
},
{
"epoch": 2.597254004576659,
"grad_norm": 2.1790149211883545,
"learning_rate": 4.9308673469387755e-06,
"loss": 0.0584,
"step": 5675
},
{
"epoch": 2.608695652173913,
"grad_norm": 2.150855541229248,
"learning_rate": 4.924489795918368e-06,
"loss": 0.0355,
"step": 5700
},
{
"epoch": 2.620137299771167,
"grad_norm": 2.5849735736846924,
"learning_rate": 4.9181122448979595e-06,
"loss": 0.0486,
"step": 5725
},
{
"epoch": 2.6315789473684212,
"grad_norm": 3.598619222640991,
"learning_rate": 4.911734693877551e-06,
"loss": 0.043,
"step": 5750
},
{
"epoch": 2.643020594965675,
"grad_norm": 3.094461441040039,
"learning_rate": 4.9053571428571425e-06,
"loss": 0.0497,
"step": 5775
},
{
"epoch": 2.654462242562929,
"grad_norm": 6.460962772369385,
"learning_rate": 4.898979591836735e-06,
"loss": 0.0369,
"step": 5800
},
{
"epoch": 2.665903890160183,
"grad_norm": 2.5654125213623047,
"learning_rate": 4.892602040816327e-06,
"loss": 0.0517,
"step": 5825
},
{
"epoch": 2.677345537757437,
"grad_norm": 1.984237790107727,
"learning_rate": 4.886224489795919e-06,
"loss": 0.0374,
"step": 5850
},
{
"epoch": 2.688787185354691,
"grad_norm": 2.4424266815185547,
"learning_rate": 4.87984693877551e-06,
"loss": 0.0586,
"step": 5875
},
{
"epoch": 2.700228832951945,
"grad_norm": 3.130814552307129,
"learning_rate": 4.873469387755102e-06,
"loss": 0.0379,
"step": 5900
},
{
"epoch": 2.7116704805491993,
"grad_norm": 1.8555734157562256,
"learning_rate": 4.867091836734694e-06,
"loss": 0.0472,
"step": 5925
},
{
"epoch": 2.723112128146453,
"grad_norm": 2.3630707263946533,
"learning_rate": 4.860714285714286e-06,
"loss": 0.0412,
"step": 5950
},
{
"epoch": 2.734553775743707,
"grad_norm": 2.636423110961914,
"learning_rate": 4.8543367346938774e-06,
"loss": 0.0509,
"step": 5975
},
{
"epoch": 2.745995423340961,
"grad_norm": 6.911414623260498,
"learning_rate": 4.847959183673469e-06,
"loss": 0.0399,
"step": 6000
},
{
"epoch": 2.757437070938215,
"grad_norm": 3.120143413543701,
"learning_rate": 4.841581632653061e-06,
"loss": 0.0517,
"step": 6025
},
{
"epoch": 2.7688787185354693,
"grad_norm": 2.109907627105713,
"learning_rate": 4.835204081632654e-06,
"loss": 0.0299,
"step": 6050
},
{
"epoch": 2.780320366132723,
"grad_norm": 3.6579062938690186,
"learning_rate": 4.828826530612245e-06,
"loss": 0.0494,
"step": 6075
},
{
"epoch": 2.791762013729977,
"grad_norm": 0.9419932961463928,
"learning_rate": 4.822448979591837e-06,
"loss": 0.0296,
"step": 6100
},
{
"epoch": 2.803203661327231,
"grad_norm": 1.7163337469100952,
"learning_rate": 4.816071428571429e-06,
"loss": 0.0512,
"step": 6125
},
{
"epoch": 2.814645308924485,
"grad_norm": 3.6414005756378174,
"learning_rate": 4.809693877551021e-06,
"loss": 0.0516,
"step": 6150
},
{
"epoch": 2.8260869565217392,
"grad_norm": 2.7387633323669434,
"learning_rate": 4.803571428571429e-06,
"loss": 0.0491,
"step": 6175
},
{
"epoch": 2.837528604118993,
"grad_norm": 6.319782733917236,
"learning_rate": 4.7971938775510205e-06,
"loss": 0.0418,
"step": 6200
},
{
"epoch": 2.8489702517162474,
"grad_norm": 3.232647180557251,
"learning_rate": 4.790816326530612e-06,
"loss": 0.0492,
"step": 6225
},
{
"epoch": 2.860411899313501,
"grad_norm": 2.536766767501831,
"learning_rate": 4.784438775510204e-06,
"loss": 0.0394,
"step": 6250
},
{
"epoch": 2.871853546910755,
"grad_norm": 3.481765031814575,
"learning_rate": 4.778061224489796e-06,
"loss": 0.0563,
"step": 6275
},
{
"epoch": 2.883295194508009,
"grad_norm": 2.599041700363159,
"learning_rate": 4.771683673469388e-06,
"loss": 0.041,
"step": 6300
},
{
"epoch": 2.8947368421052633,
"grad_norm": 2.9180805683135986,
"learning_rate": 4.76530612244898e-06,
"loss": 0.0455,
"step": 6325
},
{
"epoch": 2.9061784897025174,
"grad_norm": 5.283931255340576,
"learning_rate": 4.758928571428571e-06,
"loss": 0.045,
"step": 6350
},
{
"epoch": 2.917620137299771,
"grad_norm": 3.000849485397339,
"learning_rate": 4.752551020408164e-06,
"loss": 0.0451,
"step": 6375
},
{
"epoch": 2.929061784897025,
"grad_norm": 5.314061641693115,
"learning_rate": 4.746173469387755e-06,
"loss": 0.0401,
"step": 6400
},
{
"epoch": 2.940503432494279,
"grad_norm": 4.263325214385986,
"learning_rate": 4.739795918367347e-06,
"loss": 0.0481,
"step": 6425
},
{
"epoch": 2.9519450800915332,
"grad_norm": 3.5418529510498047,
"learning_rate": 4.7334183673469384e-06,
"loss": 0.0494,
"step": 6450
},
{
"epoch": 2.9633867276887873,
"grad_norm": 3.0929176807403564,
"learning_rate": 4.727040816326531e-06,
"loss": 0.0583,
"step": 6475
},
{
"epoch": 2.974828375286041,
"grad_norm": 3.1736650466918945,
"learning_rate": 4.720663265306122e-06,
"loss": 0.0377,
"step": 6500
},
{
"epoch": 2.9862700228832955,
"grad_norm": 2.7548913955688477,
"learning_rate": 4.714285714285715e-06,
"loss": 0.0385,
"step": 6525
},
{
"epoch": 2.997711670480549,
"grad_norm": 3.6645278930664062,
"learning_rate": 4.707908163265306e-06,
"loss": 0.0454,
"step": 6550
},
{
"epoch": 3.009153318077803,
"grad_norm": 1.9902344942092896,
"learning_rate": 4.701530612244899e-06,
"loss": 0.0287,
"step": 6575
},
{
"epoch": 3.0205949656750573,
"grad_norm": 7.351073265075684,
"learning_rate": 4.69515306122449e-06,
"loss": 0.0237,
"step": 6600
},
{
"epoch": 3.0320366132723113,
"grad_norm": 3.1090986728668213,
"learning_rate": 4.688775510204082e-06,
"loss": 0.0232,
"step": 6625
},
{
"epoch": 3.0434782608695654,
"grad_norm": 1.9417141675949097,
"learning_rate": 4.682397959183673e-06,
"loss": 0.0227,
"step": 6650
},
{
"epoch": 3.054919908466819,
"grad_norm": 4.676495552062988,
"learning_rate": 4.676020408163266e-06,
"loss": 0.0369,
"step": 6675
},
{
"epoch": 3.066361556064073,
"grad_norm": 1.6039929389953613,
"learning_rate": 4.669642857142857e-06,
"loss": 0.0184,
"step": 6700
},
{
"epoch": 3.077803203661327,
"grad_norm": 1.7776933908462524,
"learning_rate": 4.663265306122449e-06,
"loss": 0.0244,
"step": 6725
},
{
"epoch": 3.0892448512585813,
"grad_norm": 4.828967571258545,
"learning_rate": 4.656887755102041e-06,
"loss": 0.0312,
"step": 6750
},
{
"epoch": 3.1006864988558354,
"grad_norm": 8.169689178466797,
"learning_rate": 4.650510204081633e-06,
"loss": 0.0328,
"step": 6775
},
{
"epoch": 3.1121281464530894,
"grad_norm": 3.101473093032837,
"learning_rate": 4.644132653061225e-06,
"loss": 0.016,
"step": 6800
},
{
"epoch": 3.123569794050343,
"grad_norm": 0.8072414398193359,
"learning_rate": 4.637755102040817e-06,
"loss": 0.0299,
"step": 6825
},
{
"epoch": 3.135011441647597,
"grad_norm": 1.684613585472107,
"learning_rate": 4.631377551020408e-06,
"loss": 0.03,
"step": 6850
},
{
"epoch": 3.1464530892448512,
"grad_norm": 4.120328426361084,
"learning_rate": 4.625e-06,
"loss": 0.0336,
"step": 6875
},
{
"epoch": 3.1578947368421053,
"grad_norm": 3.2325098514556885,
"learning_rate": 4.618622448979592e-06,
"loss": 0.0197,
"step": 6900
},
{
"epoch": 3.1693363844393594,
"grad_norm": 2.3901312351226807,
"learning_rate": 4.612244897959184e-06,
"loss": 0.0251,
"step": 6925
},
{
"epoch": 3.1807780320366135,
"grad_norm": 3.9952516555786133,
"learning_rate": 4.605867346938775e-06,
"loss": 0.0322,
"step": 6950
},
{
"epoch": 3.192219679633867,
"grad_norm": 3.828606605529785,
"learning_rate": 4.599489795918368e-06,
"loss": 0.0406,
"step": 6975
},
{
"epoch": 3.203661327231121,
"grad_norm": 1.8031266927719116,
"learning_rate": 4.59311224489796e-06,
"loss": 0.0236,
"step": 7000
},
{
"epoch": 3.2151029748283753,
"grad_norm": 3.989572286605835,
"learning_rate": 4.5867346938775516e-06,
"loss": 0.0289,
"step": 7025
},
{
"epoch": 3.2265446224256293,
"grad_norm": 5.1198296546936035,
"learning_rate": 4.580357142857143e-06,
"loss": 0.0257,
"step": 7050
},
{
"epoch": 3.2379862700228834,
"grad_norm": 3.9181671142578125,
"learning_rate": 4.573979591836735e-06,
"loss": 0.0344,
"step": 7075
},
{
"epoch": 3.2494279176201375,
"grad_norm": 2.100790500640869,
"learning_rate": 4.567602040816327e-06,
"loss": 0.0182,
"step": 7100
},
{
"epoch": 3.260869565217391,
"grad_norm": 1.8334282636642456,
"learning_rate": 4.561224489795919e-06,
"loss": 0.0287,
"step": 7125
},
{
"epoch": 3.272311212814645,
"grad_norm": 4.7145771980285645,
"learning_rate": 4.55484693877551e-06,
"loss": 0.0339,
"step": 7150
},
{
"epoch": 3.2837528604118993,
"grad_norm": 3.8231897354125977,
"learning_rate": 4.548469387755102e-06,
"loss": 0.0243,
"step": 7175
},
{
"epoch": 3.2951945080091534,
"grad_norm": 5.662996768951416,
"learning_rate": 4.542091836734694e-06,
"loss": 0.0281,
"step": 7200
},
{
"epoch": 3.3066361556064074,
"grad_norm": 1.8065590858459473,
"learning_rate": 4.5357142857142865e-06,
"loss": 0.0361,
"step": 7225
},
{
"epoch": 3.3180778032036615,
"grad_norm": 4.327584266662598,
"learning_rate": 4.529336734693878e-06,
"loss": 0.0285,
"step": 7250
},
{
"epoch": 3.329519450800915,
"grad_norm": 3.929110527038574,
"learning_rate": 4.5229591836734695e-06,
"loss": 0.029,
"step": 7275
},
{
"epoch": 3.3409610983981692,
"grad_norm": 4.633998870849609,
"learning_rate": 4.516581632653061e-06,
"loss": 0.0277,
"step": 7300
},
{
"epoch": 3.3524027459954233,
"grad_norm": 1.1148935556411743,
"learning_rate": 4.5102040816326535e-06,
"loss": 0.0319,
"step": 7325
},
{
"epoch": 3.3638443935926774,
"grad_norm": 8.381105422973633,
"learning_rate": 4.503826530612245e-06,
"loss": 0.0214,
"step": 7350
},
{
"epoch": 3.3752860411899315,
"grad_norm": 3.111279010772705,
"learning_rate": 4.4974489795918366e-06,
"loss": 0.0341,
"step": 7375
},
{
"epoch": 3.386727688787185,
"grad_norm": 2.937441349029541,
"learning_rate": 4.491071428571428e-06,
"loss": 0.0228,
"step": 7400
},
{
"epoch": 3.398169336384439,
"grad_norm": 2.1108193397521973,
"learning_rate": 4.484693877551021e-06,
"loss": 0.0276,
"step": 7425
},
{
"epoch": 3.4096109839816933,
"grad_norm": 5.9128031730651855,
"learning_rate": 4.478316326530613e-06,
"loss": 0.0273,
"step": 7450
},
{
"epoch": 3.4210526315789473,
"grad_norm": 0.7438346743583679,
"learning_rate": 4.4719387755102044e-06,
"loss": 0.0305,
"step": 7475
},
{
"epoch": 3.4324942791762014,
"grad_norm": 7.93667459487915,
"learning_rate": 4.465561224489796e-06,
"loss": 0.0265,
"step": 7500
},
{
"epoch": 3.4439359267734555,
"grad_norm": 4.7441277503967285,
"learning_rate": 4.459183673469388e-06,
"loss": 0.0285,
"step": 7525
},
{
"epoch": 3.4553775743707096,
"grad_norm": 6.173753261566162,
"learning_rate": 4.45280612244898e-06,
"loss": 0.0341,
"step": 7550
},
{
"epoch": 3.466819221967963,
"grad_norm": 1.397835373878479,
"learning_rate": 4.4464285714285715e-06,
"loss": 0.0304,
"step": 7575
},
{
"epoch": 3.4782608695652173,
"grad_norm": 1.049957275390625,
"learning_rate": 4.440051020408163e-06,
"loss": 0.0315,
"step": 7600
},
{
"epoch": 3.4897025171624714,
"grad_norm": 1.9728070497512817,
"learning_rate": 4.433673469387755e-06,
"loss": 0.0345,
"step": 7625
},
{
"epoch": 3.5011441647597255,
"grad_norm": 3.764899492263794,
"learning_rate": 4.427295918367347e-06,
"loss": 0.0312,
"step": 7650
},
{
"epoch": 3.5125858123569795,
"grad_norm": 3.4769232273101807,
"learning_rate": 4.420918367346939e-06,
"loss": 0.0314,
"step": 7675
},
{
"epoch": 3.524027459954233,
"grad_norm": 2.4961886405944824,
"learning_rate": 4.414540816326531e-06,
"loss": 0.0269,
"step": 7700
},
{
"epoch": 3.5354691075514877,
"grad_norm": 2.6600871086120605,
"learning_rate": 4.408163265306122e-06,
"loss": 0.0304,
"step": 7725
},
{
"epoch": 3.5469107551487413,
"grad_norm": 3.568831205368042,
"learning_rate": 4.401785714285715e-06,
"loss": 0.026,
"step": 7750
},
{
"epoch": 3.5583524027459954,
"grad_norm": 2.488873243331909,
"learning_rate": 4.395408163265306e-06,
"loss": 0.0319,
"step": 7775
},
{
"epoch": 3.5697940503432495,
"grad_norm": 3.0144600868225098,
"learning_rate": 4.389030612244898e-06,
"loss": 0.0241,
"step": 7800
},
{
"epoch": 3.5812356979405036,
"grad_norm": 5.78762149810791,
"learning_rate": 4.3826530612244894e-06,
"loss": 0.0335,
"step": 7825
},
{
"epoch": 3.5926773455377576,
"grad_norm": 7.364430904388428,
"learning_rate": 4.376275510204082e-06,
"loss": 0.03,
"step": 7850
},
{
"epoch": 3.6041189931350113,
"grad_norm": 1.7618129253387451,
"learning_rate": 4.369897959183673e-06,
"loss": 0.0269,
"step": 7875
},
{
"epoch": 3.6155606407322654,
"grad_norm": 2.5900871753692627,
"learning_rate": 4.363520408163266e-06,
"loss": 0.021,
"step": 7900
},
{
"epoch": 3.6270022883295194,
"grad_norm": 1.509843111038208,
"learning_rate": 4.357142857142857e-06,
"loss": 0.0355,
"step": 7925
},
{
"epoch": 3.6384439359267735,
"grad_norm": 7.716503143310547,
"learning_rate": 4.35076530612245e-06,
"loss": 0.0257,
"step": 7950
},
{
"epoch": 3.6498855835240276,
"grad_norm": 1.4650408029556274,
"learning_rate": 4.344387755102041e-06,
"loss": 0.0377,
"step": 7975
},
{
"epoch": 3.6613272311212812,
"grad_norm": 2.744730234146118,
"learning_rate": 4.338010204081633e-06,
"loss": 0.0277,
"step": 8000
},
{
"epoch": 3.6727688787185357,
"grad_norm": 3.6976919174194336,
"learning_rate": 4.331632653061224e-06,
"loss": 0.0282,
"step": 8025
},
{
"epoch": 3.6842105263157894,
"grad_norm": 7.903183460235596,
"learning_rate": 4.325255102040817e-06,
"loss": 0.0324,
"step": 8050
},
{
"epoch": 3.6956521739130435,
"grad_norm": 2.9782605171203613,
"learning_rate": 4.318877551020408e-06,
"loss": 0.0304,
"step": 8075
},
{
"epoch": 3.7070938215102975,
"grad_norm": 3.6786653995513916,
"learning_rate": 4.3125e-06,
"loss": 0.0304,
"step": 8100
},
{
"epoch": 3.7185354691075516,
"grad_norm": 2.000511884689331,
"learning_rate": 4.306122448979592e-06,
"loss": 0.0266,
"step": 8125
},
{
"epoch": 3.7299771167048057,
"grad_norm": 3.0682075023651123,
"learning_rate": 4.299744897959184e-06,
"loss": 0.0314,
"step": 8150
},
{
"epoch": 3.7414187643020593,
"grad_norm": 3.421633720397949,
"learning_rate": 4.293367346938776e-06,
"loss": 0.0296,
"step": 8175
},
{
"epoch": 3.7528604118993134,
"grad_norm": 1.5098000764846802,
"learning_rate": 4.286989795918368e-06,
"loss": 0.0211,
"step": 8200
},
{
"epoch": 3.7643020594965675,
"grad_norm": 1.048941731452942,
"learning_rate": 4.280612244897959e-06,
"loss": 0.032,
"step": 8225
},
{
"epoch": 3.7757437070938216,
"grad_norm": 13.178288459777832,
"learning_rate": 4.274234693877551e-06,
"loss": 0.0266,
"step": 8250
},
{
"epoch": 3.7871853546910756,
"grad_norm": 2.7297298908233643,
"learning_rate": 4.267857142857143e-06,
"loss": 0.0254,
"step": 8275
},
{
"epoch": 3.7986270022883293,
"grad_norm": 5.409793376922607,
"learning_rate": 4.261479591836735e-06,
"loss": 0.0283,
"step": 8300
},
{
"epoch": 3.8100686498855834,
"grad_norm": 1.6612489223480225,
"learning_rate": 4.255102040816326e-06,
"loss": 0.0263,
"step": 8325
},
{
"epoch": 3.8215102974828374,
"grad_norm": 9.250696182250977,
"learning_rate": 4.248724489795919e-06,
"loss": 0.0265,
"step": 8350
},
{
"epoch": 3.8329519450800915,
"grad_norm": 4.991941928863525,
"learning_rate": 4.242346938775511e-06,
"loss": 0.0314,
"step": 8375
},
{
"epoch": 3.8443935926773456,
"grad_norm": 2.8890774250030518,
"learning_rate": 4.2359693877551025e-06,
"loss": 0.0222,
"step": 8400
},
{
"epoch": 3.8558352402745997,
"grad_norm": 2.2660281658172607,
"learning_rate": 4.229591836734694e-06,
"loss": 0.0281,
"step": 8425
},
{
"epoch": 3.8672768878718538,
"grad_norm": 2.7722203731536865,
"learning_rate": 4.223214285714286e-06,
"loss": 0.0315,
"step": 8450
},
{
"epoch": 3.8787185354691074,
"grad_norm": 3.3059165477752686,
"learning_rate": 4.216836734693878e-06,
"loss": 0.0349,
"step": 8475
},
{
"epoch": 3.8901601830663615,
"grad_norm": 4.720456600189209,
"learning_rate": 4.2104591836734696e-06,
"loss": 0.0171,
"step": 8500
},
{
"epoch": 3.9016018306636155,
"grad_norm": 2.5770797729492188,
"learning_rate": 4.204081632653061e-06,
"loss": 0.0285,
"step": 8525
},
{
"epoch": 3.9130434782608696,
"grad_norm": 7.210494041442871,
"learning_rate": 4.197704081632653e-06,
"loss": 0.0243,
"step": 8550
},
{
"epoch": 3.9244851258581237,
"grad_norm": 0.5488982796669006,
"learning_rate": 4.191326530612245e-06,
"loss": 0.0293,
"step": 8575
},
{
"epoch": 3.9359267734553773,
"grad_norm": 4.990447044372559,
"learning_rate": 4.1849489795918374e-06,
"loss": 0.0262,
"step": 8600
},
{
"epoch": 3.9473684210526314,
"grad_norm": 4.837020397186279,
"learning_rate": 4.178571428571429e-06,
"loss": 0.0366,
"step": 8625
},
{
"epoch": 3.9588100686498855,
"grad_norm": 8.747390747070312,
"learning_rate": 4.1721938775510205e-06,
"loss": 0.0228,
"step": 8650
},
{
"epoch": 3.9702517162471396,
"grad_norm": 0.8226907849311829,
"learning_rate": 4.166071428571429e-06,
"loss": 0.0315,
"step": 8675
},
{
"epoch": 3.9816933638443937,
"grad_norm": 4.007413864135742,
"learning_rate": 4.159693877551021e-06,
"loss": 0.0251,
"step": 8700
},
{
"epoch": 3.9931350114416477,
"grad_norm": 2.9124577045440674,
"learning_rate": 4.153316326530613e-06,
"loss": 0.0287,
"step": 8725
},
{
"epoch": 4.004576659038902,
"grad_norm": 2.2100229263305664,
"learning_rate": 4.146938775510204e-06,
"loss": 0.0337,
"step": 8750
},
{
"epoch": 4.016018306636155,
"grad_norm": 1.504098653793335,
"learning_rate": 4.140561224489796e-06,
"loss": 0.0166,
"step": 8775
},
{
"epoch": 4.02745995423341,
"grad_norm": 2.594133138656616,
"learning_rate": 4.134183673469388e-06,
"loss": 0.0193,
"step": 8800
},
{
"epoch": 4.038901601830664,
"grad_norm": 3.2971105575561523,
"learning_rate": 4.12780612244898e-06,
"loss": 0.0116,
"step": 8825
},
{
"epoch": 4.050343249427917,
"grad_norm": 2.0989925861358643,
"learning_rate": 4.121428571428572e-06,
"loss": 0.0199,
"step": 8850
},
{
"epoch": 4.061784897025172,
"grad_norm": 2.077739715576172,
"learning_rate": 4.115306122448979e-06,
"loss": 0.0132,
"step": 8875
},
{
"epoch": 4.073226544622425,
"grad_norm": 2.2226176261901855,
"learning_rate": 4.108928571428572e-06,
"loss": 0.0203,
"step": 8900
},
{
"epoch": 4.08466819221968,
"grad_norm": 5.787581443786621,
"learning_rate": 4.102551020408163e-06,
"loss": 0.0194,
"step": 8925
},
{
"epoch": 4.0961098398169336,
"grad_norm": 1.7698355913162231,
"learning_rate": 4.096173469387756e-06,
"loss": 0.0225,
"step": 8950
},
{
"epoch": 4.107551487414188,
"grad_norm": 7.658657073974609,
"learning_rate": 4.089795918367347e-06,
"loss": 0.017,
"step": 8975
},
{
"epoch": 4.118993135011442,
"grad_norm": 1.4201191663742065,
"learning_rate": 4.083418367346939e-06,
"loss": 0.025,
"step": 9000
},
{
"epoch": 4.130434782608695,
"grad_norm": 1.703041434288025,
"learning_rate": 4.07704081632653e-06,
"loss": 0.0114,
"step": 9025
},
{
"epoch": 4.14187643020595,
"grad_norm": 0.8880120515823364,
"learning_rate": 4.070663265306123e-06,
"loss": 0.0247,
"step": 9050
},
{
"epoch": 4.1533180778032035,
"grad_norm": 2.5007543563842773,
"learning_rate": 4.064285714285714e-06,
"loss": 0.0221,
"step": 9075
},
{
"epoch": 4.164759725400458,
"grad_norm": 9.973050117492676,
"learning_rate": 4.057908163265306e-06,
"loss": 0.0212,
"step": 9100
},
{
"epoch": 4.176201372997712,
"grad_norm": 3.308204412460327,
"learning_rate": 4.051530612244898e-06,
"loss": 0.0117,
"step": 9125
},
{
"epoch": 4.187643020594965,
"grad_norm": 1.699859380722046,
"learning_rate": 4.0451530612244905e-06,
"loss": 0.0227,
"step": 9150
},
{
"epoch": 4.19908466819222,
"grad_norm": 1.368072271347046,
"learning_rate": 4.038775510204082e-06,
"loss": 0.0105,
"step": 9175
},
{
"epoch": 4.2105263157894735,
"grad_norm": 0.6647465229034424,
"learning_rate": 4.032397959183674e-06,
"loss": 0.022,
"step": 9200
},
{
"epoch": 4.221967963386728,
"grad_norm": 4.245569705963135,
"learning_rate": 4.026020408163265e-06,
"loss": 0.0151,
"step": 9225
},
{
"epoch": 4.233409610983982,
"grad_norm": 0.7587606906890869,
"learning_rate": 4.0196428571428576e-06,
"loss": 0.0235,
"step": 9250
},
{
"epoch": 4.244851258581235,
"grad_norm": 2.736632823944092,
"learning_rate": 4.013265306122449e-06,
"loss": 0.0157,
"step": 9275
},
{
"epoch": 4.25629290617849,
"grad_norm": 2.0381782054901123,
"learning_rate": 4.006887755102041e-06,
"loss": 0.0188,
"step": 9300
},
{
"epoch": 4.267734553775743,
"grad_norm": 1.4942119121551514,
"learning_rate": 4.000510204081632e-06,
"loss": 0.0101,
"step": 9325
},
{
"epoch": 4.279176201372998,
"grad_norm": 3.967541217803955,
"learning_rate": 3.994132653061225e-06,
"loss": 0.021,
"step": 9350
},
{
"epoch": 4.290617848970252,
"grad_norm": 0.7107511758804321,
"learning_rate": 3.987755102040817e-06,
"loss": 0.0106,
"step": 9375
},
{
"epoch": 4.302059496567506,
"grad_norm": 1.9319146871566772,
"learning_rate": 3.9813775510204085e-06,
"loss": 0.0278,
"step": 9400
},
{
"epoch": 4.31350114416476,
"grad_norm": 4.877601146697998,
"learning_rate": 3.975e-06,
"loss": 0.0157,
"step": 9425
},
{
"epoch": 4.324942791762013,
"grad_norm": 3.034367799758911,
"learning_rate": 3.968622448979592e-06,
"loss": 0.0248,
"step": 9450
},
{
"epoch": 4.336384439359268,
"grad_norm": 0.23764902353286743,
"learning_rate": 3.962244897959184e-06,
"loss": 0.0105,
"step": 9475
},
{
"epoch": 4.3478260869565215,
"grad_norm": 3.390343189239502,
"learning_rate": 3.9558673469387755e-06,
"loss": 0.0252,
"step": 9500
},
{
"epoch": 4.359267734553776,
"grad_norm": 0.8049426078796387,
"learning_rate": 3.949489795918367e-06,
"loss": 0.0127,
"step": 9525
},
{
"epoch": 4.37070938215103,
"grad_norm": 2.284611463546753,
"learning_rate": 3.9431122448979595e-06,
"loss": 0.0295,
"step": 9550
},
{
"epoch": 4.382151029748284,
"grad_norm": 1.1632159948349,
"learning_rate": 3.936734693877552e-06,
"loss": 0.014,
"step": 9575
},
{
"epoch": 4.393592677345538,
"grad_norm": 3.538940906524658,
"learning_rate": 3.930357142857143e-06,
"loss": 0.0254,
"step": 9600
},
{
"epoch": 4.4050343249427915,
"grad_norm": 3.8108420372009277,
"learning_rate": 3.923979591836735e-06,
"loss": 0.0131,
"step": 9625
},
{
"epoch": 4.416475972540046,
"grad_norm": 0.45241427421569824,
"learning_rate": 3.9176020408163265e-06,
"loss": 0.0201,
"step": 9650
},
{
"epoch": 4.4279176201373,
"grad_norm": 3.477997064590454,
"learning_rate": 3.911224489795919e-06,
"loss": 0.0174,
"step": 9675
},
{
"epoch": 4.439359267734554,
"grad_norm": 1.1490719318389893,
"learning_rate": 3.9048469387755104e-06,
"loss": 0.0151,
"step": 9700
},
{
"epoch": 4.450800915331808,
"grad_norm": 7.397353649139404,
"learning_rate": 3.898469387755102e-06,
"loss": 0.0169,
"step": 9725
},
{
"epoch": 4.462242562929061,
"grad_norm": 0.5268431305885315,
"learning_rate": 3.8920918367346935e-06,
"loss": 0.0246,
"step": 9750
},
{
"epoch": 4.473684210526316,
"grad_norm": 1.0522817373275757,
"learning_rate": 3.885714285714286e-06,
"loss": 0.0159,
"step": 9775
},
{
"epoch": 4.48512585812357,
"grad_norm": 1.1104140281677246,
"learning_rate": 3.879336734693878e-06,
"loss": 0.0312,
"step": 9800
},
{
"epoch": 4.496567505720824,
"grad_norm": 11.163028717041016,
"learning_rate": 3.87295918367347e-06,
"loss": 0.0169,
"step": 9825
},
{
"epoch": 4.508009153318078,
"grad_norm": 3.165876865386963,
"learning_rate": 3.866581632653061e-06,
"loss": 0.0159,
"step": 9850
},
{
"epoch": 4.519450800915331,
"grad_norm": 3.8829312324523926,
"learning_rate": 3.860204081632653e-06,
"loss": 0.0163,
"step": 9875
},
{
"epoch": 4.530892448512586,
"grad_norm": 3.46303129196167,
"learning_rate": 3.853826530612245e-06,
"loss": 0.0279,
"step": 9900
},
{
"epoch": 4.5423340961098395,
"grad_norm": 1.1138535737991333,
"learning_rate": 3.847448979591837e-06,
"loss": 0.0154,
"step": 9925
},
{
"epoch": 4.553775743707094,
"grad_norm": 4.287326335906982,
"learning_rate": 3.841071428571428e-06,
"loss": 0.0193,
"step": 9950
},
{
"epoch": 4.565217391304348,
"grad_norm": 0.4523741602897644,
"learning_rate": 3.83469387755102e-06,
"loss": 0.0126,
"step": 9975
},
{
"epoch": 4.576659038901602,
"grad_norm": 0.8860822916030884,
"learning_rate": 3.828316326530612e-06,
"loss": 0.0281,
"step": 10000
},
{
"epoch": 4.576659038901602,
"eval_loss": 0.1852317899465561,
"eval_runtime": 5272.523,
"eval_samples_per_second": 1.806,
"eval_steps_per_second": 0.226,
"eval_wer": 0.10049913611057784,
"step": 10000
},
{
"epoch": 4.588100686498856,
"grad_norm": 2.3640730381011963,
"learning_rate": 3.821938775510205e-06,
"loss": 0.0134,
"step": 10025
},
{
"epoch": 4.5995423340961095,
"grad_norm": 0.9958900809288025,
"learning_rate": 3.815561224489796e-06,
"loss": 0.017,
"step": 10050
},
{
"epoch": 4.610983981693364,
"grad_norm": 4.021085739135742,
"learning_rate": 3.8091836734693882e-06,
"loss": 0.0169,
"step": 10075
},
{
"epoch": 4.622425629290618,
"grad_norm": 1.908926010131836,
"learning_rate": 3.8028061224489798e-06,
"loss": 0.0208,
"step": 10100
},
{
"epoch": 4.633867276887872,
"grad_norm": 4.975993633270264,
"learning_rate": 3.7964285714285717e-06,
"loss": 0.0133,
"step": 10125
},
{
"epoch": 4.645308924485126,
"grad_norm": 0.652805745601654,
"learning_rate": 3.7900510204081633e-06,
"loss": 0.0208,
"step": 10150
},
{
"epoch": 4.65675057208238,
"grad_norm": 1.805553913116455,
"learning_rate": 3.7836734693877553e-06,
"loss": 0.0132,
"step": 10175
},
{
"epoch": 4.668192219679634,
"grad_norm": 0.581511378288269,
"learning_rate": 3.777295918367347e-06,
"loss": 0.0243,
"step": 10200
},
{
"epoch": 4.679633867276888,
"grad_norm": 4.517154693603516,
"learning_rate": 3.7709183673469388e-06,
"loss": 0.0142,
"step": 10225
},
{
"epoch": 4.691075514874142,
"grad_norm": 3.5025482177734375,
"learning_rate": 3.764540816326531e-06,
"loss": 0.0258,
"step": 10250
},
{
"epoch": 4.702517162471396,
"grad_norm": 2.5942628383636475,
"learning_rate": 3.7581632653061227e-06,
"loss": 0.0121,
"step": 10275
},
{
"epoch": 4.71395881006865,
"grad_norm": 0.7120451927185059,
"learning_rate": 3.7517857142857147e-06,
"loss": 0.0254,
"step": 10300
},
{
"epoch": 4.725400457665904,
"grad_norm": 8.78650951385498,
"learning_rate": 3.745408163265306e-06,
"loss": 0.0141,
"step": 10325
},
{
"epoch": 4.7368421052631575,
"grad_norm": 1.8899227380752563,
"learning_rate": 3.739030612244898e-06,
"loss": 0.0264,
"step": 10350
},
{
"epoch": 4.748283752860412,
"grad_norm": 0.5672324895858765,
"learning_rate": 3.7326530612244897e-06,
"loss": 0.0095,
"step": 10375
},
{
"epoch": 4.759725400457666,
"grad_norm": 0.8572821617126465,
"learning_rate": 3.7262755102040817e-06,
"loss": 0.0236,
"step": 10400
},
{
"epoch": 4.77116704805492,
"grad_norm": 0.23919886350631714,
"learning_rate": 3.7198979591836732e-06,
"loss": 0.0117,
"step": 10425
},
{
"epoch": 4.782608695652174,
"grad_norm": 0.6619164943695068,
"learning_rate": 3.713520408163265e-06,
"loss": 0.0256,
"step": 10450
},
{
"epoch": 4.7940503432494275,
"grad_norm": 1.8749737739562988,
"learning_rate": 3.7071428571428576e-06,
"loss": 0.0132,
"step": 10475
},
{
"epoch": 4.805491990846682,
"grad_norm": 3.1002092361450195,
"learning_rate": 3.7007653061224496e-06,
"loss": 0.0264,
"step": 10500
},
{
"epoch": 4.816933638443936,
"grad_norm": 1.090328335762024,
"learning_rate": 3.694387755102041e-06,
"loss": 0.0166,
"step": 10525
},
{
"epoch": 4.82837528604119,
"grad_norm": 0.8507818579673767,
"learning_rate": 3.688010204081633e-06,
"loss": 0.0206,
"step": 10550
},
{
"epoch": 4.839816933638444,
"grad_norm": 2.137056589126587,
"learning_rate": 3.6816326530612246e-06,
"loss": 0.0114,
"step": 10575
},
{
"epoch": 4.851258581235698,
"grad_norm": 3.8611929416656494,
"learning_rate": 3.6752551020408166e-06,
"loss": 0.0214,
"step": 10600
},
{
"epoch": 4.862700228832952,
"grad_norm": 5.636245250701904,
"learning_rate": 3.668877551020408e-06,
"loss": 0.017,
"step": 10625
},
{
"epoch": 4.874141876430206,
"grad_norm": 1.076035499572754,
"learning_rate": 3.6625e-06,
"loss": 0.0316,
"step": 10650
},
{
"epoch": 4.88558352402746,
"grad_norm": 2.079132556915283,
"learning_rate": 3.6561224489795916e-06,
"loss": 0.0136,
"step": 10675
},
{
"epoch": 4.897025171624714,
"grad_norm": 2.008894443511963,
"learning_rate": 3.649744897959184e-06,
"loss": 0.0239,
"step": 10700
},
{
"epoch": 4.908466819221968,
"grad_norm": 0.4755445420742035,
"learning_rate": 3.643367346938776e-06,
"loss": 0.0131,
"step": 10725
},
{
"epoch": 4.919908466819222,
"grad_norm": 2.093269109725952,
"learning_rate": 3.6369897959183675e-06,
"loss": 0.023,
"step": 10750
},
{
"epoch": 4.931350114416476,
"grad_norm": 0.36701831221580505,
"learning_rate": 3.6306122448979595e-06,
"loss": 0.0137,
"step": 10775
},
{
"epoch": 4.94279176201373,
"grad_norm": 2.4676878452301025,
"learning_rate": 3.624234693877551e-06,
"loss": 0.0324,
"step": 10800
},
{
"epoch": 4.954233409610984,
"grad_norm": 2.1514317989349365,
"learning_rate": 3.617857142857143e-06,
"loss": 0.0163,
"step": 10825
},
{
"epoch": 4.965675057208238,
"grad_norm": 0.4548572897911072,
"learning_rate": 3.6114795918367346e-06,
"loss": 0.0225,
"step": 10850
},
{
"epoch": 4.977116704805492,
"grad_norm": 4.841091632843018,
"learning_rate": 3.6051020408163265e-06,
"loss": 0.015,
"step": 10875
},
{
"epoch": 4.988558352402746,
"grad_norm": 1.0846531391143799,
"learning_rate": 3.598724489795918e-06,
"loss": 0.0241,
"step": 10900
},
{
"epoch": 5.0,
"grad_norm": 3.79087495803833,
"learning_rate": 3.592346938775511e-06,
"loss": 0.0228,
"step": 10925
},
{
"epoch": 5.011441647597254,
"grad_norm": 7.431537628173828,
"learning_rate": 3.5859693877551024e-06,
"loss": 0.0131,
"step": 10950
},
{
"epoch": 5.022883295194508,
"grad_norm": 9.645064353942871,
"learning_rate": 3.5795918367346944e-06,
"loss": 0.0099,
"step": 10975
},
{
"epoch": 5.034324942791762,
"grad_norm": 4.066143035888672,
"learning_rate": 3.573214285714286e-06,
"loss": 0.0122,
"step": 11000
},
{
"epoch": 5.045766590389016,
"grad_norm": 15.450593948364258,
"learning_rate": 3.566836734693878e-06,
"loss": 0.0238,
"step": 11025
},
{
"epoch": 5.05720823798627,
"grad_norm": 4.155067443847656,
"learning_rate": 3.5604591836734694e-06,
"loss": 0.0146,
"step": 11050
},
{
"epoch": 5.068649885583524,
"grad_norm": 1.2085702419281006,
"learning_rate": 3.5540816326530614e-06,
"loss": 0.0205,
"step": 11075
},
{
"epoch": 5.080091533180778,
"grad_norm": 2.0568387508392334,
"learning_rate": 3.547704081632653e-06,
"loss": 0.0104,
"step": 11100
},
{
"epoch": 5.091533180778032,
"grad_norm": 2.4178097248077393,
"learning_rate": 3.541326530612245e-06,
"loss": 0.0152,
"step": 11125
},
{
"epoch": 5.102974828375286,
"grad_norm": 6.744433879852295,
"learning_rate": 3.5349489795918373e-06,
"loss": 0.0118,
"step": 11150
},
{
"epoch": 5.11441647597254,
"grad_norm": 2.1950619220733643,
"learning_rate": 3.528571428571429e-06,
"loss": 0.013,
"step": 11175
},
{
"epoch": 5.125858123569794,
"grad_norm": 7.050522804260254,
"learning_rate": 3.522193877551021e-06,
"loss": 0.0117,
"step": 11200
},
{
"epoch": 5.137299771167048,
"grad_norm": 18.47918128967285,
"learning_rate": 3.5158163265306124e-06,
"loss": 0.0153,
"step": 11225
},
{
"epoch": 5.148741418764302,
"grad_norm": 0.19902612268924713,
"learning_rate": 3.5094387755102043e-06,
"loss": 0.0111,
"step": 11250
},
{
"epoch": 5.160183066361556,
"grad_norm": 4.996516704559326,
"learning_rate": 3.503061224489796e-06,
"loss": 0.0147,
"step": 11275
},
{
"epoch": 5.17162471395881,
"grad_norm": 0.06878898292779922,
"learning_rate": 3.496683673469388e-06,
"loss": 0.0084,
"step": 11300
},
{
"epoch": 5.183066361556064,
"grad_norm": 3.7109427452087402,
"learning_rate": 3.4903061224489794e-06,
"loss": 0.0171,
"step": 11325
},
{
"epoch": 5.194508009153318,
"grad_norm": 1.2318646907806396,
"learning_rate": 3.484183673469388e-06,
"loss": 0.0104,
"step": 11350
},
{
"epoch": 5.2059496567505725,
"grad_norm": 5.8200364112854,
"learning_rate": 3.4778061224489795e-06,
"loss": 0.0178,
"step": 11375
},
{
"epoch": 5.217391304347826,
"grad_norm": 2.4029898643493652,
"learning_rate": 3.4714285714285715e-06,
"loss": 0.0143,
"step": 11400
},
{
"epoch": 5.22883295194508,
"grad_norm": 5.977914810180664,
"learning_rate": 3.465051020408164e-06,
"loss": 0.0137,
"step": 11425
},
{
"epoch": 5.240274599542334,
"grad_norm": 1.1518189907073975,
"learning_rate": 3.4586734693877554e-06,
"loss": 0.0115,
"step": 11450
},
{
"epoch": 5.251716247139588,
"grad_norm": 9.050216674804688,
"learning_rate": 3.4522959183673474e-06,
"loss": 0.019,
"step": 11475
},
{
"epoch": 5.2631578947368425,
"grad_norm": 2.823286294937134,
"learning_rate": 3.445918367346939e-06,
"loss": 0.0105,
"step": 11500
},
{
"epoch": 5.274599542334096,
"grad_norm": 2.077913284301758,
"learning_rate": 3.439540816326531e-06,
"loss": 0.012,
"step": 11525
},
{
"epoch": 5.28604118993135,
"grad_norm": 1.8525944948196411,
"learning_rate": 3.4331632653061224e-06,
"loss": 0.0128,
"step": 11550
},
{
"epoch": 5.297482837528604,
"grad_norm": 4.57568883895874,
"learning_rate": 3.4267857142857144e-06,
"loss": 0.0168,
"step": 11575
},
{
"epoch": 5.308924485125858,
"grad_norm": 0.7349223494529724,
"learning_rate": 3.420408163265306e-06,
"loss": 0.0103,
"step": 11600
},
{
"epoch": 5.320366132723112,
"grad_norm": 8.281305313110352,
"learning_rate": 3.414030612244898e-06,
"loss": 0.0142,
"step": 11625
},
{
"epoch": 5.331807780320366,
"grad_norm": 2.828233480453491,
"learning_rate": 3.4076530612244903e-06,
"loss": 0.0074,
"step": 11650
},
{
"epoch": 5.34324942791762,
"grad_norm": 3.6413841247558594,
"learning_rate": 3.4012755102040823e-06,
"loss": 0.0145,
"step": 11675
},
{
"epoch": 5.354691075514874,
"grad_norm": 8.595261573791504,
"learning_rate": 3.394897959183674e-06,
"loss": 0.0104,
"step": 11700
},
{
"epoch": 5.366132723112128,
"grad_norm": 8.777588844299316,
"learning_rate": 3.3885204081632658e-06,
"loss": 0.016,
"step": 11725
},
{
"epoch": 5.377574370709382,
"grad_norm": 0.81954026222229,
"learning_rate": 3.3821428571428573e-06,
"loss": 0.0076,
"step": 11750
},
{
"epoch": 5.389016018306636,
"grad_norm": 2.593818187713623,
"learning_rate": 3.3757653061224493e-06,
"loss": 0.0178,
"step": 11775
},
{
"epoch": 5.4004576659038905,
"grad_norm": 1.6006801128387451,
"learning_rate": 3.369387755102041e-06,
"loss": 0.0099,
"step": 11800
},
{
"epoch": 5.411899313501144,
"grad_norm": 3.7885050773620605,
"learning_rate": 3.363010204081633e-06,
"loss": 0.0165,
"step": 11825
},
{
"epoch": 5.423340961098398,
"grad_norm": 1.194949984550476,
"learning_rate": 3.3566326530612243e-06,
"loss": 0.0108,
"step": 11850
},
{
"epoch": 5.434782608695652,
"grad_norm": 11.738698959350586,
"learning_rate": 3.3502551020408167e-06,
"loss": 0.0215,
"step": 11875
},
{
"epoch": 5.446224256292906,
"grad_norm": 2.309065103530884,
"learning_rate": 3.3438775510204087e-06,
"loss": 0.0121,
"step": 11900
},
{
"epoch": 5.4576659038901605,
"grad_norm": 7.016556262969971,
"learning_rate": 3.3375000000000002e-06,
"loss": 0.0138,
"step": 11925
},
{
"epoch": 5.469107551487414,
"grad_norm": 2.991405963897705,
"learning_rate": 3.331122448979592e-06,
"loss": 0.0146,
"step": 11950
},
{
"epoch": 5.480549199084669,
"grad_norm": 7.811606407165527,
"learning_rate": 3.3247448979591837e-06,
"loss": 0.0194,
"step": 11975
},
{
"epoch": 5.491990846681922,
"grad_norm": 3.6345748901367188,
"learning_rate": 3.3183673469387757e-06,
"loss": 0.0142,
"step": 12000
},
{
"epoch": 5.503432494279176,
"grad_norm": 7.627600193023682,
"learning_rate": 3.3119897959183673e-06,
"loss": 0.0179,
"step": 12025
},
{
"epoch": 5.51487414187643,
"grad_norm": 1.8679994344711304,
"learning_rate": 3.3056122448979592e-06,
"loss": 0.0105,
"step": 12050
},
{
"epoch": 5.526315789473684,
"grad_norm": 0.5164907574653625,
"learning_rate": 3.2992346938775508e-06,
"loss": 0.0105,
"step": 12075
},
{
"epoch": 5.537757437070939,
"grad_norm": 0.4254048764705658,
"learning_rate": 3.2928571428571427e-06,
"loss": 0.0081,
"step": 12100
},
{
"epoch": 5.549199084668192,
"grad_norm": 5.356523513793945,
"learning_rate": 3.286479591836735e-06,
"loss": 0.0218,
"step": 12125
},
{
"epoch": 5.560640732265446,
"grad_norm": 3.510284662246704,
"learning_rate": 3.280102040816327e-06,
"loss": 0.0085,
"step": 12150
},
{
"epoch": 5.5720823798627,
"grad_norm": 6.946488380432129,
"learning_rate": 3.2737244897959186e-06,
"loss": 0.0184,
"step": 12175
},
{
"epoch": 5.583524027459954,
"grad_norm": 0.3557557463645935,
"learning_rate": 3.2673469387755106e-06,
"loss": 0.0106,
"step": 12200
},
{
"epoch": 5.5949656750572085,
"grad_norm": 3.596979856491089,
"learning_rate": 3.260969387755102e-06,
"loss": 0.0258,
"step": 12225
},
{
"epoch": 5.606407322654462,
"grad_norm": 0.7150298953056335,
"learning_rate": 3.254591836734694e-06,
"loss": 0.0094,
"step": 12250
},
{
"epoch": 5.617848970251716,
"grad_norm": 4.756287097930908,
"learning_rate": 3.2482142857142857e-06,
"loss": 0.0164,
"step": 12275
},
{
"epoch": 5.62929061784897,
"grad_norm": 3.6292340755462646,
"learning_rate": 3.2418367346938776e-06,
"loss": 0.0107,
"step": 12300
},
{
"epoch": 5.640732265446224,
"grad_norm": 9.82845687866211,
"learning_rate": 3.235459183673469e-06,
"loss": 0.019,
"step": 12325
},
{
"epoch": 5.6521739130434785,
"grad_norm": 0.3698252737522125,
"learning_rate": 3.2290816326530616e-06,
"loss": 0.0079,
"step": 12350
},
{
"epoch": 5.663615560640732,
"grad_norm": 1.8457624912261963,
"learning_rate": 3.2227040816326535e-06,
"loss": 0.023,
"step": 12375
},
{
"epoch": 5.675057208237987,
"grad_norm": 0.7149168848991394,
"learning_rate": 3.216326530612245e-06,
"loss": 0.016,
"step": 12400
},
{
"epoch": 5.68649885583524,
"grad_norm": 4.959986209869385,
"learning_rate": 3.209948979591837e-06,
"loss": 0.0184,
"step": 12425
},
{
"epoch": 5.697940503432494,
"grad_norm": 1.3709676265716553,
"learning_rate": 3.2035714285714286e-06,
"loss": 0.0118,
"step": 12450
},
{
"epoch": 5.709382151029748,
"grad_norm": 4.3518829345703125,
"learning_rate": 3.1971938775510205e-06,
"loss": 0.0172,
"step": 12475
},
{
"epoch": 5.720823798627002,
"grad_norm": 4.1297502517700195,
"learning_rate": 3.190816326530612e-06,
"loss": 0.0126,
"step": 12500
},
{
"epoch": 5.732265446224257,
"grad_norm": 4.261162281036377,
"learning_rate": 3.184438775510204e-06,
"loss": 0.0133,
"step": 12525
},
{
"epoch": 5.74370709382151,
"grad_norm": 3.297361135482788,
"learning_rate": 3.1780612244897956e-06,
"loss": 0.0116,
"step": 12550
},
{
"epoch": 5.755148741418765,
"grad_norm": 6.726646423339844,
"learning_rate": 3.171683673469388e-06,
"loss": 0.019,
"step": 12575
},
{
"epoch": 5.766590389016018,
"grad_norm": 0.5207387208938599,
"learning_rate": 3.16530612244898e-06,
"loss": 0.0092,
"step": 12600
},
{
"epoch": 5.778032036613272,
"grad_norm": 11.70545768737793,
"learning_rate": 3.158928571428572e-06,
"loss": 0.024,
"step": 12625
},
{
"epoch": 5.7894736842105265,
"grad_norm": 0.3776039481163025,
"learning_rate": 3.1525510204081635e-06,
"loss": 0.01,
"step": 12650
},
{
"epoch": 5.80091533180778,
"grad_norm": 16.962390899658203,
"learning_rate": 3.1461734693877554e-06,
"loss": 0.0147,
"step": 12675
},
{
"epoch": 5.812356979405035,
"grad_norm": 3.64579701423645,
"learning_rate": 3.139795918367347e-06,
"loss": 0.011,
"step": 12700
},
{
"epoch": 5.823798627002288,
"grad_norm": 6.796038627624512,
"learning_rate": 3.133418367346939e-06,
"loss": 0.0138,
"step": 12725
},
{
"epoch": 5.835240274599542,
"grad_norm": 1.9765185117721558,
"learning_rate": 3.1270408163265305e-06,
"loss": 0.011,
"step": 12750
},
{
"epoch": 5.8466819221967965,
"grad_norm": 4.846302509307861,
"learning_rate": 3.1206632653061225e-06,
"loss": 0.0107,
"step": 12775
},
{
"epoch": 5.85812356979405,
"grad_norm": 3.6651906967163086,
"learning_rate": 3.1142857142857144e-06,
"loss": 0.0094,
"step": 12800
},
{
"epoch": 5.869565217391305,
"grad_norm": 6.605918884277344,
"learning_rate": 3.107908163265306e-06,
"loss": 0.0158,
"step": 12825
},
{
"epoch": 5.881006864988558,
"grad_norm": 3.4638562202453613,
"learning_rate": 3.1015306122448984e-06,
"loss": 0.0119,
"step": 12850
},
{
"epoch": 5.892448512585812,
"grad_norm": 1.832309603691101,
"learning_rate": 3.09515306122449e-06,
"loss": 0.0164,
"step": 12875
},
{
"epoch": 5.9038901601830664,
"grad_norm": 0.39115744829177856,
"learning_rate": 3.088775510204082e-06,
"loss": 0.0137,
"step": 12900
},
{
"epoch": 5.91533180778032,
"grad_norm": 5.2722086906433105,
"learning_rate": 3.0823979591836734e-06,
"loss": 0.0173,
"step": 12925
},
{
"epoch": 5.926773455377575,
"grad_norm": 0.5123651623725891,
"learning_rate": 3.0760204081632654e-06,
"loss": 0.0109,
"step": 12950
},
{
"epoch": 5.938215102974828,
"grad_norm": 5.007115840911865,
"learning_rate": 3.0696428571428573e-06,
"loss": 0.0161,
"step": 12975
},
{
"epoch": 5.949656750572083,
"grad_norm": 1.0899899005889893,
"learning_rate": 3.0632653061224493e-06,
"loss": 0.0098,
"step": 13000
},
{
"epoch": 5.961098398169336,
"grad_norm": 6.381730079650879,
"learning_rate": 3.056887755102041e-06,
"loss": 0.0206,
"step": 13025
},
{
"epoch": 5.97254004576659,
"grad_norm": 2.857933521270752,
"learning_rate": 3.050510204081633e-06,
"loss": 0.0124,
"step": 13050
},
{
"epoch": 5.983981693363845,
"grad_norm": 9.316118240356445,
"learning_rate": 3.0441326530612248e-06,
"loss": 0.0134,
"step": 13075
},
{
"epoch": 5.995423340961098,
"grad_norm": 0.3497377932071686,
"learning_rate": 3.0377551020408163e-06,
"loss": 0.0106,
"step": 13100
},
{
"epoch": 6.006864988558353,
"grad_norm": 4.825676918029785,
"learning_rate": 3.0313775510204083e-06,
"loss": 0.0167,
"step": 13125
},
{
"epoch": 6.018306636155606,
"grad_norm": 8.057607650756836,
"learning_rate": 3.0250000000000003e-06,
"loss": 0.0133,
"step": 13150
},
{
"epoch": 6.02974828375286,
"grad_norm": 0.27666619420051575,
"learning_rate": 3.018622448979592e-06,
"loss": 0.0215,
"step": 13175
},
{
"epoch": 6.0411899313501145,
"grad_norm": 0.5118196606636047,
"learning_rate": 3.0122448979591838e-06,
"loss": 0.0144,
"step": 13200
},
{
"epoch": 6.052631578947368,
"grad_norm": 12.31119155883789,
"learning_rate": 3.0058673469387757e-06,
"loss": 0.0134,
"step": 13225
},
{
"epoch": 6.064073226544623,
"grad_norm": 0.9528910517692566,
"learning_rate": 2.9994897959183673e-06,
"loss": 0.0096,
"step": 13250
},
{
"epoch": 6.075514874141876,
"grad_norm": 1.3786743879318237,
"learning_rate": 2.9931122448979592e-06,
"loss": 0.0155,
"step": 13275
},
{
"epoch": 6.086956521739131,
"grad_norm": 4.0814337730407715,
"learning_rate": 2.9867346938775512e-06,
"loss": 0.0113,
"step": 13300
},
{
"epoch": 6.0983981693363845,
"grad_norm": 0.17644235491752625,
"learning_rate": 2.980357142857143e-06,
"loss": 0.0086,
"step": 13325
},
{
"epoch": 6.109839816933638,
"grad_norm": 2.6412882804870605,
"learning_rate": 2.9739795918367347e-06,
"loss": 0.0087,
"step": 13350
},
{
"epoch": 6.121281464530893,
"grad_norm": 4.250133514404297,
"learning_rate": 2.9676020408163267e-06,
"loss": 0.0123,
"step": 13375
},
{
"epoch": 6.132723112128146,
"grad_norm": 1.6422185897827148,
"learning_rate": 2.9612244897959182e-06,
"loss": 0.0067,
"step": 13400
},
{
"epoch": 6.144164759725401,
"grad_norm": 3.366658926010132,
"learning_rate": 2.9548469387755106e-06,
"loss": 0.0117,
"step": 13425
},
{
"epoch": 6.155606407322654,
"grad_norm": 6.940051555633545,
"learning_rate": 2.948469387755102e-06,
"loss": 0.0072,
"step": 13450
},
{
"epoch": 6.167048054919908,
"grad_norm": 0.4400598108768463,
"learning_rate": 2.942091836734694e-06,
"loss": 0.0159,
"step": 13475
},
{
"epoch": 6.178489702517163,
"grad_norm": 3.105194091796875,
"learning_rate": 2.9357142857142857e-06,
"loss": 0.0091,
"step": 13500
},
{
"epoch": 6.189931350114416,
"grad_norm": 1.1099435091018677,
"learning_rate": 2.9293367346938776e-06,
"loss": 0.019,
"step": 13525
},
{
"epoch": 6.201372997711671,
"grad_norm": 1.7562593221664429,
"learning_rate": 2.9229591836734696e-06,
"loss": 0.0084,
"step": 13550
},
{
"epoch": 6.212814645308924,
"grad_norm": 2.2326951026916504,
"learning_rate": 2.916581632653061e-06,
"loss": 0.0148,
"step": 13575
},
{
"epoch": 6.224256292906179,
"grad_norm": 0.38852858543395996,
"learning_rate": 2.910204081632653e-06,
"loss": 0.0068,
"step": 13600
},
{
"epoch": 6.2356979405034325,
"grad_norm": 3.4514925479888916,
"learning_rate": 2.903826530612245e-06,
"loss": 0.0111,
"step": 13625
},
{
"epoch": 6.247139588100686,
"grad_norm": 2.002838134765625,
"learning_rate": 2.897448979591837e-06,
"loss": 0.0082,
"step": 13650
},
{
"epoch": 6.258581235697941,
"grad_norm": 0.08688002824783325,
"learning_rate": 2.8910714285714286e-06,
"loss": 0.0267,
"step": 13675
},
{
"epoch": 6.270022883295194,
"grad_norm": 2.080207347869873,
"learning_rate": 2.8846938775510206e-06,
"loss": 0.0096,
"step": 13700
},
{
"epoch": 6.281464530892449,
"grad_norm": 0.9220284819602966,
"learning_rate": 2.878316326530612e-06,
"loss": 0.0086,
"step": 13725
},
{
"epoch": 6.2929061784897025,
"grad_norm": 0.35067400336265564,
"learning_rate": 2.8719387755102045e-06,
"loss": 0.0052,
"step": 13750
},
{
"epoch": 6.304347826086957,
"grad_norm": 3.5281693935394287,
"learning_rate": 2.865561224489796e-06,
"loss": 0.0191,
"step": 13775
},
{
"epoch": 6.315789473684211,
"grad_norm": 3.336193323135376,
"learning_rate": 2.859183673469388e-06,
"loss": 0.0063,
"step": 13800
},
{
"epoch": 6.327231121281464,
"grad_norm": 2.0798017978668213,
"learning_rate": 2.8528061224489796e-06,
"loss": 0.0132,
"step": 13825
},
{
"epoch": 6.338672768878719,
"grad_norm": 0.20700007677078247,
"learning_rate": 2.8464285714285715e-06,
"loss": 0.0061,
"step": 13850
},
{
"epoch": 6.350114416475972,
"grad_norm": 0.1666710078716278,
"learning_rate": 2.8400510204081635e-06,
"loss": 0.0073,
"step": 13875
},
{
"epoch": 6.361556064073227,
"grad_norm": 4.459512233734131,
"learning_rate": 2.8336734693877555e-06,
"loss": 0.0107,
"step": 13900
},
{
"epoch": 6.372997711670481,
"grad_norm": 2.873020887374878,
"learning_rate": 2.827295918367347e-06,
"loss": 0.0124,
"step": 13925
},
{
"epoch": 6.384439359267734,
"grad_norm": 0.23313076794147491,
"learning_rate": 2.820918367346939e-06,
"loss": 0.012,
"step": 13950
},
{
"epoch": 6.395881006864989,
"grad_norm": 0.10002599656581879,
"learning_rate": 2.814540816326531e-06,
"loss": 0.01,
"step": 13975
},
{
"epoch": 6.407322654462242,
"grad_norm": 0.2222910374403,
"learning_rate": 2.8081632653061225e-06,
"loss": 0.0061,
"step": 14000
},
{
"epoch": 6.418764302059497,
"grad_norm": 1.97636079788208,
"learning_rate": 2.8017857142857144e-06,
"loss": 0.0125,
"step": 14025
},
{
"epoch": 6.4302059496567505,
"grad_norm": 0.23159927129745483,
"learning_rate": 2.795408163265306e-06,
"loss": 0.0121,
"step": 14050
},
{
"epoch": 6.441647597254004,
"grad_norm": 2.222355365753174,
"learning_rate": 2.789030612244898e-06,
"loss": 0.0082,
"step": 14075
},
{
"epoch": 6.453089244851259,
"grad_norm": 1.612273931503296,
"learning_rate": 2.78265306122449e-06,
"loss": 0.0129,
"step": 14100
},
{
"epoch": 6.464530892448512,
"grad_norm": 3.7675607204437256,
"learning_rate": 2.776275510204082e-06,
"loss": 0.0126,
"step": 14125
},
{
"epoch": 6.475972540045767,
"grad_norm": 2.6924796104431152,
"learning_rate": 2.7698979591836734e-06,
"loss": 0.0123,
"step": 14150
},
{
"epoch": 6.4874141876430205,
"grad_norm": 0.5433311462402344,
"learning_rate": 2.7635204081632654e-06,
"loss": 0.0083,
"step": 14175
},
{
"epoch": 6.498855835240275,
"grad_norm": 1.2560780048370361,
"learning_rate": 2.7571428571428574e-06,
"loss": 0.0119,
"step": 14200
},
{
"epoch": 6.510297482837529,
"grad_norm": 0.1697181910276413,
"learning_rate": 2.7507653061224493e-06,
"loss": 0.0142,
"step": 14225
},
{
"epoch": 6.521739130434782,
"grad_norm": 2.489327907562256,
"learning_rate": 2.744387755102041e-06,
"loss": 0.0098,
"step": 14250
},
{
"epoch": 6.533180778032037,
"grad_norm": 0.10007134824991226,
"learning_rate": 2.738010204081633e-06,
"loss": 0.0118,
"step": 14275
},
{
"epoch": 6.54462242562929,
"grad_norm": 1.808995008468628,
"learning_rate": 2.7316326530612244e-06,
"loss": 0.0091,
"step": 14300
},
{
"epoch": 6.556064073226545,
"grad_norm": 0.3042183816432953,
"learning_rate": 2.7252551020408168e-06,
"loss": 0.0117,
"step": 14325
},
{
"epoch": 6.567505720823799,
"grad_norm": 0.8734502196311951,
"learning_rate": 2.7188775510204083e-06,
"loss": 0.0099,
"step": 14350
},
{
"epoch": 6.578947368421053,
"grad_norm": 0.13821353018283844,
"learning_rate": 2.7125000000000003e-06,
"loss": 0.0115,
"step": 14375
},
{
"epoch": 6.590389016018307,
"grad_norm": 0.3783830404281616,
"learning_rate": 2.706122448979592e-06,
"loss": 0.0068,
"step": 14400
},
{
"epoch": 6.60183066361556,
"grad_norm": 0.3584430515766144,
"learning_rate": 2.699744897959184e-06,
"loss": 0.0101,
"step": 14425
},
{
"epoch": 6.613272311212815,
"grad_norm": 3.8657848834991455,
"learning_rate": 2.6933673469387758e-06,
"loss": 0.0098,
"step": 14450
},
{
"epoch": 6.6247139588100685,
"grad_norm": 0.2408173382282257,
"learning_rate": 2.6869897959183673e-06,
"loss": 0.0112,
"step": 14475
},
{
"epoch": 6.636155606407323,
"grad_norm": 0.08185845613479614,
"learning_rate": 2.6806122448979593e-06,
"loss": 0.0059,
"step": 14500
},
{
"epoch": 6.647597254004577,
"grad_norm": 0.0738307312130928,
"learning_rate": 2.674234693877551e-06,
"loss": 0.0132,
"step": 14525
},
{
"epoch": 6.65903890160183,
"grad_norm": 3.3000831604003906,
"learning_rate": 2.667857142857143e-06,
"loss": 0.01,
"step": 14550
},
{
"epoch": 6.670480549199085,
"grad_norm": 1.2340450286865234,
"learning_rate": 2.6614795918367348e-06,
"loss": 0.0141,
"step": 14575
},
{
"epoch": 6.6819221967963385,
"grad_norm": 2.966970682144165,
"learning_rate": 2.6551020408163267e-06,
"loss": 0.0052,
"step": 14600
},
{
"epoch": 6.693363844393593,
"grad_norm": 1.181470274925232,
"learning_rate": 2.6487244897959183e-06,
"loss": 0.015,
"step": 14625
},
{
"epoch": 6.704805491990847,
"grad_norm": 0.6192874908447266,
"learning_rate": 2.6423469387755102e-06,
"loss": 0.0063,
"step": 14650
},
{
"epoch": 6.7162471395881,
"grad_norm": 2.0739264488220215,
"learning_rate": 2.635969387755102e-06,
"loss": 0.0145,
"step": 14675
},
{
"epoch": 6.727688787185355,
"grad_norm": 1.339400291442871,
"learning_rate": 2.629591836734694e-06,
"loss": 0.0076,
"step": 14700
},
{
"epoch": 6.739130434782608,
"grad_norm": 3.128563404083252,
"learning_rate": 2.6232142857142857e-06,
"loss": 0.0192,
"step": 14725
},
{
"epoch": 6.750572082379863,
"grad_norm": 3.493795394897461,
"learning_rate": 2.6168367346938777e-06,
"loss": 0.005,
"step": 14750
},
{
"epoch": 6.762013729977117,
"grad_norm": 1.3682916164398193,
"learning_rate": 2.6104591836734696e-06,
"loss": 0.0134,
"step": 14775
},
{
"epoch": 6.77345537757437,
"grad_norm": 1.8985130786895752,
"learning_rate": 2.6040816326530616e-06,
"loss": 0.0082,
"step": 14800
},
{
"epoch": 6.784897025171625,
"grad_norm": 0.5169524550437927,
"learning_rate": 2.597704081632653e-06,
"loss": 0.0202,
"step": 14825
},
{
"epoch": 6.796338672768878,
"grad_norm": 0.1607659012079239,
"learning_rate": 2.591326530612245e-06,
"loss": 0.0084,
"step": 14850
},
{
"epoch": 6.807780320366133,
"grad_norm": 0.6245440244674683,
"learning_rate": 2.5849489795918367e-06,
"loss": 0.0152,
"step": 14875
},
{
"epoch": 6.8192219679633865,
"grad_norm": 0.2351321130990982,
"learning_rate": 2.5785714285714286e-06,
"loss": 0.0071,
"step": 14900
},
{
"epoch": 6.830663615560641,
"grad_norm": 5.026946067810059,
"learning_rate": 2.5721938775510206e-06,
"loss": 0.0168,
"step": 14925
},
{
"epoch": 6.842105263157895,
"grad_norm": 1.034845232963562,
"learning_rate": 2.565816326530612e-06,
"loss": 0.0084,
"step": 14950
},
{
"epoch": 6.853546910755149,
"grad_norm": 0.16656899452209473,
"learning_rate": 2.559438775510204e-06,
"loss": 0.0148,
"step": 14975
},
{
"epoch": 6.864988558352403,
"grad_norm": 7.876341819763184,
"learning_rate": 2.553061224489796e-06,
"loss": 0.0139,
"step": 15000
},
{
"epoch": 6.864988558352403,
"eval_loss": 0.20922012627124786,
"eval_runtime": 5296.0961,
"eval_samples_per_second": 1.798,
"eval_steps_per_second": 0.225,
"eval_wer": 0.10019517501759775,
"step": 15000
},
{
"epoch": 6.8764302059496565,
"grad_norm": 1.0131422281265259,
"learning_rate": 2.546683673469388e-06,
"loss": 0.0175,
"step": 15025
},
{
"epoch": 6.887871853546911,
"grad_norm": 0.43789076805114746,
"learning_rate": 2.5403061224489796e-06,
"loss": 0.0071,
"step": 15050
},
{
"epoch": 6.899313501144165,
"grad_norm": 0.10620521008968353,
"learning_rate": 2.5339285714285716e-06,
"loss": 0.0118,
"step": 15075
},
{
"epoch": 6.910755148741419,
"grad_norm": 0.22217608988285065,
"learning_rate": 2.527551020408163e-06,
"loss": 0.0103,
"step": 15100
},
{
"epoch": 6.922196796338673,
"grad_norm": 1.88958740234375,
"learning_rate": 2.5211734693877555e-06,
"loss": 0.0146,
"step": 15125
},
{
"epoch": 6.933638443935926,
"grad_norm": 0.2760096788406372,
"learning_rate": 2.514795918367347e-06,
"loss": 0.0051,
"step": 15150
},
{
"epoch": 6.945080091533181,
"grad_norm": 0.23248428106307983,
"learning_rate": 2.508418367346939e-06,
"loss": 0.0133,
"step": 15175
},
{
"epoch": 6.956521739130435,
"grad_norm": 1.852455496788025,
"learning_rate": 2.5020408163265305e-06,
"loss": 0.0072,
"step": 15200
},
{
"epoch": 6.967963386727689,
"grad_norm": 0.9175077676773071,
"learning_rate": 2.495663265306123e-06,
"loss": 0.0118,
"step": 15225
},
{
"epoch": 6.979405034324943,
"grad_norm": 4.601647853851318,
"learning_rate": 2.4892857142857145e-06,
"loss": 0.0081,
"step": 15250
},
{
"epoch": 6.990846681922196,
"grad_norm": 0.6206708550453186,
"learning_rate": 2.4829081632653064e-06,
"loss": 0.0128,
"step": 15275
},
{
"epoch": 7.002288329519451,
"grad_norm": 1.0712556838989258,
"learning_rate": 2.476530612244898e-06,
"loss": 0.0097,
"step": 15300
},
{
"epoch": 7.0137299771167045,
"grad_norm": 0.7450961470603943,
"learning_rate": 2.47015306122449e-06,
"loss": 0.0058,
"step": 15325
},
{
"epoch": 7.025171624713959,
"grad_norm": 1.869686245918274,
"learning_rate": 2.463775510204082e-06,
"loss": 0.0188,
"step": 15350
},
{
"epoch": 7.036613272311213,
"grad_norm": 0.30196335911750793,
"learning_rate": 2.4573979591836735e-06,
"loss": 0.0056,
"step": 15375
},
{
"epoch": 7.048054919908467,
"grad_norm": 1.2585679292678833,
"learning_rate": 2.4510204081632654e-06,
"loss": 0.0092,
"step": 15400
},
{
"epoch": 7.059496567505721,
"grad_norm": 0.07888475805521011,
"learning_rate": 2.444642857142857e-06,
"loss": 0.0066,
"step": 15425
},
{
"epoch": 7.0709382151029745,
"grad_norm": 1.7455697059631348,
"learning_rate": 2.4382653061224494e-06,
"loss": 0.0107,
"step": 15450
},
{
"epoch": 7.082379862700229,
"grad_norm": 0.1373489499092102,
"learning_rate": 2.431887755102041e-06,
"loss": 0.0055,
"step": 15475
},
{
"epoch": 7.093821510297483,
"grad_norm": 2.089559555053711,
"learning_rate": 2.425510204081633e-06,
"loss": 0.0107,
"step": 15500
},
{
"epoch": 7.105263157894737,
"grad_norm": 5.042351245880127,
"learning_rate": 2.4191326530612244e-06,
"loss": 0.0052,
"step": 15525
},
{
"epoch": 7.116704805491991,
"grad_norm": 0.10954425483942032,
"learning_rate": 2.4127551020408164e-06,
"loss": 0.0091,
"step": 15550
},
{
"epoch": 7.128146453089244,
"grad_norm": 0.06460034102201462,
"learning_rate": 2.4063775510204083e-06,
"loss": 0.0047,
"step": 15575
},
{
"epoch": 7.139588100686499,
"grad_norm": 2.1633071899414062,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.0126,
"step": 15600
},
{
"epoch": 7.151029748283753,
"grad_norm": 0.39932990074157715,
"learning_rate": 2.393622448979592e-06,
"loss": 0.0068,
"step": 15625
},
{
"epoch": 7.162471395881007,
"grad_norm": 0.31883901357650757,
"learning_rate": 2.387244897959184e-06,
"loss": 0.0121,
"step": 15650
},
{
"epoch": 7.173913043478261,
"grad_norm": 0.2664627134799957,
"learning_rate": 2.380867346938776e-06,
"loss": 0.0085,
"step": 15675
},
{
"epoch": 7.185354691075515,
"grad_norm": 2.2720088958740234,
"learning_rate": 2.3744897959183678e-06,
"loss": 0.0195,
"step": 15700
},
{
"epoch": 7.196796338672769,
"grad_norm": 0.13204744458198547,
"learning_rate": 2.3681122448979593e-06,
"loss": 0.0072,
"step": 15725
},
{
"epoch": 7.2082379862700225,
"grad_norm": 1.5910881757736206,
"learning_rate": 2.3617346938775513e-06,
"loss": 0.0127,
"step": 15750
},
{
"epoch": 7.219679633867277,
"grad_norm": 2.159229040145874,
"learning_rate": 2.355357142857143e-06,
"loss": 0.008,
"step": 15775
},
{
"epoch": 7.231121281464531,
"grad_norm": 1.8656185865402222,
"learning_rate": 2.3489795918367348e-06,
"loss": 0.0058,
"step": 15800
},
{
"epoch": 7.242562929061785,
"grad_norm": 2.223982334136963,
"learning_rate": 2.3426020408163267e-06,
"loss": 0.0088,
"step": 15825
},
{
"epoch": 7.254004576659039,
"grad_norm": 0.8876869082450867,
"learning_rate": 2.3362244897959183e-06,
"loss": 0.0173,
"step": 15850
},
{
"epoch": 7.2654462242562925,
"grad_norm": 0.246218740940094,
"learning_rate": 2.3298469387755103e-06,
"loss": 0.0043,
"step": 15875
},
{
"epoch": 7.276887871853547,
"grad_norm": 1.6510305404663086,
"learning_rate": 2.3234693877551022e-06,
"loss": 0.0149,
"step": 15900
},
{
"epoch": 7.288329519450801,
"grad_norm": 1.7581284046173096,
"learning_rate": 2.317091836734694e-06,
"loss": 0.0055,
"step": 15925
},
{
"epoch": 7.299771167048055,
"grad_norm": 1.8961583375930786,
"learning_rate": 2.3107142857142857e-06,
"loss": 0.0087,
"step": 15950
},
{
"epoch": 7.311212814645309,
"grad_norm": 0.2390100657939911,
"learning_rate": 2.3043367346938777e-06,
"loss": 0.0104,
"step": 15975
},
{
"epoch": 7.322654462242563,
"grad_norm": 1.6409028768539429,
"learning_rate": 2.2979591836734692e-06,
"loss": 0.0062,
"step": 16000
},
{
"epoch": 7.334096109839817,
"grad_norm": 0.10882013291120529,
"learning_rate": 2.2915816326530616e-06,
"loss": 0.0048,
"step": 16025
},
{
"epoch": 7.345537757437071,
"grad_norm": 1.4074586629867554,
"learning_rate": 2.285204081632653e-06,
"loss": 0.0178,
"step": 16050
},
{
"epoch": 7.356979405034325,
"grad_norm": 0.09742479026317596,
"learning_rate": 2.278826530612245e-06,
"loss": 0.0067,
"step": 16075
},
{
"epoch": 7.368421052631579,
"grad_norm": 2.2490932941436768,
"learning_rate": 2.2724489795918367e-06,
"loss": 0.0142,
"step": 16100
},
{
"epoch": 7.379862700228833,
"grad_norm": 0.0923023372888565,
"learning_rate": 2.2660714285714287e-06,
"loss": 0.0101,
"step": 16125
},
{
"epoch": 7.391304347826087,
"grad_norm": 1.3611345291137695,
"learning_rate": 2.2596938775510206e-06,
"loss": 0.0118,
"step": 16150
},
{
"epoch": 7.4027459954233406,
"grad_norm": 2.053800106048584,
"learning_rate": 2.2533163265306126e-06,
"loss": 0.0041,
"step": 16175
},
{
"epoch": 7.414187643020595,
"grad_norm": 2.127472162246704,
"learning_rate": 2.246938775510204e-06,
"loss": 0.0177,
"step": 16200
},
{
"epoch": 7.425629290617849,
"grad_norm": 4.369998931884766,
"learning_rate": 2.240561224489796e-06,
"loss": 0.0065,
"step": 16225
},
{
"epoch": 7.437070938215103,
"grad_norm": 2.2975997924804688,
"learning_rate": 2.234183673469388e-06,
"loss": 0.0076,
"step": 16250
},
{
"epoch": 7.448512585812357,
"grad_norm": 2.5363821983337402,
"learning_rate": 2.2278061224489796e-06,
"loss": 0.0111,
"step": 16275
},
{
"epoch": 7.459954233409611,
"grad_norm": 0.09316561371088028,
"learning_rate": 2.2214285714285716e-06,
"loss": 0.0095,
"step": 16300
},
{
"epoch": 7.471395881006865,
"grad_norm": 0.7145214676856995,
"learning_rate": 2.215051020408163e-06,
"loss": 0.0055,
"step": 16325
},
{
"epoch": 7.482837528604119,
"grad_norm": 2.495028018951416,
"learning_rate": 2.208673469387755e-06,
"loss": 0.0112,
"step": 16350
},
{
"epoch": 7.494279176201373,
"grad_norm": 0.3236389458179474,
"learning_rate": 2.202295918367347e-06,
"loss": 0.0044,
"step": 16375
},
{
"epoch": 7.505720823798627,
"grad_norm": 1.0565695762634277,
"learning_rate": 2.195918367346939e-06,
"loss": 0.0129,
"step": 16400
},
{
"epoch": 7.517162471395881,
"grad_norm": 1.8314106464385986,
"learning_rate": 2.1895408163265306e-06,
"loss": 0.0052,
"step": 16425
},
{
"epoch": 7.528604118993135,
"grad_norm": 2.1195669174194336,
"learning_rate": 2.1831632653061225e-06,
"loss": 0.0212,
"step": 16450
},
{
"epoch": 7.540045766590389,
"grad_norm": 1.8161754608154297,
"learning_rate": 2.1767857142857145e-06,
"loss": 0.0066,
"step": 16475
},
{
"epoch": 7.551487414187643,
"grad_norm": 0.14074856042861938,
"learning_rate": 2.1704081632653065e-06,
"loss": 0.0068,
"step": 16500
},
{
"epoch": 7.562929061784897,
"grad_norm": 0.07693963497877121,
"learning_rate": 2.164030612244898e-06,
"loss": 0.005,
"step": 16525
},
{
"epoch": 7.574370709382151,
"grad_norm": 1.0592836141586304,
"learning_rate": 2.15765306122449e-06,
"loss": 0.0124,
"step": 16550
},
{
"epoch": 7.585812356979405,
"grad_norm": 3.276205539703369,
"learning_rate": 2.1512755102040815e-06,
"loss": 0.0077,
"step": 16575
},
{
"epoch": 7.597254004576659,
"grad_norm": 0.2712470293045044,
"learning_rate": 2.1448979591836735e-06,
"loss": 0.0137,
"step": 16600
},
{
"epoch": 7.608695652173913,
"grad_norm": 2.913771629333496,
"learning_rate": 2.1385204081632655e-06,
"loss": 0.0084,
"step": 16625
},
{
"epoch": 7.620137299771167,
"grad_norm": 0.8734631538391113,
"learning_rate": 2.132142857142857e-06,
"loss": 0.0162,
"step": 16650
},
{
"epoch": 7.631578947368421,
"grad_norm": 2.229383707046509,
"learning_rate": 2.125765306122449e-06,
"loss": 0.0089,
"step": 16675
},
{
"epoch": 7.643020594965675,
"grad_norm": 1.0993914604187012,
"learning_rate": 2.119387755102041e-06,
"loss": 0.0105,
"step": 16700
},
{
"epoch": 7.654462242562929,
"grad_norm": 0.42427483201026917,
"learning_rate": 2.113010204081633e-06,
"loss": 0.0063,
"step": 16725
},
{
"epoch": 7.665903890160183,
"grad_norm": 1.0200995206832886,
"learning_rate": 2.1066326530612244e-06,
"loss": 0.012,
"step": 16750
},
{
"epoch": 7.6773455377574376,
"grad_norm": 0.11753875017166138,
"learning_rate": 2.1002551020408164e-06,
"loss": 0.0069,
"step": 16775
},
{
"epoch": 7.688787185354691,
"grad_norm": 1.3853702545166016,
"learning_rate": 2.0941326530612246e-06,
"loss": 0.015,
"step": 16800
},
{
"epoch": 7.700228832951945,
"grad_norm": 0.7108054757118225,
"learning_rate": 2.0877551020408165e-06,
"loss": 0.0094,
"step": 16825
},
{
"epoch": 7.711670480549199,
"grad_norm": 0.9410815834999084,
"learning_rate": 2.081377551020408e-06,
"loss": 0.0091,
"step": 16850
},
{
"epoch": 7.723112128146453,
"grad_norm": 1.315185308456421,
"learning_rate": 2.075e-06,
"loss": 0.0066,
"step": 16875
},
{
"epoch": 7.7345537757437075,
"grad_norm": 2.3978397846221924,
"learning_rate": 2.068622448979592e-06,
"loss": 0.0082,
"step": 16900
},
{
"epoch": 7.745995423340961,
"grad_norm": 0.3152960538864136,
"learning_rate": 2.0622448979591835e-06,
"loss": 0.0045,
"step": 16925
},
{
"epoch": 7.757437070938215,
"grad_norm": 3.8207294940948486,
"learning_rate": 2.0558673469387755e-06,
"loss": 0.0139,
"step": 16950
},
{
"epoch": 7.768878718535469,
"grad_norm": 0.7664352059364319,
"learning_rate": 2.0494897959183675e-06,
"loss": 0.0082,
"step": 16975
},
{
"epoch": 7.780320366132723,
"grad_norm": 2.621455192565918,
"learning_rate": 2.0431122448979594e-06,
"loss": 0.0138,
"step": 17000
},
{
"epoch": 7.7917620137299775,
"grad_norm": 2.7599635124206543,
"learning_rate": 2.036734693877551e-06,
"loss": 0.006,
"step": 17025
},
{
"epoch": 7.803203661327231,
"grad_norm": 2.1494710445404053,
"learning_rate": 2.030357142857143e-06,
"loss": 0.0133,
"step": 17050
},
{
"epoch": 7.814645308924485,
"grad_norm": 2.6911447048187256,
"learning_rate": 2.0239795918367345e-06,
"loss": 0.0079,
"step": 17075
},
{
"epoch": 7.826086956521739,
"grad_norm": 1.8577873706817627,
"learning_rate": 2.017602040816327e-06,
"loss": 0.0098,
"step": 17100
},
{
"epoch": 7.837528604118993,
"grad_norm": 0.08260419219732285,
"learning_rate": 2.0112244897959184e-06,
"loss": 0.0073,
"step": 17125
},
{
"epoch": 7.848970251716247,
"grad_norm": 0.34523025155067444,
"learning_rate": 2.0048469387755104e-06,
"loss": 0.0097,
"step": 17150
},
{
"epoch": 7.860411899313501,
"grad_norm": 2.8545644283294678,
"learning_rate": 1.998469387755102e-06,
"loss": 0.0093,
"step": 17175
},
{
"epoch": 7.871853546910755,
"grad_norm": 0.9209850430488586,
"learning_rate": 1.9920918367346943e-06,
"loss": 0.0193,
"step": 17200
},
{
"epoch": 7.883295194508009,
"grad_norm": 0.15244260430335999,
"learning_rate": 1.985714285714286e-06,
"loss": 0.0074,
"step": 17225
},
{
"epoch": 7.894736842105263,
"grad_norm": 0.9773492813110352,
"learning_rate": 1.979336734693878e-06,
"loss": 0.0102,
"step": 17250
},
{
"epoch": 7.906178489702517,
"grad_norm": 3.8897087574005127,
"learning_rate": 1.9729591836734694e-06,
"loss": 0.0092,
"step": 17275
},
{
"epoch": 7.917620137299771,
"grad_norm": 1.736915946006775,
"learning_rate": 1.9665816326530614e-06,
"loss": 0.0087,
"step": 17300
},
{
"epoch": 7.9290617848970255,
"grad_norm": 0.2637374699115753,
"learning_rate": 1.9602040816326533e-06,
"loss": 0.0056,
"step": 17325
},
{
"epoch": 7.940503432494279,
"grad_norm": 0.23918192088603973,
"learning_rate": 1.953826530612245e-06,
"loss": 0.0101,
"step": 17350
},
{
"epoch": 7.951945080091534,
"grad_norm": 2.3854148387908936,
"learning_rate": 1.947448979591837e-06,
"loss": 0.0082,
"step": 17375
},
{
"epoch": 7.963386727688787,
"grad_norm": 1.505005955696106,
"learning_rate": 1.9410714285714284e-06,
"loss": 0.0092,
"step": 17400
},
{
"epoch": 7.974828375286041,
"grad_norm": 5.391610145568848,
"learning_rate": 1.9346938775510208e-06,
"loss": 0.0088,
"step": 17425
},
{
"epoch": 7.9862700228832955,
"grad_norm": 0.1221591904759407,
"learning_rate": 1.9283163265306123e-06,
"loss": 0.0139,
"step": 17450
},
{
"epoch": 7.997711670480549,
"grad_norm": 3.126054286956787,
"learning_rate": 1.9219387755102043e-06,
"loss": 0.006,
"step": 17475
},
{
"epoch": 8.009153318077804,
"grad_norm": 0.13957707583904266,
"learning_rate": 1.915561224489796e-06,
"loss": 0.0126,
"step": 17500
},
{
"epoch": 8.020594965675057,
"grad_norm": 0.3743535876274109,
"learning_rate": 1.909183673469388e-06,
"loss": 0.0066,
"step": 17525
},
{
"epoch": 8.03203661327231,
"grad_norm": 2.4367594718933105,
"learning_rate": 1.9028061224489798e-06,
"loss": 0.0095,
"step": 17550
},
{
"epoch": 8.043478260869565,
"grad_norm": 0.27416449785232544,
"learning_rate": 1.8964285714285715e-06,
"loss": 0.0061,
"step": 17575
},
{
"epoch": 8.05491990846682,
"grad_norm": 2.7768595218658447,
"learning_rate": 1.8900510204081633e-06,
"loss": 0.0079,
"step": 17600
},
{
"epoch": 8.066361556064074,
"grad_norm": 1.4709582328796387,
"learning_rate": 1.883673469387755e-06,
"loss": 0.0077,
"step": 17625
},
{
"epoch": 8.077803203661327,
"grad_norm": 2.9024922847747803,
"learning_rate": 1.8772959183673472e-06,
"loss": 0.0044,
"step": 17650
},
{
"epoch": 8.08924485125858,
"grad_norm": 1.4688998460769653,
"learning_rate": 1.870918367346939e-06,
"loss": 0.0051,
"step": 17675
},
{
"epoch": 8.100686498855834,
"grad_norm": 0.9765781760215759,
"learning_rate": 1.8645408163265307e-06,
"loss": 0.0117,
"step": 17700
},
{
"epoch": 8.11212814645309,
"grad_norm": 0.23804131150245667,
"learning_rate": 1.8581632653061225e-06,
"loss": 0.0079,
"step": 17725
},
{
"epoch": 8.123569794050344,
"grad_norm": 0.10259401798248291,
"learning_rate": 1.8517857142857142e-06,
"loss": 0.0063,
"step": 17750
},
{
"epoch": 8.135011441647597,
"grad_norm": 3.3533835411071777,
"learning_rate": 1.8454081632653064e-06,
"loss": 0.0072,
"step": 17775
},
{
"epoch": 8.14645308924485,
"grad_norm": 0.26105380058288574,
"learning_rate": 1.8390306122448982e-06,
"loss": 0.0123,
"step": 17800
},
{
"epoch": 8.157894736842104,
"grad_norm": 3.717785358428955,
"learning_rate": 1.83265306122449e-06,
"loss": 0.0057,
"step": 17825
},
{
"epoch": 8.16933638443936,
"grad_norm": 0.14977721869945526,
"learning_rate": 1.8262755102040817e-06,
"loss": 0.0099,
"step": 17850
},
{
"epoch": 8.180778032036613,
"grad_norm": 0.706764817237854,
"learning_rate": 1.8198979591836736e-06,
"loss": 0.0088,
"step": 17875
},
{
"epoch": 8.192219679633867,
"grad_norm": 0.2173665463924408,
"learning_rate": 1.8135204081632654e-06,
"loss": 0.0047,
"step": 17900
},
{
"epoch": 8.20366132723112,
"grad_norm": 1.6431961059570312,
"learning_rate": 1.8071428571428571e-06,
"loss": 0.0079,
"step": 17925
},
{
"epoch": 8.215102974828376,
"grad_norm": 1.231646180152893,
"learning_rate": 1.8007653061224491e-06,
"loss": 0.0086,
"step": 17950
},
{
"epoch": 8.22654462242563,
"grad_norm": 0.3211919069290161,
"learning_rate": 1.7943877551020409e-06,
"loss": 0.0093,
"step": 17975
},
{
"epoch": 8.237986270022883,
"grad_norm": 0.0584256611764431,
"learning_rate": 1.7880102040816328e-06,
"loss": 0.0072,
"step": 18000
},
{
"epoch": 8.249427917620137,
"grad_norm": 1.3097387552261353,
"learning_rate": 1.7816326530612246e-06,
"loss": 0.0064,
"step": 18025
},
{
"epoch": 8.26086956521739,
"grad_norm": 2.2777936458587646,
"learning_rate": 1.7752551020408163e-06,
"loss": 0.0062,
"step": 18050
},
{
"epoch": 8.272311212814646,
"grad_norm": 2.830906867980957,
"learning_rate": 1.768877551020408e-06,
"loss": 0.0071,
"step": 18075
},
{
"epoch": 8.2837528604119,
"grad_norm": 1.2469624280929565,
"learning_rate": 1.7624999999999999e-06,
"loss": 0.0086,
"step": 18100
},
{
"epoch": 8.295194508009153,
"grad_norm": 1.5804486274719238,
"learning_rate": 1.756122448979592e-06,
"loss": 0.0052,
"step": 18125
},
{
"epoch": 8.306636155606407,
"grad_norm": 2.0198347568511963,
"learning_rate": 1.7497448979591838e-06,
"loss": 0.009,
"step": 18150
},
{
"epoch": 8.31807780320366,
"grad_norm": 1.086702585220337,
"learning_rate": 1.7433673469387755e-06,
"loss": 0.0034,
"step": 18175
},
{
"epoch": 8.329519450800916,
"grad_norm": 0.40313854813575745,
"learning_rate": 1.7369897959183673e-06,
"loss": 0.011,
"step": 18200
},
{
"epoch": 8.34096109839817,
"grad_norm": 1.1296424865722656,
"learning_rate": 1.7306122448979595e-06,
"loss": 0.0051,
"step": 18225
},
{
"epoch": 8.352402745995423,
"grad_norm": 0.12974560260772705,
"learning_rate": 1.7242346938775512e-06,
"loss": 0.0103,
"step": 18250
},
{
"epoch": 8.363844393592677,
"grad_norm": 6.7116265296936035,
"learning_rate": 1.717857142857143e-06,
"loss": 0.0076,
"step": 18275
},
{
"epoch": 8.37528604118993,
"grad_norm": 3.5247104167938232,
"learning_rate": 1.7114795918367347e-06,
"loss": 0.0096,
"step": 18300
},
{
"epoch": 8.386727688787186,
"grad_norm": 1.0144484043121338,
"learning_rate": 1.7051020408163265e-06,
"loss": 0.0073,
"step": 18325
},
{
"epoch": 8.39816933638444,
"grad_norm": 0.3811889886856079,
"learning_rate": 1.6987244897959185e-06,
"loss": 0.0056,
"step": 18350
},
{
"epoch": 8.409610983981693,
"grad_norm": 0.449440598487854,
"learning_rate": 1.6923469387755102e-06,
"loss": 0.0056,
"step": 18375
},
{
"epoch": 8.421052631578947,
"grad_norm": 1.2429654598236084,
"learning_rate": 1.685969387755102e-06,
"loss": 0.0089,
"step": 18400
},
{
"epoch": 8.4324942791762,
"grad_norm": 0.530453085899353,
"learning_rate": 1.6795918367346937e-06,
"loss": 0.0054,
"step": 18425
},
{
"epoch": 8.443935926773456,
"grad_norm": 0.31273970007896423,
"learning_rate": 1.673214285714286e-06,
"loss": 0.0072,
"step": 18450
},
{
"epoch": 8.45537757437071,
"grad_norm": 5.236429214477539,
"learning_rate": 1.6668367346938777e-06,
"loss": 0.005,
"step": 18475
},
{
"epoch": 8.466819221967963,
"grad_norm": 0.06535414606332779,
"learning_rate": 1.6604591836734694e-06,
"loss": 0.0065,
"step": 18500
},
{
"epoch": 8.478260869565217,
"grad_norm": 1.5802868604660034,
"learning_rate": 1.6540816326530612e-06,
"loss": 0.0063,
"step": 18525
},
{
"epoch": 8.48970251716247,
"grad_norm": 1.6625992059707642,
"learning_rate": 1.647704081632653e-06,
"loss": 0.0106,
"step": 18550
},
{
"epoch": 8.501144164759726,
"grad_norm": 0.21594946086406708,
"learning_rate": 1.6413265306122451e-06,
"loss": 0.0036,
"step": 18575
},
{
"epoch": 8.51258581235698,
"grad_norm": 0.22096717357635498,
"learning_rate": 1.6349489795918369e-06,
"loss": 0.009,
"step": 18600
},
{
"epoch": 8.524027459954233,
"grad_norm": 0.08814064413309097,
"learning_rate": 1.6285714285714286e-06,
"loss": 0.0021,
"step": 18625
},
{
"epoch": 8.535469107551487,
"grad_norm": 0.07081279903650284,
"learning_rate": 1.6221938775510204e-06,
"loss": 0.0077,
"step": 18650
},
{
"epoch": 8.546910755148742,
"grad_norm": 0.4350265562534332,
"learning_rate": 1.6158163265306126e-06,
"loss": 0.0069,
"step": 18675
},
{
"epoch": 8.558352402745996,
"grad_norm": 1.913025975227356,
"learning_rate": 1.6094387755102043e-06,
"loss": 0.0094,
"step": 18700
},
{
"epoch": 8.56979405034325,
"grad_norm": 0.0708305835723877,
"learning_rate": 1.603061224489796e-06,
"loss": 0.0066,
"step": 18725
},
{
"epoch": 8.581235697940503,
"grad_norm": 0.2957993149757385,
"learning_rate": 1.5966836734693878e-06,
"loss": 0.0103,
"step": 18750
},
{
"epoch": 8.592677345537757,
"grad_norm": 0.3358778953552246,
"learning_rate": 1.5903061224489796e-06,
"loss": 0.0064,
"step": 18775
},
{
"epoch": 8.604118993135012,
"grad_norm": 6.7304887771606445,
"learning_rate": 1.5839285714285715e-06,
"loss": 0.0093,
"step": 18800
},
{
"epoch": 8.615560640732266,
"grad_norm": 3.724275588989258,
"learning_rate": 1.5775510204081633e-06,
"loss": 0.0084,
"step": 18825
},
{
"epoch": 8.62700228832952,
"grad_norm": 0.04007994756102562,
"learning_rate": 1.571173469387755e-06,
"loss": 0.0136,
"step": 18850
},
{
"epoch": 8.638443935926773,
"grad_norm": 0.05183762311935425,
"learning_rate": 1.5647959183673468e-06,
"loss": 0.0061,
"step": 18875
},
{
"epoch": 8.649885583524027,
"grad_norm": 3.223602771759033,
"learning_rate": 1.5584183673469388e-06,
"loss": 0.0098,
"step": 18900
},
{
"epoch": 8.661327231121282,
"grad_norm": 3.744605541229248,
"learning_rate": 1.5520408163265307e-06,
"loss": 0.0106,
"step": 18925
},
{
"epoch": 8.672768878718536,
"grad_norm": 0.07725165784358978,
"learning_rate": 1.5456632653061225e-06,
"loss": 0.0076,
"step": 18950
},
{
"epoch": 8.68421052631579,
"grad_norm": 14.24994945526123,
"learning_rate": 1.5392857142857143e-06,
"loss": 0.0086,
"step": 18975
},
{
"epoch": 8.695652173913043,
"grad_norm": 0.2724073529243469,
"learning_rate": 1.5329081632653062e-06,
"loss": 0.0141,
"step": 19000
},
{
"epoch": 8.707093821510298,
"grad_norm": 0.03878064826130867,
"learning_rate": 1.526530612244898e-06,
"loss": 0.0071,
"step": 19025
},
{
"epoch": 8.718535469107552,
"grad_norm": 0.631288468837738,
"learning_rate": 1.5204081632653061e-06,
"loss": 0.011,
"step": 19050
},
{
"epoch": 8.729977116704806,
"grad_norm": 9.200640678405762,
"learning_rate": 1.5140306122448979e-06,
"loss": 0.009,
"step": 19075
},
{
"epoch": 8.74141876430206,
"grad_norm": 0.18505507707595825,
"learning_rate": 1.5076530612244898e-06,
"loss": 0.0084,
"step": 19100
},
{
"epoch": 8.752860411899313,
"grad_norm": 5.33227014541626,
"learning_rate": 1.5012755102040816e-06,
"loss": 0.0057,
"step": 19125
},
{
"epoch": 8.764302059496568,
"grad_norm": 0.31844860315322876,
"learning_rate": 1.4948979591836736e-06,
"loss": 0.0081,
"step": 19150
},
{
"epoch": 8.775743707093822,
"grad_norm": 0.5912889242172241,
"learning_rate": 1.4885204081632653e-06,
"loss": 0.0083,
"step": 19175
},
{
"epoch": 8.787185354691076,
"grad_norm": 0.05793588235974312,
"learning_rate": 1.4821428571428573e-06,
"loss": 0.0061,
"step": 19200
},
{
"epoch": 8.79862700228833,
"grad_norm": 3.5258617401123047,
"learning_rate": 1.475765306122449e-06,
"loss": 0.0091,
"step": 19225
},
{
"epoch": 8.810068649885583,
"grad_norm": 0.5657800436019897,
"learning_rate": 1.4693877551020408e-06,
"loss": 0.0082,
"step": 19250
},
{
"epoch": 8.821510297482838,
"grad_norm": 0.11896498501300812,
"learning_rate": 1.4630102040816328e-06,
"loss": 0.0061,
"step": 19275
},
{
"epoch": 8.832951945080092,
"grad_norm": 2.1698849201202393,
"learning_rate": 1.4566326530612245e-06,
"loss": 0.0071,
"step": 19300
},
{
"epoch": 8.844393592677346,
"grad_norm": 2.6796391010284424,
"learning_rate": 1.4502551020408165e-06,
"loss": 0.0064,
"step": 19325
},
{
"epoch": 8.8558352402746,
"grad_norm": 3.8971478939056396,
"learning_rate": 1.4438775510204082e-06,
"loss": 0.0071,
"step": 19350
},
{
"epoch": 8.867276887871853,
"grad_norm": 3.8938040733337402,
"learning_rate": 1.4375000000000002e-06,
"loss": 0.0034,
"step": 19375
},
{
"epoch": 8.878718535469108,
"grad_norm": 0.1528436839580536,
"learning_rate": 1.431122448979592e-06,
"loss": 0.0083,
"step": 19400
},
{
"epoch": 8.890160183066362,
"grad_norm": 0.024963609874248505,
"learning_rate": 1.4247448979591837e-06,
"loss": 0.0048,
"step": 19425
},
{
"epoch": 8.901601830663616,
"grad_norm": 0.10767273604869843,
"learning_rate": 1.4183673469387757e-06,
"loss": 0.0048,
"step": 19450
},
{
"epoch": 8.91304347826087,
"grad_norm": 0.09147515892982483,
"learning_rate": 1.4119897959183674e-06,
"loss": 0.0054,
"step": 19475
},
{
"epoch": 8.924485125858123,
"grad_norm": 0.6032863855361938,
"learning_rate": 1.4056122448979592e-06,
"loss": 0.0112,
"step": 19500
},
{
"epoch": 8.935926773455378,
"grad_norm": 0.663342297077179,
"learning_rate": 1.399234693877551e-06,
"loss": 0.0089,
"step": 19525
},
{
"epoch": 8.947368421052632,
"grad_norm": 0.2562825381755829,
"learning_rate": 1.392857142857143e-06,
"loss": 0.0052,
"step": 19550
},
{
"epoch": 8.958810068649885,
"grad_norm": 1.2865808010101318,
"learning_rate": 1.3864795918367347e-06,
"loss": 0.0105,
"step": 19575
},
{
"epoch": 8.97025171624714,
"grad_norm": 6.299253940582275,
"learning_rate": 1.3801020408163266e-06,
"loss": 0.0086,
"step": 19600
},
{
"epoch": 8.981693363844393,
"grad_norm": 0.08225379884243011,
"learning_rate": 1.3737244897959184e-06,
"loss": 0.0036,
"step": 19625
},
{
"epoch": 8.993135011441648,
"grad_norm": 0.18679049611091614,
"learning_rate": 1.3673469387755102e-06,
"loss": 0.0039,
"step": 19650
},
{
"epoch": 9.004576659038902,
"grad_norm": 0.39510053396224976,
"learning_rate": 1.3609693877551021e-06,
"loss": 0.0088,
"step": 19675
},
{
"epoch": 9.016018306636155,
"grad_norm": 0.06154235452413559,
"learning_rate": 1.3545918367346939e-06,
"loss": 0.0053,
"step": 19700
},
{
"epoch": 9.027459954233409,
"grad_norm": 0.6689733862876892,
"learning_rate": 1.3482142857142858e-06,
"loss": 0.0163,
"step": 19725
},
{
"epoch": 9.038901601830664,
"grad_norm": 0.4997970759868622,
"learning_rate": 1.3418367346938776e-06,
"loss": 0.0049,
"step": 19750
},
{
"epoch": 9.050343249427918,
"grad_norm": 0.23966868221759796,
"learning_rate": 1.3354591836734696e-06,
"loss": 0.0061,
"step": 19775
},
{
"epoch": 9.061784897025172,
"grad_norm": 3.6731507778167725,
"learning_rate": 1.3290816326530613e-06,
"loss": 0.0085,
"step": 19800
},
{
"epoch": 9.073226544622425,
"grad_norm": 1.2267433404922485,
"learning_rate": 1.3227040816326533e-06,
"loss": 0.0082,
"step": 19825
},
{
"epoch": 9.084668192219679,
"grad_norm": 3.816725492477417,
"learning_rate": 1.316326530612245e-06,
"loss": 0.0031,
"step": 19850
},
{
"epoch": 9.096109839816934,
"grad_norm": 0.5195302963256836,
"learning_rate": 1.3099489795918368e-06,
"loss": 0.0081,
"step": 19875
},
{
"epoch": 9.107551487414188,
"grad_norm": 0.04216615855693817,
"learning_rate": 1.3035714285714286e-06,
"loss": 0.006,
"step": 19900
},
{
"epoch": 9.118993135011442,
"grad_norm": 0.15574586391448975,
"learning_rate": 1.2971938775510203e-06,
"loss": 0.0089,
"step": 19925
},
{
"epoch": 9.130434782608695,
"grad_norm": 0.7792266011238098,
"learning_rate": 1.2908163265306123e-06,
"loss": 0.0054,
"step": 19950
},
{
"epoch": 9.141876430205949,
"grad_norm": 0.07065439969301224,
"learning_rate": 1.284438775510204e-06,
"loss": 0.0075,
"step": 19975
},
{
"epoch": 9.153318077803204,
"grad_norm": 1.0782091617584229,
"learning_rate": 1.278061224489796e-06,
"loss": 0.0044,
"step": 20000
},
{
"epoch": 9.153318077803204,
"eval_loss": 0.20870625972747803,
"eval_runtime": 5198.5381,
"eval_samples_per_second": 1.832,
"eval_steps_per_second": 0.229,
"eval_wer": 0.0960037115249248,
"step": 20000
},
{
"epoch": 9.164759725400458,
"grad_norm": 2.809683322906494,
"learning_rate": 1.2716836734693878e-06,
"loss": 0.0099,
"step": 20025
},
{
"epoch": 9.176201372997712,
"grad_norm": 0.10887058824300766,
"learning_rate": 1.2653061224489797e-06,
"loss": 0.0042,
"step": 20050
},
{
"epoch": 9.187643020594965,
"grad_norm": 1.1026456356048584,
"learning_rate": 1.2589285714285715e-06,
"loss": 0.0086,
"step": 20075
},
{
"epoch": 9.199084668192219,
"grad_norm": 2.4570958614349365,
"learning_rate": 1.2525510204081632e-06,
"loss": 0.0041,
"step": 20100
},
{
"epoch": 9.210526315789474,
"grad_norm": 0.12701238691806793,
"learning_rate": 1.2461734693877552e-06,
"loss": 0.0069,
"step": 20125
},
{
"epoch": 9.221967963386728,
"grad_norm": 0.08014000952243805,
"learning_rate": 1.239795918367347e-06,
"loss": 0.0038,
"step": 20150
},
{
"epoch": 9.233409610983982,
"grad_norm": 0.03979151323437691,
"learning_rate": 1.233418367346939e-06,
"loss": 0.0055,
"step": 20175
},
{
"epoch": 9.244851258581235,
"grad_norm": 0.17478398978710175,
"learning_rate": 1.2270408163265307e-06,
"loss": 0.0042,
"step": 20200
},
{
"epoch": 9.256292906178489,
"grad_norm": 1.452242136001587,
"learning_rate": 1.2206632653061226e-06,
"loss": 0.0062,
"step": 20225
},
{
"epoch": 9.267734553775744,
"grad_norm": 1.9169418811798096,
"learning_rate": 1.2142857142857144e-06,
"loss": 0.005,
"step": 20250
},
{
"epoch": 9.279176201372998,
"grad_norm": 0.07548157870769501,
"learning_rate": 1.2079081632653064e-06,
"loss": 0.0065,
"step": 20275
},
{
"epoch": 9.290617848970252,
"grad_norm": 0.0355396643280983,
"learning_rate": 1.2015306122448981e-06,
"loss": 0.0059,
"step": 20300
},
{
"epoch": 9.302059496567505,
"grad_norm": 0.0616096630692482,
"learning_rate": 1.1951530612244899e-06,
"loss": 0.0067,
"step": 20325
},
{
"epoch": 9.31350114416476,
"grad_norm": 4.230183124542236,
"learning_rate": 1.1887755102040816e-06,
"loss": 0.0054,
"step": 20350
},
{
"epoch": 9.324942791762014,
"grad_norm": 4.540670871734619,
"learning_rate": 1.1823979591836734e-06,
"loss": 0.0102,
"step": 20375
},
{
"epoch": 9.336384439359268,
"grad_norm": 0.9268777370452881,
"learning_rate": 1.1760204081632654e-06,
"loss": 0.0075,
"step": 20400
},
{
"epoch": 9.347826086956522,
"grad_norm": 1.7255213260650635,
"learning_rate": 1.1696428571428571e-06,
"loss": 0.0065,
"step": 20425
},
{
"epoch": 9.359267734553775,
"grad_norm": 2.455357074737549,
"learning_rate": 1.163265306122449e-06,
"loss": 0.0031,
"step": 20450
},
{
"epoch": 9.37070938215103,
"grad_norm": 0.6424604058265686,
"learning_rate": 1.1568877551020408e-06,
"loss": 0.0111,
"step": 20475
},
{
"epoch": 9.382151029748284,
"grad_norm": 5.830212116241455,
"learning_rate": 1.1505102040816326e-06,
"loss": 0.0076,
"step": 20500
},
{
"epoch": 9.393592677345538,
"grad_norm": 0.1393570601940155,
"learning_rate": 1.1441326530612246e-06,
"loss": 0.0067,
"step": 20525
},
{
"epoch": 9.405034324942791,
"grad_norm": 0.03577861189842224,
"learning_rate": 1.1377551020408163e-06,
"loss": 0.003,
"step": 20550
},
{
"epoch": 9.416475972540045,
"grad_norm": 0.26087144017219543,
"learning_rate": 1.1313775510204083e-06,
"loss": 0.0056,
"step": 20575
},
{
"epoch": 9.4279176201373,
"grad_norm": 0.6032670140266418,
"learning_rate": 1.125e-06,
"loss": 0.0038,
"step": 20600
},
{
"epoch": 9.439359267734554,
"grad_norm": 0.40521496534347534,
"learning_rate": 1.118622448979592e-06,
"loss": 0.0042,
"step": 20625
},
{
"epoch": 9.450800915331808,
"grad_norm": 0.04099424555897713,
"learning_rate": 1.1122448979591838e-06,
"loss": 0.0049,
"step": 20650
},
{
"epoch": 9.462242562929061,
"grad_norm": 0.08814136683940887,
"learning_rate": 1.1058673469387757e-06,
"loss": 0.0113,
"step": 20675
},
{
"epoch": 9.473684210526315,
"grad_norm": 0.07661563158035278,
"learning_rate": 1.0994897959183675e-06,
"loss": 0.0043,
"step": 20700
},
{
"epoch": 9.48512585812357,
"grad_norm": 0.3395727872848511,
"learning_rate": 1.0931122448979592e-06,
"loss": 0.0107,
"step": 20725
},
{
"epoch": 9.496567505720824,
"grad_norm": 0.7869210243225098,
"learning_rate": 1.0867346938775512e-06,
"loss": 0.0051,
"step": 20750
},
{
"epoch": 9.508009153318078,
"grad_norm": 0.7539875507354736,
"learning_rate": 1.080357142857143e-06,
"loss": 0.0056,
"step": 20775
},
{
"epoch": 9.519450800915331,
"grad_norm": 16.64691162109375,
"learning_rate": 1.0739795918367347e-06,
"loss": 0.0066,
"step": 20800
},
{
"epoch": 9.530892448512585,
"grad_norm": 0.7613354325294495,
"learning_rate": 1.0676020408163265e-06,
"loss": 0.0139,
"step": 20825
},
{
"epoch": 9.54233409610984,
"grad_norm": 3.5544533729553223,
"learning_rate": 1.0612244897959184e-06,
"loss": 0.0058,
"step": 20850
},
{
"epoch": 9.553775743707094,
"grad_norm": 0.14786852896213531,
"learning_rate": 1.0548469387755102e-06,
"loss": 0.0072,
"step": 20875
},
{
"epoch": 9.565217391304348,
"grad_norm": 0.15534640848636627,
"learning_rate": 1.0484693877551021e-06,
"loss": 0.0045,
"step": 20900
},
{
"epoch": 9.576659038901601,
"grad_norm": 0.23513126373291016,
"learning_rate": 1.042091836734694e-06,
"loss": 0.0042,
"step": 20925
},
{
"epoch": 9.588100686498855,
"grad_norm": 0.39259815216064453,
"learning_rate": 1.0357142857142857e-06,
"loss": 0.0052,
"step": 20950
},
{
"epoch": 9.59954233409611,
"grad_norm": 3.45914626121521,
"learning_rate": 1.0293367346938776e-06,
"loss": 0.0083,
"step": 20975
},
{
"epoch": 9.610983981693364,
"grad_norm": 0.8460655212402344,
"learning_rate": 1.0229591836734694e-06,
"loss": 0.006,
"step": 21000
},
{
"epoch": 9.622425629290618,
"grad_norm": 0.3035661280155182,
"learning_rate": 1.0165816326530613e-06,
"loss": 0.0066,
"step": 21025
},
{
"epoch": 9.633867276887871,
"grad_norm": 0.07911881059408188,
"learning_rate": 1.010204081632653e-06,
"loss": 0.0044,
"step": 21050
},
{
"epoch": 9.645308924485127,
"grad_norm": 1.1485878229141235,
"learning_rate": 1.003826530612245e-06,
"loss": 0.0061,
"step": 21075
},
{
"epoch": 9.65675057208238,
"grad_norm": 3.1659584045410156,
"learning_rate": 9.974489795918368e-07,
"loss": 0.0062,
"step": 21100
},
{
"epoch": 9.668192219679634,
"grad_norm": 0.08537878096103668,
"learning_rate": 9.910714285714286e-07,
"loss": 0.0065,
"step": 21125
},
{
"epoch": 9.679633867276888,
"grad_norm": 0.27491432428359985,
"learning_rate": 9.846938775510205e-07,
"loss": 0.0054,
"step": 21150
},
{
"epoch": 9.691075514874141,
"grad_norm": 1.1356456279754639,
"learning_rate": 9.783163265306123e-07,
"loss": 0.0106,
"step": 21175
},
{
"epoch": 9.702517162471397,
"grad_norm": 0.0873272642493248,
"learning_rate": 9.71938775510204e-07,
"loss": 0.0028,
"step": 21200
},
{
"epoch": 9.71395881006865,
"grad_norm": 0.33347004652023315,
"learning_rate": 9.655612244897958e-07,
"loss": 0.0134,
"step": 21225
},
{
"epoch": 9.725400457665904,
"grad_norm": 0.14462488889694214,
"learning_rate": 9.591836734693878e-07,
"loss": 0.0046,
"step": 21250
},
{
"epoch": 9.736842105263158,
"grad_norm": 1.358343482017517,
"learning_rate": 9.528061224489796e-07,
"loss": 0.0103,
"step": 21275
},
{
"epoch": 9.748283752860411,
"grad_norm": 0.039507463574409485,
"learning_rate": 9.464285714285715e-07,
"loss": 0.0041,
"step": 21300
},
{
"epoch": 9.759725400457667,
"grad_norm": 0.7528636455535889,
"learning_rate": 9.400510204081633e-07,
"loss": 0.0028,
"step": 21325
},
{
"epoch": 9.77116704805492,
"grad_norm": 1.8518726825714111,
"learning_rate": 9.33673469387755e-07,
"loss": 0.0054,
"step": 21350
},
{
"epoch": 9.782608695652174,
"grad_norm": 0.5261640548706055,
"learning_rate": 9.27295918367347e-07,
"loss": 0.008,
"step": 21375
},
{
"epoch": 9.794050343249427,
"grad_norm": 0.48610520362854004,
"learning_rate": 9.209183673469387e-07,
"loss": 0.0045,
"step": 21400
},
{
"epoch": 9.805491990846681,
"grad_norm": 0.35371649265289307,
"learning_rate": 9.145408163265307e-07,
"loss": 0.0067,
"step": 21425
},
{
"epoch": 9.816933638443937,
"grad_norm": 4.049976825714111,
"learning_rate": 9.081632653061225e-07,
"loss": 0.0065,
"step": 21450
},
{
"epoch": 9.82837528604119,
"grad_norm": 4.127640724182129,
"learning_rate": 9.017857142857144e-07,
"loss": 0.009,
"step": 21475
},
{
"epoch": 9.839816933638444,
"grad_norm": 0.15992511808872223,
"learning_rate": 8.954081632653062e-07,
"loss": 0.0047,
"step": 21500
},
{
"epoch": 9.851258581235697,
"grad_norm": 0.5393081307411194,
"learning_rate": 8.89030612244898e-07,
"loss": 0.0054,
"step": 21525
},
{
"epoch": 9.862700228832953,
"grad_norm": 4.514695167541504,
"learning_rate": 8.826530612244898e-07,
"loss": 0.0054,
"step": 21550
},
{
"epoch": 9.874141876430206,
"grad_norm": 1.2299213409423828,
"learning_rate": 8.762755102040816e-07,
"loss": 0.0042,
"step": 21575
},
{
"epoch": 9.88558352402746,
"grad_norm": 0.12707316875457764,
"learning_rate": 8.698979591836735e-07,
"loss": 0.0062,
"step": 21600
},
{
"epoch": 9.897025171624714,
"grad_norm": 0.8991334438323975,
"learning_rate": 8.635204081632653e-07,
"loss": 0.0078,
"step": 21625
},
{
"epoch": 9.908466819221967,
"grad_norm": 0.9138942360877991,
"learning_rate": 8.571428571428572e-07,
"loss": 0.0034,
"step": 21650
},
{
"epoch": 9.919908466819223,
"grad_norm": 0.07458051294088364,
"learning_rate": 8.50765306122449e-07,
"loss": 0.0061,
"step": 21675
},
{
"epoch": 9.931350114416476,
"grad_norm": 0.15481892228126526,
"learning_rate": 8.44387755102041e-07,
"loss": 0.0062,
"step": 21700
},
{
"epoch": 9.94279176201373,
"grad_norm": 0.4163806736469269,
"learning_rate": 8.380102040816327e-07,
"loss": 0.0037,
"step": 21725
},
{
"epoch": 9.954233409610984,
"grad_norm": 0.1567489504814148,
"learning_rate": 8.316326530612246e-07,
"loss": 0.0041,
"step": 21750
},
{
"epoch": 9.965675057208237,
"grad_norm": 2.551042079925537,
"learning_rate": 8.252551020408163e-07,
"loss": 0.0093,
"step": 21775
},
{
"epoch": 9.977116704805493,
"grad_norm": 0.11054814606904984,
"learning_rate": 8.188775510204081e-07,
"loss": 0.0058,
"step": 21800
},
{
"epoch": 9.988558352402746,
"grad_norm": 0.0360272191464901,
"learning_rate": 8.125000000000001e-07,
"loss": 0.0072,
"step": 21825
},
{
"epoch": 10.0,
"grad_norm": 5.81298303604126,
"learning_rate": 8.061224489795918e-07,
"loss": 0.0068,
"step": 21850
},
{
"epoch": 10.011441647597254,
"grad_norm": 0.0239973533898592,
"learning_rate": 7.997448979591838e-07,
"loss": 0.0039,
"step": 21875
},
{
"epoch": 10.022883295194507,
"grad_norm": 0.14009559154510498,
"learning_rate": 7.933673469387755e-07,
"loss": 0.0083,
"step": 21900
},
{
"epoch": 10.034324942791763,
"grad_norm": 0.05973900109529495,
"learning_rate": 7.869897959183675e-07,
"loss": 0.0067,
"step": 21925
},
{
"epoch": 10.045766590389016,
"grad_norm": 2.9854466915130615,
"learning_rate": 7.806122448979593e-07,
"loss": 0.005,
"step": 21950
},
{
"epoch": 10.05720823798627,
"grad_norm": 2.130392074584961,
"learning_rate": 7.74234693877551e-07,
"loss": 0.0026,
"step": 21975
},
{
"epoch": 10.068649885583524,
"grad_norm": 13.454663276672363,
"learning_rate": 7.678571428571429e-07,
"loss": 0.0094,
"step": 22000
},
{
"epoch": 10.080091533180777,
"grad_norm": 2.081770420074463,
"learning_rate": 7.614795918367347e-07,
"loss": 0.0069,
"step": 22025
},
{
"epoch": 10.091533180778033,
"grad_norm": 1.7810285091400146,
"learning_rate": 7.551020408163266e-07,
"loss": 0.0113,
"step": 22050
},
{
"epoch": 10.102974828375286,
"grad_norm": 3.7953577041625977,
"learning_rate": 7.487244897959183e-07,
"loss": 0.005,
"step": 22075
},
{
"epoch": 10.11441647597254,
"grad_norm": 7.428182601928711,
"learning_rate": 7.423469387755102e-07,
"loss": 0.0077,
"step": 22100
},
{
"epoch": 10.125858123569794,
"grad_norm": 4.6249165534973145,
"learning_rate": 7.359693877551021e-07,
"loss": 0.0032,
"step": 22125
},
{
"epoch": 10.137299771167047,
"grad_norm": 7.158110618591309,
"learning_rate": 7.295918367346939e-07,
"loss": 0.0071,
"step": 22150
},
{
"epoch": 10.148741418764303,
"grad_norm": 0.05203917250037193,
"learning_rate": 7.232142857142858e-07,
"loss": 0.0025,
"step": 22175
},
{
"epoch": 10.160183066361556,
"grad_norm": 0.4155661165714264,
"learning_rate": 7.168367346938775e-07,
"loss": 0.0057,
"step": 22200
},
{
"epoch": 10.17162471395881,
"grad_norm": 0.3524133563041687,
"learning_rate": 7.104591836734694e-07,
"loss": 0.0042,
"step": 22225
},
{
"epoch": 10.183066361556063,
"grad_norm": 7.646109580993652,
"learning_rate": 7.040816326530613e-07,
"loss": 0.0074,
"step": 22250
},
{
"epoch": 10.194508009153319,
"grad_norm": 1.1890356540679932,
"learning_rate": 6.977040816326531e-07,
"loss": 0.0024,
"step": 22275
},
{
"epoch": 10.205949656750573,
"grad_norm": 2.9643213748931885,
"learning_rate": 6.913265306122449e-07,
"loss": 0.0113,
"step": 22300
},
{
"epoch": 10.217391304347826,
"grad_norm": 0.13656121492385864,
"learning_rate": 6.849489795918367e-07,
"loss": 0.0027,
"step": 22325
},
{
"epoch": 10.22883295194508,
"grad_norm": 3.043562173843384,
"learning_rate": 6.785714285714286e-07,
"loss": 0.015,
"step": 22350
},
{
"epoch": 10.240274599542333,
"grad_norm": 0.23451216518878937,
"learning_rate": 6.721938775510205e-07,
"loss": 0.004,
"step": 22375
},
{
"epoch": 10.251716247139589,
"grad_norm": 6.707335948944092,
"learning_rate": 6.658163265306123e-07,
"loss": 0.0029,
"step": 22400
},
{
"epoch": 10.263157894736842,
"grad_norm": 0.055560849606990814,
"learning_rate": 6.594387755102041e-07,
"loss": 0.0088,
"step": 22425
},
{
"epoch": 10.274599542334096,
"grad_norm": 8.039435386657715,
"learning_rate": 6.53061224489796e-07,
"loss": 0.0068,
"step": 22450
},
{
"epoch": 10.28604118993135,
"grad_norm": 0.11787508428096771,
"learning_rate": 6.466836734693878e-07,
"loss": 0.0035,
"step": 22475
},
{
"epoch": 10.297482837528603,
"grad_norm": 1.5206620693206787,
"learning_rate": 6.403061224489796e-07,
"loss": 0.0094,
"step": 22500
},
{
"epoch": 10.308924485125859,
"grad_norm": 0.029644370079040527,
"learning_rate": 6.339285714285714e-07,
"loss": 0.0054,
"step": 22525
},
{
"epoch": 10.320366132723112,
"grad_norm": 0.48167529702186584,
"learning_rate": 6.275510204081633e-07,
"loss": 0.0117,
"step": 22550
},
{
"epoch": 10.331807780320366,
"grad_norm": 0.14305919408798218,
"learning_rate": 6.211734693877551e-07,
"loss": 0.0048,
"step": 22575
},
{
"epoch": 10.34324942791762,
"grad_norm": 1.8980730772018433,
"learning_rate": 6.14795918367347e-07,
"loss": 0.0026,
"step": 22600
},
{
"epoch": 10.354691075514873,
"grad_norm": 1.3038541078567505,
"learning_rate": 6.084183673469388e-07,
"loss": 0.0039,
"step": 22625
},
{
"epoch": 10.366132723112129,
"grad_norm": 5.523509502410889,
"learning_rate": 6.020408163265306e-07,
"loss": 0.0066,
"step": 22650
},
{
"epoch": 10.377574370709382,
"grad_norm": 0.04104507714509964,
"learning_rate": 5.956632653061225e-07,
"loss": 0.0042,
"step": 22675
},
{
"epoch": 10.389016018306636,
"grad_norm": 1.2055227756500244,
"learning_rate": 5.892857142857143e-07,
"loss": 0.0038,
"step": 22700
},
{
"epoch": 10.40045766590389,
"grad_norm": 0.02921094372868538,
"learning_rate": 5.829081632653061e-07,
"loss": 0.0036,
"step": 22725
},
{
"epoch": 10.411899313501145,
"grad_norm": 16.179323196411133,
"learning_rate": 5.76530612244898e-07,
"loss": 0.0049,
"step": 22750
},
{
"epoch": 10.423340961098399,
"grad_norm": 0.6530442833900452,
"learning_rate": 5.701530612244898e-07,
"loss": 0.0022,
"step": 22775
},
{
"epoch": 10.434782608695652,
"grad_norm": 8.9596529006958,
"learning_rate": 5.637755102040817e-07,
"loss": 0.0077,
"step": 22800
},
{
"epoch": 10.446224256292906,
"grad_norm": 0.03742032125592232,
"learning_rate": 5.573979591836735e-07,
"loss": 0.004,
"step": 22825
},
{
"epoch": 10.45766590389016,
"grad_norm": 4.956221103668213,
"learning_rate": 5.510204081632653e-07,
"loss": 0.0062,
"step": 22850
},
{
"epoch": 10.469107551487415,
"grad_norm": 0.04372161999344826,
"learning_rate": 5.446428571428572e-07,
"loss": 0.0029,
"step": 22875
},
{
"epoch": 10.480549199084669,
"grad_norm": 2.4792611598968506,
"learning_rate": 5.38265306122449e-07,
"loss": 0.0047,
"step": 22900
},
{
"epoch": 10.491990846681922,
"grad_norm": 1.2810579538345337,
"learning_rate": 5.318877551020408e-07,
"loss": 0.0054,
"step": 22925
},
{
"epoch": 10.503432494279176,
"grad_norm": 16.370330810546875,
"learning_rate": 5.255102040816326e-07,
"loss": 0.0114,
"step": 22950
},
{
"epoch": 10.51487414187643,
"grad_norm": 0.6860277652740479,
"learning_rate": 5.191326530612245e-07,
"loss": 0.004,
"step": 22975
},
{
"epoch": 10.526315789473685,
"grad_norm": 1.6368955373764038,
"learning_rate": 5.127551020408164e-07,
"loss": 0.0074,
"step": 23000
},
{
"epoch": 10.537757437070939,
"grad_norm": 1.6935875415802002,
"learning_rate": 5.063775510204082e-07,
"loss": 0.0043,
"step": 23025
},
{
"epoch": 10.549199084668192,
"grad_norm": 0.27796322107315063,
"learning_rate": 5.002551020408164e-07,
"loss": 0.0075,
"step": 23050
},
{
"epoch": 10.560640732265446,
"grad_norm": 0.05163831263780594,
"learning_rate": 4.938775510204081e-07,
"loss": 0.0033,
"step": 23075
},
{
"epoch": 10.5720823798627,
"grad_norm": 3.6930410861968994,
"learning_rate": 4.875e-07,
"loss": 0.0067,
"step": 23100
},
{
"epoch": 10.583524027459955,
"grad_norm": 2.0602188110351562,
"learning_rate": 4.811224489795919e-07,
"loss": 0.0046,
"step": 23125
},
{
"epoch": 10.594965675057209,
"grad_norm": 6.743757724761963,
"learning_rate": 4.747448979591837e-07,
"loss": 0.0119,
"step": 23150
},
{
"epoch": 10.606407322654462,
"grad_norm": 0.08728916943073273,
"learning_rate": 4.683673469387756e-07,
"loss": 0.0023,
"step": 23175
},
{
"epoch": 10.617848970251716,
"grad_norm": 0.6951027512550354,
"learning_rate": 4.6198979591836744e-07,
"loss": 0.0067,
"step": 23200
},
{
"epoch": 10.62929061784897,
"grad_norm": 0.9762528538703918,
"learning_rate": 4.556122448979592e-07,
"loss": 0.0033,
"step": 23225
},
{
"epoch": 10.640732265446225,
"grad_norm": 4.385350704193115,
"learning_rate": 4.49234693877551e-07,
"loss": 0.006,
"step": 23250
},
{
"epoch": 10.652173913043478,
"grad_norm": 0.12361292541027069,
"learning_rate": 4.4285714285714286e-07,
"loss": 0.002,
"step": 23275
},
{
"epoch": 10.663615560640732,
"grad_norm": 4.548694610595703,
"learning_rate": 4.364795918367347e-07,
"loss": 0.0063,
"step": 23300
},
{
"epoch": 10.675057208237986,
"grad_norm": 3.6494503021240234,
"learning_rate": 4.301020408163266e-07,
"loss": 0.005,
"step": 23325
},
{
"epoch": 10.68649885583524,
"grad_norm": 5.889870643615723,
"learning_rate": 4.237244897959184e-07,
"loss": 0.0057,
"step": 23350
},
{
"epoch": 10.697940503432495,
"grad_norm": 3.1003196239471436,
"learning_rate": 4.1734693877551025e-07,
"loss": 0.0073,
"step": 23375
},
{
"epoch": 10.709382151029748,
"grad_norm": 3.9601292610168457,
"learning_rate": 4.109693877551021e-07,
"loss": 0.0089,
"step": 23400
},
{
"epoch": 10.720823798627002,
"grad_norm": 0.19110934436321259,
"learning_rate": 4.0459183673469387e-07,
"loss": 0.0035,
"step": 23425
},
{
"epoch": 10.732265446224256,
"grad_norm": 6.233257293701172,
"learning_rate": 3.9821428571428573e-07,
"loss": 0.0094,
"step": 23450
},
{
"epoch": 10.743707093821511,
"grad_norm": 0.034573789685964584,
"learning_rate": 3.9183673469387754e-07,
"loss": 0.0049,
"step": 23475
},
{
"epoch": 10.755148741418765,
"grad_norm": 4.074402332305908,
"learning_rate": 3.854591836734694e-07,
"loss": 0.0098,
"step": 23500
},
{
"epoch": 10.766590389016018,
"grad_norm": 0.19702082872390747,
"learning_rate": 3.7908163265306126e-07,
"loss": 0.0034,
"step": 23525
},
{
"epoch": 10.778032036613272,
"grad_norm": 6.4033522605896,
"learning_rate": 3.7270408163265307e-07,
"loss": 0.0061,
"step": 23550
},
{
"epoch": 10.789473684210526,
"grad_norm": 0.6741847395896912,
"learning_rate": 3.6632653061224493e-07,
"loss": 0.0042,
"step": 23575
},
{
"epoch": 10.800915331807781,
"grad_norm": 0.5898193717002869,
"learning_rate": 3.5994897959183674e-07,
"loss": 0.0067,
"step": 23600
},
{
"epoch": 10.812356979405035,
"grad_norm": 0.05853106081485748,
"learning_rate": 3.535714285714286e-07,
"loss": 0.0068,
"step": 23625
},
{
"epoch": 10.823798627002288,
"grad_norm": 0.1718439757823944,
"learning_rate": 3.4719387755102046e-07,
"loss": 0.0067,
"step": 23650
},
{
"epoch": 10.835240274599542,
"grad_norm": 0.39581310749053955,
"learning_rate": 3.4081632653061227e-07,
"loss": 0.0035,
"step": 23675
},
{
"epoch": 10.846681922196796,
"grad_norm": 3.5774476528167725,
"learning_rate": 3.344387755102041e-07,
"loss": 0.0081,
"step": 23700
},
{
"epoch": 10.858123569794051,
"grad_norm": 0.2080162763595581,
"learning_rate": 3.2806122448979594e-07,
"loss": 0.0038,
"step": 23725
},
{
"epoch": 10.869565217391305,
"grad_norm": 0.9511691331863403,
"learning_rate": 3.216836734693878e-07,
"loss": 0.0075,
"step": 23750
},
{
"epoch": 10.881006864988558,
"grad_norm": 2.3875999450683594,
"learning_rate": 3.153061224489796e-07,
"loss": 0.0054,
"step": 23775
},
{
"epoch": 10.892448512585812,
"grad_norm": 0.2756766080856323,
"learning_rate": 3.089285714285714e-07,
"loss": 0.0081,
"step": 23800
},
{
"epoch": 10.903890160183066,
"grad_norm": 0.8285855054855347,
"learning_rate": 3.025510204081633e-07,
"loss": 0.0039,
"step": 23825
},
{
"epoch": 10.915331807780321,
"grad_norm": 7.757495403289795,
"learning_rate": 2.9617346938775513e-07,
"loss": 0.0048,
"step": 23850
},
{
"epoch": 10.926773455377575,
"grad_norm": 3.3156180381774902,
"learning_rate": 2.8979591836734694e-07,
"loss": 0.0044,
"step": 23875
},
{
"epoch": 10.938215102974828,
"grad_norm": 1.0153356790542603,
"learning_rate": 2.8341836734693875e-07,
"loss": 0.0043,
"step": 23900
},
{
"epoch": 10.949656750572082,
"grad_norm": 14.048026084899902,
"learning_rate": 2.770408163265306e-07,
"loss": 0.0062,
"step": 23925
},
{
"epoch": 10.961098398169337,
"grad_norm": 3.859445810317993,
"learning_rate": 2.7066326530612247e-07,
"loss": 0.0067,
"step": 23950
},
{
"epoch": 10.972540045766591,
"grad_norm": 2.134991407394409,
"learning_rate": 2.6428571428571433e-07,
"loss": 0.0033,
"step": 23975
},
{
"epoch": 10.983981693363845,
"grad_norm": 9.151762008666992,
"learning_rate": 2.579081632653061e-07,
"loss": 0.0121,
"step": 24000
},
{
"epoch": 10.995423340961098,
"grad_norm": 0.3495076298713684,
"learning_rate": 2.5153061224489795e-07,
"loss": 0.0048,
"step": 24025
},
{
"epoch": 11.006864988558352,
"grad_norm": 0.10012809187173843,
"learning_rate": 2.451530612244898e-07,
"loss": 0.0055,
"step": 24050
},
{
"epoch": 11.018306636155607,
"grad_norm": 0.06472992897033691,
"learning_rate": 2.3877551020408167e-07,
"loss": 0.0019,
"step": 24075
},
{
"epoch": 11.02974828375286,
"grad_norm": 0.16011592745780945,
"learning_rate": 2.323979591836735e-07,
"loss": 0.0075,
"step": 24100
},
{
"epoch": 11.041189931350115,
"grad_norm": 2.186831474304199,
"learning_rate": 2.2602040816326532e-07,
"loss": 0.0037,
"step": 24125
},
{
"epoch": 11.052631578947368,
"grad_norm": 0.32710880041122437,
"learning_rate": 2.1964285714285715e-07,
"loss": 0.0073,
"step": 24150
},
{
"epoch": 11.064073226544622,
"grad_norm": 0.054156772792339325,
"learning_rate": 2.1326530612244898e-07,
"loss": 0.0052,
"step": 24175
},
{
"epoch": 11.075514874141877,
"grad_norm": 0.07884764671325684,
"learning_rate": 2.0688775510204085e-07,
"loss": 0.003,
"step": 24200
},
{
"epoch": 11.08695652173913,
"grad_norm": 0.04855496436357498,
"learning_rate": 2.0051020408163265e-07,
"loss": 0.0039,
"step": 24225
},
{
"epoch": 11.098398169336384,
"grad_norm": 3.0451197624206543,
"learning_rate": 1.941326530612245e-07,
"loss": 0.0037,
"step": 24250
},
{
"epoch": 11.109839816933638,
"grad_norm": 2.733774185180664,
"learning_rate": 1.8775510204081635e-07,
"loss": 0.0049,
"step": 24275
},
{
"epoch": 11.121281464530892,
"grad_norm": 0.01790427230298519,
"learning_rate": 1.816326530612245e-07,
"loss": 0.0082,
"step": 24300
},
{
"epoch": 11.132723112128147,
"grad_norm": 24.361324310302734,
"learning_rate": 1.7525510204081634e-07,
"loss": 0.0067,
"step": 24325
},
{
"epoch": 11.1441647597254,
"grad_norm": 0.0411471389234066,
"learning_rate": 1.6887755102040817e-07,
"loss": 0.0056,
"step": 24350
},
{
"epoch": 11.155606407322654,
"grad_norm": 0.5140661597251892,
"learning_rate": 1.625e-07,
"loss": 0.0036,
"step": 24375
},
{
"epoch": 11.167048054919908,
"grad_norm": 0.36049628257751465,
"learning_rate": 1.5612244897959184e-07,
"loss": 0.008,
"step": 24400
},
{
"epoch": 11.178489702517162,
"grad_norm": 0.6188111305236816,
"learning_rate": 1.4974489795918368e-07,
"loss": 0.0021,
"step": 24425
},
{
"epoch": 11.189931350114417,
"grad_norm": 0.12496360391378403,
"learning_rate": 1.433673469387755e-07,
"loss": 0.0052,
"step": 24450
},
{
"epoch": 11.20137299771167,
"grad_norm": 2.986814498901367,
"learning_rate": 1.3698979591836734e-07,
"loss": 0.0062,
"step": 24475
},
{
"epoch": 11.212814645308924,
"grad_norm": 1.2586897611618042,
"learning_rate": 1.3061224489795918e-07,
"loss": 0.0067,
"step": 24500
},
{
"epoch": 11.224256292906178,
"grad_norm": 2.0878169536590576,
"learning_rate": 1.2423469387755104e-07,
"loss": 0.0052,
"step": 24525
},
{
"epoch": 11.235697940503432,
"grad_norm": 0.07096085697412491,
"learning_rate": 1.1785714285714286e-07,
"loss": 0.0073,
"step": 24550
},
{
"epoch": 11.247139588100687,
"grad_norm": 0.042739126831293106,
"learning_rate": 1.1147959183673471e-07,
"loss": 0.0043,
"step": 24575
},
{
"epoch": 11.25858123569794,
"grad_norm": 1.214879035949707,
"learning_rate": 1.0510204081632653e-07,
"loss": 0.0076,
"step": 24600
},
{
"epoch": 11.270022883295194,
"grad_norm": 0.14526697993278503,
"learning_rate": 9.872448979591838e-08,
"loss": 0.0041,
"step": 24625
},
{
"epoch": 11.281464530892448,
"grad_norm": 2.3068532943725586,
"learning_rate": 9.23469387755102e-08,
"loss": 0.011,
"step": 24650
},
{
"epoch": 11.292906178489703,
"grad_norm": 0.07109973579645157,
"learning_rate": 8.596938775510205e-08,
"loss": 0.002,
"step": 24675
},
{
"epoch": 11.304347826086957,
"grad_norm": 0.05076288804411888,
"learning_rate": 7.959183673469388e-08,
"loss": 0.0091,
"step": 24700
},
{
"epoch": 11.31578947368421,
"grad_norm": 0.026290345937013626,
"learning_rate": 7.321428571428572e-08,
"loss": 0.0057,
"step": 24725
},
{
"epoch": 11.327231121281464,
"grad_norm": 0.04591425135731697,
"learning_rate": 6.683673469387755e-08,
"loss": 0.0046,
"step": 24750
},
{
"epoch": 11.338672768878718,
"grad_norm": 0.057588379830121994,
"learning_rate": 6.04591836734694e-08,
"loss": 0.0051,
"step": 24775
},
{
"epoch": 11.350114416475973,
"grad_norm": 0.19686830043792725,
"learning_rate": 5.4081632653061234e-08,
"loss": 0.0064,
"step": 24800
},
{
"epoch": 11.361556064073227,
"grad_norm": 0.039152998477220535,
"learning_rate": 4.770408163265306e-08,
"loss": 0.004,
"step": 24825
},
{
"epoch": 11.37299771167048,
"grad_norm": 0.1060929223895073,
"learning_rate": 4.1326530612244896e-08,
"loss": 0.0053,
"step": 24850
},
{
"epoch": 11.384439359267734,
"grad_norm": 0.057997722178697586,
"learning_rate": 3.494897959183673e-08,
"loss": 0.0036,
"step": 24875
},
{
"epoch": 11.395881006864988,
"grad_norm": 0.3169647753238678,
"learning_rate": 2.8571428571428575e-08,
"loss": 0.0059,
"step": 24900
},
{
"epoch": 11.407322654462243,
"grad_norm": 0.020627867430448532,
"learning_rate": 2.2193877551020407e-08,
"loss": 0.0043,
"step": 24925
},
{
"epoch": 11.418764302059497,
"grad_norm": 0.04437786713242531,
"learning_rate": 1.5816326530612248e-08,
"loss": 0.0075,
"step": 24950
},
{
"epoch": 11.43020594965675,
"grad_norm": 0.03256097808480263,
"learning_rate": 9.438775510204082e-09,
"loss": 0.0022,
"step": 24975
},
{
"epoch": 11.441647597254004,
"grad_norm": 3.5547521114349365,
"learning_rate": 3.0612244897959183e-09,
"loss": 0.0055,
"step": 25000
},
{
"epoch": 11.441647597254004,
"eval_loss": 0.21081247925758362,
"eval_runtime": 5131.1271,
"eval_samples_per_second": 1.856,
"eval_steps_per_second": 0.232,
"eval_wer": 0.09488385486657708,
"step": 25000
}
],
"logging_steps": 25,
"max_steps": 25000,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.081970563920691e+20,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}