{ "best_metric": 0.10248288219107954, "best_model_checkpoint": "d:\\\\whisper-medium-pt-cv19-fleurs2-lr-wu\\checkpoint-5000", "epoch": 11.441647597254004, "eval_steps": 5000, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011441647597254004, "grad_norm": 16.260902404785156, "learning_rate": 2.875e-07, "loss": 0.7432, "step": 25 }, { "epoch": 0.02288329519450801, "grad_norm": 28.320634841918945, "learning_rate": 6.000000000000001e-07, "loss": 1.1561, "step": 50 }, { "epoch": 0.034324942791762014, "grad_norm": 9.76030158996582, "learning_rate": 9.124999999999999e-07, "loss": 0.479, "step": 75 }, { "epoch": 0.04576659038901602, "grad_norm": 17.32778549194336, "learning_rate": 1.2250000000000001e-06, "loss": 0.3224, "step": 100 }, { "epoch": 0.057208237986270026, "grad_norm": 7.594138145446777, "learning_rate": 1.5375e-06, "loss": 0.2161, "step": 125 }, { "epoch": 0.06864988558352403, "grad_norm": 18.392087936401367, "learning_rate": 1.85e-06, "loss": 0.262, "step": 150 }, { "epoch": 0.08009153318077804, "grad_norm": 10.459943771362305, "learning_rate": 2.1625e-06, "loss": 0.1765, "step": 175 }, { "epoch": 0.09153318077803203, "grad_norm": 17.077810287475586, "learning_rate": 2.4750000000000004e-06, "loss": 0.2262, "step": 200 }, { "epoch": 0.10297482837528604, "grad_norm": 6.674950122833252, "learning_rate": 2.7875e-06, "loss": 0.1917, "step": 225 }, { "epoch": 0.11441647597254005, "grad_norm": 10.16779613494873, "learning_rate": 3.1e-06, "loss": 0.2162, "step": 250 }, { "epoch": 0.12585812356979406, "grad_norm": 6.614413738250732, "learning_rate": 3.4125000000000004e-06, "loss": 0.2, "step": 275 }, { "epoch": 0.13729977116704806, "grad_norm": 13.418252944946289, "learning_rate": 3.725e-06, "loss": 0.2007, "step": 300 }, { "epoch": 0.14874141876430205, "grad_norm": 6.218944549560547, "learning_rate": 4.037500000000001e-06, "loss": 0.1574, "step": 325 }, { "epoch": 0.16018306636155608, "grad_norm": 15.351899147033691, "learning_rate": 4.35e-06, "loss": 0.1894, "step": 350 }, { "epoch": 0.17162471395881007, "grad_norm": 7.518232822418213, "learning_rate": 4.6625e-06, "loss": 0.1861, "step": 375 }, { "epoch": 0.18306636155606407, "grad_norm": 16.16284942626953, "learning_rate": 4.975000000000001e-06, "loss": 0.2085, "step": 400 }, { "epoch": 0.1945080091533181, "grad_norm": 8.712101936340332, "learning_rate": 5.2875e-06, "loss": 0.1664, "step": 425 }, { "epoch": 0.20594965675057209, "grad_norm": 15.569042205810547, "learning_rate": 5.600000000000001e-06, "loss": 0.2054, "step": 450 }, { "epoch": 0.21739130434782608, "grad_norm": 7.28713846206665, "learning_rate": 5.9125e-06, "loss": 0.1822, "step": 475 }, { "epoch": 0.2288329519450801, "grad_norm": 14.67645263671875, "learning_rate": 6.2250000000000005e-06, "loss": 0.2018, "step": 500 }, { "epoch": 0.2402745995423341, "grad_norm": 9.035391807556152, "learning_rate": 6.244132653061225e-06, "loss": 0.175, "step": 525 }, { "epoch": 0.2517162471395881, "grad_norm": 13.814667701721191, "learning_rate": 6.237755102040817e-06, "loss": 0.2023, "step": 550 }, { "epoch": 0.2631578947368421, "grad_norm": 5.595588684082031, "learning_rate": 6.231377551020408e-06, "loss": 0.1745, "step": 575 }, { "epoch": 0.2745995423340961, "grad_norm": 14.676183700561523, "learning_rate": 6.2250000000000005e-06, "loss": 0.2066, "step": 600 }, { "epoch": 0.28604118993135014, "grad_norm": 7.4100213050842285, "learning_rate": 6.218622448979592e-06, "loss": 0.1968, "step": 625 }, { "epoch": 0.2974828375286041, "grad_norm": 17.322240829467773, "learning_rate": 6.212244897959184e-06, "loss": 0.1916, "step": 650 }, { "epoch": 0.30892448512585813, "grad_norm": 7.691104412078857, "learning_rate": 6.205867346938775e-06, "loss": 0.1725, "step": 675 }, { "epoch": 0.32036613272311215, "grad_norm": 16.282310485839844, "learning_rate": 6.1994897959183676e-06, "loss": 0.2122, "step": 700 }, { "epoch": 0.3318077803203661, "grad_norm": 8.981350898742676, "learning_rate": 6.19311224489796e-06, "loss": 0.1638, "step": 725 }, { "epoch": 0.34324942791762014, "grad_norm": 16.19715690612793, "learning_rate": 6.1867346938775515e-06, "loss": 0.2046, "step": 750 }, { "epoch": 0.35469107551487417, "grad_norm": 5.906866073608398, "learning_rate": 6.180357142857143e-06, "loss": 0.1916, "step": 775 }, { "epoch": 0.36613272311212813, "grad_norm": 10.55771255493164, "learning_rate": 6.1739795918367354e-06, "loss": 0.2104, "step": 800 }, { "epoch": 0.37757437070938216, "grad_norm": 6.290849685668945, "learning_rate": 6.167602040816327e-06, "loss": 0.1618, "step": 825 }, { "epoch": 0.3890160183066362, "grad_norm": 15.78228759765625, "learning_rate": 6.1612244897959185e-06, "loss": 0.1897, "step": 850 }, { "epoch": 0.40045766590389015, "grad_norm": 4.934595108032227, "learning_rate": 6.15484693877551e-06, "loss": 0.1808, "step": 875 }, { "epoch": 0.41189931350114417, "grad_norm": 8.691235542297363, "learning_rate": 6.1484693877551024e-06, "loss": 0.1913, "step": 900 }, { "epoch": 0.4233409610983982, "grad_norm": 9.48690414428711, "learning_rate": 6.142091836734694e-06, "loss": 0.167, "step": 925 }, { "epoch": 0.43478260869565216, "grad_norm": 17.154409408569336, "learning_rate": 6.135714285714286e-06, "loss": 0.1863, "step": 950 }, { "epoch": 0.4462242562929062, "grad_norm": 5.628190517425537, "learning_rate": 6.129336734693878e-06, "loss": 0.1632, "step": 975 }, { "epoch": 0.4576659038901602, "grad_norm": 14.407511711120605, "learning_rate": 6.1229591836734695e-06, "loss": 0.1877, "step": 1000 }, { "epoch": 0.4691075514874142, "grad_norm": 4.591071128845215, "learning_rate": 6.116581632653062e-06, "loss": 0.1698, "step": 1025 }, { "epoch": 0.4805491990846682, "grad_norm": 10.605140686035156, "learning_rate": 6.110204081632653e-06, "loss": 0.1764, "step": 1050 }, { "epoch": 0.4919908466819222, "grad_norm": 9.22734260559082, "learning_rate": 6.103826530612245e-06, "loss": 0.1632, "step": 1075 }, { "epoch": 0.5034324942791762, "grad_norm": 13.262040138244629, "learning_rate": 6.0974489795918365e-06, "loss": 0.1734, "step": 1100 }, { "epoch": 0.5148741418764302, "grad_norm": 9.931065559387207, "learning_rate": 6.091071428571429e-06, "loss": 0.1628, "step": 1125 }, { "epoch": 0.5263157894736842, "grad_norm": 17.013771057128906, "learning_rate": 6.08469387755102e-06, "loss": 0.1902, "step": 1150 }, { "epoch": 0.5377574370709383, "grad_norm": 4.440840244293213, "learning_rate": 6.078316326530613e-06, "loss": 0.1729, "step": 1175 }, { "epoch": 0.5491990846681922, "grad_norm": 6.86649751663208, "learning_rate": 6.071938775510204e-06, "loss": 0.1728, "step": 1200 }, { "epoch": 0.5606407322654462, "grad_norm": 5.384552478790283, "learning_rate": 6.065561224489797e-06, "loss": 0.1661, "step": 1225 }, { "epoch": 0.5720823798627003, "grad_norm": 16.237516403198242, "learning_rate": 6.059183673469388e-06, "loss": 0.1866, "step": 1250 }, { "epoch": 0.5835240274599542, "grad_norm": 9.271600723266602, "learning_rate": 6.05280612244898e-06, "loss": 0.1572, "step": 1275 }, { "epoch": 0.5949656750572082, "grad_norm": 12.505192756652832, "learning_rate": 6.046428571428571e-06, "loss": 0.1793, "step": 1300 }, { "epoch": 0.6064073226544623, "grad_norm": 4.431430816650391, "learning_rate": 6.040051020408164e-06, "loss": 0.1742, "step": 1325 }, { "epoch": 0.6178489702517163, "grad_norm": 13.353468894958496, "learning_rate": 6.033673469387755e-06, "loss": 0.1813, "step": 1350 }, { "epoch": 0.6292906178489702, "grad_norm": 10.686460494995117, "learning_rate": 6.027295918367347e-06, "loss": 0.1789, "step": 1375 }, { "epoch": 0.6407322654462243, "grad_norm": 8.10379409790039, "learning_rate": 6.020918367346939e-06, "loss": 0.1584, "step": 1400 }, { "epoch": 0.6521739130434783, "grad_norm": 5.418963432312012, "learning_rate": 6.014540816326531e-06, "loss": 0.1668, "step": 1425 }, { "epoch": 0.6636155606407322, "grad_norm": 12.503841400146484, "learning_rate": 6.008163265306123e-06, "loss": 0.1827, "step": 1450 }, { "epoch": 0.6750572082379863, "grad_norm": 6.517795562744141, "learning_rate": 6.001785714285715e-06, "loss": 0.171, "step": 1475 }, { "epoch": 0.6864988558352403, "grad_norm": 13.4345703125, "learning_rate": 5.995408163265306e-06, "loss": 0.1854, "step": 1500 }, { "epoch": 0.6979405034324943, "grad_norm": 5.514252185821533, "learning_rate": 5.989030612244898e-06, "loss": 0.1751, "step": 1525 }, { "epoch": 0.7093821510297483, "grad_norm": 16.108661651611328, "learning_rate": 5.98265306122449e-06, "loss": 0.2066, "step": 1550 }, { "epoch": 0.7208237986270023, "grad_norm": 8.58029842376709, "learning_rate": 5.976275510204082e-06, "loss": 0.1787, "step": 1575 }, { "epoch": 0.7322654462242563, "grad_norm": 10.38893985748291, "learning_rate": 5.969897959183673e-06, "loss": 0.1831, "step": 1600 }, { "epoch": 0.7437070938215103, "grad_norm": 6.40317964553833, "learning_rate": 5.963520408163266e-06, "loss": 0.177, "step": 1625 }, { "epoch": 0.7551487414187643, "grad_norm": 14.541539192199707, "learning_rate": 5.957142857142858e-06, "loss": 0.1835, "step": 1650 }, { "epoch": 0.7665903890160183, "grad_norm": 7.053554058074951, "learning_rate": 5.95076530612245e-06, "loss": 0.158, "step": 1675 }, { "epoch": 0.7780320366132724, "grad_norm": 12.54846477508545, "learning_rate": 5.944387755102041e-06, "loss": 0.1742, "step": 1700 }, { "epoch": 0.7894736842105263, "grad_norm": 5.44398307800293, "learning_rate": 5.938010204081633e-06, "loss": 0.15, "step": 1725 }, { "epoch": 0.8009153318077803, "grad_norm": 19.205368041992188, "learning_rate": 5.931632653061225e-06, "loss": 0.1578, "step": 1750 }, { "epoch": 0.8123569794050344, "grad_norm": 5.504051685333252, "learning_rate": 5.925255102040817e-06, "loss": 0.1675, "step": 1775 }, { "epoch": 0.8237986270022883, "grad_norm": 11.270848274230957, "learning_rate": 5.918877551020408e-06, "loss": 0.1861, "step": 1800 }, { "epoch": 0.8352402745995423, "grad_norm": 4.46786642074585, "learning_rate": 5.9125e-06, "loss": 0.1471, "step": 1825 }, { "epoch": 0.8466819221967964, "grad_norm": 16.594099044799805, "learning_rate": 5.906122448979592e-06, "loss": 0.2044, "step": 1850 }, { "epoch": 0.8581235697940504, "grad_norm": 5.97224760055542, "learning_rate": 5.8997448979591845e-06, "loss": 0.1671, "step": 1875 }, { "epoch": 0.8695652173913043, "grad_norm": 18.787992477416992, "learning_rate": 5.893367346938776e-06, "loss": 0.1896, "step": 1900 }, { "epoch": 0.8810068649885584, "grad_norm": 5.963818550109863, "learning_rate": 5.886989795918368e-06, "loss": 0.1563, "step": 1925 }, { "epoch": 0.8924485125858124, "grad_norm": 10.634485244750977, "learning_rate": 5.880612244897959e-06, "loss": 0.1709, "step": 1950 }, { "epoch": 0.9038901601830663, "grad_norm": 3.953845739364624, "learning_rate": 5.8742346938775515e-06, "loss": 0.1466, "step": 1975 }, { "epoch": 0.9153318077803204, "grad_norm": 10.291520118713379, "learning_rate": 5.867857142857143e-06, "loss": 0.1995, "step": 2000 }, { "epoch": 0.9267734553775744, "grad_norm": 7.01460599899292, "learning_rate": 5.861479591836735e-06, "loss": 0.1554, "step": 2025 }, { "epoch": 0.9382151029748284, "grad_norm": 9.408185005187988, "learning_rate": 5.855357142857143e-06, "loss": 0.2045, "step": 2050 }, { "epoch": 0.9496567505720824, "grad_norm": 7.104637622833252, "learning_rate": 5.848979591836735e-06, "loss": 0.1728, "step": 2075 }, { "epoch": 0.9610983981693364, "grad_norm": 16.595233917236328, "learning_rate": 5.842602040816327e-06, "loss": 0.2121, "step": 2100 }, { "epoch": 0.9725400457665904, "grad_norm": 3.5816707611083984, "learning_rate": 5.836224489795919e-06, "loss": 0.1576, "step": 2125 }, { "epoch": 0.9839816933638444, "grad_norm": 16.117435455322266, "learning_rate": 5.829846938775511e-06, "loss": 0.1948, "step": 2150 }, { "epoch": 0.9954233409610984, "grad_norm": 7.5333251953125, "learning_rate": 5.823469387755102e-06, "loss": 0.1679, "step": 2175 }, { "epoch": 1.0068649885583525, "grad_norm": 1.987963080406189, "learning_rate": 5.8170918367346946e-06, "loss": 0.1037, "step": 2200 }, { "epoch": 1.0183066361556063, "grad_norm": 5.73256778717041, "learning_rate": 5.810714285714286e-06, "loss": 0.0692, "step": 2225 }, { "epoch": 1.0297482837528604, "grad_norm": 3.2069363594055176, "learning_rate": 5.804336734693878e-06, "loss": 0.0989, "step": 2250 }, { "epoch": 1.0411899313501145, "grad_norm": 5.0149946212768555, "learning_rate": 5.797959183673469e-06, "loss": 0.0731, "step": 2275 }, { "epoch": 1.0526315789473684, "grad_norm": 5.605563163757324, "learning_rate": 5.791581632653062e-06, "loss": 0.091, "step": 2300 }, { "epoch": 1.0640732265446224, "grad_norm": 4.094449043273926, "learning_rate": 5.785204081632653e-06, "loss": 0.0617, "step": 2325 }, { "epoch": 1.0755148741418765, "grad_norm": 3.5027787685394287, "learning_rate": 5.7788265306122455e-06, "loss": 0.0905, "step": 2350 }, { "epoch": 1.0869565217391304, "grad_norm": 3.6455764770507812, "learning_rate": 5.772448979591837e-06, "loss": 0.0597, "step": 2375 }, { "epoch": 1.0983981693363845, "grad_norm": 3.261543035507202, "learning_rate": 5.766071428571429e-06, "loss": 0.097, "step": 2400 }, { "epoch": 1.1098398169336385, "grad_norm": 5.9047698974609375, "learning_rate": 5.759693877551021e-06, "loss": 0.0803, "step": 2425 }, { "epoch": 1.1212814645308924, "grad_norm": 1.9865431785583496, "learning_rate": 5.7533163265306125e-06, "loss": 0.0913, "step": 2450 }, { "epoch": 1.1327231121281465, "grad_norm": 5.4243316650390625, "learning_rate": 5.746938775510204e-06, "loss": 0.0652, "step": 2475 }, { "epoch": 1.1441647597254005, "grad_norm": 5.0221452713012695, "learning_rate": 5.7405612244897965e-06, "loss": 0.0825, "step": 2500 }, { "epoch": 1.1556064073226544, "grad_norm": 6.293609619140625, "learning_rate": 5.734183673469388e-06, "loss": 0.0819, "step": 2525 }, { "epoch": 1.1670480549199085, "grad_norm": 4.149010181427002, "learning_rate": 5.7278061224489796e-06, "loss": 0.1188, "step": 2550 }, { "epoch": 1.1784897025171626, "grad_norm": 2.6588358879089355, "learning_rate": 5.721428571428572e-06, "loss": 0.0723, "step": 2575 }, { "epoch": 1.1899313501144164, "grad_norm": 3.407505750656128, "learning_rate": 5.7150510204081635e-06, "loss": 0.0948, "step": 2600 }, { "epoch": 1.2013729977116705, "grad_norm": 6.228948593139648, "learning_rate": 5.708673469387756e-06, "loss": 0.0777, "step": 2625 }, { "epoch": 1.2128146453089246, "grad_norm": 2.339284896850586, "learning_rate": 5.7022959183673474e-06, "loss": 0.0896, "step": 2650 }, { "epoch": 1.2242562929061784, "grad_norm": 7.423865795135498, "learning_rate": 5.695918367346939e-06, "loss": 0.0721, "step": 2675 }, { "epoch": 1.2356979405034325, "grad_norm": 4.202932834625244, "learning_rate": 5.6895408163265305e-06, "loss": 0.096, "step": 2700 }, { "epoch": 1.2471395881006866, "grad_norm": 5.0601725578308105, "learning_rate": 5.683163265306123e-06, "loss": 0.0829, "step": 2725 }, { "epoch": 1.2585812356979404, "grad_norm": 4.4953460693359375, "learning_rate": 5.6767857142857144e-06, "loss": 0.1027, "step": 2750 }, { "epoch": 1.2700228832951945, "grad_norm": 6.2997612953186035, "learning_rate": 5.670408163265306e-06, "loss": 0.0789, "step": 2775 }, { "epoch": 1.2814645308924484, "grad_norm": 2.9170215129852295, "learning_rate": 5.664030612244898e-06, "loss": 0.1007, "step": 2800 }, { "epoch": 1.2929061784897025, "grad_norm": 6.78530740737915, "learning_rate": 5.65765306122449e-06, "loss": 0.0787, "step": 2825 }, { "epoch": 1.3043478260869565, "grad_norm": 3.2991139888763428, "learning_rate": 5.651275510204082e-06, "loss": 0.1118, "step": 2850 }, { "epoch": 1.3157894736842106, "grad_norm": 8.449702262878418, "learning_rate": 5.644897959183674e-06, "loss": 0.0742, "step": 2875 }, { "epoch": 1.3272311212814645, "grad_norm": 5.295764923095703, "learning_rate": 5.638520408163265e-06, "loss": 0.0951, "step": 2900 }, { "epoch": 1.3386727688787186, "grad_norm": 5.989330291748047, "learning_rate": 5.632142857142857e-06, "loss": 0.0758, "step": 2925 }, { "epoch": 1.3501144164759724, "grad_norm": 5.153733253479004, "learning_rate": 5.625765306122449e-06, "loss": 0.0984, "step": 2950 }, { "epoch": 1.3615560640732265, "grad_norm": 6.334990501403809, "learning_rate": 5.619387755102041e-06, "loss": 0.0678, "step": 2975 }, { "epoch": 1.3729977116704806, "grad_norm": 4.231469631195068, "learning_rate": 5.613010204081632e-06, "loss": 0.098, "step": 3000 }, { "epoch": 1.3844393592677346, "grad_norm": 4.833641529083252, "learning_rate": 5.606632653061225e-06, "loss": 0.0624, "step": 3025 }, { "epoch": 1.3958810068649885, "grad_norm": 3.244565725326538, "learning_rate": 5.600255102040817e-06, "loss": 0.0894, "step": 3050 }, { "epoch": 1.4073226544622426, "grad_norm": 1.8014638423919678, "learning_rate": 5.593877551020409e-06, "loss": 0.0709, "step": 3075 }, { "epoch": 1.4187643020594964, "grad_norm": 4.825936794281006, "learning_rate": 5.5875e-06, "loss": 0.1008, "step": 3100 }, { "epoch": 1.4302059496567505, "grad_norm": 5.295871734619141, "learning_rate": 5.581122448979592e-06, "loss": 0.0663, "step": 3125 }, { "epoch": 1.4416475972540046, "grad_norm": 4.34277868270874, "learning_rate": 5.574744897959184e-06, "loss": 0.0854, "step": 3150 }, { "epoch": 1.4530892448512587, "grad_norm": 6.447768688201904, "learning_rate": 5.568367346938776e-06, "loss": 0.0813, "step": 3175 }, { "epoch": 1.4645308924485125, "grad_norm": 3.3338069915771484, "learning_rate": 5.561989795918367e-06, "loss": 0.0921, "step": 3200 }, { "epoch": 1.4759725400457666, "grad_norm": 4.244719505310059, "learning_rate": 5.555612244897959e-06, "loss": 0.0683, "step": 3225 }, { "epoch": 1.4874141876430205, "grad_norm": 3.7263808250427246, "learning_rate": 5.549234693877551e-06, "loss": 0.095, "step": 3250 }, { "epoch": 1.4988558352402745, "grad_norm": 6.328766822814941, "learning_rate": 5.542857142857144e-06, "loss": 0.0837, "step": 3275 }, { "epoch": 1.5102974828375286, "grad_norm": 2.921656370162964, "learning_rate": 5.536479591836735e-06, "loss": 0.1021, "step": 3300 }, { "epoch": 1.5217391304347827, "grad_norm": 8.238923072814941, "learning_rate": 5.530102040816327e-06, "loss": 0.0697, "step": 3325 }, { "epoch": 1.5331807780320366, "grad_norm": 3.801389455795288, "learning_rate": 5.523724489795918e-06, "loss": 0.0995, "step": 3350 }, { "epoch": 1.5446224256292906, "grad_norm": 6.373222351074219, "learning_rate": 5.517346938775511e-06, "loss": 0.0693, "step": 3375 }, { "epoch": 1.5560640732265445, "grad_norm": 5.936155319213867, "learning_rate": 5.510969387755102e-06, "loss": 0.0989, "step": 3400 }, { "epoch": 1.5675057208237986, "grad_norm": 3.9559526443481445, "learning_rate": 5.504591836734694e-06, "loss": 0.0799, "step": 3425 }, { "epoch": 1.5789473684210527, "grad_norm": 2.2364418506622314, "learning_rate": 5.498214285714285e-06, "loss": 0.1069, "step": 3450 }, { "epoch": 1.5903890160183067, "grad_norm": 5.343243598937988, "learning_rate": 5.4918367346938785e-06, "loss": 0.0711, "step": 3475 }, { "epoch": 1.6018306636155606, "grad_norm": 6.238850116729736, "learning_rate": 5.48545918367347e-06, "loss": 0.0833, "step": 3500 }, { "epoch": 1.6132723112128147, "grad_norm": 7.247325897216797, "learning_rate": 5.479081632653062e-06, "loss": 0.0772, "step": 3525 }, { "epoch": 1.6247139588100685, "grad_norm": 5.513607025146484, "learning_rate": 5.472704081632653e-06, "loss": 0.102, "step": 3550 }, { "epoch": 1.6361556064073226, "grad_norm": 4.2667365074157715, "learning_rate": 5.4663265306122455e-06, "loss": 0.084, "step": 3575 }, { "epoch": 1.6475972540045767, "grad_norm": 3.866903066635132, "learning_rate": 5.459948979591837e-06, "loss": 0.0984, "step": 3600 }, { "epoch": 1.6590389016018308, "grad_norm": 6.93393611907959, "learning_rate": 5.453571428571429e-06, "loss": 0.0793, "step": 3625 }, { "epoch": 1.6704805491990846, "grad_norm": 5.100342750549316, "learning_rate": 5.44719387755102e-06, "loss": 0.0999, "step": 3650 }, { "epoch": 1.6819221967963387, "grad_norm": 8.363016128540039, "learning_rate": 5.4408163265306126e-06, "loss": 0.0709, "step": 3675 }, { "epoch": 1.6933638443935926, "grad_norm": 4.775864124298096, "learning_rate": 5.434438775510204e-06, "loss": 0.1039, "step": 3700 }, { "epoch": 1.7048054919908466, "grad_norm": 6.310733795166016, "learning_rate": 5.4280612244897965e-06, "loss": 0.08, "step": 3725 }, { "epoch": 1.7162471395881007, "grad_norm": 3.2022969722747803, "learning_rate": 5.421683673469388e-06, "loss": 0.0969, "step": 3750 }, { "epoch": 1.7276887871853548, "grad_norm": 1.5878781080245972, "learning_rate": 5.41530612244898e-06, "loss": 0.0807, "step": 3775 }, { "epoch": 1.7391304347826086, "grad_norm": 4.442117691040039, "learning_rate": 5.408928571428572e-06, "loss": 0.0978, "step": 3800 }, { "epoch": 1.7505720823798627, "grad_norm": 6.980683326721191, "learning_rate": 5.4025510204081635e-06, "loss": 0.0804, "step": 3825 }, { "epoch": 1.7620137299771166, "grad_norm": 1.5422896146774292, "learning_rate": 5.396173469387755e-06, "loss": 0.0971, "step": 3850 }, { "epoch": 1.7734553775743707, "grad_norm": 5.739109516143799, "learning_rate": 5.389795918367347e-06, "loss": 0.0946, "step": 3875 }, { "epoch": 1.7848970251716247, "grad_norm": 3.076174020767212, "learning_rate": 5.383418367346939e-06, "loss": 0.1183, "step": 3900 }, { "epoch": 1.7963386727688788, "grad_norm": 7.785946846008301, "learning_rate": 5.3770408163265305e-06, "loss": 0.0806, "step": 3925 }, { "epoch": 1.8077803203661327, "grad_norm": 3.3288745880126953, "learning_rate": 5.370663265306123e-06, "loss": 0.0924, "step": 3950 }, { "epoch": 1.8192219679633868, "grad_norm": 3.700629472732544, "learning_rate": 5.3642857142857145e-06, "loss": 0.0827, "step": 3975 }, { "epoch": 1.8306636155606406, "grad_norm": 5.729650020599365, "learning_rate": 5.357908163265307e-06, "loss": 0.1036, "step": 4000 }, { "epoch": 1.8421052631578947, "grad_norm": 6.0177202224731445, "learning_rate": 5.351530612244898e-06, "loss": 0.085, "step": 4025 }, { "epoch": 1.8535469107551488, "grad_norm": 4.52460241317749, "learning_rate": 5.34515306122449e-06, "loss": 0.0985, "step": 4050 }, { "epoch": 1.8649885583524028, "grad_norm": 11.662372589111328, "learning_rate": 5.3387755102040815e-06, "loss": 0.0845, "step": 4075 }, { "epoch": 1.8764302059496567, "grad_norm": 5.090856552124023, "learning_rate": 5.332397959183674e-06, "loss": 0.0884, "step": 4100 }, { "epoch": 1.8878718535469108, "grad_norm": 6.701780319213867, "learning_rate": 5.3260204081632654e-06, "loss": 0.0708, "step": 4125 }, { "epoch": 1.8993135011441646, "grad_norm": 2.6517765522003174, "learning_rate": 5.319897959183674e-06, "loss": 0.0956, "step": 4150 }, { "epoch": 1.9107551487414187, "grad_norm": 4.654801368713379, "learning_rate": 5.313520408163265e-06, "loss": 0.073, "step": 4175 }, { "epoch": 1.9221967963386728, "grad_norm": 4.046535968780518, "learning_rate": 5.307142857142857e-06, "loss": 0.0984, "step": 4200 }, { "epoch": 1.9336384439359269, "grad_norm": 5.534332752227783, "learning_rate": 5.30076530612245e-06, "loss": 0.0764, "step": 4225 }, { "epoch": 1.9450800915331807, "grad_norm": 4.218522071838379, "learning_rate": 5.2943877551020414e-06, "loss": 0.0941, "step": 4250 }, { "epoch": 1.9565217391304348, "grad_norm": 5.872758865356445, "learning_rate": 5.288010204081633e-06, "loss": 0.069, "step": 4275 }, { "epoch": 1.9679633867276887, "grad_norm": 4.280802249908447, "learning_rate": 5.2816326530612245e-06, "loss": 0.0986, "step": 4300 }, { "epoch": 1.9794050343249427, "grad_norm": 6.042983531951904, "learning_rate": 5.275255102040817e-06, "loss": 0.0711, "step": 4325 }, { "epoch": 1.9908466819221968, "grad_norm": 3.573798179626465, "learning_rate": 5.2688775510204085e-06, "loss": 0.0891, "step": 4350 }, { "epoch": 2.002288329519451, "grad_norm": 2.0011675357818604, "learning_rate": 5.2625e-06, "loss": 0.0834, "step": 4375 }, { "epoch": 2.013729977116705, "grad_norm": 0.759939432144165, "learning_rate": 5.2561224489795916e-06, "loss": 0.037, "step": 4400 }, { "epoch": 2.0251716247139586, "grad_norm": 3.735200881958008, "learning_rate": 5.249744897959184e-06, "loss": 0.0448, "step": 4425 }, { "epoch": 2.0366132723112127, "grad_norm": 2.4551169872283936, "learning_rate": 5.243367346938776e-06, "loss": 0.038, "step": 4450 }, { "epoch": 2.0480549199084668, "grad_norm": 1.7801021337509155, "learning_rate": 5.236989795918368e-06, "loss": 0.05, "step": 4475 }, { "epoch": 2.059496567505721, "grad_norm": 2.692507743835449, "learning_rate": 5.2306122448979594e-06, "loss": 0.042, "step": 4500 }, { "epoch": 2.070938215102975, "grad_norm": 2.6997761726379395, "learning_rate": 5.224234693877551e-06, "loss": 0.0459, "step": 4525 }, { "epoch": 2.082379862700229, "grad_norm": 3.892010450363159, "learning_rate": 5.217857142857143e-06, "loss": 0.0364, "step": 4550 }, { "epoch": 2.0938215102974826, "grad_norm": 3.354294776916504, "learning_rate": 5.211479591836735e-06, "loss": 0.0584, "step": 4575 }, { "epoch": 2.1052631578947367, "grad_norm": 3.581019163131714, "learning_rate": 5.2051020408163264e-06, "loss": 0.035, "step": 4600 }, { "epoch": 2.116704805491991, "grad_norm": 1.7799072265625, "learning_rate": 5.198724489795918e-06, "loss": 0.0534, "step": 4625 }, { "epoch": 2.128146453089245, "grad_norm": 4.093613624572754, "learning_rate": 5.19234693877551e-06, "loss": 0.0299, "step": 4650 }, { "epoch": 2.139588100686499, "grad_norm": 4.328442573547363, "learning_rate": 5.185969387755103e-06, "loss": 0.0504, "step": 4675 }, { "epoch": 2.151029748283753, "grad_norm": 4.18818998336792, "learning_rate": 5.179591836734694e-06, "loss": 0.0371, "step": 4700 }, { "epoch": 2.1624713958810067, "grad_norm": 4.5619587898254395, "learning_rate": 5.173214285714286e-06, "loss": 0.0514, "step": 4725 }, { "epoch": 2.1739130434782608, "grad_norm": 2.6793291568756104, "learning_rate": 5.166836734693878e-06, "loss": 0.0415, "step": 4750 }, { "epoch": 2.185354691075515, "grad_norm": 3.8424887657165527, "learning_rate": 5.16045918367347e-06, "loss": 0.0584, "step": 4775 }, { "epoch": 2.196796338672769, "grad_norm": 1.9429773092269897, "learning_rate": 5.154081632653061e-06, "loss": 0.0361, "step": 4800 }, { "epoch": 2.208237986270023, "grad_norm": 2.7046594619750977, "learning_rate": 5.147704081632653e-06, "loss": 0.0501, "step": 4825 }, { "epoch": 2.219679633867277, "grad_norm": 0.6767886281013489, "learning_rate": 5.141326530612245e-06, "loss": 0.0284, "step": 4850 }, { "epoch": 2.2311212814645307, "grad_norm": 1.749976634979248, "learning_rate": 5.134948979591837e-06, "loss": 0.0641, "step": 4875 }, { "epoch": 2.242562929061785, "grad_norm": 5.310193061828613, "learning_rate": 5.128571428571429e-06, "loss": 0.0392, "step": 4900 }, { "epoch": 2.254004576659039, "grad_norm": 3.5149447917938232, "learning_rate": 5.122193877551021e-06, "loss": 0.064, "step": 4925 }, { "epoch": 2.265446224256293, "grad_norm": 4.282719135284424, "learning_rate": 5.115816326530612e-06, "loss": 0.0433, "step": 4950 }, { "epoch": 2.276887871853547, "grad_norm": 2.836113929748535, "learning_rate": 5.109438775510205e-06, "loss": 0.0547, "step": 4975 }, { "epoch": 2.288329519450801, "grad_norm": 3.2092835903167725, "learning_rate": 5.103061224489796e-06, "loss": 0.0339, "step": 5000 }, { "epoch": 2.288329519450801, "eval_loss": 0.16944891214370728, "eval_runtime": 5496.3202, "eval_samples_per_second": 1.732, "eval_steps_per_second": 0.217, "eval_wer": 0.10248288219107954, "step": 5000 }, { "epoch": 2.2997711670480547, "grad_norm": 2.923719882965088, "learning_rate": 5.096683673469388e-06, "loss": 0.063, "step": 5025 }, { "epoch": 2.311212814645309, "grad_norm": 3.116055727005005, "learning_rate": 5.090306122448979e-06, "loss": 0.0394, "step": 5050 }, { "epoch": 2.322654462242563, "grad_norm": 2.1266860961914062, "learning_rate": 5.083928571428572e-06, "loss": 0.0497, "step": 5075 }, { "epoch": 2.334096109839817, "grad_norm": 3.964972972869873, "learning_rate": 5.077551020408163e-06, "loss": 0.0355, "step": 5100 }, { "epoch": 2.345537757437071, "grad_norm": 3.5802409648895264, "learning_rate": 5.071173469387756e-06, "loss": 0.0483, "step": 5125 }, { "epoch": 2.356979405034325, "grad_norm": 4.559951305389404, "learning_rate": 5.064795918367347e-06, "loss": 0.0365, "step": 5150 }, { "epoch": 2.3684210526315788, "grad_norm": 2.1596357822418213, "learning_rate": 5.0584183673469396e-06, "loss": 0.0507, "step": 5175 }, { "epoch": 2.379862700228833, "grad_norm": 6.24343204498291, "learning_rate": 5.052040816326531e-06, "loss": 0.0394, "step": 5200 }, { "epoch": 2.391304347826087, "grad_norm": 3.8776490688323975, "learning_rate": 5.045663265306123e-06, "loss": 0.0519, "step": 5225 }, { "epoch": 2.402745995423341, "grad_norm": 5.080620288848877, "learning_rate": 5.039285714285714e-06, "loss": 0.0372, "step": 5250 }, { "epoch": 2.414187643020595, "grad_norm": 2.12448787689209, "learning_rate": 5.032908163265307e-06, "loss": 0.0578, "step": 5275 }, { "epoch": 2.425629290617849, "grad_norm": 2.113422393798828, "learning_rate": 5.026530612244898e-06, "loss": 0.0381, "step": 5300 }, { "epoch": 2.437070938215103, "grad_norm": 3.075899124145508, "learning_rate": 5.02015306122449e-06, "loss": 0.051, "step": 5325 }, { "epoch": 2.448512585812357, "grad_norm": 4.633288860321045, "learning_rate": 5.013775510204082e-06, "loss": 0.0477, "step": 5350 }, { "epoch": 2.459954233409611, "grad_norm": 3.1297614574432373, "learning_rate": 5.007397959183674e-06, "loss": 0.0518, "step": 5375 }, { "epoch": 2.471395881006865, "grad_norm": 1.0485554933547974, "learning_rate": 5.001020408163266e-06, "loss": 0.047, "step": 5400 }, { "epoch": 2.482837528604119, "grad_norm": 2.6167232990264893, "learning_rate": 4.9946428571428575e-06, "loss": 0.0515, "step": 5425 }, { "epoch": 2.494279176201373, "grad_norm": 3.787649393081665, "learning_rate": 4.988265306122449e-06, "loss": 0.0406, "step": 5450 }, { "epoch": 2.505720823798627, "grad_norm": 3.5073530673980713, "learning_rate": 4.981887755102041e-06, "loss": 0.0451, "step": 5475 }, { "epoch": 2.517162471395881, "grad_norm": 3.0327556133270264, "learning_rate": 4.975510204081633e-06, "loss": 0.0381, "step": 5500 }, { "epoch": 2.528604118993135, "grad_norm": 4.393261432647705, "learning_rate": 4.9691326530612246e-06, "loss": 0.0505, "step": 5525 }, { "epoch": 2.540045766590389, "grad_norm": 1.906299114227295, "learning_rate": 4.962755102040816e-06, "loss": 0.0406, "step": 5550 }, { "epoch": 2.551487414187643, "grad_norm": 3.095064401626587, "learning_rate": 4.9563775510204085e-06, "loss": 0.0545, "step": 5575 }, { "epoch": 2.5629290617848968, "grad_norm": 2.1432371139526367, "learning_rate": 4.950000000000001e-06, "loss": 0.038, "step": 5600 }, { "epoch": 2.5743707093821513, "grad_norm": 2.254021167755127, "learning_rate": 4.9436224489795924e-06, "loss": 0.05, "step": 5625 }, { "epoch": 2.585812356979405, "grad_norm": 4.11069917678833, "learning_rate": 4.937244897959184e-06, "loss": 0.0484, "step": 5650 }, { "epoch": 2.597254004576659, "grad_norm": 2.1790149211883545, "learning_rate": 4.9308673469387755e-06, "loss": 0.0584, "step": 5675 }, { "epoch": 2.608695652173913, "grad_norm": 2.150855541229248, "learning_rate": 4.924489795918368e-06, "loss": 0.0355, "step": 5700 }, { "epoch": 2.620137299771167, "grad_norm": 2.5849735736846924, "learning_rate": 4.9181122448979595e-06, "loss": 0.0486, "step": 5725 }, { "epoch": 2.6315789473684212, "grad_norm": 3.598619222640991, "learning_rate": 4.911734693877551e-06, "loss": 0.043, "step": 5750 }, { "epoch": 2.643020594965675, "grad_norm": 3.094461441040039, "learning_rate": 4.9053571428571425e-06, "loss": 0.0497, "step": 5775 }, { "epoch": 2.654462242562929, "grad_norm": 6.460962772369385, "learning_rate": 4.898979591836735e-06, "loss": 0.0369, "step": 5800 }, { "epoch": 2.665903890160183, "grad_norm": 2.5654125213623047, "learning_rate": 4.892602040816327e-06, "loss": 0.0517, "step": 5825 }, { "epoch": 2.677345537757437, "grad_norm": 1.984237790107727, "learning_rate": 4.886224489795919e-06, "loss": 0.0374, "step": 5850 }, { "epoch": 2.688787185354691, "grad_norm": 2.4424266815185547, "learning_rate": 4.87984693877551e-06, "loss": 0.0586, "step": 5875 }, { "epoch": 2.700228832951945, "grad_norm": 3.130814552307129, "learning_rate": 4.873469387755102e-06, "loss": 0.0379, "step": 5900 }, { "epoch": 2.7116704805491993, "grad_norm": 1.8555734157562256, "learning_rate": 4.867091836734694e-06, "loss": 0.0472, "step": 5925 }, { "epoch": 2.723112128146453, "grad_norm": 2.3630707263946533, "learning_rate": 4.860714285714286e-06, "loss": 0.0412, "step": 5950 }, { "epoch": 2.734553775743707, "grad_norm": 2.636423110961914, "learning_rate": 4.8543367346938774e-06, "loss": 0.0509, "step": 5975 }, { "epoch": 2.745995423340961, "grad_norm": 6.911414623260498, "learning_rate": 4.847959183673469e-06, "loss": 0.0399, "step": 6000 }, { "epoch": 2.757437070938215, "grad_norm": 3.120143413543701, "learning_rate": 4.841581632653061e-06, "loss": 0.0517, "step": 6025 }, { "epoch": 2.7688787185354693, "grad_norm": 2.109907627105713, "learning_rate": 4.835204081632654e-06, "loss": 0.0299, "step": 6050 }, { "epoch": 2.780320366132723, "grad_norm": 3.6579062938690186, "learning_rate": 4.828826530612245e-06, "loss": 0.0494, "step": 6075 }, { "epoch": 2.791762013729977, "grad_norm": 0.9419932961463928, "learning_rate": 4.822448979591837e-06, "loss": 0.0296, "step": 6100 }, { "epoch": 2.803203661327231, "grad_norm": 1.7163337469100952, "learning_rate": 4.816071428571429e-06, "loss": 0.0512, "step": 6125 }, { "epoch": 2.814645308924485, "grad_norm": 3.6414005756378174, "learning_rate": 4.809693877551021e-06, "loss": 0.0516, "step": 6150 }, { "epoch": 2.8260869565217392, "grad_norm": 2.7387633323669434, "learning_rate": 4.803571428571429e-06, "loss": 0.0491, "step": 6175 }, { "epoch": 2.837528604118993, "grad_norm": 6.319782733917236, "learning_rate": 4.7971938775510205e-06, "loss": 0.0418, "step": 6200 }, { "epoch": 2.8489702517162474, "grad_norm": 3.232647180557251, "learning_rate": 4.790816326530612e-06, "loss": 0.0492, "step": 6225 }, { "epoch": 2.860411899313501, "grad_norm": 2.536766767501831, "learning_rate": 4.784438775510204e-06, "loss": 0.0394, "step": 6250 }, { "epoch": 2.871853546910755, "grad_norm": 3.481765031814575, "learning_rate": 4.778061224489796e-06, "loss": 0.0563, "step": 6275 }, { "epoch": 2.883295194508009, "grad_norm": 2.599041700363159, "learning_rate": 4.771683673469388e-06, "loss": 0.041, "step": 6300 }, { "epoch": 2.8947368421052633, "grad_norm": 2.9180805683135986, "learning_rate": 4.76530612244898e-06, "loss": 0.0455, "step": 6325 }, { "epoch": 2.9061784897025174, "grad_norm": 5.283931255340576, "learning_rate": 4.758928571428571e-06, "loss": 0.045, "step": 6350 }, { "epoch": 2.917620137299771, "grad_norm": 3.000849485397339, "learning_rate": 4.752551020408164e-06, "loss": 0.0451, "step": 6375 }, { "epoch": 2.929061784897025, "grad_norm": 5.314061641693115, "learning_rate": 4.746173469387755e-06, "loss": 0.0401, "step": 6400 }, { "epoch": 2.940503432494279, "grad_norm": 4.263325214385986, "learning_rate": 4.739795918367347e-06, "loss": 0.0481, "step": 6425 }, { "epoch": 2.9519450800915332, "grad_norm": 3.5418529510498047, "learning_rate": 4.7334183673469384e-06, "loss": 0.0494, "step": 6450 }, { "epoch": 2.9633867276887873, "grad_norm": 3.0929176807403564, "learning_rate": 4.727040816326531e-06, "loss": 0.0583, "step": 6475 }, { "epoch": 2.974828375286041, "grad_norm": 3.1736650466918945, "learning_rate": 4.720663265306122e-06, "loss": 0.0377, "step": 6500 }, { "epoch": 2.9862700228832955, "grad_norm": 2.7548913955688477, "learning_rate": 4.714285714285715e-06, "loss": 0.0385, "step": 6525 }, { "epoch": 2.997711670480549, "grad_norm": 3.6645278930664062, "learning_rate": 4.707908163265306e-06, "loss": 0.0454, "step": 6550 }, { "epoch": 3.009153318077803, "grad_norm": 1.9902344942092896, "learning_rate": 4.701530612244899e-06, "loss": 0.0287, "step": 6575 }, { "epoch": 3.0205949656750573, "grad_norm": 7.351073265075684, "learning_rate": 4.69515306122449e-06, "loss": 0.0237, "step": 6600 }, { "epoch": 3.0320366132723113, "grad_norm": 3.1090986728668213, "learning_rate": 4.688775510204082e-06, "loss": 0.0232, "step": 6625 }, { "epoch": 3.0434782608695654, "grad_norm": 1.9417141675949097, "learning_rate": 4.682397959183673e-06, "loss": 0.0227, "step": 6650 }, { "epoch": 3.054919908466819, "grad_norm": 4.676495552062988, "learning_rate": 4.676020408163266e-06, "loss": 0.0369, "step": 6675 }, { "epoch": 3.066361556064073, "grad_norm": 1.6039929389953613, "learning_rate": 4.669642857142857e-06, "loss": 0.0184, "step": 6700 }, { "epoch": 3.077803203661327, "grad_norm": 1.7776933908462524, "learning_rate": 4.663265306122449e-06, "loss": 0.0244, "step": 6725 }, { "epoch": 3.0892448512585813, "grad_norm": 4.828967571258545, "learning_rate": 4.656887755102041e-06, "loss": 0.0312, "step": 6750 }, { "epoch": 3.1006864988558354, "grad_norm": 8.169689178466797, "learning_rate": 4.650510204081633e-06, "loss": 0.0328, "step": 6775 }, { "epoch": 3.1121281464530894, "grad_norm": 3.101473093032837, "learning_rate": 4.644132653061225e-06, "loss": 0.016, "step": 6800 }, { "epoch": 3.123569794050343, "grad_norm": 0.8072414398193359, "learning_rate": 4.637755102040817e-06, "loss": 0.0299, "step": 6825 }, { "epoch": 3.135011441647597, "grad_norm": 1.684613585472107, "learning_rate": 4.631377551020408e-06, "loss": 0.03, "step": 6850 }, { "epoch": 3.1464530892448512, "grad_norm": 4.120328426361084, "learning_rate": 4.625e-06, "loss": 0.0336, "step": 6875 }, { "epoch": 3.1578947368421053, "grad_norm": 3.2325098514556885, "learning_rate": 4.618622448979592e-06, "loss": 0.0197, "step": 6900 }, { "epoch": 3.1693363844393594, "grad_norm": 2.3901312351226807, "learning_rate": 4.612244897959184e-06, "loss": 0.0251, "step": 6925 }, { "epoch": 3.1807780320366135, "grad_norm": 3.9952516555786133, "learning_rate": 4.605867346938775e-06, "loss": 0.0322, "step": 6950 }, { "epoch": 3.192219679633867, "grad_norm": 3.828606605529785, "learning_rate": 4.599489795918368e-06, "loss": 0.0406, "step": 6975 }, { "epoch": 3.203661327231121, "grad_norm": 1.8031266927719116, "learning_rate": 4.59311224489796e-06, "loss": 0.0236, "step": 7000 }, { "epoch": 3.2151029748283753, "grad_norm": 3.989572286605835, "learning_rate": 4.5867346938775516e-06, "loss": 0.0289, "step": 7025 }, { "epoch": 3.2265446224256293, "grad_norm": 5.1198296546936035, "learning_rate": 4.580357142857143e-06, "loss": 0.0257, "step": 7050 }, { "epoch": 3.2379862700228834, "grad_norm": 3.9181671142578125, "learning_rate": 4.573979591836735e-06, "loss": 0.0344, "step": 7075 }, { "epoch": 3.2494279176201375, "grad_norm": 2.100790500640869, "learning_rate": 4.567602040816327e-06, "loss": 0.0182, "step": 7100 }, { "epoch": 3.260869565217391, "grad_norm": 1.8334282636642456, "learning_rate": 4.561224489795919e-06, "loss": 0.0287, "step": 7125 }, { "epoch": 3.272311212814645, "grad_norm": 4.7145771980285645, "learning_rate": 4.55484693877551e-06, "loss": 0.0339, "step": 7150 }, { "epoch": 3.2837528604118993, "grad_norm": 3.8231897354125977, "learning_rate": 4.548469387755102e-06, "loss": 0.0243, "step": 7175 }, { "epoch": 3.2951945080091534, "grad_norm": 5.662996768951416, "learning_rate": 4.542091836734694e-06, "loss": 0.0281, "step": 7200 }, { "epoch": 3.3066361556064074, "grad_norm": 1.8065590858459473, "learning_rate": 4.5357142857142865e-06, "loss": 0.0361, "step": 7225 }, { "epoch": 3.3180778032036615, "grad_norm": 4.327584266662598, "learning_rate": 4.529336734693878e-06, "loss": 0.0285, "step": 7250 }, { "epoch": 3.329519450800915, "grad_norm": 3.929110527038574, "learning_rate": 4.5229591836734695e-06, "loss": 0.029, "step": 7275 }, { "epoch": 3.3409610983981692, "grad_norm": 4.633998870849609, "learning_rate": 4.516581632653061e-06, "loss": 0.0277, "step": 7300 }, { "epoch": 3.3524027459954233, "grad_norm": 1.1148935556411743, "learning_rate": 4.5102040816326535e-06, "loss": 0.0319, "step": 7325 }, { "epoch": 3.3638443935926774, "grad_norm": 8.381105422973633, "learning_rate": 4.503826530612245e-06, "loss": 0.0214, "step": 7350 }, { "epoch": 3.3752860411899315, "grad_norm": 3.111279010772705, "learning_rate": 4.4974489795918366e-06, "loss": 0.0341, "step": 7375 }, { "epoch": 3.386727688787185, "grad_norm": 2.937441349029541, "learning_rate": 4.491071428571428e-06, "loss": 0.0228, "step": 7400 }, { "epoch": 3.398169336384439, "grad_norm": 2.1108193397521973, "learning_rate": 4.484693877551021e-06, "loss": 0.0276, "step": 7425 }, { "epoch": 3.4096109839816933, "grad_norm": 5.9128031730651855, "learning_rate": 4.478316326530613e-06, "loss": 0.0273, "step": 7450 }, { "epoch": 3.4210526315789473, "grad_norm": 0.7438346743583679, "learning_rate": 4.4719387755102044e-06, "loss": 0.0305, "step": 7475 }, { "epoch": 3.4324942791762014, "grad_norm": 7.93667459487915, "learning_rate": 4.465561224489796e-06, "loss": 0.0265, "step": 7500 }, { "epoch": 3.4439359267734555, "grad_norm": 4.7441277503967285, "learning_rate": 4.459183673469388e-06, "loss": 0.0285, "step": 7525 }, { "epoch": 3.4553775743707096, "grad_norm": 6.173753261566162, "learning_rate": 4.45280612244898e-06, "loss": 0.0341, "step": 7550 }, { "epoch": 3.466819221967963, "grad_norm": 1.397835373878479, "learning_rate": 4.4464285714285715e-06, "loss": 0.0304, "step": 7575 }, { "epoch": 3.4782608695652173, "grad_norm": 1.049957275390625, "learning_rate": 4.440051020408163e-06, "loss": 0.0315, "step": 7600 }, { "epoch": 3.4897025171624714, "grad_norm": 1.9728070497512817, "learning_rate": 4.433673469387755e-06, "loss": 0.0345, "step": 7625 }, { "epoch": 3.5011441647597255, "grad_norm": 3.764899492263794, "learning_rate": 4.427295918367347e-06, "loss": 0.0312, "step": 7650 }, { "epoch": 3.5125858123569795, "grad_norm": 3.4769232273101807, "learning_rate": 4.420918367346939e-06, "loss": 0.0314, "step": 7675 }, { "epoch": 3.524027459954233, "grad_norm": 2.4961886405944824, "learning_rate": 4.414540816326531e-06, "loss": 0.0269, "step": 7700 }, { "epoch": 3.5354691075514877, "grad_norm": 2.6600871086120605, "learning_rate": 4.408163265306122e-06, "loss": 0.0304, "step": 7725 }, { "epoch": 3.5469107551487413, "grad_norm": 3.568831205368042, "learning_rate": 4.401785714285715e-06, "loss": 0.026, "step": 7750 }, { "epoch": 3.5583524027459954, "grad_norm": 2.488873243331909, "learning_rate": 4.395408163265306e-06, "loss": 0.0319, "step": 7775 }, { "epoch": 3.5697940503432495, "grad_norm": 3.0144600868225098, "learning_rate": 4.389030612244898e-06, "loss": 0.0241, "step": 7800 }, { "epoch": 3.5812356979405036, "grad_norm": 5.78762149810791, "learning_rate": 4.3826530612244894e-06, "loss": 0.0335, "step": 7825 }, { "epoch": 3.5926773455377576, "grad_norm": 7.364430904388428, "learning_rate": 4.376275510204082e-06, "loss": 0.03, "step": 7850 }, { "epoch": 3.6041189931350113, "grad_norm": 1.7618129253387451, "learning_rate": 4.369897959183673e-06, "loss": 0.0269, "step": 7875 }, { "epoch": 3.6155606407322654, "grad_norm": 2.5900871753692627, "learning_rate": 4.363520408163266e-06, "loss": 0.021, "step": 7900 }, { "epoch": 3.6270022883295194, "grad_norm": 1.509843111038208, "learning_rate": 4.357142857142857e-06, "loss": 0.0355, "step": 7925 }, { "epoch": 3.6384439359267735, "grad_norm": 7.716503143310547, "learning_rate": 4.35076530612245e-06, "loss": 0.0257, "step": 7950 }, { "epoch": 3.6498855835240276, "grad_norm": 1.4650408029556274, "learning_rate": 4.344387755102041e-06, "loss": 0.0377, "step": 7975 }, { "epoch": 3.6613272311212812, "grad_norm": 2.744730234146118, "learning_rate": 4.338010204081633e-06, "loss": 0.0277, "step": 8000 }, { "epoch": 3.6727688787185357, "grad_norm": 3.6976919174194336, "learning_rate": 4.331632653061224e-06, "loss": 0.0282, "step": 8025 }, { "epoch": 3.6842105263157894, "grad_norm": 7.903183460235596, "learning_rate": 4.325255102040817e-06, "loss": 0.0324, "step": 8050 }, { "epoch": 3.6956521739130435, "grad_norm": 2.9782605171203613, "learning_rate": 4.318877551020408e-06, "loss": 0.0304, "step": 8075 }, { "epoch": 3.7070938215102975, "grad_norm": 3.6786653995513916, "learning_rate": 4.3125e-06, "loss": 0.0304, "step": 8100 }, { "epoch": 3.7185354691075516, "grad_norm": 2.000511884689331, "learning_rate": 4.306122448979592e-06, "loss": 0.0266, "step": 8125 }, { "epoch": 3.7299771167048057, "grad_norm": 3.0682075023651123, "learning_rate": 4.299744897959184e-06, "loss": 0.0314, "step": 8150 }, { "epoch": 3.7414187643020593, "grad_norm": 3.421633720397949, "learning_rate": 4.293367346938776e-06, "loss": 0.0296, "step": 8175 }, { "epoch": 3.7528604118993134, "grad_norm": 1.5098000764846802, "learning_rate": 4.286989795918368e-06, "loss": 0.0211, "step": 8200 }, { "epoch": 3.7643020594965675, "grad_norm": 1.048941731452942, "learning_rate": 4.280612244897959e-06, "loss": 0.032, "step": 8225 }, { "epoch": 3.7757437070938216, "grad_norm": 13.178288459777832, "learning_rate": 4.274234693877551e-06, "loss": 0.0266, "step": 8250 }, { "epoch": 3.7871853546910756, "grad_norm": 2.7297298908233643, "learning_rate": 4.267857142857143e-06, "loss": 0.0254, "step": 8275 }, { "epoch": 3.7986270022883293, "grad_norm": 5.409793376922607, "learning_rate": 4.261479591836735e-06, "loss": 0.0283, "step": 8300 }, { "epoch": 3.8100686498855834, "grad_norm": 1.6612489223480225, "learning_rate": 4.255102040816326e-06, "loss": 0.0263, "step": 8325 }, { "epoch": 3.8215102974828374, "grad_norm": 9.250696182250977, "learning_rate": 4.248724489795919e-06, "loss": 0.0265, "step": 8350 }, { "epoch": 3.8329519450800915, "grad_norm": 4.991941928863525, "learning_rate": 4.242346938775511e-06, "loss": 0.0314, "step": 8375 }, { "epoch": 3.8443935926773456, "grad_norm": 2.8890774250030518, "learning_rate": 4.2359693877551025e-06, "loss": 0.0222, "step": 8400 }, { "epoch": 3.8558352402745997, "grad_norm": 2.2660281658172607, "learning_rate": 4.229591836734694e-06, "loss": 0.0281, "step": 8425 }, { "epoch": 3.8672768878718538, "grad_norm": 2.7722203731536865, "learning_rate": 4.223214285714286e-06, "loss": 0.0315, "step": 8450 }, { "epoch": 3.8787185354691074, "grad_norm": 3.3059165477752686, "learning_rate": 4.216836734693878e-06, "loss": 0.0349, "step": 8475 }, { "epoch": 3.8901601830663615, "grad_norm": 4.720456600189209, "learning_rate": 4.2104591836734696e-06, "loss": 0.0171, "step": 8500 }, { "epoch": 3.9016018306636155, "grad_norm": 2.5770797729492188, "learning_rate": 4.204081632653061e-06, "loss": 0.0285, "step": 8525 }, { "epoch": 3.9130434782608696, "grad_norm": 7.210494041442871, "learning_rate": 4.197704081632653e-06, "loss": 0.0243, "step": 8550 }, { "epoch": 3.9244851258581237, "grad_norm": 0.5488982796669006, "learning_rate": 4.191326530612245e-06, "loss": 0.0293, "step": 8575 }, { "epoch": 3.9359267734553773, "grad_norm": 4.990447044372559, "learning_rate": 4.1849489795918374e-06, "loss": 0.0262, "step": 8600 }, { "epoch": 3.9473684210526314, "grad_norm": 4.837020397186279, "learning_rate": 4.178571428571429e-06, "loss": 0.0366, "step": 8625 }, { "epoch": 3.9588100686498855, "grad_norm": 8.747390747070312, "learning_rate": 4.1721938775510205e-06, "loss": 0.0228, "step": 8650 }, { "epoch": 3.9702517162471396, "grad_norm": 0.8226907849311829, "learning_rate": 4.166071428571429e-06, "loss": 0.0315, "step": 8675 }, { "epoch": 3.9816933638443937, "grad_norm": 4.007413864135742, "learning_rate": 4.159693877551021e-06, "loss": 0.0251, "step": 8700 }, { "epoch": 3.9931350114416477, "grad_norm": 2.9124577045440674, "learning_rate": 4.153316326530613e-06, "loss": 0.0287, "step": 8725 }, { "epoch": 4.004576659038902, "grad_norm": 2.2100229263305664, "learning_rate": 4.146938775510204e-06, "loss": 0.0337, "step": 8750 }, { "epoch": 4.016018306636155, "grad_norm": 1.504098653793335, "learning_rate": 4.140561224489796e-06, "loss": 0.0166, "step": 8775 }, { "epoch": 4.02745995423341, "grad_norm": 2.594133138656616, "learning_rate": 4.134183673469388e-06, "loss": 0.0193, "step": 8800 }, { "epoch": 4.038901601830664, "grad_norm": 3.2971105575561523, "learning_rate": 4.12780612244898e-06, "loss": 0.0116, "step": 8825 }, { "epoch": 4.050343249427917, "grad_norm": 2.0989925861358643, "learning_rate": 4.121428571428572e-06, "loss": 0.0199, "step": 8850 }, { "epoch": 4.061784897025172, "grad_norm": 2.077739715576172, "learning_rate": 4.115306122448979e-06, "loss": 0.0132, "step": 8875 }, { "epoch": 4.073226544622425, "grad_norm": 2.2226176261901855, "learning_rate": 4.108928571428572e-06, "loss": 0.0203, "step": 8900 }, { "epoch": 4.08466819221968, "grad_norm": 5.787581443786621, "learning_rate": 4.102551020408163e-06, "loss": 0.0194, "step": 8925 }, { "epoch": 4.0961098398169336, "grad_norm": 1.7698355913162231, "learning_rate": 4.096173469387756e-06, "loss": 0.0225, "step": 8950 }, { "epoch": 4.107551487414188, "grad_norm": 7.658657073974609, "learning_rate": 4.089795918367347e-06, "loss": 0.017, "step": 8975 }, { "epoch": 4.118993135011442, "grad_norm": 1.4201191663742065, "learning_rate": 4.083418367346939e-06, "loss": 0.025, "step": 9000 }, { "epoch": 4.130434782608695, "grad_norm": 1.703041434288025, "learning_rate": 4.07704081632653e-06, "loss": 0.0114, "step": 9025 }, { "epoch": 4.14187643020595, "grad_norm": 0.8880120515823364, "learning_rate": 4.070663265306123e-06, "loss": 0.0247, "step": 9050 }, { "epoch": 4.1533180778032035, "grad_norm": 2.5007543563842773, "learning_rate": 4.064285714285714e-06, "loss": 0.0221, "step": 9075 }, { "epoch": 4.164759725400458, "grad_norm": 9.973050117492676, "learning_rate": 4.057908163265306e-06, "loss": 0.0212, "step": 9100 }, { "epoch": 4.176201372997712, "grad_norm": 3.308204412460327, "learning_rate": 4.051530612244898e-06, "loss": 0.0117, "step": 9125 }, { "epoch": 4.187643020594965, "grad_norm": 1.699859380722046, "learning_rate": 4.0451530612244905e-06, "loss": 0.0227, "step": 9150 }, { "epoch": 4.19908466819222, "grad_norm": 1.368072271347046, "learning_rate": 4.038775510204082e-06, "loss": 0.0105, "step": 9175 }, { "epoch": 4.2105263157894735, "grad_norm": 0.6647465229034424, "learning_rate": 4.032397959183674e-06, "loss": 0.022, "step": 9200 }, { "epoch": 4.221967963386728, "grad_norm": 4.245569705963135, "learning_rate": 4.026020408163265e-06, "loss": 0.0151, "step": 9225 }, { "epoch": 4.233409610983982, "grad_norm": 0.7587606906890869, "learning_rate": 4.0196428571428576e-06, "loss": 0.0235, "step": 9250 }, { "epoch": 4.244851258581235, "grad_norm": 2.736632823944092, "learning_rate": 4.013265306122449e-06, "loss": 0.0157, "step": 9275 }, { "epoch": 4.25629290617849, "grad_norm": 2.0381782054901123, "learning_rate": 4.006887755102041e-06, "loss": 0.0188, "step": 9300 }, { "epoch": 4.267734553775743, "grad_norm": 1.4942119121551514, "learning_rate": 4.000510204081632e-06, "loss": 0.0101, "step": 9325 }, { "epoch": 4.279176201372998, "grad_norm": 3.967541217803955, "learning_rate": 3.994132653061225e-06, "loss": 0.021, "step": 9350 }, { "epoch": 4.290617848970252, "grad_norm": 0.7107511758804321, "learning_rate": 3.987755102040817e-06, "loss": 0.0106, "step": 9375 }, { "epoch": 4.302059496567506, "grad_norm": 1.9319146871566772, "learning_rate": 3.9813775510204085e-06, "loss": 0.0278, "step": 9400 }, { "epoch": 4.31350114416476, "grad_norm": 4.877601146697998, "learning_rate": 3.975e-06, "loss": 0.0157, "step": 9425 }, { "epoch": 4.324942791762013, "grad_norm": 3.034367799758911, "learning_rate": 3.968622448979592e-06, "loss": 0.0248, "step": 9450 }, { "epoch": 4.336384439359268, "grad_norm": 0.23764902353286743, "learning_rate": 3.962244897959184e-06, "loss": 0.0105, "step": 9475 }, { "epoch": 4.3478260869565215, "grad_norm": 3.390343189239502, "learning_rate": 3.9558673469387755e-06, "loss": 0.0252, "step": 9500 }, { "epoch": 4.359267734553776, "grad_norm": 0.8049426078796387, "learning_rate": 3.949489795918367e-06, "loss": 0.0127, "step": 9525 }, { "epoch": 4.37070938215103, "grad_norm": 2.284611463546753, "learning_rate": 3.9431122448979595e-06, "loss": 0.0295, "step": 9550 }, { "epoch": 4.382151029748284, "grad_norm": 1.1632159948349, "learning_rate": 3.936734693877552e-06, "loss": 0.014, "step": 9575 }, { "epoch": 4.393592677345538, "grad_norm": 3.538940906524658, "learning_rate": 3.930357142857143e-06, "loss": 0.0254, "step": 9600 }, { "epoch": 4.4050343249427915, "grad_norm": 3.8108420372009277, "learning_rate": 3.923979591836735e-06, "loss": 0.0131, "step": 9625 }, { "epoch": 4.416475972540046, "grad_norm": 0.45241427421569824, "learning_rate": 3.9176020408163265e-06, "loss": 0.0201, "step": 9650 }, { "epoch": 4.4279176201373, "grad_norm": 3.477997064590454, "learning_rate": 3.911224489795919e-06, "loss": 0.0174, "step": 9675 }, { "epoch": 4.439359267734554, "grad_norm": 1.1490719318389893, "learning_rate": 3.9048469387755104e-06, "loss": 0.0151, "step": 9700 }, { "epoch": 4.450800915331808, "grad_norm": 7.397353649139404, "learning_rate": 3.898469387755102e-06, "loss": 0.0169, "step": 9725 }, { "epoch": 4.462242562929061, "grad_norm": 0.5268431305885315, "learning_rate": 3.8920918367346935e-06, "loss": 0.0246, "step": 9750 }, { "epoch": 4.473684210526316, "grad_norm": 1.0522817373275757, "learning_rate": 3.885714285714286e-06, "loss": 0.0159, "step": 9775 }, { "epoch": 4.48512585812357, "grad_norm": 1.1104140281677246, "learning_rate": 3.879336734693878e-06, "loss": 0.0312, "step": 9800 }, { "epoch": 4.496567505720824, "grad_norm": 11.163028717041016, "learning_rate": 3.87295918367347e-06, "loss": 0.0169, "step": 9825 }, { "epoch": 4.508009153318078, "grad_norm": 3.165876865386963, "learning_rate": 3.866581632653061e-06, "loss": 0.0159, "step": 9850 }, { "epoch": 4.519450800915331, "grad_norm": 3.8829312324523926, "learning_rate": 3.860204081632653e-06, "loss": 0.0163, "step": 9875 }, { "epoch": 4.530892448512586, "grad_norm": 3.46303129196167, "learning_rate": 3.853826530612245e-06, "loss": 0.0279, "step": 9900 }, { "epoch": 4.5423340961098395, "grad_norm": 1.1138535737991333, "learning_rate": 3.847448979591837e-06, "loss": 0.0154, "step": 9925 }, { "epoch": 4.553775743707094, "grad_norm": 4.287326335906982, "learning_rate": 3.841071428571428e-06, "loss": 0.0193, "step": 9950 }, { "epoch": 4.565217391304348, "grad_norm": 0.4523741602897644, "learning_rate": 3.83469387755102e-06, "loss": 0.0126, "step": 9975 }, { "epoch": 4.576659038901602, "grad_norm": 0.8860822916030884, "learning_rate": 3.828316326530612e-06, "loss": 0.0281, "step": 10000 }, { "epoch": 4.576659038901602, "eval_loss": 0.1852317899465561, "eval_runtime": 5272.523, "eval_samples_per_second": 1.806, "eval_steps_per_second": 0.226, "eval_wer": 0.10049913611057784, "step": 10000 }, { "epoch": 4.588100686498856, "grad_norm": 2.3640730381011963, "learning_rate": 3.821938775510205e-06, "loss": 0.0134, "step": 10025 }, { "epoch": 4.5995423340961095, "grad_norm": 0.9958900809288025, "learning_rate": 3.815561224489796e-06, "loss": 0.017, "step": 10050 }, { "epoch": 4.610983981693364, "grad_norm": 4.021085739135742, "learning_rate": 3.8091836734693882e-06, "loss": 0.0169, "step": 10075 }, { "epoch": 4.622425629290618, "grad_norm": 1.908926010131836, "learning_rate": 3.8028061224489798e-06, "loss": 0.0208, "step": 10100 }, { "epoch": 4.633867276887872, "grad_norm": 4.975993633270264, "learning_rate": 3.7964285714285717e-06, "loss": 0.0133, "step": 10125 }, { "epoch": 4.645308924485126, "grad_norm": 0.652805745601654, "learning_rate": 3.7900510204081633e-06, "loss": 0.0208, "step": 10150 }, { "epoch": 4.65675057208238, "grad_norm": 1.805553913116455, "learning_rate": 3.7836734693877553e-06, "loss": 0.0132, "step": 10175 }, { "epoch": 4.668192219679634, "grad_norm": 0.581511378288269, "learning_rate": 3.777295918367347e-06, "loss": 0.0243, "step": 10200 }, { "epoch": 4.679633867276888, "grad_norm": 4.517154693603516, "learning_rate": 3.7709183673469388e-06, "loss": 0.0142, "step": 10225 }, { "epoch": 4.691075514874142, "grad_norm": 3.5025482177734375, "learning_rate": 3.764540816326531e-06, "loss": 0.0258, "step": 10250 }, { "epoch": 4.702517162471396, "grad_norm": 2.5942628383636475, "learning_rate": 3.7581632653061227e-06, "loss": 0.0121, "step": 10275 }, { "epoch": 4.71395881006865, "grad_norm": 0.7120451927185059, "learning_rate": 3.7517857142857147e-06, "loss": 0.0254, "step": 10300 }, { "epoch": 4.725400457665904, "grad_norm": 8.78650951385498, "learning_rate": 3.745408163265306e-06, "loss": 0.0141, "step": 10325 }, { "epoch": 4.7368421052631575, "grad_norm": 1.8899227380752563, "learning_rate": 3.739030612244898e-06, "loss": 0.0264, "step": 10350 }, { "epoch": 4.748283752860412, "grad_norm": 0.5672324895858765, "learning_rate": 3.7326530612244897e-06, "loss": 0.0095, "step": 10375 }, { "epoch": 4.759725400457666, "grad_norm": 0.8572821617126465, "learning_rate": 3.7262755102040817e-06, "loss": 0.0236, "step": 10400 }, { "epoch": 4.77116704805492, "grad_norm": 0.23919886350631714, "learning_rate": 3.7198979591836732e-06, "loss": 0.0117, "step": 10425 }, { "epoch": 4.782608695652174, "grad_norm": 0.6619164943695068, "learning_rate": 3.713520408163265e-06, "loss": 0.0256, "step": 10450 }, { "epoch": 4.7940503432494275, "grad_norm": 1.8749737739562988, "learning_rate": 3.7071428571428576e-06, "loss": 0.0132, "step": 10475 }, { "epoch": 4.805491990846682, "grad_norm": 3.1002092361450195, "learning_rate": 3.7007653061224496e-06, "loss": 0.0264, "step": 10500 }, { "epoch": 4.816933638443936, "grad_norm": 1.090328335762024, "learning_rate": 3.694387755102041e-06, "loss": 0.0166, "step": 10525 }, { "epoch": 4.82837528604119, "grad_norm": 0.8507818579673767, "learning_rate": 3.688010204081633e-06, "loss": 0.0206, "step": 10550 }, { "epoch": 4.839816933638444, "grad_norm": 2.137056589126587, "learning_rate": 3.6816326530612246e-06, "loss": 0.0114, "step": 10575 }, { "epoch": 4.851258581235698, "grad_norm": 3.8611929416656494, "learning_rate": 3.6752551020408166e-06, "loss": 0.0214, "step": 10600 }, { "epoch": 4.862700228832952, "grad_norm": 5.636245250701904, "learning_rate": 3.668877551020408e-06, "loss": 0.017, "step": 10625 }, { "epoch": 4.874141876430206, "grad_norm": 1.076035499572754, "learning_rate": 3.6625e-06, "loss": 0.0316, "step": 10650 }, { "epoch": 4.88558352402746, "grad_norm": 2.079132556915283, "learning_rate": 3.6561224489795916e-06, "loss": 0.0136, "step": 10675 }, { "epoch": 4.897025171624714, "grad_norm": 2.008894443511963, "learning_rate": 3.649744897959184e-06, "loss": 0.0239, "step": 10700 }, { "epoch": 4.908466819221968, "grad_norm": 0.4755445420742035, "learning_rate": 3.643367346938776e-06, "loss": 0.0131, "step": 10725 }, { "epoch": 4.919908466819222, "grad_norm": 2.093269109725952, "learning_rate": 3.6369897959183675e-06, "loss": 0.023, "step": 10750 }, { "epoch": 4.931350114416476, "grad_norm": 0.36701831221580505, "learning_rate": 3.6306122448979595e-06, "loss": 0.0137, "step": 10775 }, { "epoch": 4.94279176201373, "grad_norm": 2.4676878452301025, "learning_rate": 3.624234693877551e-06, "loss": 0.0324, "step": 10800 }, { "epoch": 4.954233409610984, "grad_norm": 2.1514317989349365, "learning_rate": 3.617857142857143e-06, "loss": 0.0163, "step": 10825 }, { "epoch": 4.965675057208238, "grad_norm": 0.4548572897911072, "learning_rate": 3.6114795918367346e-06, "loss": 0.0225, "step": 10850 }, { "epoch": 4.977116704805492, "grad_norm": 4.841091632843018, "learning_rate": 3.6051020408163265e-06, "loss": 0.015, "step": 10875 }, { "epoch": 4.988558352402746, "grad_norm": 1.0846531391143799, "learning_rate": 3.598724489795918e-06, "loss": 0.0241, "step": 10900 }, { "epoch": 5.0, "grad_norm": 3.79087495803833, "learning_rate": 3.592346938775511e-06, "loss": 0.0228, "step": 10925 }, { "epoch": 5.011441647597254, "grad_norm": 7.431537628173828, "learning_rate": 3.5859693877551024e-06, "loss": 0.0131, "step": 10950 }, { "epoch": 5.022883295194508, "grad_norm": 9.645064353942871, "learning_rate": 3.5795918367346944e-06, "loss": 0.0099, "step": 10975 }, { "epoch": 5.034324942791762, "grad_norm": 4.066143035888672, "learning_rate": 3.573214285714286e-06, "loss": 0.0122, "step": 11000 }, { "epoch": 5.045766590389016, "grad_norm": 15.450593948364258, "learning_rate": 3.566836734693878e-06, "loss": 0.0238, "step": 11025 }, { "epoch": 5.05720823798627, "grad_norm": 4.155067443847656, "learning_rate": 3.5604591836734694e-06, "loss": 0.0146, "step": 11050 }, { "epoch": 5.068649885583524, "grad_norm": 1.2085702419281006, "learning_rate": 3.5540816326530614e-06, "loss": 0.0205, "step": 11075 }, { "epoch": 5.080091533180778, "grad_norm": 2.0568387508392334, "learning_rate": 3.547704081632653e-06, "loss": 0.0104, "step": 11100 }, { "epoch": 5.091533180778032, "grad_norm": 2.4178097248077393, "learning_rate": 3.541326530612245e-06, "loss": 0.0152, "step": 11125 }, { "epoch": 5.102974828375286, "grad_norm": 6.744433879852295, "learning_rate": 3.5349489795918373e-06, "loss": 0.0118, "step": 11150 }, { "epoch": 5.11441647597254, "grad_norm": 2.1950619220733643, "learning_rate": 3.528571428571429e-06, "loss": 0.013, "step": 11175 }, { "epoch": 5.125858123569794, "grad_norm": 7.050522804260254, "learning_rate": 3.522193877551021e-06, "loss": 0.0117, "step": 11200 }, { "epoch": 5.137299771167048, "grad_norm": 18.47918128967285, "learning_rate": 3.5158163265306124e-06, "loss": 0.0153, "step": 11225 }, { "epoch": 5.148741418764302, "grad_norm": 0.19902612268924713, "learning_rate": 3.5094387755102043e-06, "loss": 0.0111, "step": 11250 }, { "epoch": 5.160183066361556, "grad_norm": 4.996516704559326, "learning_rate": 3.503061224489796e-06, "loss": 0.0147, "step": 11275 }, { "epoch": 5.17162471395881, "grad_norm": 0.06878898292779922, "learning_rate": 3.496683673469388e-06, "loss": 0.0084, "step": 11300 }, { "epoch": 5.183066361556064, "grad_norm": 3.7109427452087402, "learning_rate": 3.4903061224489794e-06, "loss": 0.0171, "step": 11325 }, { "epoch": 5.194508009153318, "grad_norm": 1.2318646907806396, "learning_rate": 3.484183673469388e-06, "loss": 0.0104, "step": 11350 }, { "epoch": 5.2059496567505725, "grad_norm": 5.8200364112854, "learning_rate": 3.4778061224489795e-06, "loss": 0.0178, "step": 11375 }, { "epoch": 5.217391304347826, "grad_norm": 2.4029898643493652, "learning_rate": 3.4714285714285715e-06, "loss": 0.0143, "step": 11400 }, { "epoch": 5.22883295194508, "grad_norm": 5.977914810180664, "learning_rate": 3.465051020408164e-06, "loss": 0.0137, "step": 11425 }, { "epoch": 5.240274599542334, "grad_norm": 1.1518189907073975, "learning_rate": 3.4586734693877554e-06, "loss": 0.0115, "step": 11450 }, { "epoch": 5.251716247139588, "grad_norm": 9.050216674804688, "learning_rate": 3.4522959183673474e-06, "loss": 0.019, "step": 11475 }, { "epoch": 5.2631578947368425, "grad_norm": 2.823286294937134, "learning_rate": 3.445918367346939e-06, "loss": 0.0105, "step": 11500 }, { "epoch": 5.274599542334096, "grad_norm": 2.077913284301758, "learning_rate": 3.439540816326531e-06, "loss": 0.012, "step": 11525 }, { "epoch": 5.28604118993135, "grad_norm": 1.8525944948196411, "learning_rate": 3.4331632653061224e-06, "loss": 0.0128, "step": 11550 }, { "epoch": 5.297482837528604, "grad_norm": 4.57568883895874, "learning_rate": 3.4267857142857144e-06, "loss": 0.0168, "step": 11575 }, { "epoch": 5.308924485125858, "grad_norm": 0.7349223494529724, "learning_rate": 3.420408163265306e-06, "loss": 0.0103, "step": 11600 }, { "epoch": 5.320366132723112, "grad_norm": 8.281305313110352, "learning_rate": 3.414030612244898e-06, "loss": 0.0142, "step": 11625 }, { "epoch": 5.331807780320366, "grad_norm": 2.828233480453491, "learning_rate": 3.4076530612244903e-06, "loss": 0.0074, "step": 11650 }, { "epoch": 5.34324942791762, "grad_norm": 3.6413841247558594, "learning_rate": 3.4012755102040823e-06, "loss": 0.0145, "step": 11675 }, { "epoch": 5.354691075514874, "grad_norm": 8.595261573791504, "learning_rate": 3.394897959183674e-06, "loss": 0.0104, "step": 11700 }, { "epoch": 5.366132723112128, "grad_norm": 8.777588844299316, "learning_rate": 3.3885204081632658e-06, "loss": 0.016, "step": 11725 }, { "epoch": 5.377574370709382, "grad_norm": 0.81954026222229, "learning_rate": 3.3821428571428573e-06, "loss": 0.0076, "step": 11750 }, { "epoch": 5.389016018306636, "grad_norm": 2.593818187713623, "learning_rate": 3.3757653061224493e-06, "loss": 0.0178, "step": 11775 }, { "epoch": 5.4004576659038905, "grad_norm": 1.6006801128387451, "learning_rate": 3.369387755102041e-06, "loss": 0.0099, "step": 11800 }, { "epoch": 5.411899313501144, "grad_norm": 3.7885050773620605, "learning_rate": 3.363010204081633e-06, "loss": 0.0165, "step": 11825 }, { "epoch": 5.423340961098398, "grad_norm": 1.194949984550476, "learning_rate": 3.3566326530612243e-06, "loss": 0.0108, "step": 11850 }, { "epoch": 5.434782608695652, "grad_norm": 11.738698959350586, "learning_rate": 3.3502551020408167e-06, "loss": 0.0215, "step": 11875 }, { "epoch": 5.446224256292906, "grad_norm": 2.309065103530884, "learning_rate": 3.3438775510204087e-06, "loss": 0.0121, "step": 11900 }, { "epoch": 5.4576659038901605, "grad_norm": 7.016556262969971, "learning_rate": 3.3375000000000002e-06, "loss": 0.0138, "step": 11925 }, { "epoch": 5.469107551487414, "grad_norm": 2.991405963897705, "learning_rate": 3.331122448979592e-06, "loss": 0.0146, "step": 11950 }, { "epoch": 5.480549199084669, "grad_norm": 7.811606407165527, "learning_rate": 3.3247448979591837e-06, "loss": 0.0194, "step": 11975 }, { "epoch": 5.491990846681922, "grad_norm": 3.6345748901367188, "learning_rate": 3.3183673469387757e-06, "loss": 0.0142, "step": 12000 }, { "epoch": 5.503432494279176, "grad_norm": 7.627600193023682, "learning_rate": 3.3119897959183673e-06, "loss": 0.0179, "step": 12025 }, { "epoch": 5.51487414187643, "grad_norm": 1.8679994344711304, "learning_rate": 3.3056122448979592e-06, "loss": 0.0105, "step": 12050 }, { "epoch": 5.526315789473684, "grad_norm": 0.5164907574653625, "learning_rate": 3.2992346938775508e-06, "loss": 0.0105, "step": 12075 }, { "epoch": 5.537757437070939, "grad_norm": 0.4254048764705658, "learning_rate": 3.2928571428571427e-06, "loss": 0.0081, "step": 12100 }, { "epoch": 5.549199084668192, "grad_norm": 5.356523513793945, "learning_rate": 3.286479591836735e-06, "loss": 0.0218, "step": 12125 }, { "epoch": 5.560640732265446, "grad_norm": 3.510284662246704, "learning_rate": 3.280102040816327e-06, "loss": 0.0085, "step": 12150 }, { "epoch": 5.5720823798627, "grad_norm": 6.946488380432129, "learning_rate": 3.2737244897959186e-06, "loss": 0.0184, "step": 12175 }, { "epoch": 5.583524027459954, "grad_norm": 0.3557557463645935, "learning_rate": 3.2673469387755106e-06, "loss": 0.0106, "step": 12200 }, { "epoch": 5.5949656750572085, "grad_norm": 3.596979856491089, "learning_rate": 3.260969387755102e-06, "loss": 0.0258, "step": 12225 }, { "epoch": 5.606407322654462, "grad_norm": 0.7150298953056335, "learning_rate": 3.254591836734694e-06, "loss": 0.0094, "step": 12250 }, { "epoch": 5.617848970251716, "grad_norm": 4.756287097930908, "learning_rate": 3.2482142857142857e-06, "loss": 0.0164, "step": 12275 }, { "epoch": 5.62929061784897, "grad_norm": 3.6292340755462646, "learning_rate": 3.2418367346938776e-06, "loss": 0.0107, "step": 12300 }, { "epoch": 5.640732265446224, "grad_norm": 9.82845687866211, "learning_rate": 3.235459183673469e-06, "loss": 0.019, "step": 12325 }, { "epoch": 5.6521739130434785, "grad_norm": 0.3698252737522125, "learning_rate": 3.2290816326530616e-06, "loss": 0.0079, "step": 12350 }, { "epoch": 5.663615560640732, "grad_norm": 1.8457624912261963, "learning_rate": 3.2227040816326535e-06, "loss": 0.023, "step": 12375 }, { "epoch": 5.675057208237987, "grad_norm": 0.7149168848991394, "learning_rate": 3.216326530612245e-06, "loss": 0.016, "step": 12400 }, { "epoch": 5.68649885583524, "grad_norm": 4.959986209869385, "learning_rate": 3.209948979591837e-06, "loss": 0.0184, "step": 12425 }, { "epoch": 5.697940503432494, "grad_norm": 1.3709676265716553, "learning_rate": 3.2035714285714286e-06, "loss": 0.0118, "step": 12450 }, { "epoch": 5.709382151029748, "grad_norm": 4.3518829345703125, "learning_rate": 3.1971938775510205e-06, "loss": 0.0172, "step": 12475 }, { "epoch": 5.720823798627002, "grad_norm": 4.1297502517700195, "learning_rate": 3.190816326530612e-06, "loss": 0.0126, "step": 12500 }, { "epoch": 5.732265446224257, "grad_norm": 4.261162281036377, "learning_rate": 3.184438775510204e-06, "loss": 0.0133, "step": 12525 }, { "epoch": 5.74370709382151, "grad_norm": 3.297361135482788, "learning_rate": 3.1780612244897956e-06, "loss": 0.0116, "step": 12550 }, { "epoch": 5.755148741418765, "grad_norm": 6.726646423339844, "learning_rate": 3.171683673469388e-06, "loss": 0.019, "step": 12575 }, { "epoch": 5.766590389016018, "grad_norm": 0.5207387208938599, "learning_rate": 3.16530612244898e-06, "loss": 0.0092, "step": 12600 }, { "epoch": 5.778032036613272, "grad_norm": 11.70545768737793, "learning_rate": 3.158928571428572e-06, "loss": 0.024, "step": 12625 }, { "epoch": 5.7894736842105265, "grad_norm": 0.3776039481163025, "learning_rate": 3.1525510204081635e-06, "loss": 0.01, "step": 12650 }, { "epoch": 5.80091533180778, "grad_norm": 16.962390899658203, "learning_rate": 3.1461734693877554e-06, "loss": 0.0147, "step": 12675 }, { "epoch": 5.812356979405035, "grad_norm": 3.64579701423645, "learning_rate": 3.139795918367347e-06, "loss": 0.011, "step": 12700 }, { "epoch": 5.823798627002288, "grad_norm": 6.796038627624512, "learning_rate": 3.133418367346939e-06, "loss": 0.0138, "step": 12725 }, { "epoch": 5.835240274599542, "grad_norm": 1.9765185117721558, "learning_rate": 3.1270408163265305e-06, "loss": 0.011, "step": 12750 }, { "epoch": 5.8466819221967965, "grad_norm": 4.846302509307861, "learning_rate": 3.1206632653061225e-06, "loss": 0.0107, "step": 12775 }, { "epoch": 5.85812356979405, "grad_norm": 3.6651906967163086, "learning_rate": 3.1142857142857144e-06, "loss": 0.0094, "step": 12800 }, { "epoch": 5.869565217391305, "grad_norm": 6.605918884277344, "learning_rate": 3.107908163265306e-06, "loss": 0.0158, "step": 12825 }, { "epoch": 5.881006864988558, "grad_norm": 3.4638562202453613, "learning_rate": 3.1015306122448984e-06, "loss": 0.0119, "step": 12850 }, { "epoch": 5.892448512585812, "grad_norm": 1.832309603691101, "learning_rate": 3.09515306122449e-06, "loss": 0.0164, "step": 12875 }, { "epoch": 5.9038901601830664, "grad_norm": 0.39115744829177856, "learning_rate": 3.088775510204082e-06, "loss": 0.0137, "step": 12900 }, { "epoch": 5.91533180778032, "grad_norm": 5.2722086906433105, "learning_rate": 3.0823979591836734e-06, "loss": 0.0173, "step": 12925 }, { "epoch": 5.926773455377575, "grad_norm": 0.5123651623725891, "learning_rate": 3.0760204081632654e-06, "loss": 0.0109, "step": 12950 }, { "epoch": 5.938215102974828, "grad_norm": 5.007115840911865, "learning_rate": 3.0696428571428573e-06, "loss": 0.0161, "step": 12975 }, { "epoch": 5.949656750572083, "grad_norm": 1.0899899005889893, "learning_rate": 3.0632653061224493e-06, "loss": 0.0098, "step": 13000 }, { "epoch": 5.961098398169336, "grad_norm": 6.381730079650879, "learning_rate": 3.056887755102041e-06, "loss": 0.0206, "step": 13025 }, { "epoch": 5.97254004576659, "grad_norm": 2.857933521270752, "learning_rate": 3.050510204081633e-06, "loss": 0.0124, "step": 13050 }, { "epoch": 5.983981693363845, "grad_norm": 9.316118240356445, "learning_rate": 3.0441326530612248e-06, "loss": 0.0134, "step": 13075 }, { "epoch": 5.995423340961098, "grad_norm": 0.3497377932071686, "learning_rate": 3.0377551020408163e-06, "loss": 0.0106, "step": 13100 }, { "epoch": 6.006864988558353, "grad_norm": 4.825676918029785, "learning_rate": 3.0313775510204083e-06, "loss": 0.0167, "step": 13125 }, { "epoch": 6.018306636155606, "grad_norm": 8.057607650756836, "learning_rate": 3.0250000000000003e-06, "loss": 0.0133, "step": 13150 }, { "epoch": 6.02974828375286, "grad_norm": 0.27666619420051575, "learning_rate": 3.018622448979592e-06, "loss": 0.0215, "step": 13175 }, { "epoch": 6.0411899313501145, "grad_norm": 0.5118196606636047, "learning_rate": 3.0122448979591838e-06, "loss": 0.0144, "step": 13200 }, { "epoch": 6.052631578947368, "grad_norm": 12.31119155883789, "learning_rate": 3.0058673469387757e-06, "loss": 0.0134, "step": 13225 }, { "epoch": 6.064073226544623, "grad_norm": 0.9528910517692566, "learning_rate": 2.9994897959183673e-06, "loss": 0.0096, "step": 13250 }, { "epoch": 6.075514874141876, "grad_norm": 1.3786743879318237, "learning_rate": 2.9931122448979592e-06, "loss": 0.0155, "step": 13275 }, { "epoch": 6.086956521739131, "grad_norm": 4.0814337730407715, "learning_rate": 2.9867346938775512e-06, "loss": 0.0113, "step": 13300 }, { "epoch": 6.0983981693363845, "grad_norm": 0.17644235491752625, "learning_rate": 2.980357142857143e-06, "loss": 0.0086, "step": 13325 }, { "epoch": 6.109839816933638, "grad_norm": 2.6412882804870605, "learning_rate": 2.9739795918367347e-06, "loss": 0.0087, "step": 13350 }, { "epoch": 6.121281464530893, "grad_norm": 4.250133514404297, "learning_rate": 2.9676020408163267e-06, "loss": 0.0123, "step": 13375 }, { "epoch": 6.132723112128146, "grad_norm": 1.6422185897827148, "learning_rate": 2.9612244897959182e-06, "loss": 0.0067, "step": 13400 }, { "epoch": 6.144164759725401, "grad_norm": 3.366658926010132, "learning_rate": 2.9548469387755106e-06, "loss": 0.0117, "step": 13425 }, { "epoch": 6.155606407322654, "grad_norm": 6.940051555633545, "learning_rate": 2.948469387755102e-06, "loss": 0.0072, "step": 13450 }, { "epoch": 6.167048054919908, "grad_norm": 0.4400598108768463, "learning_rate": 2.942091836734694e-06, "loss": 0.0159, "step": 13475 }, { "epoch": 6.178489702517163, "grad_norm": 3.105194091796875, "learning_rate": 2.9357142857142857e-06, "loss": 0.0091, "step": 13500 }, { "epoch": 6.189931350114416, "grad_norm": 1.1099435091018677, "learning_rate": 2.9293367346938776e-06, "loss": 0.019, "step": 13525 }, { "epoch": 6.201372997711671, "grad_norm": 1.7562593221664429, "learning_rate": 2.9229591836734696e-06, "loss": 0.0084, "step": 13550 }, { "epoch": 6.212814645308924, "grad_norm": 2.2326951026916504, "learning_rate": 2.916581632653061e-06, "loss": 0.0148, "step": 13575 }, { "epoch": 6.224256292906179, "grad_norm": 0.38852858543395996, "learning_rate": 2.910204081632653e-06, "loss": 0.0068, "step": 13600 }, { "epoch": 6.2356979405034325, "grad_norm": 3.4514925479888916, "learning_rate": 2.903826530612245e-06, "loss": 0.0111, "step": 13625 }, { "epoch": 6.247139588100686, "grad_norm": 2.002838134765625, "learning_rate": 2.897448979591837e-06, "loss": 0.0082, "step": 13650 }, { "epoch": 6.258581235697941, "grad_norm": 0.08688002824783325, "learning_rate": 2.8910714285714286e-06, "loss": 0.0267, "step": 13675 }, { "epoch": 6.270022883295194, "grad_norm": 2.080207347869873, "learning_rate": 2.8846938775510206e-06, "loss": 0.0096, "step": 13700 }, { "epoch": 6.281464530892449, "grad_norm": 0.9220284819602966, "learning_rate": 2.878316326530612e-06, "loss": 0.0086, "step": 13725 }, { "epoch": 6.2929061784897025, "grad_norm": 0.35067400336265564, "learning_rate": 2.8719387755102045e-06, "loss": 0.0052, "step": 13750 }, { "epoch": 6.304347826086957, "grad_norm": 3.5281693935394287, "learning_rate": 2.865561224489796e-06, "loss": 0.0191, "step": 13775 }, { "epoch": 6.315789473684211, "grad_norm": 3.336193323135376, "learning_rate": 2.859183673469388e-06, "loss": 0.0063, "step": 13800 }, { "epoch": 6.327231121281464, "grad_norm": 2.0798017978668213, "learning_rate": 2.8528061224489796e-06, "loss": 0.0132, "step": 13825 }, { "epoch": 6.338672768878719, "grad_norm": 0.20700007677078247, "learning_rate": 2.8464285714285715e-06, "loss": 0.0061, "step": 13850 }, { "epoch": 6.350114416475972, "grad_norm": 0.1666710078716278, "learning_rate": 2.8400510204081635e-06, "loss": 0.0073, "step": 13875 }, { "epoch": 6.361556064073227, "grad_norm": 4.459512233734131, "learning_rate": 2.8336734693877555e-06, "loss": 0.0107, "step": 13900 }, { "epoch": 6.372997711670481, "grad_norm": 2.873020887374878, "learning_rate": 2.827295918367347e-06, "loss": 0.0124, "step": 13925 }, { "epoch": 6.384439359267734, "grad_norm": 0.23313076794147491, "learning_rate": 2.820918367346939e-06, "loss": 0.012, "step": 13950 }, { "epoch": 6.395881006864989, "grad_norm": 0.10002599656581879, "learning_rate": 2.814540816326531e-06, "loss": 0.01, "step": 13975 }, { "epoch": 6.407322654462242, "grad_norm": 0.2222910374403, "learning_rate": 2.8081632653061225e-06, "loss": 0.0061, "step": 14000 }, { "epoch": 6.418764302059497, "grad_norm": 1.97636079788208, "learning_rate": 2.8017857142857144e-06, "loss": 0.0125, "step": 14025 }, { "epoch": 6.4302059496567505, "grad_norm": 0.23159927129745483, "learning_rate": 2.795408163265306e-06, "loss": 0.0121, "step": 14050 }, { "epoch": 6.441647597254004, "grad_norm": 2.222355365753174, "learning_rate": 2.789030612244898e-06, "loss": 0.0082, "step": 14075 }, { "epoch": 6.453089244851259, "grad_norm": 1.612273931503296, "learning_rate": 2.78265306122449e-06, "loss": 0.0129, "step": 14100 }, { "epoch": 6.464530892448512, "grad_norm": 3.7675607204437256, "learning_rate": 2.776275510204082e-06, "loss": 0.0126, "step": 14125 }, { "epoch": 6.475972540045767, "grad_norm": 2.6924796104431152, "learning_rate": 2.7698979591836734e-06, "loss": 0.0123, "step": 14150 }, { "epoch": 6.4874141876430205, "grad_norm": 0.5433311462402344, "learning_rate": 2.7635204081632654e-06, "loss": 0.0083, "step": 14175 }, { "epoch": 6.498855835240275, "grad_norm": 1.2560780048370361, "learning_rate": 2.7571428571428574e-06, "loss": 0.0119, "step": 14200 }, { "epoch": 6.510297482837529, "grad_norm": 0.1697181910276413, "learning_rate": 2.7507653061224493e-06, "loss": 0.0142, "step": 14225 }, { "epoch": 6.521739130434782, "grad_norm": 2.489327907562256, "learning_rate": 2.744387755102041e-06, "loss": 0.0098, "step": 14250 }, { "epoch": 6.533180778032037, "grad_norm": 0.10007134824991226, "learning_rate": 2.738010204081633e-06, "loss": 0.0118, "step": 14275 }, { "epoch": 6.54462242562929, "grad_norm": 1.808995008468628, "learning_rate": 2.7316326530612244e-06, "loss": 0.0091, "step": 14300 }, { "epoch": 6.556064073226545, "grad_norm": 0.3042183816432953, "learning_rate": 2.7252551020408168e-06, "loss": 0.0117, "step": 14325 }, { "epoch": 6.567505720823799, "grad_norm": 0.8734502196311951, "learning_rate": 2.7188775510204083e-06, "loss": 0.0099, "step": 14350 }, { "epoch": 6.578947368421053, "grad_norm": 0.13821353018283844, "learning_rate": 2.7125000000000003e-06, "loss": 0.0115, "step": 14375 }, { "epoch": 6.590389016018307, "grad_norm": 0.3783830404281616, "learning_rate": 2.706122448979592e-06, "loss": 0.0068, "step": 14400 }, { "epoch": 6.60183066361556, "grad_norm": 0.3584430515766144, "learning_rate": 2.699744897959184e-06, "loss": 0.0101, "step": 14425 }, { "epoch": 6.613272311212815, "grad_norm": 3.8657848834991455, "learning_rate": 2.6933673469387758e-06, "loss": 0.0098, "step": 14450 }, { "epoch": 6.6247139588100685, "grad_norm": 0.2408173382282257, "learning_rate": 2.6869897959183673e-06, "loss": 0.0112, "step": 14475 }, { "epoch": 6.636155606407323, "grad_norm": 0.08185845613479614, "learning_rate": 2.6806122448979593e-06, "loss": 0.0059, "step": 14500 }, { "epoch": 6.647597254004577, "grad_norm": 0.0738307312130928, "learning_rate": 2.674234693877551e-06, "loss": 0.0132, "step": 14525 }, { "epoch": 6.65903890160183, "grad_norm": 3.3000831604003906, "learning_rate": 2.667857142857143e-06, "loss": 0.01, "step": 14550 }, { "epoch": 6.670480549199085, "grad_norm": 1.2340450286865234, "learning_rate": 2.6614795918367348e-06, "loss": 0.0141, "step": 14575 }, { "epoch": 6.6819221967963385, "grad_norm": 2.966970682144165, "learning_rate": 2.6551020408163267e-06, "loss": 0.0052, "step": 14600 }, { "epoch": 6.693363844393593, "grad_norm": 1.181470274925232, "learning_rate": 2.6487244897959183e-06, "loss": 0.015, "step": 14625 }, { "epoch": 6.704805491990847, "grad_norm": 0.6192874908447266, "learning_rate": 2.6423469387755102e-06, "loss": 0.0063, "step": 14650 }, { "epoch": 6.7162471395881, "grad_norm": 2.0739264488220215, "learning_rate": 2.635969387755102e-06, "loss": 0.0145, "step": 14675 }, { "epoch": 6.727688787185355, "grad_norm": 1.339400291442871, "learning_rate": 2.629591836734694e-06, "loss": 0.0076, "step": 14700 }, { "epoch": 6.739130434782608, "grad_norm": 3.128563404083252, "learning_rate": 2.6232142857142857e-06, "loss": 0.0192, "step": 14725 }, { "epoch": 6.750572082379863, "grad_norm": 3.493795394897461, "learning_rate": 2.6168367346938777e-06, "loss": 0.005, "step": 14750 }, { "epoch": 6.762013729977117, "grad_norm": 1.3682916164398193, "learning_rate": 2.6104591836734696e-06, "loss": 0.0134, "step": 14775 }, { "epoch": 6.77345537757437, "grad_norm": 1.8985130786895752, "learning_rate": 2.6040816326530616e-06, "loss": 0.0082, "step": 14800 }, { "epoch": 6.784897025171625, "grad_norm": 0.5169524550437927, "learning_rate": 2.597704081632653e-06, "loss": 0.0202, "step": 14825 }, { "epoch": 6.796338672768878, "grad_norm": 0.1607659012079239, "learning_rate": 2.591326530612245e-06, "loss": 0.0084, "step": 14850 }, { "epoch": 6.807780320366133, "grad_norm": 0.6245440244674683, "learning_rate": 2.5849489795918367e-06, "loss": 0.0152, "step": 14875 }, { "epoch": 6.8192219679633865, "grad_norm": 0.2351321130990982, "learning_rate": 2.5785714285714286e-06, "loss": 0.0071, "step": 14900 }, { "epoch": 6.830663615560641, "grad_norm": 5.026946067810059, "learning_rate": 2.5721938775510206e-06, "loss": 0.0168, "step": 14925 }, { "epoch": 6.842105263157895, "grad_norm": 1.034845232963562, "learning_rate": 2.565816326530612e-06, "loss": 0.0084, "step": 14950 }, { "epoch": 6.853546910755149, "grad_norm": 0.16656899452209473, "learning_rate": 2.559438775510204e-06, "loss": 0.0148, "step": 14975 }, { "epoch": 6.864988558352403, "grad_norm": 7.876341819763184, "learning_rate": 2.553061224489796e-06, "loss": 0.0139, "step": 15000 }, { "epoch": 6.864988558352403, "eval_loss": 0.20922012627124786, "eval_runtime": 5296.0961, "eval_samples_per_second": 1.798, "eval_steps_per_second": 0.225, "eval_wer": 0.10019517501759775, "step": 15000 }, { "epoch": 6.8764302059496565, "grad_norm": 1.0131422281265259, "learning_rate": 2.546683673469388e-06, "loss": 0.0175, "step": 15025 }, { "epoch": 6.887871853546911, "grad_norm": 0.43789076805114746, "learning_rate": 2.5403061224489796e-06, "loss": 0.0071, "step": 15050 }, { "epoch": 6.899313501144165, "grad_norm": 0.10620521008968353, "learning_rate": 2.5339285714285716e-06, "loss": 0.0118, "step": 15075 }, { "epoch": 6.910755148741419, "grad_norm": 0.22217608988285065, "learning_rate": 2.527551020408163e-06, "loss": 0.0103, "step": 15100 }, { "epoch": 6.922196796338673, "grad_norm": 1.88958740234375, "learning_rate": 2.5211734693877555e-06, "loss": 0.0146, "step": 15125 }, { "epoch": 6.933638443935926, "grad_norm": 0.2760096788406372, "learning_rate": 2.514795918367347e-06, "loss": 0.0051, "step": 15150 }, { "epoch": 6.945080091533181, "grad_norm": 0.23248428106307983, "learning_rate": 2.508418367346939e-06, "loss": 0.0133, "step": 15175 }, { "epoch": 6.956521739130435, "grad_norm": 1.852455496788025, "learning_rate": 2.5020408163265305e-06, "loss": 0.0072, "step": 15200 }, { "epoch": 6.967963386727689, "grad_norm": 0.9175077676773071, "learning_rate": 2.495663265306123e-06, "loss": 0.0118, "step": 15225 }, { "epoch": 6.979405034324943, "grad_norm": 4.601647853851318, "learning_rate": 2.4892857142857145e-06, "loss": 0.0081, "step": 15250 }, { "epoch": 6.990846681922196, "grad_norm": 0.6206708550453186, "learning_rate": 2.4829081632653064e-06, "loss": 0.0128, "step": 15275 }, { "epoch": 7.002288329519451, "grad_norm": 1.0712556838989258, "learning_rate": 2.476530612244898e-06, "loss": 0.0097, "step": 15300 }, { "epoch": 7.0137299771167045, "grad_norm": 0.7450961470603943, "learning_rate": 2.47015306122449e-06, "loss": 0.0058, "step": 15325 }, { "epoch": 7.025171624713959, "grad_norm": 1.869686245918274, "learning_rate": 2.463775510204082e-06, "loss": 0.0188, "step": 15350 }, { "epoch": 7.036613272311213, "grad_norm": 0.30196335911750793, "learning_rate": 2.4573979591836735e-06, "loss": 0.0056, "step": 15375 }, { "epoch": 7.048054919908467, "grad_norm": 1.2585679292678833, "learning_rate": 2.4510204081632654e-06, "loss": 0.0092, "step": 15400 }, { "epoch": 7.059496567505721, "grad_norm": 0.07888475805521011, "learning_rate": 2.444642857142857e-06, "loss": 0.0066, "step": 15425 }, { "epoch": 7.0709382151029745, "grad_norm": 1.7455697059631348, "learning_rate": 2.4382653061224494e-06, "loss": 0.0107, "step": 15450 }, { "epoch": 7.082379862700229, "grad_norm": 0.1373489499092102, "learning_rate": 2.431887755102041e-06, "loss": 0.0055, "step": 15475 }, { "epoch": 7.093821510297483, "grad_norm": 2.089559555053711, "learning_rate": 2.425510204081633e-06, "loss": 0.0107, "step": 15500 }, { "epoch": 7.105263157894737, "grad_norm": 5.042351245880127, "learning_rate": 2.4191326530612244e-06, "loss": 0.0052, "step": 15525 }, { "epoch": 7.116704805491991, "grad_norm": 0.10954425483942032, "learning_rate": 2.4127551020408164e-06, "loss": 0.0091, "step": 15550 }, { "epoch": 7.128146453089244, "grad_norm": 0.06460034102201462, "learning_rate": 2.4063775510204083e-06, "loss": 0.0047, "step": 15575 }, { "epoch": 7.139588100686499, "grad_norm": 2.1633071899414062, "learning_rate": 2.4000000000000003e-06, "loss": 0.0126, "step": 15600 }, { "epoch": 7.151029748283753, "grad_norm": 0.39932990074157715, "learning_rate": 2.393622448979592e-06, "loss": 0.0068, "step": 15625 }, { "epoch": 7.162471395881007, "grad_norm": 0.31883901357650757, "learning_rate": 2.387244897959184e-06, "loss": 0.0121, "step": 15650 }, { "epoch": 7.173913043478261, "grad_norm": 0.2664627134799957, "learning_rate": 2.380867346938776e-06, "loss": 0.0085, "step": 15675 }, { "epoch": 7.185354691075515, "grad_norm": 2.2720088958740234, "learning_rate": 2.3744897959183678e-06, "loss": 0.0195, "step": 15700 }, { "epoch": 7.196796338672769, "grad_norm": 0.13204744458198547, "learning_rate": 2.3681122448979593e-06, "loss": 0.0072, "step": 15725 }, { "epoch": 7.2082379862700225, "grad_norm": 1.5910881757736206, "learning_rate": 2.3617346938775513e-06, "loss": 0.0127, "step": 15750 }, { "epoch": 7.219679633867277, "grad_norm": 2.159229040145874, "learning_rate": 2.355357142857143e-06, "loss": 0.008, "step": 15775 }, { "epoch": 7.231121281464531, "grad_norm": 1.8656185865402222, "learning_rate": 2.3489795918367348e-06, "loss": 0.0058, "step": 15800 }, { "epoch": 7.242562929061785, "grad_norm": 2.223982334136963, "learning_rate": 2.3426020408163267e-06, "loss": 0.0088, "step": 15825 }, { "epoch": 7.254004576659039, "grad_norm": 0.8876869082450867, "learning_rate": 2.3362244897959183e-06, "loss": 0.0173, "step": 15850 }, { "epoch": 7.2654462242562925, "grad_norm": 0.246218740940094, "learning_rate": 2.3298469387755103e-06, "loss": 0.0043, "step": 15875 }, { "epoch": 7.276887871853547, "grad_norm": 1.6510305404663086, "learning_rate": 2.3234693877551022e-06, "loss": 0.0149, "step": 15900 }, { "epoch": 7.288329519450801, "grad_norm": 1.7581284046173096, "learning_rate": 2.317091836734694e-06, "loss": 0.0055, "step": 15925 }, { "epoch": 7.299771167048055, "grad_norm": 1.8961583375930786, "learning_rate": 2.3107142857142857e-06, "loss": 0.0087, "step": 15950 }, { "epoch": 7.311212814645309, "grad_norm": 0.2390100657939911, "learning_rate": 2.3043367346938777e-06, "loss": 0.0104, "step": 15975 }, { "epoch": 7.322654462242563, "grad_norm": 1.6409028768539429, "learning_rate": 2.2979591836734692e-06, "loss": 0.0062, "step": 16000 }, { "epoch": 7.334096109839817, "grad_norm": 0.10882013291120529, "learning_rate": 2.2915816326530616e-06, "loss": 0.0048, "step": 16025 }, { "epoch": 7.345537757437071, "grad_norm": 1.4074586629867554, "learning_rate": 2.285204081632653e-06, "loss": 0.0178, "step": 16050 }, { "epoch": 7.356979405034325, "grad_norm": 0.09742479026317596, "learning_rate": 2.278826530612245e-06, "loss": 0.0067, "step": 16075 }, { "epoch": 7.368421052631579, "grad_norm": 2.2490932941436768, "learning_rate": 2.2724489795918367e-06, "loss": 0.0142, "step": 16100 }, { "epoch": 7.379862700228833, "grad_norm": 0.0923023372888565, "learning_rate": 2.2660714285714287e-06, "loss": 0.0101, "step": 16125 }, { "epoch": 7.391304347826087, "grad_norm": 1.3611345291137695, "learning_rate": 2.2596938775510206e-06, "loss": 0.0118, "step": 16150 }, { "epoch": 7.4027459954233406, "grad_norm": 2.053800106048584, "learning_rate": 2.2533163265306126e-06, "loss": 0.0041, "step": 16175 }, { "epoch": 7.414187643020595, "grad_norm": 2.127472162246704, "learning_rate": 2.246938775510204e-06, "loss": 0.0177, "step": 16200 }, { "epoch": 7.425629290617849, "grad_norm": 4.369998931884766, "learning_rate": 2.240561224489796e-06, "loss": 0.0065, "step": 16225 }, { "epoch": 7.437070938215103, "grad_norm": 2.2975997924804688, "learning_rate": 2.234183673469388e-06, "loss": 0.0076, "step": 16250 }, { "epoch": 7.448512585812357, "grad_norm": 2.5363821983337402, "learning_rate": 2.2278061224489796e-06, "loss": 0.0111, "step": 16275 }, { "epoch": 7.459954233409611, "grad_norm": 0.09316561371088028, "learning_rate": 2.2214285714285716e-06, "loss": 0.0095, "step": 16300 }, { "epoch": 7.471395881006865, "grad_norm": 0.7145214676856995, "learning_rate": 2.215051020408163e-06, "loss": 0.0055, "step": 16325 }, { "epoch": 7.482837528604119, "grad_norm": 2.495028018951416, "learning_rate": 2.208673469387755e-06, "loss": 0.0112, "step": 16350 }, { "epoch": 7.494279176201373, "grad_norm": 0.3236389458179474, "learning_rate": 2.202295918367347e-06, "loss": 0.0044, "step": 16375 }, { "epoch": 7.505720823798627, "grad_norm": 1.0565695762634277, "learning_rate": 2.195918367346939e-06, "loss": 0.0129, "step": 16400 }, { "epoch": 7.517162471395881, "grad_norm": 1.8314106464385986, "learning_rate": 2.1895408163265306e-06, "loss": 0.0052, "step": 16425 }, { "epoch": 7.528604118993135, "grad_norm": 2.1195669174194336, "learning_rate": 2.1831632653061225e-06, "loss": 0.0212, "step": 16450 }, { "epoch": 7.540045766590389, "grad_norm": 1.8161754608154297, "learning_rate": 2.1767857142857145e-06, "loss": 0.0066, "step": 16475 }, { "epoch": 7.551487414187643, "grad_norm": 0.14074856042861938, "learning_rate": 2.1704081632653065e-06, "loss": 0.0068, "step": 16500 }, { "epoch": 7.562929061784897, "grad_norm": 0.07693963497877121, "learning_rate": 2.164030612244898e-06, "loss": 0.005, "step": 16525 }, { "epoch": 7.574370709382151, "grad_norm": 1.0592836141586304, "learning_rate": 2.15765306122449e-06, "loss": 0.0124, "step": 16550 }, { "epoch": 7.585812356979405, "grad_norm": 3.276205539703369, "learning_rate": 2.1512755102040815e-06, "loss": 0.0077, "step": 16575 }, { "epoch": 7.597254004576659, "grad_norm": 0.2712470293045044, "learning_rate": 2.1448979591836735e-06, "loss": 0.0137, "step": 16600 }, { "epoch": 7.608695652173913, "grad_norm": 2.913771629333496, "learning_rate": 2.1385204081632655e-06, "loss": 0.0084, "step": 16625 }, { "epoch": 7.620137299771167, "grad_norm": 0.8734631538391113, "learning_rate": 2.132142857142857e-06, "loss": 0.0162, "step": 16650 }, { "epoch": 7.631578947368421, "grad_norm": 2.229383707046509, "learning_rate": 2.125765306122449e-06, "loss": 0.0089, "step": 16675 }, { "epoch": 7.643020594965675, "grad_norm": 1.0993914604187012, "learning_rate": 2.119387755102041e-06, "loss": 0.0105, "step": 16700 }, { "epoch": 7.654462242562929, "grad_norm": 0.42427483201026917, "learning_rate": 2.113010204081633e-06, "loss": 0.0063, "step": 16725 }, { "epoch": 7.665903890160183, "grad_norm": 1.0200995206832886, "learning_rate": 2.1066326530612244e-06, "loss": 0.012, "step": 16750 }, { "epoch": 7.6773455377574376, "grad_norm": 0.11753875017166138, "learning_rate": 2.1002551020408164e-06, "loss": 0.0069, "step": 16775 }, { "epoch": 7.688787185354691, "grad_norm": 1.3853702545166016, "learning_rate": 2.0941326530612246e-06, "loss": 0.015, "step": 16800 }, { "epoch": 7.700228832951945, "grad_norm": 0.7108054757118225, "learning_rate": 2.0877551020408165e-06, "loss": 0.0094, "step": 16825 }, { "epoch": 7.711670480549199, "grad_norm": 0.9410815834999084, "learning_rate": 2.081377551020408e-06, "loss": 0.0091, "step": 16850 }, { "epoch": 7.723112128146453, "grad_norm": 1.315185308456421, "learning_rate": 2.075e-06, "loss": 0.0066, "step": 16875 }, { "epoch": 7.7345537757437075, "grad_norm": 2.3978397846221924, "learning_rate": 2.068622448979592e-06, "loss": 0.0082, "step": 16900 }, { "epoch": 7.745995423340961, "grad_norm": 0.3152960538864136, "learning_rate": 2.0622448979591835e-06, "loss": 0.0045, "step": 16925 }, { "epoch": 7.757437070938215, "grad_norm": 3.8207294940948486, "learning_rate": 2.0558673469387755e-06, "loss": 0.0139, "step": 16950 }, { "epoch": 7.768878718535469, "grad_norm": 0.7664352059364319, "learning_rate": 2.0494897959183675e-06, "loss": 0.0082, "step": 16975 }, { "epoch": 7.780320366132723, "grad_norm": 2.621455192565918, "learning_rate": 2.0431122448979594e-06, "loss": 0.0138, "step": 17000 }, { "epoch": 7.7917620137299775, "grad_norm": 2.7599635124206543, "learning_rate": 2.036734693877551e-06, "loss": 0.006, "step": 17025 }, { "epoch": 7.803203661327231, "grad_norm": 2.1494710445404053, "learning_rate": 2.030357142857143e-06, "loss": 0.0133, "step": 17050 }, { "epoch": 7.814645308924485, "grad_norm": 2.6911447048187256, "learning_rate": 2.0239795918367345e-06, "loss": 0.0079, "step": 17075 }, { "epoch": 7.826086956521739, "grad_norm": 1.8577873706817627, "learning_rate": 2.017602040816327e-06, "loss": 0.0098, "step": 17100 }, { "epoch": 7.837528604118993, "grad_norm": 0.08260419219732285, "learning_rate": 2.0112244897959184e-06, "loss": 0.0073, "step": 17125 }, { "epoch": 7.848970251716247, "grad_norm": 0.34523025155067444, "learning_rate": 2.0048469387755104e-06, "loss": 0.0097, "step": 17150 }, { "epoch": 7.860411899313501, "grad_norm": 2.8545644283294678, "learning_rate": 1.998469387755102e-06, "loss": 0.0093, "step": 17175 }, { "epoch": 7.871853546910755, "grad_norm": 0.9209850430488586, "learning_rate": 1.9920918367346943e-06, "loss": 0.0193, "step": 17200 }, { "epoch": 7.883295194508009, "grad_norm": 0.15244260430335999, "learning_rate": 1.985714285714286e-06, "loss": 0.0074, "step": 17225 }, { "epoch": 7.894736842105263, "grad_norm": 0.9773492813110352, "learning_rate": 1.979336734693878e-06, "loss": 0.0102, "step": 17250 }, { "epoch": 7.906178489702517, "grad_norm": 3.8897087574005127, "learning_rate": 1.9729591836734694e-06, "loss": 0.0092, "step": 17275 }, { "epoch": 7.917620137299771, "grad_norm": 1.736915946006775, "learning_rate": 1.9665816326530614e-06, "loss": 0.0087, "step": 17300 }, { "epoch": 7.9290617848970255, "grad_norm": 0.2637374699115753, "learning_rate": 1.9602040816326533e-06, "loss": 0.0056, "step": 17325 }, { "epoch": 7.940503432494279, "grad_norm": 0.23918192088603973, "learning_rate": 1.953826530612245e-06, "loss": 0.0101, "step": 17350 }, { "epoch": 7.951945080091534, "grad_norm": 2.3854148387908936, "learning_rate": 1.947448979591837e-06, "loss": 0.0082, "step": 17375 }, { "epoch": 7.963386727688787, "grad_norm": 1.505005955696106, "learning_rate": 1.9410714285714284e-06, "loss": 0.0092, "step": 17400 }, { "epoch": 7.974828375286041, "grad_norm": 5.391610145568848, "learning_rate": 1.9346938775510208e-06, "loss": 0.0088, "step": 17425 }, { "epoch": 7.9862700228832955, "grad_norm": 0.1221591904759407, "learning_rate": 1.9283163265306123e-06, "loss": 0.0139, "step": 17450 }, { "epoch": 7.997711670480549, "grad_norm": 3.126054286956787, "learning_rate": 1.9219387755102043e-06, "loss": 0.006, "step": 17475 }, { "epoch": 8.009153318077804, "grad_norm": 0.13957707583904266, "learning_rate": 1.915561224489796e-06, "loss": 0.0126, "step": 17500 }, { "epoch": 8.020594965675057, "grad_norm": 0.3743535876274109, "learning_rate": 1.909183673469388e-06, "loss": 0.0066, "step": 17525 }, { "epoch": 8.03203661327231, "grad_norm": 2.4367594718933105, "learning_rate": 1.9028061224489798e-06, "loss": 0.0095, "step": 17550 }, { "epoch": 8.043478260869565, "grad_norm": 0.27416449785232544, "learning_rate": 1.8964285714285715e-06, "loss": 0.0061, "step": 17575 }, { "epoch": 8.05491990846682, "grad_norm": 2.7768595218658447, "learning_rate": 1.8900510204081633e-06, "loss": 0.0079, "step": 17600 }, { "epoch": 8.066361556064074, "grad_norm": 1.4709582328796387, "learning_rate": 1.883673469387755e-06, "loss": 0.0077, "step": 17625 }, { "epoch": 8.077803203661327, "grad_norm": 2.9024922847747803, "learning_rate": 1.8772959183673472e-06, "loss": 0.0044, "step": 17650 }, { "epoch": 8.08924485125858, "grad_norm": 1.4688998460769653, "learning_rate": 1.870918367346939e-06, "loss": 0.0051, "step": 17675 }, { "epoch": 8.100686498855834, "grad_norm": 0.9765781760215759, "learning_rate": 1.8645408163265307e-06, "loss": 0.0117, "step": 17700 }, { "epoch": 8.11212814645309, "grad_norm": 0.23804131150245667, "learning_rate": 1.8581632653061225e-06, "loss": 0.0079, "step": 17725 }, { "epoch": 8.123569794050344, "grad_norm": 0.10259401798248291, "learning_rate": 1.8517857142857142e-06, "loss": 0.0063, "step": 17750 }, { "epoch": 8.135011441647597, "grad_norm": 3.3533835411071777, "learning_rate": 1.8454081632653064e-06, "loss": 0.0072, "step": 17775 }, { "epoch": 8.14645308924485, "grad_norm": 0.26105380058288574, "learning_rate": 1.8390306122448982e-06, "loss": 0.0123, "step": 17800 }, { "epoch": 8.157894736842104, "grad_norm": 3.717785358428955, "learning_rate": 1.83265306122449e-06, "loss": 0.0057, "step": 17825 }, { "epoch": 8.16933638443936, "grad_norm": 0.14977721869945526, "learning_rate": 1.8262755102040817e-06, "loss": 0.0099, "step": 17850 }, { "epoch": 8.180778032036613, "grad_norm": 0.706764817237854, "learning_rate": 1.8198979591836736e-06, "loss": 0.0088, "step": 17875 }, { "epoch": 8.192219679633867, "grad_norm": 0.2173665463924408, "learning_rate": 1.8135204081632654e-06, "loss": 0.0047, "step": 17900 }, { "epoch": 8.20366132723112, "grad_norm": 1.6431961059570312, "learning_rate": 1.8071428571428571e-06, "loss": 0.0079, "step": 17925 }, { "epoch": 8.215102974828376, "grad_norm": 1.231646180152893, "learning_rate": 1.8007653061224491e-06, "loss": 0.0086, "step": 17950 }, { "epoch": 8.22654462242563, "grad_norm": 0.3211919069290161, "learning_rate": 1.7943877551020409e-06, "loss": 0.0093, "step": 17975 }, { "epoch": 8.237986270022883, "grad_norm": 0.0584256611764431, "learning_rate": 1.7880102040816328e-06, "loss": 0.0072, "step": 18000 }, { "epoch": 8.249427917620137, "grad_norm": 1.3097387552261353, "learning_rate": 1.7816326530612246e-06, "loss": 0.0064, "step": 18025 }, { "epoch": 8.26086956521739, "grad_norm": 2.2777936458587646, "learning_rate": 1.7752551020408163e-06, "loss": 0.0062, "step": 18050 }, { "epoch": 8.272311212814646, "grad_norm": 2.830906867980957, "learning_rate": 1.768877551020408e-06, "loss": 0.0071, "step": 18075 }, { "epoch": 8.2837528604119, "grad_norm": 1.2469624280929565, "learning_rate": 1.7624999999999999e-06, "loss": 0.0086, "step": 18100 }, { "epoch": 8.295194508009153, "grad_norm": 1.5804486274719238, "learning_rate": 1.756122448979592e-06, "loss": 0.0052, "step": 18125 }, { "epoch": 8.306636155606407, "grad_norm": 2.0198347568511963, "learning_rate": 1.7497448979591838e-06, "loss": 0.009, "step": 18150 }, { "epoch": 8.31807780320366, "grad_norm": 1.086702585220337, "learning_rate": 1.7433673469387755e-06, "loss": 0.0034, "step": 18175 }, { "epoch": 8.329519450800916, "grad_norm": 0.40313854813575745, "learning_rate": 1.7369897959183673e-06, "loss": 0.011, "step": 18200 }, { "epoch": 8.34096109839817, "grad_norm": 1.1296424865722656, "learning_rate": 1.7306122448979595e-06, "loss": 0.0051, "step": 18225 }, { "epoch": 8.352402745995423, "grad_norm": 0.12974560260772705, "learning_rate": 1.7242346938775512e-06, "loss": 0.0103, "step": 18250 }, { "epoch": 8.363844393592677, "grad_norm": 6.7116265296936035, "learning_rate": 1.717857142857143e-06, "loss": 0.0076, "step": 18275 }, { "epoch": 8.37528604118993, "grad_norm": 3.5247104167938232, "learning_rate": 1.7114795918367347e-06, "loss": 0.0096, "step": 18300 }, { "epoch": 8.386727688787186, "grad_norm": 1.0144484043121338, "learning_rate": 1.7051020408163265e-06, "loss": 0.0073, "step": 18325 }, { "epoch": 8.39816933638444, "grad_norm": 0.3811889886856079, "learning_rate": 1.6987244897959185e-06, "loss": 0.0056, "step": 18350 }, { "epoch": 8.409610983981693, "grad_norm": 0.449440598487854, "learning_rate": 1.6923469387755102e-06, "loss": 0.0056, "step": 18375 }, { "epoch": 8.421052631578947, "grad_norm": 1.2429654598236084, "learning_rate": 1.685969387755102e-06, "loss": 0.0089, "step": 18400 }, { "epoch": 8.4324942791762, "grad_norm": 0.530453085899353, "learning_rate": 1.6795918367346937e-06, "loss": 0.0054, "step": 18425 }, { "epoch": 8.443935926773456, "grad_norm": 0.31273970007896423, "learning_rate": 1.673214285714286e-06, "loss": 0.0072, "step": 18450 }, { "epoch": 8.45537757437071, "grad_norm": 5.236429214477539, "learning_rate": 1.6668367346938777e-06, "loss": 0.005, "step": 18475 }, { "epoch": 8.466819221967963, "grad_norm": 0.06535414606332779, "learning_rate": 1.6604591836734694e-06, "loss": 0.0065, "step": 18500 }, { "epoch": 8.478260869565217, "grad_norm": 1.5802868604660034, "learning_rate": 1.6540816326530612e-06, "loss": 0.0063, "step": 18525 }, { "epoch": 8.48970251716247, "grad_norm": 1.6625992059707642, "learning_rate": 1.647704081632653e-06, "loss": 0.0106, "step": 18550 }, { "epoch": 8.501144164759726, "grad_norm": 0.21594946086406708, "learning_rate": 1.6413265306122451e-06, "loss": 0.0036, "step": 18575 }, { "epoch": 8.51258581235698, "grad_norm": 0.22096717357635498, "learning_rate": 1.6349489795918369e-06, "loss": 0.009, "step": 18600 }, { "epoch": 8.524027459954233, "grad_norm": 0.08814064413309097, "learning_rate": 1.6285714285714286e-06, "loss": 0.0021, "step": 18625 }, { "epoch": 8.535469107551487, "grad_norm": 0.07081279903650284, "learning_rate": 1.6221938775510204e-06, "loss": 0.0077, "step": 18650 }, { "epoch": 8.546910755148742, "grad_norm": 0.4350265562534332, "learning_rate": 1.6158163265306126e-06, "loss": 0.0069, "step": 18675 }, { "epoch": 8.558352402745996, "grad_norm": 1.913025975227356, "learning_rate": 1.6094387755102043e-06, "loss": 0.0094, "step": 18700 }, { "epoch": 8.56979405034325, "grad_norm": 0.0708305835723877, "learning_rate": 1.603061224489796e-06, "loss": 0.0066, "step": 18725 }, { "epoch": 8.581235697940503, "grad_norm": 0.2957993149757385, "learning_rate": 1.5966836734693878e-06, "loss": 0.0103, "step": 18750 }, { "epoch": 8.592677345537757, "grad_norm": 0.3358778953552246, "learning_rate": 1.5903061224489796e-06, "loss": 0.0064, "step": 18775 }, { "epoch": 8.604118993135012, "grad_norm": 6.7304887771606445, "learning_rate": 1.5839285714285715e-06, "loss": 0.0093, "step": 18800 }, { "epoch": 8.615560640732266, "grad_norm": 3.724275588989258, "learning_rate": 1.5775510204081633e-06, "loss": 0.0084, "step": 18825 }, { "epoch": 8.62700228832952, "grad_norm": 0.04007994756102562, "learning_rate": 1.571173469387755e-06, "loss": 0.0136, "step": 18850 }, { "epoch": 8.638443935926773, "grad_norm": 0.05183762311935425, "learning_rate": 1.5647959183673468e-06, "loss": 0.0061, "step": 18875 }, { "epoch": 8.649885583524027, "grad_norm": 3.223602771759033, "learning_rate": 1.5584183673469388e-06, "loss": 0.0098, "step": 18900 }, { "epoch": 8.661327231121282, "grad_norm": 3.744605541229248, "learning_rate": 1.5520408163265307e-06, "loss": 0.0106, "step": 18925 }, { "epoch": 8.672768878718536, "grad_norm": 0.07725165784358978, "learning_rate": 1.5456632653061225e-06, "loss": 0.0076, "step": 18950 }, { "epoch": 8.68421052631579, "grad_norm": 14.24994945526123, "learning_rate": 1.5392857142857143e-06, "loss": 0.0086, "step": 18975 }, { "epoch": 8.695652173913043, "grad_norm": 0.2724073529243469, "learning_rate": 1.5329081632653062e-06, "loss": 0.0141, "step": 19000 }, { "epoch": 8.707093821510298, "grad_norm": 0.03878064826130867, "learning_rate": 1.526530612244898e-06, "loss": 0.0071, "step": 19025 }, { "epoch": 8.718535469107552, "grad_norm": 0.631288468837738, "learning_rate": 1.5204081632653061e-06, "loss": 0.011, "step": 19050 }, { "epoch": 8.729977116704806, "grad_norm": 9.200640678405762, "learning_rate": 1.5140306122448979e-06, "loss": 0.009, "step": 19075 }, { "epoch": 8.74141876430206, "grad_norm": 0.18505507707595825, "learning_rate": 1.5076530612244898e-06, "loss": 0.0084, "step": 19100 }, { "epoch": 8.752860411899313, "grad_norm": 5.33227014541626, "learning_rate": 1.5012755102040816e-06, "loss": 0.0057, "step": 19125 }, { "epoch": 8.764302059496568, "grad_norm": 0.31844860315322876, "learning_rate": 1.4948979591836736e-06, "loss": 0.0081, "step": 19150 }, { "epoch": 8.775743707093822, "grad_norm": 0.5912889242172241, "learning_rate": 1.4885204081632653e-06, "loss": 0.0083, "step": 19175 }, { "epoch": 8.787185354691076, "grad_norm": 0.05793588235974312, "learning_rate": 1.4821428571428573e-06, "loss": 0.0061, "step": 19200 }, { "epoch": 8.79862700228833, "grad_norm": 3.5258617401123047, "learning_rate": 1.475765306122449e-06, "loss": 0.0091, "step": 19225 }, { "epoch": 8.810068649885583, "grad_norm": 0.5657800436019897, "learning_rate": 1.4693877551020408e-06, "loss": 0.0082, "step": 19250 }, { "epoch": 8.821510297482838, "grad_norm": 0.11896498501300812, "learning_rate": 1.4630102040816328e-06, "loss": 0.0061, "step": 19275 }, { "epoch": 8.832951945080092, "grad_norm": 2.1698849201202393, "learning_rate": 1.4566326530612245e-06, "loss": 0.0071, "step": 19300 }, { "epoch": 8.844393592677346, "grad_norm": 2.6796391010284424, "learning_rate": 1.4502551020408165e-06, "loss": 0.0064, "step": 19325 }, { "epoch": 8.8558352402746, "grad_norm": 3.8971478939056396, "learning_rate": 1.4438775510204082e-06, "loss": 0.0071, "step": 19350 }, { "epoch": 8.867276887871853, "grad_norm": 3.8938040733337402, "learning_rate": 1.4375000000000002e-06, "loss": 0.0034, "step": 19375 }, { "epoch": 8.878718535469108, "grad_norm": 0.1528436839580536, "learning_rate": 1.431122448979592e-06, "loss": 0.0083, "step": 19400 }, { "epoch": 8.890160183066362, "grad_norm": 0.024963609874248505, "learning_rate": 1.4247448979591837e-06, "loss": 0.0048, "step": 19425 }, { "epoch": 8.901601830663616, "grad_norm": 0.10767273604869843, "learning_rate": 1.4183673469387757e-06, "loss": 0.0048, "step": 19450 }, { "epoch": 8.91304347826087, "grad_norm": 0.09147515892982483, "learning_rate": 1.4119897959183674e-06, "loss": 0.0054, "step": 19475 }, { "epoch": 8.924485125858123, "grad_norm": 0.6032863855361938, "learning_rate": 1.4056122448979592e-06, "loss": 0.0112, "step": 19500 }, { "epoch": 8.935926773455378, "grad_norm": 0.663342297077179, "learning_rate": 1.399234693877551e-06, "loss": 0.0089, "step": 19525 }, { "epoch": 8.947368421052632, "grad_norm": 0.2562825381755829, "learning_rate": 1.392857142857143e-06, "loss": 0.0052, "step": 19550 }, { "epoch": 8.958810068649885, "grad_norm": 1.2865808010101318, "learning_rate": 1.3864795918367347e-06, "loss": 0.0105, "step": 19575 }, { "epoch": 8.97025171624714, "grad_norm": 6.299253940582275, "learning_rate": 1.3801020408163266e-06, "loss": 0.0086, "step": 19600 }, { "epoch": 8.981693363844393, "grad_norm": 0.08225379884243011, "learning_rate": 1.3737244897959184e-06, "loss": 0.0036, "step": 19625 }, { "epoch": 8.993135011441648, "grad_norm": 0.18679049611091614, "learning_rate": 1.3673469387755102e-06, "loss": 0.0039, "step": 19650 }, { "epoch": 9.004576659038902, "grad_norm": 0.39510053396224976, "learning_rate": 1.3609693877551021e-06, "loss": 0.0088, "step": 19675 }, { "epoch": 9.016018306636155, "grad_norm": 0.06154235452413559, "learning_rate": 1.3545918367346939e-06, "loss": 0.0053, "step": 19700 }, { "epoch": 9.027459954233409, "grad_norm": 0.6689733862876892, "learning_rate": 1.3482142857142858e-06, "loss": 0.0163, "step": 19725 }, { "epoch": 9.038901601830664, "grad_norm": 0.4997970759868622, "learning_rate": 1.3418367346938776e-06, "loss": 0.0049, "step": 19750 }, { "epoch": 9.050343249427918, "grad_norm": 0.23966868221759796, "learning_rate": 1.3354591836734696e-06, "loss": 0.0061, "step": 19775 }, { "epoch": 9.061784897025172, "grad_norm": 3.6731507778167725, "learning_rate": 1.3290816326530613e-06, "loss": 0.0085, "step": 19800 }, { "epoch": 9.073226544622425, "grad_norm": 1.2267433404922485, "learning_rate": 1.3227040816326533e-06, "loss": 0.0082, "step": 19825 }, { "epoch": 9.084668192219679, "grad_norm": 3.816725492477417, "learning_rate": 1.316326530612245e-06, "loss": 0.0031, "step": 19850 }, { "epoch": 9.096109839816934, "grad_norm": 0.5195302963256836, "learning_rate": 1.3099489795918368e-06, "loss": 0.0081, "step": 19875 }, { "epoch": 9.107551487414188, "grad_norm": 0.04216615855693817, "learning_rate": 1.3035714285714286e-06, "loss": 0.006, "step": 19900 }, { "epoch": 9.118993135011442, "grad_norm": 0.15574586391448975, "learning_rate": 1.2971938775510203e-06, "loss": 0.0089, "step": 19925 }, { "epoch": 9.130434782608695, "grad_norm": 0.7792266011238098, "learning_rate": 1.2908163265306123e-06, "loss": 0.0054, "step": 19950 }, { "epoch": 9.141876430205949, "grad_norm": 0.07065439969301224, "learning_rate": 1.284438775510204e-06, "loss": 0.0075, "step": 19975 }, { "epoch": 9.153318077803204, "grad_norm": 1.0782091617584229, "learning_rate": 1.278061224489796e-06, "loss": 0.0044, "step": 20000 }, { "epoch": 9.153318077803204, "eval_loss": 0.20870625972747803, "eval_runtime": 5198.5381, "eval_samples_per_second": 1.832, "eval_steps_per_second": 0.229, "eval_wer": 0.0960037115249248, "step": 20000 }, { "epoch": 9.164759725400458, "grad_norm": 2.809683322906494, "learning_rate": 1.2716836734693878e-06, "loss": 0.0099, "step": 20025 }, { "epoch": 9.176201372997712, "grad_norm": 0.10887058824300766, "learning_rate": 1.2653061224489797e-06, "loss": 0.0042, "step": 20050 }, { "epoch": 9.187643020594965, "grad_norm": 1.1026456356048584, "learning_rate": 1.2589285714285715e-06, "loss": 0.0086, "step": 20075 }, { "epoch": 9.199084668192219, "grad_norm": 2.4570958614349365, "learning_rate": 1.2525510204081632e-06, "loss": 0.0041, "step": 20100 }, { "epoch": 9.210526315789474, "grad_norm": 0.12701238691806793, "learning_rate": 1.2461734693877552e-06, "loss": 0.0069, "step": 20125 }, { "epoch": 9.221967963386728, "grad_norm": 0.08014000952243805, "learning_rate": 1.239795918367347e-06, "loss": 0.0038, "step": 20150 }, { "epoch": 9.233409610983982, "grad_norm": 0.03979151323437691, "learning_rate": 1.233418367346939e-06, "loss": 0.0055, "step": 20175 }, { "epoch": 9.244851258581235, "grad_norm": 0.17478398978710175, "learning_rate": 1.2270408163265307e-06, "loss": 0.0042, "step": 20200 }, { "epoch": 9.256292906178489, "grad_norm": 1.452242136001587, "learning_rate": 1.2206632653061226e-06, "loss": 0.0062, "step": 20225 }, { "epoch": 9.267734553775744, "grad_norm": 1.9169418811798096, "learning_rate": 1.2142857142857144e-06, "loss": 0.005, "step": 20250 }, { "epoch": 9.279176201372998, "grad_norm": 0.07548157870769501, "learning_rate": 1.2079081632653064e-06, "loss": 0.0065, "step": 20275 }, { "epoch": 9.290617848970252, "grad_norm": 0.0355396643280983, "learning_rate": 1.2015306122448981e-06, "loss": 0.0059, "step": 20300 }, { "epoch": 9.302059496567505, "grad_norm": 0.0616096630692482, "learning_rate": 1.1951530612244899e-06, "loss": 0.0067, "step": 20325 }, { "epoch": 9.31350114416476, "grad_norm": 4.230183124542236, "learning_rate": 1.1887755102040816e-06, "loss": 0.0054, "step": 20350 }, { "epoch": 9.324942791762014, "grad_norm": 4.540670871734619, "learning_rate": 1.1823979591836734e-06, "loss": 0.0102, "step": 20375 }, { "epoch": 9.336384439359268, "grad_norm": 0.9268777370452881, "learning_rate": 1.1760204081632654e-06, "loss": 0.0075, "step": 20400 }, { "epoch": 9.347826086956522, "grad_norm": 1.7255213260650635, "learning_rate": 1.1696428571428571e-06, "loss": 0.0065, "step": 20425 }, { "epoch": 9.359267734553775, "grad_norm": 2.455357074737549, "learning_rate": 1.163265306122449e-06, "loss": 0.0031, "step": 20450 }, { "epoch": 9.37070938215103, "grad_norm": 0.6424604058265686, "learning_rate": 1.1568877551020408e-06, "loss": 0.0111, "step": 20475 }, { "epoch": 9.382151029748284, "grad_norm": 5.830212116241455, "learning_rate": 1.1505102040816326e-06, "loss": 0.0076, "step": 20500 }, { "epoch": 9.393592677345538, "grad_norm": 0.1393570601940155, "learning_rate": 1.1441326530612246e-06, "loss": 0.0067, "step": 20525 }, { "epoch": 9.405034324942791, "grad_norm": 0.03577861189842224, "learning_rate": 1.1377551020408163e-06, "loss": 0.003, "step": 20550 }, { "epoch": 9.416475972540045, "grad_norm": 0.26087144017219543, "learning_rate": 1.1313775510204083e-06, "loss": 0.0056, "step": 20575 }, { "epoch": 9.4279176201373, "grad_norm": 0.6032670140266418, "learning_rate": 1.125e-06, "loss": 0.0038, "step": 20600 }, { "epoch": 9.439359267734554, "grad_norm": 0.40521496534347534, "learning_rate": 1.118622448979592e-06, "loss": 0.0042, "step": 20625 }, { "epoch": 9.450800915331808, "grad_norm": 0.04099424555897713, "learning_rate": 1.1122448979591838e-06, "loss": 0.0049, "step": 20650 }, { "epoch": 9.462242562929061, "grad_norm": 0.08814136683940887, "learning_rate": 1.1058673469387757e-06, "loss": 0.0113, "step": 20675 }, { "epoch": 9.473684210526315, "grad_norm": 0.07661563158035278, "learning_rate": 1.0994897959183675e-06, "loss": 0.0043, "step": 20700 }, { "epoch": 9.48512585812357, "grad_norm": 0.3395727872848511, "learning_rate": 1.0931122448979592e-06, "loss": 0.0107, "step": 20725 }, { "epoch": 9.496567505720824, "grad_norm": 0.7869210243225098, "learning_rate": 1.0867346938775512e-06, "loss": 0.0051, "step": 20750 }, { "epoch": 9.508009153318078, "grad_norm": 0.7539875507354736, "learning_rate": 1.080357142857143e-06, "loss": 0.0056, "step": 20775 }, { "epoch": 9.519450800915331, "grad_norm": 16.64691162109375, "learning_rate": 1.0739795918367347e-06, "loss": 0.0066, "step": 20800 }, { "epoch": 9.530892448512585, "grad_norm": 0.7613354325294495, "learning_rate": 1.0676020408163265e-06, "loss": 0.0139, "step": 20825 }, { "epoch": 9.54233409610984, "grad_norm": 3.5544533729553223, "learning_rate": 1.0612244897959184e-06, "loss": 0.0058, "step": 20850 }, { "epoch": 9.553775743707094, "grad_norm": 0.14786852896213531, "learning_rate": 1.0548469387755102e-06, "loss": 0.0072, "step": 20875 }, { "epoch": 9.565217391304348, "grad_norm": 0.15534640848636627, "learning_rate": 1.0484693877551021e-06, "loss": 0.0045, "step": 20900 }, { "epoch": 9.576659038901601, "grad_norm": 0.23513126373291016, "learning_rate": 1.042091836734694e-06, "loss": 0.0042, "step": 20925 }, { "epoch": 9.588100686498855, "grad_norm": 0.39259815216064453, "learning_rate": 1.0357142857142857e-06, "loss": 0.0052, "step": 20950 }, { "epoch": 9.59954233409611, "grad_norm": 3.45914626121521, "learning_rate": 1.0293367346938776e-06, "loss": 0.0083, "step": 20975 }, { "epoch": 9.610983981693364, "grad_norm": 0.8460655212402344, "learning_rate": 1.0229591836734694e-06, "loss": 0.006, "step": 21000 }, { "epoch": 9.622425629290618, "grad_norm": 0.3035661280155182, "learning_rate": 1.0165816326530613e-06, "loss": 0.0066, "step": 21025 }, { "epoch": 9.633867276887871, "grad_norm": 0.07911881059408188, "learning_rate": 1.010204081632653e-06, "loss": 0.0044, "step": 21050 }, { "epoch": 9.645308924485127, "grad_norm": 1.1485878229141235, "learning_rate": 1.003826530612245e-06, "loss": 0.0061, "step": 21075 }, { "epoch": 9.65675057208238, "grad_norm": 3.1659584045410156, "learning_rate": 9.974489795918368e-07, "loss": 0.0062, "step": 21100 }, { "epoch": 9.668192219679634, "grad_norm": 0.08537878096103668, "learning_rate": 9.910714285714286e-07, "loss": 0.0065, "step": 21125 }, { "epoch": 9.679633867276888, "grad_norm": 0.27491432428359985, "learning_rate": 9.846938775510205e-07, "loss": 0.0054, "step": 21150 }, { "epoch": 9.691075514874141, "grad_norm": 1.1356456279754639, "learning_rate": 9.783163265306123e-07, "loss": 0.0106, "step": 21175 }, { "epoch": 9.702517162471397, "grad_norm": 0.0873272642493248, "learning_rate": 9.71938775510204e-07, "loss": 0.0028, "step": 21200 }, { "epoch": 9.71395881006865, "grad_norm": 0.33347004652023315, "learning_rate": 9.655612244897958e-07, "loss": 0.0134, "step": 21225 }, { "epoch": 9.725400457665904, "grad_norm": 0.14462488889694214, "learning_rate": 9.591836734693878e-07, "loss": 0.0046, "step": 21250 }, { "epoch": 9.736842105263158, "grad_norm": 1.358343482017517, "learning_rate": 9.528061224489796e-07, "loss": 0.0103, "step": 21275 }, { "epoch": 9.748283752860411, "grad_norm": 0.039507463574409485, "learning_rate": 9.464285714285715e-07, "loss": 0.0041, "step": 21300 }, { "epoch": 9.759725400457667, "grad_norm": 0.7528636455535889, "learning_rate": 9.400510204081633e-07, "loss": 0.0028, "step": 21325 }, { "epoch": 9.77116704805492, "grad_norm": 1.8518726825714111, "learning_rate": 9.33673469387755e-07, "loss": 0.0054, "step": 21350 }, { "epoch": 9.782608695652174, "grad_norm": 0.5261640548706055, "learning_rate": 9.27295918367347e-07, "loss": 0.008, "step": 21375 }, { "epoch": 9.794050343249427, "grad_norm": 0.48610520362854004, "learning_rate": 9.209183673469387e-07, "loss": 0.0045, "step": 21400 }, { "epoch": 9.805491990846681, "grad_norm": 0.35371649265289307, "learning_rate": 9.145408163265307e-07, "loss": 0.0067, "step": 21425 }, { "epoch": 9.816933638443937, "grad_norm": 4.049976825714111, "learning_rate": 9.081632653061225e-07, "loss": 0.0065, "step": 21450 }, { "epoch": 9.82837528604119, "grad_norm": 4.127640724182129, "learning_rate": 9.017857142857144e-07, "loss": 0.009, "step": 21475 }, { "epoch": 9.839816933638444, "grad_norm": 0.15992511808872223, "learning_rate": 8.954081632653062e-07, "loss": 0.0047, "step": 21500 }, { "epoch": 9.851258581235697, "grad_norm": 0.5393081307411194, "learning_rate": 8.89030612244898e-07, "loss": 0.0054, "step": 21525 }, { "epoch": 9.862700228832953, "grad_norm": 4.514695167541504, "learning_rate": 8.826530612244898e-07, "loss": 0.0054, "step": 21550 }, { "epoch": 9.874141876430206, "grad_norm": 1.2299213409423828, "learning_rate": 8.762755102040816e-07, "loss": 0.0042, "step": 21575 }, { "epoch": 9.88558352402746, "grad_norm": 0.12707316875457764, "learning_rate": 8.698979591836735e-07, "loss": 0.0062, "step": 21600 }, { "epoch": 9.897025171624714, "grad_norm": 0.8991334438323975, "learning_rate": 8.635204081632653e-07, "loss": 0.0078, "step": 21625 }, { "epoch": 9.908466819221967, "grad_norm": 0.9138942360877991, "learning_rate": 8.571428571428572e-07, "loss": 0.0034, "step": 21650 }, { "epoch": 9.919908466819223, "grad_norm": 0.07458051294088364, "learning_rate": 8.50765306122449e-07, "loss": 0.0061, "step": 21675 }, { "epoch": 9.931350114416476, "grad_norm": 0.15481892228126526, "learning_rate": 8.44387755102041e-07, "loss": 0.0062, "step": 21700 }, { "epoch": 9.94279176201373, "grad_norm": 0.4163806736469269, "learning_rate": 8.380102040816327e-07, "loss": 0.0037, "step": 21725 }, { "epoch": 9.954233409610984, "grad_norm": 0.1567489504814148, "learning_rate": 8.316326530612246e-07, "loss": 0.0041, "step": 21750 }, { "epoch": 9.965675057208237, "grad_norm": 2.551042079925537, "learning_rate": 8.252551020408163e-07, "loss": 0.0093, "step": 21775 }, { "epoch": 9.977116704805493, "grad_norm": 0.11054814606904984, "learning_rate": 8.188775510204081e-07, "loss": 0.0058, "step": 21800 }, { "epoch": 9.988558352402746, "grad_norm": 0.0360272191464901, "learning_rate": 8.125000000000001e-07, "loss": 0.0072, "step": 21825 }, { "epoch": 10.0, "grad_norm": 5.81298303604126, "learning_rate": 8.061224489795918e-07, "loss": 0.0068, "step": 21850 }, { "epoch": 10.011441647597254, "grad_norm": 0.0239973533898592, "learning_rate": 7.997448979591838e-07, "loss": 0.0039, "step": 21875 }, { "epoch": 10.022883295194507, "grad_norm": 0.14009559154510498, "learning_rate": 7.933673469387755e-07, "loss": 0.0083, "step": 21900 }, { "epoch": 10.034324942791763, "grad_norm": 0.05973900109529495, "learning_rate": 7.869897959183675e-07, "loss": 0.0067, "step": 21925 }, { "epoch": 10.045766590389016, "grad_norm": 2.9854466915130615, "learning_rate": 7.806122448979593e-07, "loss": 0.005, "step": 21950 }, { "epoch": 10.05720823798627, "grad_norm": 2.130392074584961, "learning_rate": 7.74234693877551e-07, "loss": 0.0026, "step": 21975 }, { "epoch": 10.068649885583524, "grad_norm": 13.454663276672363, "learning_rate": 7.678571428571429e-07, "loss": 0.0094, "step": 22000 }, { "epoch": 10.080091533180777, "grad_norm": 2.081770420074463, "learning_rate": 7.614795918367347e-07, "loss": 0.0069, "step": 22025 }, { "epoch": 10.091533180778033, "grad_norm": 1.7810285091400146, "learning_rate": 7.551020408163266e-07, "loss": 0.0113, "step": 22050 }, { "epoch": 10.102974828375286, "grad_norm": 3.7953577041625977, "learning_rate": 7.487244897959183e-07, "loss": 0.005, "step": 22075 }, { "epoch": 10.11441647597254, "grad_norm": 7.428182601928711, "learning_rate": 7.423469387755102e-07, "loss": 0.0077, "step": 22100 }, { "epoch": 10.125858123569794, "grad_norm": 4.6249165534973145, "learning_rate": 7.359693877551021e-07, "loss": 0.0032, "step": 22125 }, { "epoch": 10.137299771167047, "grad_norm": 7.158110618591309, "learning_rate": 7.295918367346939e-07, "loss": 0.0071, "step": 22150 }, { "epoch": 10.148741418764303, "grad_norm": 0.05203917250037193, "learning_rate": 7.232142857142858e-07, "loss": 0.0025, "step": 22175 }, { "epoch": 10.160183066361556, "grad_norm": 0.4155661165714264, "learning_rate": 7.168367346938775e-07, "loss": 0.0057, "step": 22200 }, { "epoch": 10.17162471395881, "grad_norm": 0.3524133563041687, "learning_rate": 7.104591836734694e-07, "loss": 0.0042, "step": 22225 }, { "epoch": 10.183066361556063, "grad_norm": 7.646109580993652, "learning_rate": 7.040816326530613e-07, "loss": 0.0074, "step": 22250 }, { "epoch": 10.194508009153319, "grad_norm": 1.1890356540679932, "learning_rate": 6.977040816326531e-07, "loss": 0.0024, "step": 22275 }, { "epoch": 10.205949656750573, "grad_norm": 2.9643213748931885, "learning_rate": 6.913265306122449e-07, "loss": 0.0113, "step": 22300 }, { "epoch": 10.217391304347826, "grad_norm": 0.13656121492385864, "learning_rate": 6.849489795918367e-07, "loss": 0.0027, "step": 22325 }, { "epoch": 10.22883295194508, "grad_norm": 3.043562173843384, "learning_rate": 6.785714285714286e-07, "loss": 0.015, "step": 22350 }, { "epoch": 10.240274599542333, "grad_norm": 0.23451216518878937, "learning_rate": 6.721938775510205e-07, "loss": 0.004, "step": 22375 }, { "epoch": 10.251716247139589, "grad_norm": 6.707335948944092, "learning_rate": 6.658163265306123e-07, "loss": 0.0029, "step": 22400 }, { "epoch": 10.263157894736842, "grad_norm": 0.055560849606990814, "learning_rate": 6.594387755102041e-07, "loss": 0.0088, "step": 22425 }, { "epoch": 10.274599542334096, "grad_norm": 8.039435386657715, "learning_rate": 6.53061224489796e-07, "loss": 0.0068, "step": 22450 }, { "epoch": 10.28604118993135, "grad_norm": 0.11787508428096771, "learning_rate": 6.466836734693878e-07, "loss": 0.0035, "step": 22475 }, { "epoch": 10.297482837528603, "grad_norm": 1.5206620693206787, "learning_rate": 6.403061224489796e-07, "loss": 0.0094, "step": 22500 }, { "epoch": 10.308924485125859, "grad_norm": 0.029644370079040527, "learning_rate": 6.339285714285714e-07, "loss": 0.0054, "step": 22525 }, { "epoch": 10.320366132723112, "grad_norm": 0.48167529702186584, "learning_rate": 6.275510204081633e-07, "loss": 0.0117, "step": 22550 }, { "epoch": 10.331807780320366, "grad_norm": 0.14305919408798218, "learning_rate": 6.211734693877551e-07, "loss": 0.0048, "step": 22575 }, { "epoch": 10.34324942791762, "grad_norm": 1.8980730772018433, "learning_rate": 6.14795918367347e-07, "loss": 0.0026, "step": 22600 }, { "epoch": 10.354691075514873, "grad_norm": 1.3038541078567505, "learning_rate": 6.084183673469388e-07, "loss": 0.0039, "step": 22625 }, { "epoch": 10.366132723112129, "grad_norm": 5.523509502410889, "learning_rate": 6.020408163265306e-07, "loss": 0.0066, "step": 22650 }, { "epoch": 10.377574370709382, "grad_norm": 0.04104507714509964, "learning_rate": 5.956632653061225e-07, "loss": 0.0042, "step": 22675 }, { "epoch": 10.389016018306636, "grad_norm": 1.2055227756500244, "learning_rate": 5.892857142857143e-07, "loss": 0.0038, "step": 22700 }, { "epoch": 10.40045766590389, "grad_norm": 0.02921094372868538, "learning_rate": 5.829081632653061e-07, "loss": 0.0036, "step": 22725 }, { "epoch": 10.411899313501145, "grad_norm": 16.179323196411133, "learning_rate": 5.76530612244898e-07, "loss": 0.0049, "step": 22750 }, { "epoch": 10.423340961098399, "grad_norm": 0.6530442833900452, "learning_rate": 5.701530612244898e-07, "loss": 0.0022, "step": 22775 }, { "epoch": 10.434782608695652, "grad_norm": 8.9596529006958, "learning_rate": 5.637755102040817e-07, "loss": 0.0077, "step": 22800 }, { "epoch": 10.446224256292906, "grad_norm": 0.03742032125592232, "learning_rate": 5.573979591836735e-07, "loss": 0.004, "step": 22825 }, { "epoch": 10.45766590389016, "grad_norm": 4.956221103668213, "learning_rate": 5.510204081632653e-07, "loss": 0.0062, "step": 22850 }, { "epoch": 10.469107551487415, "grad_norm": 0.04372161999344826, "learning_rate": 5.446428571428572e-07, "loss": 0.0029, "step": 22875 }, { "epoch": 10.480549199084669, "grad_norm": 2.4792611598968506, "learning_rate": 5.38265306122449e-07, "loss": 0.0047, "step": 22900 }, { "epoch": 10.491990846681922, "grad_norm": 1.2810579538345337, "learning_rate": 5.318877551020408e-07, "loss": 0.0054, "step": 22925 }, { "epoch": 10.503432494279176, "grad_norm": 16.370330810546875, "learning_rate": 5.255102040816326e-07, "loss": 0.0114, "step": 22950 }, { "epoch": 10.51487414187643, "grad_norm": 0.6860277652740479, "learning_rate": 5.191326530612245e-07, "loss": 0.004, "step": 22975 }, { "epoch": 10.526315789473685, "grad_norm": 1.6368955373764038, "learning_rate": 5.127551020408164e-07, "loss": 0.0074, "step": 23000 }, { "epoch": 10.537757437070939, "grad_norm": 1.6935875415802002, "learning_rate": 5.063775510204082e-07, "loss": 0.0043, "step": 23025 }, { "epoch": 10.549199084668192, "grad_norm": 0.27796322107315063, "learning_rate": 5.002551020408164e-07, "loss": 0.0075, "step": 23050 }, { "epoch": 10.560640732265446, "grad_norm": 0.05163831263780594, "learning_rate": 4.938775510204081e-07, "loss": 0.0033, "step": 23075 }, { "epoch": 10.5720823798627, "grad_norm": 3.6930410861968994, "learning_rate": 4.875e-07, "loss": 0.0067, "step": 23100 }, { "epoch": 10.583524027459955, "grad_norm": 2.0602188110351562, "learning_rate": 4.811224489795919e-07, "loss": 0.0046, "step": 23125 }, { "epoch": 10.594965675057209, "grad_norm": 6.743757724761963, "learning_rate": 4.747448979591837e-07, "loss": 0.0119, "step": 23150 }, { "epoch": 10.606407322654462, "grad_norm": 0.08728916943073273, "learning_rate": 4.683673469387756e-07, "loss": 0.0023, "step": 23175 }, { "epoch": 10.617848970251716, "grad_norm": 0.6951027512550354, "learning_rate": 4.6198979591836744e-07, "loss": 0.0067, "step": 23200 }, { "epoch": 10.62929061784897, "grad_norm": 0.9762528538703918, "learning_rate": 4.556122448979592e-07, "loss": 0.0033, "step": 23225 }, { "epoch": 10.640732265446225, "grad_norm": 4.385350704193115, "learning_rate": 4.49234693877551e-07, "loss": 0.006, "step": 23250 }, { "epoch": 10.652173913043478, "grad_norm": 0.12361292541027069, "learning_rate": 4.4285714285714286e-07, "loss": 0.002, "step": 23275 }, { "epoch": 10.663615560640732, "grad_norm": 4.548694610595703, "learning_rate": 4.364795918367347e-07, "loss": 0.0063, "step": 23300 }, { "epoch": 10.675057208237986, "grad_norm": 3.6494503021240234, "learning_rate": 4.301020408163266e-07, "loss": 0.005, "step": 23325 }, { "epoch": 10.68649885583524, "grad_norm": 5.889870643615723, "learning_rate": 4.237244897959184e-07, "loss": 0.0057, "step": 23350 }, { "epoch": 10.697940503432495, "grad_norm": 3.1003196239471436, "learning_rate": 4.1734693877551025e-07, "loss": 0.0073, "step": 23375 }, { "epoch": 10.709382151029748, "grad_norm": 3.9601292610168457, "learning_rate": 4.109693877551021e-07, "loss": 0.0089, "step": 23400 }, { "epoch": 10.720823798627002, "grad_norm": 0.19110934436321259, "learning_rate": 4.0459183673469387e-07, "loss": 0.0035, "step": 23425 }, { "epoch": 10.732265446224256, "grad_norm": 6.233257293701172, "learning_rate": 3.9821428571428573e-07, "loss": 0.0094, "step": 23450 }, { "epoch": 10.743707093821511, "grad_norm": 0.034573789685964584, "learning_rate": 3.9183673469387754e-07, "loss": 0.0049, "step": 23475 }, { "epoch": 10.755148741418765, "grad_norm": 4.074402332305908, "learning_rate": 3.854591836734694e-07, "loss": 0.0098, "step": 23500 }, { "epoch": 10.766590389016018, "grad_norm": 0.19702082872390747, "learning_rate": 3.7908163265306126e-07, "loss": 0.0034, "step": 23525 }, { "epoch": 10.778032036613272, "grad_norm": 6.4033522605896, "learning_rate": 3.7270408163265307e-07, "loss": 0.0061, "step": 23550 }, { "epoch": 10.789473684210526, "grad_norm": 0.6741847395896912, "learning_rate": 3.6632653061224493e-07, "loss": 0.0042, "step": 23575 }, { "epoch": 10.800915331807781, "grad_norm": 0.5898193717002869, "learning_rate": 3.5994897959183674e-07, "loss": 0.0067, "step": 23600 }, { "epoch": 10.812356979405035, "grad_norm": 0.05853106081485748, "learning_rate": 3.535714285714286e-07, "loss": 0.0068, "step": 23625 }, { "epoch": 10.823798627002288, "grad_norm": 0.1718439757823944, "learning_rate": 3.4719387755102046e-07, "loss": 0.0067, "step": 23650 }, { "epoch": 10.835240274599542, "grad_norm": 0.39581310749053955, "learning_rate": 3.4081632653061227e-07, "loss": 0.0035, "step": 23675 }, { "epoch": 10.846681922196796, "grad_norm": 3.5774476528167725, "learning_rate": 3.344387755102041e-07, "loss": 0.0081, "step": 23700 }, { "epoch": 10.858123569794051, "grad_norm": 0.2080162763595581, "learning_rate": 3.2806122448979594e-07, "loss": 0.0038, "step": 23725 }, { "epoch": 10.869565217391305, "grad_norm": 0.9511691331863403, "learning_rate": 3.216836734693878e-07, "loss": 0.0075, "step": 23750 }, { "epoch": 10.881006864988558, "grad_norm": 2.3875999450683594, "learning_rate": 3.153061224489796e-07, "loss": 0.0054, "step": 23775 }, { "epoch": 10.892448512585812, "grad_norm": 0.2756766080856323, "learning_rate": 3.089285714285714e-07, "loss": 0.0081, "step": 23800 }, { "epoch": 10.903890160183066, "grad_norm": 0.8285855054855347, "learning_rate": 3.025510204081633e-07, "loss": 0.0039, "step": 23825 }, { "epoch": 10.915331807780321, "grad_norm": 7.757495403289795, "learning_rate": 2.9617346938775513e-07, "loss": 0.0048, "step": 23850 }, { "epoch": 10.926773455377575, "grad_norm": 3.3156180381774902, "learning_rate": 2.8979591836734694e-07, "loss": 0.0044, "step": 23875 }, { "epoch": 10.938215102974828, "grad_norm": 1.0153356790542603, "learning_rate": 2.8341836734693875e-07, "loss": 0.0043, "step": 23900 }, { "epoch": 10.949656750572082, "grad_norm": 14.048026084899902, "learning_rate": 2.770408163265306e-07, "loss": 0.0062, "step": 23925 }, { "epoch": 10.961098398169337, "grad_norm": 3.859445810317993, "learning_rate": 2.7066326530612247e-07, "loss": 0.0067, "step": 23950 }, { "epoch": 10.972540045766591, "grad_norm": 2.134991407394409, "learning_rate": 2.6428571428571433e-07, "loss": 0.0033, "step": 23975 }, { "epoch": 10.983981693363845, "grad_norm": 9.151762008666992, "learning_rate": 2.579081632653061e-07, "loss": 0.0121, "step": 24000 }, { "epoch": 10.995423340961098, "grad_norm": 0.3495076298713684, "learning_rate": 2.5153061224489795e-07, "loss": 0.0048, "step": 24025 }, { "epoch": 11.006864988558352, "grad_norm": 0.10012809187173843, "learning_rate": 2.451530612244898e-07, "loss": 0.0055, "step": 24050 }, { "epoch": 11.018306636155607, "grad_norm": 0.06472992897033691, "learning_rate": 2.3877551020408167e-07, "loss": 0.0019, "step": 24075 }, { "epoch": 11.02974828375286, "grad_norm": 0.16011592745780945, "learning_rate": 2.323979591836735e-07, "loss": 0.0075, "step": 24100 }, { "epoch": 11.041189931350115, "grad_norm": 2.186831474304199, "learning_rate": 2.2602040816326532e-07, "loss": 0.0037, "step": 24125 }, { "epoch": 11.052631578947368, "grad_norm": 0.32710880041122437, "learning_rate": 2.1964285714285715e-07, "loss": 0.0073, "step": 24150 }, { "epoch": 11.064073226544622, "grad_norm": 0.054156772792339325, "learning_rate": 2.1326530612244898e-07, "loss": 0.0052, "step": 24175 }, { "epoch": 11.075514874141877, "grad_norm": 0.07884764671325684, "learning_rate": 2.0688775510204085e-07, "loss": 0.003, "step": 24200 }, { "epoch": 11.08695652173913, "grad_norm": 0.04855496436357498, "learning_rate": 2.0051020408163265e-07, "loss": 0.0039, "step": 24225 }, { "epoch": 11.098398169336384, "grad_norm": 3.0451197624206543, "learning_rate": 1.941326530612245e-07, "loss": 0.0037, "step": 24250 }, { "epoch": 11.109839816933638, "grad_norm": 2.733774185180664, "learning_rate": 1.8775510204081635e-07, "loss": 0.0049, "step": 24275 }, { "epoch": 11.121281464530892, "grad_norm": 0.01790427230298519, "learning_rate": 1.816326530612245e-07, "loss": 0.0082, "step": 24300 }, { "epoch": 11.132723112128147, "grad_norm": 24.361324310302734, "learning_rate": 1.7525510204081634e-07, "loss": 0.0067, "step": 24325 }, { "epoch": 11.1441647597254, "grad_norm": 0.0411471389234066, "learning_rate": 1.6887755102040817e-07, "loss": 0.0056, "step": 24350 }, { "epoch": 11.155606407322654, "grad_norm": 0.5140661597251892, "learning_rate": 1.625e-07, "loss": 0.0036, "step": 24375 }, { "epoch": 11.167048054919908, "grad_norm": 0.36049628257751465, "learning_rate": 1.5612244897959184e-07, "loss": 0.008, "step": 24400 }, { "epoch": 11.178489702517162, "grad_norm": 0.6188111305236816, "learning_rate": 1.4974489795918368e-07, "loss": 0.0021, "step": 24425 }, { "epoch": 11.189931350114417, "grad_norm": 0.12496360391378403, "learning_rate": 1.433673469387755e-07, "loss": 0.0052, "step": 24450 }, { "epoch": 11.20137299771167, "grad_norm": 2.986814498901367, "learning_rate": 1.3698979591836734e-07, "loss": 0.0062, "step": 24475 }, { "epoch": 11.212814645308924, "grad_norm": 1.2586897611618042, "learning_rate": 1.3061224489795918e-07, "loss": 0.0067, "step": 24500 }, { "epoch": 11.224256292906178, "grad_norm": 2.0878169536590576, "learning_rate": 1.2423469387755104e-07, "loss": 0.0052, "step": 24525 }, { "epoch": 11.235697940503432, "grad_norm": 0.07096085697412491, "learning_rate": 1.1785714285714286e-07, "loss": 0.0073, "step": 24550 }, { "epoch": 11.247139588100687, "grad_norm": 0.042739126831293106, "learning_rate": 1.1147959183673471e-07, "loss": 0.0043, "step": 24575 }, { "epoch": 11.25858123569794, "grad_norm": 1.214879035949707, "learning_rate": 1.0510204081632653e-07, "loss": 0.0076, "step": 24600 }, { "epoch": 11.270022883295194, "grad_norm": 0.14526697993278503, "learning_rate": 9.872448979591838e-08, "loss": 0.0041, "step": 24625 }, { "epoch": 11.281464530892448, "grad_norm": 2.3068532943725586, "learning_rate": 9.23469387755102e-08, "loss": 0.011, "step": 24650 }, { "epoch": 11.292906178489703, "grad_norm": 0.07109973579645157, "learning_rate": 8.596938775510205e-08, "loss": 0.002, "step": 24675 }, { "epoch": 11.304347826086957, "grad_norm": 0.05076288804411888, "learning_rate": 7.959183673469388e-08, "loss": 0.0091, "step": 24700 }, { "epoch": 11.31578947368421, "grad_norm": 0.026290345937013626, "learning_rate": 7.321428571428572e-08, "loss": 0.0057, "step": 24725 }, { "epoch": 11.327231121281464, "grad_norm": 0.04591425135731697, "learning_rate": 6.683673469387755e-08, "loss": 0.0046, "step": 24750 }, { "epoch": 11.338672768878718, "grad_norm": 0.057588379830121994, "learning_rate": 6.04591836734694e-08, "loss": 0.0051, "step": 24775 }, { "epoch": 11.350114416475973, "grad_norm": 0.19686830043792725, "learning_rate": 5.4081632653061234e-08, "loss": 0.0064, "step": 24800 }, { "epoch": 11.361556064073227, "grad_norm": 0.039152998477220535, "learning_rate": 4.770408163265306e-08, "loss": 0.004, "step": 24825 }, { "epoch": 11.37299771167048, "grad_norm": 0.1060929223895073, "learning_rate": 4.1326530612244896e-08, "loss": 0.0053, "step": 24850 }, { "epoch": 11.384439359267734, "grad_norm": 0.057997722178697586, "learning_rate": 3.494897959183673e-08, "loss": 0.0036, "step": 24875 }, { "epoch": 11.395881006864988, "grad_norm": 0.3169647753238678, "learning_rate": 2.8571428571428575e-08, "loss": 0.0059, "step": 24900 }, { "epoch": 11.407322654462243, "grad_norm": 0.020627867430448532, "learning_rate": 2.2193877551020407e-08, "loss": 0.0043, "step": 24925 }, { "epoch": 11.418764302059497, "grad_norm": 0.04437786713242531, "learning_rate": 1.5816326530612248e-08, "loss": 0.0075, "step": 24950 }, { "epoch": 11.43020594965675, "grad_norm": 0.03256097808480263, "learning_rate": 9.438775510204082e-09, "loss": 0.0022, "step": 24975 }, { "epoch": 11.441647597254004, "grad_norm": 3.5547521114349365, "learning_rate": 3.0612244897959183e-09, "loss": 0.0055, "step": 25000 }, { "epoch": 11.441647597254004, "eval_loss": 0.21081247925758362, "eval_runtime": 5131.1271, "eval_samples_per_second": 1.856, "eval_steps_per_second": 0.232, "eval_wer": 0.09488385486657708, "step": 25000 } ], "logging_steps": 25, "max_steps": 25000, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.081970563920691e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }