{ "best_metric": null, "best_model_checkpoint": null, "epoch": 496.73202614379085, "eval_steps": 500, "global_step": 19000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26143790849673204, "grad_norm": 0.7138202786445618, "learning_rate": 0.00019999986330190926, "loss": 2.463, "step": 10 }, { "epoch": 0.5228758169934641, "grad_norm": 0.790814220905304, "learning_rate": 0.00019999945320801072, "loss": 2.1963, "step": 20 }, { "epoch": 0.7843137254901961, "grad_norm": 0.8431825041770935, "learning_rate": 0.00019999876971942557, "loss": 1.7823, "step": 30 }, { "epoch": 1.0457516339869282, "grad_norm": 0.8540534377098083, "learning_rate": 0.0001999978128380225, "loss": 1.4428, "step": 40 }, { "epoch": 1.3071895424836601, "grad_norm": 0.8526931405067444, "learning_rate": 0.00019999658256641747, "loss": 1.1373, "step": 50 }, { "epoch": 1.5686274509803921, "grad_norm": 1.1626569032669067, "learning_rate": 0.00019999507890797408, "loss": 0.8482, "step": 60 }, { "epoch": 1.8300653594771243, "grad_norm": 1.191985845565796, "learning_rate": 0.0001999933018668033, "loss": 0.6793, "step": 70 }, { "epoch": 2.0915032679738563, "grad_norm": 0.8180794715881348, "learning_rate": 0.0001999912514477634, "loss": 0.5741, "step": 80 }, { "epoch": 2.3529411764705883, "grad_norm": 0.7215670347213745, "learning_rate": 0.00019998892765646026, "loss": 0.424, "step": 90 }, { "epoch": 2.6143790849673203, "grad_norm": 0.9890243411064148, "learning_rate": 0.0001999863304992469, "loss": 0.4335, "step": 100 }, { "epoch": 2.8758169934640523, "grad_norm": 0.8641286492347717, "learning_rate": 0.00019998345998322397, "loss": 0.4301, "step": 110 }, { "epoch": 3.1372549019607843, "grad_norm": 0.8116602301597595, "learning_rate": 0.0001999803161162393, "loss": 0.3547, "step": 120 }, { "epoch": 3.3986928104575163, "grad_norm": 0.9898081421852112, "learning_rate": 0.0001999768989068881, "loss": 0.3373, "step": 130 }, { "epoch": 3.6601307189542482, "grad_norm": 0.762677788734436, "learning_rate": 0.0001999732083645129, "loss": 0.3405, "step": 140 }, { "epoch": 3.9215686274509802, "grad_norm": 0.9873971343040466, "learning_rate": 0.0001999692444992035, "loss": 0.3842, "step": 150 }, { "epoch": 4.183006535947713, "grad_norm": 0.9828356504440308, "learning_rate": 0.00019996500732179695, "loss": 0.313, "step": 160 }, { "epoch": 4.444444444444445, "grad_norm": 0.8973499536514282, "learning_rate": 0.0001999604968438775, "loss": 0.3191, "step": 170 }, { "epoch": 4.705882352941177, "grad_norm": 0.9795909523963928, "learning_rate": 0.0001999557130777767, "loss": 0.3101, "step": 180 }, { "epoch": 4.967320261437909, "grad_norm": 1.2029128074645996, "learning_rate": 0.00019995065603657316, "loss": 0.3403, "step": 190 }, { "epoch": 5.228758169934641, "grad_norm": 1.0984058380126953, "learning_rate": 0.00019994532573409262, "loss": 0.2631, "step": 200 }, { "epoch": 5.490196078431373, "grad_norm": 0.8392295241355896, "learning_rate": 0.0001999397221849079, "loss": 0.2942, "step": 210 }, { "epoch": 5.751633986928105, "grad_norm": 0.8194178938865662, "learning_rate": 0.00019993384540433894, "loss": 0.3027, "step": 220 }, { "epoch": 6.0130718954248366, "grad_norm": 1.1430736780166626, "learning_rate": 0.00019992769540845258, "loss": 0.312, "step": 230 }, { "epoch": 6.2745098039215685, "grad_norm": 1.0457488298416138, "learning_rate": 0.00019992127221406275, "loss": 0.2631, "step": 240 }, { "epoch": 6.5359477124183005, "grad_norm": 0.7075788974761963, "learning_rate": 0.0001999145758387301, "loss": 0.2814, "step": 250 }, { "epoch": 6.7973856209150325, "grad_norm": 0.8417288064956665, "learning_rate": 0.00019990760630076237, "loss": 0.2945, "step": 260 }, { "epoch": 7.0588235294117645, "grad_norm": 1.1337305307388306, "learning_rate": 0.000199900363619214, "loss": 0.288, "step": 270 }, { "epoch": 7.3202614379084965, "grad_norm": 0.9480029940605164, "learning_rate": 0.00019989284781388617, "loss": 0.2696, "step": 280 }, { "epoch": 7.5816993464052285, "grad_norm": 0.9333047270774841, "learning_rate": 0.0001998850589053268, "loss": 0.2647, "step": 290 }, { "epoch": 7.8431372549019605, "grad_norm": 0.8173583149909973, "learning_rate": 0.00019987699691483048, "loss": 0.2595, "step": 300 }, { "epoch": 8.104575163398692, "grad_norm": 1.2094272375106812, "learning_rate": 0.0001998686618644384, "loss": 0.2696, "step": 310 }, { "epoch": 8.366013071895425, "grad_norm": 0.8234169483184814, "learning_rate": 0.00019986005377693825, "loss": 0.2446, "step": 320 }, { "epoch": 8.627450980392156, "grad_norm": 1.0735175609588623, "learning_rate": 0.00019985117267586424, "loss": 0.2667, "step": 330 }, { "epoch": 8.88888888888889, "grad_norm": 0.8714181184768677, "learning_rate": 0.00019984201858549693, "loss": 0.2737, "step": 340 }, { "epoch": 9.15032679738562, "grad_norm": 1.0795856714248657, "learning_rate": 0.00019983259153086327, "loss": 0.2782, "step": 350 }, { "epoch": 9.411764705882353, "grad_norm": 1.0457770824432373, "learning_rate": 0.00019982289153773646, "loss": 0.2415, "step": 360 }, { "epoch": 9.673202614379084, "grad_norm": 0.8930032849311829, "learning_rate": 0.00019981291863263592, "loss": 0.2492, "step": 370 }, { "epoch": 9.934640522875817, "grad_norm": 1.0066367387771606, "learning_rate": 0.00019980267284282717, "loss": 0.2467, "step": 380 }, { "epoch": 10.196078431372548, "grad_norm": 1.148116111755371, "learning_rate": 0.00019979215419632182, "loss": 0.2323, "step": 390 }, { "epoch": 10.457516339869281, "grad_norm": 1.03926420211792, "learning_rate": 0.00019978136272187747, "loss": 0.237, "step": 400 }, { "epoch": 10.718954248366012, "grad_norm": 0.9982312917709351, "learning_rate": 0.00019977029844899758, "loss": 0.2597, "step": 410 }, { "epoch": 10.980392156862745, "grad_norm": 1.2115322351455688, "learning_rate": 0.00019975896140793142, "loss": 0.2409, "step": 420 }, { "epoch": 11.241830065359476, "grad_norm": 1.4162884950637817, "learning_rate": 0.0001997473516296741, "loss": 0.2381, "step": 430 }, { "epoch": 11.50326797385621, "grad_norm": 1.3810129165649414, "learning_rate": 0.00019973546914596623, "loss": 0.2491, "step": 440 }, { "epoch": 11.764705882352942, "grad_norm": 1.0123716592788696, "learning_rate": 0.0001997233139892941, "loss": 0.2401, "step": 450 }, { "epoch": 12.026143790849673, "grad_norm": 0.9082147479057312, "learning_rate": 0.0001997108861928895, "loss": 0.2382, "step": 460 }, { "epoch": 12.287581699346406, "grad_norm": 0.9988301992416382, "learning_rate": 0.00019969818579072945, "loss": 0.2217, "step": 470 }, { "epoch": 12.549019607843137, "grad_norm": 1.2657634019851685, "learning_rate": 0.00019968521281753642, "loss": 0.2417, "step": 480 }, { "epoch": 12.81045751633987, "grad_norm": 0.8032001852989197, "learning_rate": 0.00019967196730877803, "loss": 0.2309, "step": 490 }, { "epoch": 13.071895424836601, "grad_norm": 1.2560818195343018, "learning_rate": 0.000199658449300667, "loss": 0.2306, "step": 500 }, { "epoch": 13.333333333333334, "grad_norm": 0.9680474996566772, "learning_rate": 0.000199644658830161, "loss": 0.217, "step": 510 }, { "epoch": 13.594771241830065, "grad_norm": 0.8553510904312134, "learning_rate": 0.00019963059593496268, "loss": 0.2362, "step": 520 }, { "epoch": 13.856209150326798, "grad_norm": 1.1278111934661865, "learning_rate": 0.00019961626065351947, "loss": 0.2252, "step": 530 }, { "epoch": 14.117647058823529, "grad_norm": 0.8731440305709839, "learning_rate": 0.0001996016530250235, "loss": 0.2101, "step": 540 }, { "epoch": 14.379084967320262, "grad_norm": 1.0482535362243652, "learning_rate": 0.00019958677308941139, "loss": 0.2113, "step": 550 }, { "epoch": 14.640522875816993, "grad_norm": 0.8031530380249023, "learning_rate": 0.0001995716208873644, "loss": 0.2285, "step": 560 }, { "epoch": 14.901960784313726, "grad_norm": 1.2343863248825073, "learning_rate": 0.00019955619646030802, "loss": 0.2225, "step": 570 }, { "epoch": 15.163398692810457, "grad_norm": 1.141625165939331, "learning_rate": 0.00019954049985041204, "loss": 0.222, "step": 580 }, { "epoch": 15.42483660130719, "grad_norm": 1.163808822631836, "learning_rate": 0.00019952453110059045, "loss": 0.2099, "step": 590 }, { "epoch": 15.686274509803921, "grad_norm": 1.0026309490203857, "learning_rate": 0.00019950829025450114, "loss": 0.222, "step": 600 }, { "epoch": 15.947712418300654, "grad_norm": 0.9590998888015747, "learning_rate": 0.00019949177735654601, "loss": 0.2148, "step": 610 }, { "epoch": 16.209150326797385, "grad_norm": 1.1316090822219849, "learning_rate": 0.00019947499245187068, "loss": 0.2063, "step": 620 }, { "epoch": 16.470588235294116, "grad_norm": 1.1160564422607422, "learning_rate": 0.00019945793558636437, "loss": 0.2083, "step": 630 }, { "epoch": 16.73202614379085, "grad_norm": 1.0182403326034546, "learning_rate": 0.00019944060680666002, "loss": 0.2133, "step": 640 }, { "epoch": 16.99346405228758, "grad_norm": 0.9575550556182861, "learning_rate": 0.0001994230061601338, "loss": 0.2088, "step": 650 }, { "epoch": 17.254901960784313, "grad_norm": 1.2370105981826782, "learning_rate": 0.00019940513369490516, "loss": 0.205, "step": 660 }, { "epoch": 17.516339869281047, "grad_norm": 0.7969275712966919, "learning_rate": 0.00019938698945983676, "loss": 0.2053, "step": 670 }, { "epoch": 17.77777777777778, "grad_norm": 1.0018616914749146, "learning_rate": 0.0001993685735045343, "loss": 0.2086, "step": 680 }, { "epoch": 18.03921568627451, "grad_norm": 0.9241987466812134, "learning_rate": 0.00019934988587934623, "loss": 0.2025, "step": 690 }, { "epoch": 18.30065359477124, "grad_norm": 1.1506460905075073, "learning_rate": 0.00019933092663536382, "loss": 0.1951, "step": 700 }, { "epoch": 18.562091503267975, "grad_norm": 1.3489614725112915, "learning_rate": 0.00019931169582442095, "loss": 0.2139, "step": 710 }, { "epoch": 18.823529411764707, "grad_norm": 1.0618153810501099, "learning_rate": 0.00019929219349909392, "loss": 0.2014, "step": 720 }, { "epoch": 19.084967320261438, "grad_norm": 1.1984943151474, "learning_rate": 0.0001992724197127013, "loss": 0.2084, "step": 730 }, { "epoch": 19.34640522875817, "grad_norm": 0.9617899656295776, "learning_rate": 0.0001992523745193039, "loss": 0.1913, "step": 740 }, { "epoch": 19.607843137254903, "grad_norm": 0.951430082321167, "learning_rate": 0.0001992320579737045, "loss": 0.1922, "step": 750 }, { "epoch": 19.869281045751634, "grad_norm": 1.0308970212936401, "learning_rate": 0.0001992114701314478, "loss": 0.2028, "step": 760 }, { "epoch": 20.130718954248366, "grad_norm": 1.298295497894287, "learning_rate": 0.0001991906110488201, "loss": 0.1985, "step": 770 }, { "epoch": 20.392156862745097, "grad_norm": 0.980803370475769, "learning_rate": 0.0001991694807828494, "loss": 0.2017, "step": 780 }, { "epoch": 20.65359477124183, "grad_norm": 0.9850193858146667, "learning_rate": 0.000199148079391305, "loss": 0.1808, "step": 790 }, { "epoch": 20.915032679738562, "grad_norm": 0.8435742855072021, "learning_rate": 0.00019912640693269752, "loss": 0.1991, "step": 800 }, { "epoch": 21.176470588235293, "grad_norm": 1.2562199831008911, "learning_rate": 0.00019910446346627862, "loss": 0.1902, "step": 810 }, { "epoch": 21.437908496732025, "grad_norm": 1.2811332941055298, "learning_rate": 0.0001990822490520409, "loss": 0.1895, "step": 820 }, { "epoch": 21.69934640522876, "grad_norm": 1.185219407081604, "learning_rate": 0.00019905976375071772, "loss": 0.1873, "step": 830 }, { "epoch": 21.96078431372549, "grad_norm": 1.0126444101333618, "learning_rate": 0.000199037007623783, "loss": 0.2022, "step": 840 }, { "epoch": 22.22222222222222, "grad_norm": 1.0308198928833008, "learning_rate": 0.00019901398073345118, "loss": 0.186, "step": 850 }, { "epoch": 22.483660130718953, "grad_norm": 0.9931133389472961, "learning_rate": 0.00019899068314267688, "loss": 0.1892, "step": 860 }, { "epoch": 22.745098039215687, "grad_norm": 1.1834982633590698, "learning_rate": 0.00019896711491515482, "loss": 0.1866, "step": 870 }, { "epoch": 23.00653594771242, "grad_norm": 1.0095534324645996, "learning_rate": 0.0001989432761153196, "loss": 0.1877, "step": 880 }, { "epoch": 23.26797385620915, "grad_norm": 1.0486822128295898, "learning_rate": 0.00019891916680834566, "loss": 0.1848, "step": 890 }, { "epoch": 23.529411764705884, "grad_norm": 1.0561635494232178, "learning_rate": 0.00019889478706014687, "loss": 0.1825, "step": 900 }, { "epoch": 23.790849673202615, "grad_norm": 1.0218344926834106, "learning_rate": 0.00019887013693737653, "loss": 0.1781, "step": 910 }, { "epoch": 24.052287581699346, "grad_norm": 0.907986044883728, "learning_rate": 0.00019884521650742715, "loss": 0.1925, "step": 920 }, { "epoch": 24.313725490196077, "grad_norm": 1.3268672227859497, "learning_rate": 0.00019882002583843024, "loss": 0.1783, "step": 930 }, { "epoch": 24.575163398692812, "grad_norm": 0.8679687976837158, "learning_rate": 0.00019879456499925614, "loss": 0.1814, "step": 940 }, { "epoch": 24.836601307189543, "grad_norm": 1.194421648979187, "learning_rate": 0.00019876883405951377, "loss": 0.1818, "step": 950 }, { "epoch": 25.098039215686274, "grad_norm": 1.2440810203552246, "learning_rate": 0.00019874283308955057, "loss": 0.1841, "step": 960 }, { "epoch": 25.359477124183005, "grad_norm": 0.8485437035560608, "learning_rate": 0.0001987165621604522, "loss": 0.1721, "step": 970 }, { "epoch": 25.62091503267974, "grad_norm": 1.1203727722167969, "learning_rate": 0.00019869002134404235, "loss": 0.1772, "step": 980 }, { "epoch": 25.88235294117647, "grad_norm": 1.2701900005340576, "learning_rate": 0.0001986632107128826, "loss": 0.1823, "step": 990 }, { "epoch": 26.143790849673202, "grad_norm": 0.7330073118209839, "learning_rate": 0.00019863613034027224, "loss": 0.1762, "step": 1000 }, { "epoch": 26.405228758169933, "grad_norm": 1.0160173177719116, "learning_rate": 0.0001986087803002479, "loss": 0.1748, "step": 1010 }, { "epoch": 26.666666666666668, "grad_norm": 1.2125264406204224, "learning_rate": 0.00019858116066758362, "loss": 0.1818, "step": 1020 }, { "epoch": 26.9281045751634, "grad_norm": 1.3150966167449951, "learning_rate": 0.00019855327151779042, "loss": 0.1791, "step": 1030 }, { "epoch": 27.18954248366013, "grad_norm": 0.9498183131217957, "learning_rate": 0.00019852511292711608, "loss": 0.1746, "step": 1040 }, { "epoch": 27.45098039215686, "grad_norm": 1.101545810699463, "learning_rate": 0.0001984966849725452, "loss": 0.17, "step": 1050 }, { "epoch": 27.712418300653596, "grad_norm": 0.9787285923957825, "learning_rate": 0.00019846798773179866, "loss": 0.1759, "step": 1060 }, { "epoch": 27.973856209150327, "grad_norm": 0.9753018021583557, "learning_rate": 0.00019843902128333367, "loss": 0.1823, "step": 1070 }, { "epoch": 28.235294117647058, "grad_norm": 1.1331627368927002, "learning_rate": 0.0001984097857063434, "loss": 0.1682, "step": 1080 }, { "epoch": 28.49673202614379, "grad_norm": 1.199524998664856, "learning_rate": 0.00019838028108075671, "loss": 0.1735, "step": 1090 }, { "epoch": 28.758169934640524, "grad_norm": 0.83395916223526, "learning_rate": 0.00019835050748723824, "loss": 0.1683, "step": 1100 }, { "epoch": 29.019607843137255, "grad_norm": 0.954365611076355, "learning_rate": 0.0001983204650071878, "loss": 0.1764, "step": 1110 }, { "epoch": 29.281045751633986, "grad_norm": 1.1084930896759033, "learning_rate": 0.00019829015372274038, "loss": 0.1663, "step": 1120 }, { "epoch": 29.54248366013072, "grad_norm": 0.9061603546142578, "learning_rate": 0.00019825957371676588, "loss": 0.1618, "step": 1130 }, { "epoch": 29.80392156862745, "grad_norm": 1.2576953172683716, "learning_rate": 0.0001982287250728689, "loss": 0.1773, "step": 1140 }, { "epoch": 30.065359477124183, "grad_norm": 0.680230438709259, "learning_rate": 0.0001981976078753884, "loss": 0.1749, "step": 1150 }, { "epoch": 30.326797385620914, "grad_norm": 1.0407228469848633, "learning_rate": 0.0001981662222093976, "loss": 0.1608, "step": 1160 }, { "epoch": 30.58823529411765, "grad_norm": 1.0547195672988892, "learning_rate": 0.0001981345681607038, "loss": 0.1626, "step": 1170 }, { "epoch": 30.84967320261438, "grad_norm": 1.2236053943634033, "learning_rate": 0.00019810264581584787, "loss": 0.1701, "step": 1180 }, { "epoch": 31.11111111111111, "grad_norm": 1.0089291334152222, "learning_rate": 0.00019807045526210436, "loss": 0.1679, "step": 1190 }, { "epoch": 31.372549019607842, "grad_norm": 1.067421555519104, "learning_rate": 0.00019803799658748094, "loss": 0.1614, "step": 1200 }, { "epoch": 31.633986928104576, "grad_norm": 1.1328610181808472, "learning_rate": 0.00019800526988071843, "loss": 0.1691, "step": 1210 }, { "epoch": 31.895424836601308, "grad_norm": 1.216069221496582, "learning_rate": 0.0001979722752312904, "loss": 0.1674, "step": 1220 }, { "epoch": 32.15686274509804, "grad_norm": 0.9567670822143555, "learning_rate": 0.00019793901272940293, "loss": 0.1592, "step": 1230 }, { "epoch": 32.41830065359477, "grad_norm": 0.9017887115478516, "learning_rate": 0.00019790548246599447, "loss": 0.1598, "step": 1240 }, { "epoch": 32.6797385620915, "grad_norm": 1.1113390922546387, "learning_rate": 0.00019787168453273544, "loss": 0.1659, "step": 1250 }, { "epoch": 32.94117647058823, "grad_norm": 1.2047311067581177, "learning_rate": 0.00019783761902202813, "loss": 0.1682, "step": 1260 }, { "epoch": 33.20261437908497, "grad_norm": 1.0487027168273926, "learning_rate": 0.00019780328602700636, "loss": 0.16, "step": 1270 }, { "epoch": 33.4640522875817, "grad_norm": 1.0085713863372803, "learning_rate": 0.00019776868564153516, "loss": 0.1544, "step": 1280 }, { "epoch": 33.72549019607843, "grad_norm": 1.049383521080017, "learning_rate": 0.0001977338179602107, "loss": 0.1687, "step": 1290 }, { "epoch": 33.98692810457516, "grad_norm": 1.0386006832122803, "learning_rate": 0.00019769868307835994, "loss": 0.1637, "step": 1300 }, { "epoch": 34.248366013071895, "grad_norm": 1.0458704233169556, "learning_rate": 0.00019766328109204024, "loss": 0.1578, "step": 1310 }, { "epoch": 34.509803921568626, "grad_norm": 1.0181804895401, "learning_rate": 0.00019762761209803927, "loss": 0.1556, "step": 1320 }, { "epoch": 34.77124183006536, "grad_norm": 0.8453966975212097, "learning_rate": 0.00019759167619387476, "loss": 0.1594, "step": 1330 }, { "epoch": 35.032679738562095, "grad_norm": 0.8666368126869202, "learning_rate": 0.00019755547347779403, "loss": 0.1639, "step": 1340 }, { "epoch": 35.294117647058826, "grad_norm": 0.8416698575019836, "learning_rate": 0.000197519004048774, "loss": 0.1495, "step": 1350 }, { "epoch": 35.55555555555556, "grad_norm": 1.3281161785125732, "learning_rate": 0.0001974822680065206, "loss": 0.1574, "step": 1360 }, { "epoch": 35.81699346405229, "grad_norm": 0.953856348991394, "learning_rate": 0.00019744526545146886, "loss": 0.1614, "step": 1370 }, { "epoch": 36.07843137254902, "grad_norm": 0.6768958568572998, "learning_rate": 0.00019740799648478233, "loss": 0.1571, "step": 1380 }, { "epoch": 36.33986928104575, "grad_norm": 1.09906005859375, "learning_rate": 0.0001973704612083529, "loss": 0.1551, "step": 1390 }, { "epoch": 36.60130718954248, "grad_norm": 1.172494649887085, "learning_rate": 0.0001973326597248006, "loss": 0.1597, "step": 1400 }, { "epoch": 36.86274509803921, "grad_norm": 1.0662363767623901, "learning_rate": 0.00019729459213747326, "loss": 0.1625, "step": 1410 }, { "epoch": 37.12418300653595, "grad_norm": 0.7910565137863159, "learning_rate": 0.00019725625855044617, "loss": 0.1504, "step": 1420 }, { "epoch": 37.38562091503268, "grad_norm": 1.1636861562728882, "learning_rate": 0.00019721765906852197, "loss": 0.1508, "step": 1430 }, { "epoch": 37.64705882352941, "grad_norm": 1.0530037879943848, "learning_rate": 0.00019717879379723012, "loss": 0.1543, "step": 1440 }, { "epoch": 37.908496732026144, "grad_norm": 1.4254627227783203, "learning_rate": 0.00019713966284282678, "loss": 0.1624, "step": 1450 }, { "epoch": 38.169934640522875, "grad_norm": 1.1266350746154785, "learning_rate": 0.0001971002663122945, "loss": 0.1505, "step": 1460 }, { "epoch": 38.431372549019606, "grad_norm": 0.8052533864974976, "learning_rate": 0.00019706060431334187, "loss": 0.1473, "step": 1470 }, { "epoch": 38.69281045751634, "grad_norm": 1.1820138692855835, "learning_rate": 0.00019702067695440332, "loss": 0.1512, "step": 1480 }, { "epoch": 38.95424836601307, "grad_norm": 1.0051476955413818, "learning_rate": 0.0001969804843446387, "loss": 0.164, "step": 1490 }, { "epoch": 39.21568627450981, "grad_norm": 1.0447814464569092, "learning_rate": 0.00019694002659393305, "loss": 0.1503, "step": 1500 }, { "epoch": 39.47712418300654, "grad_norm": 0.8165223598480225, "learning_rate": 0.00019689930381289634, "loss": 0.1525, "step": 1510 }, { "epoch": 39.73856209150327, "grad_norm": 0.9069657325744629, "learning_rate": 0.0001968583161128631, "loss": 0.1539, "step": 1520 }, { "epoch": 40.0, "grad_norm": 1.4593009948730469, "learning_rate": 0.00019681706360589216, "loss": 0.1572, "step": 1530 }, { "epoch": 40.26143790849673, "grad_norm": 1.3013628721237183, "learning_rate": 0.00019677554640476624, "loss": 0.1459, "step": 1540 }, { "epoch": 40.52287581699346, "grad_norm": 1.1023391485214233, "learning_rate": 0.00019673376462299184, "loss": 0.1526, "step": 1550 }, { "epoch": 40.78431372549019, "grad_norm": 1.0940853357315063, "learning_rate": 0.00019669171837479873, "loss": 0.1581, "step": 1560 }, { "epoch": 41.04575163398693, "grad_norm": 0.940166175365448, "learning_rate": 0.00019664940777513974, "loss": 0.1492, "step": 1570 }, { "epoch": 41.30718954248366, "grad_norm": 0.9569455981254578, "learning_rate": 0.00019660683293969041, "loss": 0.1453, "step": 1580 }, { "epoch": 41.568627450980394, "grad_norm": 0.7959948182106018, "learning_rate": 0.0001965639939848488, "loss": 0.1505, "step": 1590 }, { "epoch": 41.830065359477125, "grad_norm": 0.991958498954773, "learning_rate": 0.00019652089102773488, "loss": 0.1518, "step": 1600 }, { "epoch": 42.091503267973856, "grad_norm": 1.0243059396743774, "learning_rate": 0.00019647752418619054, "loss": 0.1515, "step": 1610 }, { "epoch": 42.35294117647059, "grad_norm": 0.826440155506134, "learning_rate": 0.00019643389357877907, "loss": 0.1465, "step": 1620 }, { "epoch": 42.61437908496732, "grad_norm": 0.8773382902145386, "learning_rate": 0.00019638999932478486, "loss": 0.1468, "step": 1630 }, { "epoch": 42.87581699346405, "grad_norm": 0.7697790861129761, "learning_rate": 0.00019634584154421317, "loss": 0.1442, "step": 1640 }, { "epoch": 43.13725490196079, "grad_norm": 0.7857301831245422, "learning_rate": 0.00019630142035778964, "loss": 0.1473, "step": 1650 }, { "epoch": 43.39869281045752, "grad_norm": 0.8533034920692444, "learning_rate": 0.00019625673588696008, "loss": 0.1453, "step": 1660 }, { "epoch": 43.66013071895425, "grad_norm": 0.8283002376556396, "learning_rate": 0.0001962117882538902, "loss": 0.1469, "step": 1670 }, { "epoch": 43.92156862745098, "grad_norm": 1.0466629266738892, "learning_rate": 0.00019616657758146503, "loss": 0.1501, "step": 1680 }, { "epoch": 44.18300653594771, "grad_norm": 0.7576202750205994, "learning_rate": 0.00019612110399328892, "loss": 0.1438, "step": 1690 }, { "epoch": 44.44444444444444, "grad_norm": 0.8621203303337097, "learning_rate": 0.00019607536761368484, "loss": 0.1382, "step": 1700 }, { "epoch": 44.705882352941174, "grad_norm": 0.864361047744751, "learning_rate": 0.0001960293685676943, "loss": 0.1488, "step": 1710 }, { "epoch": 44.967320261437905, "grad_norm": 1.3524795770645142, "learning_rate": 0.00019598310698107702, "loss": 0.1501, "step": 1720 }, { "epoch": 45.22875816993464, "grad_norm": 0.8673699498176575, "learning_rate": 0.00019593658298031034, "loss": 0.1446, "step": 1730 }, { "epoch": 45.490196078431374, "grad_norm": 0.7573821544647217, "learning_rate": 0.0001958897966925891, "loss": 0.1404, "step": 1740 }, { "epoch": 45.751633986928105, "grad_norm": 1.00885009765625, "learning_rate": 0.0001958427482458253, "loss": 0.1499, "step": 1750 }, { "epoch": 46.01307189542484, "grad_norm": 1.3049975633621216, "learning_rate": 0.0001957954377686475, "loss": 0.1494, "step": 1760 }, { "epoch": 46.27450980392157, "grad_norm": 0.8838759660720825, "learning_rate": 0.00019574786539040077, "loss": 0.1401, "step": 1770 }, { "epoch": 46.5359477124183, "grad_norm": 0.7831525206565857, "learning_rate": 0.00019570003124114619, "loss": 0.1471, "step": 1780 }, { "epoch": 46.79738562091503, "grad_norm": 0.7171520590782166, "learning_rate": 0.00019565193545166052, "loss": 0.1414, "step": 1790 }, { "epoch": 47.05882352941177, "grad_norm": 0.7733674645423889, "learning_rate": 0.00019560357815343577, "loss": 0.1465, "step": 1800 }, { "epoch": 47.3202614379085, "grad_norm": 0.833474338054657, "learning_rate": 0.00019555495947867895, "loss": 0.1402, "step": 1810 }, { "epoch": 47.58169934640523, "grad_norm": 0.8420645594596863, "learning_rate": 0.0001955060795603117, "loss": 0.1412, "step": 1820 }, { "epoch": 47.84313725490196, "grad_norm": 0.9879257678985596, "learning_rate": 0.00019545693853196983, "loss": 0.1444, "step": 1830 }, { "epoch": 48.10457516339869, "grad_norm": 0.9052282571792603, "learning_rate": 0.000195407536528003, "loss": 0.1433, "step": 1840 }, { "epoch": 48.36601307189542, "grad_norm": 1.194151759147644, "learning_rate": 0.00019535787368347442, "loss": 0.1419, "step": 1850 }, { "epoch": 48.627450980392155, "grad_norm": 1.0415573120117188, "learning_rate": 0.00019530795013416046, "loss": 0.1413, "step": 1860 }, { "epoch": 48.888888888888886, "grad_norm": 0.9441238641738892, "learning_rate": 0.00019525776601655014, "loss": 0.1473, "step": 1870 }, { "epoch": 49.150326797385624, "grad_norm": 0.7534403800964355, "learning_rate": 0.00019520732146784491, "loss": 0.1386, "step": 1880 }, { "epoch": 49.411764705882355, "grad_norm": 1.0693260431289673, "learning_rate": 0.0001951566166259583, "loss": 0.1419, "step": 1890 }, { "epoch": 49.673202614379086, "grad_norm": 0.9667706489562988, "learning_rate": 0.00019510565162951537, "loss": 0.1411, "step": 1900 }, { "epoch": 49.93464052287582, "grad_norm": 0.8170807361602783, "learning_rate": 0.0001950544266178525, "loss": 0.146, "step": 1910 }, { "epoch": 50.19607843137255, "grad_norm": 0.820950448513031, "learning_rate": 0.00019500294173101687, "loss": 0.1409, "step": 1920 }, { "epoch": 50.45751633986928, "grad_norm": 0.9750927686691284, "learning_rate": 0.00019495119710976626, "loss": 0.1381, "step": 1930 }, { "epoch": 50.71895424836601, "grad_norm": 0.9925455451011658, "learning_rate": 0.00019489919289556845, "loss": 0.1455, "step": 1940 }, { "epoch": 50.98039215686274, "grad_norm": 0.7548269033432007, "learning_rate": 0.00019484692923060095, "loss": 0.1399, "step": 1950 }, { "epoch": 51.24183006535948, "grad_norm": 0.7531190514564514, "learning_rate": 0.0001947944062577507, "loss": 0.1343, "step": 1960 }, { "epoch": 51.50326797385621, "grad_norm": 0.9674361348152161, "learning_rate": 0.0001947416241206134, "loss": 0.1382, "step": 1970 }, { "epoch": 51.76470588235294, "grad_norm": 0.9091185331344604, "learning_rate": 0.0001946885829634935, "loss": 0.1445, "step": 1980 }, { "epoch": 52.02614379084967, "grad_norm": 0.987326979637146, "learning_rate": 0.00019463528293140345, "loss": 0.1453, "step": 1990 }, { "epoch": 52.287581699346404, "grad_norm": 0.6855144500732422, "learning_rate": 0.00019458172417006347, "loss": 0.1349, "step": 2000 }, { "epoch": 52.549019607843135, "grad_norm": 0.7305368781089783, "learning_rate": 0.00019452790682590124, "loss": 0.1397, "step": 2010 }, { "epoch": 52.810457516339866, "grad_norm": 0.9413096904754639, "learning_rate": 0.00019447383104605125, "loss": 0.1446, "step": 2020 }, { "epoch": 53.071895424836605, "grad_norm": 0.7378990650177002, "learning_rate": 0.0001944194969783547, "loss": 0.1409, "step": 2030 }, { "epoch": 53.333333333333336, "grad_norm": 0.6865296959877014, "learning_rate": 0.00019436490477135878, "loss": 0.1352, "step": 2040 }, { "epoch": 53.59477124183007, "grad_norm": 0.71687912940979, "learning_rate": 0.00019431005457431653, "loss": 0.1423, "step": 2050 }, { "epoch": 53.8562091503268, "grad_norm": 0.7911390662193298, "learning_rate": 0.0001942549465371863, "loss": 0.1374, "step": 2060 }, { "epoch": 54.11764705882353, "grad_norm": 0.8051833510398865, "learning_rate": 0.00019419958081063138, "loss": 0.1363, "step": 2070 }, { "epoch": 54.37908496732026, "grad_norm": 0.6623182892799377, "learning_rate": 0.00019414395754601947, "loss": 0.1375, "step": 2080 }, { "epoch": 54.64052287581699, "grad_norm": 0.8147381544113159, "learning_rate": 0.00019408807689542257, "loss": 0.1331, "step": 2090 }, { "epoch": 54.90196078431372, "grad_norm": 0.7623370885848999, "learning_rate": 0.00019403193901161613, "loss": 0.14, "step": 2100 }, { "epoch": 55.16339869281046, "grad_norm": 0.8199501037597656, "learning_rate": 0.00019397554404807906, "loss": 0.1389, "step": 2110 }, { "epoch": 55.42483660130719, "grad_norm": 1.0729644298553467, "learning_rate": 0.00019391889215899299, "loss": 0.1376, "step": 2120 }, { "epoch": 55.68627450980392, "grad_norm": 0.6667032241821289, "learning_rate": 0.00019386198349924207, "loss": 0.1359, "step": 2130 }, { "epoch": 55.947712418300654, "grad_norm": 0.9523484706878662, "learning_rate": 0.00019380481822441235, "loss": 0.1423, "step": 2140 }, { "epoch": 56.209150326797385, "grad_norm": 0.8002906441688538, "learning_rate": 0.00019374739649079153, "loss": 0.1315, "step": 2150 }, { "epoch": 56.470588235294116, "grad_norm": 0.6775190830230713, "learning_rate": 0.00019368971845536845, "loss": 0.1342, "step": 2160 }, { "epoch": 56.73202614379085, "grad_norm": 0.870460569858551, "learning_rate": 0.00019363178427583266, "loss": 0.1398, "step": 2170 }, { "epoch": 56.99346405228758, "grad_norm": 0.8694108128547668, "learning_rate": 0.000193573594110574, "loss": 0.1453, "step": 2180 }, { "epoch": 57.254901960784316, "grad_norm": 1.0844203233718872, "learning_rate": 0.00019351514811868207, "loss": 0.1336, "step": 2190 }, { "epoch": 57.51633986928105, "grad_norm": 0.7382115125656128, "learning_rate": 0.0001934564464599461, "loss": 0.1379, "step": 2200 }, { "epoch": 57.77777777777778, "grad_norm": 0.996875524520874, "learning_rate": 0.0001933974892948541, "loss": 0.1353, "step": 2210 }, { "epoch": 58.03921568627451, "grad_norm": 0.7248476147651672, "learning_rate": 0.0001933382767845928, "loss": 0.1386, "step": 2220 }, { "epoch": 58.30065359477124, "grad_norm": 0.671381413936615, "learning_rate": 0.00019327880909104683, "loss": 0.1347, "step": 2230 }, { "epoch": 58.56209150326797, "grad_norm": 0.5566993355751038, "learning_rate": 0.00019321908637679865, "loss": 0.1365, "step": 2240 }, { "epoch": 58.8235294117647, "grad_norm": 0.8637556433677673, "learning_rate": 0.0001931591088051279, "loss": 0.1344, "step": 2250 }, { "epoch": 59.08496732026144, "grad_norm": 0.6594296097755432, "learning_rate": 0.00019309887654001096, "loss": 0.1359, "step": 2260 }, { "epoch": 59.34640522875817, "grad_norm": 0.8212260603904724, "learning_rate": 0.0001930383897461205, "loss": 0.1286, "step": 2270 }, { "epoch": 59.6078431372549, "grad_norm": 0.7467018365859985, "learning_rate": 0.00019297764858882514, "loss": 0.1379, "step": 2280 }, { "epoch": 59.869281045751634, "grad_norm": 0.7997498512268066, "learning_rate": 0.0001929166532341889, "loss": 0.1379, "step": 2290 }, { "epoch": 60.130718954248366, "grad_norm": 0.5982789993286133, "learning_rate": 0.00019285540384897073, "loss": 0.1323, "step": 2300 }, { "epoch": 60.3921568627451, "grad_norm": 0.8748401403427124, "learning_rate": 0.00019279390060062407, "loss": 0.1317, "step": 2310 }, { "epoch": 60.65359477124183, "grad_norm": 0.965622067451477, "learning_rate": 0.00019273214365729655, "loss": 0.1355, "step": 2320 }, { "epoch": 60.91503267973856, "grad_norm": 0.5320419073104858, "learning_rate": 0.0001926701331878292, "loss": 0.1383, "step": 2330 }, { "epoch": 61.1764705882353, "grad_norm": 0.8016908764839172, "learning_rate": 0.00019260786936175635, "loss": 0.1356, "step": 2340 }, { "epoch": 61.43790849673203, "grad_norm": 0.5881072878837585, "learning_rate": 0.00019254535234930486, "loss": 0.1344, "step": 2350 }, { "epoch": 61.69934640522876, "grad_norm": 0.7665542960166931, "learning_rate": 0.00019248258232139388, "loss": 0.1352, "step": 2360 }, { "epoch": 61.96078431372549, "grad_norm": 0.709434449672699, "learning_rate": 0.0001924195594496343, "loss": 0.1352, "step": 2370 }, { "epoch": 62.22222222222222, "grad_norm": 0.5670029520988464, "learning_rate": 0.00019235628390632822, "loss": 0.1282, "step": 2380 }, { "epoch": 62.48366013071895, "grad_norm": 0.9107353091239929, "learning_rate": 0.0001922927558644685, "loss": 0.1294, "step": 2390 }, { "epoch": 62.745098039215684, "grad_norm": 0.6936364769935608, "learning_rate": 0.00019222897549773848, "loss": 0.1342, "step": 2400 }, { "epoch": 63.00653594771242, "grad_norm": 0.7786808013916016, "learning_rate": 0.0001921649429805112, "loss": 0.1399, "step": 2410 }, { "epoch": 63.26797385620915, "grad_norm": 0.6474490165710449, "learning_rate": 0.00019210065848784913, "loss": 0.1266, "step": 2420 }, { "epoch": 63.529411764705884, "grad_norm": 0.8766961097717285, "learning_rate": 0.00019203612219550358, "loss": 0.1319, "step": 2430 }, { "epoch": 63.790849673202615, "grad_norm": 0.8006402254104614, "learning_rate": 0.00019197133427991436, "loss": 0.1338, "step": 2440 }, { "epoch": 64.05228758169935, "grad_norm": 0.7504803538322449, "learning_rate": 0.00019190629491820912, "loss": 0.1408, "step": 2450 }, { "epoch": 64.31372549019608, "grad_norm": 0.814525842666626, "learning_rate": 0.000191841004288203, "loss": 0.1326, "step": 2460 }, { "epoch": 64.57516339869281, "grad_norm": 0.8137060403823853, "learning_rate": 0.00019177546256839812, "loss": 0.1316, "step": 2470 }, { "epoch": 64.83660130718954, "grad_norm": 0.7670857310295105, "learning_rate": 0.000191709669937983, "loss": 0.1348, "step": 2480 }, { "epoch": 65.09803921568627, "grad_norm": 0.646980345249176, "learning_rate": 0.00019164362657683222, "loss": 0.1333, "step": 2490 }, { "epoch": 65.359477124183, "grad_norm": 1.1511456966400146, "learning_rate": 0.00019157733266550575, "loss": 0.1341, "step": 2500 }, { "epoch": 65.62091503267973, "grad_norm": 0.6792668104171753, "learning_rate": 0.00019151078838524867, "loss": 0.1306, "step": 2510 }, { "epoch": 65.88235294117646, "grad_norm": 0.8312354683876038, "learning_rate": 0.00019144399391799043, "loss": 0.1354, "step": 2520 }, { "epoch": 66.14379084967321, "grad_norm": 0.7580569386482239, "learning_rate": 0.00019137694944634464, "loss": 0.1333, "step": 2530 }, { "epoch": 66.40522875816994, "grad_norm": 0.7622929811477661, "learning_rate": 0.0001913096551536083, "loss": 0.1302, "step": 2540 }, { "epoch": 66.66666666666667, "grad_norm": 0.9241070747375488, "learning_rate": 0.00019124211122376137, "loss": 0.131, "step": 2550 }, { "epoch": 66.9281045751634, "grad_norm": 0.94864821434021, "learning_rate": 0.00019117431784146645, "loss": 0.1374, "step": 2560 }, { "epoch": 67.18954248366013, "grad_norm": 0.6629276871681213, "learning_rate": 0.00019110627519206805, "loss": 0.1281, "step": 2570 }, { "epoch": 67.45098039215686, "grad_norm": 0.868118941783905, "learning_rate": 0.00019103798346159213, "loss": 0.1308, "step": 2580 }, { "epoch": 67.7124183006536, "grad_norm": 0.7864583730697632, "learning_rate": 0.00019096944283674571, "loss": 0.1332, "step": 2590 }, { "epoch": 67.97385620915033, "grad_norm": 0.7753363251686096, "learning_rate": 0.00019090065350491626, "loss": 0.136, "step": 2600 }, { "epoch": 68.23529411764706, "grad_norm": 0.4261542856693268, "learning_rate": 0.00019083161565417115, "loss": 0.1266, "step": 2610 }, { "epoch": 68.49673202614379, "grad_norm": 0.7071658968925476, "learning_rate": 0.00019076232947325722, "loss": 0.1277, "step": 2620 }, { "epoch": 68.75816993464052, "grad_norm": 0.913390040397644, "learning_rate": 0.00019069279515160025, "loss": 0.1335, "step": 2630 }, { "epoch": 69.01960784313725, "grad_norm": 0.8911900520324707, "learning_rate": 0.00019062301287930446, "loss": 0.1352, "step": 2640 }, { "epoch": 69.28104575163398, "grad_norm": 0.7245123982429504, "learning_rate": 0.00019055298284715192, "loss": 0.1286, "step": 2650 }, { "epoch": 69.54248366013071, "grad_norm": 0.7637701034545898, "learning_rate": 0.00019048270524660196, "loss": 0.1325, "step": 2660 }, { "epoch": 69.80392156862744, "grad_norm": 0.5611773133277893, "learning_rate": 0.00019041218026979095, "loss": 0.132, "step": 2670 }, { "epoch": 70.06535947712419, "grad_norm": 0.6525811553001404, "learning_rate": 0.0001903414081095315, "loss": 0.1303, "step": 2680 }, { "epoch": 70.32679738562092, "grad_norm": 0.5672200322151184, "learning_rate": 0.0001902703889593119, "loss": 0.1253, "step": 2690 }, { "epoch": 70.58823529411765, "grad_norm": 0.698940098285675, "learning_rate": 0.00019019912301329592, "loss": 0.1261, "step": 2700 }, { "epoch": 70.84967320261438, "grad_norm": 0.616579532623291, "learning_rate": 0.0001901276104663218, "loss": 0.1354, "step": 2710 }, { "epoch": 71.11111111111111, "grad_norm": 0.5749228000640869, "learning_rate": 0.00019005585151390223, "loss": 0.1297, "step": 2720 }, { "epoch": 71.37254901960785, "grad_norm": 0.9096235632896423, "learning_rate": 0.00018998384635222334, "loss": 0.1263, "step": 2730 }, { "epoch": 71.63398692810458, "grad_norm": 0.8338767290115356, "learning_rate": 0.0001899115951781446, "loss": 0.1301, "step": 2740 }, { "epoch": 71.89542483660131, "grad_norm": 0.7974913716316223, "learning_rate": 0.0001898390981891979, "loss": 0.1337, "step": 2750 }, { "epoch": 72.15686274509804, "grad_norm": 0.5519678592681885, "learning_rate": 0.00018976635558358722, "loss": 0.1287, "step": 2760 }, { "epoch": 72.41830065359477, "grad_norm": 0.7348606586456299, "learning_rate": 0.00018969336756018815, "loss": 0.1272, "step": 2770 }, { "epoch": 72.6797385620915, "grad_norm": 0.6163354516029358, "learning_rate": 0.00018962013431854702, "loss": 0.1294, "step": 2780 }, { "epoch": 72.94117647058823, "grad_norm": 0.8160730004310608, "learning_rate": 0.00018954665605888088, "loss": 0.1358, "step": 2790 }, { "epoch": 73.20261437908496, "grad_norm": 0.6369748711585999, "learning_rate": 0.00018947293298207635, "loss": 0.1265, "step": 2800 }, { "epoch": 73.4640522875817, "grad_norm": 0.8559942245483398, "learning_rate": 0.0001893989652896896, "loss": 0.1301, "step": 2810 }, { "epoch": 73.72549019607843, "grad_norm": 0.5984659790992737, "learning_rate": 0.0001893247531839454, "loss": 0.1317, "step": 2820 }, { "epoch": 73.98692810457516, "grad_norm": 0.7031850814819336, "learning_rate": 0.0001892502968677369, "loss": 0.1331, "step": 2830 }, { "epoch": 74.2483660130719, "grad_norm": 0.7341207265853882, "learning_rate": 0.00018917559654462474, "loss": 0.1268, "step": 2840 }, { "epoch": 74.50980392156863, "grad_norm": 0.6445427536964417, "learning_rate": 0.0001891006524188368, "loss": 0.1271, "step": 2850 }, { "epoch": 74.77124183006536, "grad_norm": 0.7867002487182617, "learning_rate": 0.00018902546469526743, "loss": 0.134, "step": 2860 }, { "epoch": 75.0326797385621, "grad_norm": 0.5532680153846741, "learning_rate": 0.00018895003357947705, "loss": 0.1271, "step": 2870 }, { "epoch": 75.29411764705883, "grad_norm": 0.6302035450935364, "learning_rate": 0.00018887435927769137, "loss": 0.1254, "step": 2880 }, { "epoch": 75.55555555555556, "grad_norm": 0.6897308826446533, "learning_rate": 0.0001887984419968011, "loss": 0.1271, "step": 2890 }, { "epoch": 75.81699346405229, "grad_norm": 0.5863034725189209, "learning_rate": 0.0001887222819443612, "loss": 0.1258, "step": 2900 }, { "epoch": 76.07843137254902, "grad_norm": 0.6140713095664978, "learning_rate": 0.00018864587932859028, "loss": 0.1323, "step": 2910 }, { "epoch": 76.33986928104575, "grad_norm": 0.8416547775268555, "learning_rate": 0.00018856923435837022, "loss": 0.1254, "step": 2920 }, { "epoch": 76.60130718954248, "grad_norm": 0.6811145544052124, "learning_rate": 0.00018849234724324543, "loss": 0.1269, "step": 2930 }, { "epoch": 76.86274509803921, "grad_norm": 0.7164523601531982, "learning_rate": 0.00018841521819342236, "loss": 0.1328, "step": 2940 }, { "epoch": 77.12418300653594, "grad_norm": 0.6024401783943176, "learning_rate": 0.0001883378474197689, "loss": 0.1281, "step": 2950 }, { "epoch": 77.38562091503267, "grad_norm": 0.6707399487495422, "learning_rate": 0.0001882602351338137, "loss": 0.1278, "step": 2960 }, { "epoch": 77.6470588235294, "grad_norm": 0.7291834354400635, "learning_rate": 0.00018818238154774588, "loss": 0.1306, "step": 2970 }, { "epoch": 77.90849673202614, "grad_norm": 0.8068193793296814, "learning_rate": 0.00018810428687441414, "loss": 0.1321, "step": 2980 }, { "epoch": 78.16993464052288, "grad_norm": 0.5824514627456665, "learning_rate": 0.00018802595132732635, "loss": 0.1228, "step": 2990 }, { "epoch": 78.43137254901961, "grad_norm": 0.7537580132484436, "learning_rate": 0.0001879473751206489, "loss": 0.1255, "step": 3000 }, { "epoch": 78.69281045751634, "grad_norm": 0.7544134259223938, "learning_rate": 0.00018786855846920615, "loss": 0.1286, "step": 3010 }, { "epoch": 78.95424836601308, "grad_norm": 0.695929229259491, "learning_rate": 0.00018778950158847976, "loss": 0.1332, "step": 3020 }, { "epoch": 79.2156862745098, "grad_norm": 0.5690885782241821, "learning_rate": 0.0001877102046946083, "loss": 0.1209, "step": 3030 }, { "epoch": 79.47712418300654, "grad_norm": 0.7582102417945862, "learning_rate": 0.00018763066800438636, "loss": 0.1267, "step": 3040 }, { "epoch": 79.73856209150327, "grad_norm": 0.48170891404151917, "learning_rate": 0.0001875508917352643, "loss": 0.1304, "step": 3050 }, { "epoch": 80.0, "grad_norm": 0.7321044206619263, "learning_rate": 0.00018747087610534736, "loss": 0.1319, "step": 3060 }, { "epoch": 80.26143790849673, "grad_norm": 0.6163921356201172, "learning_rate": 0.00018739062133339517, "loss": 0.1239, "step": 3070 }, { "epoch": 80.52287581699346, "grad_norm": 0.8649224042892456, "learning_rate": 0.00018731012763882133, "loss": 0.1294, "step": 3080 }, { "epoch": 80.7843137254902, "grad_norm": 0.635418713092804, "learning_rate": 0.0001872293952416924, "loss": 0.1274, "step": 3090 }, { "epoch": 81.04575163398692, "grad_norm": 0.5143495202064514, "learning_rate": 0.00018714842436272773, "loss": 0.1272, "step": 3100 }, { "epoch": 81.30718954248366, "grad_norm": 0.5938199758529663, "learning_rate": 0.00018706721522329862, "loss": 0.1274, "step": 3110 }, { "epoch": 81.56862745098039, "grad_norm": 0.6700434684753418, "learning_rate": 0.00018698576804542777, "loss": 0.1239, "step": 3120 }, { "epoch": 81.83006535947712, "grad_norm": 0.6935993432998657, "learning_rate": 0.00018690408305178856, "loss": 0.129, "step": 3130 }, { "epoch": 82.09150326797386, "grad_norm": 0.616468071937561, "learning_rate": 0.00018682216046570475, "loss": 0.1309, "step": 3140 }, { "epoch": 82.3529411764706, "grad_norm": 0.6784673929214478, "learning_rate": 0.00018674000051114952, "loss": 0.1256, "step": 3150 }, { "epoch": 82.61437908496733, "grad_norm": 0.6315344572067261, "learning_rate": 0.00018665760341274505, "loss": 0.1248, "step": 3160 }, { "epoch": 82.87581699346406, "grad_norm": 0.5992943644523621, "learning_rate": 0.00018657496939576186, "loss": 0.1289, "step": 3170 }, { "epoch": 83.13725490196079, "grad_norm": 0.5748761892318726, "learning_rate": 0.0001864920986861182, "loss": 0.13, "step": 3180 }, { "epoch": 83.39869281045752, "grad_norm": 0.4966670572757721, "learning_rate": 0.00018640899151037944, "loss": 0.125, "step": 3190 }, { "epoch": 83.66013071895425, "grad_norm": 0.7322400212287903, "learning_rate": 0.00018632564809575742, "loss": 0.1296, "step": 3200 }, { "epoch": 83.92156862745098, "grad_norm": 0.8756809234619141, "learning_rate": 0.0001862420686701098, "loss": 0.1258, "step": 3210 }, { "epoch": 84.18300653594771, "grad_norm": 0.6855464577674866, "learning_rate": 0.0001861582534619396, "loss": 0.1293, "step": 3220 }, { "epoch": 84.44444444444444, "grad_norm": 0.5204270482063293, "learning_rate": 0.0001860742027003944, "loss": 0.1239, "step": 3230 }, { "epoch": 84.70588235294117, "grad_norm": 0.6892998814582825, "learning_rate": 0.00018598991661526572, "loss": 0.1276, "step": 3240 }, { "epoch": 84.9673202614379, "grad_norm": 0.5613439083099365, "learning_rate": 0.00018590539543698854, "loss": 0.1291, "step": 3250 }, { "epoch": 85.22875816993464, "grad_norm": 0.6739300489425659, "learning_rate": 0.0001858206393966405, "loss": 0.1264, "step": 3260 }, { "epoch": 85.49019607843137, "grad_norm": 0.6257854700088501, "learning_rate": 0.00018573564872594145, "loss": 0.1228, "step": 3270 }, { "epoch": 85.7516339869281, "grad_norm": 0.7718020081520081, "learning_rate": 0.00018565042365725258, "loss": 0.1272, "step": 3280 }, { "epoch": 86.01307189542484, "grad_norm": 0.599905252456665, "learning_rate": 0.00018556496442357595, "loss": 0.131, "step": 3290 }, { "epoch": 86.27450980392157, "grad_norm": 0.4919801950454712, "learning_rate": 0.0001854792712585539, "loss": 0.1187, "step": 3300 }, { "epoch": 86.5359477124183, "grad_norm": 0.7735366821289062, "learning_rate": 0.00018539334439646824, "loss": 0.1292, "step": 3310 }, { "epoch": 86.79738562091504, "grad_norm": 0.5821804404258728, "learning_rate": 0.00018530718407223974, "loss": 0.1241, "step": 3320 }, { "epoch": 87.05882352941177, "grad_norm": 0.5197769403457642, "learning_rate": 0.00018522079052142747, "loss": 0.1327, "step": 3330 }, { "epoch": 87.3202614379085, "grad_norm": 0.728046715259552, "learning_rate": 0.00018513416398022802, "loss": 0.125, "step": 3340 }, { "epoch": 87.58169934640523, "grad_norm": 0.5133826732635498, "learning_rate": 0.0001850473046854751, "loss": 0.125, "step": 3350 }, { "epoch": 87.84313725490196, "grad_norm": 0.5190628170967102, "learning_rate": 0.0001849602128746387, "loss": 0.1266, "step": 3360 }, { "epoch": 88.10457516339869, "grad_norm": 0.5673719048500061, "learning_rate": 0.00018487288878582447, "loss": 0.1264, "step": 3370 }, { "epoch": 88.36601307189542, "grad_norm": 0.5447489023208618, "learning_rate": 0.00018478533265777318, "loss": 0.1227, "step": 3380 }, { "epoch": 88.62745098039215, "grad_norm": 0.7577037811279297, "learning_rate": 0.00018469754472985993, "loss": 0.1301, "step": 3390 }, { "epoch": 88.88888888888889, "grad_norm": 0.6202467083930969, "learning_rate": 0.00018460952524209355, "loss": 0.1282, "step": 3400 }, { "epoch": 89.15032679738562, "grad_norm": 0.5380336046218872, "learning_rate": 0.00018452127443511598, "loss": 0.1208, "step": 3410 }, { "epoch": 89.41176470588235, "grad_norm": 0.6075873970985413, "learning_rate": 0.00018443279255020152, "loss": 0.1253, "step": 3420 }, { "epoch": 89.67320261437908, "grad_norm": 0.6594501733779907, "learning_rate": 0.0001843440798292563, "loss": 0.1292, "step": 3430 }, { "epoch": 89.93464052287581, "grad_norm": 0.8450397849082947, "learning_rate": 0.00018425513651481747, "loss": 0.1272, "step": 3440 }, { "epoch": 90.19607843137256, "grad_norm": 0.5860391855239868, "learning_rate": 0.00018416596285005272, "loss": 0.1235, "step": 3450 }, { "epoch": 90.45751633986929, "grad_norm": 1.0777153968811035, "learning_rate": 0.0001840765590787594, "loss": 0.1236, "step": 3460 }, { "epoch": 90.71895424836602, "grad_norm": 0.5201640129089355, "learning_rate": 0.00018398692544536397, "loss": 0.1257, "step": 3470 }, { "epoch": 90.98039215686275, "grad_norm": 0.6437315344810486, "learning_rate": 0.00018389706219492147, "loss": 0.1309, "step": 3480 }, { "epoch": 91.24183006535948, "grad_norm": 0.7315922975540161, "learning_rate": 0.00018380696957311449, "loss": 0.1202, "step": 3490 }, { "epoch": 91.50326797385621, "grad_norm": 0.6596841216087341, "learning_rate": 0.00018371664782625287, "loss": 0.123, "step": 3500 }, { "epoch": 91.76470588235294, "grad_norm": 0.7001237869262695, "learning_rate": 0.0001836260972012728, "loss": 0.131, "step": 3510 }, { "epoch": 92.02614379084967, "grad_norm": 0.5139759182929993, "learning_rate": 0.00018353531794573625, "loss": 0.1269, "step": 3520 }, { "epoch": 92.2875816993464, "grad_norm": 0.5682742595672607, "learning_rate": 0.0001834443103078302, "loss": 0.1219, "step": 3530 }, { "epoch": 92.54901960784314, "grad_norm": 0.5991658568382263, "learning_rate": 0.0001833530745363661, "loss": 0.1253, "step": 3540 }, { "epoch": 92.81045751633987, "grad_norm": 0.600954532623291, "learning_rate": 0.00018326161088077903, "loss": 0.1286, "step": 3550 }, { "epoch": 93.0718954248366, "grad_norm": 0.6942703723907471, "learning_rate": 0.00018316991959112716, "loss": 0.129, "step": 3560 }, { "epoch": 93.33333333333333, "grad_norm": 0.723839282989502, "learning_rate": 0.00018307800091809097, "loss": 0.1243, "step": 3570 }, { "epoch": 93.59477124183006, "grad_norm": 0.8791704773902893, "learning_rate": 0.0001829858551129726, "loss": 0.1266, "step": 3580 }, { "epoch": 93.85620915032679, "grad_norm": 0.6914976835250854, "learning_rate": 0.00018289348242769515, "loss": 0.1252, "step": 3590 }, { "epoch": 94.11764705882354, "grad_norm": 0.40806615352630615, "learning_rate": 0.00018280088311480201, "loss": 0.1245, "step": 3600 }, { "epoch": 94.37908496732027, "grad_norm": 0.47459840774536133, "learning_rate": 0.00018270805742745617, "loss": 0.124, "step": 3610 }, { "epoch": 94.640522875817, "grad_norm": 0.5964429378509521, "learning_rate": 0.00018261500561943955, "loss": 0.1247, "step": 3620 }, { "epoch": 94.90196078431373, "grad_norm": 0.5425547957420349, "learning_rate": 0.00018252172794515223, "loss": 0.1291, "step": 3630 }, { "epoch": 95.16339869281046, "grad_norm": 0.6276856660842896, "learning_rate": 0.00018242822465961176, "loss": 0.1291, "step": 3640 }, { "epoch": 95.42483660130719, "grad_norm": 1.3327772617340088, "learning_rate": 0.00018233449601845258, "loss": 0.1209, "step": 3650 }, { "epoch": 95.68627450980392, "grad_norm": 0.9959401488304138, "learning_rate": 0.00018224054227792524, "loss": 0.1237, "step": 3660 }, { "epoch": 95.94771241830065, "grad_norm": 0.6117873787879944, "learning_rate": 0.00018214636369489563, "loss": 0.1336, "step": 3670 }, { "epoch": 96.20915032679738, "grad_norm": 0.5268058776855469, "learning_rate": 0.00018205196052684445, "loss": 0.1212, "step": 3680 }, { "epoch": 96.47058823529412, "grad_norm": 0.6816233396530151, "learning_rate": 0.00018195733303186633, "loss": 0.1291, "step": 3690 }, { "epoch": 96.73202614379085, "grad_norm": 0.6062767505645752, "learning_rate": 0.00018186248146866927, "loss": 0.1248, "step": 3700 }, { "epoch": 96.99346405228758, "grad_norm": 0.9882863759994507, "learning_rate": 0.0001817674060965737, "loss": 0.1297, "step": 3710 }, { "epoch": 97.25490196078431, "grad_norm": 0.5438330769538879, "learning_rate": 0.00018167210717551224, "loss": 0.1207, "step": 3720 }, { "epoch": 97.51633986928104, "grad_norm": 0.5340378284454346, "learning_rate": 0.00018157658496602833, "loss": 0.1219, "step": 3730 }, { "epoch": 97.77777777777777, "grad_norm": 0.6972679495811462, "learning_rate": 0.00018148083972927616, "loss": 0.1272, "step": 3740 }, { "epoch": 98.03921568627452, "grad_norm": 0.5716882348060608, "learning_rate": 0.0001813848717270195, "loss": 0.1293, "step": 3750 }, { "epoch": 98.30065359477125, "grad_norm": 0.6160675883293152, "learning_rate": 0.00018128868122163123, "loss": 0.1231, "step": 3760 }, { "epoch": 98.56209150326798, "grad_norm": 0.6029688715934753, "learning_rate": 0.00018119226847609245, "loss": 0.1243, "step": 3770 }, { "epoch": 98.82352941176471, "grad_norm": 0.48037102818489075, "learning_rate": 0.000181095633753992, "loss": 0.1244, "step": 3780 }, { "epoch": 99.08496732026144, "grad_norm": 0.5181258916854858, "learning_rate": 0.0001809987773195255, "loss": 0.1293, "step": 3790 }, { "epoch": 99.34640522875817, "grad_norm": 0.5500502586364746, "learning_rate": 0.00018090169943749476, "loss": 0.1223, "step": 3800 }, { "epoch": 99.6078431372549, "grad_norm": 0.5222991704940796, "learning_rate": 0.00018080440037330695, "loss": 0.1234, "step": 3810 }, { "epoch": 99.86928104575163, "grad_norm": 0.44636550545692444, "learning_rate": 0.00018070688039297403, "loss": 0.1229, "step": 3820 }, { "epoch": 100.13071895424837, "grad_norm": 0.6223607063293457, "learning_rate": 0.00018060913976311192, "loss": 0.1248, "step": 3830 }, { "epoch": 100.3921568627451, "grad_norm": 0.5242628455162048, "learning_rate": 0.00018051117875093976, "loss": 0.1228, "step": 3840 }, { "epoch": 100.65359477124183, "grad_norm": 0.5237357020378113, "learning_rate": 0.00018041299762427916, "loss": 0.1249, "step": 3850 }, { "epoch": 100.91503267973856, "grad_norm": 0.48059752583503723, "learning_rate": 0.00018031459665155363, "loss": 0.1244, "step": 3860 }, { "epoch": 101.17647058823529, "grad_norm": 0.6612676382064819, "learning_rate": 0.00018021597610178768, "loss": 0.1232, "step": 3870 }, { "epoch": 101.43790849673202, "grad_norm": 0.5979042053222656, "learning_rate": 0.00018011713624460608, "loss": 0.1238, "step": 3880 }, { "epoch": 101.69934640522875, "grad_norm": 0.5387808084487915, "learning_rate": 0.0001800180773502333, "loss": 0.1256, "step": 3890 }, { "epoch": 101.96078431372548, "grad_norm": 0.526494026184082, "learning_rate": 0.0001799187996894925, "loss": 0.1237, "step": 3900 }, { "epoch": 102.22222222222223, "grad_norm": 0.5353228449821472, "learning_rate": 0.00017981930353380503, "loss": 0.1196, "step": 3910 }, { "epoch": 102.48366013071896, "grad_norm": 0.4083467125892639, "learning_rate": 0.0001797195891551896, "loss": 0.1242, "step": 3920 }, { "epoch": 102.74509803921569, "grad_norm": 0.4955314099788666, "learning_rate": 0.0001796196568262615, "loss": 0.1273, "step": 3930 }, { "epoch": 103.00653594771242, "grad_norm": 0.6663371920585632, "learning_rate": 0.00017951950682023191, "loss": 0.1271, "step": 3940 }, { "epoch": 103.26797385620915, "grad_norm": 0.5380375385284424, "learning_rate": 0.0001794191394109071, "loss": 0.1197, "step": 3950 }, { "epoch": 103.52941176470588, "grad_norm": 0.5058585405349731, "learning_rate": 0.00017931855487268782, "loss": 0.1253, "step": 3960 }, { "epoch": 103.79084967320262, "grad_norm": 0.4719361364841461, "learning_rate": 0.0001792177534805682, "loss": 0.1221, "step": 3970 }, { "epoch": 104.05228758169935, "grad_norm": 0.5762635469436646, "learning_rate": 0.00017911673551013551, "loss": 0.1273, "step": 3980 }, { "epoch": 104.31372549019608, "grad_norm": 0.5460364818572998, "learning_rate": 0.00017901550123756906, "loss": 0.1207, "step": 3990 }, { "epoch": 104.57516339869281, "grad_norm": 0.6880167722702026, "learning_rate": 0.00017891405093963938, "loss": 0.123, "step": 4000 }, { "epoch": 104.83660130718954, "grad_norm": 0.5890039205551147, "learning_rate": 0.00017881238489370776, "loss": 0.1261, "step": 4010 }, { "epoch": 105.09803921568627, "grad_norm": 0.5330226421356201, "learning_rate": 0.00017871050337772525, "loss": 0.1257, "step": 4020 }, { "epoch": 105.359477124183, "grad_norm": 0.7687032222747803, "learning_rate": 0.00017860840667023212, "loss": 0.1191, "step": 4030 }, { "epoch": 105.62091503267973, "grad_norm": 0.5523906946182251, "learning_rate": 0.0001785060950503568, "loss": 0.1222, "step": 4040 }, { "epoch": 105.88235294117646, "grad_norm": 0.710148811340332, "learning_rate": 0.0001784035687978153, "loss": 0.1285, "step": 4050 }, { "epoch": 106.14379084967321, "grad_norm": 0.5394968390464783, "learning_rate": 0.0001783008281929106, "loss": 0.1236, "step": 4060 }, { "epoch": 106.40522875816994, "grad_norm": 0.9128403663635254, "learning_rate": 0.0001781978735165315, "loss": 0.123, "step": 4070 }, { "epoch": 106.66666666666667, "grad_norm": 0.6581969857215881, "learning_rate": 0.0001780947050501522, "loss": 0.1233, "step": 4080 }, { "epoch": 106.9281045751634, "grad_norm": 0.7595931887626648, "learning_rate": 0.00017799132307583132, "loss": 0.1265, "step": 4090 }, { "epoch": 107.18954248366013, "grad_norm": 0.4747548997402191, "learning_rate": 0.00017788772787621126, "loss": 0.1205, "step": 4100 }, { "epoch": 107.45098039215686, "grad_norm": 0.6191473603248596, "learning_rate": 0.0001777839197345173, "loss": 0.1227, "step": 4110 }, { "epoch": 107.7124183006536, "grad_norm": 0.6291956305503845, "learning_rate": 0.00017767989893455698, "loss": 0.1241, "step": 4120 }, { "epoch": 107.97385620915033, "grad_norm": 0.8078779578208923, "learning_rate": 0.00017757566576071914, "loss": 0.1276, "step": 4130 }, { "epoch": 108.23529411764706, "grad_norm": 0.534305989742279, "learning_rate": 0.00017747122049797335, "loss": 0.1237, "step": 4140 }, { "epoch": 108.49673202614379, "grad_norm": 0.6715922951698303, "learning_rate": 0.00017736656343186896, "loss": 0.1222, "step": 4150 }, { "epoch": 108.75816993464052, "grad_norm": 0.4797965884208679, "learning_rate": 0.00017726169484853438, "loss": 0.1233, "step": 4160 }, { "epoch": 109.01960784313725, "grad_norm": 0.5531324148178101, "learning_rate": 0.0001771566150346763, "loss": 0.1248, "step": 4170 }, { "epoch": 109.28104575163398, "grad_norm": 0.6008434891700745, "learning_rate": 0.00017705132427757895, "loss": 0.1191, "step": 4180 }, { "epoch": 109.54248366013071, "grad_norm": 0.4768673777580261, "learning_rate": 0.0001769458228651032, "loss": 0.1221, "step": 4190 }, { "epoch": 109.80392156862744, "grad_norm": 0.6039496064186096, "learning_rate": 0.00017684011108568592, "loss": 0.1272, "step": 4200 }, { "epoch": 110.06535947712419, "grad_norm": 0.4699447453022003, "learning_rate": 0.00017673418922833903, "loss": 0.124, "step": 4210 }, { "epoch": 110.32679738562092, "grad_norm": 0.5179737210273743, "learning_rate": 0.00017662805758264893, "loss": 0.1216, "step": 4220 }, { "epoch": 110.58823529411765, "grad_norm": 0.5560812950134277, "learning_rate": 0.0001765217164387754, "loss": 0.1254, "step": 4230 }, { "epoch": 110.84967320261438, "grad_norm": 0.5947309732437134, "learning_rate": 0.00017641516608745114, "loss": 0.126, "step": 4240 }, { "epoch": 111.11111111111111, "grad_norm": 0.5091108083724976, "learning_rate": 0.00017630840681998066, "loss": 0.1253, "step": 4250 }, { "epoch": 111.37254901960785, "grad_norm": 0.6985629796981812, "learning_rate": 0.00017620143892823977, "loss": 0.1234, "step": 4260 }, { "epoch": 111.63398692810458, "grad_norm": 0.4949619770050049, "learning_rate": 0.00017609426270467462, "loss": 0.1243, "step": 4270 }, { "epoch": 111.89542483660131, "grad_norm": 0.6455708742141724, "learning_rate": 0.00017598687844230088, "loss": 0.1268, "step": 4280 }, { "epoch": 112.15686274509804, "grad_norm": 0.45366206765174866, "learning_rate": 0.000175879286434703, "loss": 0.1218, "step": 4290 }, { "epoch": 112.41830065359477, "grad_norm": 0.4446161687374115, "learning_rate": 0.0001757714869760335, "loss": 0.1159, "step": 4300 }, { "epoch": 112.6797385620915, "grad_norm": 0.5011366009712219, "learning_rate": 0.00017566348036101187, "loss": 0.1237, "step": 4310 }, { "epoch": 112.94117647058823, "grad_norm": 0.6293872594833374, "learning_rate": 0.0001755552668849242, "loss": 0.1261, "step": 4320 }, { "epoch": 113.20261437908496, "grad_norm": 0.5598844885826111, "learning_rate": 0.0001754468468436219, "loss": 0.1257, "step": 4330 }, { "epoch": 113.4640522875817, "grad_norm": 0.4953804314136505, "learning_rate": 0.00017533822053352128, "loss": 0.1193, "step": 4340 }, { "epoch": 113.72549019607843, "grad_norm": 0.4813307225704193, "learning_rate": 0.0001752293882516025, "loss": 0.1203, "step": 4350 }, { "epoch": 113.98692810457516, "grad_norm": 0.5343033075332642, "learning_rate": 0.00017512035029540885, "loss": 0.1287, "step": 4360 }, { "epoch": 114.2483660130719, "grad_norm": 0.3980255722999573, "learning_rate": 0.00017501110696304596, "loss": 0.1194, "step": 4370 }, { "epoch": 114.50980392156863, "grad_norm": 0.49828794598579407, "learning_rate": 0.00017490165855318094, "loss": 0.1225, "step": 4380 }, { "epoch": 114.77124183006536, "grad_norm": 0.5137373208999634, "learning_rate": 0.00017479200536504156, "loss": 0.1246, "step": 4390 }, { "epoch": 115.0326797385621, "grad_norm": 0.4799012839794159, "learning_rate": 0.0001746821476984154, "loss": 0.1267, "step": 4400 }, { "epoch": 115.29411764705883, "grad_norm": 0.4675128757953644, "learning_rate": 0.00017457208585364918, "loss": 0.1212, "step": 4410 }, { "epoch": 115.55555555555556, "grad_norm": 0.5032903552055359, "learning_rate": 0.00017446182013164778, "loss": 0.1233, "step": 4420 }, { "epoch": 115.81699346405229, "grad_norm": 0.5266563892364502, "learning_rate": 0.00017435135083387345, "loss": 0.1259, "step": 4430 }, { "epoch": 116.07843137254902, "grad_norm": 0.5633729696273804, "learning_rate": 0.000174240678262345, "loss": 0.1229, "step": 4440 }, { "epoch": 116.33986928104575, "grad_norm": 0.6905789375305176, "learning_rate": 0.0001741298027196371, "loss": 0.1198, "step": 4450 }, { "epoch": 116.60130718954248, "grad_norm": 0.39248159527778625, "learning_rate": 0.00017401872450887917, "loss": 0.1201, "step": 4460 }, { "epoch": 116.86274509803921, "grad_norm": 0.48288437724113464, "learning_rate": 0.00017390744393375486, "loss": 0.1258, "step": 4470 }, { "epoch": 117.12418300653594, "grad_norm": 0.5383310914039612, "learning_rate": 0.00017379596129850098, "loss": 0.1219, "step": 4480 }, { "epoch": 117.38562091503267, "grad_norm": 0.5818719267845154, "learning_rate": 0.00017368427690790677, "loss": 0.122, "step": 4490 }, { "epoch": 117.6470588235294, "grad_norm": 0.6253119111061096, "learning_rate": 0.00017357239106731317, "loss": 0.1218, "step": 4500 }, { "epoch": 117.90849673202614, "grad_norm": 0.560431182384491, "learning_rate": 0.00017346030408261172, "loss": 0.1254, "step": 4510 }, { "epoch": 118.16993464052288, "grad_norm": 0.4778011441230774, "learning_rate": 0.000173348016260244, "loss": 0.1264, "step": 4520 }, { "epoch": 118.43137254901961, "grad_norm": 0.44935017824172974, "learning_rate": 0.00017323552790720058, "loss": 0.1194, "step": 4530 }, { "epoch": 118.69281045751634, "grad_norm": 0.5576562881469727, "learning_rate": 0.00017312283933102038, "loss": 0.1262, "step": 4540 }, { "epoch": 118.95424836601308, "grad_norm": 0.43395355343818665, "learning_rate": 0.00017300995083978965, "loss": 0.1206, "step": 4550 }, { "epoch": 119.2156862745098, "grad_norm": 0.5254650712013245, "learning_rate": 0.00017289686274214118, "loss": 0.1222, "step": 4560 }, { "epoch": 119.47712418300654, "grad_norm": 0.5673931241035461, "learning_rate": 0.0001727835753472535, "loss": 0.1186, "step": 4570 }, { "epoch": 119.73856209150327, "grad_norm": 0.38074398040771484, "learning_rate": 0.0001726700889648501, "loss": 0.1214, "step": 4580 }, { "epoch": 120.0, "grad_norm": 0.5379620790481567, "learning_rate": 0.00017255640390519836, "loss": 0.1262, "step": 4590 }, { "epoch": 120.26143790849673, "grad_norm": 0.46410471200942993, "learning_rate": 0.00017244252047910892, "loss": 0.1187, "step": 4600 }, { "epoch": 120.52287581699346, "grad_norm": 0.5001809000968933, "learning_rate": 0.00017232843899793468, "loss": 0.1183, "step": 4610 }, { "epoch": 120.7843137254902, "grad_norm": 0.402111291885376, "learning_rate": 0.00017221415977357007, "loss": 0.1243, "step": 4620 }, { "epoch": 121.04575163398692, "grad_norm": 0.389432817697525, "learning_rate": 0.00017209968311845012, "loss": 0.1246, "step": 4630 }, { "epoch": 121.30718954248366, "grad_norm": 0.5195785760879517, "learning_rate": 0.00017198500934554966, "loss": 0.1171, "step": 4640 }, { "epoch": 121.56862745098039, "grad_norm": 0.4785175323486328, "learning_rate": 0.0001718701387683824, "loss": 0.1199, "step": 4650 }, { "epoch": 121.83006535947712, "grad_norm": 0.5644505620002747, "learning_rate": 0.0001717550717010001, "loss": 0.1251, "step": 4660 }, { "epoch": 122.09150326797386, "grad_norm": 0.506691038608551, "learning_rate": 0.0001716398084579917, "loss": 0.1259, "step": 4670 }, { "epoch": 122.3529411764706, "grad_norm": 0.491377592086792, "learning_rate": 0.00017152434935448256, "loss": 0.1203, "step": 4680 }, { "epoch": 122.61437908496733, "grad_norm": 0.5997154712677002, "learning_rate": 0.00017140869470613342, "loss": 0.1179, "step": 4690 }, { "epoch": 122.87581699346406, "grad_norm": 0.4591546356678009, "learning_rate": 0.00017129284482913972, "loss": 0.1218, "step": 4700 }, { "epoch": 123.13725490196079, "grad_norm": 0.3804793655872345, "learning_rate": 0.00017117680004023056, "loss": 0.1195, "step": 4710 }, { "epoch": 123.39869281045752, "grad_norm": 0.4563983082771301, "learning_rate": 0.00017106056065666793, "loss": 0.1177, "step": 4720 }, { "epoch": 123.66013071895425, "grad_norm": 0.5122204422950745, "learning_rate": 0.00017094412699624595, "loss": 0.1221, "step": 4730 }, { "epoch": 123.92156862745098, "grad_norm": 0.4884132146835327, "learning_rate": 0.00017082749937728973, "loss": 0.1245, "step": 4740 }, { "epoch": 124.18300653594771, "grad_norm": 0.38031864166259766, "learning_rate": 0.00017071067811865476, "loss": 0.1196, "step": 4750 }, { "epoch": 124.44444444444444, "grad_norm": 0.4657631814479828, "learning_rate": 0.0001705936635397259, "loss": 0.1197, "step": 4760 }, { "epoch": 124.70588235294117, "grad_norm": 0.5115662217140198, "learning_rate": 0.00017047645596041653, "loss": 0.1217, "step": 4770 }, { "epoch": 124.9673202614379, "grad_norm": 0.39813730120658875, "learning_rate": 0.0001703590557011677, "loss": 0.12, "step": 4780 }, { "epoch": 125.22875816993464, "grad_norm": 0.4247078001499176, "learning_rate": 0.00017024146308294724, "loss": 0.1169, "step": 4790 }, { "epoch": 125.49019607843137, "grad_norm": 0.41345420479774475, "learning_rate": 0.00017012367842724887, "loss": 0.1212, "step": 4800 }, { "epoch": 125.7516339869281, "grad_norm": 0.43008559942245483, "learning_rate": 0.00017000570205609136, "loss": 0.1198, "step": 4810 }, { "epoch": 126.01307189542484, "grad_norm": 0.47986850142478943, "learning_rate": 0.00016988753429201755, "loss": 0.125, "step": 4820 }, { "epoch": 126.27450980392157, "grad_norm": 0.3475463390350342, "learning_rate": 0.00016976917545809367, "loss": 0.1146, "step": 4830 }, { "epoch": 126.5359477124183, "grad_norm": 0.38883545994758606, "learning_rate": 0.00016965062587790823, "loss": 0.1189, "step": 4840 }, { "epoch": 126.79738562091504, "grad_norm": 0.44550979137420654, "learning_rate": 0.00016953188587557122, "loss": 0.1247, "step": 4850 }, { "epoch": 127.05882352941177, "grad_norm": 0.3760294020175934, "learning_rate": 0.0001694129557757133, "loss": 0.125, "step": 4860 }, { "epoch": 127.3202614379085, "grad_norm": 0.46050703525543213, "learning_rate": 0.0001692938359034848, "loss": 0.1161, "step": 4870 }, { "epoch": 127.58169934640523, "grad_norm": 0.48216715455055237, "learning_rate": 0.00016917452658455495, "loss": 0.1198, "step": 4880 }, { "epoch": 127.84313725490196, "grad_norm": 0.4969286620616913, "learning_rate": 0.00016905502814511082, "loss": 0.1231, "step": 4890 }, { "epoch": 128.1045751633987, "grad_norm": 0.3721112012863159, "learning_rate": 0.0001689353409118566, "loss": 0.1275, "step": 4900 }, { "epoch": 128.36601307189542, "grad_norm": 0.518031895160675, "learning_rate": 0.0001688154652120126, "loss": 0.118, "step": 4910 }, { "epoch": 128.62745098039215, "grad_norm": 0.49686360359191895, "learning_rate": 0.00016869540137331445, "loss": 0.1179, "step": 4920 }, { "epoch": 128.88888888888889, "grad_norm": 0.5079768300056458, "learning_rate": 0.00016857514972401207, "loss": 0.1252, "step": 4930 }, { "epoch": 129.15032679738562, "grad_norm": 0.4462049901485443, "learning_rate": 0.00016845471059286887, "loss": 0.1196, "step": 4940 }, { "epoch": 129.41176470588235, "grad_norm": 0.4287566840648651, "learning_rate": 0.00016833408430916085, "loss": 0.1198, "step": 4950 }, { "epoch": 129.67320261437908, "grad_norm": 0.4560422897338867, "learning_rate": 0.00016821327120267567, "loss": 0.1218, "step": 4960 }, { "epoch": 129.9346405228758, "grad_norm": 0.40253371000289917, "learning_rate": 0.0001680922716037117, "loss": 0.1219, "step": 4970 }, { "epoch": 130.19607843137254, "grad_norm": 0.33488982915878296, "learning_rate": 0.00016797108584307732, "loss": 0.1202, "step": 4980 }, { "epoch": 130.45751633986927, "grad_norm": 0.4206305146217346, "learning_rate": 0.00016784971425208965, "loss": 0.1189, "step": 4990 }, { "epoch": 130.718954248366, "grad_norm": 0.4833426773548126, "learning_rate": 0.00016772815716257412, "loss": 0.1209, "step": 5000 }, { "epoch": 130.98039215686273, "grad_norm": 0.5023657083511353, "learning_rate": 0.00016760641490686307, "loss": 0.1221, "step": 5010 }, { "epoch": 131.24183006535947, "grad_norm": 0.4237312972545624, "learning_rate": 0.0001674844878177952, "loss": 0.1204, "step": 5020 }, { "epoch": 131.5032679738562, "grad_norm": 0.42303699254989624, "learning_rate": 0.00016736237622871452, "loss": 0.1143, "step": 5030 }, { "epoch": 131.76470588235293, "grad_norm": 0.4471609890460968, "learning_rate": 0.00016724008047346947, "loss": 0.1242, "step": 5040 }, { "epoch": 132.0261437908497, "grad_norm": 0.43694472312927246, "learning_rate": 0.00016711760088641196, "loss": 0.1245, "step": 5050 }, { "epoch": 132.28758169934642, "grad_norm": 0.460407018661499, "learning_rate": 0.0001669949378023965, "loss": 0.119, "step": 5060 }, { "epoch": 132.54901960784315, "grad_norm": 0.39372462034225464, "learning_rate": 0.00016687209155677929, "loss": 0.1159, "step": 5070 }, { "epoch": 132.81045751633988, "grad_norm": 0.38842689990997314, "learning_rate": 0.00016674906248541726, "loss": 0.1241, "step": 5080 }, { "epoch": 133.0718954248366, "grad_norm": 0.40953928232192993, "learning_rate": 0.00016662585092466723, "loss": 0.1254, "step": 5090 }, { "epoch": 133.33333333333334, "grad_norm": 0.4259697496891022, "learning_rate": 0.0001665024572113848, "loss": 0.1187, "step": 5100 }, { "epoch": 133.59477124183007, "grad_norm": 0.46274760365486145, "learning_rate": 0.00016637888168292384, "loss": 0.1174, "step": 5110 }, { "epoch": 133.8562091503268, "grad_norm": 0.5445423126220703, "learning_rate": 0.000166255124677135, "loss": 0.121, "step": 5120 }, { "epoch": 134.11764705882354, "grad_norm": 0.38918498158454895, "learning_rate": 0.00016613118653236518, "loss": 0.122, "step": 5130 }, { "epoch": 134.37908496732027, "grad_norm": 0.3996954560279846, "learning_rate": 0.00016600706758745668, "loss": 0.1202, "step": 5140 }, { "epoch": 134.640522875817, "grad_norm": 0.42393553256988525, "learning_rate": 0.0001658827681817458, "loss": 0.1226, "step": 5150 }, { "epoch": 134.90196078431373, "grad_norm": 0.5191143751144409, "learning_rate": 0.00016575828865506245, "loss": 0.1208, "step": 5160 }, { "epoch": 135.16339869281046, "grad_norm": 0.3241782784461975, "learning_rate": 0.00016563362934772892, "loss": 0.119, "step": 5170 }, { "epoch": 135.4248366013072, "grad_norm": 0.4285520017147064, "learning_rate": 0.00016550879060055895, "loss": 0.1167, "step": 5180 }, { "epoch": 135.68627450980392, "grad_norm": 0.4682152271270752, "learning_rate": 0.00016538377275485691, "loss": 0.1234, "step": 5190 }, { "epoch": 135.94771241830065, "grad_norm": 0.5249207615852356, "learning_rate": 0.00016525857615241687, "loss": 0.122, "step": 5200 }, { "epoch": 136.20915032679738, "grad_norm": 0.3867131173610687, "learning_rate": 0.00016513320113552152, "loss": 0.1183, "step": 5210 }, { "epoch": 136.47058823529412, "grad_norm": 0.437995582818985, "learning_rate": 0.0001650076480469413, "loss": 0.1216, "step": 5220 }, { "epoch": 136.73202614379085, "grad_norm": 0.47116023302078247, "learning_rate": 0.0001648819172299337, "loss": 0.1208, "step": 5230 }, { "epoch": 136.99346405228758, "grad_norm": 0.4164656698703766, "learning_rate": 0.0001647560090282419, "loss": 0.1206, "step": 5240 }, { "epoch": 137.2549019607843, "grad_norm": 0.41538143157958984, "learning_rate": 0.00016462992378609407, "loss": 0.1144, "step": 5250 }, { "epoch": 137.51633986928104, "grad_norm": 0.48748481273651123, "learning_rate": 0.00016450366184820255, "loss": 0.1241, "step": 5260 }, { "epoch": 137.77777777777777, "grad_norm": 0.49401819705963135, "learning_rate": 0.00016437722355976258, "loss": 0.1237, "step": 5270 }, { "epoch": 138.0392156862745, "grad_norm": 0.41417965292930603, "learning_rate": 0.00016425060926645167, "loss": 0.1196, "step": 5280 }, { "epoch": 138.30065359477123, "grad_norm": 0.34107911586761475, "learning_rate": 0.00016412381931442838, "loss": 0.1149, "step": 5290 }, { "epoch": 138.56209150326796, "grad_norm": 0.5301646590232849, "learning_rate": 0.00016399685405033167, "loss": 0.1173, "step": 5300 }, { "epoch": 138.8235294117647, "grad_norm": 0.500399649143219, "learning_rate": 0.0001638697138212797, "loss": 0.123, "step": 5310 }, { "epoch": 139.08496732026143, "grad_norm": 0.43028515577316284, "learning_rate": 0.000163742398974869, "loss": 0.1254, "step": 5320 }, { "epoch": 139.34640522875816, "grad_norm": 0.3889625668525696, "learning_rate": 0.0001636149098591735, "loss": 0.1163, "step": 5330 }, { "epoch": 139.6078431372549, "grad_norm": 0.4060676693916321, "learning_rate": 0.00016348724682274353, "loss": 0.1185, "step": 5340 }, { "epoch": 139.86928104575162, "grad_norm": 0.44942519068717957, "learning_rate": 0.00016335941021460506, "loss": 0.1216, "step": 5350 }, { "epoch": 140.13071895424838, "grad_norm": 0.4668162167072296, "learning_rate": 0.00016323140038425842, "loss": 0.1222, "step": 5360 }, { "epoch": 140.3921568627451, "grad_norm": 0.4359992444515228, "learning_rate": 0.00016310321768167762, "loss": 0.1183, "step": 5370 }, { "epoch": 140.65359477124184, "grad_norm": 0.48698097467422485, "learning_rate": 0.00016297486245730927, "loss": 0.1182, "step": 5380 }, { "epoch": 140.91503267973857, "grad_norm": 0.4088447391986847, "learning_rate": 0.0001628463350620716, "loss": 0.1259, "step": 5390 }, { "epoch": 141.1764705882353, "grad_norm": 0.407049298286438, "learning_rate": 0.0001627176358473537, "loss": 0.1209, "step": 5400 }, { "epoch": 141.43790849673204, "grad_norm": 0.4178103804588318, "learning_rate": 0.00016258876516501424, "loss": 0.1221, "step": 5410 }, { "epoch": 141.69934640522877, "grad_norm": 0.44751349091529846, "learning_rate": 0.0001624597233673808, "loss": 0.1201, "step": 5420 }, { "epoch": 141.9607843137255, "grad_norm": 0.4328495264053345, "learning_rate": 0.00016233051080724868, "loss": 0.1221, "step": 5430 }, { "epoch": 142.22222222222223, "grad_norm": 0.44036683440208435, "learning_rate": 0.0001622011278378801, "loss": 0.1167, "step": 5440 }, { "epoch": 142.48366013071896, "grad_norm": 0.5208015441894531, "learning_rate": 0.00016207157481300312, "loss": 0.1191, "step": 5450 }, { "epoch": 142.7450980392157, "grad_norm": 0.4127715826034546, "learning_rate": 0.00016194185208681083, "loss": 0.1215, "step": 5460 }, { "epoch": 143.00653594771242, "grad_norm": 0.4773133397102356, "learning_rate": 0.00016181196001396019, "loss": 0.1204, "step": 5470 }, { "epoch": 143.26797385620915, "grad_norm": 0.37050458788871765, "learning_rate": 0.0001616818989495711, "loss": 0.1149, "step": 5480 }, { "epoch": 143.52941176470588, "grad_norm": 0.34875085949897766, "learning_rate": 0.00016155166924922566, "loss": 0.1201, "step": 5490 }, { "epoch": 143.79084967320262, "grad_norm": 0.3563850224018097, "learning_rate": 0.0001614212712689668, "loss": 0.1218, "step": 5500 }, { "epoch": 144.05228758169935, "grad_norm": 0.37683019042015076, "learning_rate": 0.00016129070536529766, "loss": 0.1199, "step": 5510 }, { "epoch": 144.31372549019608, "grad_norm": 0.415547639131546, "learning_rate": 0.00016115997189518043, "loss": 0.1165, "step": 5520 }, { "epoch": 144.5751633986928, "grad_norm": 0.3484375476837158, "learning_rate": 0.00016102907121603543, "loss": 0.1197, "step": 5530 }, { "epoch": 144.83660130718954, "grad_norm": 0.3772566616535187, "learning_rate": 0.00016089800368574014, "loss": 0.1236, "step": 5540 }, { "epoch": 145.09803921568627, "grad_norm": 0.35378995537757874, "learning_rate": 0.00016076676966262813, "loss": 0.1188, "step": 5550 }, { "epoch": 145.359477124183, "grad_norm": 0.34689784049987793, "learning_rate": 0.00016063536950548826, "loss": 0.1169, "step": 5560 }, { "epoch": 145.62091503267973, "grad_norm": 0.35840165615081787, "learning_rate": 0.0001605038035735635, "loss": 0.1199, "step": 5570 }, { "epoch": 145.88235294117646, "grad_norm": 0.40955159068107605, "learning_rate": 0.0001603720722265501, "loss": 0.1236, "step": 5580 }, { "epoch": 146.1437908496732, "grad_norm": 0.3746141493320465, "learning_rate": 0.00016024017582459652, "loss": 0.1176, "step": 5590 }, { "epoch": 146.40522875816993, "grad_norm": 0.4733276069164276, "learning_rate": 0.00016010811472830252, "loss": 0.1198, "step": 5600 }, { "epoch": 146.66666666666666, "grad_norm": 0.4011492431163788, "learning_rate": 0.00015997588929871808, "loss": 0.1199, "step": 5610 }, { "epoch": 146.9281045751634, "grad_norm": 0.5024054050445557, "learning_rate": 0.00015985674620589864, "loss": 0.121, "step": 5620 }, { "epoch": 147.18954248366012, "grad_norm": 0.4476403594017029, "learning_rate": 0.00015972420953936335, "loss": 0.1189, "step": 5630 }, { "epoch": 147.45098039215685, "grad_norm": 0.45632895827293396, "learning_rate": 0.0001595915095891198, "loss": 0.1168, "step": 5640 }, { "epoch": 147.71241830065358, "grad_norm": 0.48059502243995667, "learning_rate": 0.00015945864671796452, "loss": 0.1226, "step": 5650 }, { "epoch": 147.9738562091503, "grad_norm": 0.3604694604873657, "learning_rate": 0.0001593256212891395, "loss": 0.1218, "step": 5660 }, { "epoch": 148.23529411764707, "grad_norm": 1.3692547082901, "learning_rate": 0.00015919243366633126, "loss": 0.1168, "step": 5670 }, { "epoch": 148.4967320261438, "grad_norm": 0.70684415102005, "learning_rate": 0.00015905908421366962, "loss": 0.1165, "step": 5680 }, { "epoch": 148.75816993464053, "grad_norm": 0.9317061901092529, "learning_rate": 0.0001589255732957269, "loss": 0.1231, "step": 5690 }, { "epoch": 149.01960784313727, "grad_norm": 0.6531818509101868, "learning_rate": 0.00015879190127751684, "loss": 0.1257, "step": 5700 }, { "epoch": 149.281045751634, "grad_norm": 0.5326528549194336, "learning_rate": 0.00015865806852449367, "loss": 0.1136, "step": 5710 }, { "epoch": 149.54248366013073, "grad_norm": 0.5003425478935242, "learning_rate": 0.00015852407540255104, "loss": 0.123, "step": 5720 }, { "epoch": 149.80392156862746, "grad_norm": 0.4232426583766937, "learning_rate": 0.00015838992227802093, "loss": 0.1245, "step": 5730 }, { "epoch": 150.0653594771242, "grad_norm": 0.3918308615684509, "learning_rate": 0.00015825560951767298, "loss": 0.118, "step": 5740 }, { "epoch": 150.32679738562092, "grad_norm": 0.4566536545753479, "learning_rate": 0.00015812113748871304, "loss": 0.1194, "step": 5750 }, { "epoch": 150.58823529411765, "grad_norm": 0.4792901277542114, "learning_rate": 0.00015798650655878262, "loss": 0.1235, "step": 5760 }, { "epoch": 150.84967320261438, "grad_norm": 0.4351622462272644, "learning_rate": 0.00015785171709595743, "loss": 0.1183, "step": 5770 }, { "epoch": 151.11111111111111, "grad_norm": 0.4398731589317322, "learning_rate": 0.0001577167694687468, "loss": 0.1213, "step": 5780 }, { "epoch": 151.37254901960785, "grad_norm": 0.4514036476612091, "learning_rate": 0.00015758166404609232, "loss": 0.1184, "step": 5790 }, { "epoch": 151.63398692810458, "grad_norm": 0.46276119351387024, "learning_rate": 0.0001574464011973671, "loss": 0.1182, "step": 5800 }, { "epoch": 151.8954248366013, "grad_norm": 0.4101347327232361, "learning_rate": 0.00015731098129237458, "loss": 0.1245, "step": 5810 }, { "epoch": 152.15686274509804, "grad_norm": 0.45118212699890137, "learning_rate": 0.00015717540470134761, "loss": 0.1173, "step": 5820 }, { "epoch": 152.41830065359477, "grad_norm": 0.47819066047668457, "learning_rate": 0.00015703967179494748, "loss": 0.1193, "step": 5830 }, { "epoch": 152.6797385620915, "grad_norm": 0.4761189818382263, "learning_rate": 0.00015690378294426266, "loss": 0.1208, "step": 5840 }, { "epoch": 152.94117647058823, "grad_norm": 0.47973722219467163, "learning_rate": 0.00015676773852080813, "loss": 0.1205, "step": 5850 }, { "epoch": 153.20261437908496, "grad_norm": 0.4598498046398163, "learning_rate": 0.0001566315388965242, "loss": 0.119, "step": 5860 }, { "epoch": 153.4640522875817, "grad_norm": 0.42741018533706665, "learning_rate": 0.00015649518444377537, "loss": 0.1176, "step": 5870 }, { "epoch": 153.72549019607843, "grad_norm": 0.40698984265327454, "learning_rate": 0.00015635867553534955, "loss": 0.1215, "step": 5880 }, { "epoch": 153.98692810457516, "grad_norm": 0.35974419116973877, "learning_rate": 0.00015622201254445684, "loss": 0.1221, "step": 5890 }, { "epoch": 154.2483660130719, "grad_norm": 0.4019433856010437, "learning_rate": 0.0001560851958447287, "loss": 0.1168, "step": 5900 }, { "epoch": 154.50980392156862, "grad_norm": 0.4910339117050171, "learning_rate": 0.0001559482258102167, "loss": 0.1213, "step": 5910 }, { "epoch": 154.77124183006535, "grad_norm": 0.40200385451316833, "learning_rate": 0.00015581110281539173, "loss": 0.12, "step": 5920 }, { "epoch": 155.03267973856208, "grad_norm": 0.36135050654411316, "learning_rate": 0.0001556738272351428, "loss": 0.1166, "step": 5930 }, { "epoch": 155.2941176470588, "grad_norm": 0.41371849179267883, "learning_rate": 0.00015553639944477612, "loss": 0.1184, "step": 5940 }, { "epoch": 155.55555555555554, "grad_norm": 0.4281088411808014, "learning_rate": 0.000155398819820014, "loss": 0.1187, "step": 5950 }, { "epoch": 155.81699346405227, "grad_norm": 0.530092716217041, "learning_rate": 0.00015526108873699387, "loss": 0.1203, "step": 5960 }, { "epoch": 156.07843137254903, "grad_norm": 0.3755616843700409, "learning_rate": 0.00015512320657226728, "loss": 0.1175, "step": 5970 }, { "epoch": 156.33986928104576, "grad_norm": 0.35955289006233215, "learning_rate": 0.00015498517370279884, "loss": 0.1173, "step": 5980 }, { "epoch": 156.6013071895425, "grad_norm": 0.406409353017807, "learning_rate": 0.00015484699050596505, "loss": 0.1221, "step": 5990 }, { "epoch": 156.86274509803923, "grad_norm": 0.4339780807495117, "learning_rate": 0.00015470865735955357, "loss": 0.1183, "step": 6000 }, { "epoch": 157.12418300653596, "grad_norm": 0.35623541474342346, "learning_rate": 0.00015457017464176191, "loss": 0.1215, "step": 6010 }, { "epoch": 157.3856209150327, "grad_norm": 0.43511274456977844, "learning_rate": 0.0001544315427311965, "loss": 0.1169, "step": 6020 }, { "epoch": 157.64705882352942, "grad_norm": 0.4758042097091675, "learning_rate": 0.00015429276200687177, "loss": 0.1177, "step": 6030 }, { "epoch": 157.90849673202615, "grad_norm": 0.3972184360027313, "learning_rate": 0.00015415383284820888, "loss": 0.123, "step": 6040 }, { "epoch": 158.16993464052288, "grad_norm": 0.3831634223461151, "learning_rate": 0.0001540147556350348, "loss": 0.1187, "step": 6050 }, { "epoch": 158.4313725490196, "grad_norm": 0.3707033097743988, "learning_rate": 0.0001538755307475814, "loss": 0.1166, "step": 6060 }, { "epoch": 158.69281045751634, "grad_norm": 0.40588223934173584, "learning_rate": 0.00015373615856648418, "loss": 0.1201, "step": 6070 }, { "epoch": 158.95424836601308, "grad_norm": 0.5210367441177368, "learning_rate": 0.0001535966394727813, "loss": 0.1223, "step": 6080 }, { "epoch": 159.2156862745098, "grad_norm": 0.3465724587440491, "learning_rate": 0.00015345697384791274, "loss": 0.1162, "step": 6090 }, { "epoch": 159.47712418300654, "grad_norm": 0.48409783840179443, "learning_rate": 0.00015331716207371888, "loss": 0.1186, "step": 6100 }, { "epoch": 159.73856209150327, "grad_norm": 0.49288466572761536, "learning_rate": 0.00015317720453243981, "loss": 0.1201, "step": 6110 }, { "epoch": 160.0, "grad_norm": 0.41373661160469055, "learning_rate": 0.00015303710160671416, "loss": 0.1221, "step": 6120 }, { "epoch": 160.26143790849673, "grad_norm": 0.30801451206207275, "learning_rate": 0.00015289685367957792, "loss": 0.1187, "step": 6130 }, { "epoch": 160.52287581699346, "grad_norm": 0.4610970616340637, "learning_rate": 0.0001527564611344636, "loss": 0.1183, "step": 6140 }, { "epoch": 160.7843137254902, "grad_norm": 0.3737730383872986, "learning_rate": 0.000152615924355199, "loss": 0.1174, "step": 6150 }, { "epoch": 161.04575163398692, "grad_norm": 0.4367047846317291, "learning_rate": 0.00015247524372600637, "loss": 0.1245, "step": 6160 }, { "epoch": 161.30718954248366, "grad_norm": 0.4288366734981537, "learning_rate": 0.00015233441963150113, "loss": 0.1165, "step": 6170 }, { "epoch": 161.5686274509804, "grad_norm": 0.5186516642570496, "learning_rate": 0.00015219345245669105, "loss": 0.1196, "step": 6180 }, { "epoch": 161.83006535947712, "grad_norm": 0.32446616888046265, "learning_rate": 0.00015205234258697494, "loss": 0.1206, "step": 6190 }, { "epoch": 162.09150326797385, "grad_norm": 0.34923261404037476, "learning_rate": 0.00015191109040814176, "loss": 0.1184, "step": 6200 }, { "epoch": 162.35294117647058, "grad_norm": 0.4656030833721161, "learning_rate": 0.0001517696963063697, "loss": 0.1186, "step": 6210 }, { "epoch": 162.6143790849673, "grad_norm": 0.3549903631210327, "learning_rate": 0.0001516281606682247, "loss": 0.1204, "step": 6220 }, { "epoch": 162.87581699346404, "grad_norm": 0.3952346742153168, "learning_rate": 0.0001514864838806599, "loss": 0.1161, "step": 6230 }, { "epoch": 163.13725490196077, "grad_norm": 0.3911607563495636, "learning_rate": 0.0001513446663310141, "loss": 0.1198, "step": 6240 }, { "epoch": 163.3986928104575, "grad_norm": 0.346305787563324, "learning_rate": 0.00015120270840701124, "loss": 0.1118, "step": 6250 }, { "epoch": 163.66013071895424, "grad_norm": 0.42828211188316345, "learning_rate": 0.0001510606104967587, "loss": 0.1195, "step": 6260 }, { "epoch": 163.92156862745097, "grad_norm": 0.4532879590988159, "learning_rate": 0.00015091837298874682, "loss": 0.1241, "step": 6270 }, { "epoch": 164.18300653594773, "grad_norm": 0.33145442605018616, "learning_rate": 0.00015077599627184754, "loss": 0.116, "step": 6280 }, { "epoch": 164.44444444444446, "grad_norm": 0.4350489675998688, "learning_rate": 0.00015063348073531324, "loss": 0.1173, "step": 6290 }, { "epoch": 164.7058823529412, "grad_norm": 0.4127410054206848, "learning_rate": 0.00015049082676877614, "loss": 0.1209, "step": 6300 }, { "epoch": 164.96732026143792, "grad_norm": 0.36359113454818726, "learning_rate": 0.00015034803476224657, "loss": 0.1215, "step": 6310 }, { "epoch": 165.22875816993465, "grad_norm": 0.3537936210632324, "learning_rate": 0.00015020510510611255, "loss": 0.1203, "step": 6320 }, { "epoch": 165.49019607843138, "grad_norm": 0.33729109168052673, "learning_rate": 0.00015006203819113823, "loss": 0.1162, "step": 6330 }, { "epoch": 165.7516339869281, "grad_norm": 0.4218641221523285, "learning_rate": 0.00014991883440846308, "loss": 0.1162, "step": 6340 }, { "epoch": 166.01307189542484, "grad_norm": 0.44932490587234497, "learning_rate": 0.00014977549414960084, "loss": 0.1243, "step": 6350 }, { "epoch": 166.27450980392157, "grad_norm": 0.430530846118927, "learning_rate": 0.00014963201780643823, "loss": 0.1167, "step": 6360 }, { "epoch": 166.5359477124183, "grad_norm": 0.4142252504825592, "learning_rate": 0.00014948840577123416, "loss": 0.1172, "step": 6370 }, { "epoch": 166.79738562091504, "grad_norm": 0.2989576458930969, "learning_rate": 0.00014934465843661842, "loss": 0.1197, "step": 6380 }, { "epoch": 167.05882352941177, "grad_norm": 0.37099623680114746, "learning_rate": 0.00014920077619559073, "loss": 0.1207, "step": 6390 }, { "epoch": 167.3202614379085, "grad_norm": 0.3625079393386841, "learning_rate": 0.00014905675944151966, "loss": 0.1186, "step": 6400 }, { "epoch": 167.58169934640523, "grad_norm": 0.4216829836368561, "learning_rate": 0.00014891260856814148, "loss": 0.1154, "step": 6410 }, { "epoch": 167.84313725490196, "grad_norm": 0.44304612278938293, "learning_rate": 0.0001487683239695592, "loss": 0.1211, "step": 6420 }, { "epoch": 168.1045751633987, "grad_norm": 0.38862261176109314, "learning_rate": 0.00014862390604024144, "loss": 0.1166, "step": 6430 }, { "epoch": 168.36601307189542, "grad_norm": 0.38382863998413086, "learning_rate": 0.00014847935517502123, "loss": 0.1139, "step": 6440 }, { "epoch": 168.62745098039215, "grad_norm": 0.42605075240135193, "learning_rate": 0.00014833467176909515, "loss": 0.1209, "step": 6450 }, { "epoch": 168.88888888888889, "grad_norm": 0.3948115408420563, "learning_rate": 0.00014818985621802212, "loss": 0.12, "step": 6460 }, { "epoch": 169.15032679738562, "grad_norm": 0.33048856258392334, "learning_rate": 0.00014804490891772232, "loss": 0.1185, "step": 6470 }, { "epoch": 169.41176470588235, "grad_norm": 0.33838731050491333, "learning_rate": 0.00014789983026447612, "loss": 0.1139, "step": 6480 }, { "epoch": 169.67320261437908, "grad_norm": 0.40736544132232666, "learning_rate": 0.000147754620654923, "loss": 0.1212, "step": 6490 }, { "epoch": 169.9346405228758, "grad_norm": 0.5088180899620056, "learning_rate": 0.00014760928048606055, "loss": 0.1211, "step": 6500 }, { "epoch": 170.19607843137254, "grad_norm": 0.36517319083213806, "learning_rate": 0.00014746381015524323, "loss": 0.1204, "step": 6510 }, { "epoch": 170.45751633986927, "grad_norm": 0.4173334836959839, "learning_rate": 0.00014731821006018131, "loss": 0.1138, "step": 6520 }, { "epoch": 170.718954248366, "grad_norm": 0.3489775061607361, "learning_rate": 0.00014717248059893992, "loss": 0.1197, "step": 6530 }, { "epoch": 170.98039215686273, "grad_norm": 0.3684404492378235, "learning_rate": 0.00014702662216993785, "loss": 0.1226, "step": 6540 }, { "epoch": 171.24183006535947, "grad_norm": 0.3432365953922272, "learning_rate": 0.0001468806351719465, "loss": 0.1186, "step": 6550 }, { "epoch": 171.5032679738562, "grad_norm": 0.4222089350223541, "learning_rate": 0.0001467345200040887, "loss": 0.1193, "step": 6560 }, { "epoch": 171.76470588235293, "grad_norm": 0.43939515948295593, "learning_rate": 0.0001465882770658378, "loss": 0.1197, "step": 6570 }, { "epoch": 172.0261437908497, "grad_norm": 0.3959405720233917, "learning_rate": 0.00014644190675701632, "loss": 0.1188, "step": 6580 }, { "epoch": 172.28758169934642, "grad_norm": 0.3560381531715393, "learning_rate": 0.00014629540947779516, "loss": 0.111, "step": 6590 }, { "epoch": 172.54901960784315, "grad_norm": 0.31908681988716125, "learning_rate": 0.0001461487856286923, "loss": 0.1179, "step": 6600 }, { "epoch": 172.81045751633988, "grad_norm": 0.4550021290779114, "learning_rate": 0.0001460020356105717, "loss": 0.1229, "step": 6610 }, { "epoch": 173.0718954248366, "grad_norm": 0.31013500690460205, "learning_rate": 0.00014585515982464234, "loss": 0.1192, "step": 6620 }, { "epoch": 173.33333333333334, "grad_norm": 0.41469523310661316, "learning_rate": 0.00014570815867245696, "loss": 0.1183, "step": 6630 }, { "epoch": 173.59477124183007, "grad_norm": 0.40070420503616333, "learning_rate": 0.00014556103255591114, "loss": 0.1177, "step": 6640 }, { "epoch": 173.8562091503268, "grad_norm": 0.3658972978591919, "learning_rate": 0.0001454137818772421, "loss": 0.1203, "step": 6650 }, { "epoch": 174.11764705882354, "grad_norm": 0.4114379584789276, "learning_rate": 0.00014526640703902747, "loss": 0.1159, "step": 6660 }, { "epoch": 174.37908496732027, "grad_norm": 0.37510064244270325, "learning_rate": 0.00014511890844418453, "loss": 0.1205, "step": 6670 }, { "epoch": 174.640522875817, "grad_norm": 0.41856005787849426, "learning_rate": 0.00014497128649596875, "loss": 0.1169, "step": 6680 }, { "epoch": 174.90196078431373, "grad_norm": 0.35670095682144165, "learning_rate": 0.00014482354159797288, "loss": 0.1187, "step": 6690 }, { "epoch": 175.16339869281046, "grad_norm": 0.3349741995334625, "learning_rate": 0.0001446756741541259, "loss": 0.1191, "step": 6700 }, { "epoch": 175.4248366013072, "grad_norm": 0.4219711422920227, "learning_rate": 0.00014452768456869173, "loss": 0.1167, "step": 6710 }, { "epoch": 175.68627450980392, "grad_norm": 0.30478203296661377, "learning_rate": 0.0001443795732462682, "loss": 0.1172, "step": 6720 }, { "epoch": 175.94771241830065, "grad_norm": 0.3981144428253174, "learning_rate": 0.00014423134059178607, "loss": 0.121, "step": 6730 }, { "epoch": 176.20915032679738, "grad_norm": 0.3811021149158478, "learning_rate": 0.00014408298701050774, "loss": 0.1182, "step": 6740 }, { "epoch": 176.47058823529412, "grad_norm": 0.4203794002532959, "learning_rate": 0.00014393451290802619, "loss": 0.1191, "step": 6750 }, { "epoch": 176.73202614379085, "grad_norm": 0.38078659772872925, "learning_rate": 0.000143785918690264, "loss": 0.117, "step": 6760 }, { "epoch": 176.99346405228758, "grad_norm": 0.3496187627315521, "learning_rate": 0.0001436372047634721, "loss": 0.1197, "step": 6770 }, { "epoch": 177.2549019607843, "grad_norm": 0.3604697287082672, "learning_rate": 0.00014348837153422864, "loss": 0.1182, "step": 6780 }, { "epoch": 177.51633986928104, "grad_norm": 0.3979366421699524, "learning_rate": 0.000143339419409438, "loss": 0.1169, "step": 6790 }, { "epoch": 177.77777777777777, "grad_norm": 0.4077358543872833, "learning_rate": 0.00014319034879632962, "loss": 0.1189, "step": 6800 }, { "epoch": 178.0392156862745, "grad_norm": 0.377392053604126, "learning_rate": 0.00014304116010245685, "loss": 0.1205, "step": 6810 }, { "epoch": 178.30065359477123, "grad_norm": 0.9518114924430847, "learning_rate": 0.00014289185373569585, "loss": 0.1185, "step": 6820 }, { "epoch": 178.56209150326796, "grad_norm": 0.4851488173007965, "learning_rate": 0.00014274243010424457, "loss": 0.1222, "step": 6830 }, { "epoch": 178.8235294117647, "grad_norm": 0.4688913822174072, "learning_rate": 0.00014259288961662153, "loss": 0.1211, "step": 6840 }, { "epoch": 179.08496732026143, "grad_norm": 0.4016769528388977, "learning_rate": 0.00014244323268166467, "loss": 0.1216, "step": 6850 }, { "epoch": 179.34640522875816, "grad_norm": 0.661056637763977, "learning_rate": 0.00014229345970853032, "loss": 0.1203, "step": 6860 }, { "epoch": 179.6078431372549, "grad_norm": 0.8823976516723633, "learning_rate": 0.00014214357110669211, "loss": 0.1218, "step": 6870 }, { "epoch": 179.86928104575162, "grad_norm": 0.6820003390312195, "learning_rate": 0.00014199356728593977, "loss": 0.1225, "step": 6880 }, { "epoch": 180.13071895424838, "grad_norm": 1.0853325128555298, "learning_rate": 0.000141843448656378, "loss": 0.1259, "step": 6890 }, { "epoch": 180.3921568627451, "grad_norm": 0.9575023055076599, "learning_rate": 0.00014169321562842535, "loss": 0.1179, "step": 6900 }, { "epoch": 180.65359477124184, "grad_norm": 0.4306863248348236, "learning_rate": 0.00014154286861281325, "loss": 0.124, "step": 6910 }, { "epoch": 180.91503267973857, "grad_norm": 0.48212000727653503, "learning_rate": 0.00014139240802058464, "loss": 0.1216, "step": 6920 }, { "epoch": 181.1764705882353, "grad_norm": 0.7190878391265869, "learning_rate": 0.0001412418342630931, "loss": 0.121, "step": 6930 }, { "epoch": 181.43790849673204, "grad_norm": 0.6326178908348083, "learning_rate": 0.0001410911477520015, "loss": 0.122, "step": 6940 }, { "epoch": 181.69934640522877, "grad_norm": 0.6375879645347595, "learning_rate": 0.000140940348899281, "loss": 0.1231, "step": 6950 }, { "epoch": 181.9607843137255, "grad_norm": 0.8026982545852661, "learning_rate": 0.0001407894381172099, "loss": 0.1278, "step": 6960 }, { "epoch": 182.22222222222223, "grad_norm": 0.8561803102493286, "learning_rate": 0.00014063841581837255, "loss": 0.1169, "step": 6970 }, { "epoch": 182.48366013071896, "grad_norm": 0.6810368895530701, "learning_rate": 0.00014048728241565812, "loss": 0.1255, "step": 6980 }, { "epoch": 182.7450980392157, "grad_norm": 0.8623539209365845, "learning_rate": 0.00014033603832225956, "loss": 0.1241, "step": 6990 }, { "epoch": 183.00653594771242, "grad_norm": 0.5568687915802002, "learning_rate": 0.00014018468395167246, "loss": 0.1291, "step": 7000 }, { "epoch": 183.26797385620915, "grad_norm": 0.7413952946662903, "learning_rate": 0.00014003321971769385, "loss": 0.1186, "step": 7010 }, { "epoch": 183.52941176470588, "grad_norm": 0.39381900429725647, "learning_rate": 0.00013988164603442126, "loss": 0.1181, "step": 7020 }, { "epoch": 183.79084967320262, "grad_norm": 0.8067420721054077, "learning_rate": 0.00013972996331625126, "loss": 0.1233, "step": 7030 }, { "epoch": 184.05228758169935, "grad_norm": 0.4819212257862091, "learning_rate": 0.00013957817197787865, "loss": 0.1228, "step": 7040 }, { "epoch": 184.31372549019608, "grad_norm": 0.5741551518440247, "learning_rate": 0.00013942627243429512, "loss": 0.1186, "step": 7050 }, { "epoch": 184.5751633986928, "grad_norm": 0.44853687286376953, "learning_rate": 0.0001392742651007882, "loss": 0.1195, "step": 7060 }, { "epoch": 184.83660130718954, "grad_norm": 0.39852526783943176, "learning_rate": 0.00013912215039294028, "loss": 0.1197, "step": 7070 }, { "epoch": 185.09803921568627, "grad_norm": 0.3141544461250305, "learning_rate": 0.000138969928726627, "loss": 0.1196, "step": 7080 }, { "epoch": 185.359477124183, "grad_norm": 0.40422919392585754, "learning_rate": 0.00013881760051801667, "loss": 0.117, "step": 7090 }, { "epoch": 185.62091503267973, "grad_norm": 0.38428983092308044, "learning_rate": 0.00013866516618356875, "loss": 0.1165, "step": 7100 }, { "epoch": 185.88235294117646, "grad_norm": 0.42969101667404175, "learning_rate": 0.00013851262614003292, "loss": 0.1211, "step": 7110 }, { "epoch": 186.1437908496732, "grad_norm": 0.35507911443710327, "learning_rate": 0.0001383599808044478, "loss": 0.1167, "step": 7120 }, { "epoch": 186.40522875816993, "grad_norm": 0.3685397207736969, "learning_rate": 0.00013820723059413995, "loss": 0.1169, "step": 7130 }, { "epoch": 186.66666666666666, "grad_norm": 0.4494422972202301, "learning_rate": 0.00013805437592672262, "loss": 0.1144, "step": 7140 }, { "epoch": 186.9281045751634, "grad_norm": 0.5370398759841919, "learning_rate": 0.00013790141722009458, "loss": 0.1222, "step": 7150 }, { "epoch": 187.18954248366012, "grad_norm": 0.4284844994544983, "learning_rate": 0.00013774835489243912, "loss": 0.1185, "step": 7160 }, { "epoch": 187.45098039215685, "grad_norm": 0.5446126461029053, "learning_rate": 0.0001375951893622228, "loss": 0.1202, "step": 7170 }, { "epoch": 187.71241830065358, "grad_norm": 0.388539582490921, "learning_rate": 0.00013744192104819437, "loss": 0.118, "step": 7180 }, { "epoch": 187.9738562091503, "grad_norm": 0.3365419805049896, "learning_rate": 0.00013728855036938348, "loss": 0.1169, "step": 7190 }, { "epoch": 188.23529411764707, "grad_norm": 0.36737489700317383, "learning_rate": 0.00013713507774509973, "loss": 0.1145, "step": 7200 }, { "epoch": 188.4967320261438, "grad_norm": 0.44414252042770386, "learning_rate": 0.00013698150359493143, "loss": 0.1166, "step": 7210 }, { "epoch": 188.75816993464053, "grad_norm": 0.3925827443599701, "learning_rate": 0.00013682782833874442, "loss": 0.1198, "step": 7220 }, { "epoch": 189.01960784313727, "grad_norm": 0.46063002943992615, "learning_rate": 0.00013667405239668106, "loss": 0.1211, "step": 7230 }, { "epoch": 189.281045751634, "grad_norm": 0.33849823474884033, "learning_rate": 0.0001365201761891588, "loss": 0.1114, "step": 7240 }, { "epoch": 189.54248366013073, "grad_norm": 0.4201454222202301, "learning_rate": 0.00013636620013686936, "loss": 0.1176, "step": 7250 }, { "epoch": 189.80392156862746, "grad_norm": 0.3787872791290283, "learning_rate": 0.00013621212466077736, "loss": 0.1181, "step": 7260 }, { "epoch": 190.0653594771242, "grad_norm": 0.3665624260902405, "learning_rate": 0.00013605795018211932, "loss": 0.1196, "step": 7270 }, { "epoch": 190.32679738562092, "grad_norm": 0.3760620355606079, "learning_rate": 0.0001359036771224024, "loss": 0.1164, "step": 7280 }, { "epoch": 190.58823529411765, "grad_norm": 0.4009071886539459, "learning_rate": 0.00013574930590340314, "loss": 0.1208, "step": 7290 }, { "epoch": 190.84967320261438, "grad_norm": 0.3013065755367279, "learning_rate": 0.00013559483694716663, "loss": 0.119, "step": 7300 }, { "epoch": 191.11111111111111, "grad_norm": 0.3610047996044159, "learning_rate": 0.00013544027067600512, "loss": 0.115, "step": 7310 }, { "epoch": 191.37254901960785, "grad_norm": 0.3881337344646454, "learning_rate": 0.00013528560751249687, "loss": 0.1157, "step": 7320 }, { "epoch": 191.63398692810458, "grad_norm": 0.36018821597099304, "learning_rate": 0.00013513084787948504, "loss": 0.1158, "step": 7330 }, { "epoch": 191.8954248366013, "grad_norm": 0.4123559296131134, "learning_rate": 0.00013497599220007656, "loss": 0.1193, "step": 7340 }, { "epoch": 192.15686274509804, "grad_norm": 0.35036417841911316, "learning_rate": 0.00013482104089764096, "loss": 0.117, "step": 7350 }, { "epoch": 192.41830065359477, "grad_norm": 0.4409710168838501, "learning_rate": 0.0001346659943958092, "loss": 0.1147, "step": 7360 }, { "epoch": 192.6797385620915, "grad_norm": 0.3965952694416046, "learning_rate": 0.0001345108531184725, "loss": 0.1178, "step": 7370 }, { "epoch": 192.94117647058823, "grad_norm": 0.34038016200065613, "learning_rate": 0.00013435561748978113, "loss": 0.1216, "step": 7380 }, { "epoch": 193.20261437908496, "grad_norm": 0.30114054679870605, "learning_rate": 0.00013420028793414344, "loss": 0.1153, "step": 7390 }, { "epoch": 193.4640522875817, "grad_norm": 0.34901145100593567, "learning_rate": 0.00013404486487622442, "loss": 0.1176, "step": 7400 }, { "epoch": 193.72549019607843, "grad_norm": 0.4058627784252167, "learning_rate": 0.00013388934874094489, "loss": 0.1168, "step": 7410 }, { "epoch": 193.98692810457516, "grad_norm": 0.31916511058807373, "learning_rate": 0.00013373373995347995, "loss": 0.1214, "step": 7420 }, { "epoch": 194.2483660130719, "grad_norm": 0.3582738935947418, "learning_rate": 0.00013357803893925807, "loss": 0.1155, "step": 7430 }, { "epoch": 194.50980392156862, "grad_norm": 0.3602858781814575, "learning_rate": 0.00013342224612395993, "loss": 0.121, "step": 7440 }, { "epoch": 194.77124183006535, "grad_norm": 0.30642154812812805, "learning_rate": 0.0001332663619335171, "loss": 0.1149, "step": 7450 }, { "epoch": 195.03267973856208, "grad_norm": 0.32698705792427063, "learning_rate": 0.00013311038679411104, "loss": 0.1184, "step": 7460 }, { "epoch": 195.2941176470588, "grad_norm": 0.34545981884002686, "learning_rate": 0.00013295432113217176, "loss": 0.1139, "step": 7470 }, { "epoch": 195.55555555555554, "grad_norm": 0.2823973298072815, "learning_rate": 0.00013279816537437687, "loss": 0.1156, "step": 7480 }, { "epoch": 195.81699346405227, "grad_norm": 0.3787044882774353, "learning_rate": 0.00013264191994765028, "loss": 0.1199, "step": 7490 }, { "epoch": 196.07843137254903, "grad_norm": 0.2924850583076477, "learning_rate": 0.00013248558527916094, "loss": 0.1169, "step": 7500 }, { "epoch": 196.33986928104576, "grad_norm": 0.307283490896225, "learning_rate": 0.00013232916179632193, "loss": 0.1156, "step": 7510 }, { "epoch": 196.6013071895425, "grad_norm": 0.3378380835056305, "learning_rate": 0.00013217264992678907, "loss": 0.1156, "step": 7520 }, { "epoch": 196.86274509803923, "grad_norm": 0.3563665449619293, "learning_rate": 0.00013201605009845977, "loss": 0.1197, "step": 7530 }, { "epoch": 197.12418300653596, "grad_norm": 0.32628506422042847, "learning_rate": 0.00013185936273947207, "loss": 0.118, "step": 7540 }, { "epoch": 197.3856209150327, "grad_norm": 0.4200800657272339, "learning_rate": 0.00013170258827820318, "loss": 0.1185, "step": 7550 }, { "epoch": 197.64705882352942, "grad_norm": 0.39217332005500793, "learning_rate": 0.00013154572714326848, "loss": 0.1141, "step": 7560 }, { "epoch": 197.90849673202615, "grad_norm": 0.37061387300491333, "learning_rate": 0.00013138877976352035, "loss": 0.1179, "step": 7570 }, { "epoch": 198.16993464052288, "grad_norm": 0.311758816242218, "learning_rate": 0.00013123174656804693, "loss": 0.1191, "step": 7580 }, { "epoch": 198.4313725490196, "grad_norm": 0.35725489258766174, "learning_rate": 0.00013107462798617097, "loss": 0.1135, "step": 7590 }, { "epoch": 198.69281045751634, "grad_norm": 0.35550937056541443, "learning_rate": 0.0001309174244474487, "loss": 0.117, "step": 7600 }, { "epoch": 198.95424836601308, "grad_norm": 0.3795972466468811, "learning_rate": 0.00013076013638166852, "loss": 0.1215, "step": 7610 }, { "epoch": 199.2156862745098, "grad_norm": 0.3138248026371002, "learning_rate": 0.0001306027642188501, "loss": 0.1133, "step": 7620 }, { "epoch": 199.47712418300654, "grad_norm": 0.4113026559352875, "learning_rate": 0.00013044530838924283, "loss": 0.1178, "step": 7630 }, { "epoch": 199.73856209150327, "grad_norm": 0.3749026358127594, "learning_rate": 0.00013028776932332497, "loss": 0.1183, "step": 7640 }, { "epoch": 200.0, "grad_norm": 0.4342474043369293, "learning_rate": 0.00013013014745180237, "loss": 0.1204, "step": 7650 }, { "epoch": 200.26143790849673, "grad_norm": 0.348385214805603, "learning_rate": 0.0001299724432056071, "loss": 0.1152, "step": 7660 }, { "epoch": 200.52287581699346, "grad_norm": 0.4169091284275055, "learning_rate": 0.00012981465701589664, "loss": 0.1179, "step": 7670 }, { "epoch": 200.7843137254902, "grad_norm": 0.36309823393821716, "learning_rate": 0.00012965678931405232, "loss": 0.1173, "step": 7680 }, { "epoch": 201.04575163398692, "grad_norm": 0.3717479705810547, "learning_rate": 0.00012949884053167846, "loss": 0.1168, "step": 7690 }, { "epoch": 201.30718954248366, "grad_norm": 0.3208405673503876, "learning_rate": 0.00012934081110060105, "loss": 0.1159, "step": 7700 }, { "epoch": 201.5686274509804, "grad_norm": 0.348286509513855, "learning_rate": 0.00012918270145286642, "loss": 0.1153, "step": 7710 }, { "epoch": 201.83006535947712, "grad_norm": 0.418100506067276, "learning_rate": 0.00012902451202074038, "loss": 0.1165, "step": 7720 }, { "epoch": 202.09150326797385, "grad_norm": 0.3307877480983734, "learning_rate": 0.00012886624323670676, "loss": 0.121, "step": 7730 }, { "epoch": 202.35294117647058, "grad_norm": 0.45300108194351196, "learning_rate": 0.0001287078955334664, "loss": 0.1147, "step": 7740 }, { "epoch": 202.6143790849673, "grad_norm": 0.37652915716171265, "learning_rate": 0.00012854946934393586, "loss": 0.1165, "step": 7750 }, { "epoch": 202.87581699346404, "grad_norm": 0.35546672344207764, "learning_rate": 0.0001283909651012463, "loss": 0.1186, "step": 7760 }, { "epoch": 203.13725490196077, "grad_norm": 0.3252201974391937, "learning_rate": 0.00012823238323874224, "loss": 0.1182, "step": 7770 }, { "epoch": 203.3986928104575, "grad_norm": 0.36323535442352295, "learning_rate": 0.00012807372418998045, "loss": 0.1143, "step": 7780 }, { "epoch": 203.66013071895424, "grad_norm": 0.37268149852752686, "learning_rate": 0.00012791498838872874, "loss": 0.1175, "step": 7790 }, { "epoch": 203.92156862745097, "grad_norm": 0.3726952075958252, "learning_rate": 0.00012775617626896468, "loss": 0.1182, "step": 7800 }, { "epoch": 204.18300653594773, "grad_norm": 0.34012719988822937, "learning_rate": 0.0001275972882648746, "loss": 0.1178, "step": 7810 }, { "epoch": 204.44444444444446, "grad_norm": 0.3655106723308563, "learning_rate": 0.0001274383248108522, "loss": 0.1148, "step": 7820 }, { "epoch": 204.7058823529412, "grad_norm": 0.3548455238342285, "learning_rate": 0.00012727928634149744, "loss": 0.1191, "step": 7830 }, { "epoch": 204.96732026143792, "grad_norm": 0.3439328670501709, "learning_rate": 0.00012712017329161553, "loss": 0.1176, "step": 7840 }, { "epoch": 205.22875816993465, "grad_norm": 0.3686840534210205, "learning_rate": 0.00012696098609621542, "loss": 0.1167, "step": 7850 }, { "epoch": 205.49019607843138, "grad_norm": 0.39468914270401, "learning_rate": 0.00012680172519050883, "loss": 0.1162, "step": 7860 }, { "epoch": 205.7516339869281, "grad_norm": 0.31318965554237366, "learning_rate": 0.00012664239100990897, "loss": 0.1176, "step": 7870 }, { "epoch": 206.01307189542484, "grad_norm": 0.3187226355075836, "learning_rate": 0.00012648298399002946, "loss": 0.1186, "step": 7880 }, { "epoch": 206.27450980392157, "grad_norm": 0.350243479013443, "learning_rate": 0.0001263235045666829, "loss": 0.1142, "step": 7890 }, { "epoch": 206.5359477124183, "grad_norm": 0.3626524806022644, "learning_rate": 0.00012616395317588007, "loss": 0.1152, "step": 7900 }, { "epoch": 206.79738562091504, "grad_norm": 0.3574896454811096, "learning_rate": 0.00012600433025382833, "loss": 0.1173, "step": 7910 }, { "epoch": 207.05882352941177, "grad_norm": 0.3026224374771118, "learning_rate": 0.00012584463623693064, "loss": 0.1206, "step": 7920 }, { "epoch": 207.3202614379085, "grad_norm": 0.3898176848888397, "learning_rate": 0.00012568487156178434, "loss": 0.1132, "step": 7930 }, { "epoch": 207.58169934640523, "grad_norm": 0.3034379780292511, "learning_rate": 0.00012552503666517998, "loss": 0.1162, "step": 7940 }, { "epoch": 207.84313725490196, "grad_norm": 0.38479849696159363, "learning_rate": 0.00012536513198410006, "loss": 0.1191, "step": 7950 }, { "epoch": 208.1045751633987, "grad_norm": 0.4212307631969452, "learning_rate": 0.00012520515795571785, "loss": 0.1182, "step": 7960 }, { "epoch": 208.36601307189542, "grad_norm": 0.44150879979133606, "learning_rate": 0.00012504511501739622, "loss": 0.1161, "step": 7970 }, { "epoch": 208.62745098039215, "grad_norm": 0.49862542748451233, "learning_rate": 0.0001248850036066865, "loss": 0.1186, "step": 7980 }, { "epoch": 208.88888888888889, "grad_norm": 0.37723541259765625, "learning_rate": 0.00012472482416132712, "loss": 0.1159, "step": 7990 }, { "epoch": 209.15032679738562, "grad_norm": 0.39432400465011597, "learning_rate": 0.00012456457711924266, "loss": 0.1139, "step": 8000 }, { "epoch": 209.41176470588235, "grad_norm": 0.320744127035141, "learning_rate": 0.0001244042629185423, "loss": 0.1138, "step": 8010 }, { "epoch": 209.67320261437908, "grad_norm": 0.37809300422668457, "learning_rate": 0.00012424388199751903, "loss": 0.1164, "step": 8020 }, { "epoch": 209.9346405228758, "grad_norm": 0.3541184961795807, "learning_rate": 0.0001240834347946481, "loss": 0.1208, "step": 8030 }, { "epoch": 210.19607843137254, "grad_norm": 0.38199830055236816, "learning_rate": 0.00012392292174858606, "loss": 0.1171, "step": 8040 }, { "epoch": 210.45751633986927, "grad_norm": 0.30296802520751953, "learning_rate": 0.00012376234329816949, "loss": 0.1173, "step": 8050 }, { "epoch": 210.718954248366, "grad_norm": 0.38834238052368164, "learning_rate": 0.00012360169988241367, "loss": 0.1184, "step": 8060 }, { "epoch": 210.98039215686273, "grad_norm": 0.3523692190647125, "learning_rate": 0.0001234409919405116, "loss": 0.1163, "step": 8070 }, { "epoch": 211.24183006535947, "grad_norm": 0.31442514061927795, "learning_rate": 0.0001232802199118327, "loss": 0.1154, "step": 8080 }, { "epoch": 211.5032679738562, "grad_norm": 0.365710586309433, "learning_rate": 0.00012311938423592152, "loss": 0.1143, "step": 8090 }, { "epoch": 211.76470588235293, "grad_norm": 0.36913615465164185, "learning_rate": 0.00012295848535249658, "loss": 0.1169, "step": 8100 }, { "epoch": 212.0261437908497, "grad_norm": 0.2978919744491577, "learning_rate": 0.0001227975237014494, "loss": 0.1208, "step": 8110 }, { "epoch": 212.28758169934642, "grad_norm": 0.3304121494293213, "learning_rate": 0.00012263649972284294, "loss": 0.1137, "step": 8120 }, { "epoch": 212.54901960784315, "grad_norm": 0.3513433337211609, "learning_rate": 0.00012247541385691058, "loss": 0.1198, "step": 8130 }, { "epoch": 212.81045751633988, "grad_norm": 0.4109828472137451, "learning_rate": 0.000122314266544055, "loss": 0.1185, "step": 8140 }, { "epoch": 213.0718954248366, "grad_norm": 0.2574869692325592, "learning_rate": 0.00012215305822484672, "loss": 0.1133, "step": 8150 }, { "epoch": 213.33333333333334, "grad_norm": 0.307307630777359, "learning_rate": 0.00012199178934002317, "loss": 0.1153, "step": 8160 }, { "epoch": 213.59477124183007, "grad_norm": 0.3619244396686554, "learning_rate": 0.00012183046033048736, "loss": 0.1173, "step": 8170 }, { "epoch": 213.8562091503268, "grad_norm": 0.39120060205459595, "learning_rate": 0.00012166907163730656, "loss": 0.1166, "step": 8180 }, { "epoch": 214.11764705882354, "grad_norm": 0.3365266025066376, "learning_rate": 0.00012150762370171136, "loss": 0.1201, "step": 8190 }, { "epoch": 214.37908496732027, "grad_norm": 0.4538247883319855, "learning_rate": 0.00012134611696509419, "loss": 0.1157, "step": 8200 }, { "epoch": 214.640522875817, "grad_norm": 0.3183276951313019, "learning_rate": 0.00012118455186900836, "loss": 0.1128, "step": 8210 }, { "epoch": 214.90196078431373, "grad_norm": 0.8887193202972412, "learning_rate": 0.00012102292885516666, "loss": 0.1214, "step": 8220 }, { "epoch": 215.16339869281046, "grad_norm": 0.3512011766433716, "learning_rate": 0.00012086124836544024, "loss": 0.1171, "step": 8230 }, { "epoch": 215.4248366013072, "grad_norm": 0.3707999289035797, "learning_rate": 0.00012069951084185733, "loss": 0.1164, "step": 8240 }, { "epoch": 215.68627450980392, "grad_norm": 0.3448478877544403, "learning_rate": 0.00012053771672660221, "loss": 0.1188, "step": 8250 }, { "epoch": 215.94771241830065, "grad_norm": 0.3120673596858978, "learning_rate": 0.00012037586646201378, "loss": 0.1177, "step": 8260 }, { "epoch": 216.20915032679738, "grad_norm": 0.46193230152130127, "learning_rate": 0.00012021396049058451, "loss": 0.1163, "step": 8270 }, { "epoch": 216.47058823529412, "grad_norm": 0.34690043330192566, "learning_rate": 0.00012005199925495914, "loss": 0.1157, "step": 8280 }, { "epoch": 216.73202614379085, "grad_norm": 0.4091133177280426, "learning_rate": 0.00011988998319793346, "loss": 0.1175, "step": 8290 }, { "epoch": 216.99346405228758, "grad_norm": 0.32104718685150146, "learning_rate": 0.00011972791276245321, "loss": 0.1182, "step": 8300 }, { "epoch": 217.2549019607843, "grad_norm": 0.3396114408969879, "learning_rate": 0.00011956578839161279, "loss": 0.1178, "step": 8310 }, { "epoch": 217.51633986928104, "grad_norm": 0.39414018392562866, "learning_rate": 0.00011940361052865401, "loss": 0.1184, "step": 8320 }, { "epoch": 217.77777777777777, "grad_norm": 0.3328598141670227, "learning_rate": 0.000119241379616965, "loss": 0.114, "step": 8330 }, { "epoch": 218.0392156862745, "grad_norm": 0.334634929895401, "learning_rate": 0.00011907909610007884, "loss": 0.116, "step": 8340 }, { "epoch": 218.30065359477123, "grad_norm": 0.32335221767425537, "learning_rate": 0.00011891676042167246, "loss": 0.1176, "step": 8350 }, { "epoch": 218.56209150326796, "grad_norm": 0.2827315926551819, "learning_rate": 0.00011875437302556543, "loss": 0.1162, "step": 8360 }, { "epoch": 218.8235294117647, "grad_norm": 0.3528691828250885, "learning_rate": 0.0001185919343557187, "loss": 0.1164, "step": 8370 }, { "epoch": 219.08496732026143, "grad_norm": 0.28620168566703796, "learning_rate": 0.00011842944485623335, "loss": 0.1172, "step": 8380 }, { "epoch": 219.34640522875816, "grad_norm": 0.38785773515701294, "learning_rate": 0.0001182669049713495, "loss": 0.1156, "step": 8390 }, { "epoch": 219.6078431372549, "grad_norm": 0.30017638206481934, "learning_rate": 0.00011810431514544496, "loss": 0.1149, "step": 8400 }, { "epoch": 219.86928104575162, "grad_norm": 0.3550892770290375, "learning_rate": 0.00011794167582303412, "loss": 0.1184, "step": 8410 }, { "epoch": 220.13071895424838, "grad_norm": 0.2966924011707306, "learning_rate": 0.00011777898744876673, "loss": 0.1166, "step": 8420 }, { "epoch": 220.3921568627451, "grad_norm": 0.2973492443561554, "learning_rate": 0.00011761625046742651, "loss": 0.1152, "step": 8430 }, { "epoch": 220.65359477124184, "grad_norm": 0.3460974395275116, "learning_rate": 0.00011745346532393017, "loss": 0.115, "step": 8440 }, { "epoch": 220.91503267973857, "grad_norm": 0.3842596113681793, "learning_rate": 0.0001172906324633261, "loss": 0.118, "step": 8450 }, { "epoch": 221.1764705882353, "grad_norm": 0.3122817575931549, "learning_rate": 0.00011712775233079311, "loss": 0.1159, "step": 8460 }, { "epoch": 221.43790849673204, "grad_norm": 0.3846839368343353, "learning_rate": 0.00011696482537163933, "loss": 0.1144, "step": 8470 }, { "epoch": 221.69934640522877, "grad_norm": 0.3940717875957489, "learning_rate": 0.00011680185203130075, "loss": 0.1156, "step": 8480 }, { "epoch": 221.9607843137255, "grad_norm": 0.35572728514671326, "learning_rate": 0.00011663883275534029, "loss": 0.1221, "step": 8490 }, { "epoch": 222.22222222222223, "grad_norm": 0.2821555733680725, "learning_rate": 0.0001164757679894464, "loss": 0.1159, "step": 8500 }, { "epoch": 222.48366013071896, "grad_norm": 0.4151257574558258, "learning_rate": 0.00011631265817943198, "loss": 0.1136, "step": 8510 }, { "epoch": 222.7450980392157, "grad_norm": 0.3188461661338806, "learning_rate": 0.000116149503771233, "loss": 0.1156, "step": 8520 }, { "epoch": 223.00653594771242, "grad_norm": 0.3545646667480469, "learning_rate": 0.00011598630521090734, "loss": 0.1183, "step": 8530 }, { "epoch": 223.26797385620915, "grad_norm": 0.3330908417701721, "learning_rate": 0.00011582306294463372, "loss": 0.1144, "step": 8540 }, { "epoch": 223.52941176470588, "grad_norm": 0.3198162913322449, "learning_rate": 0.00011565977741871018, "loss": 0.1182, "step": 8550 }, { "epoch": 223.79084967320262, "grad_norm": 0.38695403933525085, "learning_rate": 0.00011549644907955315, "loss": 0.1156, "step": 8560 }, { "epoch": 224.05228758169935, "grad_norm": 0.3442593514919281, "learning_rate": 0.00011533307837369607, "loss": 0.1191, "step": 8570 }, { "epoch": 224.31372549019608, "grad_norm": 0.3656689524650574, "learning_rate": 0.00011516966574778822, "loss": 0.1154, "step": 8580 }, { "epoch": 224.5751633986928, "grad_norm": 0.3869684636592865, "learning_rate": 0.00011500621164859347, "loss": 0.1176, "step": 8590 }, { "epoch": 224.83660130718954, "grad_norm": 0.36732858419418335, "learning_rate": 0.00011484271652298906, "loss": 0.1121, "step": 8600 }, { "epoch": 225.09803921568627, "grad_norm": 0.35245758295059204, "learning_rate": 0.00011467918081796445, "loss": 0.1189, "step": 8610 }, { "epoch": 225.359477124183, "grad_norm": 0.3855646848678589, "learning_rate": 0.00011451560498062, "loss": 0.1169, "step": 8620 }, { "epoch": 225.62091503267973, "grad_norm": 0.28450724482536316, "learning_rate": 0.00011435198945816584, "loss": 0.1142, "step": 8630 }, { "epoch": 225.88235294117646, "grad_norm": 0.3665756583213806, "learning_rate": 0.00011418833469792047, "loss": 0.1202, "step": 8640 }, { "epoch": 226.1437908496732, "grad_norm": 0.23904922604560852, "learning_rate": 0.00011402464114730989, "loss": 0.1133, "step": 8650 }, { "epoch": 226.40522875816993, "grad_norm": 0.3275870382785797, "learning_rate": 0.0001138609092538659, "loss": 0.1138, "step": 8660 }, { "epoch": 226.66666666666666, "grad_norm": 0.3351685404777527, "learning_rate": 0.00011369713946522532, "loss": 0.1143, "step": 8670 }, { "epoch": 226.9281045751634, "grad_norm": 0.29419705271720886, "learning_rate": 0.00011353333222912843, "loss": 0.1198, "step": 8680 }, { "epoch": 227.18954248366012, "grad_norm": 0.3791927993297577, "learning_rate": 0.00011336948799341798, "loss": 0.116, "step": 8690 }, { "epoch": 227.45098039215685, "grad_norm": 0.487420916557312, "learning_rate": 0.00011320560720603792, "loss": 0.1183, "step": 8700 }, { "epoch": 227.71241830065358, "grad_norm": 0.2873092591762543, "learning_rate": 0.00011304169031503197, "loss": 0.1146, "step": 8710 }, { "epoch": 227.9738562091503, "grad_norm": 0.3828370273113251, "learning_rate": 0.00011287773776854273, "loss": 0.1202, "step": 8720 }, { "epoch": 228.23529411764707, "grad_norm": 0.3244374990463257, "learning_rate": 0.00011271375001481015, "loss": 0.1141, "step": 8730 }, { "epoch": 228.4967320261438, "grad_norm": 0.3474271893501282, "learning_rate": 0.0001125497275021705, "loss": 0.1144, "step": 8740 }, { "epoch": 228.75816993464053, "grad_norm": 0.43531376123428345, "learning_rate": 0.00011238567067905507, "loss": 0.1183, "step": 8750 }, { "epoch": 229.01960784313727, "grad_norm": 0.27749860286712646, "learning_rate": 0.00011222157999398895, "loss": 0.1166, "step": 8760 }, { "epoch": 229.281045751634, "grad_norm": 0.35005658864974976, "learning_rate": 0.00011205745589558982, "loss": 0.1135, "step": 8770 }, { "epoch": 229.54248366013073, "grad_norm": 0.3147236704826355, "learning_rate": 0.00011189329883256668, "loss": 0.1161, "step": 8780 }, { "epoch": 229.80392156862746, "grad_norm": 0.3600068688392639, "learning_rate": 0.00011172910925371865, "loss": 0.121, "step": 8790 }, { "epoch": 230.0653594771242, "grad_norm": 0.2795291543006897, "learning_rate": 0.00011156488760793383, "loss": 0.1145, "step": 8800 }, { "epoch": 230.32679738562092, "grad_norm": 0.33064013719558716, "learning_rate": 0.00011140063434418788, "loss": 0.1133, "step": 8810 }, { "epoch": 230.58823529411765, "grad_norm": 0.38541939854621887, "learning_rate": 0.00011123634991154294, "loss": 0.1172, "step": 8820 }, { "epoch": 230.84967320261438, "grad_norm": 0.4045771062374115, "learning_rate": 0.00011107203475914643, "loss": 0.1183, "step": 8830 }, { "epoch": 231.11111111111111, "grad_norm": 0.2556995153427124, "learning_rate": 0.00011090768933622966, "loss": 0.1141, "step": 8840 }, { "epoch": 231.37254901960785, "grad_norm": 0.2857918441295624, "learning_rate": 0.00011074331409210677, "loss": 0.1128, "step": 8850 }, { "epoch": 231.63398692810458, "grad_norm": 0.4650552570819855, "learning_rate": 0.00011057890947617338, "loss": 0.1198, "step": 8860 }, { "epoch": 231.8954248366013, "grad_norm": 0.31443580985069275, "learning_rate": 0.00011041447593790544, "loss": 0.1166, "step": 8870 }, { "epoch": 232.15686274509804, "grad_norm": 0.32467028498649597, "learning_rate": 0.00011025001392685794, "loss": 0.1167, "step": 8880 }, { "epoch": 232.41830065359477, "grad_norm": 0.2936582565307617, "learning_rate": 0.00011008552389266376, "loss": 0.1145, "step": 8890 }, { "epoch": 232.6797385620915, "grad_norm": 0.31260111927986145, "learning_rate": 0.0001099210062850324, "loss": 0.1147, "step": 8900 }, { "epoch": 232.94117647058823, "grad_norm": 0.2891233265399933, "learning_rate": 0.00010975646155374868, "loss": 0.1194, "step": 8910 }, { "epoch": 233.20261437908496, "grad_norm": 0.3409523367881775, "learning_rate": 0.00010959189014867161, "loss": 0.1156, "step": 8920 }, { "epoch": 233.4640522875817, "grad_norm": 0.29765674471855164, "learning_rate": 0.00010942729251973313, "loss": 0.1151, "step": 8930 }, { "epoch": 233.72549019607843, "grad_norm": 0.3181908428668976, "learning_rate": 0.0001092626691169369, "loss": 0.1181, "step": 8940 }, { "epoch": 233.98692810457516, "grad_norm": 0.3303356170654297, "learning_rate": 0.00010909802039035701, "loss": 0.1163, "step": 8950 }, { "epoch": 234.2483660130719, "grad_norm": 0.3642708957195282, "learning_rate": 0.00010893334679013676, "loss": 0.1132, "step": 8960 }, { "epoch": 234.50980392156862, "grad_norm": 0.36728131771087646, "learning_rate": 0.00010876864876648751, "loss": 0.1159, "step": 8970 }, { "epoch": 234.77124183006535, "grad_norm": 0.34145310521125793, "learning_rate": 0.00010860392676968736, "loss": 0.1168, "step": 8980 }, { "epoch": 235.03267973856208, "grad_norm": 0.34180009365081787, "learning_rate": 0.00010843918125007995, "loss": 0.1201, "step": 8990 }, { "epoch": 235.2941176470588, "grad_norm": 0.3454407751560211, "learning_rate": 0.00010827441265807328, "loss": 0.1149, "step": 9000 }, { "epoch": 235.55555555555554, "grad_norm": 0.3704677224159241, "learning_rate": 0.00010810962144413834, "loss": 0.1117, "step": 9010 }, { "epoch": 235.81699346405227, "grad_norm": 0.3329799175262451, "learning_rate": 0.00010794480805880804, "loss": 0.1181, "step": 9020 }, { "epoch": 236.07843137254903, "grad_norm": 0.3676550090312958, "learning_rate": 0.00010777997295267588, "loss": 0.1148, "step": 9030 }, { "epoch": 236.33986928104576, "grad_norm": 0.34352195262908936, "learning_rate": 0.00010761511657639474, "loss": 0.1154, "step": 9040 }, { "epoch": 236.6013071895425, "grad_norm": 0.3431718349456787, "learning_rate": 0.00010745023938067569, "loss": 0.1154, "step": 9050 }, { "epoch": 236.86274509803923, "grad_norm": 0.37354880571365356, "learning_rate": 0.00010728534181628665, "loss": 0.1186, "step": 9060 }, { "epoch": 237.12418300653596, "grad_norm": 0.2959931790828705, "learning_rate": 0.00010712042433405125, "loss": 0.1153, "step": 9070 }, { "epoch": 237.3856209150327, "grad_norm": 0.3524629771709442, "learning_rate": 0.00010695548738484762, "loss": 0.1135, "step": 9080 }, { "epoch": 237.64705882352942, "grad_norm": 0.37540528178215027, "learning_rate": 0.00010679053141960709, "loss": 0.1199, "step": 9090 }, { "epoch": 237.90849673202615, "grad_norm": 0.4271695017814636, "learning_rate": 0.00010662555688931295, "loss": 0.119, "step": 9100 }, { "epoch": 238.16993464052288, "grad_norm": 0.3042963147163391, "learning_rate": 0.00010646056424499926, "loss": 0.1134, "step": 9110 }, { "epoch": 238.4313725490196, "grad_norm": 0.3255995512008667, "learning_rate": 0.00010629555393774962, "loss": 0.1155, "step": 9120 }, { "epoch": 238.69281045751634, "grad_norm": 0.3537774980068207, "learning_rate": 0.00010613052641869592, "loss": 0.1181, "step": 9130 }, { "epoch": 238.95424836601308, "grad_norm": 0.34757131338119507, "learning_rate": 0.00010596548213901708, "loss": 0.1167, "step": 9140 }, { "epoch": 239.2156862745098, "grad_norm": 0.2638850808143616, "learning_rate": 0.00010580042154993786, "loss": 0.1128, "step": 9150 }, { "epoch": 239.47712418300654, "grad_norm": 0.39982908964157104, "learning_rate": 0.00010563534510272763, "loss": 0.1166, "step": 9160 }, { "epoch": 239.73856209150327, "grad_norm": 0.4114007353782654, "learning_rate": 0.00010547025324869903, "loss": 0.1164, "step": 9170 }, { "epoch": 240.0, "grad_norm": 0.4774865210056305, "learning_rate": 0.00010530514643920697, "loss": 0.1184, "step": 9180 }, { "epoch": 240.26143790849673, "grad_norm": 0.2834888994693756, "learning_rate": 0.00010514002512564714, "loss": 0.112, "step": 9190 }, { "epoch": 240.52287581699346, "grad_norm": 0.3496600091457367, "learning_rate": 0.00010497488975945484, "loss": 0.1152, "step": 9200 }, { "epoch": 240.7843137254902, "grad_norm": 0.35766535997390747, "learning_rate": 0.00010480974079210392, "loss": 0.1181, "step": 9210 }, { "epoch": 241.04575163398692, "grad_norm": 0.30494368076324463, "learning_rate": 0.00010464457867510533, "loss": 0.1177, "step": 9220 }, { "epoch": 241.30718954248366, "grad_norm": 0.33818888664245605, "learning_rate": 0.00010447940386000601, "loss": 0.1138, "step": 9230 }, { "epoch": 241.5686274509804, "grad_norm": 0.36898425221443176, "learning_rate": 0.00010431421679838758, "loss": 0.1136, "step": 9240 }, { "epoch": 241.83006535947712, "grad_norm": 0.3632810711860657, "learning_rate": 0.00010414901794186514, "loss": 0.1196, "step": 9250 }, { "epoch": 242.09150326797385, "grad_norm": 0.3109024167060852, "learning_rate": 0.00010398380774208609, "loss": 0.1166, "step": 9260 }, { "epoch": 242.35294117647058, "grad_norm": 0.29130157828330994, "learning_rate": 0.00010381858665072878, "loss": 0.1143, "step": 9270 }, { "epoch": 242.6143790849673, "grad_norm": 0.32316336035728455, "learning_rate": 0.00010365335511950142, "loss": 0.1122, "step": 9280 }, { "epoch": 242.87581699346404, "grad_norm": 0.3277498185634613, "learning_rate": 0.00010348811360014063, "loss": 0.1192, "step": 9290 }, { "epoch": 243.13725490196077, "grad_norm": 0.3015691637992859, "learning_rate": 0.00010332286254441049, "loss": 0.1161, "step": 9300 }, { "epoch": 243.3986928104575, "grad_norm": 0.36535218358039856, "learning_rate": 0.00010315760240410097, "loss": 0.1162, "step": 9310 }, { "epoch": 243.66013071895424, "grad_norm": 0.40011629462242126, "learning_rate": 0.0001029923336310271, "loss": 0.1183, "step": 9320 }, { "epoch": 243.92156862745097, "grad_norm": 0.3597753643989563, "learning_rate": 0.00010282705667702734, "loss": 0.115, "step": 9330 }, { "epoch": 244.18300653594773, "grad_norm": 0.37653326988220215, "learning_rate": 0.00010266177199396257, "loss": 0.1148, "step": 9340 }, { "epoch": 244.44444444444446, "grad_norm": 0.30488893389701843, "learning_rate": 0.00010249648003371482, "loss": 0.1152, "step": 9350 }, { "epoch": 244.7058823529412, "grad_norm": 0.351938396692276, "learning_rate": 0.00010233118124818595, "loss": 0.1174, "step": 9360 }, { "epoch": 244.96732026143792, "grad_norm": 0.3121070861816406, "learning_rate": 0.0001021658760892966, "loss": 0.1136, "step": 9370 }, { "epoch": 245.22875816993465, "grad_norm": 0.33020684123039246, "learning_rate": 0.0001020005650089847, "loss": 0.1108, "step": 9380 }, { "epoch": 245.49019607843138, "grad_norm": 0.431669145822525, "learning_rate": 0.00010183524845920447, "loss": 0.1175, "step": 9390 }, { "epoch": 245.7516339869281, "grad_norm": 0.2955373227596283, "learning_rate": 0.00010166992689192505, "loss": 0.1176, "step": 9400 }, { "epoch": 246.01307189542484, "grad_norm": 0.3435670733451843, "learning_rate": 0.00010150460075912922, "loss": 0.1167, "step": 9410 }, { "epoch": 246.27450980392157, "grad_norm": 0.3121711313724518, "learning_rate": 0.00010133927051281243, "loss": 0.114, "step": 9420 }, { "epoch": 246.5359477124183, "grad_norm": 0.32946914434432983, "learning_rate": 0.00010117393660498116, "loss": 0.1133, "step": 9430 }, { "epoch": 246.79738562091504, "grad_norm": 0.30625325441360474, "learning_rate": 0.00010100859948765204, "loss": 0.119, "step": 9440 }, { "epoch": 247.05882352941177, "grad_norm": 0.312551885843277, "learning_rate": 0.00010084325961285046, "loss": 0.1157, "step": 9450 }, { "epoch": 247.3202614379085, "grad_norm": 0.24457718431949615, "learning_rate": 0.00010067791743260924, "loss": 0.11, "step": 9460 }, { "epoch": 247.58169934640523, "grad_norm": 0.33879950642585754, "learning_rate": 0.00010051257339896771, "loss": 0.1174, "step": 9470 }, { "epoch": 247.84313725490196, "grad_norm": 0.33240655064582825, "learning_rate": 0.00010034722796397004, "loss": 0.1179, "step": 9480 }, { "epoch": 248.1045751633987, "grad_norm": 0.30761128664016724, "learning_rate": 0.00010018188157966442, "loss": 0.1172, "step": 9490 }, { "epoch": 248.36601307189542, "grad_norm": 0.30122920870780945, "learning_rate": 0.00010001653469810145, "loss": 0.1152, "step": 9500 }, { "epoch": 248.62745098039215, "grad_norm": 0.3038076162338257, "learning_rate": 9.985118777133329e-05, "loss": 0.1161, "step": 9510 }, { "epoch": 248.88888888888889, "grad_norm": 0.38542667031288147, "learning_rate": 9.968584125141204e-05, "loss": 0.1167, "step": 9520 }, { "epoch": 249.15032679738562, "grad_norm": 0.2926845848560333, "learning_rate": 9.952049559038885e-05, "loss": 0.1153, "step": 9530 }, { "epoch": 249.41176470588235, "grad_norm": 0.35238757729530334, "learning_rate": 9.935515124031239e-05, "loss": 0.1134, "step": 9540 }, { "epoch": 249.67320261437908, "grad_norm": 0.31012189388275146, "learning_rate": 9.918980865322782e-05, "loss": 0.116, "step": 9550 }, { "epoch": 249.9346405228758, "grad_norm": 0.3544747531414032, "learning_rate": 9.902446828117545e-05, "loss": 0.1189, "step": 9560 }, { "epoch": 250.19607843137254, "grad_norm": 0.4231995940208435, "learning_rate": 9.885913057618955e-05, "loss": 0.1148, "step": 9570 }, { "epoch": 250.45751633986927, "grad_norm": 0.3783901333808899, "learning_rate": 9.869379599029708e-05, "loss": 0.1158, "step": 9580 }, { "epoch": 250.718954248366, "grad_norm": 0.32323822379112244, "learning_rate": 9.852846497551653e-05, "loss": 0.1172, "step": 9590 }, { "epoch": 250.98039215686273, "grad_norm": 0.34913671016693115, "learning_rate": 9.836313798385653e-05, "loss": 0.1143, "step": 9600 }, { "epoch": 251.24183006535947, "grad_norm": 0.2595481276512146, "learning_rate": 9.819781546731476e-05, "loss": 0.1137, "step": 9610 }, { "epoch": 251.5032679738562, "grad_norm": 0.30071359872817993, "learning_rate": 9.803249787787669e-05, "loss": 0.1141, "step": 9620 }, { "epoch": 251.76470588235293, "grad_norm": 0.32096344232559204, "learning_rate": 9.786718566751431e-05, "loss": 0.1176, "step": 9630 }, { "epoch": 252.0261437908497, "grad_norm": 0.30524900555610657, "learning_rate": 9.770187928818487e-05, "loss": 0.118, "step": 9640 }, { "epoch": 252.28758169934642, "grad_norm": 0.3532697856426239, "learning_rate": 9.75365791918297e-05, "loss": 0.1158, "step": 9650 }, { "epoch": 252.54901960784315, "grad_norm": 0.29913774132728577, "learning_rate": 9.737128583037295e-05, "loss": 0.1145, "step": 9660 }, { "epoch": 252.81045751633988, "grad_norm": 0.35638126730918884, "learning_rate": 9.720599965572036e-05, "loss": 0.1145, "step": 9670 }, { "epoch": 253.0718954248366, "grad_norm": 0.28130611777305603, "learning_rate": 9.704072111975802e-05, "loss": 0.1143, "step": 9680 }, { "epoch": 253.33333333333334, "grad_norm": 0.39420101046562195, "learning_rate": 9.687545067435116e-05, "loss": 0.1128, "step": 9690 }, { "epoch": 253.59477124183007, "grad_norm": 0.33940833806991577, "learning_rate": 9.671018877134284e-05, "loss": 0.1152, "step": 9700 }, { "epoch": 253.8562091503268, "grad_norm": 0.38873952627182007, "learning_rate": 9.654493586255278e-05, "loss": 0.1207, "step": 9710 }, { "epoch": 254.11764705882354, "grad_norm": 0.242060124874115, "learning_rate": 9.637969239977614e-05, "loss": 0.1143, "step": 9720 }, { "epoch": 254.37908496732027, "grad_norm": 0.29076987504959106, "learning_rate": 9.62144588347823e-05, "loss": 0.1119, "step": 9730 }, { "epoch": 254.640522875817, "grad_norm": 0.28226718306541443, "learning_rate": 9.604923561931337e-05, "loss": 0.1151, "step": 9740 }, { "epoch": 254.90196078431373, "grad_norm": 0.3857768177986145, "learning_rate": 9.588402320508342e-05, "loss": 0.1177, "step": 9750 }, { "epoch": 255.16339869281046, "grad_norm": 0.2802301347255707, "learning_rate": 9.571882204377687e-05, "loss": 0.1194, "step": 9760 }, { "epoch": 255.4248366013072, "grad_norm": 0.2925792932510376, "learning_rate": 9.555363258704737e-05, "loss": 0.1099, "step": 9770 }, { "epoch": 255.68627450980392, "grad_norm": 0.3970232605934143, "learning_rate": 9.538845528651665e-05, "loss": 0.1168, "step": 9780 }, { "epoch": 255.94771241830065, "grad_norm": 0.32095807790756226, "learning_rate": 9.5223290593773e-05, "loss": 0.1202, "step": 9790 }, { "epoch": 256.2091503267974, "grad_norm": 0.2640714943408966, "learning_rate": 9.50581389603705e-05, "loss": 0.1147, "step": 9800 }, { "epoch": 256.47058823529414, "grad_norm": 0.31910863518714905, "learning_rate": 9.489300083782737e-05, "loss": 0.1157, "step": 9810 }, { "epoch": 256.73202614379085, "grad_norm": 0.32858380675315857, "learning_rate": 9.472787667762493e-05, "loss": 0.1166, "step": 9820 }, { "epoch": 256.9934640522876, "grad_norm": 0.3285580575466156, "learning_rate": 9.456276693120639e-05, "loss": 0.1159, "step": 9830 }, { "epoch": 257.2549019607843, "grad_norm": 0.35466259717941284, "learning_rate": 9.43976720499754e-05, "loss": 0.1129, "step": 9840 }, { "epoch": 257.51633986928107, "grad_norm": 0.30083316564559937, "learning_rate": 9.423259248529511e-05, "loss": 0.1155, "step": 9850 }, { "epoch": 257.77777777777777, "grad_norm": 0.37580639123916626, "learning_rate": 9.406752868848673e-05, "loss": 0.1166, "step": 9860 }, { "epoch": 258.03921568627453, "grad_norm": 0.36361733078956604, "learning_rate": 9.390248111082842e-05, "loss": 0.1191, "step": 9870 }, { "epoch": 258.30065359477123, "grad_norm": 0.3550747334957123, "learning_rate": 9.373745020355387e-05, "loss": 0.115, "step": 9880 }, { "epoch": 258.562091503268, "grad_norm": 0.2830721437931061, "learning_rate": 9.357243641785134e-05, "loss": 0.1162, "step": 9890 }, { "epoch": 258.8235294117647, "grad_norm": 0.2829933762550354, "learning_rate": 9.340744020486222e-05, "loss": 0.1133, "step": 9900 }, { "epoch": 259.08496732026146, "grad_norm": 0.29842811822891235, "learning_rate": 9.324246201567984e-05, "loss": 0.1147, "step": 9910 }, { "epoch": 259.34640522875816, "grad_norm": 0.32608217000961304, "learning_rate": 9.30775023013483e-05, "loss": 0.1143, "step": 9920 }, { "epoch": 259.6078431372549, "grad_norm": 0.38989517092704773, "learning_rate": 9.291256151286109e-05, "loss": 0.1172, "step": 9930 }, { "epoch": 259.8692810457516, "grad_norm": 0.3222101330757141, "learning_rate": 9.274764010116008e-05, "loss": 0.1171, "step": 9940 }, { "epoch": 260.1307189542484, "grad_norm": 0.3502764105796814, "learning_rate": 9.25827385171341e-05, "loss": 0.1135, "step": 9950 }, { "epoch": 260.3921568627451, "grad_norm": 0.2739408612251282, "learning_rate": 9.241785721161779e-05, "loss": 0.1119, "step": 9960 }, { "epoch": 260.65359477124184, "grad_norm": 0.289969801902771, "learning_rate": 9.225299663539038e-05, "loss": 0.1165, "step": 9970 }, { "epoch": 260.91503267973854, "grad_norm": 0.4188239276409149, "learning_rate": 9.20881572391743e-05, "loss": 0.1183, "step": 9980 }, { "epoch": 261.1764705882353, "grad_norm": 0.30096152424812317, "learning_rate": 9.192333947363423e-05, "loss": 0.1143, "step": 9990 }, { "epoch": 261.437908496732, "grad_norm": 0.3192864954471588, "learning_rate": 9.175854378937563e-05, "loss": 0.1124, "step": 10000 }, { "epoch": 261.69934640522877, "grad_norm": 0.3204704523086548, "learning_rate": 9.159377063694365e-05, "loss": 0.1145, "step": 10010 }, { "epoch": 261.96078431372547, "grad_norm": 0.45152097940444946, "learning_rate": 9.142902046682171e-05, "loss": 0.119, "step": 10020 }, { "epoch": 262.22222222222223, "grad_norm": 0.3182998597621918, "learning_rate": 9.126429372943053e-05, "loss": 0.1164, "step": 10030 }, { "epoch": 262.48366013071893, "grad_norm": 0.2740117311477661, "learning_rate": 9.109959087512673e-05, "loss": 0.1142, "step": 10040 }, { "epoch": 262.7450980392157, "grad_norm": 0.3511454164981842, "learning_rate": 9.093491235420162e-05, "loss": 0.1154, "step": 10050 }, { "epoch": 263.0065359477124, "grad_norm": 0.3681274652481079, "learning_rate": 9.077025861688003e-05, "loss": 0.1172, "step": 10060 }, { "epoch": 263.26797385620915, "grad_norm": 0.31222420930862427, "learning_rate": 9.060563011331888e-05, "loss": 0.1122, "step": 10070 }, { "epoch": 263.52941176470586, "grad_norm": 0.32507622241973877, "learning_rate": 9.044102729360627e-05, "loss": 0.1127, "step": 10080 }, { "epoch": 263.7908496732026, "grad_norm": 0.4463390111923218, "learning_rate": 9.027645060776006e-05, "loss": 0.1199, "step": 10090 }, { "epoch": 264.0522875816994, "grad_norm": 0.27414682507514954, "learning_rate": 9.011190050572659e-05, "loss": 0.1166, "step": 10100 }, { "epoch": 264.3137254901961, "grad_norm": 0.35508808493614197, "learning_rate": 8.994737743737961e-05, "loss": 0.116, "step": 10110 }, { "epoch": 264.57516339869284, "grad_norm": 0.29134297370910645, "learning_rate": 8.978288185251881e-05, "loss": 0.1134, "step": 10120 }, { "epoch": 264.83660130718954, "grad_norm": 0.3992556631565094, "learning_rate": 8.961841420086886e-05, "loss": 0.1177, "step": 10130 }, { "epoch": 265.0980392156863, "grad_norm": 0.24055442214012146, "learning_rate": 8.945397493207809e-05, "loss": 0.1124, "step": 10140 }, { "epoch": 265.359477124183, "grad_norm": 0.3088826835155487, "learning_rate": 8.92895644957172e-05, "loss": 0.1138, "step": 10150 }, { "epoch": 265.62091503267976, "grad_norm": 0.4448016285896301, "learning_rate": 8.912518334127795e-05, "loss": 0.1183, "step": 10160 }, { "epoch": 265.88235294117646, "grad_norm": 0.31662651896476746, "learning_rate": 8.896083191817221e-05, "loss": 0.1166, "step": 10170 }, { "epoch": 266.1437908496732, "grad_norm": 0.26434990763664246, "learning_rate": 8.879651067573044e-05, "loss": 0.1122, "step": 10180 }, { "epoch": 266.4052287581699, "grad_norm": 0.25964638590812683, "learning_rate": 8.86322200632007e-05, "loss": 0.1123, "step": 10190 }, { "epoch": 266.6666666666667, "grad_norm": 0.35823872685432434, "learning_rate": 8.846796052974727e-05, "loss": 0.1156, "step": 10200 }, { "epoch": 266.9281045751634, "grad_norm": 0.37653812766075134, "learning_rate": 8.830373252444937e-05, "loss": 0.119, "step": 10210 }, { "epoch": 267.18954248366015, "grad_norm": 0.33142727613449097, "learning_rate": 8.813953649630012e-05, "loss": 0.1158, "step": 10220 }, { "epoch": 267.45098039215685, "grad_norm": 0.3292408883571625, "learning_rate": 8.797537289420519e-05, "loss": 0.1137, "step": 10230 }, { "epoch": 267.7124183006536, "grad_norm": 0.40815725922584534, "learning_rate": 8.781124216698161e-05, "loss": 0.1146, "step": 10240 }, { "epoch": 267.9738562091503, "grad_norm": 0.36364370584487915, "learning_rate": 8.764714476335657e-05, "loss": 0.1189, "step": 10250 }, { "epoch": 268.2352941176471, "grad_norm": 0.3509305715560913, "learning_rate": 8.748308113196602e-05, "loss": 0.1144, "step": 10260 }, { "epoch": 268.4967320261438, "grad_norm": 0.31511905789375305, "learning_rate": 8.731905172135369e-05, "loss": 0.113, "step": 10270 }, { "epoch": 268.75816993464053, "grad_norm": 0.37249597907066345, "learning_rate": 8.715505697996971e-05, "loss": 0.1169, "step": 10280 }, { "epoch": 269.01960784313724, "grad_norm": 0.3088653087615967, "learning_rate": 8.699109735616952e-05, "loss": 0.1186, "step": 10290 }, { "epoch": 269.281045751634, "grad_norm": 0.3084110617637634, "learning_rate": 8.68271732982124e-05, "loss": 0.1107, "step": 10300 }, { "epoch": 269.5424836601307, "grad_norm": 0.31472697854042053, "learning_rate": 8.666328525426045e-05, "loss": 0.117, "step": 10310 }, { "epoch": 269.80392156862746, "grad_norm": 0.28198713064193726, "learning_rate": 8.649943367237736e-05, "loss": 0.1148, "step": 10320 }, { "epoch": 270.06535947712416, "grad_norm": 0.3456663191318512, "learning_rate": 8.633561900052708e-05, "loss": 0.1184, "step": 10330 }, { "epoch": 270.3267973856209, "grad_norm": 0.2773241102695465, "learning_rate": 8.617184168657275e-05, "loss": 0.1117, "step": 10340 }, { "epoch": 270.5882352941176, "grad_norm": 0.34341636300086975, "learning_rate": 8.600810217827515e-05, "loss": 0.114, "step": 10350 }, { "epoch": 270.8496732026144, "grad_norm": 0.3856281042098999, "learning_rate": 8.584440092329193e-05, "loss": 0.1174, "step": 10360 }, { "epoch": 271.1111111111111, "grad_norm": 0.28968098759651184, "learning_rate": 8.568073836917607e-05, "loss": 0.1161, "step": 10370 }, { "epoch": 271.37254901960785, "grad_norm": 0.3231571912765503, "learning_rate": 8.551711496337469e-05, "loss": 0.1162, "step": 10380 }, { "epoch": 271.63398692810455, "grad_norm": 0.31247514486312866, "learning_rate": 8.535353115322806e-05, "loss": 0.1143, "step": 10390 }, { "epoch": 271.8954248366013, "grad_norm": 0.3014558255672455, "learning_rate": 8.518998738596791e-05, "loss": 0.1173, "step": 10400 }, { "epoch": 272.15686274509807, "grad_norm": 0.27148059010505676, "learning_rate": 8.502648410871675e-05, "loss": 0.11, "step": 10410 }, { "epoch": 272.41830065359477, "grad_norm": 0.35765767097473145, "learning_rate": 8.486302176848624e-05, "loss": 0.1151, "step": 10420 }, { "epoch": 272.67973856209153, "grad_norm": 0.3530411124229431, "learning_rate": 8.469960081217627e-05, "loss": 0.1165, "step": 10430 }, { "epoch": 272.94117647058823, "grad_norm": 0.3553902804851532, "learning_rate": 8.45362216865734e-05, "loss": 0.1165, "step": 10440 }, { "epoch": 273.202614379085, "grad_norm": 0.2596057653427124, "learning_rate": 8.437288483834997e-05, "loss": 0.1139, "step": 10450 }, { "epoch": 273.4640522875817, "grad_norm": 0.29487183690071106, "learning_rate": 8.420959071406266e-05, "loss": 0.118, "step": 10460 }, { "epoch": 273.72549019607845, "grad_norm": 0.3081933557987213, "learning_rate": 8.404633976015134e-05, "loss": 0.1147, "step": 10470 }, { "epoch": 273.98692810457516, "grad_norm": 0.3125523030757904, "learning_rate": 8.388313242293802e-05, "loss": 0.1136, "step": 10480 }, { "epoch": 274.2483660130719, "grad_norm": 0.35245898365974426, "learning_rate": 8.371996914862519e-05, "loss": 0.1136, "step": 10490 }, { "epoch": 274.5098039215686, "grad_norm": 0.36222124099731445, "learning_rate": 8.355685038329504e-05, "loss": 0.1148, "step": 10500 }, { "epoch": 274.7712418300654, "grad_norm": 0.34575581550598145, "learning_rate": 8.339377657290808e-05, "loss": 0.114, "step": 10510 }, { "epoch": 275.0326797385621, "grad_norm": 0.4005025625228882, "learning_rate": 8.323074816330183e-05, "loss": 0.1188, "step": 10520 }, { "epoch": 275.29411764705884, "grad_norm": 0.32579559087753296, "learning_rate": 8.306776560018985e-05, "loss": 0.1138, "step": 10530 }, { "epoch": 275.55555555555554, "grad_norm": 0.27991876006126404, "learning_rate": 8.29048293291601e-05, "loss": 0.1125, "step": 10540 }, { "epoch": 275.8169934640523, "grad_norm": 0.3079290986061096, "learning_rate": 8.27419397956742e-05, "loss": 0.1142, "step": 10550 }, { "epoch": 276.078431372549, "grad_norm": 0.25126445293426514, "learning_rate": 8.257909744506589e-05, "loss": 0.1178, "step": 10560 }, { "epoch": 276.33986928104576, "grad_norm": 0.2905788719654083, "learning_rate": 8.241630272253998e-05, "loss": 0.1125, "step": 10570 }, { "epoch": 276.60130718954247, "grad_norm": 0.38739460706710815, "learning_rate": 8.225355607317096e-05, "loss": 0.1152, "step": 10580 }, { "epoch": 276.8627450980392, "grad_norm": 0.33940622210502625, "learning_rate": 8.2090857941902e-05, "loss": 0.1168, "step": 10590 }, { "epoch": 277.12418300653593, "grad_norm": 0.2534843981266022, "learning_rate": 8.192820877354357e-05, "loss": 0.1112, "step": 10600 }, { "epoch": 277.3856209150327, "grad_norm": 0.3085792660713196, "learning_rate": 8.176560901277229e-05, "loss": 0.1165, "step": 10610 }, { "epoch": 277.6470588235294, "grad_norm": 0.32212698459625244, "learning_rate": 8.16030591041297e-05, "loss": 0.1138, "step": 10620 }, { "epoch": 277.90849673202615, "grad_norm": 0.31586194038391113, "learning_rate": 8.144055949202101e-05, "loss": 0.1186, "step": 10630 }, { "epoch": 278.16993464052285, "grad_norm": 0.2730450928211212, "learning_rate": 8.127811062071398e-05, "loss": 0.1163, "step": 10640 }, { "epoch": 278.4313725490196, "grad_norm": 0.2704620659351349, "learning_rate": 8.111571293433764e-05, "loss": 0.1104, "step": 10650 }, { "epoch": 278.6928104575163, "grad_norm": 0.3569394052028656, "learning_rate": 8.095336687688102e-05, "loss": 0.1171, "step": 10660 }, { "epoch": 278.9542483660131, "grad_norm": 0.28940895199775696, "learning_rate": 8.079107289219209e-05, "loss": 0.1172, "step": 10670 }, { "epoch": 279.2156862745098, "grad_norm": 0.30181649327278137, "learning_rate": 8.062883142397635e-05, "loss": 0.1124, "step": 10680 }, { "epoch": 279.47712418300654, "grad_norm": 0.3073585629463196, "learning_rate": 8.046664291579584e-05, "loss": 0.1137, "step": 10690 }, { "epoch": 279.73856209150324, "grad_norm": 0.30750972032546997, "learning_rate": 8.03045078110677e-05, "loss": 0.115, "step": 10700 }, { "epoch": 280.0, "grad_norm": 0.33145976066589355, "learning_rate": 8.014242655306315e-05, "loss": 0.1187, "step": 10710 }, { "epoch": 280.26143790849676, "grad_norm": 0.2784498631954193, "learning_rate": 7.998039958490613e-05, "loss": 0.1113, "step": 10720 }, { "epoch": 280.52287581699346, "grad_norm": 0.3815988302230835, "learning_rate": 7.981842734957221e-05, "loss": 0.1154, "step": 10730 }, { "epoch": 280.7843137254902, "grad_norm": 0.35885512828826904, "learning_rate": 7.965651028988726e-05, "loss": 0.1192, "step": 10740 }, { "epoch": 281.0457516339869, "grad_norm": 0.3109668791294098, "learning_rate": 7.949464884852638e-05, "loss": 0.114, "step": 10750 }, { "epoch": 281.3071895424837, "grad_norm": 0.2830188274383545, "learning_rate": 7.933284346801258e-05, "loss": 0.1111, "step": 10760 }, { "epoch": 281.5686274509804, "grad_norm": 0.30252325534820557, "learning_rate": 7.917109459071553e-05, "loss": 0.113, "step": 10770 }, { "epoch": 281.83006535947715, "grad_norm": 0.3681179881095886, "learning_rate": 7.900940265885052e-05, "loss": 0.1172, "step": 10780 }, { "epoch": 282.09150326797385, "grad_norm": 0.3035522401332855, "learning_rate": 7.884776811447712e-05, "loss": 0.1148, "step": 10790 }, { "epoch": 282.3529411764706, "grad_norm": 0.2895407974720001, "learning_rate": 7.8686191399498e-05, "loss": 0.1125, "step": 10800 }, { "epoch": 282.6143790849673, "grad_norm": 0.37216058373451233, "learning_rate": 7.852467295565775e-05, "loss": 0.1155, "step": 10810 }, { "epoch": 282.87581699346407, "grad_norm": 0.3305184543132782, "learning_rate": 7.836321322454159e-05, "loss": 0.1163, "step": 10820 }, { "epoch": 283.1372549019608, "grad_norm": 0.30414262413978577, "learning_rate": 7.820181264757427e-05, "loss": 0.1154, "step": 10830 }, { "epoch": 283.39869281045753, "grad_norm": 0.2971784174442291, "learning_rate": 7.80404716660188e-05, "loss": 0.1126, "step": 10840 }, { "epoch": 283.66013071895424, "grad_norm": 0.36141228675842285, "learning_rate": 7.787919072097531e-05, "loss": 0.1155, "step": 10850 }, { "epoch": 283.921568627451, "grad_norm": 0.2840782403945923, "learning_rate": 7.771797025337968e-05, "loss": 0.1198, "step": 10860 }, { "epoch": 284.1830065359477, "grad_norm": 0.2825259268283844, "learning_rate": 7.755681070400253e-05, "loss": 0.1123, "step": 10870 }, { "epoch": 284.44444444444446, "grad_norm": 0.3692414462566376, "learning_rate": 7.739571251344794e-05, "loss": 0.1157, "step": 10880 }, { "epoch": 284.70588235294116, "grad_norm": 0.2816650867462158, "learning_rate": 7.723467612215219e-05, "loss": 0.1159, "step": 10890 }, { "epoch": 284.9673202614379, "grad_norm": 0.33248651027679443, "learning_rate": 7.707370197038265e-05, "loss": 0.1158, "step": 10900 }, { "epoch": 285.2287581699346, "grad_norm": 0.3684132695198059, "learning_rate": 7.691279049823646e-05, "loss": 0.1138, "step": 10910 }, { "epoch": 285.4901960784314, "grad_norm": 0.28321191668510437, "learning_rate": 7.675194214563948e-05, "loss": 0.1137, "step": 10920 }, { "epoch": 285.7516339869281, "grad_norm": 0.39478781819343567, "learning_rate": 7.659115735234494e-05, "loss": 0.1193, "step": 10930 }, { "epoch": 286.01307189542484, "grad_norm": 0.3251636028289795, "learning_rate": 7.643043655793235e-05, "loss": 0.1118, "step": 10940 }, { "epoch": 286.27450980392155, "grad_norm": 0.32104846835136414, "learning_rate": 7.62697802018062e-05, "loss": 0.1134, "step": 10950 }, { "epoch": 286.5359477124183, "grad_norm": 0.3238525092601776, "learning_rate": 7.610918872319483e-05, "loss": 0.1126, "step": 10960 }, { "epoch": 286.797385620915, "grad_norm": 0.3236340284347534, "learning_rate": 7.594866256114921e-05, "loss": 0.1178, "step": 10970 }, { "epoch": 287.05882352941177, "grad_norm": 0.2717225253582001, "learning_rate": 7.578820215454178e-05, "loss": 0.1153, "step": 10980 }, { "epoch": 287.32026143790847, "grad_norm": 0.34041455388069153, "learning_rate": 7.562780794206514e-05, "loss": 0.1129, "step": 10990 }, { "epoch": 287.58169934640523, "grad_norm": 0.27632108330726624, "learning_rate": 7.546748036223091e-05, "loss": 0.1154, "step": 11000 }, { "epoch": 287.84313725490193, "grad_norm": 0.4436096251010895, "learning_rate": 7.530721985336861e-05, "loss": 0.1157, "step": 11010 }, { "epoch": 288.1045751633987, "grad_norm": 0.3542175889015198, "learning_rate": 7.514702685362434e-05, "loss": 0.1161, "step": 11020 }, { "epoch": 288.36601307189545, "grad_norm": 0.32913169264793396, "learning_rate": 7.498690180095963e-05, "loss": 0.115, "step": 11030 }, { "epoch": 288.62745098039215, "grad_norm": 0.4230281710624695, "learning_rate": 7.48268451331503e-05, "loss": 0.1147, "step": 11040 }, { "epoch": 288.8888888888889, "grad_norm": 0.31047868728637695, "learning_rate": 7.466685728778513e-05, "loss": 0.1136, "step": 11050 }, { "epoch": 289.1503267973856, "grad_norm": 0.2447769194841385, "learning_rate": 7.450693870226478e-05, "loss": 0.1124, "step": 11060 }, { "epoch": 289.4117647058824, "grad_norm": 0.2783534824848175, "learning_rate": 7.434708981380057e-05, "loss": 0.1145, "step": 11070 }, { "epoch": 289.6732026143791, "grad_norm": 0.2729416787624359, "learning_rate": 7.418731105941328e-05, "loss": 0.1152, "step": 11080 }, { "epoch": 289.93464052287584, "grad_norm": 0.29936328530311584, "learning_rate": 7.402760287593189e-05, "loss": 0.1175, "step": 11090 }, { "epoch": 290.19607843137254, "grad_norm": 0.3042921721935272, "learning_rate": 7.386796569999246e-05, "loss": 0.1128, "step": 11100 }, { "epoch": 290.4575163398693, "grad_norm": 0.3261522948741913, "learning_rate": 7.370839996803697e-05, "loss": 0.1144, "step": 11110 }, { "epoch": 290.718954248366, "grad_norm": 0.30387112498283386, "learning_rate": 7.354890611631202e-05, "loss": 0.1139, "step": 11120 }, { "epoch": 290.98039215686276, "grad_norm": 0.3516126275062561, "learning_rate": 7.338948458086774e-05, "loss": 0.1167, "step": 11130 }, { "epoch": 291.24183006535947, "grad_norm": 0.2861202359199524, "learning_rate": 7.323013579755647e-05, "loss": 0.1138, "step": 11140 }, { "epoch": 291.5032679738562, "grad_norm": 0.3707253634929657, "learning_rate": 7.307086020203173e-05, "loss": 0.1136, "step": 11150 }, { "epoch": 291.7647058823529, "grad_norm": 0.34529221057891846, "learning_rate": 7.29116582297469e-05, "loss": 0.1167, "step": 11160 }, { "epoch": 292.0261437908497, "grad_norm": 0.2350694239139557, "learning_rate": 7.275253031595413e-05, "loss": 0.1139, "step": 11170 }, { "epoch": 292.2875816993464, "grad_norm": 0.35073357820510864, "learning_rate": 7.259347689570304e-05, "loss": 0.1141, "step": 11180 }, { "epoch": 292.54901960784315, "grad_norm": 0.3456834554672241, "learning_rate": 7.243449840383958e-05, "loss": 0.1117, "step": 11190 }, { "epoch": 292.81045751633985, "grad_norm": 0.3201320171356201, "learning_rate": 7.227559527500489e-05, "loss": 0.1177, "step": 11200 }, { "epoch": 293.0718954248366, "grad_norm": 0.2568330466747284, "learning_rate": 7.211676794363407e-05, "loss": 0.1144, "step": 11210 }, { "epoch": 293.3333333333333, "grad_norm": 0.29000136256217957, "learning_rate": 7.1958016843955e-05, "loss": 0.1121, "step": 11220 }, { "epoch": 293.5947712418301, "grad_norm": 0.30854013562202454, "learning_rate": 7.179934240998706e-05, "loss": 0.1206, "step": 11230 }, { "epoch": 293.8562091503268, "grad_norm": 0.27561551332473755, "learning_rate": 7.164074507554015e-05, "loss": 0.1137, "step": 11240 }, { "epoch": 294.11764705882354, "grad_norm": 0.28580352663993835, "learning_rate": 7.148222527421331e-05, "loss": 0.1125, "step": 11250 }, { "epoch": 294.37908496732024, "grad_norm": 0.29733917117118835, "learning_rate": 7.132378343939361e-05, "loss": 0.1142, "step": 11260 }, { "epoch": 294.640522875817, "grad_norm": 0.3131226897239685, "learning_rate": 7.116542000425501e-05, "loss": 0.1151, "step": 11270 }, { "epoch": 294.9019607843137, "grad_norm": 0.2978150248527527, "learning_rate": 7.100713540175706e-05, "loss": 0.1152, "step": 11280 }, { "epoch": 295.16339869281046, "grad_norm": 0.31860676407814026, "learning_rate": 7.084893006464383e-05, "loss": 0.1136, "step": 11290 }, { "epoch": 295.42483660130716, "grad_norm": 0.36646994948387146, "learning_rate": 7.069080442544267e-05, "loss": 0.1127, "step": 11300 }, { "epoch": 295.6862745098039, "grad_norm": 0.3542885184288025, "learning_rate": 7.053275891646303e-05, "loss": 0.1134, "step": 11310 }, { "epoch": 295.9477124183006, "grad_norm": 0.46107035875320435, "learning_rate": 7.037479396979535e-05, "loss": 0.1177, "step": 11320 }, { "epoch": 296.2091503267974, "grad_norm": 0.32113420963287354, "learning_rate": 7.02169100173097e-05, "loss": 0.1149, "step": 11330 }, { "epoch": 296.47058823529414, "grad_norm": 0.3642306625843048, "learning_rate": 7.005910749065478e-05, "loss": 0.1157, "step": 11340 }, { "epoch": 296.73202614379085, "grad_norm": 0.35815343260765076, "learning_rate": 6.990138682125671e-05, "loss": 0.1135, "step": 11350 }, { "epoch": 296.9934640522876, "grad_norm": 0.365302175283432, "learning_rate": 6.974374844031779e-05, "loss": 0.1158, "step": 11360 }, { "epoch": 297.2549019607843, "grad_norm": 0.26450252532958984, "learning_rate": 6.958619277881524e-05, "loss": 0.1148, "step": 11370 }, { "epoch": 297.51633986928107, "grad_norm": 0.29163211584091187, "learning_rate": 6.942872026750029e-05, "loss": 0.1134, "step": 11380 }, { "epoch": 297.77777777777777, "grad_norm": 0.3450673520565033, "learning_rate": 6.927133133689678e-05, "loss": 0.1158, "step": 11390 }, { "epoch": 298.03921568627453, "grad_norm": 0.2773265838623047, "learning_rate": 6.911402641730003e-05, "loss": 0.1136, "step": 11400 }, { "epoch": 298.30065359477123, "grad_norm": 0.29598814249038696, "learning_rate": 6.895680593877571e-05, "loss": 0.1168, "step": 11410 }, { "epoch": 298.562091503268, "grad_norm": 0.2809271812438965, "learning_rate": 6.879967033115853e-05, "loss": 0.1142, "step": 11420 }, { "epoch": 298.8235294117647, "grad_norm": 0.2930368185043335, "learning_rate": 6.864262002405129e-05, "loss": 0.1148, "step": 11430 }, { "epoch": 299.08496732026146, "grad_norm": 0.25596320629119873, "learning_rate": 6.848565544682352e-05, "loss": 0.1114, "step": 11440 }, { "epoch": 299.34640522875816, "grad_norm": 0.26733312010765076, "learning_rate": 6.832877702861037e-05, "loss": 0.1138, "step": 11450 }, { "epoch": 299.6078431372549, "grad_norm": 0.33392226696014404, "learning_rate": 6.817198519831154e-05, "loss": 0.116, "step": 11460 }, { "epoch": 299.8692810457516, "grad_norm": 0.28333500027656555, "learning_rate": 6.801528038458974e-05, "loss": 0.1129, "step": 11470 }, { "epoch": 300.1307189542484, "grad_norm": 0.28260430693626404, "learning_rate": 6.785866301587007e-05, "loss": 0.1151, "step": 11480 }, { "epoch": 300.3921568627451, "grad_norm": 0.35571715235710144, "learning_rate": 6.770213352033839e-05, "loss": 0.1129, "step": 11490 }, { "epoch": 300.65359477124184, "grad_norm": 0.3103904724121094, "learning_rate": 6.754569232594042e-05, "loss": 0.1145, "step": 11500 }, { "epoch": 300.91503267973854, "grad_norm": 0.3481941223144531, "learning_rate": 6.738933986038033e-05, "loss": 0.1163, "step": 11510 }, { "epoch": 301.1764705882353, "grad_norm": 0.2778976559638977, "learning_rate": 6.72330765511198e-05, "loss": 0.1141, "step": 11520 }, { "epoch": 301.437908496732, "grad_norm": 0.472317099571228, "learning_rate": 6.70769028253768e-05, "loss": 0.1159, "step": 11530 }, { "epoch": 301.69934640522877, "grad_norm": 0.3168316185474396, "learning_rate": 6.692081911012431e-05, "loss": 0.117, "step": 11540 }, { "epoch": 301.96078431372547, "grad_norm": 0.2718193531036377, "learning_rate": 6.676482583208929e-05, "loss": 0.114, "step": 11550 }, { "epoch": 302.22222222222223, "grad_norm": 0.32255101203918457, "learning_rate": 6.660892341775132e-05, "loss": 0.1129, "step": 11560 }, { "epoch": 302.48366013071893, "grad_norm": 0.4141261577606201, "learning_rate": 6.645311229334167e-05, "loss": 0.1154, "step": 11570 }, { "epoch": 302.7450980392157, "grad_norm": 0.28321996331214905, "learning_rate": 6.629739288484204e-05, "loss": 0.1125, "step": 11580 }, { "epoch": 303.0065359477124, "grad_norm": 0.27643659710884094, "learning_rate": 6.614176561798335e-05, "loss": 0.1153, "step": 11590 }, { "epoch": 303.26797385620915, "grad_norm": 0.3224067986011505, "learning_rate": 6.598623091824461e-05, "loss": 0.1103, "step": 11600 }, { "epoch": 303.52941176470586, "grad_norm": 0.31204745173454285, "learning_rate": 6.583078921085167e-05, "loss": 0.114, "step": 11610 }, { "epoch": 303.7908496732026, "grad_norm": 0.3363180458545685, "learning_rate": 6.567544092077631e-05, "loss": 0.1164, "step": 11620 }, { "epoch": 304.0522875816994, "grad_norm": 0.3073705732822418, "learning_rate": 6.55357076825483e-05, "loss": 0.1173, "step": 11630 }, { "epoch": 304.3137254901961, "grad_norm": 0.38721707463264465, "learning_rate": 6.538053805525763e-05, "loss": 0.1128, "step": 11640 }, { "epoch": 304.57516339869284, "grad_norm": 0.3045378625392914, "learning_rate": 6.522546307625399e-05, "loss": 0.1154, "step": 11650 }, { "epoch": 304.83660130718954, "grad_norm": 0.29825395345687866, "learning_rate": 6.507048316950648e-05, "loss": 0.1135, "step": 11660 }, { "epoch": 305.0980392156863, "grad_norm": 0.32951053977012634, "learning_rate": 6.491559875872415e-05, "loss": 0.1168, "step": 11670 }, { "epoch": 305.359477124183, "grad_norm": 0.25783228874206543, "learning_rate": 6.476081026735513e-05, "loss": 0.1094, "step": 11680 }, { "epoch": 305.62091503267976, "grad_norm": 0.3310535252094269, "learning_rate": 6.460611811858521e-05, "loss": 0.1162, "step": 11690 }, { "epoch": 305.88235294117646, "grad_norm": 0.3044494390487671, "learning_rate": 6.445152273533687e-05, "loss": 0.1172, "step": 11700 }, { "epoch": 306.1437908496732, "grad_norm": 0.2858486771583557, "learning_rate": 6.429702454026798e-05, "loss": 0.1128, "step": 11710 }, { "epoch": 306.4052287581699, "grad_norm": 0.28763970732688904, "learning_rate": 6.414262395577065e-05, "loss": 0.1123, "step": 11720 }, { "epoch": 306.6666666666667, "grad_norm": 0.33545467257499695, "learning_rate": 6.398832140397022e-05, "loss": 0.117, "step": 11730 }, { "epoch": 306.9281045751634, "grad_norm": 0.33642587065696716, "learning_rate": 6.383411730672394e-05, "loss": 0.1168, "step": 11740 }, { "epoch": 307.18954248366015, "grad_norm": 0.3563029170036316, "learning_rate": 6.368001208561998e-05, "loss": 0.1135, "step": 11750 }, { "epoch": 307.45098039215685, "grad_norm": 0.31238290667533875, "learning_rate": 6.352600616197615e-05, "loss": 0.1139, "step": 11760 }, { "epoch": 307.7124183006536, "grad_norm": 0.3694748282432556, "learning_rate": 6.337209995683867e-05, "loss": 0.1166, "step": 11770 }, { "epoch": 307.9738562091503, "grad_norm": 0.2764953672885895, "learning_rate": 6.321829389098126e-05, "loss": 0.1135, "step": 11780 }, { "epoch": 308.2352941176471, "grad_norm": 0.28875818848609924, "learning_rate": 6.306458838490385e-05, "loss": 0.1121, "step": 11790 }, { "epoch": 308.4967320261438, "grad_norm": 0.3236418068408966, "learning_rate": 6.291098385883146e-05, "loss": 0.1093, "step": 11800 }, { "epoch": 308.75816993464053, "grad_norm": 0.2844972014427185, "learning_rate": 6.275748073271292e-05, "loss": 0.1184, "step": 11810 }, { "epoch": 309.01960784313724, "grad_norm": 0.39473089575767517, "learning_rate": 6.260407942621998e-05, "loss": 0.1169, "step": 11820 }, { "epoch": 309.281045751634, "grad_norm": 0.2659190893173218, "learning_rate": 6.245078035874591e-05, "loss": 0.1132, "step": 11830 }, { "epoch": 309.5424836601307, "grad_norm": 0.28653818368911743, "learning_rate": 6.22975839494045e-05, "loss": 0.1137, "step": 11840 }, { "epoch": 309.80392156862746, "grad_norm": 0.311483770608902, "learning_rate": 6.214449061702898e-05, "loss": 0.1127, "step": 11850 }, { "epoch": 310.06535947712416, "grad_norm": 0.2597990930080414, "learning_rate": 6.199150078017057e-05, "loss": 0.1135, "step": 11860 }, { "epoch": 310.3267973856209, "grad_norm": 0.32311543822288513, "learning_rate": 6.183861485709765e-05, "loss": 0.1154, "step": 11870 }, { "epoch": 310.5882352941176, "grad_norm": 0.3250320553779602, "learning_rate": 6.168583326579456e-05, "loss": 0.1135, "step": 11880 }, { "epoch": 310.8496732026144, "grad_norm": 0.2815916836261749, "learning_rate": 6.153315642396025e-05, "loss": 0.1145, "step": 11890 }, { "epoch": 311.1111111111111, "grad_norm": 0.3453966975212097, "learning_rate": 6.13805847490075e-05, "loss": 0.1141, "step": 11900 }, { "epoch": 311.37254901960785, "grad_norm": 0.3138068914413452, "learning_rate": 6.122811865806131e-05, "loss": 0.1127, "step": 11910 }, { "epoch": 311.63398692810455, "grad_norm": 0.3160851299762726, "learning_rate": 6.107575856795822e-05, "loss": 0.1143, "step": 11920 }, { "epoch": 311.8954248366013, "grad_norm": 0.3921455144882202, "learning_rate": 6.0923504895244875e-05, "loss": 0.1145, "step": 11930 }, { "epoch": 312.15686274509807, "grad_norm": 0.31224459409713745, "learning_rate": 6.077135805617705e-05, "loss": 0.1113, "step": 11940 }, { "epoch": 312.41830065359477, "grad_norm": 0.330119788646698, "learning_rate": 6.061931846671833e-05, "loss": 0.1154, "step": 11950 }, { "epoch": 312.67973856209153, "grad_norm": 0.3644309639930725, "learning_rate": 6.046738654253918e-05, "loss": 0.1165, "step": 11960 }, { "epoch": 312.94117647058823, "grad_norm": 0.30337658524513245, "learning_rate": 6.031556269901567e-05, "loss": 0.1154, "step": 11970 }, { "epoch": 313.202614379085, "grad_norm": 0.31728994846343994, "learning_rate": 6.0163847351228395e-05, "loss": 0.1113, "step": 11980 }, { "epoch": 313.4640522875817, "grad_norm": 0.32171520590782166, "learning_rate": 6.0012240913961334e-05, "loss": 0.1156, "step": 11990 }, { "epoch": 313.72549019607845, "grad_norm": 0.2858985364437103, "learning_rate": 5.986074380170068e-05, "loss": 0.1139, "step": 12000 }, { "epoch": 313.98692810457516, "grad_norm": 0.32188278436660767, "learning_rate": 5.9709356428633746e-05, "loss": 0.1163, "step": 12010 }, { "epoch": 314.2483660130719, "grad_norm": 0.2941359877586365, "learning_rate": 5.955807920864784e-05, "loss": 0.1123, "step": 12020 }, { "epoch": 314.5098039215686, "grad_norm": 0.29654598236083984, "learning_rate": 5.940691255532912e-05, "loss": 0.1116, "step": 12030 }, { "epoch": 314.7712418300654, "grad_norm": 0.29309600591659546, "learning_rate": 5.9255856881961444e-05, "loss": 0.113, "step": 12040 }, { "epoch": 315.0326797385621, "grad_norm": 0.3037513792514801, "learning_rate": 5.910491260152522e-05, "loss": 0.1196, "step": 12050 }, { "epoch": 315.29411764705884, "grad_norm": 0.37008363008499146, "learning_rate": 5.8954080126696366e-05, "loss": 0.1123, "step": 12060 }, { "epoch": 315.55555555555554, "grad_norm": 0.2903066575527191, "learning_rate": 5.880335986984512e-05, "loss": 0.1139, "step": 12070 }, { "epoch": 315.8169934640523, "grad_norm": 0.42981478571891785, "learning_rate": 5.865275224303491e-05, "loss": 0.1145, "step": 12080 }, { "epoch": 316.078431372549, "grad_norm": 0.2848221957683563, "learning_rate": 5.850225765802122e-05, "loss": 0.1178, "step": 12090 }, { "epoch": 316.33986928104576, "grad_norm": 0.3206409811973572, "learning_rate": 5.835187652625047e-05, "loss": 0.1125, "step": 12100 }, { "epoch": 316.60130718954247, "grad_norm": 0.30614250898361206, "learning_rate": 5.820160925885902e-05, "loss": 0.1139, "step": 12110 }, { "epoch": 316.8627450980392, "grad_norm": 0.32410693168640137, "learning_rate": 5.8051456266671746e-05, "loss": 0.1144, "step": 12120 }, { "epoch": 317.12418300653593, "grad_norm": 0.3014141619205475, "learning_rate": 5.790141796020132e-05, "loss": 0.114, "step": 12130 }, { "epoch": 317.3856209150327, "grad_norm": 0.30295130610466003, "learning_rate": 5.7751494749646575e-05, "loss": 0.115, "step": 12140 }, { "epoch": 317.6470588235294, "grad_norm": 0.3472057282924652, "learning_rate": 5.7601687044891925e-05, "loss": 0.1126, "step": 12150 }, { "epoch": 317.90849673202615, "grad_norm": 0.2875531315803528, "learning_rate": 5.745199525550596e-05, "loss": 0.1153, "step": 12160 }, { "epoch": 318.16993464052285, "grad_norm": 0.3130854070186615, "learning_rate": 5.730241979074025e-05, "loss": 0.1143, "step": 12170 }, { "epoch": 318.4313725490196, "grad_norm": 0.27834898233413696, "learning_rate": 5.71529610595285e-05, "loss": 0.1129, "step": 12180 }, { "epoch": 318.6928104575163, "grad_norm": 0.37341535091400146, "learning_rate": 5.7003619470485016e-05, "loss": 0.1133, "step": 12190 }, { "epoch": 318.9542483660131, "grad_norm": 0.31970056891441345, "learning_rate": 5.6854395431904094e-05, "loss": 0.1151, "step": 12200 }, { "epoch": 319.2156862745098, "grad_norm": 0.3472343683242798, "learning_rate": 5.6705289351758584e-05, "loss": 0.1109, "step": 12210 }, { "epoch": 319.47712418300654, "grad_norm": 0.3212395906448364, "learning_rate": 5.6556301637698785e-05, "loss": 0.1119, "step": 12220 }, { "epoch": 319.73856209150324, "grad_norm": 0.40940025448799133, "learning_rate": 5.6407432697051424e-05, "loss": 0.1174, "step": 12230 }, { "epoch": 320.0, "grad_norm": 0.3223443925380707, "learning_rate": 5.625868293681844e-05, "loss": 0.1155, "step": 12240 }, { "epoch": 320.26143790849676, "grad_norm": 0.295582115650177, "learning_rate": 5.611005276367605e-05, "loss": 0.1145, "step": 12250 }, { "epoch": 320.52287581699346, "grad_norm": 0.3024216592311859, "learning_rate": 5.596154258397353e-05, "loss": 0.1104, "step": 12260 }, { "epoch": 320.7843137254902, "grad_norm": 0.3048156797885895, "learning_rate": 5.581315280373195e-05, "loss": 0.1134, "step": 12270 }, { "epoch": 321.0457516339869, "grad_norm": 0.256906121969223, "learning_rate": 5.566488382864334e-05, "loss": 0.1156, "step": 12280 }, { "epoch": 321.3071895424837, "grad_norm": 0.2839376628398895, "learning_rate": 5.55167360640694e-05, "loss": 0.1141, "step": 12290 }, { "epoch": 321.5686274509804, "grad_norm": 0.27784302830696106, "learning_rate": 5.536870991504044e-05, "loss": 0.1135, "step": 12300 }, { "epoch": 321.83006535947715, "grad_norm": 0.2698685824871063, "learning_rate": 5.522080578625438e-05, "loss": 0.1134, "step": 12310 }, { "epoch": 322.09150326797385, "grad_norm": 0.3085765838623047, "learning_rate": 5.507302408207542e-05, "loss": 0.1138, "step": 12320 }, { "epoch": 322.3529411764706, "grad_norm": 0.3435112237930298, "learning_rate": 5.492536520653307e-05, "loss": 0.1124, "step": 12330 }, { "epoch": 322.6143790849673, "grad_norm": 0.3543654680252075, "learning_rate": 5.4777829563321046e-05, "loss": 0.1152, "step": 12340 }, { "epoch": 322.87581699346407, "grad_norm": 0.2766146957874298, "learning_rate": 5.463041755579619e-05, "loss": 0.1144, "step": 12350 }, { "epoch": 323.1372549019608, "grad_norm": 0.3165183961391449, "learning_rate": 5.4483129586977386e-05, "loss": 0.1154, "step": 12360 }, { "epoch": 323.39869281045753, "grad_norm": 0.3891643285751343, "learning_rate": 5.433596605954415e-05, "loss": 0.1137, "step": 12370 }, { "epoch": 323.66013071895424, "grad_norm": 0.28057435154914856, "learning_rate": 5.4188927375836074e-05, "loss": 0.1116, "step": 12380 }, { "epoch": 323.921568627451, "grad_norm": 0.2857229709625244, "learning_rate": 5.404201393785122e-05, "loss": 0.1143, "step": 12390 }, { "epoch": 324.1830065359477, "grad_norm": 0.31078898906707764, "learning_rate": 5.389522614724536e-05, "loss": 0.113, "step": 12400 }, { "epoch": 324.44444444444446, "grad_norm": 0.2750803530216217, "learning_rate": 5.374856440533078e-05, "loss": 0.1129, "step": 12410 }, { "epoch": 324.70588235294116, "grad_norm": 0.3001578748226166, "learning_rate": 5.360202911307493e-05, "loss": 0.1167, "step": 12420 }, { "epoch": 324.9673202614379, "grad_norm": 0.2825237810611725, "learning_rate": 5.345562067109984e-05, "loss": 0.1131, "step": 12430 }, { "epoch": 325.2287581699346, "grad_norm": 0.40779075026512146, "learning_rate": 5.3309339479680485e-05, "loss": 0.1165, "step": 12440 }, { "epoch": 325.4901960784314, "grad_norm": 0.30739492177963257, "learning_rate": 5.316318593874415e-05, "loss": 0.1128, "step": 12450 }, { "epoch": 325.7516339869281, "grad_norm": 0.2763489782810211, "learning_rate": 5.301716044786902e-05, "loss": 0.1109, "step": 12460 }, { "epoch": 326.01307189542484, "grad_norm": 0.3952416479587555, "learning_rate": 5.287126340628312e-05, "loss": 0.1146, "step": 12470 }, { "epoch": 326.27450980392155, "grad_norm": 0.36667054891586304, "learning_rate": 5.2725495212863494e-05, "loss": 0.1158, "step": 12480 }, { "epoch": 326.5359477124183, "grad_norm": 0.2867089807987213, "learning_rate": 5.25798562661348e-05, "loss": 0.1112, "step": 12490 }, { "epoch": 326.797385620915, "grad_norm": 0.39078038930892944, "learning_rate": 5.2434346964268344e-05, "loss": 0.1159, "step": 12500 }, { "epoch": 327.05882352941177, "grad_norm": 0.30490151047706604, "learning_rate": 5.2288967705081e-05, "loss": 0.116, "step": 12510 }, { "epoch": 327.32026143790847, "grad_norm": 0.2912214696407318, "learning_rate": 5.214371888603409e-05, "loss": 0.1097, "step": 12520 }, { "epoch": 327.58169934640523, "grad_norm": 0.32746198773384094, "learning_rate": 5.199860090423233e-05, "loss": 0.1136, "step": 12530 }, { "epoch": 327.84313725490193, "grad_norm": 0.48329511284828186, "learning_rate": 5.185361415642283e-05, "loss": 0.1163, "step": 12540 }, { "epoch": 328.1045751633987, "grad_norm": 0.34038567543029785, "learning_rate": 5.170875903899375e-05, "loss": 0.1136, "step": 12550 }, { "epoch": 328.36601307189545, "grad_norm": 0.3485356569290161, "learning_rate": 5.1564035947973456e-05, "loss": 0.1106, "step": 12560 }, { "epoch": 328.62745098039215, "grad_norm": 0.3116021156311035, "learning_rate": 5.141944527902932e-05, "loss": 0.1147, "step": 12570 }, { "epoch": 328.8888888888889, "grad_norm": 0.32443079352378845, "learning_rate": 5.127498742746675e-05, "loss": 0.1181, "step": 12580 }, { "epoch": 329.1503267973856, "grad_norm": 0.28348246216773987, "learning_rate": 5.113066278822807e-05, "loss": 0.1125, "step": 12590 }, { "epoch": 329.4117647058824, "grad_norm": 0.30532312393188477, "learning_rate": 5.098647175589118e-05, "loss": 0.1122, "step": 12600 }, { "epoch": 329.6732026143791, "grad_norm": 0.37380146980285645, "learning_rate": 5.084241472466897e-05, "loss": 0.1149, "step": 12610 }, { "epoch": 329.93464052287584, "grad_norm": 0.2732062339782715, "learning_rate": 5.069849208840779e-05, "loss": 0.1124, "step": 12620 }, { "epoch": 330.19607843137254, "grad_norm": 0.3351595997810364, "learning_rate": 5.055470424058666e-05, "loss": 0.1137, "step": 12630 }, { "epoch": 330.4575163398693, "grad_norm": 0.33784759044647217, "learning_rate": 5.041105157431616e-05, "loss": 0.114, "step": 12640 }, { "epoch": 330.718954248366, "grad_norm": 0.30338507890701294, "learning_rate": 5.026753448233703e-05, "loss": 0.1135, "step": 12650 }, { "epoch": 330.98039215686276, "grad_norm": 0.35373762249946594, "learning_rate": 5.012415335701962e-05, "loss": 0.1145, "step": 12660 }, { "epoch": 331.24183006535947, "grad_norm": 0.3398035764694214, "learning_rate": 4.99809085903624e-05, "loss": 0.1119, "step": 12670 }, { "epoch": 331.5032679738562, "grad_norm": 0.3283422291278839, "learning_rate": 4.983780057399111e-05, "loss": 0.1151, "step": 12680 }, { "epoch": 331.7647058823529, "grad_norm": 0.30405548214912415, "learning_rate": 4.9694829699157695e-05, "loss": 0.1136, "step": 12690 }, { "epoch": 332.0261437908497, "grad_norm": 0.3022817373275757, "learning_rate": 4.9551996356738915e-05, "loss": 0.1128, "step": 12700 }, { "epoch": 332.2875816993464, "grad_norm": 0.3224383592605591, "learning_rate": 4.940930093723578e-05, "loss": 0.1129, "step": 12710 }, { "epoch": 332.54901960784315, "grad_norm": 0.3512856662273407, "learning_rate": 4.9266743830772034e-05, "loss": 0.1125, "step": 12720 }, { "epoch": 332.81045751633985, "grad_norm": 0.32787182927131653, "learning_rate": 4.9124325427093455e-05, "loss": 0.1132, "step": 12730 }, { "epoch": 333.0718954248366, "grad_norm": 0.29392367601394653, "learning_rate": 4.898204611556647e-05, "loss": 0.1154, "step": 12740 }, { "epoch": 333.3333333333333, "grad_norm": 0.30844447016716003, "learning_rate": 4.883990628517725e-05, "loss": 0.112, "step": 12750 }, { "epoch": 333.5947712418301, "grad_norm": 0.38730964064598083, "learning_rate": 4.869790632453075e-05, "loss": 0.1178, "step": 12760 }, { "epoch": 333.8562091503268, "grad_norm": 0.3621443808078766, "learning_rate": 4.8556046621849346e-05, "loss": 0.1114, "step": 12770 }, { "epoch": 334.11764705882354, "grad_norm": 0.31759655475616455, "learning_rate": 4.841432756497214e-05, "loss": 0.1159, "step": 12780 }, { "epoch": 334.37908496732024, "grad_norm": 0.2881384789943695, "learning_rate": 4.827274954135358e-05, "loss": 0.1123, "step": 12790 }, { "epoch": 334.640522875817, "grad_norm": 0.3464260697364807, "learning_rate": 4.813131293806253e-05, "loss": 0.1125, "step": 12800 }, { "epoch": 334.9019607843137, "grad_norm": 0.29048338532447815, "learning_rate": 4.7990018141781344e-05, "loss": 0.1154, "step": 12810 }, { "epoch": 335.16339869281046, "grad_norm": 0.301886647939682, "learning_rate": 4.7848865538804535e-05, "loss": 0.1129, "step": 12820 }, { "epoch": 335.42483660130716, "grad_norm": 0.3119642436504364, "learning_rate": 4.770785551503798e-05, "loss": 0.1118, "step": 12830 }, { "epoch": 335.6862745098039, "grad_norm": 0.32431304454803467, "learning_rate": 4.756698845599769e-05, "loss": 0.1127, "step": 12840 }, { "epoch": 335.9477124183006, "grad_norm": 0.41362830996513367, "learning_rate": 4.7426264746808755e-05, "loss": 0.1163, "step": 12850 }, { "epoch": 336.2091503267974, "grad_norm": 0.2932046055793762, "learning_rate": 4.728568477220453e-05, "loss": 0.1129, "step": 12860 }, { "epoch": 336.47058823529414, "grad_norm": 0.32170718908309937, "learning_rate": 4.714524891652524e-05, "loss": 0.1123, "step": 12870 }, { "epoch": 336.73202614379085, "grad_norm": 0.32445117831230164, "learning_rate": 4.7004957563717134e-05, "loss": 0.1132, "step": 12880 }, { "epoch": 336.9934640522876, "grad_norm": 0.39626502990722656, "learning_rate": 4.686481109733146e-05, "loss": 0.1167, "step": 12890 }, { "epoch": 337.2549019607843, "grad_norm": 0.2989867925643921, "learning_rate": 4.6724809900523256e-05, "loss": 0.1099, "step": 12900 }, { "epoch": 337.51633986928107, "grad_norm": 0.303310364484787, "learning_rate": 4.658495435605051e-05, "loss": 0.1151, "step": 12910 }, { "epoch": 337.77777777777777, "grad_norm": 0.28980472683906555, "learning_rate": 4.6445244846272916e-05, "loss": 0.1143, "step": 12920 }, { "epoch": 338.03921568627453, "grad_norm": 0.2834646999835968, "learning_rate": 4.630568175315088e-05, "loss": 0.1138, "step": 12930 }, { "epoch": 338.30065359477123, "grad_norm": 0.2876114249229431, "learning_rate": 4.6166265458244665e-05, "loss": 0.1142, "step": 12940 }, { "epoch": 338.562091503268, "grad_norm": 0.35517698526382446, "learning_rate": 4.6026996342713e-05, "loss": 0.1116, "step": 12950 }, { "epoch": 338.8235294117647, "grad_norm": 0.310722678899765, "learning_rate": 4.588787478731242e-05, "loss": 0.1145, "step": 12960 }, { "epoch": 339.08496732026146, "grad_norm": 0.2860526740550995, "learning_rate": 4.574890117239592e-05, "loss": 0.1144, "step": 12970 }, { "epoch": 339.34640522875816, "grad_norm": 0.3224795162677765, "learning_rate": 4.5610075877912e-05, "loss": 0.1144, "step": 12980 }, { "epoch": 339.6078431372549, "grad_norm": 0.3138015866279602, "learning_rate": 4.5471399283403784e-05, "loss": 0.1126, "step": 12990 }, { "epoch": 339.8692810457516, "grad_norm": 0.3215549886226654, "learning_rate": 4.533287176800772e-05, "loss": 0.114, "step": 13000 }, { "epoch": 340.1307189542484, "grad_norm": 0.2976275384426117, "learning_rate": 4.5194493710452825e-05, "loss": 0.1131, "step": 13010 }, { "epoch": 340.3921568627451, "grad_norm": 0.3737821877002716, "learning_rate": 4.505626548905938e-05, "loss": 0.1141, "step": 13020 }, { "epoch": 340.65359477124184, "grad_norm": 0.3438400328159332, "learning_rate": 4.491818748173804e-05, "loss": 0.1133, "step": 13030 }, { "epoch": 340.91503267973854, "grad_norm": 0.34674420952796936, "learning_rate": 4.478026006598885e-05, "loss": 0.1134, "step": 13040 }, { "epoch": 341.1764705882353, "grad_norm": 0.31675341725349426, "learning_rate": 4.464248361890006e-05, "loss": 0.112, "step": 13050 }, { "epoch": 341.437908496732, "grad_norm": 0.2978508770465851, "learning_rate": 4.4504858517147265e-05, "loss": 0.1124, "step": 13060 }, { "epoch": 341.69934640522877, "grad_norm": 0.32540374994277954, "learning_rate": 4.43673851369922e-05, "loss": 0.1104, "step": 13070 }, { "epoch": 341.96078431372547, "grad_norm": 0.3203536570072174, "learning_rate": 4.423006385428181e-05, "loss": 0.119, "step": 13080 }, { "epoch": 342.22222222222223, "grad_norm": 0.3339461386203766, "learning_rate": 4.409289504444732e-05, "loss": 0.1154, "step": 13090 }, { "epoch": 342.48366013071893, "grad_norm": 0.30102136731147766, "learning_rate": 4.3955879082502926e-05, "loss": 0.1116, "step": 13100 }, { "epoch": 342.7450980392157, "grad_norm": 0.31064385175704956, "learning_rate": 4.381901634304512e-05, "loss": 0.1145, "step": 13110 }, { "epoch": 343.0065359477124, "grad_norm": 0.30831557512283325, "learning_rate": 4.368230720025137e-05, "loss": 0.1121, "step": 13120 }, { "epoch": 343.26797385620915, "grad_norm": 0.31142985820770264, "learning_rate": 4.35457520278792e-05, "loss": 0.1136, "step": 13130 }, { "epoch": 343.52941176470586, "grad_norm": 0.2939104735851288, "learning_rate": 4.340935119926534e-05, "loss": 0.1129, "step": 13140 }, { "epoch": 343.7908496732026, "grad_norm": 0.30027836561203003, "learning_rate": 4.327310508732437e-05, "loss": 0.1144, "step": 13150 }, { "epoch": 344.0522875816994, "grad_norm": 0.25800076127052307, "learning_rate": 4.3137014064547965e-05, "loss": 0.1144, "step": 13160 }, { "epoch": 344.3137254901961, "grad_norm": 0.3547254204750061, "learning_rate": 4.3001078503003825e-05, "loss": 0.1115, "step": 13170 }, { "epoch": 344.57516339869284, "grad_norm": 0.2877454459667206, "learning_rate": 4.286529877433453e-05, "loss": 0.1131, "step": 13180 }, { "epoch": 344.83660130718954, "grad_norm": 0.36441680788993835, "learning_rate": 4.272967524975673e-05, "loss": 0.1127, "step": 13190 }, { "epoch": 345.0980392156863, "grad_norm": 0.3238193988800049, "learning_rate": 4.2594208300059946e-05, "loss": 0.1185, "step": 13200 }, { "epoch": 345.359477124183, "grad_norm": 0.2932418882846832, "learning_rate": 4.245889829560559e-05, "loss": 0.1134, "step": 13210 }, { "epoch": 345.62091503267976, "grad_norm": 0.2501135766506195, "learning_rate": 4.232374560632614e-05, "loss": 0.108, "step": 13220 }, { "epoch": 345.88235294117646, "grad_norm": 0.39940959215164185, "learning_rate": 4.218875060172379e-05, "loss": 0.1159, "step": 13230 }, { "epoch": 346.1437908496732, "grad_norm": 0.30277755856513977, "learning_rate": 4.2053913650869816e-05, "loss": 0.1145, "step": 13240 }, { "epoch": 346.4052287581699, "grad_norm": 0.2736481726169586, "learning_rate": 4.191923512240327e-05, "loss": 0.1114, "step": 13250 }, { "epoch": 346.6666666666667, "grad_norm": 0.3326358199119568, "learning_rate": 4.1784715384530035e-05, "loss": 0.1131, "step": 13260 }, { "epoch": 346.9281045751634, "grad_norm": 0.31444019079208374, "learning_rate": 4.165035480502204e-05, "loss": 0.1164, "step": 13270 }, { "epoch": 347.18954248366015, "grad_norm": 0.3125884532928467, "learning_rate": 4.1516153751215895e-05, "loss": 0.1157, "step": 13280 }, { "epoch": 347.45098039215685, "grad_norm": 0.3291109800338745, "learning_rate": 4.138211259001222e-05, "loss": 0.1151, "step": 13290 }, { "epoch": 347.7124183006536, "grad_norm": 0.385786771774292, "learning_rate": 4.1248231687874414e-05, "loss": 0.1105, "step": 13300 }, { "epoch": 347.9738562091503, "grad_norm": 0.32350867986679077, "learning_rate": 4.1114511410827714e-05, "loss": 0.1148, "step": 13310 }, { "epoch": 348.2352941176471, "grad_norm": 0.2844417691230774, "learning_rate": 4.098095212445831e-05, "loss": 0.113, "step": 13320 }, { "epoch": 348.4967320261438, "grad_norm": 0.298141747713089, "learning_rate": 4.084755419391213e-05, "loss": 0.1118, "step": 13330 }, { "epoch": 348.75816993464053, "grad_norm": 0.2766071856021881, "learning_rate": 4.071431798389408e-05, "loss": 0.1134, "step": 13340 }, { "epoch": 349.01960784313724, "grad_norm": 0.3159619867801666, "learning_rate": 4.058124385866685e-05, "loss": 0.1138, "step": 13350 }, { "epoch": 349.281045751634, "grad_norm": 0.2848956286907196, "learning_rate": 4.044833218204998e-05, "loss": 0.1128, "step": 13360 }, { "epoch": 349.5424836601307, "grad_norm": 0.28767091035842896, "learning_rate": 4.031558331741897e-05, "loss": 0.1114, "step": 13370 }, { "epoch": 349.80392156862746, "grad_norm": 0.30374428629875183, "learning_rate": 4.01829976277041e-05, "loss": 0.1143, "step": 13380 }, { "epoch": 350.06535947712416, "grad_norm": 0.2949487864971161, "learning_rate": 4.005057547538964e-05, "loss": 0.1147, "step": 13390 }, { "epoch": 350.3267973856209, "grad_norm": 0.2627740502357483, "learning_rate": 3.991831722251268e-05, "loss": 0.1116, "step": 13400 }, { "epoch": 350.5882352941176, "grad_norm": 0.3931275010108948, "learning_rate": 3.978622323066217e-05, "loss": 0.1147, "step": 13410 }, { "epoch": 350.8496732026144, "grad_norm": 0.27783629298210144, "learning_rate": 3.965429386097813e-05, "loss": 0.1137, "step": 13420 }, { "epoch": 351.1111111111111, "grad_norm": 0.29457733035087585, "learning_rate": 3.952252947415038e-05, "loss": 0.1134, "step": 13430 }, { "epoch": 351.37254901960785, "grad_norm": 0.27213922142982483, "learning_rate": 3.9390930430417696e-05, "loss": 0.1098, "step": 13440 }, { "epoch": 351.63398692810455, "grad_norm": 0.2786455452442169, "learning_rate": 3.925949708956689e-05, "loss": 0.1134, "step": 13450 }, { "epoch": 351.8954248366013, "grad_norm": 0.326171338558197, "learning_rate": 3.9128229810931626e-05, "loss": 0.1145, "step": 13460 }, { "epoch": 352.15686274509807, "grad_norm": 0.2971991002559662, "learning_rate": 3.8997128953391727e-05, "loss": 0.1141, "step": 13470 }, { "epoch": 352.41830065359477, "grad_norm": 0.3068319261074066, "learning_rate": 3.886619487537187e-05, "loss": 0.1101, "step": 13480 }, { "epoch": 352.67973856209153, "grad_norm": 0.285197377204895, "learning_rate": 3.873542793484081e-05, "loss": 0.1134, "step": 13490 }, { "epoch": 352.94117647058823, "grad_norm": 0.30382540822029114, "learning_rate": 3.860482848931042e-05, "loss": 0.1161, "step": 13500 }, { "epoch": 353.202614379085, "grad_norm": 0.32687294483184814, "learning_rate": 3.847439689583454e-05, "loss": 0.1113, "step": 13510 }, { "epoch": 353.4640522875817, "grad_norm": 0.27805617451667786, "learning_rate": 3.834413351100823e-05, "loss": 0.1139, "step": 13520 }, { "epoch": 353.72549019607845, "grad_norm": 0.33675503730773926, "learning_rate": 3.821403869096658e-05, "loss": 0.1166, "step": 13530 }, { "epoch": 353.98692810457516, "grad_norm": 0.3418455123901367, "learning_rate": 3.808411279138383e-05, "loss": 0.1113, "step": 13540 }, { "epoch": 354.2483660130719, "grad_norm": 0.2951265275478363, "learning_rate": 3.7954356167472485e-05, "loss": 0.1149, "step": 13550 }, { "epoch": 354.5098039215686, "grad_norm": 0.3082883358001709, "learning_rate": 3.782476917398213e-05, "loss": 0.1125, "step": 13560 }, { "epoch": 354.7712418300654, "grad_norm": 0.30738532543182373, "learning_rate": 3.7695352165198774e-05, "loss": 0.1139, "step": 13570 }, { "epoch": 355.0326797385621, "grad_norm": 0.28504759073257446, "learning_rate": 3.7566105494943435e-05, "loss": 0.1122, "step": 13580 }, { "epoch": 355.29411764705884, "grad_norm": 0.2820607125759125, "learning_rate": 3.743702951657163e-05, "loss": 0.1104, "step": 13590 }, { "epoch": 355.55555555555554, "grad_norm": 0.38755741715431213, "learning_rate": 3.730812458297222e-05, "loss": 0.1149, "step": 13600 }, { "epoch": 355.8169934640523, "grad_norm": 0.31920456886291504, "learning_rate": 3.717939104656626e-05, "loss": 0.1128, "step": 13610 }, { "epoch": 356.078431372549, "grad_norm": 0.4119099974632263, "learning_rate": 3.7050829259306466e-05, "loss": 0.1142, "step": 13620 }, { "epoch": 356.33986928104576, "grad_norm": 0.31791460514068604, "learning_rate": 3.692243957267568e-05, "loss": 0.1095, "step": 13630 }, { "epoch": 356.60130718954247, "grad_norm": 0.24925312399864197, "learning_rate": 3.679422233768651e-05, "loss": 0.1179, "step": 13640 }, { "epoch": 356.8627450980392, "grad_norm": 0.3441096246242523, "learning_rate": 3.6666177904879994e-05, "loss": 0.1141, "step": 13650 }, { "epoch": 357.12418300653593, "grad_norm": 0.2764681577682495, "learning_rate": 3.655108595056173e-05, "loss": 0.1124, "step": 13660 }, { "epoch": 357.3856209150327, "grad_norm": 0.29715630412101746, "learning_rate": 3.6423370805949876e-05, "loss": 0.1119, "step": 13670 }, { "epoch": 357.6470588235294, "grad_norm": 0.282524049282074, "learning_rate": 3.629582947741461e-05, "loss": 0.1155, "step": 13680 }, { "epoch": 357.90849673202615, "grad_norm": 0.3714596629142761, "learning_rate": 3.616846231364902e-05, "loss": 0.1121, "step": 13690 }, { "epoch": 358.16993464052285, "grad_norm": 0.2508104145526886, "learning_rate": 3.604126966287004e-05, "loss": 0.1124, "step": 13700 }, { "epoch": 358.4313725490196, "grad_norm": 0.30074048042297363, "learning_rate": 3.591425187281756e-05, "loss": 0.1143, "step": 13710 }, { "epoch": 358.6928104575163, "grad_norm": 0.3082398474216461, "learning_rate": 3.578740929075333e-05, "loss": 0.1139, "step": 13720 }, { "epoch": 358.9542483660131, "grad_norm": 0.3137090504169464, "learning_rate": 3.5660742263460203e-05, "loss": 0.1116, "step": 13730 }, { "epoch": 359.2156862745098, "grad_norm": 0.33088791370391846, "learning_rate": 3.553425113724088e-05, "loss": 0.1108, "step": 13740 }, { "epoch": 359.47712418300654, "grad_norm": 0.29915428161621094, "learning_rate": 3.5407936257917326e-05, "loss": 0.1142, "step": 13750 }, { "epoch": 359.73856209150324, "grad_norm": 0.2895696461200714, "learning_rate": 3.5281797970829635e-05, "loss": 0.1125, "step": 13760 }, { "epoch": 360.0, "grad_norm": 0.33440282940864563, "learning_rate": 3.5155836620835006e-05, "loss": 0.115, "step": 13770 }, { "epoch": 360.26143790849676, "grad_norm": 0.3408808708190918, "learning_rate": 3.5030052552307044e-05, "loss": 0.1107, "step": 13780 }, { "epoch": 360.52287581699346, "grad_norm": 0.35789361596107483, "learning_rate": 3.490444610913447e-05, "loss": 0.1147, "step": 13790 }, { "epoch": 360.7843137254902, "grad_norm": 0.30055010318756104, "learning_rate": 3.477901763472057e-05, "loss": 0.1123, "step": 13800 }, { "epoch": 361.0457516339869, "grad_norm": 0.3059723973274231, "learning_rate": 3.465376747198203e-05, "loss": 0.1129, "step": 13810 }, { "epoch": 361.3071895424837, "grad_norm": 0.32120776176452637, "learning_rate": 3.452869596334798e-05, "loss": 0.1127, "step": 13820 }, { "epoch": 361.5686274509804, "grad_norm": 0.3302242159843445, "learning_rate": 3.440380345075915e-05, "loss": 0.1121, "step": 13830 }, { "epoch": 361.83006535947715, "grad_norm": 0.3632891774177551, "learning_rate": 3.427909027566688e-05, "loss": 0.1118, "step": 13840 }, { "epoch": 362.09150326797385, "grad_norm": 0.26599353551864624, "learning_rate": 3.415455677903224e-05, "loss": 0.1149, "step": 13850 }, { "epoch": 362.3529411764706, "grad_norm": 0.29619720578193665, "learning_rate": 3.403020330132509e-05, "loss": 0.1107, "step": 13860 }, { "epoch": 362.6143790849673, "grad_norm": 0.3457578718662262, "learning_rate": 3.3906030182523077e-05, "loss": 0.1157, "step": 13870 }, { "epoch": 362.87581699346407, "grad_norm": 0.33504214882850647, "learning_rate": 3.378203776211075e-05, "loss": 0.1117, "step": 13880 }, { "epoch": 363.1372549019608, "grad_norm": 0.3943881392478943, "learning_rate": 3.365822637907862e-05, "loss": 0.1158, "step": 13890 }, { "epoch": 363.39869281045753, "grad_norm": 0.4457724392414093, "learning_rate": 3.353459637192231e-05, "loss": 0.1122, "step": 13900 }, { "epoch": 363.66013071895424, "grad_norm": 0.3329622745513916, "learning_rate": 3.341114807864158e-05, "loss": 0.1119, "step": 13910 }, { "epoch": 363.921568627451, "grad_norm": 0.3475494682788849, "learning_rate": 3.328788183673932e-05, "loss": 0.1142, "step": 13920 }, { "epoch": 364.1830065359477, "grad_norm": 0.2990402281284332, "learning_rate": 3.316479798322072e-05, "loss": 0.1116, "step": 13930 }, { "epoch": 364.44444444444446, "grad_norm": 0.24483919143676758, "learning_rate": 3.3041896854592305e-05, "loss": 0.1128, "step": 13940 }, { "epoch": 364.70588235294116, "grad_norm": 0.31257379055023193, "learning_rate": 3.2919178786861104e-05, "loss": 0.1143, "step": 13950 }, { "epoch": 364.9673202614379, "grad_norm": 0.3180519938468933, "learning_rate": 3.279664411553368e-05, "loss": 0.1138, "step": 13960 }, { "epoch": 365.2287581699346, "grad_norm": 0.32743024826049805, "learning_rate": 3.267429317561504e-05, "loss": 0.1112, "step": 13970 }, { "epoch": 365.4901960784314, "grad_norm": 0.3621465861797333, "learning_rate": 3.2552126301608043e-05, "loss": 0.1136, "step": 13980 }, { "epoch": 365.7516339869281, "grad_norm": 0.3021661639213562, "learning_rate": 3.243014382751224e-05, "loss": 0.1142, "step": 13990 }, { "epoch": 366.01307189542484, "grad_norm": 0.4712045192718506, "learning_rate": 3.230834608682305e-05, "loss": 0.1143, "step": 14000 }, { "epoch": 366.27450980392155, "grad_norm": 0.28367525339126587, "learning_rate": 3.218673341253092e-05, "loss": 0.1144, "step": 14010 }, { "epoch": 366.5359477124183, "grad_norm": 0.36586108803749084, "learning_rate": 3.206530613712014e-05, "loss": 0.1121, "step": 14020 }, { "epoch": 366.797385620915, "grad_norm": 0.2866863012313843, "learning_rate": 3.194406459256833e-05, "loss": 0.1137, "step": 14030 }, { "epoch": 367.05882352941177, "grad_norm": 0.31462156772613525, "learning_rate": 3.182300911034518e-05, "loss": 0.1129, "step": 14040 }, { "epoch": 367.32026143790847, "grad_norm": 0.2943307161331177, "learning_rate": 3.17021400214118e-05, "loss": 0.1101, "step": 14050 }, { "epoch": 367.58169934640523, "grad_norm": 0.31919369101524353, "learning_rate": 3.158145765621971e-05, "loss": 0.1132, "step": 14060 }, { "epoch": 367.84313725490193, "grad_norm": 0.31484705209732056, "learning_rate": 3.1460962344709774e-05, "loss": 0.1127, "step": 14070 }, { "epoch": 368.1045751633987, "grad_norm": 0.3020099997520447, "learning_rate": 3.1340654416311656e-05, "loss": 0.1153, "step": 14080 }, { "epoch": 368.36601307189545, "grad_norm": 0.29994770884513855, "learning_rate": 3.1220534199942585e-05, "loss": 0.113, "step": 14090 }, { "epoch": 368.62745098039215, "grad_norm": 0.28115540742874146, "learning_rate": 3.1100602024006707e-05, "loss": 0.1115, "step": 14100 }, { "epoch": 368.8888888888889, "grad_norm": 0.37485307455062866, "learning_rate": 3.098085821639398e-05, "loss": 0.1172, "step": 14110 }, { "epoch": 369.1503267973856, "grad_norm": 0.3369976282119751, "learning_rate": 3.086130310447937e-05, "loss": 0.11, "step": 14120 }, { "epoch": 369.4117647058824, "grad_norm": 0.2802003026008606, "learning_rate": 3.074193701512204e-05, "loss": 0.1113, "step": 14130 }, { "epoch": 369.6732026143791, "grad_norm": 0.3496047854423523, "learning_rate": 3.0622760274664275e-05, "loss": 0.1167, "step": 14140 }, { "epoch": 369.93464052287584, "grad_norm": 0.2986941933631897, "learning_rate": 3.0503773208930787e-05, "loss": 0.1118, "step": 14150 }, { "epoch": 370.19607843137254, "grad_norm": 0.2768370807170868, "learning_rate": 3.038497614322763e-05, "loss": 0.1123, "step": 14160 }, { "epoch": 370.4575163398693, "grad_norm": 0.3114430010318756, "learning_rate": 3.0266369402341433e-05, "loss": 0.1152, "step": 14170 }, { "epoch": 370.718954248366, "grad_norm": 0.30271750688552856, "learning_rate": 3.0147953310538546e-05, "loss": 0.1107, "step": 14180 }, { "epoch": 370.98039215686276, "grad_norm": 0.33484479784965515, "learning_rate": 3.0029728191563977e-05, "loss": 0.112, "step": 14190 }, { "epoch": 371.24183006535947, "grad_norm": 0.428725004196167, "learning_rate": 2.9911694368640764e-05, "loss": 0.1123, "step": 14200 }, { "epoch": 371.5032679738562, "grad_norm": 0.3206990361213684, "learning_rate": 2.9793852164468826e-05, "loss": 0.1128, "step": 14210 }, { "epoch": 371.7647058823529, "grad_norm": 0.3497907817363739, "learning_rate": 2.9676201901224233e-05, "loss": 0.1133, "step": 14220 }, { "epoch": 372.0261437908497, "grad_norm": 0.3005002439022064, "learning_rate": 2.955874390055836e-05, "loss": 0.1127, "step": 14230 }, { "epoch": 372.2875816993464, "grad_norm": 0.2785640358924866, "learning_rate": 2.9441478483596862e-05, "loss": 0.1137, "step": 14240 }, { "epoch": 372.54901960784315, "grad_norm": 0.33614808320999146, "learning_rate": 2.9324405970938906e-05, "loss": 0.1105, "step": 14250 }, { "epoch": 372.81045751633985, "grad_norm": 0.2854107618331909, "learning_rate": 2.9207526682656306e-05, "loss": 0.1153, "step": 14260 }, { "epoch": 373.0718954248366, "grad_norm": 0.25472962856292725, "learning_rate": 2.909084093829252e-05, "loss": 0.1101, "step": 14270 }, { "epoch": 373.3333333333333, "grad_norm": 0.34902864694595337, "learning_rate": 2.897434905686198e-05, "loss": 0.1105, "step": 14280 }, { "epoch": 373.5947712418301, "grad_norm": 0.2810913622379303, "learning_rate": 2.8858051356849014e-05, "loss": 0.1148, "step": 14290 }, { "epoch": 373.8562091503268, "grad_norm": 0.34702831506729126, "learning_rate": 2.8741948156207056e-05, "loss": 0.1137, "step": 14300 }, { "epoch": 374.11764705882354, "grad_norm": 0.31400611996650696, "learning_rate": 2.8626039772357882e-05, "loss": 0.1122, "step": 14310 }, { "epoch": 374.37908496732024, "grad_norm": 0.3254755139350891, "learning_rate": 2.8510326522190545e-05, "loss": 0.1113, "step": 14320 }, { "epoch": 374.640522875817, "grad_norm": 0.31944742798805237, "learning_rate": 2.8394808722060696e-05, "loss": 0.1145, "step": 14330 }, { "epoch": 374.9019607843137, "grad_norm": 0.30928805470466614, "learning_rate": 2.8279486687789558e-05, "loss": 0.1135, "step": 14340 }, { "epoch": 375.16339869281046, "grad_norm": 0.3214901089668274, "learning_rate": 2.8164360734663142e-05, "loss": 0.111, "step": 14350 }, { "epoch": 375.42483660130716, "grad_norm": 0.2930542230606079, "learning_rate": 2.8049431177431486e-05, "loss": 0.1139, "step": 14360 }, { "epoch": 375.6862745098039, "grad_norm": 0.3097141683101654, "learning_rate": 2.7934698330307518e-05, "loss": 0.1127, "step": 14370 }, { "epoch": 375.9477124183006, "grad_norm": 0.33316555619239807, "learning_rate": 2.782016250696655e-05, "loss": 0.1139, "step": 14380 }, { "epoch": 376.2091503267974, "grad_norm": 0.3537690043449402, "learning_rate": 2.77058240205451e-05, "loss": 0.1126, "step": 14390 }, { "epoch": 376.47058823529414, "grad_norm": 0.35879310965538025, "learning_rate": 2.7591683183640215e-05, "loss": 0.1145, "step": 14400 }, { "epoch": 376.73202614379085, "grad_norm": 0.3203032314777374, "learning_rate": 2.7477740308308618e-05, "loss": 0.1093, "step": 14410 }, { "epoch": 376.9934640522876, "grad_norm": 0.3569747507572174, "learning_rate": 2.7363995706065737e-05, "loss": 0.1143, "step": 14420 }, { "epoch": 377.2549019607843, "grad_norm": 0.33088165521621704, "learning_rate": 2.7250449687885028e-05, "loss": 0.1112, "step": 14430 }, { "epoch": 377.51633986928107, "grad_norm": 0.4784368872642517, "learning_rate": 2.7137102564196937e-05, "loss": 0.1136, "step": 14440 }, { "epoch": 377.77777777777777, "grad_norm": 0.39264166355133057, "learning_rate": 2.702395464488814e-05, "loss": 0.115, "step": 14450 }, { "epoch": 378.03921568627453, "grad_norm": 0.3187940716743469, "learning_rate": 2.6911006239300794e-05, "loss": 0.1105, "step": 14460 }, { "epoch": 378.30065359477123, "grad_norm": 0.30897989869117737, "learning_rate": 2.6798257656231464e-05, "loss": 0.1127, "step": 14470 }, { "epoch": 378.562091503268, "grad_norm": 0.40761929750442505, "learning_rate": 2.668570920393052e-05, "loss": 0.1139, "step": 14480 }, { "epoch": 378.8235294117647, "grad_norm": 0.32026803493499756, "learning_rate": 2.657336119010112e-05, "loss": 0.1136, "step": 14490 }, { "epoch": 379.08496732026146, "grad_norm": 0.30582597851753235, "learning_rate": 2.646121392189841e-05, "loss": 0.112, "step": 14500 }, { "epoch": 379.34640522875816, "grad_norm": 0.2957599461078644, "learning_rate": 2.6349267705928793e-05, "loss": 0.1105, "step": 14510 }, { "epoch": 379.6078431372549, "grad_norm": 0.2888094484806061, "learning_rate": 2.623752284824893e-05, "loss": 0.1119, "step": 14520 }, { "epoch": 379.8692810457516, "grad_norm": 0.32123002409935, "learning_rate": 2.6125979654364952e-05, "loss": 0.1138, "step": 14530 }, { "epoch": 380.1307189542484, "grad_norm": 0.2851218283176422, "learning_rate": 2.601463842923175e-05, "loss": 0.1113, "step": 14540 }, { "epoch": 380.3921568627451, "grad_norm": 0.31959566473960876, "learning_rate": 2.5903499477251936e-05, "loss": 0.1136, "step": 14550 }, { "epoch": 380.65359477124184, "grad_norm": 0.32848477363586426, "learning_rate": 2.5792563102275213e-05, "loss": 0.1107, "step": 14560 }, { "epoch": 380.91503267973854, "grad_norm": 0.31065839529037476, "learning_rate": 2.568182960759735e-05, "loss": 0.1144, "step": 14570 }, { "epoch": 381.1764705882353, "grad_norm": 0.3056698739528656, "learning_rate": 2.5571299295959496e-05, "loss": 0.1127, "step": 14580 }, { "epoch": 381.437908496732, "grad_norm": 0.29230251908302307, "learning_rate": 2.546097246954734e-05, "loss": 0.1102, "step": 14590 }, { "epoch": 381.69934640522877, "grad_norm": 0.3203261196613312, "learning_rate": 2.5350849429990152e-05, "loss": 0.113, "step": 14600 }, { "epoch": 381.96078431372547, "grad_norm": 0.406775563955307, "learning_rate": 2.524093047836018e-05, "loss": 0.1152, "step": 14610 }, { "epoch": 382.22222222222223, "grad_norm": 0.2875988781452179, "learning_rate": 2.5131215915171624e-05, "loss": 0.113, "step": 14620 }, { "epoch": 382.48366013071893, "grad_norm": 0.29914501309394836, "learning_rate": 2.5021706040379854e-05, "loss": 0.1104, "step": 14630 }, { "epoch": 382.7450980392157, "grad_norm": 0.3158552944660187, "learning_rate": 2.4912401153380772e-05, "loss": 0.1152, "step": 14640 }, { "epoch": 383.0065359477124, "grad_norm": 0.45473095774650574, "learning_rate": 2.4803301553009694e-05, "loss": 0.112, "step": 14650 }, { "epoch": 383.26797385620915, "grad_norm": 0.32687652111053467, "learning_rate": 2.4694407537540808e-05, "loss": 0.1119, "step": 14660 }, { "epoch": 383.52941176470586, "grad_norm": 0.3493711054325104, "learning_rate": 2.4585719404686192e-05, "loss": 0.1152, "step": 14670 }, { "epoch": 383.7908496732026, "grad_norm": 0.3303303122520447, "learning_rate": 2.4477237451595004e-05, "loss": 0.1105, "step": 14680 }, { "epoch": 384.0522875816994, "grad_norm": 0.29483675956726074, "learning_rate": 2.436896197485282e-05, "loss": 0.1136, "step": 14690 }, { "epoch": 384.3137254901961, "grad_norm": 0.28735411167144775, "learning_rate": 2.42608932704806e-05, "loss": 0.1104, "step": 14700 }, { "epoch": 384.57516339869284, "grad_norm": 0.30859243869781494, "learning_rate": 2.415303163393412e-05, "loss": 0.1133, "step": 14710 }, { "epoch": 384.83660130718954, "grad_norm": 0.32482388615608215, "learning_rate": 2.404537736010295e-05, "loss": 0.1131, "step": 14720 }, { "epoch": 385.0980392156863, "grad_norm": 0.32856839895248413, "learning_rate": 2.3937930743309723e-05, "loss": 0.113, "step": 14730 }, { "epoch": 385.359477124183, "grad_norm": 0.3382302224636078, "learning_rate": 2.3830692077309446e-05, "loss": 0.111, "step": 14740 }, { "epoch": 385.62091503267976, "grad_norm": 0.4078975319862366, "learning_rate": 2.3723661655288487e-05, "loss": 0.1147, "step": 14750 }, { "epoch": 385.88235294117646, "grad_norm": 0.32090675830841064, "learning_rate": 2.3616839769863984e-05, "loss": 0.1121, "step": 14760 }, { "epoch": 386.1437908496732, "grad_norm": 0.3056178092956543, "learning_rate": 2.351022671308287e-05, "loss": 0.1116, "step": 14770 }, { "epoch": 386.4052287581699, "grad_norm": 0.33630117774009705, "learning_rate": 2.3403822776421135e-05, "loss": 0.113, "step": 14780 }, { "epoch": 386.6666666666667, "grad_norm": 0.3036194145679474, "learning_rate": 2.3297628250783154e-05, "loss": 0.1124, "step": 14790 }, { "epoch": 386.9281045751634, "grad_norm": 0.35577064752578735, "learning_rate": 2.3191643426500653e-05, "loss": 0.113, "step": 14800 }, { "epoch": 387.18954248366015, "grad_norm": 0.3300045430660248, "learning_rate": 2.3085868593332073e-05, "loss": 0.1126, "step": 14810 }, { "epoch": 387.45098039215685, "grad_norm": 0.28813156485557556, "learning_rate": 2.298030404046183e-05, "loss": 0.1106, "step": 14820 }, { "epoch": 387.7124183006536, "grad_norm": 0.40409666299819946, "learning_rate": 2.2874950056499324e-05, "loss": 0.118, "step": 14830 }, { "epoch": 387.9738562091503, "grad_norm": 0.3458053171634674, "learning_rate": 2.2769806929478377e-05, "loss": 0.1093, "step": 14840 }, { "epoch": 388.2352941176471, "grad_norm": 0.3175845742225647, "learning_rate": 2.266487494685625e-05, "loss": 0.1122, "step": 14850 }, { "epoch": 388.4967320261438, "grad_norm": 0.31592434644699097, "learning_rate": 2.2560154395512967e-05, "loss": 0.115, "step": 14860 }, { "epoch": 388.75816993464053, "grad_norm": 0.31762829422950745, "learning_rate": 2.245564556175056e-05, "loss": 0.1127, "step": 14870 }, { "epoch": 389.01960784313724, "grad_norm": 0.27463746070861816, "learning_rate": 2.235134873129213e-05, "loss": 0.1095, "step": 14880 }, { "epoch": 389.281045751634, "grad_norm": 0.2556680738925934, "learning_rate": 2.2247264189281304e-05, "loss": 0.1127, "step": 14890 }, { "epoch": 389.5424836601307, "grad_norm": 0.3717378079891205, "learning_rate": 2.214339222028119e-05, "loss": 0.1127, "step": 14900 }, { "epoch": 389.80392156862746, "grad_norm": 0.33163613080978394, "learning_rate": 2.2039733108273774e-05, "loss": 0.1129, "step": 14910 }, { "epoch": 390.06535947712416, "grad_norm": 0.33852529525756836, "learning_rate": 2.193628713665916e-05, "loss": 0.1126, "step": 14920 }, { "epoch": 390.3267973856209, "grad_norm": 0.38463908433914185, "learning_rate": 2.183305458825464e-05, "loss": 0.1107, "step": 14930 }, { "epoch": 390.5882352941176, "grad_norm": 0.29137659072875977, "learning_rate": 2.1730035745294098e-05, "loss": 0.1135, "step": 14940 }, { "epoch": 390.8496732026144, "grad_norm": 0.3932526111602783, "learning_rate": 2.1627230889427096e-05, "loss": 0.1138, "step": 14950 }, { "epoch": 391.1111111111111, "grad_norm": 0.30873677134513855, "learning_rate": 2.1524640301718167e-05, "loss": 0.1129, "step": 14960 }, { "epoch": 391.37254901960785, "grad_norm": 0.28704315423965454, "learning_rate": 2.1422264262646097e-05, "loss": 0.1134, "step": 14970 }, { "epoch": 391.63398692810455, "grad_norm": 0.3116617798805237, "learning_rate": 2.1320103052103024e-05, "loss": 0.1113, "step": 14980 }, { "epoch": 391.8954248366013, "grad_norm": 0.3618609607219696, "learning_rate": 2.1218156949393853e-05, "loss": 0.1132, "step": 14990 }, { "epoch": 392.15686274509807, "grad_norm": 0.30935177206993103, "learning_rate": 2.111642623323531e-05, "loss": 0.112, "step": 15000 }, { "epoch": 392.41830065359477, "grad_norm": 0.32789334654808044, "learning_rate": 2.1014911181755247e-05, "loss": 0.1127, "step": 15010 }, { "epoch": 392.67973856209153, "grad_norm": 0.3063357472419739, "learning_rate": 2.0913612072492006e-05, "loss": 0.1144, "step": 15020 }, { "epoch": 392.94117647058823, "grad_norm": 0.3251248896121979, "learning_rate": 2.0812529182393424e-05, "loss": 0.1107, "step": 15030 }, { "epoch": 393.202614379085, "grad_norm": 0.31256797909736633, "learning_rate": 2.071166278781632e-05, "loss": 0.11, "step": 15040 }, { "epoch": 393.4640522875817, "grad_norm": 0.29990172386169434, "learning_rate": 2.061101316452554e-05, "loss": 0.1126, "step": 15050 }, { "epoch": 393.72549019607845, "grad_norm": 0.34786394238471985, "learning_rate": 2.0510580587693273e-05, "loss": 0.1133, "step": 15060 }, { "epoch": 393.98692810457516, "grad_norm": 0.33727478981018066, "learning_rate": 2.0410365331898416e-05, "loss": 0.1145, "step": 15070 }, { "epoch": 394.2483660130719, "grad_norm": 0.31517842411994934, "learning_rate": 2.0310367671125618e-05, "loss": 0.1113, "step": 15080 }, { "epoch": 394.5098039215686, "grad_norm": 0.36189743876457214, "learning_rate": 2.021058787876464e-05, "loss": 0.1151, "step": 15090 }, { "epoch": 394.7712418300654, "grad_norm": 0.3063863515853882, "learning_rate": 2.0111026227609675e-05, "loss": 0.1124, "step": 15100 }, { "epoch": 395.0326797385621, "grad_norm": 0.3760160207748413, "learning_rate": 2.0011682989858427e-05, "loss": 0.1107, "step": 15110 }, { "epoch": 395.29411764705884, "grad_norm": 0.31587451696395874, "learning_rate": 1.991255843711156e-05, "loss": 0.1103, "step": 15120 }, { "epoch": 395.55555555555554, "grad_norm": 0.3024122416973114, "learning_rate": 1.98136528403718e-05, "loss": 0.1093, "step": 15130 }, { "epoch": 395.8169934640523, "grad_norm": 0.32457295060157776, "learning_rate": 1.971496647004324e-05, "loss": 0.1158, "step": 15140 }, { "epoch": 396.078431372549, "grad_norm": 0.3319365978240967, "learning_rate": 1.9616499595930692e-05, "loss": 0.1151, "step": 15150 }, { "epoch": 396.33986928104576, "grad_norm": 0.33728495240211487, "learning_rate": 1.9518252487238797e-05, "loss": 0.1118, "step": 15160 }, { "epoch": 396.60130718954247, "grad_norm": 0.34540119767189026, "learning_rate": 1.9420225412571435e-05, "loss": 0.1121, "step": 15170 }, { "epoch": 396.8627450980392, "grad_norm": 0.30559542775154114, "learning_rate": 1.9322418639930863e-05, "loss": 0.1125, "step": 15180 }, { "epoch": 397.12418300653593, "grad_norm": 0.3552384376525879, "learning_rate": 1.9224832436717045e-05, "loss": 0.1133, "step": 15190 }, { "epoch": 397.3856209150327, "grad_norm": 0.38219109177589417, "learning_rate": 1.912746706972697e-05, "loss": 0.1122, "step": 15200 }, { "epoch": 397.6470588235294, "grad_norm": 0.38125985860824585, "learning_rate": 1.90303228051538e-05, "loss": 0.112, "step": 15210 }, { "epoch": 397.90849673202615, "grad_norm": 0.3336564600467682, "learning_rate": 1.89333999085863e-05, "loss": 0.1127, "step": 15220 }, { "epoch": 398.16993464052285, "grad_norm": 0.29126253724098206, "learning_rate": 1.8836698645007888e-05, "loss": 0.1115, "step": 15230 }, { "epoch": 398.4313725490196, "grad_norm": 0.4003722667694092, "learning_rate": 1.8740219278796167e-05, "loss": 0.1117, "step": 15240 }, { "epoch": 398.6928104575163, "grad_norm": 0.348296582698822, "learning_rate": 1.8643962073722064e-05, "loss": 0.1133, "step": 15250 }, { "epoch": 398.9542483660131, "grad_norm": 0.37177759408950806, "learning_rate": 1.854792729294905e-05, "loss": 0.1121, "step": 15260 }, { "epoch": 399.2156862745098, "grad_norm": 0.3493458032608032, "learning_rate": 1.8452115199032638e-05, "loss": 0.1116, "step": 15270 }, { "epoch": 399.47712418300654, "grad_norm": 0.30324786901474, "learning_rate": 1.835652605391931e-05, "loss": 0.1116, "step": 15280 }, { "epoch": 399.73856209150324, "grad_norm": 0.2836083471775055, "learning_rate": 1.826116011894621e-05, "loss": 0.111, "step": 15290 }, { "epoch": 400.0, "grad_norm": 0.3711930513381958, "learning_rate": 1.8166017654840184e-05, "loss": 0.115, "step": 15300 }, { "epoch": 400.26143790849676, "grad_norm": 0.3117862343788147, "learning_rate": 1.8071098921717033e-05, "loss": 0.1129, "step": 15310 }, { "epoch": 400.52287581699346, "grad_norm": 0.3351728916168213, "learning_rate": 1.797640417908104e-05, "loss": 0.1117, "step": 15320 }, { "epoch": 400.7843137254902, "grad_norm": 0.36364924907684326, "learning_rate": 1.7881933685823905e-05, "loss": 0.1146, "step": 15330 }, { "epoch": 401.0457516339869, "grad_norm": 0.30129241943359375, "learning_rate": 1.7787687700224397e-05, "loss": 0.1101, "step": 15340 }, { "epoch": 401.3071895424837, "grad_norm": 0.29976898431777954, "learning_rate": 1.769366647994748e-05, "loss": 0.1131, "step": 15350 }, { "epoch": 401.5686274509804, "grad_norm": 0.28711673617362976, "learning_rate": 1.7599870282043552e-05, "loss": 0.1131, "step": 15360 }, { "epoch": 401.83006535947715, "grad_norm": 0.32417982816696167, "learning_rate": 1.750629936294782e-05, "loss": 0.112, "step": 15370 }, { "epoch": 402.09150326797385, "grad_norm": 0.2811816930770874, "learning_rate": 1.7412953978479595e-05, "loss": 0.1132, "step": 15380 }, { "epoch": 402.3529411764706, "grad_norm": 0.3149554431438446, "learning_rate": 1.7319834383841616e-05, "loss": 0.11, "step": 15390 }, { "epoch": 402.6143790849673, "grad_norm": 0.3484453856945038, "learning_rate": 1.7226940833619322e-05, "loss": 0.111, "step": 15400 }, { "epoch": 402.87581699346407, "grad_norm": 0.3713397681713104, "learning_rate": 1.7134273581780113e-05, "loss": 0.1151, "step": 15410 }, { "epoch": 403.1372549019608, "grad_norm": 0.36278796195983887, "learning_rate": 1.7041832881672703e-05, "loss": 0.1136, "step": 15420 }, { "epoch": 403.39869281045753, "grad_norm": 0.29296210408210754, "learning_rate": 1.6949618986026416e-05, "loss": 0.1099, "step": 15430 }, { "epoch": 403.66013071895424, "grad_norm": 0.32640260457992554, "learning_rate": 1.6857632146950564e-05, "loss": 0.1118, "step": 15440 }, { "epoch": 403.921568627451, "grad_norm": 0.3738747537136078, "learning_rate": 1.6765872615933677e-05, "loss": 0.1151, "step": 15450 }, { "epoch": 404.1830065359477, "grad_norm": 0.3174304962158203, "learning_rate": 1.6674340643842733e-05, "loss": 0.1092, "step": 15460 }, { "epoch": 404.44444444444446, "grad_norm": 0.3067731261253357, "learning_rate": 1.6583036480922697e-05, "loss": 0.1133, "step": 15470 }, { "epoch": 404.70588235294116, "grad_norm": 0.3003155589103699, "learning_rate": 1.6491960376795635e-05, "loss": 0.1139, "step": 15480 }, { "epoch": 404.9673202614379, "grad_norm": 0.3668583631515503, "learning_rate": 1.6401112580460167e-05, "loss": 0.1122, "step": 15490 }, { "epoch": 405.2287581699346, "grad_norm": 0.3048345148563385, "learning_rate": 1.6310493340290723e-05, "loss": 0.1122, "step": 15500 }, { "epoch": 405.4901960784314, "grad_norm": 0.308072566986084, "learning_rate": 1.622010290403677e-05, "loss": 0.1172, "step": 15510 }, { "epoch": 405.7516339869281, "grad_norm": 0.3233634829521179, "learning_rate": 1.6129941518822366e-05, "loss": 0.1103, "step": 15520 }, { "epoch": 406.01307189542484, "grad_norm": 0.34668582677841187, "learning_rate": 1.6040009431145266e-05, "loss": 0.1088, "step": 15530 }, { "epoch": 406.27450980392155, "grad_norm": 0.4414779841899872, "learning_rate": 1.5950306886876366e-05, "loss": 0.1117, "step": 15540 }, { "epoch": 406.5359477124183, "grad_norm": 0.3160878121852875, "learning_rate": 1.586083413125906e-05, "loss": 0.1113, "step": 15550 }, { "epoch": 406.797385620915, "grad_norm": 0.3153967261314392, "learning_rate": 1.577159140890835e-05, "loss": 0.1136, "step": 15560 }, { "epoch": 407.05882352941177, "grad_norm": 0.29611125588417053, "learning_rate": 1.568257896381049e-05, "loss": 0.1119, "step": 15570 }, { "epoch": 407.32026143790847, "grad_norm": 0.3124426603317261, "learning_rate": 1.5593797039322076e-05, "loss": 0.1136, "step": 15580 }, { "epoch": 407.58169934640523, "grad_norm": 0.29556897282600403, "learning_rate": 1.5505245878169528e-05, "loss": 0.1133, "step": 15590 }, { "epoch": 407.84313725490193, "grad_norm": 0.3371742069721222, "learning_rate": 1.541692572244833e-05, "loss": 0.1084, "step": 15600 }, { "epoch": 408.1045751633987, "grad_norm": 0.3488721549510956, "learning_rate": 1.5328836813622393e-05, "loss": 0.1134, "step": 15610 }, { "epoch": 408.36601307189545, "grad_norm": 0.31725990772247314, "learning_rate": 1.5240979392523458e-05, "loss": 0.113, "step": 15620 }, { "epoch": 408.62745098039215, "grad_norm": 0.37460392713546753, "learning_rate": 1.5153353699350337e-05, "loss": 0.1115, "step": 15630 }, { "epoch": 408.8888888888889, "grad_norm": 0.3097977936267853, "learning_rate": 1.5065959973668353e-05, "loss": 0.1125, "step": 15640 }, { "epoch": 409.1503267973856, "grad_norm": 0.27604126930236816, "learning_rate": 1.4978798454408605e-05, "loss": 0.1116, "step": 15650 }, { "epoch": 409.4117647058824, "grad_norm": 0.30955255031585693, "learning_rate": 1.489186937986734e-05, "loss": 0.11, "step": 15660 }, { "epoch": 409.6732026143791, "grad_norm": 0.310028076171875, "learning_rate": 1.4805172987705362e-05, "loss": 0.1135, "step": 15670 }, { "epoch": 409.93464052287584, "grad_norm": 0.3490021824836731, "learning_rate": 1.471870951494726e-05, "loss": 0.1142, "step": 15680 }, { "epoch": 410.19607843137254, "grad_norm": 0.40562987327575684, "learning_rate": 1.4641091730943024e-05, "loss": 0.1104, "step": 15690 }, { "epoch": 410.4575163398693, "grad_norm": 0.33675485849380493, "learning_rate": 1.4555071455773993e-05, "loss": 0.1106, "step": 15700 }, { "epoch": 410.718954248366, "grad_norm": 0.3096622824668884, "learning_rate": 1.4469284783776893e-05, "loss": 0.1133, "step": 15710 }, { "epoch": 410.98039215686276, "grad_norm": 0.3052927851676941, "learning_rate": 1.43837319494892e-05, "loss": 0.1142, "step": 15720 }, { "epoch": 411.24183006535947, "grad_norm": 0.3547520935535431, "learning_rate": 1.4298413186809123e-05, "loss": 0.1137, "step": 15730 }, { "epoch": 411.5032679738562, "grad_norm": 0.3303002417087555, "learning_rate": 1.4213328728994857e-05, "loss": 0.1126, "step": 15740 }, { "epoch": 411.7647058823529, "grad_norm": 0.3191388249397278, "learning_rate": 1.4128478808664125e-05, "loss": 0.1101, "step": 15750 }, { "epoch": 412.0261437908497, "grad_norm": 0.29005515575408936, "learning_rate": 1.4043863657793332e-05, "loss": 0.1122, "step": 15760 }, { "epoch": 412.2875816993464, "grad_norm": 0.33250725269317627, "learning_rate": 1.3959483507717042e-05, "loss": 0.1104, "step": 15770 }, { "epoch": 412.54901960784315, "grad_norm": 0.2780681252479553, "learning_rate": 1.3875338589127418e-05, "loss": 0.1114, "step": 15780 }, { "epoch": 412.81045751633985, "grad_norm": 0.35173699259757996, "learning_rate": 1.3791429132073408e-05, "loss": 0.1113, "step": 15790 }, { "epoch": 413.0718954248366, "grad_norm": 0.3443647027015686, "learning_rate": 1.3707755365960317e-05, "loss": 0.1156, "step": 15800 }, { "epoch": 413.3333333333333, "grad_norm": 0.2871156930923462, "learning_rate": 1.3624317519548979e-05, "loss": 0.1131, "step": 15810 }, { "epoch": 413.5947712418301, "grad_norm": 0.26214319467544556, "learning_rate": 1.3541115820955285e-05, "loss": 0.1083, "step": 15820 }, { "epoch": 413.8562091503268, "grad_norm": 0.3014472723007202, "learning_rate": 1.3458150497649525e-05, "loss": 0.1142, "step": 15830 }, { "epoch": 414.11764705882354, "grad_norm": 0.3216531276702881, "learning_rate": 1.3375421776455699e-05, "loss": 0.1124, "step": 15840 }, { "epoch": 414.37908496732024, "grad_norm": 0.3679593503475189, "learning_rate": 1.3292929883550998e-05, "loss": 0.1134, "step": 15850 }, { "epoch": 414.640522875817, "grad_norm": 0.2999366819858551, "learning_rate": 1.3210675044465103e-05, "loss": 0.1124, "step": 15860 }, { "epoch": 414.9019607843137, "grad_norm": 0.34971389174461365, "learning_rate": 1.3128657484079566e-05, "loss": 0.1131, "step": 15870 }, { "epoch": 415.16339869281046, "grad_norm": 0.3751276433467865, "learning_rate": 1.3046877426627313e-05, "loss": 0.1118, "step": 15880 }, { "epoch": 415.42483660130716, "grad_norm": 0.32183781266212463, "learning_rate": 1.2965335095691889e-05, "loss": 0.1129, "step": 15890 }, { "epoch": 415.6862745098039, "grad_norm": 0.35736966133117676, "learning_rate": 1.2884030714206874e-05, "loss": 0.1109, "step": 15900 }, { "epoch": 415.9477124183006, "grad_norm": 0.3015005588531494, "learning_rate": 1.2802964504455395e-05, "loss": 0.1124, "step": 15910 }, { "epoch": 416.2091503267974, "grad_norm": 0.34793102741241455, "learning_rate": 1.272213668806933e-05, "loss": 0.1146, "step": 15920 }, { "epoch": 416.47058823529414, "grad_norm": 0.3514239490032196, "learning_rate": 1.2641547486028882e-05, "loss": 0.1106, "step": 15930 }, { "epoch": 416.73202614379085, "grad_norm": 0.31007125973701477, "learning_rate": 1.2561197118661828e-05, "loss": 0.1102, "step": 15940 }, { "epoch": 416.9934640522876, "grad_norm": 0.31926625967025757, "learning_rate": 1.2481085805643e-05, "loss": 0.1129, "step": 15950 }, { "epoch": 417.2549019607843, "grad_norm": 0.3109895884990692, "learning_rate": 1.2401213765993691e-05, "loss": 0.1124, "step": 15960 }, { "epoch": 417.51633986928107, "grad_norm": 0.33066773414611816, "learning_rate": 1.2321581218080979e-05, "loss": 0.112, "step": 15970 }, { "epoch": 417.77777777777777, "grad_norm": 0.4178493618965149, "learning_rate": 1.2242188379617236e-05, "loss": 0.1121, "step": 15980 }, { "epoch": 418.03921568627453, "grad_norm": 0.3649527430534363, "learning_rate": 1.2163035467659444e-05, "loss": 0.1127, "step": 15990 }, { "epoch": 418.30065359477123, "grad_norm": 0.29703032970428467, "learning_rate": 1.2084122698608625e-05, "loss": 0.1118, "step": 16000 }, { "epoch": 418.562091503268, "grad_norm": 0.31829434633255005, "learning_rate": 1.2005450288209297e-05, "loss": 0.1123, "step": 16010 }, { "epoch": 418.8235294117647, "grad_norm": 0.3175608217716217, "learning_rate": 1.1927018451548811e-05, "loss": 0.1096, "step": 16020 }, { "epoch": 419.08496732026146, "grad_norm": 0.30831390619277954, "learning_rate": 1.1848827403056828e-05, "loss": 0.1153, "step": 16030 }, { "epoch": 419.34640522875816, "grad_norm": 0.3015325665473938, "learning_rate": 1.1770877356504683e-05, "loss": 0.1087, "step": 16040 }, { "epoch": 419.6078431372549, "grad_norm": 0.362508624792099, "learning_rate": 1.1693168525004805e-05, "loss": 0.113, "step": 16050 }, { "epoch": 419.8692810457516, "grad_norm": 0.36422449350357056, "learning_rate": 1.1615701121010214e-05, "loss": 0.1133, "step": 16060 }, { "epoch": 420.1307189542484, "grad_norm": 0.27966681122779846, "learning_rate": 1.1538475356313794e-05, "loss": 0.1118, "step": 16070 }, { "epoch": 420.3921568627451, "grad_norm": 0.362490713596344, "learning_rate": 1.1461491442047878e-05, "loss": 0.1122, "step": 16080 }, { "epoch": 420.65359477124184, "grad_norm": 0.3337494432926178, "learning_rate": 1.138474958868352e-05, "loss": 0.11, "step": 16090 }, { "epoch": 420.91503267973854, "grad_norm": 0.3513115346431732, "learning_rate": 1.1308250006029997e-05, "loss": 0.1138, "step": 16100 }, { "epoch": 421.1764705882353, "grad_norm": 0.28625595569610596, "learning_rate": 1.123199290323429e-05, "loss": 0.1115, "step": 16110 }, { "epoch": 421.437908496732, "grad_norm": 0.3301337957382202, "learning_rate": 1.1155978488780384e-05, "loss": 0.1113, "step": 16120 }, { "epoch": 421.69934640522877, "grad_norm": 0.42644599080085754, "learning_rate": 1.1080206970488793e-05, "loss": 0.1157, "step": 16130 }, { "epoch": 421.96078431372547, "grad_norm": 0.35698893666267395, "learning_rate": 1.1004678555515957e-05, "loss": 0.1112, "step": 16140 }, { "epoch": 422.22222222222223, "grad_norm": 0.37550118565559387, "learning_rate": 1.0929393450353654e-05, "loss": 0.1127, "step": 16150 }, { "epoch": 422.48366013071893, "grad_norm": 0.31585901975631714, "learning_rate": 1.0854351860828527e-05, "loss": 0.1109, "step": 16160 }, { "epoch": 422.7450980392157, "grad_norm": 0.3062879741191864, "learning_rate": 1.0779553992101387e-05, "loss": 0.1123, "step": 16170 }, { "epoch": 423.0065359477124, "grad_norm": 0.38038599491119385, "learning_rate": 1.0705000048666735e-05, "loss": 0.1117, "step": 16180 }, { "epoch": 423.26797385620915, "grad_norm": 0.30192792415618896, "learning_rate": 1.0630690234352259e-05, "loss": 0.1096, "step": 16190 }, { "epoch": 423.52941176470586, "grad_norm": 0.2793295383453369, "learning_rate": 1.0556624752318101e-05, "loss": 0.1139, "step": 16200 }, { "epoch": 423.7908496732026, "grad_norm": 0.35322046279907227, "learning_rate": 1.0482803805056507e-05, "loss": 0.1133, "step": 16210 }, { "epoch": 424.0522875816994, "grad_norm": 0.33232808113098145, "learning_rate": 1.0409227594391102e-05, "loss": 0.1096, "step": 16220 }, { "epoch": 424.3137254901961, "grad_norm": 0.2976301908493042, "learning_rate": 1.0335896321476413e-05, "loss": 0.1115, "step": 16230 }, { "epoch": 424.57516339869284, "grad_norm": 0.35135358572006226, "learning_rate": 1.0262810186797389e-05, "loss": 0.1116, "step": 16240 }, { "epoch": 424.83660130718954, "grad_norm": 0.31133124232292175, "learning_rate": 1.0189969390168696e-05, "loss": 0.1139, "step": 16250 }, { "epoch": 425.0980392156863, "grad_norm": 0.3206573724746704, "learning_rate": 1.0117374130734314e-05, "loss": 0.1106, "step": 16260 }, { "epoch": 425.359477124183, "grad_norm": 0.29747602343559265, "learning_rate": 1.0045024606966902e-05, "loss": 0.1112, "step": 16270 }, { "epoch": 425.62091503267976, "grad_norm": 0.3697657585144043, "learning_rate": 9.972921016667269e-06, "loss": 0.1128, "step": 16280 }, { "epoch": 425.88235294117646, "grad_norm": 0.3043394088745117, "learning_rate": 9.90106355696393e-06, "loss": 0.1128, "step": 16290 }, { "epoch": 426.1437908496732, "grad_norm": 0.34919267892837524, "learning_rate": 9.82945242431238e-06, "loss": 0.111, "step": 16300 }, { "epoch": 426.4052287581699, "grad_norm": 0.3754013478755951, "learning_rate": 9.758087814494764e-06, "loss": 0.1133, "step": 16310 }, { "epoch": 426.6666666666667, "grad_norm": 0.2977403402328491, "learning_rate": 9.686969922619193e-06, "loss": 0.1112, "step": 16320 }, { "epoch": 426.9281045751634, "grad_norm": 0.27013343572616577, "learning_rate": 9.616098943119234e-06, "loss": 0.1104, "step": 16330 }, { "epoch": 427.18954248366015, "grad_norm": 0.35326704382896423, "learning_rate": 9.545475069753484e-06, "loss": 0.1113, "step": 16340 }, { "epoch": 427.45098039215685, "grad_norm": 0.39166802167892456, "learning_rate": 9.475098495604884e-06, "loss": 0.1082, "step": 16350 }, { "epoch": 427.7124183006536, "grad_norm": 0.3371388614177704, "learning_rate": 9.404969413080322e-06, "loss": 0.1146, "step": 16360 }, { "epoch": 427.9738562091503, "grad_norm": 0.3773285448551178, "learning_rate": 9.335088013910021e-06, "loss": 0.116, "step": 16370 }, { "epoch": 428.2352941176471, "grad_norm": 0.31114351749420166, "learning_rate": 9.265454489147052e-06, "loss": 0.1115, "step": 16380 }, { "epoch": 428.4967320261438, "grad_norm": 0.31893211603164673, "learning_rate": 9.196069029166831e-06, "loss": 0.1146, "step": 16390 }, { "epoch": 428.75816993464053, "grad_norm": 0.3122820556163788, "learning_rate": 9.126931823666517e-06, "loss": 0.1105, "step": 16400 }, { "epoch": 429.01960784313724, "grad_norm": 0.41064098477363586, "learning_rate": 9.058043061664655e-06, "loss": 0.1125, "step": 16410 }, { "epoch": 429.281045751634, "grad_norm": 0.30506137013435364, "learning_rate": 8.989402931500434e-06, "loss": 0.1094, "step": 16420 }, { "epoch": 429.5424836601307, "grad_norm": 0.298270583152771, "learning_rate": 8.921011620833364e-06, "loss": 0.1134, "step": 16430 }, { "epoch": 429.80392156862746, "grad_norm": 0.3039408326148987, "learning_rate": 8.852869316642688e-06, "loss": 0.1129, "step": 16440 }, { "epoch": 430.06535947712416, "grad_norm": 0.33438700437545776, "learning_rate": 8.78497620522687e-06, "loss": 0.1124, "step": 16450 }, { "epoch": 430.3267973856209, "grad_norm": 0.3313377797603607, "learning_rate": 8.717332472203033e-06, "loss": 0.1095, "step": 16460 }, { "epoch": 430.5882352941176, "grad_norm": 0.3241206705570221, "learning_rate": 8.649938302506633e-06, "loss": 0.1133, "step": 16470 }, { "epoch": 430.8496732026144, "grad_norm": 0.3204852342605591, "learning_rate": 8.582793880390693e-06, "loss": 0.1111, "step": 16480 }, { "epoch": 431.1111111111111, "grad_norm": 0.315046489238739, "learning_rate": 8.515899389425542e-06, "loss": 0.1135, "step": 16490 }, { "epoch": 431.37254901960785, "grad_norm": 0.3156817555427551, "learning_rate": 8.449255012498148e-06, "loss": 0.1084, "step": 16500 }, { "epoch": 431.63398692810455, "grad_norm": 0.33890920877456665, "learning_rate": 8.382860931811687e-06, "loss": 0.113, "step": 16510 }, { "epoch": 431.8954248366013, "grad_norm": 0.3318261206150055, "learning_rate": 8.31671732888506e-06, "loss": 0.1128, "step": 16520 }, { "epoch": 432.15686274509807, "grad_norm": 0.3495106101036072, "learning_rate": 8.250824384552314e-06, "loss": 0.1134, "step": 16530 }, { "epoch": 432.41830065359477, "grad_norm": 0.3283674120903015, "learning_rate": 8.185182278962288e-06, "loss": 0.112, "step": 16540 }, { "epoch": 432.67973856209153, "grad_norm": 0.3081677556037903, "learning_rate": 8.119791191577975e-06, "loss": 0.112, "step": 16550 }, { "epoch": 432.94117647058823, "grad_norm": 0.3371969759464264, "learning_rate": 8.054651301176087e-06, "loss": 0.1108, "step": 16560 }, { "epoch": 433.202614379085, "grad_norm": 0.3539562225341797, "learning_rate": 7.989762785846633e-06, "loss": 0.1116, "step": 16570 }, { "epoch": 433.4640522875817, "grad_norm": 0.36196842789649963, "learning_rate": 7.925125822992307e-06, "loss": 0.1129, "step": 16580 }, { "epoch": 433.72549019607845, "grad_norm": 0.3191837966442108, "learning_rate": 7.860740589328142e-06, "loss": 0.1118, "step": 16590 }, { "epoch": 433.98692810457516, "grad_norm": 0.31484946608543396, "learning_rate": 7.796607260880839e-06, "loss": 0.1124, "step": 16600 }, { "epoch": 434.2483660130719, "grad_norm": 0.33817651867866516, "learning_rate": 7.73272601298851e-06, "loss": 0.1107, "step": 16610 }, { "epoch": 434.5098039215686, "grad_norm": 0.3488980531692505, "learning_rate": 7.669097020300064e-06, "loss": 0.1137, "step": 16620 }, { "epoch": 434.7712418300654, "grad_norm": 0.378192275762558, "learning_rate": 7.605720456774701e-06, "loss": 0.1099, "step": 16630 }, { "epoch": 435.0326797385621, "grad_norm": 0.4347194731235504, "learning_rate": 7.542596495681575e-06, "loss": 0.1138, "step": 16640 }, { "epoch": 435.29411764705884, "grad_norm": 0.314876526594162, "learning_rate": 7.479725309599117e-06, "loss": 0.1113, "step": 16650 }, { "epoch": 435.55555555555554, "grad_norm": 0.3719530999660492, "learning_rate": 7.417107070414786e-06, "loss": 0.1121, "step": 16660 }, { "epoch": 435.8169934640523, "grad_norm": 0.31676945090293884, "learning_rate": 7.354741949324473e-06, "loss": 0.1122, "step": 16670 }, { "epoch": 436.078431372549, "grad_norm": 0.3606952726840973, "learning_rate": 7.292630116832011e-06, "loss": 0.1109, "step": 16680 }, { "epoch": 436.33986928104576, "grad_norm": 0.36602726578712463, "learning_rate": 7.23077174274881e-06, "loss": 0.1135, "step": 16690 }, { "epoch": 436.60130718954247, "grad_norm": 0.359847754240036, "learning_rate": 7.169166996193255e-06, "loss": 0.108, "step": 16700 }, { "epoch": 436.8627450980392, "grad_norm": 0.3841437101364136, "learning_rate": 7.1078160455903875e-06, "loss": 0.1148, "step": 16710 }, { "epoch": 437.12418300653593, "grad_norm": 0.3049101233482361, "learning_rate": 7.0467190586713915e-06, "loss": 0.11, "step": 16720 }, { "epoch": 437.3856209150327, "grad_norm": 0.32839474081993103, "learning_rate": 6.985876202473085e-06, "loss": 0.1117, "step": 16730 }, { "epoch": 437.6470588235294, "grad_norm": 0.3973640501499176, "learning_rate": 6.925287643337497e-06, "loss": 0.1119, "step": 16740 }, { "epoch": 437.90849673202615, "grad_norm": 0.404038667678833, "learning_rate": 6.864953546911424e-06, "loss": 0.1126, "step": 16750 }, { "epoch": 438.16993464052285, "grad_norm": 0.44833749532699585, "learning_rate": 6.8048740781460065e-06, "loss": 0.1101, "step": 16760 }, { "epoch": 438.4313725490196, "grad_norm": 0.35082849860191345, "learning_rate": 6.7450494012962326e-06, "loss": 0.1105, "step": 16770 }, { "epoch": 438.6928104575163, "grad_norm": 0.3201982080936432, "learning_rate": 6.685479679920459e-06, "loss": 0.1132, "step": 16780 }, { "epoch": 438.9542483660131, "grad_norm": 0.3554164171218872, "learning_rate": 6.626165076880031e-06, "loss": 0.1115, "step": 16790 }, { "epoch": 439.2156862745098, "grad_norm": 0.4029858410358429, "learning_rate": 6.5671057543387985e-06, "loss": 0.1135, "step": 16800 }, { "epoch": 439.47712418300654, "grad_norm": 0.3851124346256256, "learning_rate": 6.508301873762712e-06, "loss": 0.1112, "step": 16810 }, { "epoch": 439.73856209150324, "grad_norm": 0.2964167594909668, "learning_rate": 6.449753595919361e-06, "loss": 0.1131, "step": 16820 }, { "epoch": 440.0, "grad_norm": 0.3902610242366791, "learning_rate": 6.391461080877436e-06, "loss": 0.1112, "step": 16830 }, { "epoch": 440.26143790849676, "grad_norm": 0.3522237241268158, "learning_rate": 6.333424488006501e-06, "loss": 0.1104, "step": 16840 }, { "epoch": 440.52287581699346, "grad_norm": 0.3239668309688568, "learning_rate": 6.275643975976353e-06, "loss": 0.1129, "step": 16850 }, { "epoch": 440.7843137254902, "grad_norm": 0.3547097444534302, "learning_rate": 6.218119702756708e-06, "loss": 0.1116, "step": 16860 }, { "epoch": 441.0457516339869, "grad_norm": 0.29542434215545654, "learning_rate": 6.160851825616787e-06, "loss": 0.1101, "step": 16870 }, { "epoch": 441.3071895424837, "grad_norm": 0.3131840229034424, "learning_rate": 6.103840501124702e-06, "loss": 0.1116, "step": 16880 }, { "epoch": 441.5686274509804, "grad_norm": 0.32945704460144043, "learning_rate": 6.047085885147286e-06, "loss": 0.1125, "step": 16890 }, { "epoch": 441.83006535947715, "grad_norm": 0.3166607618331909, "learning_rate": 5.990588132849462e-06, "loss": 0.1084, "step": 16900 }, { "epoch": 442.09150326797385, "grad_norm": 0.4009234607219696, "learning_rate": 5.9343473986939405e-06, "loss": 0.1174, "step": 16910 }, { "epoch": 442.3529411764706, "grad_norm": 0.3329063653945923, "learning_rate": 5.87836383644077e-06, "loss": 0.1099, "step": 16920 }, { "epoch": 442.6143790849673, "grad_norm": 0.3919806480407715, "learning_rate": 5.8226375991468294e-06, "loss": 0.1101, "step": 16930 }, { "epoch": 442.87581699346407, "grad_norm": 0.35498499870300293, "learning_rate": 5.767168839165538e-06, "loss": 0.1122, "step": 16940 }, { "epoch": 443.1372549019608, "grad_norm": 0.3532790243625641, "learning_rate": 5.711957708146365e-06, "loss": 0.1116, "step": 16950 }, { "epoch": 443.39869281045753, "grad_norm": 0.2659851014614105, "learning_rate": 5.657004357034445e-06, "loss": 0.1128, "step": 16960 }, { "epoch": 443.66013071895424, "grad_norm": 0.3570290207862854, "learning_rate": 5.602308936070133e-06, "loss": 0.1143, "step": 16970 }, { "epoch": 443.921568627451, "grad_norm": 0.30066853761672974, "learning_rate": 5.547871594788611e-06, "loss": 0.1106, "step": 16980 }, { "epoch": 444.1830065359477, "grad_norm": 0.30849123001098633, "learning_rate": 5.49369248201953e-06, "loss": 0.1101, "step": 16990 }, { "epoch": 444.44444444444446, "grad_norm": 0.30579501390457153, "learning_rate": 5.4397717458864576e-06, "loss": 0.1113, "step": 17000 }, { "epoch": 444.70588235294116, "grad_norm": 0.30909931659698486, "learning_rate": 5.3861095338066826e-06, "loss": 0.1124, "step": 17010 }, { "epoch": 444.9673202614379, "grad_norm": 0.3454172909259796, "learning_rate": 5.332705992490616e-06, "loss": 0.1124, "step": 17020 }, { "epoch": 445.2287581699346, "grad_norm": 0.390906423330307, "learning_rate": 5.279561267941491e-06, "loss": 0.1097, "step": 17030 }, { "epoch": 445.4901960784314, "grad_norm": 0.3330720365047455, "learning_rate": 5.226675505454981e-06, "loss": 0.1113, "step": 17040 }, { "epoch": 445.7516339869281, "grad_norm": 0.33850574493408203, "learning_rate": 5.174048849618718e-06, "loss": 0.1118, "step": 17050 }, { "epoch": 446.01307189542484, "grad_norm": 0.3519850969314575, "learning_rate": 5.121681444311987e-06, "loss": 0.1147, "step": 17060 }, { "epoch": 446.27450980392155, "grad_norm": 0.3401840031147003, "learning_rate": 5.069573432705277e-06, "loss": 0.1116, "step": 17070 }, { "epoch": 446.5359477124183, "grad_norm": 0.3529043197631836, "learning_rate": 5.017724957259873e-06, "loss": 0.1135, "step": 17080 }, { "epoch": 446.797385620915, "grad_norm": 0.3206219971179962, "learning_rate": 4.966136159727563e-06, "loss": 0.1118, "step": 17090 }, { "epoch": 447.05882352941177, "grad_norm": 0.3134680986404419, "learning_rate": 4.914807181150139e-06, "loss": 0.1092, "step": 17100 }, { "epoch": 447.32026143790847, "grad_norm": 0.3726840019226074, "learning_rate": 4.863738161859044e-06, "loss": 0.1116, "step": 17110 }, { "epoch": 447.58169934640523, "grad_norm": 0.41007351875305176, "learning_rate": 4.812929241475062e-06, "loss": 0.1136, "step": 17120 }, { "epoch": 447.84313725490193, "grad_norm": 0.2826221287250519, "learning_rate": 4.762380558907798e-06, "loss": 0.1113, "step": 17130 }, { "epoch": 448.1045751633987, "grad_norm": 0.3534366488456726, "learning_rate": 4.712092252355471e-06, "loss": 0.1109, "step": 17140 }, { "epoch": 448.36601307189545, "grad_norm": 0.28506147861480713, "learning_rate": 4.662064459304372e-06, "loss": 0.1134, "step": 17150 }, { "epoch": 448.62745098039215, "grad_norm": 0.3237999975681305, "learning_rate": 4.612297316528547e-06, "loss": 0.1113, "step": 17160 }, { "epoch": 448.8888888888889, "grad_norm": 0.37305569648742676, "learning_rate": 4.5627909600895026e-06, "loss": 0.1105, "step": 17170 }, { "epoch": 449.1503267973856, "grad_norm": 0.30823808908462524, "learning_rate": 4.513545525335705e-06, "loss": 0.1123, "step": 17180 }, { "epoch": 449.4117647058824, "grad_norm": 0.3125160336494446, "learning_rate": 4.464561146902302e-06, "loss": 0.1104, "step": 17190 }, { "epoch": 449.6732026143791, "grad_norm": 0.32170015573501587, "learning_rate": 4.4158379587107335e-06, "loss": 0.1118, "step": 17200 }, { "epoch": 449.93464052287584, "grad_norm": 0.3177231550216675, "learning_rate": 4.367376093968278e-06, "loss": 0.1134, "step": 17210 }, { "epoch": 450.19607843137254, "grad_norm": 0.35781294107437134, "learning_rate": 4.319175685167887e-06, "loss": 0.1095, "step": 17220 }, { "epoch": 450.4575163398693, "grad_norm": 0.3756282925605774, "learning_rate": 4.2712368640875914e-06, "loss": 0.1135, "step": 17230 }, { "epoch": 450.718954248366, "grad_norm": 0.3352264165878296, "learning_rate": 4.22355976179033e-06, "loss": 0.1113, "step": 17240 }, { "epoch": 450.98039215686276, "grad_norm": 0.38803133368492126, "learning_rate": 4.176144508623458e-06, "loss": 0.1114, "step": 17250 }, { "epoch": 451.24183006535947, "grad_norm": 0.3300243020057678, "learning_rate": 4.128991234218471e-06, "loss": 0.1115, "step": 17260 }, { "epoch": 451.5032679738562, "grad_norm": 0.30495062470436096, "learning_rate": 4.082100067490635e-06, "loss": 0.1117, "step": 17270 }, { "epoch": 451.7647058823529, "grad_norm": 0.3008281886577606, "learning_rate": 4.03547113663858e-06, "loss": 0.1138, "step": 17280 }, { "epoch": 452.0261437908497, "grad_norm": 0.3080352246761322, "learning_rate": 3.989104569144065e-06, "loss": 0.1098, "step": 17290 }, { "epoch": 452.2875816993464, "grad_norm": 0.30234184861183167, "learning_rate": 3.943000491771487e-06, "loss": 0.1114, "step": 17300 }, { "epoch": 452.54901960784315, "grad_norm": 0.3571633994579315, "learning_rate": 3.897159030567621e-06, "loss": 0.1114, "step": 17310 }, { "epoch": 452.81045751633985, "grad_norm": 0.3420047163963318, "learning_rate": 3.8515803108613025e-06, "loss": 0.1107, "step": 17320 }, { "epoch": 453.0718954248366, "grad_norm": 0.38761913776397705, "learning_rate": 3.806264457262976e-06, "loss": 0.1129, "step": 17330 }, { "epoch": 453.3333333333333, "grad_norm": 0.3283638656139374, "learning_rate": 3.7612115936644932e-06, "loss": 0.113, "step": 17340 }, { "epoch": 453.5947712418301, "grad_norm": 0.38328078389167786, "learning_rate": 3.716421843238649e-06, "loss": 0.1128, "step": 17350 }, { "epoch": 453.8562091503268, "grad_norm": 0.2841828763484955, "learning_rate": 3.67189532843889e-06, "loss": 0.1106, "step": 17360 }, { "epoch": 454.11764705882354, "grad_norm": 0.39247363805770874, "learning_rate": 3.627632170999029e-06, "loss": 0.1132, "step": 17370 }, { "epoch": 454.37908496732024, "grad_norm": 0.38083165884017944, "learning_rate": 3.5836324919328536e-06, "loss": 0.1143, "step": 17380 }, { "epoch": 454.640522875817, "grad_norm": 0.3211730420589447, "learning_rate": 3.5398964115337828e-06, "loss": 0.1088, "step": 17390 }, { "epoch": 454.9019607843137, "grad_norm": 0.347988486289978, "learning_rate": 3.496424049374614e-06, "loss": 0.1084, "step": 17400 }, { "epoch": 455.16339869281046, "grad_norm": 0.331668496131897, "learning_rate": 3.4532155243070963e-06, "loss": 0.1148, "step": 17410 }, { "epoch": 455.42483660130716, "grad_norm": 0.3547741174697876, "learning_rate": 3.410270954461725e-06, "loss": 0.1101, "step": 17420 }, { "epoch": 455.6862745098039, "grad_norm": 0.3205719292163849, "learning_rate": 3.3675904572472825e-06, "loss": 0.1102, "step": 17430 }, { "epoch": 455.9477124183006, "grad_norm": 0.3358950912952423, "learning_rate": 3.3251741493506294e-06, "loss": 0.1109, "step": 17440 }, { "epoch": 456.2091503267974, "grad_norm": 0.3523581027984619, "learning_rate": 3.2830221467363476e-06, "loss": 0.1134, "step": 17450 }, { "epoch": 456.47058823529414, "grad_norm": 0.36160987615585327, "learning_rate": 3.2411345646463643e-06, "loss": 0.1101, "step": 17460 }, { "epoch": 456.73202614379085, "grad_norm": 0.2866756319999695, "learning_rate": 3.1995115175997736e-06, "loss": 0.1121, "step": 17470 }, { "epoch": 456.9934640522876, "grad_norm": 0.31712275743484497, "learning_rate": 3.1581531193923706e-06, "loss": 0.1127, "step": 17480 }, { "epoch": 457.2549019607843, "grad_norm": 0.3979257345199585, "learning_rate": 3.1170594830964405e-06, "loss": 0.1103, "step": 17490 }, { "epoch": 457.51633986928107, "grad_norm": 0.3742763102054596, "learning_rate": 3.0762307210604246e-06, "loss": 0.1115, "step": 17500 }, { "epoch": 457.77777777777777, "grad_norm": 0.3208019435405731, "learning_rate": 3.0356669449085775e-06, "loss": 0.1157, "step": 17510 }, { "epoch": 458.03921568627453, "grad_norm": 0.31606632471084595, "learning_rate": 2.9953682655407434e-06, "loss": 0.1093, "step": 17520 }, { "epoch": 458.30065359477123, "grad_norm": 0.36023128032684326, "learning_rate": 2.9553347931319586e-06, "loss": 0.114, "step": 17530 }, { "epoch": 458.562091503268, "grad_norm": 0.39027029275894165, "learning_rate": 2.9155666371321944e-06, "loss": 0.1123, "step": 17540 }, { "epoch": 458.8235294117647, "grad_norm": 0.28587546944618225, "learning_rate": 2.876063906266102e-06, "loss": 0.1114, "step": 17550 }, { "epoch": 459.08496732026146, "grad_norm": 0.33159536123275757, "learning_rate": 2.836826708532603e-06, "loss": 0.1084, "step": 17560 }, { "epoch": 459.34640522875816, "grad_norm": 0.37588268518447876, "learning_rate": 2.7978551512047312e-06, "loss": 0.1136, "step": 17570 }, { "epoch": 459.6078431372549, "grad_norm": 0.309316486120224, "learning_rate": 2.7591493408292256e-06, "loss": 0.1107, "step": 17580 }, { "epoch": 459.8692810457516, "grad_norm": 0.34601840376853943, "learning_rate": 2.720709383226272e-06, "loss": 0.1104, "step": 17590 }, { "epoch": 460.1307189542484, "grad_norm": 0.28702256083488464, "learning_rate": 2.682535383489282e-06, "loss": 0.1126, "step": 17600 }, { "epoch": 460.3921568627451, "grad_norm": 0.3772827088832855, "learning_rate": 2.6446274459844712e-06, "loss": 0.111, "step": 17610 }, { "epoch": 460.65359477124184, "grad_norm": 0.3219127357006073, "learning_rate": 2.606985674350737e-06, "loss": 0.1089, "step": 17620 }, { "epoch": 460.91503267973854, "grad_norm": 0.36690565943717957, "learning_rate": 2.569610171499226e-06, "loss": 0.1134, "step": 17630 }, { "epoch": 461.1764705882353, "grad_norm": 0.3363436162471771, "learning_rate": 2.5325010396131332e-06, "loss": 0.1111, "step": 17640 }, { "epoch": 461.437908496732, "grad_norm": 0.3152006268501282, "learning_rate": 2.495658380147414e-06, "loss": 0.111, "step": 17650 }, { "epoch": 461.69934640522877, "grad_norm": 0.30644363164901733, "learning_rate": 2.4590822938284854e-06, "loss": 0.1126, "step": 17660 }, { "epoch": 461.96078431372547, "grad_norm": 0.4446253776550293, "learning_rate": 2.4227728806539672e-06, "loss": 0.1135, "step": 17670 }, { "epoch": 462.22222222222223, "grad_norm": 0.323368638753891, "learning_rate": 2.386730239892432e-06, "loss": 0.1088, "step": 17680 }, { "epoch": 462.48366013071893, "grad_norm": 0.37691059708595276, "learning_rate": 2.3509544700830556e-06, "loss": 0.1113, "step": 17690 }, { "epoch": 462.7450980392157, "grad_norm": 0.32642048597335815, "learning_rate": 2.318984532773427e-06, "loss": 0.1136, "step": 17700 }, { "epoch": 463.0065359477124, "grad_norm": 0.37580016255378723, "learning_rate": 2.283716086635357e-06, "loss": 0.1124, "step": 17710 }, { "epoch": 463.26797385620915, "grad_norm": 0.3199895918369293, "learning_rate": 2.248714793086215e-06, "loss": 0.112, "step": 17720 }, { "epoch": 463.52941176470586, "grad_norm": 0.2862391471862793, "learning_rate": 2.213980747818201e-06, "loss": 0.111, "step": 17730 }, { "epoch": 463.7908496732026, "grad_norm": 0.3182966411113739, "learning_rate": 2.179514045792885e-06, "loss": 0.111, "step": 17740 }, { "epoch": 464.0522875816994, "grad_norm": 0.4070954918861389, "learning_rate": 2.1453147812408925e-06, "loss": 0.1133, "step": 17750 }, { "epoch": 464.3137254901961, "grad_norm": 0.39886951446533203, "learning_rate": 2.1113830476617193e-06, "loss": 0.1129, "step": 17760 }, { "epoch": 464.57516339869284, "grad_norm": 0.3158631920814514, "learning_rate": 2.0777189378234143e-06, "loss": 0.1114, "step": 17770 }, { "epoch": 464.83660130718954, "grad_norm": 0.3350854814052582, "learning_rate": 2.0443225437624e-06, "loss": 0.1103, "step": 17780 }, { "epoch": 465.0980392156863, "grad_norm": 0.3238007128238678, "learning_rate": 2.0111939567831197e-06, "loss": 0.11, "step": 17790 }, { "epoch": 465.359477124183, "grad_norm": 0.3874686360359192, "learning_rate": 1.9783332674578546e-06, "loss": 0.1122, "step": 17800 }, { "epoch": 465.62091503267976, "grad_norm": 0.3172845244407654, "learning_rate": 1.9457405656264973e-06, "loss": 0.1108, "step": 17810 }, { "epoch": 465.88235294117646, "grad_norm": 0.3653365969657898, "learning_rate": 1.913415940396246e-06, "loss": 0.1124, "step": 17820 }, { "epoch": 466.1437908496732, "grad_norm": 0.33903956413269043, "learning_rate": 1.8813594801413758e-06, "loss": 0.1124, "step": 17830 }, { "epoch": 466.4052287581699, "grad_norm": 0.3369342088699341, "learning_rate": 1.8495712725030478e-06, "loss": 0.1157, "step": 17840 }, { "epoch": 466.6666666666667, "grad_norm": 0.36298859119415283, "learning_rate": 1.8180514043889763e-06, "loss": 0.1097, "step": 17850 }, { "epoch": 466.9281045751634, "grad_norm": 0.3302154541015625, "learning_rate": 1.7867999619733179e-06, "loss": 0.1104, "step": 17860 }, { "epoch": 467.18954248366015, "grad_norm": 0.42841556668281555, "learning_rate": 1.755817030696294e-06, "loss": 0.1101, "step": 17870 }, { "epoch": 467.45098039215685, "grad_norm": 0.33162766695022583, "learning_rate": 1.725102695264058e-06, "loss": 0.1095, "step": 17880 }, { "epoch": 467.7124183006536, "grad_norm": 0.33728310465812683, "learning_rate": 1.6946570396484507e-06, "loss": 0.11, "step": 17890 }, { "epoch": 467.9738562091503, "grad_norm": 0.4027140438556671, "learning_rate": 1.6644801470867e-06, "loss": 0.1145, "step": 17900 }, { "epoch": 468.2352941176471, "grad_norm": 0.33868324756622314, "learning_rate": 1.6345721000812997e-06, "loss": 0.1111, "step": 17910 }, { "epoch": 468.4967320261438, "grad_norm": 0.30771663784980774, "learning_rate": 1.6049329803997092e-06, "loss": 0.1131, "step": 17920 }, { "epoch": 468.75816993464053, "grad_norm": 0.32284727692604065, "learning_rate": 1.575562869074143e-06, "loss": 0.1076, "step": 17930 }, { "epoch": 469.01960784313724, "grad_norm": 0.3255426287651062, "learning_rate": 1.5464618464013592e-06, "loss": 0.1151, "step": 17940 }, { "epoch": 469.281045751634, "grad_norm": 0.3239709436893463, "learning_rate": 1.5176299919424486e-06, "loss": 0.1113, "step": 17950 }, { "epoch": 469.5424836601307, "grad_norm": 0.39039456844329834, "learning_rate": 1.4890673845226133e-06, "loss": 0.1123, "step": 17960 }, { "epoch": 469.80392156862746, "grad_norm": 0.35394954681396484, "learning_rate": 1.4607741022309106e-06, "loss": 0.1157, "step": 17970 }, { "epoch": 470.06535947712416, "grad_norm": 0.33994433283805847, "learning_rate": 1.4327502224200872e-06, "loss": 0.106, "step": 17980 }, { "epoch": 470.3267973856209, "grad_norm": 0.36443567276000977, "learning_rate": 1.4049958217063896e-06, "loss": 0.1097, "step": 17990 }, { "epoch": 470.5882352941176, "grad_norm": 0.28790152072906494, "learning_rate": 1.3775109759692651e-06, "loss": 0.1127, "step": 18000 }, { "epoch": 470.8496732026144, "grad_norm": 0.31258511543273926, "learning_rate": 1.350295760351261e-06, "loss": 0.112, "step": 18010 }, { "epoch": 471.1111111111111, "grad_norm": 0.35889947414398193, "learning_rate": 1.3233502492577044e-06, "loss": 0.1139, "step": 18020 }, { "epoch": 471.37254901960785, "grad_norm": 0.31674671173095703, "learning_rate": 1.2966745163566107e-06, "loss": 0.1113, "step": 18030 }, { "epoch": 471.63398692810455, "grad_norm": 0.3120609521865845, "learning_rate": 1.2702686345784088e-06, "loss": 0.1133, "step": 18040 }, { "epoch": 471.8954248366013, "grad_norm": 0.4035533666610718, "learning_rate": 1.2441326761157723e-06, "loss": 0.1112, "step": 18050 }, { "epoch": 472.15686274509807, "grad_norm": 0.3225405812263489, "learning_rate": 1.2182667124234326e-06, "loss": 0.1085, "step": 18060 }, { "epoch": 472.41830065359477, "grad_norm": 0.3946692645549774, "learning_rate": 1.1926708142179111e-06, "loss": 0.1124, "step": 18070 }, { "epoch": 472.67973856209153, "grad_norm": 0.34986528754234314, "learning_rate": 1.167345051477442e-06, "loss": 0.1111, "step": 18080 }, { "epoch": 472.94117647058823, "grad_norm": 0.32098639011383057, "learning_rate": 1.1422894934416838e-06, "loss": 0.1115, "step": 18090 }, { "epoch": 473.202614379085, "grad_norm": 0.33079513907432556, "learning_rate": 1.117504208611586e-06, "loss": 0.1116, "step": 18100 }, { "epoch": 473.4640522875817, "grad_norm": 0.332214891910553, "learning_rate": 1.092989264749167e-06, "loss": 0.1106, "step": 18110 }, { "epoch": 473.72549019607845, "grad_norm": 0.2994788885116577, "learning_rate": 1.0687447288773244e-06, "loss": 0.1122, "step": 18120 }, { "epoch": 473.98692810457516, "grad_norm": 0.32751554250717163, "learning_rate": 1.0447706672797264e-06, "loss": 0.1126, "step": 18130 }, { "epoch": 474.2483660130719, "grad_norm": 0.40659329295158386, "learning_rate": 1.0210671455005204e-06, "loss": 0.1122, "step": 18140 }, { "epoch": 474.5098039215686, "grad_norm": 0.3787976801395416, "learning_rate": 9.976342283442463e-07, "loss": 0.1132, "step": 18150 }, { "epoch": 474.7712418300654, "grad_norm": 0.32952606678009033, "learning_rate": 9.744719798755907e-07, "loss": 0.1134, "step": 18160 }, { "epoch": 475.0326797385621, "grad_norm": 0.36884790658950806, "learning_rate": 9.515804634192659e-07, "loss": 0.1089, "step": 18170 }, { "epoch": 475.29411764705884, "grad_norm": 0.3276387155056, "learning_rate": 9.289597415597872e-07, "loss": 0.1081, "step": 18180 }, { "epoch": 475.55555555555554, "grad_norm": 0.34189870953559875, "learning_rate": 9.066098761413733e-07, "loss": 0.113, "step": 18190 }, { "epoch": 475.8169934640523, "grad_norm": 0.32616567611694336, "learning_rate": 8.845309282676795e-07, "loss": 0.112, "step": 18200 }, { "epoch": 476.078431372549, "grad_norm": 0.3290955126285553, "learning_rate": 8.627229583017204e-07, "loss": 0.1112, "step": 18210 }, { "epoch": 476.33986928104576, "grad_norm": 0.34466055035591125, "learning_rate": 8.411860258656256e-07, "loss": 0.1096, "step": 18220 }, { "epoch": 476.60130718954247, "grad_norm": 0.30991095304489136, "learning_rate": 8.199201898405839e-07, "loss": 0.1115, "step": 18230 }, { "epoch": 476.8627450980392, "grad_norm": 0.34044432640075684, "learning_rate": 7.989255083665659e-07, "loss": 0.1145, "step": 18240 }, { "epoch": 477.12418300653593, "grad_norm": 0.34200185537338257, "learning_rate": 7.782020388422018e-07, "loss": 0.1114, "step": 18250 }, { "epoch": 477.3856209150327, "grad_norm": 0.30193594098091125, "learning_rate": 7.577498379247039e-07, "loss": 0.1109, "step": 18260 }, { "epoch": 477.6470588235294, "grad_norm": 0.3074875473976135, "learning_rate": 7.375689615295889e-07, "loss": 0.1111, "step": 18270 }, { "epoch": 477.90849673202615, "grad_norm": 0.3469603359699249, "learning_rate": 7.176594648306111e-07, "loss": 0.114, "step": 18280 }, { "epoch": 478.16993464052285, "grad_norm": 0.2915016710758209, "learning_rate": 6.980214022595744e-07, "loss": 0.1096, "step": 18290 }, { "epoch": 478.4313725490196, "grad_norm": 0.32458066940307617, "learning_rate": 6.786548275061754e-07, "loss": 0.1124, "step": 18300 }, { "epoch": 478.6928104575163, "grad_norm": 0.5039242506027222, "learning_rate": 6.595597935179166e-07, "loss": 0.111, "step": 18310 }, { "epoch": 478.9542483660131, "grad_norm": 0.3211175501346588, "learning_rate": 6.4073635249986e-07, "loss": 0.1138, "step": 18320 }, { "epoch": 479.2156862745098, "grad_norm": 0.32088157534599304, "learning_rate": 6.221845559146066e-07, "loss": 0.1104, "step": 18330 }, { "epoch": 479.47712418300654, "grad_norm": 0.38157036900520325, "learning_rate": 6.039044544820404e-07, "loss": 0.1124, "step": 18340 }, { "epoch": 479.73856209150324, "grad_norm": 0.3110623061656952, "learning_rate": 5.858960981792505e-07, "loss": 0.1127, "step": 18350 }, { "epoch": 480.0, "grad_norm": 0.39009323716163635, "learning_rate": 5.681595362404312e-07, "loss": 0.1107, "step": 18360 }, { "epoch": 480.26143790849676, "grad_norm": 0.3596895635128021, "learning_rate": 5.506948171566273e-07, "loss": 0.1122, "step": 18370 }, { "epoch": 480.52287581699346, "grad_norm": 0.3319242298603058, "learning_rate": 5.335019886757442e-07, "loss": 0.1116, "step": 18380 }, { "epoch": 480.7843137254902, "grad_norm": 0.34595364332199097, "learning_rate": 5.165810978023044e-07, "loss": 0.1109, "step": 18390 }, { "epoch": 481.0457516339869, "grad_norm": 0.3366650640964508, "learning_rate": 4.999321907973698e-07, "loss": 0.1103, "step": 18400 }, { "epoch": 481.3071895424837, "grad_norm": 0.31628909707069397, "learning_rate": 4.835553131784298e-07, "loss": 0.1128, "step": 18410 }, { "epoch": 481.5686274509804, "grad_norm": 0.33149656653404236, "learning_rate": 4.6745050971923607e-07, "loss": 0.1119, "step": 18420 }, { "epoch": 481.83006535947715, "grad_norm": 0.30861008167266846, "learning_rate": 4.5161782444971267e-07, "loss": 0.1106, "step": 18430 }, { "epoch": 482.09150326797385, "grad_norm": 0.357360303401947, "learning_rate": 4.360573006558122e-07, "loss": 0.1112, "step": 18440 }, { "epoch": 482.3529411764706, "grad_norm": 0.3232167065143585, "learning_rate": 4.207689808794046e-07, "loss": 0.1088, "step": 18450 }, { "epoch": 482.6143790849673, "grad_norm": 0.31159424781799316, "learning_rate": 4.0575290691817757e-07, "loss": 0.1121, "step": 18460 }, { "epoch": 482.87581699346407, "grad_norm": 0.4624285101890564, "learning_rate": 3.91009119825525e-07, "loss": 0.1121, "step": 18470 }, { "epoch": 483.1372549019608, "grad_norm": 0.34504663944244385, "learning_rate": 3.765376599103587e-07, "loss": 0.1135, "step": 18480 }, { "epoch": 483.39869281045753, "grad_norm": 0.3002839982509613, "learning_rate": 3.623385667371304e-07, "loss": 0.1101, "step": 18490 }, { "epoch": 483.66013071895424, "grad_norm": 0.37612277269363403, "learning_rate": 3.484118791255986e-07, "loss": 0.112, "step": 18500 }, { "epoch": 483.921568627451, "grad_norm": 0.4303232431411743, "learning_rate": 3.347576351508064e-07, "loss": 0.1127, "step": 18510 }, { "epoch": 484.1830065359477, "grad_norm": 0.4048657715320587, "learning_rate": 3.2137587214293717e-07, "loss": 0.1125, "step": 18520 }, { "epoch": 484.44444444444446, "grad_norm": 0.2931053638458252, "learning_rate": 3.0826662668720364e-07, "loss": 0.1138, "step": 18530 }, { "epoch": 484.70588235294116, "grad_norm": 0.29800963401794434, "learning_rate": 2.954299346238032e-07, "loss": 0.1096, "step": 18540 }, { "epoch": 484.9673202614379, "grad_norm": 0.34082716703414917, "learning_rate": 2.828658310477406e-07, "loss": 0.1086, "step": 18550 }, { "epoch": 485.2287581699346, "grad_norm": 0.38244274258613586, "learning_rate": 2.705743503088165e-07, "loss": 0.1134, "step": 18560 }, { "epoch": 485.4901960784314, "grad_norm": 0.3117094039916992, "learning_rate": 2.585555260114614e-07, "loss": 0.1078, "step": 18570 }, { "epoch": 485.7516339869281, "grad_norm": 0.32655006647109985, "learning_rate": 2.468093910146685e-07, "loss": 0.1124, "step": 18580 }, { "epoch": 486.01307189542484, "grad_norm": 0.4254554510116577, "learning_rate": 2.3533597743194967e-07, "loss": 0.1127, "step": 18590 }, { "epoch": 486.27450980392155, "grad_norm": 0.3003155291080475, "learning_rate": 2.2413531663115773e-07, "loss": 0.1097, "step": 18600 }, { "epoch": 486.5359477124183, "grad_norm": 0.3139316439628601, "learning_rate": 2.1320743923447517e-07, "loss": 0.1114, "step": 18610 }, { "epoch": 486.797385620915, "grad_norm": 0.38288217782974243, "learning_rate": 2.0255237511830338e-07, "loss": 0.1145, "step": 18620 }, { "epoch": 487.05882352941177, "grad_norm": 0.3423730134963989, "learning_rate": 1.921701534131848e-07, "loss": 0.1115, "step": 18630 }, { "epoch": 487.32026143790847, "grad_norm": 0.40306052565574646, "learning_rate": 1.8206080250372515e-07, "loss": 0.1129, "step": 18640 }, { "epoch": 487.58169934640523, "grad_norm": 0.30820974707603455, "learning_rate": 1.7222435002847147e-07, "loss": 0.1108, "step": 18650 }, { "epoch": 487.84313725490193, "grad_norm": 0.3316936790943146, "learning_rate": 1.6266082287994533e-07, "loss": 0.1087, "step": 18660 }, { "epoch": 488.1045751633987, "grad_norm": 0.32804399728775024, "learning_rate": 1.5337024720445403e-07, "loss": 0.1149, "step": 18670 }, { "epoch": 488.36601307189545, "grad_norm": 0.36528879404067993, "learning_rate": 1.443526484020574e-07, "loss": 0.111, "step": 18680 }, { "epoch": 488.62745098039215, "grad_norm": 0.336240291595459, "learning_rate": 1.3560805112655673e-07, "loss": 0.1123, "step": 18690 }, { "epoch": 488.8888888888889, "grad_norm": 0.4585813283920288, "learning_rate": 1.2713647928532802e-07, "loss": 0.1106, "step": 18700 }, { "epoch": 489.1503267973856, "grad_norm": 0.3581952452659607, "learning_rate": 1.1893795603932222e-07, "loss": 0.1135, "step": 18710 }, { "epoch": 489.4117647058824, "grad_norm": 0.3902879059314728, "learning_rate": 1.1101250380300965e-07, "loss": 0.1083, "step": 18720 }, { "epoch": 489.6732026143791, "grad_norm": 0.2964220345020294, "learning_rate": 1.0336014424424668e-07, "loss": 0.1116, "step": 18730 }, { "epoch": 489.93464052287584, "grad_norm": 0.39717039465904236, "learning_rate": 9.598089828430911e-08, "loss": 0.1142, "step": 18740 }, { "epoch": 490.19607843137254, "grad_norm": 0.3992186486721039, "learning_rate": 8.887478609777011e-08, "loss": 0.112, "step": 18750 }, { "epoch": 490.4575163398693, "grad_norm": 0.34660351276397705, "learning_rate": 8.204182711246677e-08, "loss": 0.1128, "step": 18760 }, { "epoch": 490.718954248366, "grad_norm": 0.29538923501968384, "learning_rate": 7.54820400094558e-08, "loss": 0.1075, "step": 18770 }, { "epoch": 490.98039215686276, "grad_norm": 0.43791237473487854, "learning_rate": 6.919544272293577e-08, "loss": 0.1137, "step": 18780 }, { "epoch": 491.24183006535947, "grad_norm": 0.35819703340530396, "learning_rate": 6.318205244023601e-08, "loss": 0.1113, "step": 18790 }, { "epoch": 491.5032679738562, "grad_norm": 0.31988316774368286, "learning_rate": 5.7441885601716707e-08, "loss": 0.1108, "step": 18800 }, { "epoch": 491.7647058823529, "grad_norm": 0.3159491717815399, "learning_rate": 5.19749579007911e-08, "loss": 0.114, "step": 18810 }, { "epoch": 492.0261437908497, "grad_norm": 0.3267784118652344, "learning_rate": 4.678128428382555e-08, "loss": 0.1088, "step": 18820 }, { "epoch": 492.2875816993464, "grad_norm": 0.3536956012248993, "learning_rate": 4.186087895011737e-08, "loss": 0.1136, "step": 18830 }, { "epoch": 492.54901960784315, "grad_norm": 0.3293076455593109, "learning_rate": 3.721375535188365e-08, "loss": 0.1121, "step": 18840 }, { "epoch": 492.81045751633985, "grad_norm": 0.2942824065685272, "learning_rate": 3.283992619416143e-08, "loss": 0.1117, "step": 18850 }, { "epoch": 493.0718954248366, "grad_norm": 0.34250175952911377, "learning_rate": 2.873940343485204e-08, "loss": 0.1103, "step": 18860 }, { "epoch": 493.3333333333333, "grad_norm": 0.3269750475883484, "learning_rate": 2.4912198284621214e-08, "loss": 0.11, "step": 18870 }, { "epoch": 493.5947712418301, "grad_norm": 0.339807391166687, "learning_rate": 2.135832120689907e-08, "loss": 0.1081, "step": 18880 }, { "epoch": 493.8562091503268, "grad_norm": 0.32826676964759827, "learning_rate": 1.8077781917846815e-08, "loss": 0.1127, "step": 18890 }, { "epoch": 494.11764705882354, "grad_norm": 0.29220259189605713, "learning_rate": 1.5070589386345645e-08, "loss": 0.1146, "step": 18900 }, { "epoch": 494.37908496732024, "grad_norm": 0.3818574547767639, "learning_rate": 1.2336751833941229e-08, "loss": 0.1145, "step": 18910 }, { "epoch": 494.640522875817, "grad_norm": 0.3570195138454437, "learning_rate": 9.876276734832601e-09, "loss": 0.1106, "step": 18920 }, { "epoch": 494.9019607843137, "grad_norm": 0.2886011600494385, "learning_rate": 7.689170815872172e-09, "loss": 0.1096, "step": 18930 }, { "epoch": 495.16339869281046, "grad_norm": 0.36767634749412537, "learning_rate": 5.775440056521308e-09, "loss": 0.1102, "step": 18940 }, { "epoch": 495.42483660130716, "grad_norm": 0.30554234981536865, "learning_rate": 4.1350896888503464e-09, "loss": 0.112, "step": 18950 }, { "epoch": 495.6862745098039, "grad_norm": 0.35083529353141785, "learning_rate": 2.768124197505273e-09, "loss": 0.1123, "step": 18960 }, { "epoch": 495.9477124183006, "grad_norm": 0.37509220838546753, "learning_rate": 1.6745473197188333e-09, "loss": 0.1122, "step": 18970 }, { "epoch": 496.2091503267974, "grad_norm": 0.3241898715496063, "learning_rate": 8.543620453105306e-10, "loss": 0.1103, "step": 18980 }, { "epoch": 496.47058823529414, "grad_norm": 0.40990370512008667, "learning_rate": 3.075706166089098e-10, "loss": 0.1103, "step": 18990 }, { "epoch": 496.73202614379085, "grad_norm": 0.401498407125473, "learning_rate": 3.417452852927383e-11, "loss": 0.1131, "step": 19000 } ], "logging_steps": 10, "max_steps": 19000, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3677835109083546e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }