{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9879760807595859, "eval_steps": 700, "global_step": 11664, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001714640889898622, "grad_norm": 223.0, "learning_rate": 2.0000000000000002e-07, "loss": 1.7472, "step": 1 }, { "epoch": 0.0001714640889898622, "eval_loss": 2.7519192695617676, "eval_runtime": 836.2413, "eval_samples_per_second": 2.988, "eval_steps_per_second": 2.988, "step": 1 }, { "epoch": 0.0003429281779797244, "grad_norm": 127.0, "learning_rate": 4.0000000000000003e-07, "loss": 1.6937, "step": 2 }, { "epoch": 0.0005143922669695865, "grad_norm": 237.0, "learning_rate": 6.000000000000001e-07, "loss": 1.661, "step": 3 }, { "epoch": 0.0006858563559594488, "grad_norm": 118.5, "learning_rate": 8.000000000000001e-07, "loss": 1.5902, "step": 4 }, { "epoch": 0.000857320444949311, "grad_norm": 156.0, "learning_rate": 1.0000000000000002e-06, "loss": 1.6023, "step": 5 }, { "epoch": 0.001028784533939173, "grad_norm": 155.0, "learning_rate": 1.2000000000000002e-06, "loss": 1.7616, "step": 6 }, { "epoch": 0.0012002486229290353, "grad_norm": 94.5, "learning_rate": 1.4000000000000001e-06, "loss": 1.7018, "step": 7 }, { "epoch": 0.0013717127119188975, "grad_norm": 96.5, "learning_rate": 1.6000000000000001e-06, "loss": 1.6484, "step": 8 }, { "epoch": 0.0015431768009087597, "grad_norm": 290.0, "learning_rate": 1.8000000000000001e-06, "loss": 1.5879, "step": 9 }, { "epoch": 0.001714640889898622, "grad_norm": 154.0, "learning_rate": 2.0000000000000003e-06, "loss": 1.5615, "step": 10 }, { "epoch": 0.001886104978888484, "grad_norm": 68.0, "learning_rate": 2.2e-06, "loss": 1.5076, "step": 11 }, { "epoch": 0.002057569067878346, "grad_norm": 70.5, "learning_rate": 2.4000000000000003e-06, "loss": 1.5792, "step": 12 }, { "epoch": 0.0022290331568682086, "grad_norm": 112.5, "learning_rate": 2.6e-06, "loss": 1.4464, "step": 13 }, { "epoch": 0.0024004972458580706, "grad_norm": 47.5, "learning_rate": 2.8000000000000003e-06, "loss": 1.4436, "step": 14 }, { "epoch": 0.0025719613348479326, "grad_norm": 33.75, "learning_rate": 3e-06, "loss": 1.4214, "step": 15 }, { "epoch": 0.002743425423837795, "grad_norm": 46.0, "learning_rate": 3.2000000000000003e-06, "loss": 1.3857, "step": 16 }, { "epoch": 0.002914889512827657, "grad_norm": 59.75, "learning_rate": 3.4000000000000005e-06, "loss": 1.477, "step": 17 }, { "epoch": 0.0030863536018175194, "grad_norm": 23.75, "learning_rate": 3.6000000000000003e-06, "loss": 1.4501, "step": 18 }, { "epoch": 0.0032578176908073814, "grad_norm": 19.625, "learning_rate": 3.8000000000000005e-06, "loss": 1.5123, "step": 19 }, { "epoch": 0.003429281779797244, "grad_norm": 12.75, "learning_rate": 4.000000000000001e-06, "loss": 1.3971, "step": 20 }, { "epoch": 0.003600745868787106, "grad_norm": 31.625, "learning_rate": 4.2000000000000004e-06, "loss": 1.2835, "step": 21 }, { "epoch": 0.003772209957776968, "grad_norm": 56.75, "learning_rate": 4.4e-06, "loss": 1.3467, "step": 22 }, { "epoch": 0.00394367404676683, "grad_norm": 5.09375, "learning_rate": 4.600000000000001e-06, "loss": 1.2356, "step": 23 }, { "epoch": 0.004115138135756692, "grad_norm": 83.5, "learning_rate": 4.800000000000001e-06, "loss": 1.3952, "step": 24 }, { "epoch": 0.004286602224746554, "grad_norm": 1004.0, "learning_rate": 5e-06, "loss": 1.3461, "step": 25 }, { "epoch": 0.004458066313736417, "grad_norm": 18.875, "learning_rate": 5.2e-06, "loss": 1.4225, "step": 26 }, { "epoch": 0.004629530402726279, "grad_norm": 430.0, "learning_rate": 5.400000000000001e-06, "loss": 1.356, "step": 27 }, { "epoch": 0.004800994491716141, "grad_norm": 62.25, "learning_rate": 5.600000000000001e-06, "loss": 1.4085, "step": 28 }, { "epoch": 0.004972458580706003, "grad_norm": 8.8125, "learning_rate": 5.8e-06, "loss": 1.2939, "step": 29 }, { "epoch": 0.005143922669695865, "grad_norm": 131.0, "learning_rate": 6e-06, "loss": 1.3616, "step": 30 }, { "epoch": 0.005315386758685728, "grad_norm": 43.0, "learning_rate": 6.200000000000001e-06, "loss": 1.4088, "step": 31 }, { "epoch": 0.00548685084767559, "grad_norm": 28.0, "learning_rate": 6.4000000000000006e-06, "loss": 1.3492, "step": 32 }, { "epoch": 0.005658314936665452, "grad_norm": 18.5, "learning_rate": 6.600000000000001e-06, "loss": 1.3562, "step": 33 }, { "epoch": 0.005829779025655314, "grad_norm": 19.375, "learning_rate": 6.800000000000001e-06, "loss": 1.2559, "step": 34 }, { "epoch": 0.006001243114645177, "grad_norm": 23.0, "learning_rate": 7e-06, "loss": 1.3713, "step": 35 }, { "epoch": 0.006172707203635039, "grad_norm": 211.0, "learning_rate": 7.2000000000000005e-06, "loss": 1.2363, "step": 36 }, { "epoch": 0.006344171292624901, "grad_norm": 34.5, "learning_rate": 7.4e-06, "loss": 1.371, "step": 37 }, { "epoch": 0.006515635381614763, "grad_norm": 7.78125, "learning_rate": 7.600000000000001e-06, "loss": 1.3426, "step": 38 }, { "epoch": 0.006687099470604625, "grad_norm": 21.0, "learning_rate": 7.800000000000002e-06, "loss": 1.2491, "step": 39 }, { "epoch": 0.006858563559594488, "grad_norm": 2.859375, "learning_rate": 8.000000000000001e-06, "loss": 1.2641, "step": 40 }, { "epoch": 0.00703002764858435, "grad_norm": 11.9375, "learning_rate": 8.2e-06, "loss": 1.3252, "step": 41 }, { "epoch": 0.007201491737574212, "grad_norm": 6.75, "learning_rate": 8.400000000000001e-06, "loss": 1.242, "step": 42 }, { "epoch": 0.007372955826564074, "grad_norm": 14.3125, "learning_rate": 8.6e-06, "loss": 1.3359, "step": 43 }, { "epoch": 0.007544419915553936, "grad_norm": 23.75, "learning_rate": 8.8e-06, "loss": 1.3308, "step": 44 }, { "epoch": 0.007715884004543799, "grad_norm": 17.25, "learning_rate": 9e-06, "loss": 1.2513, "step": 45 }, { "epoch": 0.00788734809353366, "grad_norm": 4.65625, "learning_rate": 9.200000000000002e-06, "loss": 1.2823, "step": 46 }, { "epoch": 0.008058812182523523, "grad_norm": 4.46875, "learning_rate": 9.4e-06, "loss": 1.3031, "step": 47 }, { "epoch": 0.008230276271513385, "grad_norm": 4.25, "learning_rate": 9.600000000000001e-06, "loss": 1.2793, "step": 48 }, { "epoch": 0.008401740360503247, "grad_norm": 4.3125, "learning_rate": 9.800000000000001e-06, "loss": 1.3314, "step": 49 }, { "epoch": 0.008573204449493109, "grad_norm": 9.1875, "learning_rate": 1e-05, "loss": 1.3205, "step": 50 }, { "epoch": 0.008744668538482972, "grad_norm": 8.625, "learning_rate": 1.02e-05, "loss": 1.2554, "step": 51 }, { "epoch": 0.008916132627472834, "grad_norm": 2.734375, "learning_rate": 1.04e-05, "loss": 1.2897, "step": 52 }, { "epoch": 0.009087596716462696, "grad_norm": 3.609375, "learning_rate": 1.0600000000000002e-05, "loss": 1.3327, "step": 53 }, { "epoch": 0.009259060805452558, "grad_norm": 8.9375, "learning_rate": 1.0800000000000002e-05, "loss": 1.2707, "step": 54 }, { "epoch": 0.00943052489444242, "grad_norm": 9.4375, "learning_rate": 1.1000000000000001e-05, "loss": 1.2724, "step": 55 }, { "epoch": 0.009601988983432282, "grad_norm": 3.890625, "learning_rate": 1.1200000000000001e-05, "loss": 1.2567, "step": 56 }, { "epoch": 0.009773453072422144, "grad_norm": 3.796875, "learning_rate": 1.14e-05, "loss": 1.3487, "step": 57 }, { "epoch": 0.009944917161412006, "grad_norm": 2.65625, "learning_rate": 1.16e-05, "loss": 1.2577, "step": 58 }, { "epoch": 0.010116381250401868, "grad_norm": 2.71875, "learning_rate": 1.18e-05, "loss": 1.2358, "step": 59 }, { "epoch": 0.01028784533939173, "grad_norm": 2.546875, "learning_rate": 1.2e-05, "loss": 1.2995, "step": 60 }, { "epoch": 0.010459309428381594, "grad_norm": 2.984375, "learning_rate": 1.22e-05, "loss": 1.2554, "step": 61 }, { "epoch": 0.010630773517371456, "grad_norm": 11.75, "learning_rate": 1.2400000000000002e-05, "loss": 1.2732, "step": 62 }, { "epoch": 0.010802237606361318, "grad_norm": 2.671875, "learning_rate": 1.2600000000000001e-05, "loss": 1.3157, "step": 63 }, { "epoch": 0.01097370169535118, "grad_norm": 5.78125, "learning_rate": 1.2800000000000001e-05, "loss": 1.1681, "step": 64 }, { "epoch": 0.011145165784341042, "grad_norm": 2.71875, "learning_rate": 1.3000000000000001e-05, "loss": 1.3121, "step": 65 }, { "epoch": 0.011316629873330904, "grad_norm": 2.4375, "learning_rate": 1.3200000000000002e-05, "loss": 1.2333, "step": 66 }, { "epoch": 0.011488093962320766, "grad_norm": 2.296875, "learning_rate": 1.3400000000000002e-05, "loss": 1.1963, "step": 67 }, { "epoch": 0.011659558051310628, "grad_norm": 2.6875, "learning_rate": 1.3600000000000002e-05, "loss": 1.1645, "step": 68 }, { "epoch": 0.01183102214030049, "grad_norm": 3.125, "learning_rate": 1.38e-05, "loss": 1.183, "step": 69 }, { "epoch": 0.012002486229290354, "grad_norm": 2.265625, "learning_rate": 1.4e-05, "loss": 1.2707, "step": 70 }, { "epoch": 0.012173950318280216, "grad_norm": 2.953125, "learning_rate": 1.4200000000000001e-05, "loss": 1.2276, "step": 71 }, { "epoch": 0.012345414407270078, "grad_norm": 2.78125, "learning_rate": 1.4400000000000001e-05, "loss": 1.2386, "step": 72 }, { "epoch": 0.01251687849625994, "grad_norm": 2.421875, "learning_rate": 1.46e-05, "loss": 1.1404, "step": 73 }, { "epoch": 0.012688342585249802, "grad_norm": 3.796875, "learning_rate": 1.48e-05, "loss": 1.2231, "step": 74 }, { "epoch": 0.012859806674239664, "grad_norm": 2.46875, "learning_rate": 1.5000000000000002e-05, "loss": 1.1835, "step": 75 }, { "epoch": 0.013031270763229526, "grad_norm": 2.140625, "learning_rate": 1.5200000000000002e-05, "loss": 1.2235, "step": 76 }, { "epoch": 0.013202734852219388, "grad_norm": 2.203125, "learning_rate": 1.54e-05, "loss": 1.1566, "step": 77 }, { "epoch": 0.01337419894120925, "grad_norm": 2.0, "learning_rate": 1.5600000000000003e-05, "loss": 1.1565, "step": 78 }, { "epoch": 0.013545663030199113, "grad_norm": 2.296875, "learning_rate": 1.58e-05, "loss": 1.2313, "step": 79 }, { "epoch": 0.013717127119188975, "grad_norm": 2.046875, "learning_rate": 1.6000000000000003e-05, "loss": 1.1824, "step": 80 }, { "epoch": 0.013888591208178837, "grad_norm": 1.96875, "learning_rate": 1.62e-05, "loss": 1.1746, "step": 81 }, { "epoch": 0.0140600552971687, "grad_norm": 2.359375, "learning_rate": 1.64e-05, "loss": 1.2601, "step": 82 }, { "epoch": 0.014231519386158561, "grad_norm": 2.359375, "learning_rate": 1.66e-05, "loss": 1.1281, "step": 83 }, { "epoch": 0.014402983475148423, "grad_norm": 2.0625, "learning_rate": 1.6800000000000002e-05, "loss": 1.1808, "step": 84 }, { "epoch": 0.014574447564138285, "grad_norm": 2.953125, "learning_rate": 1.7e-05, "loss": 1.1618, "step": 85 }, { "epoch": 0.014745911653128147, "grad_norm": 2.25, "learning_rate": 1.72e-05, "loss": 1.1581, "step": 86 }, { "epoch": 0.01491737574211801, "grad_norm": 2.265625, "learning_rate": 1.7400000000000003e-05, "loss": 1.2232, "step": 87 }, { "epoch": 0.015088839831107871, "grad_norm": 3.390625, "learning_rate": 1.76e-05, "loss": 1.2678, "step": 88 }, { "epoch": 0.015260303920097735, "grad_norm": 2.3125, "learning_rate": 1.7800000000000002e-05, "loss": 1.0981, "step": 89 }, { "epoch": 0.015431768009087597, "grad_norm": 2.171875, "learning_rate": 1.8e-05, "loss": 1.1916, "step": 90 }, { "epoch": 0.01560323209807746, "grad_norm": 2.015625, "learning_rate": 1.8200000000000002e-05, "loss": 1.1261, "step": 91 }, { "epoch": 0.01577469618706732, "grad_norm": 2.09375, "learning_rate": 1.8400000000000003e-05, "loss": 1.1955, "step": 92 }, { "epoch": 0.015946160276057185, "grad_norm": 10.6875, "learning_rate": 1.86e-05, "loss": 1.1729, "step": 93 }, { "epoch": 0.016117624365047045, "grad_norm": 2.15625, "learning_rate": 1.88e-05, "loss": 1.1203, "step": 94 }, { "epoch": 0.01628908845403691, "grad_norm": 2.171875, "learning_rate": 1.9e-05, "loss": 1.2245, "step": 95 }, { "epoch": 0.01646055254302677, "grad_norm": 2.046875, "learning_rate": 1.9200000000000003e-05, "loss": 1.191, "step": 96 }, { "epoch": 0.016632016632016633, "grad_norm": 2.34375, "learning_rate": 1.94e-05, "loss": 1.276, "step": 97 }, { "epoch": 0.016803480721006493, "grad_norm": 2.453125, "learning_rate": 1.9600000000000002e-05, "loss": 1.2306, "step": 98 }, { "epoch": 0.016974944809996357, "grad_norm": 2.0, "learning_rate": 1.98e-05, "loss": 1.2018, "step": 99 }, { "epoch": 0.017146408898986217, "grad_norm": 2.875, "learning_rate": 2e-05, "loss": 1.2462, "step": 100 }, { "epoch": 0.01731787298797608, "grad_norm": 2.0625, "learning_rate": 1.9999999836931174e-05, "loss": 1.1722, "step": 101 }, { "epoch": 0.017489337076965945, "grad_norm": 1.8046875, "learning_rate": 1.9999999347724693e-05, "loss": 1.1661, "step": 102 }, { "epoch": 0.017660801165955805, "grad_norm": 1.984375, "learning_rate": 1.999999853238058e-05, "loss": 1.1579, "step": 103 }, { "epoch": 0.01783226525494567, "grad_norm": 2.578125, "learning_rate": 1.9999997390898854e-05, "loss": 1.244, "step": 104 }, { "epoch": 0.01800372934393553, "grad_norm": 2.328125, "learning_rate": 1.999999592327956e-05, "loss": 1.1997, "step": 105 }, { "epoch": 0.018175193432925393, "grad_norm": 2.28125, "learning_rate": 1.999999412952274e-05, "loss": 1.1714, "step": 106 }, { "epoch": 0.018346657521915253, "grad_norm": 2.03125, "learning_rate": 1.999999200962846e-05, "loss": 1.1926, "step": 107 }, { "epoch": 0.018518121610905117, "grad_norm": 6.46875, "learning_rate": 1.999998956359678e-05, "loss": 1.1984, "step": 108 }, { "epoch": 0.018689585699894977, "grad_norm": 13.5625, "learning_rate": 1.9999986791427782e-05, "loss": 1.1753, "step": 109 }, { "epoch": 0.01886104978888484, "grad_norm": 2.125, "learning_rate": 1.9999983693121564e-05, "loss": 1.1055, "step": 110 }, { "epoch": 0.0190325138778747, "grad_norm": 2.046875, "learning_rate": 1.9999980268678218e-05, "loss": 1.1672, "step": 111 }, { "epoch": 0.019203977966864565, "grad_norm": 2.09375, "learning_rate": 1.999997651809786e-05, "loss": 1.2352, "step": 112 }, { "epoch": 0.01937544205585443, "grad_norm": 2.234375, "learning_rate": 1.999997244138061e-05, "loss": 1.095, "step": 113 }, { "epoch": 0.01954690614484429, "grad_norm": 2.0625, "learning_rate": 1.99999680385266e-05, "loss": 1.2065, "step": 114 }, { "epoch": 0.019718370233834152, "grad_norm": 2.296875, "learning_rate": 1.999996330953598e-05, "loss": 1.1259, "step": 115 }, { "epoch": 0.019889834322824013, "grad_norm": 3.796875, "learning_rate": 1.9999958254408897e-05, "loss": 1.2381, "step": 116 }, { "epoch": 0.020061298411813876, "grad_norm": 3.203125, "learning_rate": 1.9999952873145523e-05, "loss": 1.234, "step": 117 }, { "epoch": 0.020232762500803737, "grad_norm": 2.390625, "learning_rate": 1.9999947165746028e-05, "loss": 1.2862, "step": 118 }, { "epoch": 0.0204042265897936, "grad_norm": 2.25, "learning_rate": 1.9999941132210598e-05, "loss": 1.1938, "step": 119 }, { "epoch": 0.02057569067878346, "grad_norm": 1.9765625, "learning_rate": 1.999993477253943e-05, "loss": 1.2301, "step": 120 }, { "epoch": 0.020747154767773324, "grad_norm": 2.109375, "learning_rate": 1.9999928086732736e-05, "loss": 1.2761, "step": 121 }, { "epoch": 0.020918618856763188, "grad_norm": 1.828125, "learning_rate": 1.999992107479073e-05, "loss": 1.1392, "step": 122 }, { "epoch": 0.02109008294575305, "grad_norm": 1.859375, "learning_rate": 1.9999913736713642e-05, "loss": 1.0334, "step": 123 }, { "epoch": 0.021261547034742912, "grad_norm": 1.84375, "learning_rate": 1.9999906072501712e-05, "loss": 1.1569, "step": 124 }, { "epoch": 0.021433011123732772, "grad_norm": 1.8984375, "learning_rate": 1.9999898082155185e-05, "loss": 1.1254, "step": 125 }, { "epoch": 0.021604475212722636, "grad_norm": 2.40625, "learning_rate": 1.9999889765674326e-05, "loss": 1.1018, "step": 126 }, { "epoch": 0.021775939301712496, "grad_norm": 1.8125, "learning_rate": 1.999988112305941e-05, "loss": 1.1592, "step": 127 }, { "epoch": 0.02194740339070236, "grad_norm": 1.9296875, "learning_rate": 1.999987215431071e-05, "loss": 1.1072, "step": 128 }, { "epoch": 0.02211886747969222, "grad_norm": 2.21875, "learning_rate": 1.9999862859428526e-05, "loss": 1.1306, "step": 129 }, { "epoch": 0.022290331568682084, "grad_norm": 2.125, "learning_rate": 1.9999853238413154e-05, "loss": 1.2421, "step": 130 }, { "epoch": 0.022461795657671948, "grad_norm": 6.09375, "learning_rate": 1.9999843291264915e-05, "loss": 1.2552, "step": 131 }, { "epoch": 0.022633259746661808, "grad_norm": 5.21875, "learning_rate": 1.999983301798413e-05, "loss": 1.2062, "step": 132 }, { "epoch": 0.022804723835651672, "grad_norm": 2.03125, "learning_rate": 1.9999822418571134e-05, "loss": 1.1919, "step": 133 }, { "epoch": 0.022976187924641532, "grad_norm": 1.9296875, "learning_rate": 1.9999811493026275e-05, "loss": 1.1638, "step": 134 }, { "epoch": 0.023147652013631396, "grad_norm": 1.9609375, "learning_rate": 1.9999800241349903e-05, "loss": 1.2391, "step": 135 }, { "epoch": 0.023319116102621256, "grad_norm": 1.84375, "learning_rate": 1.9999788663542397e-05, "loss": 1.1979, "step": 136 }, { "epoch": 0.02349058019161112, "grad_norm": 2.0, "learning_rate": 1.9999776759604123e-05, "loss": 1.1564, "step": 137 }, { "epoch": 0.02366204428060098, "grad_norm": 2.109375, "learning_rate": 1.999976452953547e-05, "loss": 1.1106, "step": 138 }, { "epoch": 0.023833508369590844, "grad_norm": 2.5625, "learning_rate": 1.9999751973336843e-05, "loss": 1.1715, "step": 139 }, { "epoch": 0.024004972458580708, "grad_norm": 1.90625, "learning_rate": 1.9999739091008646e-05, "loss": 1.0636, "step": 140 }, { "epoch": 0.024176436547570568, "grad_norm": 1.8671875, "learning_rate": 1.9999725882551305e-05, "loss": 1.1546, "step": 141 }, { "epoch": 0.02434790063656043, "grad_norm": 1.90625, "learning_rate": 1.9999712347965245e-05, "loss": 1.248, "step": 142 }, { "epoch": 0.024519364725550292, "grad_norm": 1.8671875, "learning_rate": 1.9999698487250914e-05, "loss": 1.1635, "step": 143 }, { "epoch": 0.024690828814540156, "grad_norm": 1.9140625, "learning_rate": 1.9999684300408756e-05, "loss": 1.2565, "step": 144 }, { "epoch": 0.024862292903530016, "grad_norm": 1.9453125, "learning_rate": 1.999966978743924e-05, "loss": 1.1027, "step": 145 }, { "epoch": 0.02503375699251988, "grad_norm": 2.09375, "learning_rate": 1.9999654948342836e-05, "loss": 1.1535, "step": 146 }, { "epoch": 0.02520522108150974, "grad_norm": 2.265625, "learning_rate": 1.999963978312003e-05, "loss": 1.17, "step": 147 }, { "epoch": 0.025376685170499604, "grad_norm": 1.9609375, "learning_rate": 1.999962429177131e-05, "loss": 1.1554, "step": 148 }, { "epoch": 0.025548149259489467, "grad_norm": 2.046875, "learning_rate": 1.9999608474297192e-05, "loss": 1.2368, "step": 149 }, { "epoch": 0.025719613348479328, "grad_norm": 2.28125, "learning_rate": 1.9999592330698185e-05, "loss": 1.1703, "step": 150 }, { "epoch": 0.02589107743746919, "grad_norm": 1.875, "learning_rate": 1.9999575860974817e-05, "loss": 1.2139, "step": 151 }, { "epoch": 0.02606254152645905, "grad_norm": 2.0625, "learning_rate": 1.9999559065127627e-05, "loss": 1.208, "step": 152 }, { "epoch": 0.026234005615448915, "grad_norm": 1.9765625, "learning_rate": 1.999954194315716e-05, "loss": 1.1737, "step": 153 }, { "epoch": 0.026405469704438776, "grad_norm": 1.890625, "learning_rate": 1.9999524495063974e-05, "loss": 1.1485, "step": 154 }, { "epoch": 0.02657693379342864, "grad_norm": 1.9921875, "learning_rate": 1.999950672084864e-05, "loss": 1.1551, "step": 155 }, { "epoch": 0.0267483978824185, "grad_norm": 1.8984375, "learning_rate": 1.999948862051174e-05, "loss": 1.1953, "step": 156 }, { "epoch": 0.026919861971408363, "grad_norm": 1.90625, "learning_rate": 1.999947019405386e-05, "loss": 1.1708, "step": 157 }, { "epoch": 0.027091326060398227, "grad_norm": 1.859375, "learning_rate": 1.99994514414756e-05, "loss": 1.2106, "step": 158 }, { "epoch": 0.027262790149388087, "grad_norm": 2.28125, "learning_rate": 1.999943236277758e-05, "loss": 1.2831, "step": 159 }, { "epoch": 0.02743425423837795, "grad_norm": 2.125, "learning_rate": 1.999941295796041e-05, "loss": 1.2245, "step": 160 }, { "epoch": 0.02760571832736781, "grad_norm": 1.953125, "learning_rate": 1.9999393227024733e-05, "loss": 1.0303, "step": 161 }, { "epoch": 0.027777182416357675, "grad_norm": 3.296875, "learning_rate": 1.9999373169971184e-05, "loss": 1.1286, "step": 162 }, { "epoch": 0.027948646505347535, "grad_norm": 2.90625, "learning_rate": 1.9999352786800427e-05, "loss": 1.1177, "step": 163 }, { "epoch": 0.0281201105943374, "grad_norm": 2.109375, "learning_rate": 1.9999332077513118e-05, "loss": 1.1579, "step": 164 }, { "epoch": 0.02829157468332726, "grad_norm": 6.3125, "learning_rate": 1.9999311042109938e-05, "loss": 1.1761, "step": 165 }, { "epoch": 0.028463038772317123, "grad_norm": 4.5, "learning_rate": 1.9999289680591573e-05, "loss": 1.1855, "step": 166 }, { "epoch": 0.028634502861306987, "grad_norm": 3.03125, "learning_rate": 1.9999267992958713e-05, "loss": 1.1755, "step": 167 }, { "epoch": 0.028805966950296847, "grad_norm": 2.453125, "learning_rate": 1.999924597921207e-05, "loss": 1.1019, "step": 168 }, { "epoch": 0.02897743103928671, "grad_norm": 1.8515625, "learning_rate": 1.9999223639352364e-05, "loss": 1.1054, "step": 169 }, { "epoch": 0.02914889512827657, "grad_norm": 1.9609375, "learning_rate": 1.9999200973380325e-05, "loss": 1.0905, "step": 170 }, { "epoch": 0.029320359217266435, "grad_norm": 2.046875, "learning_rate": 1.9999177981296682e-05, "loss": 1.1277, "step": 171 }, { "epoch": 0.029491823306256295, "grad_norm": 1.8046875, "learning_rate": 1.9999154663102196e-05, "loss": 1.1312, "step": 172 }, { "epoch": 0.02966328739524616, "grad_norm": 2.015625, "learning_rate": 1.999913101879762e-05, "loss": 1.1578, "step": 173 }, { "epoch": 0.02983475148423602, "grad_norm": 2.0, "learning_rate": 1.999910704838373e-05, "loss": 1.1742, "step": 174 }, { "epoch": 0.030006215573225883, "grad_norm": 1.8671875, "learning_rate": 1.9999082751861308e-05, "loss": 1.1161, "step": 175 }, { "epoch": 0.030177679662215743, "grad_norm": 1.9609375, "learning_rate": 1.9999058129231144e-05, "loss": 1.1534, "step": 176 }, { "epoch": 0.030349143751205607, "grad_norm": 1.96875, "learning_rate": 1.9999033180494037e-05, "loss": 1.2287, "step": 177 }, { "epoch": 0.03052060784019547, "grad_norm": 1.9453125, "learning_rate": 1.999900790565081e-05, "loss": 1.1124, "step": 178 }, { "epoch": 0.03069207192918533, "grad_norm": 1.75, "learning_rate": 1.9998982304702278e-05, "loss": 1.1163, "step": 179 }, { "epoch": 0.030863536018175194, "grad_norm": 2.140625, "learning_rate": 1.9998956377649286e-05, "loss": 1.1342, "step": 180 }, { "epoch": 0.031035000107165055, "grad_norm": 1.9609375, "learning_rate": 1.999893012449267e-05, "loss": 1.2135, "step": 181 }, { "epoch": 0.03120646419615492, "grad_norm": 1.7265625, "learning_rate": 1.9998903545233293e-05, "loss": 1.053, "step": 182 }, { "epoch": 0.03137792828514478, "grad_norm": 1.9765625, "learning_rate": 1.9998876639872016e-05, "loss": 1.1699, "step": 183 }, { "epoch": 0.03154939237413464, "grad_norm": 1.9921875, "learning_rate": 1.999884940840972e-05, "loss": 1.1576, "step": 184 }, { "epoch": 0.0317208564631245, "grad_norm": 1.8828125, "learning_rate": 1.9998821850847296e-05, "loss": 1.1212, "step": 185 }, { "epoch": 0.03189232055211437, "grad_norm": 2.046875, "learning_rate": 1.9998793967185635e-05, "loss": 1.2487, "step": 186 }, { "epoch": 0.03206378464110423, "grad_norm": 1.953125, "learning_rate": 1.999876575742565e-05, "loss": 1.1613, "step": 187 }, { "epoch": 0.03223524873009409, "grad_norm": 1.84375, "learning_rate": 1.9998737221568264e-05, "loss": 1.1357, "step": 188 }, { "epoch": 0.03240671281908395, "grad_norm": 1.875, "learning_rate": 1.9998708359614408e-05, "loss": 1.0308, "step": 189 }, { "epoch": 0.03257817690807382, "grad_norm": 1.9375, "learning_rate": 1.9998679171565017e-05, "loss": 1.1871, "step": 190 }, { "epoch": 0.03274964099706368, "grad_norm": 1.9765625, "learning_rate": 1.9998649657421047e-05, "loss": 1.1016, "step": 191 }, { "epoch": 0.03292110508605354, "grad_norm": 1.84375, "learning_rate": 1.999861981718346e-05, "loss": 1.1855, "step": 192 }, { "epoch": 0.0330925691750434, "grad_norm": 2.765625, "learning_rate": 1.999858965085323e-05, "loss": 1.2059, "step": 193 }, { "epoch": 0.033264033264033266, "grad_norm": 2.0625, "learning_rate": 1.999855915843134e-05, "loss": 1.2393, "step": 194 }, { "epoch": 0.033435497353023126, "grad_norm": 1.90625, "learning_rate": 1.999852833991879e-05, "loss": 1.2064, "step": 195 }, { "epoch": 0.033606961442012986, "grad_norm": 1.84375, "learning_rate": 1.9998497195316572e-05, "loss": 1.0907, "step": 196 }, { "epoch": 0.033778425531002854, "grad_norm": 1.921875, "learning_rate": 1.9998465724625715e-05, "loss": 1.2311, "step": 197 }, { "epoch": 0.033949889619992714, "grad_norm": 1.8125, "learning_rate": 1.9998433927847238e-05, "loss": 1.1369, "step": 198 }, { "epoch": 0.034121353708982574, "grad_norm": 1.78125, "learning_rate": 1.999840180498218e-05, "loss": 1.1819, "step": 199 }, { "epoch": 0.034292817797972434, "grad_norm": 1.8046875, "learning_rate": 1.9998369356031587e-05, "loss": 1.1348, "step": 200 }, { "epoch": 0.0344642818869623, "grad_norm": 1.75, "learning_rate": 1.9998336580996524e-05, "loss": 1.141, "step": 201 }, { "epoch": 0.03463574597595216, "grad_norm": 1.8359375, "learning_rate": 1.999830347987805e-05, "loss": 1.1466, "step": 202 }, { "epoch": 0.03480721006494202, "grad_norm": 1.796875, "learning_rate": 1.999827005267725e-05, "loss": 1.2022, "step": 203 }, { "epoch": 0.03497867415393189, "grad_norm": 1.8125, "learning_rate": 1.9998236299395216e-05, "loss": 1.2304, "step": 204 }, { "epoch": 0.03515013824292175, "grad_norm": 1.875, "learning_rate": 1.9998202220033044e-05, "loss": 1.1918, "step": 205 }, { "epoch": 0.03532160233191161, "grad_norm": 1.8203125, "learning_rate": 1.9998167814591847e-05, "loss": 1.1763, "step": 206 }, { "epoch": 0.03549306642090147, "grad_norm": 1.8828125, "learning_rate": 1.999813308307275e-05, "loss": 1.1952, "step": 207 }, { "epoch": 0.03566453050989134, "grad_norm": 1.9375, "learning_rate": 1.9998098025476883e-05, "loss": 1.0583, "step": 208 }, { "epoch": 0.0358359945988812, "grad_norm": 4.21875, "learning_rate": 1.9998062641805392e-05, "loss": 1.1122, "step": 209 }, { "epoch": 0.03600745868787106, "grad_norm": 1.890625, "learning_rate": 1.9998026932059427e-05, "loss": 1.0917, "step": 210 }, { "epoch": 0.03617892277686092, "grad_norm": 1.8125, "learning_rate": 1.9997990896240154e-05, "loss": 1.1909, "step": 211 }, { "epoch": 0.036350386865850785, "grad_norm": 1.796875, "learning_rate": 1.999795453434875e-05, "loss": 1.1432, "step": 212 }, { "epoch": 0.036521850954840646, "grad_norm": 1.78125, "learning_rate": 1.99979178463864e-05, "loss": 1.0841, "step": 213 }, { "epoch": 0.036693315043830506, "grad_norm": 1.796875, "learning_rate": 1.9997880832354302e-05, "loss": 1.2121, "step": 214 }, { "epoch": 0.03686477913282037, "grad_norm": 1.8671875, "learning_rate": 1.999784349225366e-05, "loss": 1.0489, "step": 215 }, { "epoch": 0.03703624322181023, "grad_norm": 1.8046875, "learning_rate": 1.999780582608569e-05, "loss": 1.1155, "step": 216 }, { "epoch": 0.037207707310800094, "grad_norm": 2.234375, "learning_rate": 1.999776783385163e-05, "loss": 1.1808, "step": 217 }, { "epoch": 0.037379171399789954, "grad_norm": 2.125, "learning_rate": 1.9997729515552708e-05, "loss": 1.2913, "step": 218 }, { "epoch": 0.03755063548877982, "grad_norm": 1.875, "learning_rate": 1.999769087119018e-05, "loss": 1.1422, "step": 219 }, { "epoch": 0.03772209957776968, "grad_norm": 1.9765625, "learning_rate": 1.9997651900765308e-05, "loss": 1.1079, "step": 220 }, { "epoch": 0.03789356366675954, "grad_norm": 1.7890625, "learning_rate": 1.999761260427935e-05, "loss": 1.1709, "step": 221 }, { "epoch": 0.0380650277557494, "grad_norm": 2.1875, "learning_rate": 1.999757298173361e-05, "loss": 1.2323, "step": 222 }, { "epoch": 0.03823649184473927, "grad_norm": 1.953125, "learning_rate": 1.999753303312936e-05, "loss": 1.1526, "step": 223 }, { "epoch": 0.03840795593372913, "grad_norm": 1.921875, "learning_rate": 1.9997492758467915e-05, "loss": 1.0609, "step": 224 }, { "epoch": 0.03857942002271899, "grad_norm": 1.7890625, "learning_rate": 1.9997452157750577e-05, "loss": 1.1155, "step": 225 }, { "epoch": 0.03875088411170886, "grad_norm": 1.8125, "learning_rate": 1.9997411230978684e-05, "loss": 1.1335, "step": 226 }, { "epoch": 0.03892234820069872, "grad_norm": 1.9296875, "learning_rate": 1.9997369978153564e-05, "loss": 1.1145, "step": 227 }, { "epoch": 0.03909381228968858, "grad_norm": 1.96875, "learning_rate": 1.9997328399276558e-05, "loss": 1.0612, "step": 228 }, { "epoch": 0.03926527637867844, "grad_norm": 1.7890625, "learning_rate": 1.9997286494349032e-05, "loss": 1.1189, "step": 229 }, { "epoch": 0.039436740467668305, "grad_norm": 1.84375, "learning_rate": 1.999724426337234e-05, "loss": 1.1008, "step": 230 }, { "epoch": 0.039608204556658165, "grad_norm": 2.359375, "learning_rate": 1.9997201706347875e-05, "loss": 1.1649, "step": 231 }, { "epoch": 0.039779668645648025, "grad_norm": 2.015625, "learning_rate": 1.999715882327701e-05, "loss": 1.1094, "step": 232 }, { "epoch": 0.03995113273463789, "grad_norm": 1.84375, "learning_rate": 1.999711561416115e-05, "loss": 1.1228, "step": 233 }, { "epoch": 0.04012259682362775, "grad_norm": 1.8125, "learning_rate": 1.9997072079001705e-05, "loss": 1.1056, "step": 234 }, { "epoch": 0.04029406091261761, "grad_norm": 1.9296875, "learning_rate": 1.9997028217800097e-05, "loss": 1.2507, "step": 235 }, { "epoch": 0.04046552500160747, "grad_norm": 1.796875, "learning_rate": 1.999698403055775e-05, "loss": 1.0986, "step": 236 }, { "epoch": 0.04063698909059734, "grad_norm": 2.59375, "learning_rate": 1.9996939517276107e-05, "loss": 1.1712, "step": 237 }, { "epoch": 0.0408084531795872, "grad_norm": 1.765625, "learning_rate": 1.9996894677956628e-05, "loss": 1.197, "step": 238 }, { "epoch": 0.04097991726857706, "grad_norm": 1.953125, "learning_rate": 1.9996849512600764e-05, "loss": 1.0771, "step": 239 }, { "epoch": 0.04115138135756692, "grad_norm": 1.8359375, "learning_rate": 1.9996804021209995e-05, "loss": 1.191, "step": 240 }, { "epoch": 0.04132284544655679, "grad_norm": 1.8515625, "learning_rate": 1.9996758203785797e-05, "loss": 1.1842, "step": 241 }, { "epoch": 0.04149430953554665, "grad_norm": 1.8359375, "learning_rate": 1.9996712060329675e-05, "loss": 1.1033, "step": 242 }, { "epoch": 0.04166577362453651, "grad_norm": 1.734375, "learning_rate": 1.999666559084313e-05, "loss": 1.0938, "step": 243 }, { "epoch": 0.041837237713526376, "grad_norm": 1.859375, "learning_rate": 1.999661879532767e-05, "loss": 1.2002, "step": 244 }, { "epoch": 0.042008701802516236, "grad_norm": 1.7578125, "learning_rate": 1.999657167378483e-05, "loss": 1.195, "step": 245 }, { "epoch": 0.0421801658915061, "grad_norm": 1.75, "learning_rate": 1.9996524226216147e-05, "loss": 1.1571, "step": 246 }, { "epoch": 0.04235162998049596, "grad_norm": 1.921875, "learning_rate": 1.9996476452623163e-05, "loss": 1.0698, "step": 247 }, { "epoch": 0.042523094069485824, "grad_norm": 1.8203125, "learning_rate": 1.999642835300744e-05, "loss": 1.1737, "step": 248 }, { "epoch": 0.042694558158475684, "grad_norm": 1.90625, "learning_rate": 1.9996379927370542e-05, "loss": 1.1447, "step": 249 }, { "epoch": 0.042866022247465545, "grad_norm": 1.8515625, "learning_rate": 1.9996331175714056e-05, "loss": 1.2027, "step": 250 }, { "epoch": 0.04303748633645541, "grad_norm": 1.8515625, "learning_rate": 1.9996282098039565e-05, "loss": 1.109, "step": 251 }, { "epoch": 0.04320895042544527, "grad_norm": 1.8203125, "learning_rate": 1.9996232694348673e-05, "loss": 1.1268, "step": 252 }, { "epoch": 0.04338041451443513, "grad_norm": 1.7578125, "learning_rate": 1.9996182964642992e-05, "loss": 1.119, "step": 253 }, { "epoch": 0.04355187860342499, "grad_norm": 1.7109375, "learning_rate": 1.999613290892414e-05, "loss": 1.139, "step": 254 }, { "epoch": 0.04372334269241486, "grad_norm": 1.7421875, "learning_rate": 1.999608252719375e-05, "loss": 1.1325, "step": 255 }, { "epoch": 0.04389480678140472, "grad_norm": 1.84375, "learning_rate": 1.9996031819453474e-05, "loss": 1.1633, "step": 256 }, { "epoch": 0.04406627087039458, "grad_norm": 1.8828125, "learning_rate": 1.9995980785704955e-05, "loss": 1.2083, "step": 257 }, { "epoch": 0.04423773495938444, "grad_norm": 1.71875, "learning_rate": 1.999592942594986e-05, "loss": 1.043, "step": 258 }, { "epoch": 0.04440919904837431, "grad_norm": 1.90625, "learning_rate": 1.9995877740189868e-05, "loss": 1.153, "step": 259 }, { "epoch": 0.04458066313736417, "grad_norm": 2.015625, "learning_rate": 1.9995825728426655e-05, "loss": 1.1146, "step": 260 }, { "epoch": 0.04475212722635403, "grad_norm": 1.96875, "learning_rate": 1.999577339066193e-05, "loss": 1.1924, "step": 261 }, { "epoch": 0.044923591315343896, "grad_norm": 1.859375, "learning_rate": 1.9995720726897394e-05, "loss": 1.1448, "step": 262 }, { "epoch": 0.045095055404333756, "grad_norm": 1.9375, "learning_rate": 1.9995667737134765e-05, "loss": 1.1947, "step": 263 }, { "epoch": 0.045266519493323616, "grad_norm": 1.8359375, "learning_rate": 1.9995614421375768e-05, "loss": 1.1596, "step": 264 }, { "epoch": 0.045437983582313476, "grad_norm": 1.765625, "learning_rate": 1.9995560779622148e-05, "loss": 1.104, "step": 265 }, { "epoch": 0.045609447671303344, "grad_norm": 1.9375, "learning_rate": 1.999550681187565e-05, "loss": 1.1444, "step": 266 }, { "epoch": 0.045780911760293204, "grad_norm": 2.0, "learning_rate": 1.9995452518138037e-05, "loss": 1.1601, "step": 267 }, { "epoch": 0.045952375849283064, "grad_norm": 1.875, "learning_rate": 1.9995397898411073e-05, "loss": 1.1729, "step": 268 }, { "epoch": 0.04612383993827293, "grad_norm": 1.7734375, "learning_rate": 1.999534295269655e-05, "loss": 1.0968, "step": 269 }, { "epoch": 0.04629530402726279, "grad_norm": 1.75, "learning_rate": 1.999528768099625e-05, "loss": 1.1298, "step": 270 }, { "epoch": 0.04646676811625265, "grad_norm": 2.0, "learning_rate": 1.999523208331198e-05, "loss": 1.0809, "step": 271 }, { "epoch": 0.04663823220524251, "grad_norm": 1.765625, "learning_rate": 1.9995176159645557e-05, "loss": 1.1636, "step": 272 }, { "epoch": 0.04680969629423238, "grad_norm": 1.796875, "learning_rate": 1.9995119909998798e-05, "loss": 1.1, "step": 273 }, { "epoch": 0.04698116038322224, "grad_norm": 4.6875, "learning_rate": 1.9995063334373544e-05, "loss": 1.1314, "step": 274 }, { "epoch": 0.0471526244722121, "grad_norm": 1.859375, "learning_rate": 1.9995006432771634e-05, "loss": 1.1535, "step": 275 }, { "epoch": 0.04732408856120196, "grad_norm": 1.734375, "learning_rate": 1.9994949205194925e-05, "loss": 1.0962, "step": 276 }, { "epoch": 0.04749555265019183, "grad_norm": 1.875, "learning_rate": 1.999489165164529e-05, "loss": 1.2084, "step": 277 }, { "epoch": 0.04766701673918169, "grad_norm": 1.765625, "learning_rate": 1.9994833772124597e-05, "loss": 1.1022, "step": 278 }, { "epoch": 0.04783848082817155, "grad_norm": 1.8359375, "learning_rate": 1.9994775566634737e-05, "loss": 1.1553, "step": 279 }, { "epoch": 0.048009944917161415, "grad_norm": 1.796875, "learning_rate": 1.999471703517761e-05, "loss": 1.1098, "step": 280 }, { "epoch": 0.048181409006151275, "grad_norm": 1.8046875, "learning_rate": 1.9994658177755124e-05, "loss": 1.1211, "step": 281 }, { "epoch": 0.048352873095141136, "grad_norm": 1.9296875, "learning_rate": 1.99945989943692e-05, "loss": 1.1674, "step": 282 }, { "epoch": 0.048524337184130996, "grad_norm": 1.734375, "learning_rate": 1.9994539485021765e-05, "loss": 1.086, "step": 283 }, { "epoch": 0.04869580127312086, "grad_norm": 1.921875, "learning_rate": 1.999447964971476e-05, "loss": 1.2183, "step": 284 }, { "epoch": 0.04886726536211072, "grad_norm": 1.796875, "learning_rate": 1.999441948845014e-05, "loss": 1.1132, "step": 285 }, { "epoch": 0.049038729451100584, "grad_norm": 1.765625, "learning_rate": 1.9994359001229865e-05, "loss": 1.0739, "step": 286 }, { "epoch": 0.049210193540090444, "grad_norm": 1.8671875, "learning_rate": 1.9994298188055907e-05, "loss": 1.1649, "step": 287 }, { "epoch": 0.04938165762908031, "grad_norm": 1.8828125, "learning_rate": 1.999423704893025e-05, "loss": 1.1438, "step": 288 }, { "epoch": 0.04955312171807017, "grad_norm": 2.015625, "learning_rate": 1.999417558385489e-05, "loss": 1.166, "step": 289 }, { "epoch": 0.04972458580706003, "grad_norm": 2.015625, "learning_rate": 1.9994113792831825e-05, "loss": 1.14, "step": 290 }, { "epoch": 0.0498960498960499, "grad_norm": 1.8984375, "learning_rate": 1.999405167586308e-05, "loss": 1.186, "step": 291 }, { "epoch": 0.05006751398503976, "grad_norm": 1.96875, "learning_rate": 1.999398923295067e-05, "loss": 1.1538, "step": 292 }, { "epoch": 0.05023897807402962, "grad_norm": 1.84375, "learning_rate": 1.9993926464096646e-05, "loss": 1.1432, "step": 293 }, { "epoch": 0.05041044216301948, "grad_norm": 1.78125, "learning_rate": 1.999386336930304e-05, "loss": 1.0595, "step": 294 }, { "epoch": 0.05058190625200935, "grad_norm": 1.8203125, "learning_rate": 1.999379994857192e-05, "loss": 1.128, "step": 295 }, { "epoch": 0.05075337034099921, "grad_norm": 1.78125, "learning_rate": 1.9993736201905343e-05, "loss": 1.1186, "step": 296 }, { "epoch": 0.05092483442998907, "grad_norm": 1.71875, "learning_rate": 1.9993672129305398e-05, "loss": 1.086, "step": 297 }, { "epoch": 0.051096298518978935, "grad_norm": 1.8125, "learning_rate": 1.9993607730774176e-05, "loss": 1.1423, "step": 298 }, { "epoch": 0.051267762607968795, "grad_norm": 1.75, "learning_rate": 1.9993543006313772e-05, "loss": 1.1484, "step": 299 }, { "epoch": 0.051439226696958655, "grad_norm": 1.7421875, "learning_rate": 1.9993477955926298e-05, "loss": 1.1604, "step": 300 }, { "epoch": 0.051610690785948515, "grad_norm": 1.9375, "learning_rate": 1.999341257961388e-05, "loss": 1.1192, "step": 301 }, { "epoch": 0.05178215487493838, "grad_norm": 1.90625, "learning_rate": 1.9993346877378637e-05, "loss": 1.0958, "step": 302 }, { "epoch": 0.05195361896392824, "grad_norm": 1.828125, "learning_rate": 1.9993280849222726e-05, "loss": 1.0935, "step": 303 }, { "epoch": 0.0521250830529181, "grad_norm": 1.8125, "learning_rate": 1.9993214495148297e-05, "loss": 1.1584, "step": 304 }, { "epoch": 0.05229654714190796, "grad_norm": 1.7578125, "learning_rate": 1.999314781515751e-05, "loss": 1.0625, "step": 305 }, { "epoch": 0.05246801123089783, "grad_norm": 1.75, "learning_rate": 1.9993080809252542e-05, "loss": 1.1468, "step": 306 }, { "epoch": 0.05263947531988769, "grad_norm": 1.7421875, "learning_rate": 1.999301347743558e-05, "loss": 1.1285, "step": 307 }, { "epoch": 0.05281093940887755, "grad_norm": 2.0, "learning_rate": 1.999294581970882e-05, "loss": 1.2316, "step": 308 }, { "epoch": 0.05298240349786742, "grad_norm": 1.8515625, "learning_rate": 1.9992877836074465e-05, "loss": 1.1048, "step": 309 }, { "epoch": 0.05315386758685728, "grad_norm": 1.8046875, "learning_rate": 1.9992809526534732e-05, "loss": 1.0039, "step": 310 }, { "epoch": 0.05332533167584714, "grad_norm": 1.734375, "learning_rate": 1.9992740891091852e-05, "loss": 1.0773, "step": 311 }, { "epoch": 0.053496795764837, "grad_norm": 1.7421875, "learning_rate": 1.9992671929748062e-05, "loss": 1.1283, "step": 312 }, { "epoch": 0.053668259853826866, "grad_norm": 1.8671875, "learning_rate": 1.999260264250561e-05, "loss": 1.1078, "step": 313 }, { "epoch": 0.053839723942816727, "grad_norm": 1.8046875, "learning_rate": 1.9992533029366763e-05, "loss": 1.1397, "step": 314 }, { "epoch": 0.05401118803180659, "grad_norm": 1.6640625, "learning_rate": 1.999246309033378e-05, "loss": 1.0898, "step": 315 }, { "epoch": 0.054182652120796454, "grad_norm": 1.6796875, "learning_rate": 1.999239282540895e-05, "loss": 1.066, "step": 316 }, { "epoch": 0.054354116209786314, "grad_norm": 1.8203125, "learning_rate": 1.9992322234594562e-05, "loss": 1.2013, "step": 317 }, { "epoch": 0.054525580298776175, "grad_norm": 1.8046875, "learning_rate": 1.9992251317892916e-05, "loss": 1.1571, "step": 318 }, { "epoch": 0.054697044387766035, "grad_norm": 1.6953125, "learning_rate": 1.999218007530633e-05, "loss": 1.1173, "step": 319 }, { "epoch": 0.0548685084767559, "grad_norm": 1.7109375, "learning_rate": 1.9992108506837122e-05, "loss": 1.099, "step": 320 }, { "epoch": 0.05503997256574576, "grad_norm": 1.78125, "learning_rate": 1.999203661248763e-05, "loss": 1.1084, "step": 321 }, { "epoch": 0.05521143665473562, "grad_norm": 1.765625, "learning_rate": 1.9991964392260198e-05, "loss": 1.0651, "step": 322 }, { "epoch": 0.05538290074372548, "grad_norm": 1.7890625, "learning_rate": 1.9991891846157182e-05, "loss": 1.1868, "step": 323 }, { "epoch": 0.05555436483271535, "grad_norm": 3.6875, "learning_rate": 1.9991818974180944e-05, "loss": 1.189, "step": 324 }, { "epoch": 0.05572582892170521, "grad_norm": 1.734375, "learning_rate": 1.9991745776333865e-05, "loss": 1.1314, "step": 325 }, { "epoch": 0.05589729301069507, "grad_norm": 2.234375, "learning_rate": 1.9991672252618334e-05, "loss": 1.1119, "step": 326 }, { "epoch": 0.05606875709968494, "grad_norm": 2.015625, "learning_rate": 1.999159840303674e-05, "loss": 1.103, "step": 327 }, { "epoch": 0.0562402211886748, "grad_norm": 1.7109375, "learning_rate": 1.99915242275915e-05, "loss": 1.1243, "step": 328 }, { "epoch": 0.05641168527766466, "grad_norm": 1.96875, "learning_rate": 1.9991449726285033e-05, "loss": 1.1458, "step": 329 }, { "epoch": 0.05658314936665452, "grad_norm": 1.8359375, "learning_rate": 1.9991374899119763e-05, "loss": 1.1669, "step": 330 }, { "epoch": 0.056754613455644386, "grad_norm": 1.7265625, "learning_rate": 1.9991299746098133e-05, "loss": 1.1497, "step": 331 }, { "epoch": 0.056926077544634246, "grad_norm": 1.828125, "learning_rate": 1.9991224267222596e-05, "loss": 1.0866, "step": 332 }, { "epoch": 0.057097541633624106, "grad_norm": 1.890625, "learning_rate": 1.9991148462495612e-05, "loss": 1.1331, "step": 333 }, { "epoch": 0.05726900572261397, "grad_norm": 1.7578125, "learning_rate": 1.999107233191965e-05, "loss": 1.1717, "step": 334 }, { "epoch": 0.057440469811603834, "grad_norm": 1.734375, "learning_rate": 1.9990995875497203e-05, "loss": 1.1216, "step": 335 }, { "epoch": 0.057611933900593694, "grad_norm": 1.671875, "learning_rate": 1.9990919093230752e-05, "loss": 1.0493, "step": 336 }, { "epoch": 0.057783397989583554, "grad_norm": 1.8828125, "learning_rate": 1.999084198512281e-05, "loss": 1.0627, "step": 337 }, { "epoch": 0.05795486207857342, "grad_norm": 1.9453125, "learning_rate": 1.999076455117589e-05, "loss": 1.0835, "step": 338 }, { "epoch": 0.05812632616756328, "grad_norm": 1.765625, "learning_rate": 1.999068679139251e-05, "loss": 1.1594, "step": 339 }, { "epoch": 0.05829779025655314, "grad_norm": 1.75, "learning_rate": 1.9990608705775217e-05, "loss": 1.1199, "step": 340 }, { "epoch": 0.058469254345543, "grad_norm": 1.7578125, "learning_rate": 1.9990530294326554e-05, "loss": 1.1978, "step": 341 }, { "epoch": 0.05864071843453287, "grad_norm": 1.8671875, "learning_rate": 1.9990451557049077e-05, "loss": 1.1584, "step": 342 }, { "epoch": 0.05881218252352273, "grad_norm": 1.7578125, "learning_rate": 1.9990372493945353e-05, "loss": 1.1209, "step": 343 }, { "epoch": 0.05898364661251259, "grad_norm": 1.796875, "learning_rate": 1.9990293105017962e-05, "loss": 1.1281, "step": 344 }, { "epoch": 0.05915511070150246, "grad_norm": 1.734375, "learning_rate": 1.999021339026949e-05, "loss": 1.0771, "step": 345 }, { "epoch": 0.05932657479049232, "grad_norm": 1.6796875, "learning_rate": 1.9990133349702544e-05, "loss": 1.1337, "step": 346 }, { "epoch": 0.05949803887948218, "grad_norm": 1.8203125, "learning_rate": 1.999005298331973e-05, "loss": 1.1041, "step": 347 }, { "epoch": 0.05966950296847204, "grad_norm": 1.8203125, "learning_rate": 1.9989972291123666e-05, "loss": 1.1493, "step": 348 }, { "epoch": 0.059840967057461905, "grad_norm": 1.984375, "learning_rate": 1.998989127311699e-05, "loss": 1.1732, "step": 349 }, { "epoch": 0.060012431146451765, "grad_norm": 1.8046875, "learning_rate": 1.998980992930234e-05, "loss": 1.0674, "step": 350 }, { "epoch": 0.060183895235441626, "grad_norm": 3.609375, "learning_rate": 1.9989728259682368e-05, "loss": 1.23, "step": 351 }, { "epoch": 0.060355359324431486, "grad_norm": 4.4375, "learning_rate": 1.9989646264259743e-05, "loss": 1.1521, "step": 352 }, { "epoch": 0.06052682341342135, "grad_norm": 1.7734375, "learning_rate": 1.9989563943037133e-05, "loss": 1.1788, "step": 353 }, { "epoch": 0.06069828750241121, "grad_norm": 1.7265625, "learning_rate": 1.998948129601723e-05, "loss": 1.1802, "step": 354 }, { "epoch": 0.060869751591401074, "grad_norm": 1.6953125, "learning_rate": 1.998939832320272e-05, "loss": 1.073, "step": 355 }, { "epoch": 0.06104121568039094, "grad_norm": 1.8515625, "learning_rate": 1.9989315024596315e-05, "loss": 1.1932, "step": 356 }, { "epoch": 0.0612126797693808, "grad_norm": 1.8203125, "learning_rate": 1.998923140020073e-05, "loss": 1.0352, "step": 357 }, { "epoch": 0.06138414385837066, "grad_norm": 1.7890625, "learning_rate": 1.9989147450018698e-05, "loss": 1.1364, "step": 358 }, { "epoch": 0.06155560794736052, "grad_norm": 1.6328125, "learning_rate": 1.9989063174052948e-05, "loss": 1.0597, "step": 359 }, { "epoch": 0.06172707203635039, "grad_norm": 1.78125, "learning_rate": 1.998897857230623e-05, "loss": 1.1841, "step": 360 }, { "epoch": 0.06189853612534025, "grad_norm": 1.7265625, "learning_rate": 1.9988893644781312e-05, "loss": 1.0917, "step": 361 }, { "epoch": 0.06207000021433011, "grad_norm": 1.65625, "learning_rate": 1.9988808391480955e-05, "loss": 1.1106, "step": 362 }, { "epoch": 0.06224146430331998, "grad_norm": 1.7734375, "learning_rate": 1.998872281240794e-05, "loss": 1.0291, "step": 363 }, { "epoch": 0.06241292839230984, "grad_norm": 1.6328125, "learning_rate": 1.998863690756506e-05, "loss": 1.1161, "step": 364 }, { "epoch": 0.0625843924812997, "grad_norm": 1.671875, "learning_rate": 1.998855067695512e-05, "loss": 1.1159, "step": 365 }, { "epoch": 0.06275585657028956, "grad_norm": 1.859375, "learning_rate": 1.9988464120580925e-05, "loss": 1.098, "step": 366 }, { "epoch": 0.06292732065927942, "grad_norm": 1.7734375, "learning_rate": 1.9988377238445308e-05, "loss": 1.1378, "step": 367 }, { "epoch": 0.06309878474826928, "grad_norm": 1.6953125, "learning_rate": 1.9988290030551088e-05, "loss": 1.0319, "step": 368 }, { "epoch": 0.06327024883725915, "grad_norm": 1.734375, "learning_rate": 1.9988202496901126e-05, "loss": 1.0653, "step": 369 }, { "epoch": 0.063441712926249, "grad_norm": 1.703125, "learning_rate": 1.9988114637498264e-05, "loss": 1.0302, "step": 370 }, { "epoch": 0.06361317701523887, "grad_norm": 1.7421875, "learning_rate": 1.9988026452345376e-05, "loss": 1.0393, "step": 371 }, { "epoch": 0.06378464110422874, "grad_norm": 1.7109375, "learning_rate": 1.9987937941445327e-05, "loss": 1.0318, "step": 372 }, { "epoch": 0.0639561051932186, "grad_norm": 1.7265625, "learning_rate": 1.9987849104801018e-05, "loss": 1.1677, "step": 373 }, { "epoch": 0.06412756928220846, "grad_norm": 1.765625, "learning_rate": 1.9987759942415335e-05, "loss": 1.1635, "step": 374 }, { "epoch": 0.06429903337119831, "grad_norm": 1.7421875, "learning_rate": 1.998767045429119e-05, "loss": 1.1291, "step": 375 }, { "epoch": 0.06447049746018818, "grad_norm": 1.78125, "learning_rate": 1.99875806404315e-05, "loss": 1.1369, "step": 376 }, { "epoch": 0.06464196154917805, "grad_norm": 1.90625, "learning_rate": 1.9987490500839198e-05, "loss": 1.1804, "step": 377 }, { "epoch": 0.0648134256381679, "grad_norm": 1.71875, "learning_rate": 1.998740003551722e-05, "loss": 1.1491, "step": 378 }, { "epoch": 0.06498488972715777, "grad_norm": 1.7109375, "learning_rate": 1.9987309244468517e-05, "loss": 1.147, "step": 379 }, { "epoch": 0.06515635381614764, "grad_norm": 1.8125, "learning_rate": 1.9987218127696054e-05, "loss": 1.0988, "step": 380 }, { "epoch": 0.06532781790513749, "grad_norm": 2.078125, "learning_rate": 1.9987126685202795e-05, "loss": 1.0034, "step": 381 }, { "epoch": 0.06549928199412736, "grad_norm": 1.7421875, "learning_rate": 1.998703491699173e-05, "loss": 1.1178, "step": 382 }, { "epoch": 0.06567074608311722, "grad_norm": 1.671875, "learning_rate": 1.998694282306585e-05, "loss": 1.0409, "step": 383 }, { "epoch": 0.06584221017210708, "grad_norm": 1.7421875, "learning_rate": 1.9986850403428155e-05, "loss": 1.1258, "step": 384 }, { "epoch": 0.06601367426109694, "grad_norm": 1.75, "learning_rate": 1.998675765808166e-05, "loss": 1.1269, "step": 385 }, { "epoch": 0.0661851383500868, "grad_norm": 1.8203125, "learning_rate": 1.9986664587029395e-05, "loss": 1.142, "step": 386 }, { "epoch": 0.06635660243907666, "grad_norm": 1.609375, "learning_rate": 1.9986571190274388e-05, "loss": 1.1271, "step": 387 }, { "epoch": 0.06652806652806653, "grad_norm": 1.78125, "learning_rate": 1.9986477467819688e-05, "loss": 1.0659, "step": 388 }, { "epoch": 0.06669953061705639, "grad_norm": 1.7578125, "learning_rate": 1.9986383419668355e-05, "loss": 1.0759, "step": 389 }, { "epoch": 0.06687099470604625, "grad_norm": 1.75, "learning_rate": 1.998628904582345e-05, "loss": 1.0522, "step": 390 }, { "epoch": 0.06704245879503612, "grad_norm": 1.671875, "learning_rate": 1.998619434628806e-05, "loss": 1.1373, "step": 391 }, { "epoch": 0.06721392288402597, "grad_norm": 1.6796875, "learning_rate": 1.9986099321065266e-05, "loss": 1.1222, "step": 392 }, { "epoch": 0.06738538697301584, "grad_norm": 1.65625, "learning_rate": 1.9986003970158164e-05, "loss": 1.1167, "step": 393 }, { "epoch": 0.06755685106200571, "grad_norm": 1.734375, "learning_rate": 1.9985908293569873e-05, "loss": 1.1531, "step": 394 }, { "epoch": 0.06772831515099556, "grad_norm": 1.890625, "learning_rate": 1.998581229130351e-05, "loss": 1.1829, "step": 395 }, { "epoch": 0.06789977923998543, "grad_norm": 1.765625, "learning_rate": 1.99857159633622e-05, "loss": 1.201, "step": 396 }, { "epoch": 0.06807124332897528, "grad_norm": 1.7109375, "learning_rate": 1.9985619309749096e-05, "loss": 1.1298, "step": 397 }, { "epoch": 0.06824270741796515, "grad_norm": 1.71875, "learning_rate": 1.9985522330467343e-05, "loss": 1.07, "step": 398 }, { "epoch": 0.06841417150695502, "grad_norm": 1.6171875, "learning_rate": 1.9985425025520098e-05, "loss": 1.0821, "step": 399 }, { "epoch": 0.06858563559594487, "grad_norm": 1.7578125, "learning_rate": 1.998532739491055e-05, "loss": 1.1798, "step": 400 }, { "epoch": 0.06875709968493474, "grad_norm": 1.75, "learning_rate": 1.998522943864187e-05, "loss": 1.1651, "step": 401 }, { "epoch": 0.0689285637739246, "grad_norm": 1.640625, "learning_rate": 1.9985131156717258e-05, "loss": 1.0264, "step": 402 }, { "epoch": 0.06910002786291446, "grad_norm": 1.7109375, "learning_rate": 1.9985032549139917e-05, "loss": 1.153, "step": 403 }, { "epoch": 0.06927149195190432, "grad_norm": 1.703125, "learning_rate": 1.9984933615913068e-05, "loss": 1.0631, "step": 404 }, { "epoch": 0.06944295604089419, "grad_norm": 1.6796875, "learning_rate": 1.9984834357039927e-05, "loss": 1.1154, "step": 405 }, { "epoch": 0.06961442012988404, "grad_norm": 1.75, "learning_rate": 1.9984734772523747e-05, "loss": 1.1756, "step": 406 }, { "epoch": 0.06978588421887391, "grad_norm": 1.7890625, "learning_rate": 1.998463486236776e-05, "loss": 1.126, "step": 407 }, { "epoch": 0.06995734830786378, "grad_norm": 1.6953125, "learning_rate": 1.9984534626575236e-05, "loss": 1.1288, "step": 408 }, { "epoch": 0.07012881239685363, "grad_norm": 1.734375, "learning_rate": 1.9984434065149435e-05, "loss": 1.1455, "step": 409 }, { "epoch": 0.0703002764858435, "grad_norm": 1.734375, "learning_rate": 1.9984333178093646e-05, "loss": 1.0953, "step": 410 }, { "epoch": 0.07047174057483335, "grad_norm": 1.7890625, "learning_rate": 1.9984231965411154e-05, "loss": 1.1731, "step": 411 }, { "epoch": 0.07064320466382322, "grad_norm": 1.828125, "learning_rate": 1.9984130427105257e-05, "loss": 1.0778, "step": 412 }, { "epoch": 0.07081466875281309, "grad_norm": 1.7890625, "learning_rate": 1.9984028563179274e-05, "loss": 1.1916, "step": 413 }, { "epoch": 0.07098613284180294, "grad_norm": 1.8359375, "learning_rate": 1.998392637363652e-05, "loss": 1.1299, "step": 414 }, { "epoch": 0.07115759693079281, "grad_norm": 1.6953125, "learning_rate": 1.9983823858480333e-05, "loss": 1.0373, "step": 415 }, { "epoch": 0.07132906101978267, "grad_norm": 1.90625, "learning_rate": 1.9983721017714055e-05, "loss": 1.1585, "step": 416 }, { "epoch": 0.07150052510877253, "grad_norm": 1.71875, "learning_rate": 1.9983617851341038e-05, "loss": 1.1074, "step": 417 }, { "epoch": 0.0716719891977624, "grad_norm": 1.9375, "learning_rate": 1.998351435936465e-05, "loss": 1.2491, "step": 418 }, { "epoch": 0.07184345328675226, "grad_norm": 1.671875, "learning_rate": 1.998341054178826e-05, "loss": 1.006, "step": 419 }, { "epoch": 0.07201491737574212, "grad_norm": 1.8046875, "learning_rate": 1.998330639861526e-05, "loss": 1.1695, "step": 420 }, { "epoch": 0.07218638146473198, "grad_norm": 1.6953125, "learning_rate": 1.9983201929849044e-05, "loss": 1.1085, "step": 421 }, { "epoch": 0.07235784555372184, "grad_norm": 1.671875, "learning_rate": 1.9983097135493024e-05, "loss": 1.1592, "step": 422 }, { "epoch": 0.0725293096427117, "grad_norm": 1.78125, "learning_rate": 1.998299201555061e-05, "loss": 1.0885, "step": 423 }, { "epoch": 0.07270077373170157, "grad_norm": 1.6953125, "learning_rate": 1.998288657002523e-05, "loss": 1.078, "step": 424 }, { "epoch": 0.07287223782069142, "grad_norm": 1.75, "learning_rate": 1.9982780798920337e-05, "loss": 1.0714, "step": 425 }, { "epoch": 0.07304370190968129, "grad_norm": 1.71875, "learning_rate": 1.9982674702239363e-05, "loss": 1.1147, "step": 426 }, { "epoch": 0.07321516599867116, "grad_norm": 1.71875, "learning_rate": 1.998256827998578e-05, "loss": 1.1031, "step": 427 }, { "epoch": 0.07338663008766101, "grad_norm": 1.765625, "learning_rate": 1.9982461532163052e-05, "loss": 1.0766, "step": 428 }, { "epoch": 0.07355809417665088, "grad_norm": 1.90625, "learning_rate": 1.9982354458774658e-05, "loss": 1.0823, "step": 429 }, { "epoch": 0.07372955826564075, "grad_norm": 1.7109375, "learning_rate": 1.9982247059824103e-05, "loss": 1.0892, "step": 430 }, { "epoch": 0.0739010223546306, "grad_norm": 1.7578125, "learning_rate": 1.9982139335314878e-05, "loss": 1.081, "step": 431 }, { "epoch": 0.07407248644362047, "grad_norm": 1.65625, "learning_rate": 1.99820312852505e-05, "loss": 1.0797, "step": 432 }, { "epoch": 0.07424395053261032, "grad_norm": 1.9921875, "learning_rate": 1.9981922909634495e-05, "loss": 1.1905, "step": 433 }, { "epoch": 0.07441541462160019, "grad_norm": 1.6796875, "learning_rate": 1.9981814208470394e-05, "loss": 1.0763, "step": 434 }, { "epoch": 0.07458687871059005, "grad_norm": 1.7109375, "learning_rate": 1.9981705181761747e-05, "loss": 1.1211, "step": 435 }, { "epoch": 0.07475834279957991, "grad_norm": 1.71875, "learning_rate": 1.99815958295121e-05, "loss": 1.1659, "step": 436 }, { "epoch": 0.07492980688856977, "grad_norm": 1.7265625, "learning_rate": 1.9981486151725027e-05, "loss": 1.075, "step": 437 }, { "epoch": 0.07510127097755964, "grad_norm": 1.6796875, "learning_rate": 1.9981376148404112e-05, "loss": 1.0265, "step": 438 }, { "epoch": 0.0752727350665495, "grad_norm": 1.796875, "learning_rate": 1.9981265819552927e-05, "loss": 1.1912, "step": 439 }, { "epoch": 0.07544419915553936, "grad_norm": 1.7578125, "learning_rate": 1.998115516517508e-05, "loss": 1.1765, "step": 440 }, { "epoch": 0.07561566324452923, "grad_norm": 1.734375, "learning_rate": 1.9981044185274174e-05, "loss": 1.0593, "step": 441 }, { "epoch": 0.07578712733351908, "grad_norm": 1.796875, "learning_rate": 1.9980932879853835e-05, "loss": 0.98, "step": 442 }, { "epoch": 0.07595859142250895, "grad_norm": 1.75, "learning_rate": 1.998082124891769e-05, "loss": 1.1933, "step": 443 }, { "epoch": 0.0761300555114988, "grad_norm": 1.828125, "learning_rate": 1.998070929246938e-05, "loss": 1.1674, "step": 444 }, { "epoch": 0.07630151960048867, "grad_norm": 1.734375, "learning_rate": 1.998059701051255e-05, "loss": 1.1136, "step": 445 }, { "epoch": 0.07647298368947854, "grad_norm": 1.8046875, "learning_rate": 1.9980484403050876e-05, "loss": 1.1286, "step": 446 }, { "epoch": 0.07664444777846839, "grad_norm": 1.8515625, "learning_rate": 1.998037147008802e-05, "loss": 1.1737, "step": 447 }, { "epoch": 0.07681591186745826, "grad_norm": 1.7421875, "learning_rate": 1.998025821162767e-05, "loss": 1.1155, "step": 448 }, { "epoch": 0.07698737595644813, "grad_norm": 1.6171875, "learning_rate": 1.9980144627673514e-05, "loss": 1.1302, "step": 449 }, { "epoch": 0.07715884004543798, "grad_norm": 1.828125, "learning_rate": 1.9980030718229262e-05, "loss": 1.1465, "step": 450 }, { "epoch": 0.07733030413442785, "grad_norm": 1.640625, "learning_rate": 1.9979916483298625e-05, "loss": 1.0567, "step": 451 }, { "epoch": 0.07750176822341771, "grad_norm": 3.21875, "learning_rate": 1.997980192288533e-05, "loss": 1.1799, "step": 452 }, { "epoch": 0.07767323231240757, "grad_norm": 1.75, "learning_rate": 1.9979687036993116e-05, "loss": 1.0321, "step": 453 }, { "epoch": 0.07784469640139743, "grad_norm": 1.6484375, "learning_rate": 1.9979571825625726e-05, "loss": 1.0242, "step": 454 }, { "epoch": 0.0780161604903873, "grad_norm": 1.6640625, "learning_rate": 1.9979456288786926e-05, "loss": 1.0746, "step": 455 }, { "epoch": 0.07818762457937715, "grad_norm": 1.7265625, "learning_rate": 1.997934042648047e-05, "loss": 1.0803, "step": 456 }, { "epoch": 0.07835908866836702, "grad_norm": 2.234375, "learning_rate": 1.9979224238710143e-05, "loss": 1.094, "step": 457 }, { "epoch": 0.07853055275735688, "grad_norm": 1.953125, "learning_rate": 1.997910772547974e-05, "loss": 1.1616, "step": 458 }, { "epoch": 0.07870201684634674, "grad_norm": 6.75, "learning_rate": 1.9978990886793056e-05, "loss": 1.0693, "step": 459 }, { "epoch": 0.07887348093533661, "grad_norm": 5.75, "learning_rate": 1.9978873722653896e-05, "loss": 1.1452, "step": 460 }, { "epoch": 0.07904494502432646, "grad_norm": 3.59375, "learning_rate": 1.997875623306609e-05, "loss": 1.1542, "step": 461 }, { "epoch": 0.07921640911331633, "grad_norm": 1.7265625, "learning_rate": 1.9978638418033473e-05, "loss": 1.1215, "step": 462 }, { "epoch": 0.0793878732023062, "grad_norm": 1.7109375, "learning_rate": 1.9978520277559873e-05, "loss": 1.0942, "step": 463 }, { "epoch": 0.07955933729129605, "grad_norm": 1.7265625, "learning_rate": 1.9978401811649157e-05, "loss": 1.0332, "step": 464 }, { "epoch": 0.07973080138028592, "grad_norm": 1.8828125, "learning_rate": 1.9978283020305177e-05, "loss": 1.1336, "step": 465 }, { "epoch": 0.07990226546927578, "grad_norm": 1.8359375, "learning_rate": 1.9978163903531817e-05, "loss": 1.0954, "step": 466 }, { "epoch": 0.08007372955826564, "grad_norm": 1.6796875, "learning_rate": 1.997804446133296e-05, "loss": 1.0685, "step": 467 }, { "epoch": 0.0802451936472555, "grad_norm": 1.59375, "learning_rate": 1.9977924693712492e-05, "loss": 1.1116, "step": 468 }, { "epoch": 0.08041665773624536, "grad_norm": 1.7265625, "learning_rate": 1.9977804600674327e-05, "loss": 1.082, "step": 469 }, { "epoch": 0.08058812182523523, "grad_norm": 1.6796875, "learning_rate": 1.9977684182222387e-05, "loss": 1.035, "step": 470 }, { "epoch": 0.0807595859142251, "grad_norm": 1.734375, "learning_rate": 1.9977563438360593e-05, "loss": 1.1146, "step": 471 }, { "epoch": 0.08093105000321495, "grad_norm": 1.78125, "learning_rate": 1.997744236909288e-05, "loss": 1.04, "step": 472 }, { "epoch": 0.08110251409220481, "grad_norm": 1.7734375, "learning_rate": 1.9977320974423196e-05, "loss": 1.1159, "step": 473 }, { "epoch": 0.08127397818119468, "grad_norm": 1.7734375, "learning_rate": 1.997719925435551e-05, "loss": 1.1449, "step": 474 }, { "epoch": 0.08144544227018453, "grad_norm": 1.78125, "learning_rate": 1.9977077208893788e-05, "loss": 1.1145, "step": 475 }, { "epoch": 0.0816169063591744, "grad_norm": 1.765625, "learning_rate": 1.9976954838042e-05, "loss": 1.1232, "step": 476 }, { "epoch": 0.08178837044816427, "grad_norm": 1.8515625, "learning_rate": 1.9976832141804153e-05, "loss": 1.2183, "step": 477 }, { "epoch": 0.08195983453715412, "grad_norm": 1.65625, "learning_rate": 1.9976709120184234e-05, "loss": 1.0607, "step": 478 }, { "epoch": 0.08213129862614399, "grad_norm": 1.8046875, "learning_rate": 1.9976585773186267e-05, "loss": 1.1443, "step": 479 }, { "epoch": 0.08230276271513384, "grad_norm": 1.8359375, "learning_rate": 1.9976462100814268e-05, "loss": 1.0947, "step": 480 }, { "epoch": 0.08247422680412371, "grad_norm": 1.8046875, "learning_rate": 1.997633810307227e-05, "loss": 1.065, "step": 481 }, { "epoch": 0.08264569089311358, "grad_norm": 1.7578125, "learning_rate": 1.997621377996432e-05, "loss": 1.0127, "step": 482 }, { "epoch": 0.08281715498210343, "grad_norm": 1.953125, "learning_rate": 1.997608913149447e-05, "loss": 1.1413, "step": 483 }, { "epoch": 0.0829886190710933, "grad_norm": 1.640625, "learning_rate": 1.997596415766679e-05, "loss": 1.1008, "step": 484 }, { "epoch": 0.08316008316008316, "grad_norm": 1.6875, "learning_rate": 1.997583885848535e-05, "loss": 1.1314, "step": 485 }, { "epoch": 0.08333154724907302, "grad_norm": 1.7265625, "learning_rate": 1.9975713233954242e-05, "loss": 1.1378, "step": 486 }, { "epoch": 0.08350301133806289, "grad_norm": 1.890625, "learning_rate": 1.9975587284077558e-05, "loss": 1.1202, "step": 487 }, { "epoch": 0.08367447542705275, "grad_norm": 1.765625, "learning_rate": 1.9975461008859408e-05, "loss": 1.1505, "step": 488 }, { "epoch": 0.0838459395160426, "grad_norm": 1.6171875, "learning_rate": 1.9975334408303916e-05, "loss": 1.072, "step": 489 }, { "epoch": 0.08401740360503247, "grad_norm": 1.8515625, "learning_rate": 1.9975207482415198e-05, "loss": 1.1842, "step": 490 }, { "epoch": 0.08418886769402233, "grad_norm": 1.7109375, "learning_rate": 1.9975080231197406e-05, "loss": 1.1735, "step": 491 }, { "epoch": 0.0843603317830122, "grad_norm": 1.6953125, "learning_rate": 1.997495265465468e-05, "loss": 1.0909, "step": 492 }, { "epoch": 0.08453179587200206, "grad_norm": 1.7421875, "learning_rate": 1.997482475279119e-05, "loss": 1.0719, "step": 493 }, { "epoch": 0.08470325996099191, "grad_norm": 1.8046875, "learning_rate": 1.9974696525611102e-05, "loss": 1.0718, "step": 494 }, { "epoch": 0.08487472404998178, "grad_norm": 1.6796875, "learning_rate": 1.99745679731186e-05, "loss": 1.1493, "step": 495 }, { "epoch": 0.08504618813897165, "grad_norm": 2.515625, "learning_rate": 1.9974439095317874e-05, "loss": 1.2006, "step": 496 }, { "epoch": 0.0852176522279615, "grad_norm": 1.7109375, "learning_rate": 1.997430989221313e-05, "loss": 1.1398, "step": 497 }, { "epoch": 0.08538911631695137, "grad_norm": 1.9140625, "learning_rate": 1.9974180363808577e-05, "loss": 1.1164, "step": 498 }, { "epoch": 0.08556058040594124, "grad_norm": 1.6640625, "learning_rate": 1.9974050510108447e-05, "loss": 1.0891, "step": 499 }, { "epoch": 0.08573204449493109, "grad_norm": 1.765625, "learning_rate": 1.9973920331116973e-05, "loss": 1.1879, "step": 500 }, { "epoch": 0.08590350858392096, "grad_norm": 1.796875, "learning_rate": 1.9973789826838393e-05, "loss": 1.1017, "step": 501 }, { "epoch": 0.08607497267291082, "grad_norm": 1.75, "learning_rate": 1.997365899727697e-05, "loss": 1.1061, "step": 502 }, { "epoch": 0.08624643676190068, "grad_norm": 1.6796875, "learning_rate": 1.9973527842436975e-05, "loss": 1.1116, "step": 503 }, { "epoch": 0.08641790085089054, "grad_norm": 1.9375, "learning_rate": 1.9973396362322677e-05, "loss": 1.1648, "step": 504 }, { "epoch": 0.0865893649398804, "grad_norm": 1.7890625, "learning_rate": 1.9973264556938365e-05, "loss": 1.1449, "step": 505 }, { "epoch": 0.08676082902887026, "grad_norm": 1.796875, "learning_rate": 1.9973132426288343e-05, "loss": 1.1397, "step": 506 }, { "epoch": 0.08693229311786013, "grad_norm": 1.7265625, "learning_rate": 1.9972999970376917e-05, "loss": 1.0692, "step": 507 }, { "epoch": 0.08710375720684999, "grad_norm": 1.8125, "learning_rate": 1.9972867189208403e-05, "loss": 1.0507, "step": 508 }, { "epoch": 0.08727522129583985, "grad_norm": 1.8359375, "learning_rate": 1.997273408278714e-05, "loss": 1.1395, "step": 509 }, { "epoch": 0.08744668538482972, "grad_norm": 1.734375, "learning_rate": 1.9972600651117465e-05, "loss": 1.0305, "step": 510 }, { "epoch": 0.08761814947381957, "grad_norm": 1.7578125, "learning_rate": 1.9972466894203727e-05, "loss": 1.1262, "step": 511 }, { "epoch": 0.08778961356280944, "grad_norm": 1.796875, "learning_rate": 1.9972332812050293e-05, "loss": 1.1948, "step": 512 }, { "epoch": 0.08796107765179931, "grad_norm": 1.6953125, "learning_rate": 1.997219840466153e-05, "loss": 0.9924, "step": 513 }, { "epoch": 0.08813254174078916, "grad_norm": 1.78125, "learning_rate": 1.997206367204183e-05, "loss": 0.992, "step": 514 }, { "epoch": 0.08830400582977903, "grad_norm": 1.71875, "learning_rate": 1.9971928614195578e-05, "loss": 1.1893, "step": 515 }, { "epoch": 0.08847546991876888, "grad_norm": 1.6796875, "learning_rate": 1.9971793231127185e-05, "loss": 1.1084, "step": 516 }, { "epoch": 0.08864693400775875, "grad_norm": 1.8125, "learning_rate": 1.9971657522841064e-05, "loss": 1.0878, "step": 517 }, { "epoch": 0.08881839809674862, "grad_norm": 1.671875, "learning_rate": 1.9971521489341644e-05, "loss": 1.0084, "step": 518 }, { "epoch": 0.08898986218573847, "grad_norm": 1.6953125, "learning_rate": 1.9971385130633357e-05, "loss": 1.0803, "step": 519 }, { "epoch": 0.08916132627472834, "grad_norm": 1.609375, "learning_rate": 1.9971248446720654e-05, "loss": 1.1219, "step": 520 }, { "epoch": 0.0893327903637182, "grad_norm": 1.6484375, "learning_rate": 1.9971111437607988e-05, "loss": 1.0364, "step": 521 }, { "epoch": 0.08950425445270806, "grad_norm": 1.6484375, "learning_rate": 1.9970974103299833e-05, "loss": 1.1015, "step": 522 }, { "epoch": 0.08967571854169792, "grad_norm": 1.7421875, "learning_rate": 1.997083644380066e-05, "loss": 1.1518, "step": 523 }, { "epoch": 0.08984718263068779, "grad_norm": 1.734375, "learning_rate": 1.9970698459114967e-05, "loss": 1.1541, "step": 524 }, { "epoch": 0.09001864671967764, "grad_norm": 1.671875, "learning_rate": 1.9970560149247255e-05, "loss": 0.9705, "step": 525 }, { "epoch": 0.09019011080866751, "grad_norm": 1.703125, "learning_rate": 1.9970421514202025e-05, "loss": 1.1515, "step": 526 }, { "epoch": 0.09036157489765737, "grad_norm": 1.71875, "learning_rate": 1.9970282553983805e-05, "loss": 1.1664, "step": 527 }, { "epoch": 0.09053303898664723, "grad_norm": 1.671875, "learning_rate": 1.9970143268597127e-05, "loss": 1.08, "step": 528 }, { "epoch": 0.0907045030756371, "grad_norm": 1.65625, "learning_rate": 1.9970003658046536e-05, "loss": 1.129, "step": 529 }, { "epoch": 0.09087596716462695, "grad_norm": 1.6328125, "learning_rate": 1.996986372233658e-05, "loss": 1.0173, "step": 530 }, { "epoch": 0.09104743125361682, "grad_norm": 1.59375, "learning_rate": 1.9969723461471826e-05, "loss": 1.0457, "step": 531 }, { "epoch": 0.09121889534260669, "grad_norm": 1.6796875, "learning_rate": 1.9969582875456846e-05, "loss": 1.1081, "step": 532 }, { "epoch": 0.09139035943159654, "grad_norm": 1.734375, "learning_rate": 1.9969441964296227e-05, "loss": 1.0597, "step": 533 }, { "epoch": 0.09156182352058641, "grad_norm": 1.640625, "learning_rate": 1.9969300727994564e-05, "loss": 1.0248, "step": 534 }, { "epoch": 0.09173328760957628, "grad_norm": 1.7734375, "learning_rate": 1.9969159166556462e-05, "loss": 1.1426, "step": 535 }, { "epoch": 0.09190475169856613, "grad_norm": 1.7265625, "learning_rate": 1.9969017279986542e-05, "loss": 1.1681, "step": 536 }, { "epoch": 0.092076215787556, "grad_norm": 1.8203125, "learning_rate": 1.9968875068289425e-05, "loss": 1.09, "step": 537 }, { "epoch": 0.09224767987654586, "grad_norm": 1.828125, "learning_rate": 1.9968732531469757e-05, "loss": 1.1262, "step": 538 }, { "epoch": 0.09241914396553572, "grad_norm": 1.65625, "learning_rate": 1.9968589669532183e-05, "loss": 1.0702, "step": 539 }, { "epoch": 0.09259060805452558, "grad_norm": 1.6875, "learning_rate": 1.9968446482481357e-05, "loss": 1.108, "step": 540 }, { "epoch": 0.09276207214351544, "grad_norm": 1.7265625, "learning_rate": 1.9968302970321956e-05, "loss": 1.1241, "step": 541 }, { "epoch": 0.0929335362325053, "grad_norm": 1.7734375, "learning_rate": 1.9968159133058655e-05, "loss": 1.1355, "step": 542 }, { "epoch": 0.09310500032149517, "grad_norm": 1.7265625, "learning_rate": 1.996801497069615e-05, "loss": 1.1341, "step": 543 }, { "epoch": 0.09327646441048502, "grad_norm": 1.78125, "learning_rate": 1.996787048323914e-05, "loss": 1.1449, "step": 544 }, { "epoch": 0.09344792849947489, "grad_norm": 1.671875, "learning_rate": 1.9967725670692343e-05, "loss": 1.084, "step": 545 }, { "epoch": 0.09361939258846476, "grad_norm": 1.890625, "learning_rate": 1.996758053306047e-05, "loss": 1.1289, "step": 546 }, { "epoch": 0.09379085667745461, "grad_norm": 1.6171875, "learning_rate": 1.9967435070348265e-05, "loss": 1.0199, "step": 547 }, { "epoch": 0.09396232076644448, "grad_norm": 1.7578125, "learning_rate": 1.9967289282560468e-05, "loss": 1.1458, "step": 548 }, { "epoch": 0.09413378485543435, "grad_norm": 1.609375, "learning_rate": 1.9967143169701836e-05, "loss": 0.9749, "step": 549 }, { "epoch": 0.0943052489444242, "grad_norm": 1.8125, "learning_rate": 1.996699673177713e-05, "loss": 1.0688, "step": 550 }, { "epoch": 0.09447671303341407, "grad_norm": 1.703125, "learning_rate": 1.996684996879113e-05, "loss": 1.0636, "step": 551 }, { "epoch": 0.09464817712240392, "grad_norm": 1.7109375, "learning_rate": 1.9966702880748618e-05, "loss": 1.1019, "step": 552 }, { "epoch": 0.09481964121139379, "grad_norm": 1.6875, "learning_rate": 1.9966555467654395e-05, "loss": 1.1146, "step": 553 }, { "epoch": 0.09499110530038365, "grad_norm": 1.890625, "learning_rate": 1.996640772951327e-05, "loss": 1.0833, "step": 554 }, { "epoch": 0.09516256938937351, "grad_norm": 1.6640625, "learning_rate": 1.9966259666330055e-05, "loss": 1.0641, "step": 555 }, { "epoch": 0.09533403347836338, "grad_norm": 1.65625, "learning_rate": 1.9966111278109586e-05, "loss": 1.016, "step": 556 }, { "epoch": 0.09550549756735324, "grad_norm": 1.7890625, "learning_rate": 1.99659625648567e-05, "loss": 1.1166, "step": 557 }, { "epoch": 0.0956769616563431, "grad_norm": 1.671875, "learning_rate": 1.9965813526576246e-05, "loss": 1.0452, "step": 558 }, { "epoch": 0.09584842574533296, "grad_norm": 1.7734375, "learning_rate": 1.996566416327308e-05, "loss": 1.0721, "step": 559 }, { "epoch": 0.09601988983432283, "grad_norm": 1.65625, "learning_rate": 1.9965514474952084e-05, "loss": 1.0086, "step": 560 }, { "epoch": 0.09619135392331268, "grad_norm": 1.71875, "learning_rate": 1.996536446161813e-05, "loss": 1.0513, "step": 561 }, { "epoch": 0.09636281801230255, "grad_norm": 1.828125, "learning_rate": 1.996521412327612e-05, "loss": 1.1228, "step": 562 }, { "epoch": 0.0965342821012924, "grad_norm": 1.6796875, "learning_rate": 1.996506345993095e-05, "loss": 1.1084, "step": 563 }, { "epoch": 0.09670574619028227, "grad_norm": 1.8984375, "learning_rate": 1.9964912471587533e-05, "loss": 1.1142, "step": 564 }, { "epoch": 0.09687721027927214, "grad_norm": 1.6875, "learning_rate": 1.99647611582508e-05, "loss": 1.05, "step": 565 }, { "epoch": 0.09704867436826199, "grad_norm": 1.8671875, "learning_rate": 1.996460951992568e-05, "loss": 1.1963, "step": 566 }, { "epoch": 0.09722013845725186, "grad_norm": 1.6484375, "learning_rate": 1.996445755661712e-05, "loss": 1.0478, "step": 567 }, { "epoch": 0.09739160254624173, "grad_norm": 1.75, "learning_rate": 1.9964305268330075e-05, "loss": 1.0692, "step": 568 }, { "epoch": 0.09756306663523158, "grad_norm": 1.7421875, "learning_rate": 1.9964152655069514e-05, "loss": 1.0776, "step": 569 }, { "epoch": 0.09773453072422145, "grad_norm": 1.8125, "learning_rate": 1.9963999716840417e-05, "loss": 1.0419, "step": 570 }, { "epoch": 0.09790599481321131, "grad_norm": 1.6953125, "learning_rate": 1.9963846453647763e-05, "loss": 1.1413, "step": 571 }, { "epoch": 0.09807745890220117, "grad_norm": 1.8203125, "learning_rate": 1.996369286549656e-05, "loss": 1.073, "step": 572 }, { "epoch": 0.09824892299119103, "grad_norm": 1.7421875, "learning_rate": 1.9963538952391808e-05, "loss": 1.1669, "step": 573 }, { "epoch": 0.09842038708018089, "grad_norm": 1.7578125, "learning_rate": 1.9963384714338537e-05, "loss": 1.1771, "step": 574 }, { "epoch": 0.09859185116917075, "grad_norm": 1.859375, "learning_rate": 1.996323015134177e-05, "loss": 1.1514, "step": 575 }, { "epoch": 0.09876331525816062, "grad_norm": 1.7734375, "learning_rate": 1.9963075263406545e-05, "loss": 1.163, "step": 576 }, { "epoch": 0.09893477934715048, "grad_norm": 1.6875, "learning_rate": 1.9962920050537922e-05, "loss": 1.1313, "step": 577 }, { "epoch": 0.09910624343614034, "grad_norm": 1.9453125, "learning_rate": 1.996276451274096e-05, "loss": 1.0454, "step": 578 }, { "epoch": 0.09927770752513021, "grad_norm": 1.75, "learning_rate": 1.996260865002073e-05, "loss": 1.2105, "step": 579 }, { "epoch": 0.09944917161412006, "grad_norm": 1.6484375, "learning_rate": 1.996245246238232e-05, "loss": 1.1686, "step": 580 }, { "epoch": 0.09962063570310993, "grad_norm": 1.6953125, "learning_rate": 1.9962295949830814e-05, "loss": 1.1004, "step": 581 }, { "epoch": 0.0997920997920998, "grad_norm": 1.765625, "learning_rate": 1.9962139112371324e-05, "loss": 1.2155, "step": 582 }, { "epoch": 0.09996356388108965, "grad_norm": 1.703125, "learning_rate": 1.9961981950008966e-05, "loss": 1.1185, "step": 583 }, { "epoch": 0.10013502797007952, "grad_norm": 1.65625, "learning_rate": 1.9961824462748863e-05, "loss": 1.0739, "step": 584 }, { "epoch": 0.10030649205906939, "grad_norm": 1.65625, "learning_rate": 1.996166665059615e-05, "loss": 1.1047, "step": 585 }, { "epoch": 0.10047795614805924, "grad_norm": 1.6484375, "learning_rate": 1.9961508513555975e-05, "loss": 1.1484, "step": 586 }, { "epoch": 0.1006494202370491, "grad_norm": 1.671875, "learning_rate": 1.9961350051633497e-05, "loss": 1.0129, "step": 587 }, { "epoch": 0.10082088432603896, "grad_norm": 1.734375, "learning_rate": 1.9961191264833884e-05, "loss": 1.0761, "step": 588 }, { "epoch": 0.10099234841502883, "grad_norm": 1.859375, "learning_rate": 1.9961032153162312e-05, "loss": 1.1838, "step": 589 }, { "epoch": 0.1011638125040187, "grad_norm": 1.6953125, "learning_rate": 1.9960872716623972e-05, "loss": 1.0953, "step": 590 }, { "epoch": 0.10133527659300855, "grad_norm": 1.71875, "learning_rate": 1.9960712955224065e-05, "loss": 1.0843, "step": 591 }, { "epoch": 0.10150674068199841, "grad_norm": 1.6953125, "learning_rate": 1.9960552868967796e-05, "loss": 1.2278, "step": 592 }, { "epoch": 0.10167820477098828, "grad_norm": 1.6953125, "learning_rate": 1.9960392457860394e-05, "loss": 1.1326, "step": 593 }, { "epoch": 0.10184966885997813, "grad_norm": 1.96875, "learning_rate": 1.9960231721907083e-05, "loss": 1.1644, "step": 594 }, { "epoch": 0.102021132948968, "grad_norm": 1.6875, "learning_rate": 1.996007066111311e-05, "loss": 1.1278, "step": 595 }, { "epoch": 0.10219259703795787, "grad_norm": 1.7890625, "learning_rate": 1.995990927548373e-05, "loss": 1.1009, "step": 596 }, { "epoch": 0.10236406112694772, "grad_norm": 1.7421875, "learning_rate": 1.99597475650242e-05, "loss": 1.1035, "step": 597 }, { "epoch": 0.10253552521593759, "grad_norm": 1.6953125, "learning_rate": 1.9959585529739796e-05, "loss": 1.1012, "step": 598 }, { "epoch": 0.10270698930492744, "grad_norm": 1.65625, "learning_rate": 1.9959423169635804e-05, "loss": 1.1065, "step": 599 }, { "epoch": 0.10287845339391731, "grad_norm": 1.6875, "learning_rate": 1.995926048471752e-05, "loss": 1.1969, "step": 600 }, { "epoch": 0.10304991748290718, "grad_norm": 1.7734375, "learning_rate": 1.9959097474990248e-05, "loss": 1.163, "step": 601 }, { "epoch": 0.10322138157189703, "grad_norm": 1.8046875, "learning_rate": 1.9958934140459302e-05, "loss": 1.1103, "step": 602 }, { "epoch": 0.1033928456608869, "grad_norm": 1.6953125, "learning_rate": 1.9958770481130016e-05, "loss": 1.1603, "step": 603 }, { "epoch": 0.10356430974987677, "grad_norm": 1.765625, "learning_rate": 1.995860649700772e-05, "loss": 1.1266, "step": 604 }, { "epoch": 0.10373577383886662, "grad_norm": 1.8515625, "learning_rate": 1.995844218809777e-05, "loss": 1.1011, "step": 605 }, { "epoch": 0.10390723792785649, "grad_norm": 1.78125, "learning_rate": 1.9958277554405516e-05, "loss": 1.0317, "step": 606 }, { "epoch": 0.10407870201684635, "grad_norm": 1.8359375, "learning_rate": 1.9958112595936333e-05, "loss": 1.0968, "step": 607 }, { "epoch": 0.1042501661058362, "grad_norm": 1.6640625, "learning_rate": 1.9957947312695602e-05, "loss": 1.0747, "step": 608 }, { "epoch": 0.10442163019482607, "grad_norm": 1.7109375, "learning_rate": 1.9957781704688707e-05, "loss": 1.0624, "step": 609 }, { "epoch": 0.10459309428381593, "grad_norm": 1.8125, "learning_rate": 1.9957615771921055e-05, "loss": 1.0525, "step": 610 }, { "epoch": 0.1047645583728058, "grad_norm": 2.078125, "learning_rate": 1.9957449514398054e-05, "loss": 1.1324, "step": 611 }, { "epoch": 0.10493602246179566, "grad_norm": 1.609375, "learning_rate": 1.9957282932125132e-05, "loss": 1.0306, "step": 612 }, { "epoch": 0.10510748655078551, "grad_norm": 1.671875, "learning_rate": 1.9957116025107713e-05, "loss": 1.1239, "step": 613 }, { "epoch": 0.10527895063977538, "grad_norm": 1.8828125, "learning_rate": 1.9956948793351248e-05, "loss": 1.0913, "step": 614 }, { "epoch": 0.10545041472876525, "grad_norm": 1.7109375, "learning_rate": 1.995678123686119e-05, "loss": 1.0655, "step": 615 }, { "epoch": 0.1056218788177551, "grad_norm": 1.6953125, "learning_rate": 1.9956613355643e-05, "loss": 1.0825, "step": 616 }, { "epoch": 0.10579334290674497, "grad_norm": 1.90625, "learning_rate": 1.9956445149702154e-05, "loss": 1.1483, "step": 617 }, { "epoch": 0.10596480699573484, "grad_norm": 2.34375, "learning_rate": 1.9956276619044145e-05, "loss": 1.1081, "step": 618 }, { "epoch": 0.10613627108472469, "grad_norm": 1.734375, "learning_rate": 1.9956107763674456e-05, "loss": 1.1174, "step": 619 }, { "epoch": 0.10630773517371456, "grad_norm": 1.6796875, "learning_rate": 1.9955938583598606e-05, "loss": 1.1238, "step": 620 }, { "epoch": 0.10647919926270442, "grad_norm": 1.7890625, "learning_rate": 1.9955769078822106e-05, "loss": 1.1835, "step": 621 }, { "epoch": 0.10665066335169428, "grad_norm": 1.8046875, "learning_rate": 1.9955599249350487e-05, "loss": 1.1702, "step": 622 }, { "epoch": 0.10682212744068414, "grad_norm": 1.7421875, "learning_rate": 1.995542909518929e-05, "loss": 1.0848, "step": 623 }, { "epoch": 0.106993591529674, "grad_norm": 1.7734375, "learning_rate": 1.995525861634406e-05, "loss": 1.106, "step": 624 }, { "epoch": 0.10716505561866387, "grad_norm": 1.6875, "learning_rate": 1.9955087812820357e-05, "loss": 1.0085, "step": 625 }, { "epoch": 0.10733651970765373, "grad_norm": 1.7578125, "learning_rate": 1.9954916684623753e-05, "loss": 1.083, "step": 626 }, { "epoch": 0.10750798379664359, "grad_norm": 1.7578125, "learning_rate": 1.995474523175983e-05, "loss": 1.1413, "step": 627 }, { "epoch": 0.10767944788563345, "grad_norm": 1.578125, "learning_rate": 1.9954573454234177e-05, "loss": 1.0141, "step": 628 }, { "epoch": 0.10785091197462332, "grad_norm": 10.1875, "learning_rate": 1.99544013520524e-05, "loss": 1.1188, "step": 629 }, { "epoch": 0.10802237606361317, "grad_norm": 1.8125, "learning_rate": 1.9954228925220112e-05, "loss": 1.0341, "step": 630 }, { "epoch": 0.10819384015260304, "grad_norm": 1.78125, "learning_rate": 1.9954056173742933e-05, "loss": 1.1826, "step": 631 }, { "epoch": 0.10836530424159291, "grad_norm": 1.609375, "learning_rate": 1.99538830976265e-05, "loss": 1.0859, "step": 632 }, { "epoch": 0.10853676833058276, "grad_norm": 1.78125, "learning_rate": 1.9953709696876452e-05, "loss": 1.0958, "step": 633 }, { "epoch": 0.10870823241957263, "grad_norm": 1.8125, "learning_rate": 1.9953535971498452e-05, "loss": 1.1274, "step": 634 }, { "epoch": 0.10887969650856248, "grad_norm": 1.71875, "learning_rate": 1.9953361921498163e-05, "loss": 1.186, "step": 635 }, { "epoch": 0.10905116059755235, "grad_norm": 1.734375, "learning_rate": 1.9953187546881257e-05, "loss": 1.0017, "step": 636 }, { "epoch": 0.10922262468654222, "grad_norm": 1.71875, "learning_rate": 1.995301284765343e-05, "loss": 1.1105, "step": 637 }, { "epoch": 0.10939408877553207, "grad_norm": 1.7109375, "learning_rate": 1.9952837823820373e-05, "loss": 1.0799, "step": 638 }, { "epoch": 0.10956555286452194, "grad_norm": 1.734375, "learning_rate": 1.9952662475387794e-05, "loss": 1.0469, "step": 639 }, { "epoch": 0.1097370169535118, "grad_norm": 1.625, "learning_rate": 1.9952486802361416e-05, "loss": 1.0985, "step": 640 }, { "epoch": 0.10990848104250166, "grad_norm": 1.828125, "learning_rate": 1.995231080474696e-05, "loss": 1.1092, "step": 641 }, { "epoch": 0.11007994513149152, "grad_norm": 1.640625, "learning_rate": 1.9952134482550175e-05, "loss": 1.0038, "step": 642 }, { "epoch": 0.11025140922048139, "grad_norm": 1.703125, "learning_rate": 1.9951957835776808e-05, "loss": 1.1089, "step": 643 }, { "epoch": 0.11042287330947125, "grad_norm": 1.6875, "learning_rate": 1.9951780864432623e-05, "loss": 1.0895, "step": 644 }, { "epoch": 0.11059433739846111, "grad_norm": 1.65625, "learning_rate": 1.9951603568523387e-05, "loss": 1.0533, "step": 645 }, { "epoch": 0.11076580148745097, "grad_norm": 1.8046875, "learning_rate": 1.9951425948054886e-05, "loss": 1.1723, "step": 646 }, { "epoch": 0.11093726557644083, "grad_norm": 1.8203125, "learning_rate": 1.995124800303291e-05, "loss": 1.2228, "step": 647 }, { "epoch": 0.1111087296654307, "grad_norm": 1.8359375, "learning_rate": 1.9951069733463262e-05, "loss": 1.1278, "step": 648 }, { "epoch": 0.11128019375442055, "grad_norm": 1.8203125, "learning_rate": 1.995089113935176e-05, "loss": 1.1156, "step": 649 }, { "epoch": 0.11145165784341042, "grad_norm": 1.7265625, "learning_rate": 1.9950712220704224e-05, "loss": 1.1134, "step": 650 }, { "epoch": 0.11162312193240029, "grad_norm": 1.6953125, "learning_rate": 1.9950532977526493e-05, "loss": 1.1028, "step": 651 }, { "epoch": 0.11179458602139014, "grad_norm": 1.828125, "learning_rate": 1.9950353409824412e-05, "loss": 1.0454, "step": 652 }, { "epoch": 0.11196605011038001, "grad_norm": 1.859375, "learning_rate": 1.995017351760384e-05, "loss": 1.0748, "step": 653 }, { "epoch": 0.11213751419936988, "grad_norm": 1.8359375, "learning_rate": 1.9949993300870637e-05, "loss": 1.1675, "step": 654 }, { "epoch": 0.11230897828835973, "grad_norm": 1.7890625, "learning_rate": 1.9949812759630683e-05, "loss": 1.1367, "step": 655 }, { "epoch": 0.1124804423773496, "grad_norm": 1.75, "learning_rate": 1.9949631893889866e-05, "loss": 1.0374, "step": 656 }, { "epoch": 0.11265190646633945, "grad_norm": 1.8125, "learning_rate": 1.994945070365409e-05, "loss": 1.1405, "step": 657 }, { "epoch": 0.11282337055532932, "grad_norm": 1.78125, "learning_rate": 1.994926918892926e-05, "loss": 1.1399, "step": 658 }, { "epoch": 0.11299483464431918, "grad_norm": 2.0625, "learning_rate": 1.9949087349721296e-05, "loss": 1.1436, "step": 659 }, { "epoch": 0.11316629873330904, "grad_norm": 1.7265625, "learning_rate": 1.9948905186036128e-05, "loss": 1.0613, "step": 660 }, { "epoch": 0.1133377628222989, "grad_norm": 1.6484375, "learning_rate": 1.9948722697879696e-05, "loss": 1.1771, "step": 661 }, { "epoch": 0.11350922691128877, "grad_norm": 1.8359375, "learning_rate": 1.9948539885257956e-05, "loss": 1.1123, "step": 662 }, { "epoch": 0.11368069100027862, "grad_norm": 1.6953125, "learning_rate": 1.9948356748176867e-05, "loss": 1.0366, "step": 663 }, { "epoch": 0.11385215508926849, "grad_norm": 1.6875, "learning_rate": 1.9948173286642403e-05, "loss": 1.0478, "step": 664 }, { "epoch": 0.11402361917825836, "grad_norm": 1.734375, "learning_rate": 1.9947989500660544e-05, "loss": 1.0948, "step": 665 }, { "epoch": 0.11419508326724821, "grad_norm": 1.6484375, "learning_rate": 1.994780539023729e-05, "loss": 1.0645, "step": 666 }, { "epoch": 0.11436654735623808, "grad_norm": 1.7734375, "learning_rate": 1.9947620955378642e-05, "loss": 1.0534, "step": 667 }, { "epoch": 0.11453801144522795, "grad_norm": 1.7421875, "learning_rate": 1.9947436196090614e-05, "loss": 1.077, "step": 668 }, { "epoch": 0.1147094755342178, "grad_norm": 1.7890625, "learning_rate": 1.9947251112379233e-05, "loss": 1.0538, "step": 669 }, { "epoch": 0.11488093962320767, "grad_norm": 1.6796875, "learning_rate": 1.9947065704250533e-05, "loss": 1.0747, "step": 670 }, { "epoch": 0.11505240371219752, "grad_norm": 1.75, "learning_rate": 1.994687997171057e-05, "loss": 1.0738, "step": 671 }, { "epoch": 0.11522386780118739, "grad_norm": 1.6953125, "learning_rate": 1.994669391476539e-05, "loss": 1.109, "step": 672 }, { "epoch": 0.11539533189017726, "grad_norm": 1.5703125, "learning_rate": 1.9946507533421067e-05, "loss": 1.0449, "step": 673 }, { "epoch": 0.11556679597916711, "grad_norm": 1.7109375, "learning_rate": 1.9946320827683676e-05, "loss": 1.046, "step": 674 }, { "epoch": 0.11573826006815698, "grad_norm": 1.6796875, "learning_rate": 1.994613379755931e-05, "loss": 1.0857, "step": 675 }, { "epoch": 0.11590972415714684, "grad_norm": 1.625, "learning_rate": 1.994594644305407e-05, "loss": 1.0279, "step": 676 }, { "epoch": 0.1160811882461367, "grad_norm": 1.7421875, "learning_rate": 1.9945758764174056e-05, "loss": 1.1194, "step": 677 }, { "epoch": 0.11625265233512656, "grad_norm": 1.7109375, "learning_rate": 1.9945570760925403e-05, "loss": 1.0465, "step": 678 }, { "epoch": 0.11642411642411643, "grad_norm": 1.875, "learning_rate": 1.9945382433314234e-05, "loss": 1.042, "step": 679 }, { "epoch": 0.11659558051310628, "grad_norm": 1.7734375, "learning_rate": 1.9945193781346695e-05, "loss": 1.0926, "step": 680 }, { "epoch": 0.11676704460209615, "grad_norm": 1.7421875, "learning_rate": 1.9945004805028933e-05, "loss": 1.0987, "step": 681 }, { "epoch": 0.116938508691086, "grad_norm": 1.7265625, "learning_rate": 1.9944815504367117e-05, "loss": 1.1226, "step": 682 }, { "epoch": 0.11710997278007587, "grad_norm": 1.6953125, "learning_rate": 1.9944625879367417e-05, "loss": 1.0064, "step": 683 }, { "epoch": 0.11728143686906574, "grad_norm": 1.7890625, "learning_rate": 1.994443593003602e-05, "loss": 1.1373, "step": 684 }, { "epoch": 0.11745290095805559, "grad_norm": 1.734375, "learning_rate": 1.9944245656379123e-05, "loss": 1.1338, "step": 685 }, { "epoch": 0.11762436504704546, "grad_norm": 1.7734375, "learning_rate": 1.9944055058402923e-05, "loss": 1.0695, "step": 686 }, { "epoch": 0.11779582913603533, "grad_norm": 1.75, "learning_rate": 1.9943864136113647e-05, "loss": 1.1445, "step": 687 }, { "epoch": 0.11796729322502518, "grad_norm": 1.765625, "learning_rate": 1.9943672889517515e-05, "loss": 1.1899, "step": 688 }, { "epoch": 0.11813875731401505, "grad_norm": 1.71875, "learning_rate": 1.9943481318620765e-05, "loss": 1.0404, "step": 689 }, { "epoch": 0.11831022140300491, "grad_norm": 1.8046875, "learning_rate": 1.9943289423429645e-05, "loss": 1.0936, "step": 690 }, { "epoch": 0.11848168549199477, "grad_norm": 1.7578125, "learning_rate": 1.9943097203950413e-05, "loss": 1.0992, "step": 691 }, { "epoch": 0.11865314958098463, "grad_norm": 1.7265625, "learning_rate": 1.994290466018934e-05, "loss": 1.0978, "step": 692 }, { "epoch": 0.11882461366997449, "grad_norm": 1.7890625, "learning_rate": 1.9942711792152708e-05, "loss": 1.1341, "step": 693 }, { "epoch": 0.11899607775896436, "grad_norm": 7.25, "learning_rate": 1.99425185998468e-05, "loss": 1.1517, "step": 694 }, { "epoch": 0.11916754184795422, "grad_norm": 1.890625, "learning_rate": 1.9942325083277917e-05, "loss": 1.155, "step": 695 }, { "epoch": 0.11933900593694408, "grad_norm": 1.6953125, "learning_rate": 1.994213124245238e-05, "loss": 1.0425, "step": 696 }, { "epoch": 0.11951047002593394, "grad_norm": 1.7890625, "learning_rate": 1.99419370773765e-05, "loss": 1.0609, "step": 697 }, { "epoch": 0.11968193411492381, "grad_norm": 1.6875, "learning_rate": 1.9941742588056616e-05, "loss": 1.0257, "step": 698 }, { "epoch": 0.11985339820391366, "grad_norm": 1.7578125, "learning_rate": 1.994154777449907e-05, "loss": 1.1413, "step": 699 }, { "epoch": 0.12002486229290353, "grad_norm": 1.7890625, "learning_rate": 1.9941352636710215e-05, "loss": 1.1103, "step": 700 }, { "epoch": 0.12002486229290353, "eval_loss": 0.9447146058082581, "eval_runtime": 837.1484, "eval_samples_per_second": 2.985, "eval_steps_per_second": 2.985, "step": 700 }, { "epoch": 0.1201963263818934, "grad_norm": 1.6171875, "learning_rate": 1.9941157174696412e-05, "loss": 1.1096, "step": 701 }, { "epoch": 0.12036779047088325, "grad_norm": 1.765625, "learning_rate": 1.9940961388464042e-05, "loss": 1.0937, "step": 702 }, { "epoch": 0.12053925455987312, "grad_norm": 1.7109375, "learning_rate": 1.9940765278019484e-05, "loss": 1.0675, "step": 703 }, { "epoch": 0.12071071864886297, "grad_norm": 1.75, "learning_rate": 1.994056884336914e-05, "loss": 1.0785, "step": 704 }, { "epoch": 0.12088218273785284, "grad_norm": 1.7109375, "learning_rate": 1.994037208451941e-05, "loss": 1.1388, "step": 705 }, { "epoch": 0.1210536468268427, "grad_norm": 1.7421875, "learning_rate": 1.9940175001476715e-05, "loss": 1.1303, "step": 706 }, { "epoch": 0.12122511091583256, "grad_norm": 1.6484375, "learning_rate": 1.9939977594247483e-05, "loss": 1.0755, "step": 707 }, { "epoch": 0.12139657500482243, "grad_norm": 1.6328125, "learning_rate": 1.9939779862838153e-05, "loss": 1.1185, "step": 708 }, { "epoch": 0.1215680390938123, "grad_norm": 1.8828125, "learning_rate": 1.9939581807255168e-05, "loss": 1.1227, "step": 709 }, { "epoch": 0.12173950318280215, "grad_norm": 1.828125, "learning_rate": 1.9939383427504994e-05, "loss": 1.0672, "step": 710 }, { "epoch": 0.12191096727179201, "grad_norm": 1.6875, "learning_rate": 1.9939184723594097e-05, "loss": 1.076, "step": 711 }, { "epoch": 0.12208243136078188, "grad_norm": 1.78125, "learning_rate": 1.9938985695528957e-05, "loss": 1.137, "step": 712 }, { "epoch": 0.12225389544977174, "grad_norm": 1.78125, "learning_rate": 1.993878634331607e-05, "loss": 1.1944, "step": 713 }, { "epoch": 0.1224253595387616, "grad_norm": 1.7578125, "learning_rate": 1.9938586666961933e-05, "loss": 1.1928, "step": 714 }, { "epoch": 0.12259682362775147, "grad_norm": 1.6953125, "learning_rate": 1.993838666647306e-05, "loss": 1.0469, "step": 715 }, { "epoch": 0.12276828771674132, "grad_norm": 1.734375, "learning_rate": 1.9938186341855972e-05, "loss": 1.0558, "step": 716 }, { "epoch": 0.12293975180573119, "grad_norm": 1.7734375, "learning_rate": 1.9937985693117205e-05, "loss": 1.101, "step": 717 }, { "epoch": 0.12311121589472104, "grad_norm": 1.921875, "learning_rate": 1.99377847202633e-05, "loss": 1.1684, "step": 718 }, { "epoch": 0.12328267998371091, "grad_norm": 1.65625, "learning_rate": 1.9937583423300816e-05, "loss": 0.9685, "step": 719 }, { "epoch": 0.12345414407270078, "grad_norm": 1.734375, "learning_rate": 1.993738180223631e-05, "loss": 1.1437, "step": 720 }, { "epoch": 0.12362560816169063, "grad_norm": 1.71875, "learning_rate": 1.9937179857076368e-05, "loss": 1.0985, "step": 721 }, { "epoch": 0.1237970722506805, "grad_norm": 1.6953125, "learning_rate": 1.993697758782757e-05, "loss": 1.028, "step": 722 }, { "epoch": 0.12396853633967037, "grad_norm": 1.7265625, "learning_rate": 1.9936774994496512e-05, "loss": 1.1075, "step": 723 }, { "epoch": 0.12414000042866022, "grad_norm": 1.828125, "learning_rate": 1.99365720770898e-05, "loss": 1.0201, "step": 724 }, { "epoch": 0.12431146451765009, "grad_norm": 1.7578125, "learning_rate": 1.993636883561406e-05, "loss": 1.1393, "step": 725 }, { "epoch": 0.12448292860663995, "grad_norm": 1.7265625, "learning_rate": 1.9936165270075915e-05, "loss": 1.0818, "step": 726 }, { "epoch": 0.1246543926956298, "grad_norm": 1.7421875, "learning_rate": 1.9935961380482003e-05, "loss": 1.0204, "step": 727 }, { "epoch": 0.12482585678461967, "grad_norm": 1.65625, "learning_rate": 1.9935757166838974e-05, "loss": 1.1329, "step": 728 }, { "epoch": 0.12499732087360953, "grad_norm": 1.7421875, "learning_rate": 1.993555262915349e-05, "loss": 1.158, "step": 729 }, { "epoch": 0.1251687849625994, "grad_norm": 1.671875, "learning_rate": 1.9935347767432224e-05, "loss": 1.151, "step": 730 }, { "epoch": 0.12534024905158925, "grad_norm": 1.6328125, "learning_rate": 1.993514258168185e-05, "loss": 1.1295, "step": 731 }, { "epoch": 0.12551171314057913, "grad_norm": 1.6328125, "learning_rate": 1.9934937071909064e-05, "loss": 1.1672, "step": 732 }, { "epoch": 0.12568317722956898, "grad_norm": 1.7578125, "learning_rate": 1.993473123812057e-05, "loss": 1.0816, "step": 733 }, { "epoch": 0.12585464131855884, "grad_norm": 1.71875, "learning_rate": 1.993452508032308e-05, "loss": 1.0609, "step": 734 }, { "epoch": 0.12602610540754872, "grad_norm": 1.6015625, "learning_rate": 1.9934318598523315e-05, "loss": 1.0721, "step": 735 }, { "epoch": 0.12619756949653857, "grad_norm": 1.7265625, "learning_rate": 1.9934111792728015e-05, "loss": 1.1286, "step": 736 }, { "epoch": 0.12636903358552842, "grad_norm": 1.78125, "learning_rate": 1.993390466294392e-05, "loss": 1.134, "step": 737 }, { "epoch": 0.1265404976745183, "grad_norm": 1.6875, "learning_rate": 1.9933697209177785e-05, "loss": 1.1024, "step": 738 }, { "epoch": 0.12671196176350816, "grad_norm": 1.6171875, "learning_rate": 1.9933489431436375e-05, "loss": 1.0655, "step": 739 }, { "epoch": 0.126883425852498, "grad_norm": 1.703125, "learning_rate": 1.9933281329726473e-05, "loss": 1.0384, "step": 740 }, { "epoch": 0.1270548899414879, "grad_norm": 1.6328125, "learning_rate": 1.993307290405486e-05, "loss": 1.0531, "step": 741 }, { "epoch": 0.12722635403047775, "grad_norm": 1.703125, "learning_rate": 1.9932864154428335e-05, "loss": 1.0878, "step": 742 }, { "epoch": 0.1273978181194676, "grad_norm": 1.71875, "learning_rate": 1.9932655080853706e-05, "loss": 1.1547, "step": 743 }, { "epoch": 0.12756928220845748, "grad_norm": 1.8046875, "learning_rate": 1.9932445683337795e-05, "loss": 1.1422, "step": 744 }, { "epoch": 0.12774074629744733, "grad_norm": 1.765625, "learning_rate": 1.9932235961887424e-05, "loss": 1.1325, "step": 745 }, { "epoch": 0.1279122103864372, "grad_norm": 1.78125, "learning_rate": 1.9932025916509436e-05, "loss": 1.122, "step": 746 }, { "epoch": 0.12808367447542704, "grad_norm": 1.6640625, "learning_rate": 1.9931815547210686e-05, "loss": 1.1181, "step": 747 }, { "epoch": 0.12825513856441692, "grad_norm": 1.640625, "learning_rate": 1.993160485399803e-05, "loss": 1.0483, "step": 748 }, { "epoch": 0.12842660265340677, "grad_norm": 1.796875, "learning_rate": 1.9931393836878338e-05, "loss": 1.1126, "step": 749 }, { "epoch": 0.12859806674239663, "grad_norm": 1.6484375, "learning_rate": 1.99311824958585e-05, "loss": 1.0719, "step": 750 }, { "epoch": 0.1287695308313865, "grad_norm": 1.7578125, "learning_rate": 1.99309708309454e-05, "loss": 1.1756, "step": 751 }, { "epoch": 0.12894099492037636, "grad_norm": 1.7265625, "learning_rate": 1.9930758842145947e-05, "loss": 1.1677, "step": 752 }, { "epoch": 0.12911245900936622, "grad_norm": 1.6328125, "learning_rate": 1.993054652946705e-05, "loss": 1.1622, "step": 753 }, { "epoch": 0.1292839230983561, "grad_norm": 1.71875, "learning_rate": 1.9930333892915636e-05, "loss": 1.0601, "step": 754 }, { "epoch": 0.12945538718734595, "grad_norm": 1.5859375, "learning_rate": 1.993012093249864e-05, "loss": 1.022, "step": 755 }, { "epoch": 0.1296268512763358, "grad_norm": 1.6796875, "learning_rate": 1.9929907648223004e-05, "loss": 1.122, "step": 756 }, { "epoch": 0.12979831536532568, "grad_norm": 1.6171875, "learning_rate": 1.9929694040095694e-05, "loss": 1.0447, "step": 757 }, { "epoch": 0.12996977945431554, "grad_norm": 1.6796875, "learning_rate": 1.9929480108123666e-05, "loss": 1.1163, "step": 758 }, { "epoch": 0.1301412435433054, "grad_norm": 1.6875, "learning_rate": 1.99292658523139e-05, "loss": 1.136, "step": 759 }, { "epoch": 0.13031270763229527, "grad_norm": 1.7265625, "learning_rate": 1.9929051272673385e-05, "loss": 1.0643, "step": 760 }, { "epoch": 0.13048417172128512, "grad_norm": 1.7421875, "learning_rate": 1.9928836369209115e-05, "loss": 1.0715, "step": 761 }, { "epoch": 0.13065563581027498, "grad_norm": 1.75, "learning_rate": 1.9928621141928107e-05, "loss": 1.0668, "step": 762 }, { "epoch": 0.13082709989926486, "grad_norm": 1.796875, "learning_rate": 1.9928405590837377e-05, "loss": 1.2258, "step": 763 }, { "epoch": 0.1309985639882547, "grad_norm": 1.7109375, "learning_rate": 1.992818971594395e-05, "loss": 1.0404, "step": 764 }, { "epoch": 0.13117002807724457, "grad_norm": 1.6484375, "learning_rate": 1.992797351725487e-05, "loss": 1.0869, "step": 765 }, { "epoch": 0.13134149216623445, "grad_norm": 1.5859375, "learning_rate": 1.9927756994777193e-05, "loss": 1.0574, "step": 766 }, { "epoch": 0.1315129562552243, "grad_norm": 1.8515625, "learning_rate": 1.9927540148517976e-05, "loss": 1.1286, "step": 767 }, { "epoch": 0.13168442034421415, "grad_norm": 1.734375, "learning_rate": 1.9927322978484283e-05, "loss": 1.129, "step": 768 }, { "epoch": 0.131855884433204, "grad_norm": 1.703125, "learning_rate": 1.9927105484683214e-05, "loss": 1.0774, "step": 769 }, { "epoch": 0.1320273485221939, "grad_norm": 1.7578125, "learning_rate": 1.9926887667121847e-05, "loss": 1.0588, "step": 770 }, { "epoch": 0.13219881261118374, "grad_norm": 1.625, "learning_rate": 1.9926669525807295e-05, "loss": 1.1937, "step": 771 }, { "epoch": 0.1323702767001736, "grad_norm": 1.6328125, "learning_rate": 1.9926451060746668e-05, "loss": 1.1166, "step": 772 }, { "epoch": 0.13254174078916348, "grad_norm": 1.6796875, "learning_rate": 1.9926232271947094e-05, "loss": 1.1121, "step": 773 }, { "epoch": 0.13271320487815333, "grad_norm": 1.796875, "learning_rate": 1.9926013159415705e-05, "loss": 1.1396, "step": 774 }, { "epoch": 0.13288466896714318, "grad_norm": 1.7265625, "learning_rate": 1.992579372315965e-05, "loss": 1.0838, "step": 775 }, { "epoch": 0.13305613305613306, "grad_norm": 1.7734375, "learning_rate": 1.992557396318608e-05, "loss": 1.1048, "step": 776 }, { "epoch": 0.13322759714512292, "grad_norm": 1.6328125, "learning_rate": 1.9925353879502175e-05, "loss": 1.047, "step": 777 }, { "epoch": 0.13339906123411277, "grad_norm": 1.6875, "learning_rate": 1.9925133472115098e-05, "loss": 1.0915, "step": 778 }, { "epoch": 0.13357052532310265, "grad_norm": 1.796875, "learning_rate": 1.9924912741032048e-05, "loss": 1.1477, "step": 779 }, { "epoch": 0.1337419894120925, "grad_norm": 1.59375, "learning_rate": 1.9924691686260217e-05, "loss": 1.0514, "step": 780 }, { "epoch": 0.13391345350108236, "grad_norm": 1.5859375, "learning_rate": 1.992447030780682e-05, "loss": 1.0808, "step": 781 }, { "epoch": 0.13408491759007224, "grad_norm": 1.75, "learning_rate": 1.992424860567907e-05, "loss": 1.165, "step": 782 }, { "epoch": 0.1342563816790621, "grad_norm": 1.75, "learning_rate": 1.9924026579884206e-05, "loss": 1.0896, "step": 783 }, { "epoch": 0.13442784576805195, "grad_norm": 1.671875, "learning_rate": 1.9923804230429464e-05, "loss": 1.1519, "step": 784 }, { "epoch": 0.13459930985704183, "grad_norm": 1.6328125, "learning_rate": 1.9923581557322094e-05, "loss": 1.0796, "step": 785 }, { "epoch": 0.13477077394603168, "grad_norm": 1.7265625, "learning_rate": 1.9923358560569364e-05, "loss": 1.1155, "step": 786 }, { "epoch": 0.13494223803502153, "grad_norm": 1.6796875, "learning_rate": 1.992313524017854e-05, "loss": 1.0285, "step": 787 }, { "epoch": 0.13511370212401141, "grad_norm": 1.7265625, "learning_rate": 1.992291159615691e-05, "loss": 1.1588, "step": 788 }, { "epoch": 0.13528516621300127, "grad_norm": 1.7890625, "learning_rate": 1.9922687628511768e-05, "loss": 1.181, "step": 789 }, { "epoch": 0.13545663030199112, "grad_norm": 1.703125, "learning_rate": 1.9922463337250417e-05, "loss": 1.0962, "step": 790 }, { "epoch": 0.135628094390981, "grad_norm": 1.796875, "learning_rate": 1.992223872238017e-05, "loss": 1.1206, "step": 791 }, { "epoch": 0.13579955847997086, "grad_norm": 1.703125, "learning_rate": 1.9922013783908358e-05, "loss": 1.117, "step": 792 }, { "epoch": 0.1359710225689607, "grad_norm": 1.7265625, "learning_rate": 1.9921788521842308e-05, "loss": 1.0403, "step": 793 }, { "epoch": 0.13614248665795056, "grad_norm": 1.8359375, "learning_rate": 1.9921562936189377e-05, "loss": 1.0875, "step": 794 }, { "epoch": 0.13631395074694044, "grad_norm": 1.6796875, "learning_rate": 1.9921337026956918e-05, "loss": 1.1067, "step": 795 }, { "epoch": 0.1364854148359303, "grad_norm": 1.5390625, "learning_rate": 1.9921110794152296e-05, "loss": 1.0446, "step": 796 }, { "epoch": 0.13665687892492015, "grad_norm": 1.671875, "learning_rate": 1.992088423778289e-05, "loss": 0.9725, "step": 797 }, { "epoch": 0.13682834301391003, "grad_norm": 1.8359375, "learning_rate": 1.9920657357856092e-05, "loss": 1.113, "step": 798 }, { "epoch": 0.13699980710289988, "grad_norm": 1.9375, "learning_rate": 1.9920430154379302e-05, "loss": 1.1265, "step": 799 }, { "epoch": 0.13717127119188974, "grad_norm": 1.6875, "learning_rate": 1.9920202627359924e-05, "loss": 1.0608, "step": 800 }, { "epoch": 0.13734273528087962, "grad_norm": 1.734375, "learning_rate": 1.9919974776805386e-05, "loss": 1.0655, "step": 801 }, { "epoch": 0.13751419936986947, "grad_norm": 1.703125, "learning_rate": 1.9919746602723113e-05, "loss": 1.1335, "step": 802 }, { "epoch": 0.13768566345885933, "grad_norm": 1.7265625, "learning_rate": 1.991951810512055e-05, "loss": 1.1218, "step": 803 }, { "epoch": 0.1378571275478492, "grad_norm": 1.6328125, "learning_rate": 1.991928928400515e-05, "loss": 1.0527, "step": 804 }, { "epoch": 0.13802859163683906, "grad_norm": 1.609375, "learning_rate": 1.9919060139384368e-05, "loss": 1.0453, "step": 805 }, { "epoch": 0.1382000557258289, "grad_norm": 1.75, "learning_rate": 1.991883067126569e-05, "loss": 1.1147, "step": 806 }, { "epoch": 0.1383715198148188, "grad_norm": 1.75, "learning_rate": 1.991860087965659e-05, "loss": 1.1992, "step": 807 }, { "epoch": 0.13854298390380865, "grad_norm": 1.6171875, "learning_rate": 1.9918370764564563e-05, "loss": 1.0232, "step": 808 }, { "epoch": 0.1387144479927985, "grad_norm": 1.578125, "learning_rate": 1.9918140325997117e-05, "loss": 1.0054, "step": 809 }, { "epoch": 0.13888591208178838, "grad_norm": 1.6953125, "learning_rate": 1.991790956396177e-05, "loss": 1.0625, "step": 810 }, { "epoch": 0.13905737617077824, "grad_norm": 1.671875, "learning_rate": 1.9917678478466043e-05, "loss": 1.0785, "step": 811 }, { "epoch": 0.1392288402597681, "grad_norm": 1.671875, "learning_rate": 1.9917447069517473e-05, "loss": 1.1155, "step": 812 }, { "epoch": 0.13940030434875797, "grad_norm": 1.78125, "learning_rate": 1.991721533712361e-05, "loss": 1.0851, "step": 813 }, { "epoch": 0.13957176843774782, "grad_norm": 1.8046875, "learning_rate": 1.9916983281292008e-05, "loss": 1.1355, "step": 814 }, { "epoch": 0.13974323252673768, "grad_norm": 1.703125, "learning_rate": 1.9916750902030243e-05, "loss": 1.1464, "step": 815 }, { "epoch": 0.13991469661572756, "grad_norm": 1.75, "learning_rate": 1.991651819934588e-05, "loss": 1.0878, "step": 816 }, { "epoch": 0.1400861607047174, "grad_norm": 1.7578125, "learning_rate": 1.9916285173246522e-05, "loss": 1.0585, "step": 817 }, { "epoch": 0.14025762479370726, "grad_norm": 1.6796875, "learning_rate": 1.9916051823739758e-05, "loss": 1.0578, "step": 818 }, { "epoch": 0.14042908888269712, "grad_norm": 1.734375, "learning_rate": 1.991581815083321e-05, "loss": 1.1667, "step": 819 }, { "epoch": 0.140600552971687, "grad_norm": 1.6796875, "learning_rate": 1.9915584154534492e-05, "loss": 0.9936, "step": 820 }, { "epoch": 0.14077201706067685, "grad_norm": 1.59375, "learning_rate": 1.991534983485123e-05, "loss": 0.9491, "step": 821 }, { "epoch": 0.1409434811496667, "grad_norm": 1.75, "learning_rate": 1.9915115191791078e-05, "loss": 1.15, "step": 822 }, { "epoch": 0.1411149452386566, "grad_norm": 1.6328125, "learning_rate": 1.991488022536168e-05, "loss": 1.0197, "step": 823 }, { "epoch": 0.14128640932764644, "grad_norm": 1.7265625, "learning_rate": 1.99146449355707e-05, "loss": 1.0575, "step": 824 }, { "epoch": 0.1414578734166363, "grad_norm": 1.6953125, "learning_rate": 1.9914409322425817e-05, "loss": 1.0816, "step": 825 }, { "epoch": 0.14162933750562617, "grad_norm": 1.7421875, "learning_rate": 1.9914173385934715e-05, "loss": 1.1971, "step": 826 }, { "epoch": 0.14180080159461603, "grad_norm": 1.875, "learning_rate": 1.991393712610508e-05, "loss": 1.1115, "step": 827 }, { "epoch": 0.14197226568360588, "grad_norm": 1.6875, "learning_rate": 1.991370054294462e-05, "loss": 1.0964, "step": 828 }, { "epoch": 0.14214372977259576, "grad_norm": 1.65625, "learning_rate": 1.9913463636461062e-05, "loss": 1.0427, "step": 829 }, { "epoch": 0.14231519386158561, "grad_norm": 1.7109375, "learning_rate": 1.991322640666212e-05, "loss": 1.1705, "step": 830 }, { "epoch": 0.14248665795057547, "grad_norm": 1.6484375, "learning_rate": 1.9912988853555536e-05, "loss": 1.0714, "step": 831 }, { "epoch": 0.14265812203956535, "grad_norm": 1.6328125, "learning_rate": 1.9912750977149056e-05, "loss": 1.0938, "step": 832 }, { "epoch": 0.1428295861285552, "grad_norm": 1.625, "learning_rate": 1.9912512777450436e-05, "loss": 1.0464, "step": 833 }, { "epoch": 0.14300105021754506, "grad_norm": 1.6015625, "learning_rate": 1.9912274254467452e-05, "loss": 1.0499, "step": 834 }, { "epoch": 0.14317251430653494, "grad_norm": 1.796875, "learning_rate": 1.9912035408207875e-05, "loss": 1.1257, "step": 835 }, { "epoch": 0.1433439783955248, "grad_norm": 1.640625, "learning_rate": 1.9911796238679502e-05, "loss": 1.0548, "step": 836 }, { "epoch": 0.14351544248451464, "grad_norm": 1.765625, "learning_rate": 1.9911556745890123e-05, "loss": 1.0836, "step": 837 }, { "epoch": 0.14368690657350452, "grad_norm": 1.671875, "learning_rate": 1.9911316929847563e-05, "loss": 1.1414, "step": 838 }, { "epoch": 0.14385837066249438, "grad_norm": 1.6484375, "learning_rate": 1.991107679055963e-05, "loss": 1.0649, "step": 839 }, { "epoch": 0.14402983475148423, "grad_norm": 2.0, "learning_rate": 1.991083632803416e-05, "loss": 1.0143, "step": 840 }, { "epoch": 0.14420129884047408, "grad_norm": 1.7421875, "learning_rate": 1.9910595542279002e-05, "loss": 1.1205, "step": 841 }, { "epoch": 0.14437276292946397, "grad_norm": 1.625, "learning_rate": 1.9910354433302e-05, "loss": 0.9626, "step": 842 }, { "epoch": 0.14454422701845382, "grad_norm": 1.6796875, "learning_rate": 1.991011300111102e-05, "loss": 0.9339, "step": 843 }, { "epoch": 0.14471569110744367, "grad_norm": 1.7109375, "learning_rate": 1.990987124571394e-05, "loss": 1.0628, "step": 844 }, { "epoch": 0.14488715519643355, "grad_norm": 1.71875, "learning_rate": 1.9909629167118638e-05, "loss": 1.082, "step": 845 }, { "epoch": 0.1450586192854234, "grad_norm": 1.9375, "learning_rate": 1.9909386765333016e-05, "loss": 1.1171, "step": 846 }, { "epoch": 0.14523008337441326, "grad_norm": 1.71875, "learning_rate": 1.9909144040364975e-05, "loss": 1.0785, "step": 847 }, { "epoch": 0.14540154746340314, "grad_norm": 3.125, "learning_rate": 1.990890099222243e-05, "loss": 1.2007, "step": 848 }, { "epoch": 0.145573011552393, "grad_norm": 1.734375, "learning_rate": 1.9908657620913315e-05, "loss": 1.1942, "step": 849 }, { "epoch": 0.14574447564138285, "grad_norm": 5.53125, "learning_rate": 1.990841392644556e-05, "loss": 1.1591, "step": 850 }, { "epoch": 0.14591593973037273, "grad_norm": 1.7578125, "learning_rate": 1.9908169908827113e-05, "loss": 1.1318, "step": 851 }, { "epoch": 0.14608740381936258, "grad_norm": 1.625, "learning_rate": 1.9907925568065937e-05, "loss": 0.9784, "step": 852 }, { "epoch": 0.14625886790835244, "grad_norm": 1.6953125, "learning_rate": 1.9907680904169996e-05, "loss": 1.1982, "step": 853 }, { "epoch": 0.14643033199734232, "grad_norm": 1.703125, "learning_rate": 1.9907435917147276e-05, "loss": 1.0986, "step": 854 }, { "epoch": 0.14660179608633217, "grad_norm": 1.6484375, "learning_rate": 1.9907190607005762e-05, "loss": 1.0898, "step": 855 }, { "epoch": 0.14677326017532202, "grad_norm": 1.7109375, "learning_rate": 1.990694497375345e-05, "loss": 1.0914, "step": 856 }, { "epoch": 0.1469447242643119, "grad_norm": 1.9375, "learning_rate": 1.9906699017398363e-05, "loss": 1.1664, "step": 857 }, { "epoch": 0.14711618835330176, "grad_norm": 1.7578125, "learning_rate": 1.9906452737948512e-05, "loss": 1.0166, "step": 858 }, { "epoch": 0.1472876524422916, "grad_norm": 1.71875, "learning_rate": 1.9906206135411934e-05, "loss": 1.0844, "step": 859 }, { "epoch": 0.1474591165312815, "grad_norm": 1.6484375, "learning_rate": 1.990595920979667e-05, "loss": 1.135, "step": 860 }, { "epoch": 0.14763058062027135, "grad_norm": 1.78125, "learning_rate": 1.9905711961110777e-05, "loss": 1.0987, "step": 861 }, { "epoch": 0.1478020447092612, "grad_norm": 1.65625, "learning_rate": 1.9905464389362312e-05, "loss": 1.0714, "step": 862 }, { "epoch": 0.14797350879825108, "grad_norm": 1.7578125, "learning_rate": 1.9905216494559354e-05, "loss": 1.1026, "step": 863 }, { "epoch": 0.14814497288724093, "grad_norm": 1.6640625, "learning_rate": 1.9904968276709986e-05, "loss": 1.0989, "step": 864 }, { "epoch": 0.1483164369762308, "grad_norm": 1.65625, "learning_rate": 1.9904719735822303e-05, "loss": 1.0343, "step": 865 }, { "epoch": 0.14848790106522064, "grad_norm": 1.625, "learning_rate": 1.9904470871904413e-05, "loss": 1.0357, "step": 866 }, { "epoch": 0.14865936515421052, "grad_norm": 1.6796875, "learning_rate": 1.9904221684964434e-05, "loss": 1.0805, "step": 867 }, { "epoch": 0.14883082924320037, "grad_norm": 1.65625, "learning_rate": 1.9903972175010486e-05, "loss": 1.1604, "step": 868 }, { "epoch": 0.14900229333219023, "grad_norm": 1.765625, "learning_rate": 1.9903722342050712e-05, "loss": 1.0845, "step": 869 }, { "epoch": 0.1491737574211801, "grad_norm": 1.609375, "learning_rate": 1.9903472186093257e-05, "loss": 1.022, "step": 870 }, { "epoch": 0.14934522151016996, "grad_norm": 1.703125, "learning_rate": 1.9903221707146283e-05, "loss": 1.097, "step": 871 }, { "epoch": 0.14951668559915982, "grad_norm": 1.796875, "learning_rate": 1.9902970905217955e-05, "loss": 1.1724, "step": 872 }, { "epoch": 0.1496881496881497, "grad_norm": 1.5703125, "learning_rate": 1.9902719780316455e-05, "loss": 1.1537, "step": 873 }, { "epoch": 0.14985961377713955, "grad_norm": 1.703125, "learning_rate": 1.9902468332449973e-05, "loss": 1.1606, "step": 874 }, { "epoch": 0.1500310778661294, "grad_norm": 1.7734375, "learning_rate": 1.990221656162671e-05, "loss": 1.0925, "step": 875 }, { "epoch": 0.15020254195511928, "grad_norm": 1.71875, "learning_rate": 1.9901964467854876e-05, "loss": 1.048, "step": 876 }, { "epoch": 0.15037400604410914, "grad_norm": 1.65625, "learning_rate": 1.9901712051142693e-05, "loss": 1.1162, "step": 877 }, { "epoch": 0.150545470133099, "grad_norm": 1.6015625, "learning_rate": 1.9901459311498396e-05, "loss": 1.0051, "step": 878 }, { "epoch": 0.15071693422208887, "grad_norm": 1.7578125, "learning_rate": 1.990120624893022e-05, "loss": 1.1115, "step": 879 }, { "epoch": 0.15088839831107873, "grad_norm": 1.6640625, "learning_rate": 1.990095286344643e-05, "loss": 1.0596, "step": 880 }, { "epoch": 0.15105986240006858, "grad_norm": 1.734375, "learning_rate": 1.990069915505528e-05, "loss": 1.083, "step": 881 }, { "epoch": 0.15123132648905846, "grad_norm": 1.7734375, "learning_rate": 1.9900445123765052e-05, "loss": 1.0373, "step": 882 }, { "epoch": 0.1514027905780483, "grad_norm": 1.625, "learning_rate": 1.990019076958402e-05, "loss": 1.0076, "step": 883 }, { "epoch": 0.15157425466703817, "grad_norm": 1.8359375, "learning_rate": 1.9899936092520495e-05, "loss": 1.1757, "step": 884 }, { "epoch": 0.15174571875602805, "grad_norm": 1.703125, "learning_rate": 1.9899681092582768e-05, "loss": 1.1041, "step": 885 }, { "epoch": 0.1519171828450179, "grad_norm": 1.6796875, "learning_rate": 1.9899425769779165e-05, "loss": 1.1192, "step": 886 }, { "epoch": 0.15208864693400775, "grad_norm": 1.6328125, "learning_rate": 1.9899170124118007e-05, "loss": 1.1226, "step": 887 }, { "epoch": 0.1522601110229976, "grad_norm": 1.65625, "learning_rate": 1.9898914155607635e-05, "loss": 1.0934, "step": 888 }, { "epoch": 0.1524315751119875, "grad_norm": 1.7265625, "learning_rate": 1.98986578642564e-05, "loss": 1.1332, "step": 889 }, { "epoch": 0.15260303920097734, "grad_norm": 1.6328125, "learning_rate": 1.9898401250072653e-05, "loss": 1.1179, "step": 890 }, { "epoch": 0.1527745032899672, "grad_norm": 1.6640625, "learning_rate": 1.9898144313064765e-05, "loss": 1.1094, "step": 891 }, { "epoch": 0.15294596737895708, "grad_norm": 1.671875, "learning_rate": 1.9897887053241126e-05, "loss": 1.0668, "step": 892 }, { "epoch": 0.15311743146794693, "grad_norm": 1.59375, "learning_rate": 1.9897629470610113e-05, "loss": 1.104, "step": 893 }, { "epoch": 0.15328889555693678, "grad_norm": 1.671875, "learning_rate": 1.9897371565180133e-05, "loss": 1.0576, "step": 894 }, { "epoch": 0.15346035964592666, "grad_norm": 1.7109375, "learning_rate": 1.9897113336959595e-05, "loss": 1.1034, "step": 895 }, { "epoch": 0.15363182373491652, "grad_norm": 1.8046875, "learning_rate": 1.9896854785956922e-05, "loss": 1.0618, "step": 896 }, { "epoch": 0.15380328782390637, "grad_norm": 1.6640625, "learning_rate": 1.9896595912180547e-05, "loss": 1.1598, "step": 897 }, { "epoch": 0.15397475191289625, "grad_norm": 1.59375, "learning_rate": 1.9896336715638913e-05, "loss": 1.0393, "step": 898 }, { "epoch": 0.1541462160018861, "grad_norm": 1.6875, "learning_rate": 1.989607719634047e-05, "loss": 1.1174, "step": 899 }, { "epoch": 0.15431768009087596, "grad_norm": 1.671875, "learning_rate": 1.989581735429369e-05, "loss": 1.1053, "step": 900 }, { "epoch": 0.15448914417986584, "grad_norm": 1.6796875, "learning_rate": 1.989555718950704e-05, "loss": 1.072, "step": 901 }, { "epoch": 0.1546606082688557, "grad_norm": 1.7734375, "learning_rate": 1.9895296701989002e-05, "loss": 1.095, "step": 902 }, { "epoch": 0.15483207235784555, "grad_norm": 1.65625, "learning_rate": 1.989503589174808e-05, "loss": 1.1022, "step": 903 }, { "epoch": 0.15500353644683543, "grad_norm": 1.65625, "learning_rate": 1.9894774758792775e-05, "loss": 0.9856, "step": 904 }, { "epoch": 0.15517500053582528, "grad_norm": 1.7265625, "learning_rate": 1.9894513303131607e-05, "loss": 1.0835, "step": 905 }, { "epoch": 0.15534646462481513, "grad_norm": 1.6796875, "learning_rate": 1.98942515247731e-05, "loss": 1.0362, "step": 906 }, { "epoch": 0.15551792871380501, "grad_norm": 1.6875, "learning_rate": 1.9893989423725792e-05, "loss": 1.1525, "step": 907 }, { "epoch": 0.15568939280279487, "grad_norm": 1.6171875, "learning_rate": 1.989372699999823e-05, "loss": 1.0246, "step": 908 }, { "epoch": 0.15586085689178472, "grad_norm": 1.6953125, "learning_rate": 1.9893464253598974e-05, "loss": 1.0513, "step": 909 }, { "epoch": 0.1560323209807746, "grad_norm": 1.6875, "learning_rate": 1.9893201184536598e-05, "loss": 1.055, "step": 910 }, { "epoch": 0.15620378506976446, "grad_norm": 1.8046875, "learning_rate": 1.9892937792819676e-05, "loss": 1.182, "step": 911 }, { "epoch": 0.1563752491587543, "grad_norm": 1.609375, "learning_rate": 1.9892674078456795e-05, "loss": 1.0651, "step": 912 }, { "epoch": 0.15654671324774416, "grad_norm": 1.7578125, "learning_rate": 1.989241004145656e-05, "loss": 1.1454, "step": 913 }, { "epoch": 0.15671817733673404, "grad_norm": 1.703125, "learning_rate": 1.989214568182759e-05, "loss": 1.1123, "step": 914 }, { "epoch": 0.1568896414257239, "grad_norm": 1.625, "learning_rate": 1.9891880999578492e-05, "loss": 1.0728, "step": 915 }, { "epoch": 0.15706110551471375, "grad_norm": 1.8515625, "learning_rate": 1.9891615994717904e-05, "loss": 1.0941, "step": 916 }, { "epoch": 0.15723256960370363, "grad_norm": 1.6796875, "learning_rate": 1.9891350667254474e-05, "loss": 1.1077, "step": 917 }, { "epoch": 0.15740403369269348, "grad_norm": 1.6171875, "learning_rate": 1.9891085017196848e-05, "loss": 1.0342, "step": 918 }, { "epoch": 0.15757549778168334, "grad_norm": 1.7421875, "learning_rate": 1.98908190445537e-05, "loss": 1.0765, "step": 919 }, { "epoch": 0.15774696187067322, "grad_norm": 1.65625, "learning_rate": 1.989055274933369e-05, "loss": 1.1752, "step": 920 }, { "epoch": 0.15791842595966307, "grad_norm": 1.6953125, "learning_rate": 1.9890286131545514e-05, "loss": 1.1564, "step": 921 }, { "epoch": 0.15808989004865293, "grad_norm": 1.6875, "learning_rate": 1.9890019191197863e-05, "loss": 1.0157, "step": 922 }, { "epoch": 0.1582613541376428, "grad_norm": 1.6875, "learning_rate": 1.9889751928299446e-05, "loss": 0.9963, "step": 923 }, { "epoch": 0.15843281822663266, "grad_norm": 1.6640625, "learning_rate": 1.9889484342858974e-05, "loss": 1.0637, "step": 924 }, { "epoch": 0.1586042823156225, "grad_norm": 1.7421875, "learning_rate": 1.988921643488518e-05, "loss": 1.1039, "step": 925 }, { "epoch": 0.1587757464046124, "grad_norm": 1.8046875, "learning_rate": 1.9888948204386797e-05, "loss": 1.1995, "step": 926 }, { "epoch": 0.15894721049360225, "grad_norm": 1.640625, "learning_rate": 1.9888679651372572e-05, "loss": 1.0425, "step": 927 }, { "epoch": 0.1591186745825921, "grad_norm": 1.75, "learning_rate": 1.988841077585127e-05, "loss": 1.0697, "step": 928 }, { "epoch": 0.15929013867158198, "grad_norm": 1.6875, "learning_rate": 1.9888141577831656e-05, "loss": 1.0453, "step": 929 }, { "epoch": 0.15946160276057184, "grad_norm": 1.7109375, "learning_rate": 1.988787205732251e-05, "loss": 1.1458, "step": 930 }, { "epoch": 0.1596330668495617, "grad_norm": 1.6796875, "learning_rate": 1.9887602214332622e-05, "loss": 1.1275, "step": 931 }, { "epoch": 0.15980453093855157, "grad_norm": 1.671875, "learning_rate": 1.9887332048870792e-05, "loss": 1.0878, "step": 932 }, { "epoch": 0.15997599502754142, "grad_norm": 1.6484375, "learning_rate": 1.988706156094583e-05, "loss": 1.0844, "step": 933 }, { "epoch": 0.16014745911653128, "grad_norm": 1.6875, "learning_rate": 1.988679075056656e-05, "loss": 1.0174, "step": 934 }, { "epoch": 0.16031892320552113, "grad_norm": 1.6875, "learning_rate": 1.988651961774182e-05, "loss": 1.0657, "step": 935 }, { "epoch": 0.160490387294511, "grad_norm": 1.640625, "learning_rate": 1.9886248162480436e-05, "loss": 1.0754, "step": 936 }, { "epoch": 0.16066185138350086, "grad_norm": 1.734375, "learning_rate": 1.9885976384791276e-05, "loss": 1.1581, "step": 937 }, { "epoch": 0.16083331547249072, "grad_norm": 1.6484375, "learning_rate": 1.98857042846832e-05, "loss": 1.0489, "step": 938 }, { "epoch": 0.1610047795614806, "grad_norm": 1.6328125, "learning_rate": 1.9885431862165078e-05, "loss": 1.1013, "step": 939 }, { "epoch": 0.16117624365047045, "grad_norm": 1.8046875, "learning_rate": 1.98851591172458e-05, "loss": 1.019, "step": 940 }, { "epoch": 0.1613477077394603, "grad_norm": 1.5703125, "learning_rate": 1.9884886049934258e-05, "loss": 0.9853, "step": 941 }, { "epoch": 0.1615191718284502, "grad_norm": 1.6875, "learning_rate": 1.988461266023936e-05, "loss": 1.0605, "step": 942 }, { "epoch": 0.16169063591744004, "grad_norm": 1.7421875, "learning_rate": 1.9884338948170022e-05, "loss": 1.0944, "step": 943 }, { "epoch": 0.1618621000064299, "grad_norm": 1.7109375, "learning_rate": 1.9884064913735165e-05, "loss": 1.0564, "step": 944 }, { "epoch": 0.16203356409541977, "grad_norm": 1.7109375, "learning_rate": 1.9883790556943736e-05, "loss": 1.1367, "step": 945 }, { "epoch": 0.16220502818440963, "grad_norm": 1.65625, "learning_rate": 1.9883515877804676e-05, "loss": 1.095, "step": 946 }, { "epoch": 0.16237649227339948, "grad_norm": 1.65625, "learning_rate": 1.9883240876326947e-05, "loss": 1.0305, "step": 947 }, { "epoch": 0.16254795636238936, "grad_norm": 1.6875, "learning_rate": 1.9882965552519517e-05, "loss": 1.0605, "step": 948 }, { "epoch": 0.16271942045137922, "grad_norm": 1.6171875, "learning_rate": 1.988268990639136e-05, "loss": 1.0784, "step": 949 }, { "epoch": 0.16289088454036907, "grad_norm": 1.6796875, "learning_rate": 1.9882413937951475e-05, "loss": 1.1448, "step": 950 }, { "epoch": 0.16306234862935895, "grad_norm": 1.6875, "learning_rate": 1.9882137647208858e-05, "loss": 1.0633, "step": 951 }, { "epoch": 0.1632338127183488, "grad_norm": 1.6875, "learning_rate": 1.9881861034172514e-05, "loss": 1.0318, "step": 952 }, { "epoch": 0.16340527680733866, "grad_norm": 1.65625, "learning_rate": 1.9881584098851475e-05, "loss": 0.9796, "step": 953 }, { "epoch": 0.16357674089632854, "grad_norm": 1.6796875, "learning_rate": 1.9881306841254768e-05, "loss": 1.1173, "step": 954 }, { "epoch": 0.1637482049853184, "grad_norm": 1.7109375, "learning_rate": 1.9881029261391434e-05, "loss": 1.0393, "step": 955 }, { "epoch": 0.16391966907430824, "grad_norm": 1.8125, "learning_rate": 1.9880751359270526e-05, "loss": 1.1808, "step": 956 }, { "epoch": 0.16409113316329813, "grad_norm": 1.703125, "learning_rate": 1.988047313490111e-05, "loss": 1.1231, "step": 957 }, { "epoch": 0.16426259725228798, "grad_norm": 1.6171875, "learning_rate": 1.988019458829226e-05, "loss": 1.0345, "step": 958 }, { "epoch": 0.16443406134127783, "grad_norm": 1.7421875, "learning_rate": 1.9879915719453055e-05, "loss": 1.0765, "step": 959 }, { "epoch": 0.16460552543026769, "grad_norm": 1.7578125, "learning_rate": 1.9879636528392597e-05, "loss": 1.1324, "step": 960 }, { "epoch": 0.16477698951925757, "grad_norm": 1.609375, "learning_rate": 1.9879357015119993e-05, "loss": 1.0949, "step": 961 }, { "epoch": 0.16494845360824742, "grad_norm": 1.53125, "learning_rate": 1.9879077179644346e-05, "loss": 1.0602, "step": 962 }, { "epoch": 0.16511991769723727, "grad_norm": 1.640625, "learning_rate": 1.9878797021974795e-05, "loss": 1.1427, "step": 963 }, { "epoch": 0.16529138178622715, "grad_norm": 1.765625, "learning_rate": 1.9878516542120473e-05, "loss": 1.0229, "step": 964 }, { "epoch": 0.165462845875217, "grad_norm": 1.59375, "learning_rate": 1.987823574009053e-05, "loss": 1.0367, "step": 965 }, { "epoch": 0.16563430996420686, "grad_norm": 1.7265625, "learning_rate": 1.9877954615894118e-05, "loss": 1.0842, "step": 966 }, { "epoch": 0.16580577405319674, "grad_norm": 1.6796875, "learning_rate": 1.9877673169540408e-05, "loss": 1.0605, "step": 967 }, { "epoch": 0.1659772381421866, "grad_norm": 1.6875, "learning_rate": 1.9877391401038583e-05, "loss": 1.1082, "step": 968 }, { "epoch": 0.16614870223117645, "grad_norm": 1.7265625, "learning_rate": 1.987710931039783e-05, "loss": 1.171, "step": 969 }, { "epoch": 0.16632016632016633, "grad_norm": 1.7109375, "learning_rate": 1.9876826897627347e-05, "loss": 1.0277, "step": 970 }, { "epoch": 0.16649163040915618, "grad_norm": 1.75, "learning_rate": 1.9876544162736343e-05, "loss": 1.0761, "step": 971 }, { "epoch": 0.16666309449814604, "grad_norm": 1.640625, "learning_rate": 1.987626110573405e-05, "loss": 1.0559, "step": 972 }, { "epoch": 0.16683455858713592, "grad_norm": 1.609375, "learning_rate": 1.9875977726629685e-05, "loss": 1.0628, "step": 973 }, { "epoch": 0.16700602267612577, "grad_norm": 1.65625, "learning_rate": 1.98756940254325e-05, "loss": 1.0941, "step": 974 }, { "epoch": 0.16717748676511562, "grad_norm": 1.6953125, "learning_rate": 1.9875410002151745e-05, "loss": 1.0969, "step": 975 }, { "epoch": 0.1673489508541055, "grad_norm": 1.640625, "learning_rate": 1.987512565679668e-05, "loss": 1.0515, "step": 976 }, { "epoch": 0.16752041494309536, "grad_norm": 1.6953125, "learning_rate": 1.987484098937658e-05, "loss": 1.0269, "step": 977 }, { "epoch": 0.1676918790320852, "grad_norm": 1.6015625, "learning_rate": 1.9874555999900735e-05, "loss": 1.0805, "step": 978 }, { "epoch": 0.1678633431210751, "grad_norm": 1.6953125, "learning_rate": 1.9874270688378433e-05, "loss": 1.1372, "step": 979 }, { "epoch": 0.16803480721006495, "grad_norm": 1.6953125, "learning_rate": 1.987398505481898e-05, "loss": 1.1989, "step": 980 }, { "epoch": 0.1682062712990548, "grad_norm": 1.5859375, "learning_rate": 1.987369909923169e-05, "loss": 1.0114, "step": 981 }, { "epoch": 0.16837773538804465, "grad_norm": 1.640625, "learning_rate": 1.9873412821625894e-05, "loss": 1.0541, "step": 982 }, { "epoch": 0.16854919947703453, "grad_norm": 1.6171875, "learning_rate": 1.987312622201093e-05, "loss": 1.0933, "step": 983 }, { "epoch": 0.1687206635660244, "grad_norm": 1.671875, "learning_rate": 1.9872839300396132e-05, "loss": 1.0502, "step": 984 }, { "epoch": 0.16889212765501424, "grad_norm": 1.6640625, "learning_rate": 1.987255205679087e-05, "loss": 1.0388, "step": 985 }, { "epoch": 0.16906359174400412, "grad_norm": 1.640625, "learning_rate": 1.9872264491204514e-05, "loss": 1.0497, "step": 986 }, { "epoch": 0.16923505583299397, "grad_norm": 1.7421875, "learning_rate": 1.9871976603646432e-05, "loss": 1.0919, "step": 987 }, { "epoch": 0.16940651992198383, "grad_norm": 1.734375, "learning_rate": 1.987168839412602e-05, "loss": 1.0666, "step": 988 }, { "epoch": 0.1695779840109737, "grad_norm": 1.6953125, "learning_rate": 1.9871399862652677e-05, "loss": 1.0808, "step": 989 }, { "epoch": 0.16974944809996356, "grad_norm": 1.59375, "learning_rate": 1.9871111009235814e-05, "loss": 1.0397, "step": 990 }, { "epoch": 0.16992091218895342, "grad_norm": 1.578125, "learning_rate": 1.9870821833884845e-05, "loss": 0.9993, "step": 991 }, { "epoch": 0.1700923762779433, "grad_norm": 1.75, "learning_rate": 1.987053233660921e-05, "loss": 1.2037, "step": 992 }, { "epoch": 0.17026384036693315, "grad_norm": 1.578125, "learning_rate": 1.9870242517418346e-05, "loss": 1.0779, "step": 993 }, { "epoch": 0.170435304455923, "grad_norm": 1.6953125, "learning_rate": 1.9869952376321705e-05, "loss": 1.1726, "step": 994 }, { "epoch": 0.17060676854491288, "grad_norm": 1.671875, "learning_rate": 1.986966191332875e-05, "loss": 1.0898, "step": 995 }, { "epoch": 0.17077823263390274, "grad_norm": 1.6796875, "learning_rate": 1.9869371128448952e-05, "loss": 1.0367, "step": 996 }, { "epoch": 0.1709496967228926, "grad_norm": 1.6796875, "learning_rate": 1.98690800216918e-05, "loss": 1.063, "step": 997 }, { "epoch": 0.17112116081188247, "grad_norm": 1.953125, "learning_rate": 1.9868788593066783e-05, "loss": 1.0421, "step": 998 }, { "epoch": 0.17129262490087233, "grad_norm": 1.640625, "learning_rate": 1.9868496842583412e-05, "loss": 1.0655, "step": 999 }, { "epoch": 0.17146408898986218, "grad_norm": 1.6875, "learning_rate": 1.9868204770251194e-05, "loss": 1.1434, "step": 1000 }, { "epoch": 0.17163555307885206, "grad_norm": 1.6640625, "learning_rate": 1.9867912376079657e-05, "loss": 1.1156, "step": 1001 }, { "epoch": 0.1718070171678419, "grad_norm": 1.6328125, "learning_rate": 1.986761966007834e-05, "loss": 1.0472, "step": 1002 }, { "epoch": 0.17197848125683177, "grad_norm": 1.75, "learning_rate": 1.986732662225679e-05, "loss": 1.1131, "step": 1003 }, { "epoch": 0.17214994534582165, "grad_norm": 1.640625, "learning_rate": 1.986703326262456e-05, "loss": 1.0208, "step": 1004 }, { "epoch": 0.1723214094348115, "grad_norm": 1.609375, "learning_rate": 1.9866739581191217e-05, "loss": 1.0863, "step": 1005 }, { "epoch": 0.17249287352380135, "grad_norm": 1.6796875, "learning_rate": 1.9866445577966345e-05, "loss": 1.095, "step": 1006 }, { "epoch": 0.1726643376127912, "grad_norm": 1.7421875, "learning_rate": 1.9866151252959524e-05, "loss": 1.1355, "step": 1007 }, { "epoch": 0.1728358017017811, "grad_norm": 1.6796875, "learning_rate": 1.9865856606180364e-05, "loss": 1.0531, "step": 1008 }, { "epoch": 0.17300726579077094, "grad_norm": 1.703125, "learning_rate": 1.9865561637638466e-05, "loss": 1.0611, "step": 1009 }, { "epoch": 0.1731787298797608, "grad_norm": 1.6953125, "learning_rate": 1.9865266347343453e-05, "loss": 1.0575, "step": 1010 }, { "epoch": 0.17335019396875068, "grad_norm": 1.6171875, "learning_rate": 1.9864970735304956e-05, "loss": 0.9826, "step": 1011 }, { "epoch": 0.17352165805774053, "grad_norm": 1.6171875, "learning_rate": 1.9864674801532616e-05, "loss": 1.0855, "step": 1012 }, { "epoch": 0.17369312214673038, "grad_norm": 1.7109375, "learning_rate": 1.9864378546036082e-05, "loss": 1.1052, "step": 1013 }, { "epoch": 0.17386458623572026, "grad_norm": 1.7734375, "learning_rate": 1.986408196882502e-05, "loss": 1.0988, "step": 1014 }, { "epoch": 0.17403605032471012, "grad_norm": 1.6328125, "learning_rate": 1.9863785069909095e-05, "loss": 1.0497, "step": 1015 }, { "epoch": 0.17420751441369997, "grad_norm": 1.71875, "learning_rate": 1.9863487849298e-05, "loss": 1.0612, "step": 1016 }, { "epoch": 0.17437897850268985, "grad_norm": 1.78125, "learning_rate": 1.9863190307001426e-05, "loss": 1.0183, "step": 1017 }, { "epoch": 0.1745504425916797, "grad_norm": 1.7734375, "learning_rate": 1.986289244302907e-05, "loss": 1.077, "step": 1018 }, { "epoch": 0.17472190668066956, "grad_norm": 1.625, "learning_rate": 1.986259425739065e-05, "loss": 1.0255, "step": 1019 }, { "epoch": 0.17489337076965944, "grad_norm": 1.6640625, "learning_rate": 1.9862295750095896e-05, "loss": 1.0715, "step": 1020 }, { "epoch": 0.1750648348586493, "grad_norm": 1.6640625, "learning_rate": 1.986199692115454e-05, "loss": 1.1179, "step": 1021 }, { "epoch": 0.17523629894763915, "grad_norm": 1.6171875, "learning_rate": 1.9861697770576326e-05, "loss": 1.0499, "step": 1022 }, { "epoch": 0.17540776303662903, "grad_norm": 1.7265625, "learning_rate": 1.9861398298371012e-05, "loss": 1.1127, "step": 1023 }, { "epoch": 0.17557922712561888, "grad_norm": 1.7578125, "learning_rate": 1.9861098504548365e-05, "loss": 1.0414, "step": 1024 }, { "epoch": 0.17575069121460873, "grad_norm": 1.7265625, "learning_rate": 1.9860798389118163e-05, "loss": 1.0307, "step": 1025 }, { "epoch": 0.17592215530359862, "grad_norm": 1.6328125, "learning_rate": 1.9860497952090192e-05, "loss": 1.0727, "step": 1026 }, { "epoch": 0.17609361939258847, "grad_norm": 1.5859375, "learning_rate": 1.9860197193474253e-05, "loss": 1.0412, "step": 1027 }, { "epoch": 0.17626508348157832, "grad_norm": 1.7109375, "learning_rate": 1.9859896113280152e-05, "loss": 1.0912, "step": 1028 }, { "epoch": 0.1764365475705682, "grad_norm": 1.609375, "learning_rate": 1.985959471151771e-05, "loss": 1.1073, "step": 1029 }, { "epoch": 0.17660801165955806, "grad_norm": 1.6171875, "learning_rate": 1.9859292988196757e-05, "loss": 1.1186, "step": 1030 }, { "epoch": 0.1767794757485479, "grad_norm": 1.5703125, "learning_rate": 1.9858990943327136e-05, "loss": 1.0762, "step": 1031 }, { "epoch": 0.17695093983753776, "grad_norm": 1.7890625, "learning_rate": 1.985868857691869e-05, "loss": 1.1374, "step": 1032 }, { "epoch": 0.17712240392652764, "grad_norm": 1.703125, "learning_rate": 1.985838588898129e-05, "loss": 1.1087, "step": 1033 }, { "epoch": 0.1772938680155175, "grad_norm": 1.6953125, "learning_rate": 1.98580828795248e-05, "loss": 1.0683, "step": 1034 }, { "epoch": 0.17746533210450735, "grad_norm": 1.6484375, "learning_rate": 1.9857779548559103e-05, "loss": 1.1012, "step": 1035 }, { "epoch": 0.17763679619349723, "grad_norm": 1.75, "learning_rate": 1.9857475896094094e-05, "loss": 1.1002, "step": 1036 }, { "epoch": 0.17780826028248709, "grad_norm": 1.6875, "learning_rate": 1.9857171922139678e-05, "loss": 1.0632, "step": 1037 }, { "epoch": 0.17797972437147694, "grad_norm": 1.5546875, "learning_rate": 1.985686762670577e-05, "loss": 0.9738, "step": 1038 }, { "epoch": 0.17815118846046682, "grad_norm": 1.90625, "learning_rate": 1.9856563009802286e-05, "loss": 0.9626, "step": 1039 }, { "epoch": 0.17832265254945667, "grad_norm": 1.65625, "learning_rate": 1.985625807143917e-05, "loss": 1.1021, "step": 1040 }, { "epoch": 0.17849411663844653, "grad_norm": 1.71875, "learning_rate": 1.985595281162636e-05, "loss": 1.0798, "step": 1041 }, { "epoch": 0.1786655807274364, "grad_norm": 1.7421875, "learning_rate": 1.9855647230373817e-05, "loss": 1.1084, "step": 1042 }, { "epoch": 0.17883704481642626, "grad_norm": 1.7578125, "learning_rate": 1.9855341327691506e-05, "loss": 1.0181, "step": 1043 }, { "epoch": 0.1790085089054161, "grad_norm": 1.65625, "learning_rate": 1.9855035103589396e-05, "loss": 1.0188, "step": 1044 }, { "epoch": 0.179179972994406, "grad_norm": 1.640625, "learning_rate": 1.9854728558077488e-05, "loss": 1.1623, "step": 1045 }, { "epoch": 0.17935143708339585, "grad_norm": 1.7109375, "learning_rate": 1.9854421691165768e-05, "loss": 1.0942, "step": 1046 }, { "epoch": 0.1795229011723857, "grad_norm": 1.6015625, "learning_rate": 1.9854114502864252e-05, "loss": 0.9914, "step": 1047 }, { "epoch": 0.17969436526137558, "grad_norm": 1.7421875, "learning_rate": 1.9853806993182952e-05, "loss": 1.1027, "step": 1048 }, { "epoch": 0.17986582935036544, "grad_norm": 1.6640625, "learning_rate": 1.98534991621319e-05, "loss": 1.0728, "step": 1049 }, { "epoch": 0.1800372934393553, "grad_norm": 1.6640625, "learning_rate": 1.9853191009721137e-05, "loss": 1.1005, "step": 1050 }, { "epoch": 0.18020875752834517, "grad_norm": 1.59375, "learning_rate": 1.9852882535960712e-05, "loss": 1.0844, "step": 1051 }, { "epoch": 0.18038022161733502, "grad_norm": 1.6953125, "learning_rate": 1.985257374086068e-05, "loss": 1.0699, "step": 1052 }, { "epoch": 0.18055168570632488, "grad_norm": 1.6171875, "learning_rate": 1.985226462443112e-05, "loss": 1.0853, "step": 1053 }, { "epoch": 0.18072314979531473, "grad_norm": 1.6171875, "learning_rate": 1.9851955186682114e-05, "loss": 1.0661, "step": 1054 }, { "epoch": 0.1808946138843046, "grad_norm": 1.7265625, "learning_rate": 1.9851645427623744e-05, "loss": 1.11, "step": 1055 }, { "epoch": 0.18106607797329446, "grad_norm": 1.7421875, "learning_rate": 1.9851335347266124e-05, "loss": 1.0096, "step": 1056 }, { "epoch": 0.18123754206228432, "grad_norm": 1.7734375, "learning_rate": 1.985102494561936e-05, "loss": 1.0221, "step": 1057 }, { "epoch": 0.1814090061512742, "grad_norm": 1.7578125, "learning_rate": 1.9850714222693576e-05, "loss": 1.0572, "step": 1058 }, { "epoch": 0.18158047024026405, "grad_norm": 1.65625, "learning_rate": 1.9850403178498906e-05, "loss": 1.1359, "step": 1059 }, { "epoch": 0.1817519343292539, "grad_norm": 1.6953125, "learning_rate": 1.9850091813045496e-05, "loss": 1.0709, "step": 1060 }, { "epoch": 0.1819233984182438, "grad_norm": 1.875, "learning_rate": 1.9849780126343502e-05, "loss": 1.0282, "step": 1061 }, { "epoch": 0.18209486250723364, "grad_norm": 1.765625, "learning_rate": 1.984946811840309e-05, "loss": 1.0842, "step": 1062 }, { "epoch": 0.1822663265962235, "grad_norm": 1.71875, "learning_rate": 1.9849155789234427e-05, "loss": 1.0752, "step": 1063 }, { "epoch": 0.18243779068521337, "grad_norm": 1.6875, "learning_rate": 1.9848843138847704e-05, "loss": 0.9987, "step": 1064 }, { "epoch": 0.18260925477420323, "grad_norm": 1.671875, "learning_rate": 1.984853016725312e-05, "loss": 1.1689, "step": 1065 }, { "epoch": 0.18278071886319308, "grad_norm": 1.640625, "learning_rate": 1.9848216874460885e-05, "loss": 1.0585, "step": 1066 }, { "epoch": 0.18295218295218296, "grad_norm": 1.59375, "learning_rate": 1.984790326048121e-05, "loss": 1.1052, "step": 1067 }, { "epoch": 0.18312364704117282, "grad_norm": 1.71875, "learning_rate": 1.984758932532433e-05, "loss": 1.0769, "step": 1068 }, { "epoch": 0.18329511113016267, "grad_norm": 1.5625, "learning_rate": 1.9847275069000473e-05, "loss": 1.0894, "step": 1069 }, { "epoch": 0.18346657521915255, "grad_norm": 1.7265625, "learning_rate": 1.98469604915199e-05, "loss": 1.1557, "step": 1070 }, { "epoch": 0.1836380393081424, "grad_norm": 1.6484375, "learning_rate": 1.9846645592892862e-05, "loss": 1.0458, "step": 1071 }, { "epoch": 0.18380950339713226, "grad_norm": 1.7421875, "learning_rate": 1.9846330373129633e-05, "loss": 1.1307, "step": 1072 }, { "epoch": 0.18398096748612214, "grad_norm": 1.71875, "learning_rate": 1.9846014832240494e-05, "loss": 1.004, "step": 1073 }, { "epoch": 0.184152431575112, "grad_norm": 1.7265625, "learning_rate": 1.9845698970235733e-05, "loss": 1.0424, "step": 1074 }, { "epoch": 0.18432389566410184, "grad_norm": 1.7421875, "learning_rate": 1.9845382787125653e-05, "loss": 1.0541, "step": 1075 }, { "epoch": 0.18449535975309173, "grad_norm": 1.65625, "learning_rate": 1.984506628292057e-05, "loss": 1.0967, "step": 1076 }, { "epoch": 0.18466682384208158, "grad_norm": 1.6328125, "learning_rate": 1.9844749457630797e-05, "loss": 1.1174, "step": 1077 }, { "epoch": 0.18483828793107143, "grad_norm": 1.65625, "learning_rate": 1.9844432311266675e-05, "loss": 1.0977, "step": 1078 }, { "epoch": 0.18500975202006129, "grad_norm": 1.640625, "learning_rate": 1.9844114843838542e-05, "loss": 1.1425, "step": 1079 }, { "epoch": 0.18518121610905117, "grad_norm": 1.75, "learning_rate": 1.984379705535676e-05, "loss": 1.0752, "step": 1080 }, { "epoch": 0.18535268019804102, "grad_norm": 1.8046875, "learning_rate": 1.9843478945831684e-05, "loss": 1.1111, "step": 1081 }, { "epoch": 0.18552414428703087, "grad_norm": 1.625, "learning_rate": 1.984316051527369e-05, "loss": 1.0563, "step": 1082 }, { "epoch": 0.18569560837602075, "grad_norm": 1.765625, "learning_rate": 1.9842841763693167e-05, "loss": 1.0428, "step": 1083 }, { "epoch": 0.1858670724650106, "grad_norm": 1.5703125, "learning_rate": 1.9842522691100513e-05, "loss": 1.0658, "step": 1084 }, { "epoch": 0.18603853655400046, "grad_norm": 1.7421875, "learning_rate": 1.984220329750613e-05, "loss": 1.1422, "step": 1085 }, { "epoch": 0.18621000064299034, "grad_norm": 1.6328125, "learning_rate": 1.984188358292043e-05, "loss": 1.1716, "step": 1086 }, { "epoch": 0.1863814647319802, "grad_norm": 1.53125, "learning_rate": 1.984156354735385e-05, "loss": 1.0068, "step": 1087 }, { "epoch": 0.18655292882097005, "grad_norm": 1.6796875, "learning_rate": 1.984124319081682e-05, "loss": 1.0618, "step": 1088 }, { "epoch": 0.18672439290995993, "grad_norm": 1.6484375, "learning_rate": 1.9840922513319793e-05, "loss": 0.9207, "step": 1089 }, { "epoch": 0.18689585699894978, "grad_norm": 1.796875, "learning_rate": 1.9840601514873226e-05, "loss": 1.0818, "step": 1090 }, { "epoch": 0.18706732108793964, "grad_norm": 1.6875, "learning_rate": 1.9840280195487587e-05, "loss": 1.0598, "step": 1091 }, { "epoch": 0.18723878517692952, "grad_norm": 1.7265625, "learning_rate": 1.9839958555173354e-05, "loss": 1.1416, "step": 1092 }, { "epoch": 0.18741024926591937, "grad_norm": 1.5859375, "learning_rate": 1.9839636593941018e-05, "loss": 1.0442, "step": 1093 }, { "epoch": 0.18758171335490922, "grad_norm": 1.7109375, "learning_rate": 1.9839314311801085e-05, "loss": 1.0928, "step": 1094 }, { "epoch": 0.1877531774438991, "grad_norm": 1.71875, "learning_rate": 1.9838991708764054e-05, "loss": 1.0887, "step": 1095 }, { "epoch": 0.18792464153288896, "grad_norm": 1.5703125, "learning_rate": 1.9838668784840457e-05, "loss": 1.0573, "step": 1096 }, { "epoch": 0.1880961056218788, "grad_norm": 1.6015625, "learning_rate": 1.9838345540040823e-05, "loss": 1.0727, "step": 1097 }, { "epoch": 0.1882675697108687, "grad_norm": 1.703125, "learning_rate": 1.983802197437569e-05, "loss": 1.0522, "step": 1098 }, { "epoch": 0.18843903379985855, "grad_norm": 1.6328125, "learning_rate": 1.9837698087855615e-05, "loss": 1.0368, "step": 1099 }, { "epoch": 0.1886104978888484, "grad_norm": 1.640625, "learning_rate": 1.9837373880491162e-05, "loss": 1.1018, "step": 1100 }, { "epoch": 0.18878196197783825, "grad_norm": 1.6015625, "learning_rate": 1.98370493522929e-05, "loss": 1.0202, "step": 1101 }, { "epoch": 0.18895342606682813, "grad_norm": 1.6484375, "learning_rate": 1.9836724503271417e-05, "loss": 1.1532, "step": 1102 }, { "epoch": 0.189124890155818, "grad_norm": 1.7109375, "learning_rate": 1.9836399333437307e-05, "loss": 1.0213, "step": 1103 }, { "epoch": 0.18929635424480784, "grad_norm": 1.703125, "learning_rate": 1.9836073842801175e-05, "loss": 1.0724, "step": 1104 }, { "epoch": 0.18946781833379772, "grad_norm": 1.7265625, "learning_rate": 1.983574803137363e-05, "loss": 1.1454, "step": 1105 }, { "epoch": 0.18963928242278758, "grad_norm": 1.65625, "learning_rate": 1.9835421899165312e-05, "loss": 1.0038, "step": 1106 }, { "epoch": 0.18981074651177743, "grad_norm": 1.6875, "learning_rate": 1.9835095446186842e-05, "loss": 1.0624, "step": 1107 }, { "epoch": 0.1899822106007673, "grad_norm": 1.625, "learning_rate": 1.9834768672448877e-05, "loss": 1.1086, "step": 1108 }, { "epoch": 0.19015367468975716, "grad_norm": 1.75, "learning_rate": 1.9834441577962072e-05, "loss": 1.1179, "step": 1109 }, { "epoch": 0.19032513877874702, "grad_norm": 1.765625, "learning_rate": 1.983411416273709e-05, "loss": 1.2127, "step": 1110 }, { "epoch": 0.1904966028677369, "grad_norm": 1.734375, "learning_rate": 1.983378642678462e-05, "loss": 1.0469, "step": 1111 }, { "epoch": 0.19066806695672675, "grad_norm": 1.703125, "learning_rate": 1.983345837011534e-05, "loss": 1.1232, "step": 1112 }, { "epoch": 0.1908395310457166, "grad_norm": 1.6640625, "learning_rate": 1.9833129992739956e-05, "loss": 1.1014, "step": 1113 }, { "epoch": 0.19101099513470648, "grad_norm": 1.671875, "learning_rate": 1.983280129466917e-05, "loss": 1.0703, "step": 1114 }, { "epoch": 0.19118245922369634, "grad_norm": 1.6484375, "learning_rate": 1.983247227591371e-05, "loss": 1.0507, "step": 1115 }, { "epoch": 0.1913539233126862, "grad_norm": 1.671875, "learning_rate": 1.9832142936484303e-05, "loss": 1.0769, "step": 1116 }, { "epoch": 0.19152538740167607, "grad_norm": 1.7890625, "learning_rate": 1.983181327639169e-05, "loss": 1.071, "step": 1117 }, { "epoch": 0.19169685149066593, "grad_norm": 1.6484375, "learning_rate": 1.9831483295646623e-05, "loss": 1.1299, "step": 1118 }, { "epoch": 0.19186831557965578, "grad_norm": 1.640625, "learning_rate": 1.9831152994259863e-05, "loss": 0.9908, "step": 1119 }, { "epoch": 0.19203977966864566, "grad_norm": 1.5859375, "learning_rate": 1.9830822372242185e-05, "loss": 1.1028, "step": 1120 }, { "epoch": 0.1922112437576355, "grad_norm": 1.5703125, "learning_rate": 1.983049142960437e-05, "loss": 0.9642, "step": 1121 }, { "epoch": 0.19238270784662537, "grad_norm": 1.7265625, "learning_rate": 1.983016016635721e-05, "loss": 1.0726, "step": 1122 }, { "epoch": 0.19255417193561525, "grad_norm": 1.71875, "learning_rate": 1.982982858251151e-05, "loss": 1.0486, "step": 1123 }, { "epoch": 0.1927256360246051, "grad_norm": 1.6953125, "learning_rate": 1.9829496678078083e-05, "loss": 1.0696, "step": 1124 }, { "epoch": 0.19289710011359495, "grad_norm": 1.6171875, "learning_rate": 1.9829164453067754e-05, "loss": 1.0225, "step": 1125 }, { "epoch": 0.1930685642025848, "grad_norm": 1.7265625, "learning_rate": 1.9828831907491364e-05, "loss": 1.1109, "step": 1126 }, { "epoch": 0.1932400282915747, "grad_norm": 1.6796875, "learning_rate": 1.982849904135975e-05, "loss": 1.0883, "step": 1127 }, { "epoch": 0.19341149238056454, "grad_norm": 1.7421875, "learning_rate": 1.9828165854683774e-05, "loss": 1.1209, "step": 1128 }, { "epoch": 0.1935829564695544, "grad_norm": 1.6796875, "learning_rate": 1.98278323474743e-05, "loss": 1.1511, "step": 1129 }, { "epoch": 0.19375442055854428, "grad_norm": 1.6484375, "learning_rate": 1.9827498519742202e-05, "loss": 1.1082, "step": 1130 }, { "epoch": 0.19392588464753413, "grad_norm": 1.6640625, "learning_rate": 1.982716437149837e-05, "loss": 1.0247, "step": 1131 }, { "epoch": 0.19409734873652398, "grad_norm": 1.7734375, "learning_rate": 1.9826829902753706e-05, "loss": 1.1547, "step": 1132 }, { "epoch": 0.19426881282551386, "grad_norm": 1.671875, "learning_rate": 1.9826495113519112e-05, "loss": 1.0645, "step": 1133 }, { "epoch": 0.19444027691450372, "grad_norm": 1.6171875, "learning_rate": 1.982616000380551e-05, "loss": 1.0384, "step": 1134 }, { "epoch": 0.19461174100349357, "grad_norm": 1.71875, "learning_rate": 1.9825824573623825e-05, "loss": 1.086, "step": 1135 }, { "epoch": 0.19478320509248345, "grad_norm": 1.7109375, "learning_rate": 1.9825488822985007e-05, "loss": 1.1152, "step": 1136 }, { "epoch": 0.1949546691814733, "grad_norm": 1.5625, "learning_rate": 1.9825152751899993e-05, "loss": 0.9781, "step": 1137 }, { "epoch": 0.19512613327046316, "grad_norm": 1.5859375, "learning_rate": 1.9824816360379753e-05, "loss": 1.0205, "step": 1138 }, { "epoch": 0.19529759735945304, "grad_norm": 1.640625, "learning_rate": 1.9824479648435253e-05, "loss": 1.0696, "step": 1139 }, { "epoch": 0.1954690614484429, "grad_norm": 1.65625, "learning_rate": 1.9824142616077475e-05, "loss": 1.0341, "step": 1140 }, { "epoch": 0.19564052553743275, "grad_norm": 1.6484375, "learning_rate": 1.9823805263317415e-05, "loss": 0.989, "step": 1141 }, { "epoch": 0.19581198962642263, "grad_norm": 1.6484375, "learning_rate": 1.9823467590166073e-05, "loss": 1.0009, "step": 1142 }, { "epoch": 0.19598345371541248, "grad_norm": 1.7734375, "learning_rate": 1.9823129596634456e-05, "loss": 1.1526, "step": 1143 }, { "epoch": 0.19615491780440233, "grad_norm": 1.84375, "learning_rate": 1.98227912827336e-05, "loss": 1.047, "step": 1144 }, { "epoch": 0.19632638189339222, "grad_norm": 1.7265625, "learning_rate": 1.9822452648474524e-05, "loss": 1.0929, "step": 1145 }, { "epoch": 0.19649784598238207, "grad_norm": 1.734375, "learning_rate": 1.9822113693868282e-05, "loss": 1.0357, "step": 1146 }, { "epoch": 0.19666931007137192, "grad_norm": 1.6640625, "learning_rate": 1.9821774418925924e-05, "loss": 1.0461, "step": 1147 }, { "epoch": 0.19684077416036178, "grad_norm": 1.6171875, "learning_rate": 1.982143482365852e-05, "loss": 1.0513, "step": 1148 }, { "epoch": 0.19701223824935166, "grad_norm": 1.7734375, "learning_rate": 1.982109490807714e-05, "loss": 1.05, "step": 1149 }, { "epoch": 0.1971837023383415, "grad_norm": 1.6640625, "learning_rate": 1.982075467219287e-05, "loss": 1.1052, "step": 1150 }, { "epoch": 0.19735516642733136, "grad_norm": 1.7265625, "learning_rate": 1.9820414116016814e-05, "loss": 1.1074, "step": 1151 }, { "epoch": 0.19752663051632124, "grad_norm": 1.734375, "learning_rate": 1.982007323956007e-05, "loss": 1.0711, "step": 1152 }, { "epoch": 0.1976980946053111, "grad_norm": 1.6796875, "learning_rate": 1.9819732042833762e-05, "loss": 1.0514, "step": 1153 }, { "epoch": 0.19786955869430095, "grad_norm": 1.6953125, "learning_rate": 1.981939052584901e-05, "loss": 1.0599, "step": 1154 }, { "epoch": 0.19804102278329083, "grad_norm": 1.703125, "learning_rate": 1.981904868861696e-05, "loss": 1.1223, "step": 1155 }, { "epoch": 0.19821248687228069, "grad_norm": 1.7734375, "learning_rate": 1.9818706531148754e-05, "loss": 1.1717, "step": 1156 }, { "epoch": 0.19838395096127054, "grad_norm": 1.765625, "learning_rate": 1.9818364053455553e-05, "loss": 1.1644, "step": 1157 }, { "epoch": 0.19855541505026042, "grad_norm": 1.65625, "learning_rate": 1.9818021255548536e-05, "loss": 1.064, "step": 1158 }, { "epoch": 0.19872687913925027, "grad_norm": 1.8359375, "learning_rate": 1.9817678137438866e-05, "loss": 1.1106, "step": 1159 }, { "epoch": 0.19889834322824013, "grad_norm": 1.7265625, "learning_rate": 1.9817334699137746e-05, "loss": 1.0919, "step": 1160 }, { "epoch": 0.19906980731723, "grad_norm": 1.5546875, "learning_rate": 1.9816990940656373e-05, "loss": 1.0914, "step": 1161 }, { "epoch": 0.19924127140621986, "grad_norm": 1.734375, "learning_rate": 1.9816646862005954e-05, "loss": 1.1322, "step": 1162 }, { "epoch": 0.19941273549520971, "grad_norm": 1.6328125, "learning_rate": 1.9816302463197718e-05, "loss": 1.0757, "step": 1163 }, { "epoch": 0.1995841995841996, "grad_norm": 1.6328125, "learning_rate": 1.9815957744242894e-05, "loss": 1.0408, "step": 1164 }, { "epoch": 0.19975566367318945, "grad_norm": 1.703125, "learning_rate": 1.9815612705152726e-05, "loss": 1.1011, "step": 1165 }, { "epoch": 0.1999271277621793, "grad_norm": 1.65625, "learning_rate": 1.9815267345938464e-05, "loss": 1.052, "step": 1166 }, { "epoch": 0.20009859185116918, "grad_norm": 1.6953125, "learning_rate": 1.9814921666611372e-05, "loss": 0.9697, "step": 1167 }, { "epoch": 0.20027005594015904, "grad_norm": 1.5859375, "learning_rate": 1.9814575667182723e-05, "loss": 1.0396, "step": 1168 }, { "epoch": 0.2004415200291489, "grad_norm": 1.640625, "learning_rate": 1.9814229347663806e-05, "loss": 1.1019, "step": 1169 }, { "epoch": 0.20061298411813877, "grad_norm": 1.625, "learning_rate": 1.9813882708065914e-05, "loss": 1.03, "step": 1170 }, { "epoch": 0.20078444820712862, "grad_norm": 1.671875, "learning_rate": 1.981353574840035e-05, "loss": 1.0963, "step": 1171 }, { "epoch": 0.20095591229611848, "grad_norm": 1.6953125, "learning_rate": 1.981318846867843e-05, "loss": 1.0078, "step": 1172 }, { "epoch": 0.20112737638510833, "grad_norm": 1.6875, "learning_rate": 1.9812840868911484e-05, "loss": 1.1007, "step": 1173 }, { "epoch": 0.2012988404740982, "grad_norm": 1.7421875, "learning_rate": 1.981249294911084e-05, "loss": 1.1998, "step": 1174 }, { "epoch": 0.20147030456308807, "grad_norm": 1.671875, "learning_rate": 1.9812144709287856e-05, "loss": 1.0579, "step": 1175 }, { "epoch": 0.20164176865207792, "grad_norm": 1.7109375, "learning_rate": 1.9811796149453883e-05, "loss": 1.1642, "step": 1176 }, { "epoch": 0.2018132327410678, "grad_norm": 1.6484375, "learning_rate": 1.9811447269620284e-05, "loss": 1.1081, "step": 1177 }, { "epoch": 0.20198469683005765, "grad_norm": 1.640625, "learning_rate": 1.9811098069798448e-05, "loss": 1.024, "step": 1178 }, { "epoch": 0.2021561609190475, "grad_norm": 1.5625, "learning_rate": 1.981074854999976e-05, "loss": 1.077, "step": 1179 }, { "epoch": 0.2023276250080374, "grad_norm": 2.1875, "learning_rate": 1.9810398710235616e-05, "loss": 1.0823, "step": 1180 }, { "epoch": 0.20249908909702724, "grad_norm": 1.7578125, "learning_rate": 1.981004855051743e-05, "loss": 1.0865, "step": 1181 }, { "epoch": 0.2026705531860171, "grad_norm": 1.734375, "learning_rate": 1.9809698070856616e-05, "loss": 1.1196, "step": 1182 }, { "epoch": 0.20284201727500697, "grad_norm": 1.7734375, "learning_rate": 1.980934727126461e-05, "loss": 1.1146, "step": 1183 }, { "epoch": 0.20301348136399683, "grad_norm": 1.703125, "learning_rate": 1.980899615175285e-05, "loss": 1.082, "step": 1184 }, { "epoch": 0.20318494545298668, "grad_norm": 1.6875, "learning_rate": 1.9808644712332786e-05, "loss": 1.08, "step": 1185 }, { "epoch": 0.20335640954197656, "grad_norm": 1.6796875, "learning_rate": 1.9808292953015888e-05, "loss": 1.0736, "step": 1186 }, { "epoch": 0.20352787363096642, "grad_norm": 1.703125, "learning_rate": 1.980794087381362e-05, "loss": 1.0743, "step": 1187 }, { "epoch": 0.20369933771995627, "grad_norm": 1.6796875, "learning_rate": 1.980758847473746e-05, "loss": 1.1012, "step": 1188 }, { "epoch": 0.20387080180894615, "grad_norm": 1.609375, "learning_rate": 1.9807235755798918e-05, "loss": 0.9476, "step": 1189 }, { "epoch": 0.204042265897936, "grad_norm": 1.65625, "learning_rate": 1.980688271700948e-05, "loss": 1.003, "step": 1190 }, { "epoch": 0.20421372998692586, "grad_norm": 1.734375, "learning_rate": 1.980652935838067e-05, "loss": 1.0686, "step": 1191 }, { "epoch": 0.20438519407591574, "grad_norm": 39.5, "learning_rate": 1.9806175679924008e-05, "loss": 1.0877, "step": 1192 }, { "epoch": 0.2045566581649056, "grad_norm": 1.6015625, "learning_rate": 1.9805821681651035e-05, "loss": 1.0505, "step": 1193 }, { "epoch": 0.20472812225389544, "grad_norm": 1.859375, "learning_rate": 1.980546736357329e-05, "loss": 1.117, "step": 1194 }, { "epoch": 0.2048995863428853, "grad_norm": 1.703125, "learning_rate": 1.9805112725702326e-05, "loss": 1.07, "step": 1195 }, { "epoch": 0.20507105043187518, "grad_norm": 1.703125, "learning_rate": 1.980475776804972e-05, "loss": 1.0231, "step": 1196 }, { "epoch": 0.20524251452086503, "grad_norm": 1.6796875, "learning_rate": 1.9804402490627038e-05, "loss": 1.0791, "step": 1197 }, { "epoch": 0.20541397860985489, "grad_norm": 1.7421875, "learning_rate": 1.9804046893445868e-05, "loss": 1.1112, "step": 1198 }, { "epoch": 0.20558544269884477, "grad_norm": 1.6875, "learning_rate": 1.9803690976517814e-05, "loss": 1.0347, "step": 1199 }, { "epoch": 0.20575690678783462, "grad_norm": 1.6640625, "learning_rate": 1.9803334739854477e-05, "loss": 1.1235, "step": 1200 }, { "epoch": 0.20592837087682447, "grad_norm": 1.5546875, "learning_rate": 1.9802978183467482e-05, "loss": 1.0379, "step": 1201 }, { "epoch": 0.20609983496581435, "grad_norm": 1.6953125, "learning_rate": 1.9802621307368453e-05, "loss": 1.0426, "step": 1202 }, { "epoch": 0.2062712990548042, "grad_norm": 1.6640625, "learning_rate": 1.980226411156903e-05, "loss": 1.1002, "step": 1203 }, { "epoch": 0.20644276314379406, "grad_norm": 1.53125, "learning_rate": 1.980190659608086e-05, "loss": 1.0196, "step": 1204 }, { "epoch": 0.20661422723278394, "grad_norm": 1.7109375, "learning_rate": 1.9801548760915607e-05, "loss": 1.0715, "step": 1205 }, { "epoch": 0.2067856913217738, "grad_norm": 1.671875, "learning_rate": 1.980119060608494e-05, "loss": 0.9744, "step": 1206 }, { "epoch": 0.20695715541076365, "grad_norm": 1.6875, "learning_rate": 1.9800832131600537e-05, "loss": 1.1004, "step": 1207 }, { "epoch": 0.20712861949975353, "grad_norm": 1.6484375, "learning_rate": 1.98004733374741e-05, "loss": 1.0266, "step": 1208 }, { "epoch": 0.20730008358874338, "grad_norm": 1.671875, "learning_rate": 1.9800114223717314e-05, "loss": 0.9948, "step": 1209 }, { "epoch": 0.20747154767773324, "grad_norm": 1.625, "learning_rate": 1.97997547903419e-05, "loss": 1.0021, "step": 1210 }, { "epoch": 0.20764301176672312, "grad_norm": 1.640625, "learning_rate": 1.9799395037359583e-05, "loss": 1.0087, "step": 1211 }, { "epoch": 0.20781447585571297, "grad_norm": 1.6953125, "learning_rate": 1.9799034964782088e-05, "loss": 1.0903, "step": 1212 }, { "epoch": 0.20798593994470282, "grad_norm": 1.703125, "learning_rate": 1.9798674572621167e-05, "loss": 1.1095, "step": 1213 }, { "epoch": 0.2081574040336927, "grad_norm": 1.671875, "learning_rate": 1.979831386088857e-05, "loss": 1.077, "step": 1214 }, { "epoch": 0.20832886812268256, "grad_norm": 1.6640625, "learning_rate": 1.979795282959606e-05, "loss": 1.008, "step": 1215 }, { "epoch": 0.2085003322116724, "grad_norm": 1.6875, "learning_rate": 1.9797591478755413e-05, "loss": 1.0306, "step": 1216 }, { "epoch": 0.2086717963006623, "grad_norm": 1.578125, "learning_rate": 1.9797229808378412e-05, "loss": 1.0404, "step": 1217 }, { "epoch": 0.20884326038965215, "grad_norm": 1.5546875, "learning_rate": 1.9796867818476854e-05, "loss": 1.049, "step": 1218 }, { "epoch": 0.209014724478642, "grad_norm": 1.6484375, "learning_rate": 1.9796505509062545e-05, "loss": 0.9834, "step": 1219 }, { "epoch": 0.20918618856763185, "grad_norm": 1.6640625, "learning_rate": 1.97961428801473e-05, "loss": 1.0725, "step": 1220 }, { "epoch": 0.20935765265662173, "grad_norm": 1.75, "learning_rate": 1.979577993174295e-05, "loss": 1.0489, "step": 1221 }, { "epoch": 0.2095291167456116, "grad_norm": 1.6796875, "learning_rate": 1.9795416663861328e-05, "loss": 1.0873, "step": 1222 }, { "epoch": 0.20970058083460144, "grad_norm": 1.6328125, "learning_rate": 1.9795053076514283e-05, "loss": 1.008, "step": 1223 }, { "epoch": 0.20987204492359132, "grad_norm": 3.4375, "learning_rate": 1.979468916971367e-05, "loss": 1.0834, "step": 1224 }, { "epoch": 0.21004350901258118, "grad_norm": 1.7265625, "learning_rate": 1.9794324943471365e-05, "loss": 1.0316, "step": 1225 }, { "epoch": 0.21021497310157103, "grad_norm": 1.7578125, "learning_rate": 1.9793960397799233e-05, "loss": 1.0937, "step": 1226 }, { "epoch": 0.2103864371905609, "grad_norm": 1.65625, "learning_rate": 1.979359553270918e-05, "loss": 1.0939, "step": 1227 }, { "epoch": 0.21055790127955076, "grad_norm": 1.671875, "learning_rate": 1.9793230348213094e-05, "loss": 1.0984, "step": 1228 }, { "epoch": 0.21072936536854062, "grad_norm": 1.640625, "learning_rate": 1.979286484432289e-05, "loss": 1.0379, "step": 1229 }, { "epoch": 0.2109008294575305, "grad_norm": 1.6953125, "learning_rate": 1.9792499021050484e-05, "loss": 1.1062, "step": 1230 }, { "epoch": 0.21107229354652035, "grad_norm": 1.7109375, "learning_rate": 1.979213287840781e-05, "loss": 0.9739, "step": 1231 }, { "epoch": 0.2112437576355102, "grad_norm": 1.71875, "learning_rate": 1.979176641640681e-05, "loss": 1.1277, "step": 1232 }, { "epoch": 0.21141522172450009, "grad_norm": 1.7265625, "learning_rate": 1.9791399635059438e-05, "loss": 0.9814, "step": 1233 }, { "epoch": 0.21158668581348994, "grad_norm": 1.6953125, "learning_rate": 1.979103253437765e-05, "loss": 1.0308, "step": 1234 }, { "epoch": 0.2117581499024798, "grad_norm": 1.71875, "learning_rate": 1.979066511437342e-05, "loss": 1.0897, "step": 1235 }, { "epoch": 0.21192961399146967, "grad_norm": 1.7890625, "learning_rate": 1.9790297375058733e-05, "loss": 1.0596, "step": 1236 }, { "epoch": 0.21210107808045953, "grad_norm": 1.65625, "learning_rate": 1.9789929316445584e-05, "loss": 1.0471, "step": 1237 }, { "epoch": 0.21227254216944938, "grad_norm": 1.609375, "learning_rate": 1.9789560938545972e-05, "loss": 1.0457, "step": 1238 }, { "epoch": 0.21244400625843926, "grad_norm": 1.6953125, "learning_rate": 1.9789192241371916e-05, "loss": 1.0937, "step": 1239 }, { "epoch": 0.21261547034742911, "grad_norm": 1.703125, "learning_rate": 1.9788823224935436e-05, "loss": 1.082, "step": 1240 }, { "epoch": 0.21278693443641897, "grad_norm": 1.671875, "learning_rate": 1.978845388924857e-05, "loss": 1.0213, "step": 1241 }, { "epoch": 0.21295839852540885, "grad_norm": 1.6796875, "learning_rate": 1.9788084234323365e-05, "loss": 1.0233, "step": 1242 }, { "epoch": 0.2131298626143987, "grad_norm": 1.6484375, "learning_rate": 1.978771426017187e-05, "loss": 1.0576, "step": 1243 }, { "epoch": 0.21330132670338856, "grad_norm": 1.6875, "learning_rate": 1.978734396680616e-05, "loss": 1.0684, "step": 1244 }, { "epoch": 0.2134727907923784, "grad_norm": 1.7734375, "learning_rate": 1.9786973354238304e-05, "loss": 1.0791, "step": 1245 }, { "epoch": 0.2136442548813683, "grad_norm": 1.6171875, "learning_rate": 1.9786602422480397e-05, "loss": 1.1244, "step": 1246 }, { "epoch": 0.21381571897035814, "grad_norm": 1.671875, "learning_rate": 1.9786231171544533e-05, "loss": 1.0968, "step": 1247 }, { "epoch": 0.213987183059348, "grad_norm": 1.6484375, "learning_rate": 1.9785859601442813e-05, "loss": 1.12, "step": 1248 }, { "epoch": 0.21415864714833788, "grad_norm": 1.59375, "learning_rate": 1.9785487712187365e-05, "loss": 1.0423, "step": 1249 }, { "epoch": 0.21433011123732773, "grad_norm": 1.625, "learning_rate": 1.9785115503790314e-05, "loss": 1.0428, "step": 1250 }, { "epoch": 0.21450157532631758, "grad_norm": 1.671875, "learning_rate": 1.9784742976263797e-05, "loss": 1.134, "step": 1251 }, { "epoch": 0.21467303941530747, "grad_norm": 1.75, "learning_rate": 1.9784370129619966e-05, "loss": 1.1417, "step": 1252 }, { "epoch": 0.21484450350429732, "grad_norm": 1.7734375, "learning_rate": 1.9783996963870983e-05, "loss": 1.1021, "step": 1253 }, { "epoch": 0.21501596759328717, "grad_norm": 1.7890625, "learning_rate": 1.978362347902901e-05, "loss": 1.0492, "step": 1254 }, { "epoch": 0.21518743168227705, "grad_norm": 1.7265625, "learning_rate": 1.978324967510624e-05, "loss": 1.1161, "step": 1255 }, { "epoch": 0.2153588957712669, "grad_norm": 1.7109375, "learning_rate": 1.9782875552114857e-05, "loss": 1.159, "step": 1256 }, { "epoch": 0.21553035986025676, "grad_norm": 1.796875, "learning_rate": 1.978250111006706e-05, "loss": 1.1507, "step": 1257 }, { "epoch": 0.21570182394924664, "grad_norm": 1.90625, "learning_rate": 1.978212634897507e-05, "loss": 1.0697, "step": 1258 }, { "epoch": 0.2158732880382365, "grad_norm": 1.6875, "learning_rate": 1.97817512688511e-05, "loss": 1.0872, "step": 1259 }, { "epoch": 0.21604475212722635, "grad_norm": 1.5859375, "learning_rate": 1.9781375869707384e-05, "loss": 1.0489, "step": 1260 }, { "epoch": 0.21621621621621623, "grad_norm": 1.7265625, "learning_rate": 1.978100015155617e-05, "loss": 1.0415, "step": 1261 }, { "epoch": 0.21638768030520608, "grad_norm": 1.7890625, "learning_rate": 1.978062411440971e-05, "loss": 1.0646, "step": 1262 }, { "epoch": 0.21655914439419593, "grad_norm": 1.7109375, "learning_rate": 1.9780247758280265e-05, "loss": 1.0606, "step": 1263 }, { "epoch": 0.21673060848318582, "grad_norm": 1.6484375, "learning_rate": 1.9779871083180113e-05, "loss": 1.0649, "step": 1264 }, { "epoch": 0.21690207257217567, "grad_norm": 1.546875, "learning_rate": 1.9779494089121537e-05, "loss": 1.0249, "step": 1265 }, { "epoch": 0.21707353666116552, "grad_norm": 1.6484375, "learning_rate": 1.9779116776116837e-05, "loss": 1.0685, "step": 1266 }, { "epoch": 0.21724500075015538, "grad_norm": 1.671875, "learning_rate": 1.9778739144178307e-05, "loss": 1.011, "step": 1267 }, { "epoch": 0.21741646483914526, "grad_norm": 1.78125, "learning_rate": 1.9778361193318276e-05, "loss": 1.118, "step": 1268 }, { "epoch": 0.2175879289281351, "grad_norm": 1.7109375, "learning_rate": 1.9777982923549062e-05, "loss": 1.1274, "step": 1269 }, { "epoch": 0.21775939301712496, "grad_norm": 1.5703125, "learning_rate": 1.9777604334883003e-05, "loss": 0.9581, "step": 1270 }, { "epoch": 0.21793085710611484, "grad_norm": 1.640625, "learning_rate": 1.9777225427332448e-05, "loss": 1.049, "step": 1271 }, { "epoch": 0.2181023211951047, "grad_norm": 1.6328125, "learning_rate": 1.9776846200909757e-05, "loss": 1.038, "step": 1272 }, { "epoch": 0.21827378528409455, "grad_norm": 1.625, "learning_rate": 1.9776466655627292e-05, "loss": 1.004, "step": 1273 }, { "epoch": 0.21844524937308443, "grad_norm": 1.7109375, "learning_rate": 1.977608679149744e-05, "loss": 1.0698, "step": 1274 }, { "epoch": 0.21861671346207429, "grad_norm": 1.6015625, "learning_rate": 1.9775706608532577e-05, "loss": 1.025, "step": 1275 }, { "epoch": 0.21878817755106414, "grad_norm": 1.609375, "learning_rate": 1.9775326106745113e-05, "loss": 0.9535, "step": 1276 }, { "epoch": 0.21895964164005402, "grad_norm": 1.625, "learning_rate": 1.9774945286147454e-05, "loss": 1.0276, "step": 1277 }, { "epoch": 0.21913110572904387, "grad_norm": 1.71875, "learning_rate": 1.977456414675202e-05, "loss": 1.0779, "step": 1278 }, { "epoch": 0.21930256981803373, "grad_norm": 1.7265625, "learning_rate": 1.9774182688571245e-05, "loss": 1.1006, "step": 1279 }, { "epoch": 0.2194740339070236, "grad_norm": 1.625, "learning_rate": 1.9773800911617563e-05, "loss": 1.1209, "step": 1280 }, { "epoch": 0.21964549799601346, "grad_norm": 1.703125, "learning_rate": 1.9773418815903428e-05, "loss": 0.9508, "step": 1281 }, { "epoch": 0.21981696208500331, "grad_norm": 1.6875, "learning_rate": 1.97730364014413e-05, "loss": 1.0266, "step": 1282 }, { "epoch": 0.2199884261739932, "grad_norm": 1.6875, "learning_rate": 1.9772653668243658e-05, "loss": 1.0921, "step": 1283 }, { "epoch": 0.22015989026298305, "grad_norm": 1.6484375, "learning_rate": 1.9772270616322977e-05, "loss": 1.0855, "step": 1284 }, { "epoch": 0.2203313543519729, "grad_norm": 1.6484375, "learning_rate": 1.9771887245691753e-05, "loss": 1.025, "step": 1285 }, { "epoch": 0.22050281844096278, "grad_norm": 1.6796875, "learning_rate": 1.977150355636249e-05, "loss": 1.0946, "step": 1286 }, { "epoch": 0.22067428252995264, "grad_norm": 1.609375, "learning_rate": 1.9771119548347693e-05, "loss": 1.0725, "step": 1287 }, { "epoch": 0.2208457466189425, "grad_norm": 1.734375, "learning_rate": 1.97707352216599e-05, "loss": 1.0588, "step": 1288 }, { "epoch": 0.22101721070793237, "grad_norm": 1.6328125, "learning_rate": 1.977035057631164e-05, "loss": 0.9982, "step": 1289 }, { "epoch": 0.22118867479692222, "grad_norm": 1.7421875, "learning_rate": 1.976996561231545e-05, "loss": 1.0788, "step": 1290 }, { "epoch": 0.22136013888591208, "grad_norm": 1.7109375, "learning_rate": 1.976958032968389e-05, "loss": 1.0912, "step": 1291 }, { "epoch": 0.22153160297490193, "grad_norm": 1.734375, "learning_rate": 1.976919472842953e-05, "loss": 1.1632, "step": 1292 }, { "epoch": 0.2217030670638918, "grad_norm": 1.6953125, "learning_rate": 1.9768808808564944e-05, "loss": 1.0549, "step": 1293 }, { "epoch": 0.22187453115288167, "grad_norm": 1.7578125, "learning_rate": 1.9768422570102717e-05, "loss": 1.0129, "step": 1294 }, { "epoch": 0.22204599524187152, "grad_norm": 1.7890625, "learning_rate": 1.9768036013055444e-05, "loss": 1.0461, "step": 1295 }, { "epoch": 0.2222174593308614, "grad_norm": 1.578125, "learning_rate": 1.976764913743573e-05, "loss": 1.0885, "step": 1296 }, { "epoch": 0.22238892341985125, "grad_norm": 1.578125, "learning_rate": 1.97672619432562e-05, "loss": 1.068, "step": 1297 }, { "epoch": 0.2225603875088411, "grad_norm": 1.609375, "learning_rate": 1.976687443052948e-05, "loss": 1.1145, "step": 1298 }, { "epoch": 0.222731851597831, "grad_norm": 1.5390625, "learning_rate": 1.9766486599268203e-05, "loss": 1.0323, "step": 1299 }, { "epoch": 0.22290331568682084, "grad_norm": 1.640625, "learning_rate": 1.976609844948502e-05, "loss": 1.0156, "step": 1300 }, { "epoch": 0.2230747797758107, "grad_norm": 1.65625, "learning_rate": 1.9765709981192592e-05, "loss": 1.0087, "step": 1301 }, { "epoch": 0.22324624386480058, "grad_norm": 1.65625, "learning_rate": 1.9765321194403588e-05, "loss": 0.998, "step": 1302 }, { "epoch": 0.22341770795379043, "grad_norm": 1.6796875, "learning_rate": 1.9764932089130687e-05, "loss": 1.0416, "step": 1303 }, { "epoch": 0.22358917204278028, "grad_norm": 1.6796875, "learning_rate": 1.9764542665386578e-05, "loss": 1.0588, "step": 1304 }, { "epoch": 0.22376063613177016, "grad_norm": 1.75, "learning_rate": 1.9764152923183965e-05, "loss": 0.9994, "step": 1305 }, { "epoch": 0.22393210022076002, "grad_norm": 1.640625, "learning_rate": 1.9763762862535556e-05, "loss": 1.1422, "step": 1306 }, { "epoch": 0.22410356430974987, "grad_norm": 1.7890625, "learning_rate": 1.976337248345407e-05, "loss": 1.0702, "step": 1307 }, { "epoch": 0.22427502839873975, "grad_norm": 1.5703125, "learning_rate": 1.9762981785952248e-05, "loss": 1.0187, "step": 1308 }, { "epoch": 0.2244464924877296, "grad_norm": 1.6796875, "learning_rate": 1.976259077004282e-05, "loss": 1.0987, "step": 1309 }, { "epoch": 0.22461795657671946, "grad_norm": 1.65625, "learning_rate": 1.976219943573855e-05, "loss": 1.0949, "step": 1310 }, { "epoch": 0.22478942066570934, "grad_norm": 1.7265625, "learning_rate": 1.976180778305219e-05, "loss": 1.0813, "step": 1311 }, { "epoch": 0.2249608847546992, "grad_norm": 1.671875, "learning_rate": 1.976141581199652e-05, "loss": 1.1172, "step": 1312 }, { "epoch": 0.22513234884368905, "grad_norm": 1.65625, "learning_rate": 1.9761023522584325e-05, "loss": 1.125, "step": 1313 }, { "epoch": 0.2253038129326789, "grad_norm": 1.7421875, "learning_rate": 1.976063091482839e-05, "loss": 1.0168, "step": 1314 }, { "epoch": 0.22547527702166878, "grad_norm": 1.6875, "learning_rate": 1.9760237988741534e-05, "loss": 1.0785, "step": 1315 }, { "epoch": 0.22564674111065863, "grad_norm": 1.640625, "learning_rate": 1.975984474433656e-05, "loss": 1.0187, "step": 1316 }, { "epoch": 0.2258182051996485, "grad_norm": 1.6953125, "learning_rate": 1.9759451181626295e-05, "loss": 1.0427, "step": 1317 }, { "epoch": 0.22598966928863837, "grad_norm": 1.7890625, "learning_rate": 1.9759057300623578e-05, "loss": 1.1146, "step": 1318 }, { "epoch": 0.22616113337762822, "grad_norm": 1.6796875, "learning_rate": 1.975866310134125e-05, "loss": 1.0689, "step": 1319 }, { "epoch": 0.22633259746661807, "grad_norm": 1.65625, "learning_rate": 1.9758268583792176e-05, "loss": 1.0353, "step": 1320 }, { "epoch": 0.22650406155560796, "grad_norm": 1.6171875, "learning_rate": 1.9757873747989214e-05, "loss": 0.9958, "step": 1321 }, { "epoch": 0.2266755256445978, "grad_norm": 1.6015625, "learning_rate": 1.9757478593945244e-05, "loss": 1.0269, "step": 1322 }, { "epoch": 0.22684698973358766, "grad_norm": 1.7734375, "learning_rate": 1.9757083121673157e-05, "loss": 1.1019, "step": 1323 }, { "epoch": 0.22701845382257754, "grad_norm": 1.6953125, "learning_rate": 1.9756687331185845e-05, "loss": 1.0571, "step": 1324 }, { "epoch": 0.2271899179115674, "grad_norm": 1.84375, "learning_rate": 1.9756291222496217e-05, "loss": 1.163, "step": 1325 }, { "epoch": 0.22736138200055725, "grad_norm": 1.59375, "learning_rate": 1.9755894795617196e-05, "loss": 1.0597, "step": 1326 }, { "epoch": 0.22753284608954713, "grad_norm": 1.609375, "learning_rate": 1.975549805056171e-05, "loss": 0.9859, "step": 1327 }, { "epoch": 0.22770431017853698, "grad_norm": 1.6015625, "learning_rate": 1.9755100987342695e-05, "loss": 0.979, "step": 1328 }, { "epoch": 0.22787577426752684, "grad_norm": 1.6875, "learning_rate": 1.9754703605973104e-05, "loss": 1.0554, "step": 1329 }, { "epoch": 0.22804723835651672, "grad_norm": 1.6640625, "learning_rate": 1.975430590646589e-05, "loss": 1.0303, "step": 1330 }, { "epoch": 0.22821870244550657, "grad_norm": 1.65625, "learning_rate": 1.9753907888834037e-05, "loss": 1.1333, "step": 1331 }, { "epoch": 0.22839016653449642, "grad_norm": 1.6484375, "learning_rate": 1.9753509553090513e-05, "loss": 1.0818, "step": 1332 }, { "epoch": 0.2285616306234863, "grad_norm": 1.6796875, "learning_rate": 1.9753110899248313e-05, "loss": 1.1285, "step": 1333 }, { "epoch": 0.22873309471247616, "grad_norm": 1.65625, "learning_rate": 1.9752711927320444e-05, "loss": 1.1243, "step": 1334 }, { "epoch": 0.228904558801466, "grad_norm": 1.625, "learning_rate": 1.975231263731991e-05, "loss": 1.0094, "step": 1335 }, { "epoch": 0.2290760228904559, "grad_norm": 1.6796875, "learning_rate": 1.975191302925974e-05, "loss": 1.1451, "step": 1336 }, { "epoch": 0.22924748697944575, "grad_norm": 1.5703125, "learning_rate": 1.975151310315296e-05, "loss": 1.0273, "step": 1337 }, { "epoch": 0.2294189510684356, "grad_norm": 1.5703125, "learning_rate": 1.975111285901262e-05, "loss": 1.1098, "step": 1338 }, { "epoch": 0.22959041515742545, "grad_norm": 1.65625, "learning_rate": 1.975071229685177e-05, "loss": 1.0838, "step": 1339 }, { "epoch": 0.22976187924641533, "grad_norm": 1.640625, "learning_rate": 1.9750311416683475e-05, "loss": 1.0626, "step": 1340 }, { "epoch": 0.2299333433354052, "grad_norm": 1.6328125, "learning_rate": 1.974991021852081e-05, "loss": 0.9985, "step": 1341 }, { "epoch": 0.23010480742439504, "grad_norm": 1.640625, "learning_rate": 1.9749508702376853e-05, "loss": 1.0117, "step": 1342 }, { "epoch": 0.23027627151338492, "grad_norm": 1.734375, "learning_rate": 1.9749106868264706e-05, "loss": 1.1324, "step": 1343 }, { "epoch": 0.23044773560237478, "grad_norm": 1.6484375, "learning_rate": 1.9748704716197474e-05, "loss": 1.0518, "step": 1344 }, { "epoch": 0.23061919969136463, "grad_norm": 1.609375, "learning_rate": 1.9748302246188267e-05, "loss": 1.0752, "step": 1345 }, { "epoch": 0.2307906637803545, "grad_norm": 1.671875, "learning_rate": 1.974789945825022e-05, "loss": 1.0666, "step": 1346 }, { "epoch": 0.23096212786934436, "grad_norm": 1.703125, "learning_rate": 1.974749635239646e-05, "loss": 1.1034, "step": 1347 }, { "epoch": 0.23113359195833422, "grad_norm": 1.6875, "learning_rate": 1.974709292864014e-05, "loss": 1.0834, "step": 1348 }, { "epoch": 0.2313050560473241, "grad_norm": 1.7109375, "learning_rate": 1.9746689186994417e-05, "loss": 1.1175, "step": 1349 }, { "epoch": 0.23147652013631395, "grad_norm": 1.703125, "learning_rate": 1.9746285127472458e-05, "loss": 1.0594, "step": 1350 }, { "epoch": 0.2316479842253038, "grad_norm": 1.671875, "learning_rate": 1.9745880750087437e-05, "loss": 1.083, "step": 1351 }, { "epoch": 0.23181944831429369, "grad_norm": 1.625, "learning_rate": 1.9745476054852543e-05, "loss": 1.0639, "step": 1352 }, { "epoch": 0.23199091240328354, "grad_norm": 1.65625, "learning_rate": 1.9745071041780983e-05, "loss": 1.0399, "step": 1353 }, { "epoch": 0.2321623764922734, "grad_norm": 1.625, "learning_rate": 1.9744665710885955e-05, "loss": 1.1361, "step": 1354 }, { "epoch": 0.23233384058126327, "grad_norm": 1.625, "learning_rate": 1.9744260062180686e-05, "loss": 0.987, "step": 1355 }, { "epoch": 0.23250530467025313, "grad_norm": 1.640625, "learning_rate": 1.9743854095678402e-05, "loss": 1.2077, "step": 1356 }, { "epoch": 0.23267676875924298, "grad_norm": 1.671875, "learning_rate": 1.9743447811392342e-05, "loss": 1.1149, "step": 1357 }, { "epoch": 0.23284823284823286, "grad_norm": 1.703125, "learning_rate": 1.974304120933576e-05, "loss": 1.0483, "step": 1358 }, { "epoch": 0.23301969693722271, "grad_norm": 1.625, "learning_rate": 1.9742634289521915e-05, "loss": 1.032, "step": 1359 }, { "epoch": 0.23319116102621257, "grad_norm": 1.671875, "learning_rate": 1.9742227051964078e-05, "loss": 1.0213, "step": 1360 }, { "epoch": 0.23336262511520242, "grad_norm": 1.8125, "learning_rate": 1.9741819496675533e-05, "loss": 1.0281, "step": 1361 }, { "epoch": 0.2335340892041923, "grad_norm": 1.6015625, "learning_rate": 1.974141162366957e-05, "loss": 0.9483, "step": 1362 }, { "epoch": 0.23370555329318216, "grad_norm": 1.765625, "learning_rate": 1.9741003432959486e-05, "loss": 1.1563, "step": 1363 }, { "epoch": 0.233877017382172, "grad_norm": 1.6484375, "learning_rate": 1.9740594924558606e-05, "loss": 1.1034, "step": 1364 }, { "epoch": 0.2340484814711619, "grad_norm": 1.53125, "learning_rate": 1.974018609848024e-05, "loss": 0.9837, "step": 1365 }, { "epoch": 0.23421994556015174, "grad_norm": 1.59375, "learning_rate": 1.973977695473773e-05, "loss": 1.0679, "step": 1366 }, { "epoch": 0.2343914096491416, "grad_norm": 1.8125, "learning_rate": 1.9739367493344415e-05, "loss": 1.1194, "step": 1367 }, { "epoch": 0.23456287373813148, "grad_norm": 1.640625, "learning_rate": 1.9738957714313653e-05, "loss": 1.0915, "step": 1368 }, { "epoch": 0.23473433782712133, "grad_norm": 1.703125, "learning_rate": 1.9738547617658806e-05, "loss": 1.076, "step": 1369 }, { "epoch": 0.23490580191611118, "grad_norm": 1.6640625, "learning_rate": 1.973813720339325e-05, "loss": 0.9973, "step": 1370 }, { "epoch": 0.23507726600510107, "grad_norm": 1.671875, "learning_rate": 1.973772647153037e-05, "loss": 1.224, "step": 1371 }, { "epoch": 0.23524873009409092, "grad_norm": 1.5859375, "learning_rate": 1.9737315422083557e-05, "loss": 1.1084, "step": 1372 }, { "epoch": 0.23542019418308077, "grad_norm": 1.703125, "learning_rate": 1.9736904055066227e-05, "loss": 1.1346, "step": 1373 }, { "epoch": 0.23559165827207065, "grad_norm": 1.7109375, "learning_rate": 1.9736492370491786e-05, "loss": 1.0141, "step": 1374 }, { "epoch": 0.2357631223610605, "grad_norm": 1.6171875, "learning_rate": 1.973608036837366e-05, "loss": 1.0714, "step": 1375 }, { "epoch": 0.23593458645005036, "grad_norm": 1.671875, "learning_rate": 1.9735668048725296e-05, "loss": 1.131, "step": 1376 }, { "epoch": 0.23610605053904024, "grad_norm": 1.5546875, "learning_rate": 1.9735255411560137e-05, "loss": 1.0559, "step": 1377 }, { "epoch": 0.2362775146280301, "grad_norm": 1.6484375, "learning_rate": 1.973484245689164e-05, "loss": 1.0569, "step": 1378 }, { "epoch": 0.23644897871701995, "grad_norm": 1.671875, "learning_rate": 1.973442918473327e-05, "loss": 1.0454, "step": 1379 }, { "epoch": 0.23662044280600983, "grad_norm": 1.71875, "learning_rate": 1.9734015595098507e-05, "loss": 0.9595, "step": 1380 }, { "epoch": 0.23679190689499968, "grad_norm": 1.6328125, "learning_rate": 1.9733601688000837e-05, "loss": 1.1188, "step": 1381 }, { "epoch": 0.23696337098398954, "grad_norm": 1.6015625, "learning_rate": 1.973318746345377e-05, "loss": 0.972, "step": 1382 }, { "epoch": 0.23713483507297942, "grad_norm": 1.6484375, "learning_rate": 1.9732772921470804e-05, "loss": 0.9974, "step": 1383 }, { "epoch": 0.23730629916196927, "grad_norm": 1.7265625, "learning_rate": 1.9732358062065465e-05, "loss": 1.0464, "step": 1384 }, { "epoch": 0.23747776325095912, "grad_norm": 1.640625, "learning_rate": 1.973194288525128e-05, "loss": 1.1304, "step": 1385 }, { "epoch": 0.23764922733994898, "grad_norm": 1.6328125, "learning_rate": 1.9731527391041786e-05, "loss": 1.1416, "step": 1386 }, { "epoch": 0.23782069142893886, "grad_norm": 1.734375, "learning_rate": 1.9731111579450545e-05, "loss": 1.1684, "step": 1387 }, { "epoch": 0.2379921555179287, "grad_norm": 1.640625, "learning_rate": 1.9730695450491106e-05, "loss": 1.0167, "step": 1388 }, { "epoch": 0.23816361960691856, "grad_norm": 1.546875, "learning_rate": 1.973027900417705e-05, "loss": 0.9737, "step": 1389 }, { "epoch": 0.23833508369590845, "grad_norm": 1.6953125, "learning_rate": 1.9729862240521953e-05, "loss": 1.0621, "step": 1390 }, { "epoch": 0.2385065477848983, "grad_norm": 1.78125, "learning_rate": 1.9729445159539412e-05, "loss": 1.0712, "step": 1391 }, { "epoch": 0.23867801187388815, "grad_norm": 1.703125, "learning_rate": 1.9729027761243022e-05, "loss": 1.0961, "step": 1392 }, { "epoch": 0.23884947596287803, "grad_norm": 1.546875, "learning_rate": 1.9728610045646402e-05, "loss": 1.0431, "step": 1393 }, { "epoch": 0.2390209400518679, "grad_norm": 1.5625, "learning_rate": 1.9728192012763174e-05, "loss": 1.0822, "step": 1394 }, { "epoch": 0.23919240414085774, "grad_norm": 1.6875, "learning_rate": 1.9727773662606973e-05, "loss": 1.1067, "step": 1395 }, { "epoch": 0.23936386822984762, "grad_norm": 1.7109375, "learning_rate": 1.9727354995191442e-05, "loss": 1.099, "step": 1396 }, { "epoch": 0.23953533231883747, "grad_norm": 1.640625, "learning_rate": 1.972693601053023e-05, "loss": 1.1294, "step": 1397 }, { "epoch": 0.23970679640782733, "grad_norm": 1.7578125, "learning_rate": 1.9726516708637012e-05, "loss": 1.1383, "step": 1398 }, { "epoch": 0.2398782604968172, "grad_norm": 1.578125, "learning_rate": 1.9726097089525456e-05, "loss": 1.0686, "step": 1399 }, { "epoch": 0.24004972458580706, "grad_norm": 1.671875, "learning_rate": 1.9725677153209246e-05, "loss": 1.1246, "step": 1400 }, { "epoch": 0.24004972458580706, "eval_loss": 0.9177221655845642, "eval_runtime": 837.168, "eval_samples_per_second": 2.985, "eval_steps_per_second": 2.985, "step": 1400 }, { "epoch": 0.24022118867479691, "grad_norm": 1.7578125, "learning_rate": 1.9725256899702085e-05, "loss": 1.0207, "step": 1401 }, { "epoch": 0.2403926527637868, "grad_norm": 1.6015625, "learning_rate": 1.9724836329017673e-05, "loss": 1.0681, "step": 1402 }, { "epoch": 0.24056411685277665, "grad_norm": 1.6875, "learning_rate": 1.9724415441169723e-05, "loss": 1.0937, "step": 1403 }, { "epoch": 0.2407355809417665, "grad_norm": 1.5625, "learning_rate": 1.9723994236171972e-05, "loss": 1.1032, "step": 1404 }, { "epoch": 0.24090704503075638, "grad_norm": 1.6484375, "learning_rate": 1.972357271403815e-05, "loss": 1.0918, "step": 1405 }, { "epoch": 0.24107850911974624, "grad_norm": 1.8828125, "learning_rate": 1.972315087478201e-05, "loss": 1.1631, "step": 1406 }, { "epoch": 0.2412499732087361, "grad_norm": 1.6484375, "learning_rate": 1.97227287184173e-05, "loss": 1.0473, "step": 1407 }, { "epoch": 0.24142143729772594, "grad_norm": 1.625, "learning_rate": 1.9722306244957797e-05, "loss": 1.0959, "step": 1408 }, { "epoch": 0.24159290138671582, "grad_norm": 1.578125, "learning_rate": 1.9721883454417278e-05, "loss": 0.9669, "step": 1409 }, { "epoch": 0.24176436547570568, "grad_norm": 1.5546875, "learning_rate": 1.972146034680953e-05, "loss": 1.083, "step": 1410 }, { "epoch": 0.24193582956469553, "grad_norm": 1.609375, "learning_rate": 1.9721036922148352e-05, "loss": 1.0486, "step": 1411 }, { "epoch": 0.2421072936536854, "grad_norm": 1.625, "learning_rate": 1.9720613180447553e-05, "loss": 1.0244, "step": 1412 }, { "epoch": 0.24227875774267527, "grad_norm": 1.8203125, "learning_rate": 1.9720189121720953e-05, "loss": 1.1736, "step": 1413 }, { "epoch": 0.24245022183166512, "grad_norm": 1.765625, "learning_rate": 1.9719764745982383e-05, "loss": 1.0677, "step": 1414 }, { "epoch": 0.242621685920655, "grad_norm": 1.65625, "learning_rate": 1.9719340053245682e-05, "loss": 1.0646, "step": 1415 }, { "epoch": 0.24279315000964485, "grad_norm": 1.6796875, "learning_rate": 1.9718915043524703e-05, "loss": 1.0764, "step": 1416 }, { "epoch": 0.2429646140986347, "grad_norm": 1.6328125, "learning_rate": 1.9718489716833308e-05, "loss": 1.0921, "step": 1417 }, { "epoch": 0.2431360781876246, "grad_norm": 1.59375, "learning_rate": 1.971806407318537e-05, "loss": 1.0519, "step": 1418 }, { "epoch": 0.24330754227661444, "grad_norm": 1.703125, "learning_rate": 1.9717638112594765e-05, "loss": 1.0683, "step": 1419 }, { "epoch": 0.2434790063656043, "grad_norm": 1.640625, "learning_rate": 1.9717211835075388e-05, "loss": 1.0414, "step": 1420 }, { "epoch": 0.24365047045459418, "grad_norm": 1.734375, "learning_rate": 1.971678524064114e-05, "loss": 1.0836, "step": 1421 }, { "epoch": 0.24382193454358403, "grad_norm": 1.71875, "learning_rate": 1.9716358329305937e-05, "loss": 1.1452, "step": 1422 }, { "epoch": 0.24399339863257388, "grad_norm": 1.6015625, "learning_rate": 1.9715931101083696e-05, "loss": 1.0282, "step": 1423 }, { "epoch": 0.24416486272156376, "grad_norm": 1.7421875, "learning_rate": 1.971550355598836e-05, "loss": 1.0765, "step": 1424 }, { "epoch": 0.24433632681055362, "grad_norm": 1.7109375, "learning_rate": 1.9715075694033866e-05, "loss": 1.0627, "step": 1425 }, { "epoch": 0.24450779089954347, "grad_norm": 1.671875, "learning_rate": 1.971464751523417e-05, "loss": 1.0239, "step": 1426 }, { "epoch": 0.24467925498853335, "grad_norm": 1.6953125, "learning_rate": 1.9714219019603236e-05, "loss": 1.1714, "step": 1427 }, { "epoch": 0.2448507190775232, "grad_norm": 1.5703125, "learning_rate": 1.971379020715504e-05, "loss": 1.015, "step": 1428 }, { "epoch": 0.24502218316651306, "grad_norm": 1.75, "learning_rate": 1.971336107790357e-05, "loss": 1.0271, "step": 1429 }, { "epoch": 0.24519364725550294, "grad_norm": 1.59375, "learning_rate": 1.971293163186281e-05, "loss": 1.103, "step": 1430 }, { "epoch": 0.2453651113444928, "grad_norm": 1.71875, "learning_rate": 1.9712501869046782e-05, "loss": 1.0114, "step": 1431 }, { "epoch": 0.24553657543348265, "grad_norm": 1.8125, "learning_rate": 1.971207178946949e-05, "loss": 1.1274, "step": 1432 }, { "epoch": 0.2457080395224725, "grad_norm": 1.671875, "learning_rate": 1.9711641393144967e-05, "loss": 1.0402, "step": 1433 }, { "epoch": 0.24587950361146238, "grad_norm": 1.59375, "learning_rate": 1.971121068008725e-05, "loss": 1.1112, "step": 1434 }, { "epoch": 0.24605096770045223, "grad_norm": 1.7265625, "learning_rate": 1.9710779650310376e-05, "loss": 1.1078, "step": 1435 }, { "epoch": 0.2462224317894421, "grad_norm": 1.6171875, "learning_rate": 1.9710348303828412e-05, "loss": 1.0754, "step": 1436 }, { "epoch": 0.24639389587843197, "grad_norm": 1.671875, "learning_rate": 1.970991664065543e-05, "loss": 0.9915, "step": 1437 }, { "epoch": 0.24656535996742182, "grad_norm": 1.703125, "learning_rate": 1.9709484660805498e-05, "loss": 1.0486, "step": 1438 }, { "epoch": 0.24673682405641167, "grad_norm": 1.6875, "learning_rate": 1.9709052364292705e-05, "loss": 1.1207, "step": 1439 }, { "epoch": 0.24690828814540156, "grad_norm": 1.5390625, "learning_rate": 1.970861975113116e-05, "loss": 0.9616, "step": 1440 }, { "epoch": 0.2470797522343914, "grad_norm": 1.5234375, "learning_rate": 1.9708186821334964e-05, "loss": 1.0366, "step": 1441 }, { "epoch": 0.24725121632338126, "grad_norm": 1.65625, "learning_rate": 1.9707753574918235e-05, "loss": 1.0376, "step": 1442 }, { "epoch": 0.24742268041237114, "grad_norm": 1.640625, "learning_rate": 1.970732001189511e-05, "loss": 1.0982, "step": 1443 }, { "epoch": 0.247594144501361, "grad_norm": 1.734375, "learning_rate": 1.9706886132279724e-05, "loss": 1.1314, "step": 1444 }, { "epoch": 0.24776560859035085, "grad_norm": 2.546875, "learning_rate": 1.9706451936086228e-05, "loss": 0.9716, "step": 1445 }, { "epoch": 0.24793707267934073, "grad_norm": 1.578125, "learning_rate": 1.9706017423328782e-05, "loss": 0.9978, "step": 1446 }, { "epoch": 0.24810853676833058, "grad_norm": 1.703125, "learning_rate": 1.970558259402156e-05, "loss": 1.0639, "step": 1447 }, { "epoch": 0.24828000085732044, "grad_norm": 1.7890625, "learning_rate": 1.970514744817874e-05, "loss": 1.0706, "step": 1448 }, { "epoch": 0.24845146494631032, "grad_norm": 1.703125, "learning_rate": 1.9704711985814516e-05, "loss": 1.1416, "step": 1449 }, { "epoch": 0.24862292903530017, "grad_norm": 1.59375, "learning_rate": 1.970427620694309e-05, "loss": 1.155, "step": 1450 }, { "epoch": 0.24879439312429003, "grad_norm": 1.5625, "learning_rate": 1.9703840111578673e-05, "loss": 1.0571, "step": 1451 }, { "epoch": 0.2489658572132799, "grad_norm": 1.65625, "learning_rate": 1.970340369973549e-05, "loss": 1.0132, "step": 1452 }, { "epoch": 0.24913732130226976, "grad_norm": 1.6328125, "learning_rate": 1.9702966971427773e-05, "loss": 1.0421, "step": 1453 }, { "epoch": 0.2493087853912596, "grad_norm": 1.5625, "learning_rate": 1.9702529926669768e-05, "loss": 1.0326, "step": 1454 }, { "epoch": 0.2494802494802495, "grad_norm": 1.6796875, "learning_rate": 1.970209256547572e-05, "loss": 1.0629, "step": 1455 }, { "epoch": 0.24965171356923935, "grad_norm": 1.6875, "learning_rate": 1.9701654887859904e-05, "loss": 1.0527, "step": 1456 }, { "epoch": 0.2498231776582292, "grad_norm": 1.6875, "learning_rate": 1.9701216893836584e-05, "loss": 1.0612, "step": 1457 }, { "epoch": 0.24999464174721905, "grad_norm": 1.578125, "learning_rate": 1.970077858342005e-05, "loss": 0.9602, "step": 1458 }, { "epoch": 0.2501661058362089, "grad_norm": 1.59375, "learning_rate": 1.9700339956624603e-05, "loss": 0.9985, "step": 1459 }, { "epoch": 0.2503375699251988, "grad_norm": 1.5625, "learning_rate": 1.9699901013464534e-05, "loss": 0.9584, "step": 1460 }, { "epoch": 0.25050903401418867, "grad_norm": 1.625, "learning_rate": 1.969946175395417e-05, "loss": 0.9766, "step": 1461 }, { "epoch": 0.2506804981031785, "grad_norm": 1.6875, "learning_rate": 1.9699022178107833e-05, "loss": 1.0457, "step": 1462 }, { "epoch": 0.2508519621921684, "grad_norm": 1.625, "learning_rate": 1.969858228593986e-05, "loss": 1.0036, "step": 1463 }, { "epoch": 0.25102342628115826, "grad_norm": 1.6484375, "learning_rate": 1.9698142077464597e-05, "loss": 1.0776, "step": 1464 }, { "epoch": 0.2511948903701481, "grad_norm": 1.6484375, "learning_rate": 1.96977015526964e-05, "loss": 1.1579, "step": 1465 }, { "epoch": 0.25136635445913796, "grad_norm": 1.7109375, "learning_rate": 1.969726071164964e-05, "loss": 1.0759, "step": 1466 }, { "epoch": 0.25153781854812785, "grad_norm": 1.6015625, "learning_rate": 1.9696819554338693e-05, "loss": 1.0797, "step": 1467 }, { "epoch": 0.25170928263711767, "grad_norm": 1.5859375, "learning_rate": 1.9696378080777937e-05, "loss": 1.0332, "step": 1468 }, { "epoch": 0.25188074672610755, "grad_norm": 1.6328125, "learning_rate": 1.9695936290981788e-05, "loss": 1.0922, "step": 1469 }, { "epoch": 0.25205221081509743, "grad_norm": 1.703125, "learning_rate": 1.969549418496464e-05, "loss": 1.0826, "step": 1470 }, { "epoch": 0.25222367490408726, "grad_norm": 1.7421875, "learning_rate": 1.9695051762740917e-05, "loss": 1.1504, "step": 1471 }, { "epoch": 0.25239513899307714, "grad_norm": 1.6796875, "learning_rate": 1.969460902432505e-05, "loss": 1.048, "step": 1472 }, { "epoch": 0.252566603082067, "grad_norm": 1.625, "learning_rate": 1.9694165969731473e-05, "loss": 1.0838, "step": 1473 }, { "epoch": 0.25273806717105685, "grad_norm": 1.671875, "learning_rate": 1.969372259897464e-05, "loss": 1.0886, "step": 1474 }, { "epoch": 0.2529095312600467, "grad_norm": 1.6796875, "learning_rate": 1.969327891206901e-05, "loss": 1.0252, "step": 1475 }, { "epoch": 0.2530809953490366, "grad_norm": 1.6796875, "learning_rate": 1.9692834909029056e-05, "loss": 1.0893, "step": 1476 }, { "epoch": 0.25325245943802643, "grad_norm": 1.8671875, "learning_rate": 1.9692390589869256e-05, "loss": 1.0419, "step": 1477 }, { "epoch": 0.2534239235270163, "grad_norm": 1.6796875, "learning_rate": 1.9691945954604095e-05, "loss": 1.0386, "step": 1478 }, { "epoch": 0.2535953876160062, "grad_norm": 1.6171875, "learning_rate": 1.9691501003248086e-05, "loss": 0.976, "step": 1479 }, { "epoch": 0.253766851704996, "grad_norm": 1.7109375, "learning_rate": 1.969105573581573e-05, "loss": 1.0139, "step": 1480 }, { "epoch": 0.2539383157939859, "grad_norm": 1.6640625, "learning_rate": 1.9690610152321557e-05, "loss": 0.9874, "step": 1481 }, { "epoch": 0.2541097798829758, "grad_norm": 1.6640625, "learning_rate": 1.969016425278009e-05, "loss": 1.0118, "step": 1482 }, { "epoch": 0.2542812439719656, "grad_norm": 1.640625, "learning_rate": 1.9689718037205883e-05, "loss": 1.0416, "step": 1483 }, { "epoch": 0.2544527080609555, "grad_norm": 1.65625, "learning_rate": 1.9689271505613477e-05, "loss": 1.0712, "step": 1484 }, { "epoch": 0.25462417214994537, "grad_norm": 1.65625, "learning_rate": 1.9688824658017446e-05, "loss": 1.0663, "step": 1485 }, { "epoch": 0.2547956362389352, "grad_norm": 1.734375, "learning_rate": 1.968837749443236e-05, "loss": 1.1637, "step": 1486 }, { "epoch": 0.2549671003279251, "grad_norm": 1.6640625, "learning_rate": 1.9687930014872794e-05, "loss": 1.0739, "step": 1487 }, { "epoch": 0.25513856441691496, "grad_norm": 1.765625, "learning_rate": 1.9687482219353352e-05, "loss": 1.0067, "step": 1488 }, { "epoch": 0.2553100285059048, "grad_norm": 1.578125, "learning_rate": 1.9687034107888636e-05, "loss": 1.0853, "step": 1489 }, { "epoch": 0.25548149259489467, "grad_norm": 1.6484375, "learning_rate": 1.9686585680493257e-05, "loss": 1.1097, "step": 1490 }, { "epoch": 0.2556529566838845, "grad_norm": 1.609375, "learning_rate": 1.9686136937181847e-05, "loss": 1.0814, "step": 1491 }, { "epoch": 0.2558244207728744, "grad_norm": 1.6171875, "learning_rate": 1.9685687877969035e-05, "loss": 1.0519, "step": 1492 }, { "epoch": 0.25599588486186425, "grad_norm": 1.609375, "learning_rate": 1.9685238502869468e-05, "loss": 1.1121, "step": 1493 }, { "epoch": 0.2561673489508541, "grad_norm": 1.609375, "learning_rate": 1.96847888118978e-05, "loss": 1.0927, "step": 1494 }, { "epoch": 0.25633881303984396, "grad_norm": 1.6796875, "learning_rate": 1.96843388050687e-05, "loss": 1.0372, "step": 1495 }, { "epoch": 0.25651027712883384, "grad_norm": 1.6328125, "learning_rate": 1.9683888482396844e-05, "loss": 1.0362, "step": 1496 }, { "epoch": 0.25668174121782367, "grad_norm": 1.578125, "learning_rate": 1.968343784389692e-05, "loss": 1.0311, "step": 1497 }, { "epoch": 0.25685320530681355, "grad_norm": 1.5859375, "learning_rate": 1.9682986889583623e-05, "loss": 1.1184, "step": 1498 }, { "epoch": 0.25702466939580343, "grad_norm": 1.71875, "learning_rate": 1.968253561947166e-05, "loss": 1.1175, "step": 1499 }, { "epoch": 0.25719613348479325, "grad_norm": 1.6640625, "learning_rate": 1.968208403357575e-05, "loss": 1.0388, "step": 1500 }, { "epoch": 0.25736759757378314, "grad_norm": 1.5, "learning_rate": 1.968163213191062e-05, "loss": 0.9964, "step": 1501 }, { "epoch": 0.257539061662773, "grad_norm": 1.65625, "learning_rate": 1.968117991449101e-05, "loss": 1.1443, "step": 1502 }, { "epoch": 0.25771052575176284, "grad_norm": 1.671875, "learning_rate": 1.9680727381331665e-05, "loss": 1.04, "step": 1503 }, { "epoch": 0.2578819898407527, "grad_norm": 1.6328125, "learning_rate": 1.9680274532447344e-05, "loss": 1.0192, "step": 1504 }, { "epoch": 0.2580534539297426, "grad_norm": 1.609375, "learning_rate": 1.9679821367852824e-05, "loss": 1.1258, "step": 1505 }, { "epoch": 0.25822491801873243, "grad_norm": 1.5859375, "learning_rate": 1.9679367887562874e-05, "loss": 1.0358, "step": 1506 }, { "epoch": 0.2583963821077223, "grad_norm": 1.7109375, "learning_rate": 1.967891409159229e-05, "loss": 1.1577, "step": 1507 }, { "epoch": 0.2585678461967122, "grad_norm": 1.65625, "learning_rate": 1.967845997995587e-05, "loss": 0.9771, "step": 1508 }, { "epoch": 0.258739310285702, "grad_norm": 1.8984375, "learning_rate": 1.9678005552668423e-05, "loss": 1.1342, "step": 1509 }, { "epoch": 0.2589107743746919, "grad_norm": 1.671875, "learning_rate": 1.967755080974477e-05, "loss": 1.0732, "step": 1510 }, { "epoch": 0.2590822384636818, "grad_norm": 1.734375, "learning_rate": 1.9677095751199746e-05, "loss": 1.1263, "step": 1511 }, { "epoch": 0.2592537025526716, "grad_norm": 1.7109375, "learning_rate": 1.9676640377048185e-05, "loss": 1.0274, "step": 1512 }, { "epoch": 0.2594251666416615, "grad_norm": 1.6328125, "learning_rate": 1.9676184687304945e-05, "loss": 0.998, "step": 1513 }, { "epoch": 0.25959663073065137, "grad_norm": 1.578125, "learning_rate": 1.9675728681984886e-05, "loss": 1.0682, "step": 1514 }, { "epoch": 0.2597680948196412, "grad_norm": 1.703125, "learning_rate": 1.9675272361102876e-05, "loss": 1.1237, "step": 1515 }, { "epoch": 0.2599395589086311, "grad_norm": 1.7578125, "learning_rate": 1.9674815724673804e-05, "loss": 1.0046, "step": 1516 }, { "epoch": 0.26011102299762096, "grad_norm": 1.6875, "learning_rate": 1.967435877271256e-05, "loss": 1.142, "step": 1517 }, { "epoch": 0.2602824870866108, "grad_norm": 1.5859375, "learning_rate": 1.9673901505234042e-05, "loss": 0.9946, "step": 1518 }, { "epoch": 0.26045395117560066, "grad_norm": 1.671875, "learning_rate": 1.967344392225317e-05, "loss": 1.0523, "step": 1519 }, { "epoch": 0.26062541526459054, "grad_norm": 1.765625, "learning_rate": 1.9672986023784863e-05, "loss": 1.1062, "step": 1520 }, { "epoch": 0.26079687935358037, "grad_norm": 1.59375, "learning_rate": 1.967252780984406e-05, "loss": 1.051, "step": 1521 }, { "epoch": 0.26096834344257025, "grad_norm": 1.6953125, "learning_rate": 1.9672069280445696e-05, "loss": 1.0351, "step": 1522 }, { "epoch": 0.26113980753156013, "grad_norm": 1.7578125, "learning_rate": 1.967161043560474e-05, "loss": 1.0894, "step": 1523 }, { "epoch": 0.26131127162054996, "grad_norm": 1.6484375, "learning_rate": 1.967115127533614e-05, "loss": 1.0919, "step": 1524 }, { "epoch": 0.26148273570953984, "grad_norm": 1.7421875, "learning_rate": 1.967069179965488e-05, "loss": 1.0772, "step": 1525 }, { "epoch": 0.2616541997985297, "grad_norm": 1.609375, "learning_rate": 1.9670232008575945e-05, "loss": 0.9477, "step": 1526 }, { "epoch": 0.26182566388751954, "grad_norm": 1.6484375, "learning_rate": 1.966977190211433e-05, "loss": 1.0974, "step": 1527 }, { "epoch": 0.2619971279765094, "grad_norm": 1.625, "learning_rate": 1.966931148028504e-05, "loss": 1.0565, "step": 1528 }, { "epoch": 0.2621685920654993, "grad_norm": 1.671875, "learning_rate": 1.9668850743103093e-05, "loss": 1.0793, "step": 1529 }, { "epoch": 0.26234005615448913, "grad_norm": 1.8359375, "learning_rate": 1.9668389690583512e-05, "loss": 1.0805, "step": 1530 }, { "epoch": 0.262511520243479, "grad_norm": 1.546875, "learning_rate": 1.9667928322741337e-05, "loss": 1.0825, "step": 1531 }, { "epoch": 0.2626829843324689, "grad_norm": 1.59375, "learning_rate": 1.9667466639591612e-05, "loss": 1.052, "step": 1532 }, { "epoch": 0.2628544484214587, "grad_norm": 1.6328125, "learning_rate": 1.9667004641149393e-05, "loss": 1.095, "step": 1533 }, { "epoch": 0.2630259125104486, "grad_norm": 1.671875, "learning_rate": 1.9666542327429754e-05, "loss": 0.9686, "step": 1534 }, { "epoch": 0.2631973765994385, "grad_norm": 1.6171875, "learning_rate": 1.966607969844777e-05, "loss": 1.0208, "step": 1535 }, { "epoch": 0.2633688406884283, "grad_norm": 1.671875, "learning_rate": 1.9665616754218523e-05, "loss": 1.0695, "step": 1536 }, { "epoch": 0.2635403047774182, "grad_norm": 1.5625, "learning_rate": 1.966515349475712e-05, "loss": 1.0443, "step": 1537 }, { "epoch": 0.263711768866408, "grad_norm": 1.59375, "learning_rate": 1.9664689920078665e-05, "loss": 1.0577, "step": 1538 }, { "epoch": 0.2638832329553979, "grad_norm": 1.640625, "learning_rate": 1.9664226030198278e-05, "loss": 1.0162, "step": 1539 }, { "epoch": 0.2640546970443878, "grad_norm": 1.6875, "learning_rate": 1.9663761825131087e-05, "loss": 1.0553, "step": 1540 }, { "epoch": 0.2642261611333776, "grad_norm": 1.6171875, "learning_rate": 1.966329730489223e-05, "loss": 1.0822, "step": 1541 }, { "epoch": 0.2643976252223675, "grad_norm": 1.6328125, "learning_rate": 1.9662832469496863e-05, "loss": 0.9951, "step": 1542 }, { "epoch": 0.26456908931135736, "grad_norm": 1.609375, "learning_rate": 1.9662367318960143e-05, "loss": 1.0579, "step": 1543 }, { "epoch": 0.2647405534003472, "grad_norm": 1.6015625, "learning_rate": 1.966190185329724e-05, "loss": 1.0021, "step": 1544 }, { "epoch": 0.26491201748933707, "grad_norm": 1.75, "learning_rate": 1.9661436072523333e-05, "loss": 1.0497, "step": 1545 }, { "epoch": 0.26508348157832695, "grad_norm": 1.640625, "learning_rate": 1.966096997665361e-05, "loss": 1.0591, "step": 1546 }, { "epoch": 0.2652549456673168, "grad_norm": 1.796875, "learning_rate": 1.966050356570328e-05, "loss": 1.128, "step": 1547 }, { "epoch": 0.26542640975630666, "grad_norm": 1.96875, "learning_rate": 1.9660036839687552e-05, "loss": 0.9565, "step": 1548 }, { "epoch": 0.26559787384529654, "grad_norm": 1.65625, "learning_rate": 1.9659569798621642e-05, "loss": 0.9964, "step": 1549 }, { "epoch": 0.26576933793428636, "grad_norm": 14.0625, "learning_rate": 1.965910244252079e-05, "loss": 1.0035, "step": 1550 }, { "epoch": 0.26594080202327625, "grad_norm": 1.921875, "learning_rate": 1.9658634771400235e-05, "loss": 1.0745, "step": 1551 }, { "epoch": 0.2661122661122661, "grad_norm": 1.8984375, "learning_rate": 1.9658166785275227e-05, "loss": 1.1087, "step": 1552 }, { "epoch": 0.26628373020125595, "grad_norm": 1.5859375, "learning_rate": 1.9657698484161032e-05, "loss": 0.9657, "step": 1553 }, { "epoch": 0.26645519429024583, "grad_norm": 1.6484375, "learning_rate": 1.965722986807292e-05, "loss": 1.0533, "step": 1554 }, { "epoch": 0.2666266583792357, "grad_norm": 1.75, "learning_rate": 1.965676093702618e-05, "loss": 1.0809, "step": 1555 }, { "epoch": 0.26679812246822554, "grad_norm": 1.734375, "learning_rate": 1.9656291691036098e-05, "loss": 1.0149, "step": 1556 }, { "epoch": 0.2669695865572154, "grad_norm": 1.640625, "learning_rate": 1.9655822130117985e-05, "loss": 1.1726, "step": 1557 }, { "epoch": 0.2671410506462053, "grad_norm": 1.703125, "learning_rate": 1.965535225428715e-05, "loss": 1.0171, "step": 1558 }, { "epoch": 0.26731251473519513, "grad_norm": 1.6015625, "learning_rate": 1.9654882063558918e-05, "loss": 1.0369, "step": 1559 }, { "epoch": 0.267483978824185, "grad_norm": 1.625, "learning_rate": 1.965441155794863e-05, "loss": 1.0901, "step": 1560 }, { "epoch": 0.2676554429131749, "grad_norm": 1.578125, "learning_rate": 1.965394073747162e-05, "loss": 1.0468, "step": 1561 }, { "epoch": 0.2678269070021647, "grad_norm": 1.625, "learning_rate": 1.965346960214325e-05, "loss": 1.0866, "step": 1562 }, { "epoch": 0.2679983710911546, "grad_norm": 1.6640625, "learning_rate": 1.9652998151978887e-05, "loss": 1.0789, "step": 1563 }, { "epoch": 0.2681698351801445, "grad_norm": 1.703125, "learning_rate": 1.9652526386993903e-05, "loss": 1.0895, "step": 1564 }, { "epoch": 0.2683412992691343, "grad_norm": 1.6640625, "learning_rate": 1.9652054307203687e-05, "loss": 1.0682, "step": 1565 }, { "epoch": 0.2685127633581242, "grad_norm": 1.6796875, "learning_rate": 1.9651581912623633e-05, "loss": 1.0731, "step": 1566 }, { "epoch": 0.26868422744711407, "grad_norm": 1.796875, "learning_rate": 1.9651109203269147e-05, "loss": 1.0308, "step": 1567 }, { "epoch": 0.2688556915361039, "grad_norm": 1.6171875, "learning_rate": 1.965063617915565e-05, "loss": 0.9955, "step": 1568 }, { "epoch": 0.2690271556250938, "grad_norm": 1.625, "learning_rate": 1.9650162840298564e-05, "loss": 1.0101, "step": 1569 }, { "epoch": 0.26919861971408365, "grad_norm": 1.7890625, "learning_rate": 1.964968918671333e-05, "loss": 0.9961, "step": 1570 }, { "epoch": 0.2693700838030735, "grad_norm": 1.6875, "learning_rate": 1.9649215218415393e-05, "loss": 1.0691, "step": 1571 }, { "epoch": 0.26954154789206336, "grad_norm": 1.6171875, "learning_rate": 1.9648740935420212e-05, "loss": 1.0502, "step": 1572 }, { "epoch": 0.26971301198105324, "grad_norm": 1.6015625, "learning_rate": 1.9648266337743254e-05, "loss": 1.0375, "step": 1573 }, { "epoch": 0.26988447607004307, "grad_norm": 1.5625, "learning_rate": 1.96477914254e-05, "loss": 1.0687, "step": 1574 }, { "epoch": 0.27005594015903295, "grad_norm": 1.7421875, "learning_rate": 1.9647316198405943e-05, "loss": 1.1145, "step": 1575 }, { "epoch": 0.27022740424802283, "grad_norm": 1.59375, "learning_rate": 1.964684065677657e-05, "loss": 0.9817, "step": 1576 }, { "epoch": 0.27039886833701265, "grad_norm": 1.7890625, "learning_rate": 1.9646364800527396e-05, "loss": 1.0822, "step": 1577 }, { "epoch": 0.27057033242600254, "grad_norm": 1.6328125, "learning_rate": 1.9645888629673944e-05, "loss": 1.0437, "step": 1578 }, { "epoch": 0.2707417965149924, "grad_norm": 1.6171875, "learning_rate": 1.964541214423174e-05, "loss": 1.09, "step": 1579 }, { "epoch": 0.27091326060398224, "grad_norm": 1.75, "learning_rate": 1.9644935344216325e-05, "loss": 1.0711, "step": 1580 }, { "epoch": 0.2710847246929721, "grad_norm": 1.7265625, "learning_rate": 1.964445822964325e-05, "loss": 1.0824, "step": 1581 }, { "epoch": 0.271256188781962, "grad_norm": 1.75, "learning_rate": 1.964398080052807e-05, "loss": 1.0594, "step": 1582 }, { "epoch": 0.27142765287095183, "grad_norm": 1.6796875, "learning_rate": 1.9643503056886364e-05, "loss": 1.1174, "step": 1583 }, { "epoch": 0.2715991169599417, "grad_norm": 1.6015625, "learning_rate": 1.964302499873371e-05, "loss": 1.0093, "step": 1584 }, { "epoch": 0.2717705810489316, "grad_norm": 1.6953125, "learning_rate": 1.9642546626085693e-05, "loss": 1.0025, "step": 1585 }, { "epoch": 0.2719420451379214, "grad_norm": 1.6171875, "learning_rate": 1.9642067938957926e-05, "loss": 1.1346, "step": 1586 }, { "epoch": 0.2721135092269113, "grad_norm": 1.640625, "learning_rate": 1.964158893736601e-05, "loss": 1.0351, "step": 1587 }, { "epoch": 0.2722849733159011, "grad_norm": 1.6171875, "learning_rate": 1.9641109621325577e-05, "loss": 1.1297, "step": 1588 }, { "epoch": 0.272456437404891, "grad_norm": 1.6640625, "learning_rate": 1.9640629990852253e-05, "loss": 1.028, "step": 1589 }, { "epoch": 0.2726279014938809, "grad_norm": 1.59375, "learning_rate": 1.964015004596168e-05, "loss": 1.0212, "step": 1590 }, { "epoch": 0.2727993655828707, "grad_norm": 1.7421875, "learning_rate": 1.9639669786669513e-05, "loss": 1.1078, "step": 1591 }, { "epoch": 0.2729708296718606, "grad_norm": 1.6640625, "learning_rate": 1.9639189212991415e-05, "loss": 1.0638, "step": 1592 }, { "epoch": 0.2731422937608505, "grad_norm": 1.6953125, "learning_rate": 1.9638708324943056e-05, "loss": 1.0655, "step": 1593 }, { "epoch": 0.2733137578498403, "grad_norm": 1.6484375, "learning_rate": 1.963822712254013e-05, "loss": 1.0881, "step": 1594 }, { "epoch": 0.2734852219388302, "grad_norm": 1.65625, "learning_rate": 1.9637745605798316e-05, "loss": 1.2106, "step": 1595 }, { "epoch": 0.27365668602782006, "grad_norm": 1.5703125, "learning_rate": 1.9637263774733326e-05, "loss": 1.0209, "step": 1596 }, { "epoch": 0.2738281501168099, "grad_norm": 1.7109375, "learning_rate": 1.963678162936088e-05, "loss": 1.1191, "step": 1597 }, { "epoch": 0.27399961420579977, "grad_norm": 1.7578125, "learning_rate": 1.963629916969669e-05, "loss": 1.0532, "step": 1598 }, { "epoch": 0.27417107829478965, "grad_norm": 1.5625, "learning_rate": 1.96358163957565e-05, "loss": 1.0672, "step": 1599 }, { "epoch": 0.2743425423837795, "grad_norm": 1.703125, "learning_rate": 1.963533330755605e-05, "loss": 1.0651, "step": 1600 }, { "epoch": 0.27451400647276936, "grad_norm": 1.6328125, "learning_rate": 1.96348499051111e-05, "loss": 1.0525, "step": 1601 }, { "epoch": 0.27468547056175924, "grad_norm": 1.6796875, "learning_rate": 1.963436618843741e-05, "loss": 0.9884, "step": 1602 }, { "epoch": 0.27485693465074906, "grad_norm": 1.6796875, "learning_rate": 1.963388215755076e-05, "loss": 1.1011, "step": 1603 }, { "epoch": 0.27502839873973894, "grad_norm": 1.6953125, "learning_rate": 1.9633397812466938e-05, "loss": 1.076, "step": 1604 }, { "epoch": 0.2751998628287288, "grad_norm": 1.6875, "learning_rate": 1.9632913153201733e-05, "loss": 1.0313, "step": 1605 }, { "epoch": 0.27537132691771865, "grad_norm": 1.6328125, "learning_rate": 1.9632428179770958e-05, "loss": 1.0222, "step": 1606 }, { "epoch": 0.27554279100670853, "grad_norm": 1.90625, "learning_rate": 1.9631942892190428e-05, "loss": 1.0423, "step": 1607 }, { "epoch": 0.2757142550956984, "grad_norm": 1.6484375, "learning_rate": 1.963145729047597e-05, "loss": 1.1188, "step": 1608 }, { "epoch": 0.27588571918468824, "grad_norm": 1.59375, "learning_rate": 1.963097137464342e-05, "loss": 0.9623, "step": 1609 }, { "epoch": 0.2760571832736781, "grad_norm": 1.6875, "learning_rate": 1.9630485144708627e-05, "loss": 1.0379, "step": 1610 }, { "epoch": 0.276228647362668, "grad_norm": 1.6796875, "learning_rate": 1.962999860068745e-05, "loss": 1.1151, "step": 1611 }, { "epoch": 0.2764001114516578, "grad_norm": 1.6328125, "learning_rate": 1.9629511742595752e-05, "loss": 1.0464, "step": 1612 }, { "epoch": 0.2765715755406477, "grad_norm": 1.6953125, "learning_rate": 1.962902457044942e-05, "loss": 1.1254, "step": 1613 }, { "epoch": 0.2767430396296376, "grad_norm": 1.8203125, "learning_rate": 1.9628537084264333e-05, "loss": 1.1636, "step": 1614 }, { "epoch": 0.2769145037186274, "grad_norm": 1.6875, "learning_rate": 1.9628049284056395e-05, "loss": 1.053, "step": 1615 }, { "epoch": 0.2770859678076173, "grad_norm": 1.5859375, "learning_rate": 1.9627561169841512e-05, "loss": 1.0359, "step": 1616 }, { "epoch": 0.2772574318966072, "grad_norm": 1.6796875, "learning_rate": 1.9627072741635608e-05, "loss": 0.9806, "step": 1617 }, { "epoch": 0.277428895985597, "grad_norm": 1.515625, "learning_rate": 1.9626583999454608e-05, "loss": 1.066, "step": 1618 }, { "epoch": 0.2776003600745869, "grad_norm": 1.6953125, "learning_rate": 1.9626094943314452e-05, "loss": 1.0277, "step": 1619 }, { "epoch": 0.27777182416357676, "grad_norm": 1.6484375, "learning_rate": 1.9625605573231093e-05, "loss": 1.1298, "step": 1620 }, { "epoch": 0.2779432882525666, "grad_norm": 1.671875, "learning_rate": 1.962511588922049e-05, "loss": 1.0838, "step": 1621 }, { "epoch": 0.27811475234155647, "grad_norm": 1.578125, "learning_rate": 1.9624625891298615e-05, "loss": 1.0209, "step": 1622 }, { "epoch": 0.27828621643054635, "grad_norm": 1.7421875, "learning_rate": 1.9624135579481446e-05, "loss": 1.09, "step": 1623 }, { "epoch": 0.2784576805195362, "grad_norm": 1.609375, "learning_rate": 1.9623644953784974e-05, "loss": 1.0581, "step": 1624 }, { "epoch": 0.27862914460852606, "grad_norm": 1.515625, "learning_rate": 1.96231540142252e-05, "loss": 1.0417, "step": 1625 }, { "epoch": 0.27880060869751594, "grad_norm": 1.5703125, "learning_rate": 1.9622662760818136e-05, "loss": 1.0268, "step": 1626 }, { "epoch": 0.27897207278650576, "grad_norm": 1.578125, "learning_rate": 1.9622171193579806e-05, "loss": 1.0325, "step": 1627 }, { "epoch": 0.27914353687549565, "grad_norm": 1.6328125, "learning_rate": 1.9621679312526234e-05, "loss": 1.0768, "step": 1628 }, { "epoch": 0.2793150009644855, "grad_norm": 1.6640625, "learning_rate": 1.962118711767347e-05, "loss": 0.9856, "step": 1629 }, { "epoch": 0.27948646505347535, "grad_norm": 1.65625, "learning_rate": 1.9620694609037567e-05, "loss": 1.0344, "step": 1630 }, { "epoch": 0.27965792914246523, "grad_norm": 1.6015625, "learning_rate": 1.9620201786634584e-05, "loss": 1.0485, "step": 1631 }, { "epoch": 0.2798293932314551, "grad_norm": 1.5390625, "learning_rate": 1.9619708650480595e-05, "loss": 1.0014, "step": 1632 }, { "epoch": 0.28000085732044494, "grad_norm": 1.6328125, "learning_rate": 1.961921520059168e-05, "loss": 1.0522, "step": 1633 }, { "epoch": 0.2801723214094348, "grad_norm": 1.640625, "learning_rate": 1.9618721436983935e-05, "loss": 1.036, "step": 1634 }, { "epoch": 0.28034378549842465, "grad_norm": 1.625, "learning_rate": 1.9618227359673464e-05, "loss": 1.181, "step": 1635 }, { "epoch": 0.28051524958741453, "grad_norm": 1.6640625, "learning_rate": 1.961773296867638e-05, "loss": 1.013, "step": 1636 }, { "epoch": 0.2806867136764044, "grad_norm": 1.578125, "learning_rate": 1.9617238264008806e-05, "loss": 1.0042, "step": 1637 }, { "epoch": 0.28085817776539423, "grad_norm": 1.5859375, "learning_rate": 1.961674324568688e-05, "loss": 0.9712, "step": 1638 }, { "epoch": 0.2810296418543841, "grad_norm": 1.6875, "learning_rate": 1.9616247913726738e-05, "loss": 1.0074, "step": 1639 }, { "epoch": 0.281201105943374, "grad_norm": 1.6640625, "learning_rate": 1.9615752268144544e-05, "loss": 1.1121, "step": 1640 }, { "epoch": 0.2813725700323638, "grad_norm": 1.71875, "learning_rate": 1.9615256308956458e-05, "loss": 1.0317, "step": 1641 }, { "epoch": 0.2815440341213537, "grad_norm": 1.5546875, "learning_rate": 1.961476003617866e-05, "loss": 1.0479, "step": 1642 }, { "epoch": 0.2817154982103436, "grad_norm": 1.578125, "learning_rate": 1.9614263449827324e-05, "loss": 1.0782, "step": 1643 }, { "epoch": 0.2818869622993334, "grad_norm": 1.671875, "learning_rate": 1.9613766549918657e-05, "loss": 1.0909, "step": 1644 }, { "epoch": 0.2820584263883233, "grad_norm": 1.7109375, "learning_rate": 1.9613269336468862e-05, "loss": 1.0773, "step": 1645 }, { "epoch": 0.2822298904773132, "grad_norm": 1.6328125, "learning_rate": 1.961277180949415e-05, "loss": 1.0504, "step": 1646 }, { "epoch": 0.282401354566303, "grad_norm": 1.59375, "learning_rate": 1.9612273969010755e-05, "loss": 1.0147, "step": 1647 }, { "epoch": 0.2825728186552929, "grad_norm": 1.71875, "learning_rate": 1.9611775815034905e-05, "loss": 1.0605, "step": 1648 }, { "epoch": 0.28274428274428276, "grad_norm": 1.578125, "learning_rate": 1.9611277347582853e-05, "loss": 1.0812, "step": 1649 }, { "epoch": 0.2829157468332726, "grad_norm": 1.7421875, "learning_rate": 1.9610778566670858e-05, "loss": 1.0974, "step": 1650 }, { "epoch": 0.28308721092226247, "grad_norm": 1.6015625, "learning_rate": 1.9610279472315177e-05, "loss": 0.9812, "step": 1651 }, { "epoch": 0.28325867501125235, "grad_norm": 1.6953125, "learning_rate": 1.9609780064532095e-05, "loss": 1.0965, "step": 1652 }, { "epoch": 0.2834301391002422, "grad_norm": 1.6484375, "learning_rate": 1.96092803433379e-05, "loss": 1.0054, "step": 1653 }, { "epoch": 0.28360160318923205, "grad_norm": 1.6875, "learning_rate": 1.9608780308748886e-05, "loss": 1.0366, "step": 1654 }, { "epoch": 0.28377306727822194, "grad_norm": 1.6328125, "learning_rate": 1.9608279960781363e-05, "loss": 1.0656, "step": 1655 }, { "epoch": 0.28394453136721176, "grad_norm": 1.6015625, "learning_rate": 1.960777929945165e-05, "loss": 1.0081, "step": 1656 }, { "epoch": 0.28411599545620164, "grad_norm": 1.703125, "learning_rate": 1.9607278324776072e-05, "loss": 1.1173, "step": 1657 }, { "epoch": 0.2842874595451915, "grad_norm": 1.640625, "learning_rate": 1.9606777036770978e-05, "loss": 1.0612, "step": 1658 }, { "epoch": 0.28445892363418135, "grad_norm": 1.640625, "learning_rate": 1.96062754354527e-05, "loss": 0.9736, "step": 1659 }, { "epoch": 0.28463038772317123, "grad_norm": 1.6484375, "learning_rate": 1.960577352083761e-05, "loss": 1.1171, "step": 1660 }, { "epoch": 0.2848018518121611, "grad_norm": 1.6796875, "learning_rate": 1.9605271292942073e-05, "loss": 1.0382, "step": 1661 }, { "epoch": 0.28497331590115094, "grad_norm": 1.640625, "learning_rate": 1.9604768751782468e-05, "loss": 1.0854, "step": 1662 }, { "epoch": 0.2851447799901408, "grad_norm": 1.640625, "learning_rate": 1.9604265897375187e-05, "loss": 1.0552, "step": 1663 }, { "epoch": 0.2853162440791307, "grad_norm": 1.625, "learning_rate": 1.960376272973663e-05, "loss": 1.076, "step": 1664 }, { "epoch": 0.2854877081681205, "grad_norm": 1.53125, "learning_rate": 1.9603259248883203e-05, "loss": 0.9763, "step": 1665 }, { "epoch": 0.2856591722571104, "grad_norm": 1.6640625, "learning_rate": 1.9602755454831334e-05, "loss": 1.0594, "step": 1666 }, { "epoch": 0.2858306363461003, "grad_norm": 1.6796875, "learning_rate": 1.9602251347597442e-05, "loss": 0.9935, "step": 1667 }, { "epoch": 0.2860021004350901, "grad_norm": 1.71875, "learning_rate": 1.960174692719798e-05, "loss": 1.0511, "step": 1668 }, { "epoch": 0.28617356452408, "grad_norm": 1.640625, "learning_rate": 1.9601242193649394e-05, "loss": 1.0227, "step": 1669 }, { "epoch": 0.2863450286130699, "grad_norm": 1.65625, "learning_rate": 1.9600737146968143e-05, "loss": 1.0802, "step": 1670 }, { "epoch": 0.2865164927020597, "grad_norm": 1.609375, "learning_rate": 1.9600231787170704e-05, "loss": 1.0771, "step": 1671 }, { "epoch": 0.2866879567910496, "grad_norm": 1.6171875, "learning_rate": 1.959972611427355e-05, "loss": 1.0267, "step": 1672 }, { "epoch": 0.28685942088003946, "grad_norm": 1.6640625, "learning_rate": 1.959922012829318e-05, "loss": 1.0999, "step": 1673 }, { "epoch": 0.2870308849690293, "grad_norm": 1.6484375, "learning_rate": 1.9598713829246097e-05, "loss": 0.9944, "step": 1674 }, { "epoch": 0.28720234905801917, "grad_norm": 1.65625, "learning_rate": 1.9598207217148806e-05, "loss": 1.0756, "step": 1675 }, { "epoch": 0.28737381314700905, "grad_norm": 1.6953125, "learning_rate": 1.9597700292017838e-05, "loss": 1.032, "step": 1676 }, { "epoch": 0.2875452772359989, "grad_norm": 1.6484375, "learning_rate": 1.959719305386972e-05, "loss": 1.0701, "step": 1677 }, { "epoch": 0.28771674132498876, "grad_norm": 1.5390625, "learning_rate": 1.9596685502720997e-05, "loss": 1.0368, "step": 1678 }, { "epoch": 0.28788820541397864, "grad_norm": 1.609375, "learning_rate": 1.9596177638588223e-05, "loss": 1.0462, "step": 1679 }, { "epoch": 0.28805966950296846, "grad_norm": 1.625, "learning_rate": 1.959566946148796e-05, "loss": 1.0831, "step": 1680 }, { "epoch": 0.28823113359195834, "grad_norm": 1.75, "learning_rate": 1.9595160971436784e-05, "loss": 1.2239, "step": 1681 }, { "epoch": 0.28840259768094817, "grad_norm": 1.6875, "learning_rate": 1.9594652168451274e-05, "loss": 1.0844, "step": 1682 }, { "epoch": 0.28857406176993805, "grad_norm": 1.7109375, "learning_rate": 1.9594143052548027e-05, "loss": 1.1673, "step": 1683 }, { "epoch": 0.28874552585892793, "grad_norm": 1.6171875, "learning_rate": 1.9593633623743646e-05, "loss": 0.9826, "step": 1684 }, { "epoch": 0.28891698994791776, "grad_norm": 1.6328125, "learning_rate": 1.9593123882054748e-05, "loss": 0.9722, "step": 1685 }, { "epoch": 0.28908845403690764, "grad_norm": 1.71875, "learning_rate": 1.9592613827497953e-05, "loss": 1.0444, "step": 1686 }, { "epoch": 0.2892599181258975, "grad_norm": 1.6328125, "learning_rate": 1.9592103460089903e-05, "loss": 1.1105, "step": 1687 }, { "epoch": 0.28943138221488734, "grad_norm": 1.7265625, "learning_rate": 1.9591592779847234e-05, "loss": 1.0662, "step": 1688 }, { "epoch": 0.2896028463038772, "grad_norm": 1.6328125, "learning_rate": 1.9591081786786608e-05, "loss": 1.1187, "step": 1689 }, { "epoch": 0.2897743103928671, "grad_norm": 1.5625, "learning_rate": 1.959057048092469e-05, "loss": 1.0104, "step": 1690 }, { "epoch": 0.28994577448185693, "grad_norm": 1.6328125, "learning_rate": 1.9590058862278154e-05, "loss": 1.0834, "step": 1691 }, { "epoch": 0.2901172385708468, "grad_norm": 1.6796875, "learning_rate": 1.9589546930863685e-05, "loss": 1.0711, "step": 1692 }, { "epoch": 0.2902887026598367, "grad_norm": 1.5625, "learning_rate": 1.9589034686697977e-05, "loss": 1.0707, "step": 1693 }, { "epoch": 0.2904601667488265, "grad_norm": 1.5390625, "learning_rate": 1.9588522129797744e-05, "loss": 1.0679, "step": 1694 }, { "epoch": 0.2906316308378164, "grad_norm": 1.7890625, "learning_rate": 1.9588009260179693e-05, "loss": 1.1389, "step": 1695 }, { "epoch": 0.2908030949268063, "grad_norm": 1.59375, "learning_rate": 1.9587496077860553e-05, "loss": 0.9303, "step": 1696 }, { "epoch": 0.2909745590157961, "grad_norm": 1.609375, "learning_rate": 1.9586982582857067e-05, "loss": 1.0341, "step": 1697 }, { "epoch": 0.291146023104786, "grad_norm": 1.6640625, "learning_rate": 1.9586468775185975e-05, "loss": 1.0564, "step": 1698 }, { "epoch": 0.29131748719377587, "grad_norm": 1.5703125, "learning_rate": 1.958595465486404e-05, "loss": 1.0828, "step": 1699 }, { "epoch": 0.2914889512827657, "grad_norm": 1.703125, "learning_rate": 1.9585440221908026e-05, "loss": 1.141, "step": 1700 }, { "epoch": 0.2916604153717556, "grad_norm": 1.625, "learning_rate": 1.958492547633471e-05, "loss": 1.0198, "step": 1701 }, { "epoch": 0.29183187946074546, "grad_norm": 1.78125, "learning_rate": 1.9584410418160876e-05, "loss": 1.1171, "step": 1702 }, { "epoch": 0.2920033435497353, "grad_norm": 1.6171875, "learning_rate": 1.9583895047403335e-05, "loss": 1.064, "step": 1703 }, { "epoch": 0.29217480763872516, "grad_norm": 1.8203125, "learning_rate": 1.958337936407888e-05, "loss": 1.0585, "step": 1704 }, { "epoch": 0.29234627172771505, "grad_norm": 1.640625, "learning_rate": 1.9582863368204342e-05, "loss": 1.055, "step": 1705 }, { "epoch": 0.29251773581670487, "grad_norm": 1.671875, "learning_rate": 1.958234705979654e-05, "loss": 1.0747, "step": 1706 }, { "epoch": 0.29268919990569475, "grad_norm": 1.578125, "learning_rate": 1.958183043887232e-05, "loss": 1.0259, "step": 1707 }, { "epoch": 0.29286066399468463, "grad_norm": 1.5859375, "learning_rate": 1.958131350544852e-05, "loss": 1.0217, "step": 1708 }, { "epoch": 0.29303212808367446, "grad_norm": 1.6171875, "learning_rate": 1.9580796259542018e-05, "loss": 1.0129, "step": 1709 }, { "epoch": 0.29320359217266434, "grad_norm": 1.5546875, "learning_rate": 1.958027870116966e-05, "loss": 0.9592, "step": 1710 }, { "epoch": 0.2933750562616542, "grad_norm": 1.5859375, "learning_rate": 1.9579760830348345e-05, "loss": 1.1031, "step": 1711 }, { "epoch": 0.29354652035064405, "grad_norm": 1.7109375, "learning_rate": 1.9579242647094956e-05, "loss": 1.117, "step": 1712 }, { "epoch": 0.29371798443963393, "grad_norm": 1.796875, "learning_rate": 1.957872415142639e-05, "loss": 1.029, "step": 1713 }, { "epoch": 0.2938894485286238, "grad_norm": 1.6171875, "learning_rate": 1.957820534335956e-05, "loss": 1.0084, "step": 1714 }, { "epoch": 0.29406091261761363, "grad_norm": 1.640625, "learning_rate": 1.9577686222911386e-05, "loss": 1.0399, "step": 1715 }, { "epoch": 0.2942323767066035, "grad_norm": 1.578125, "learning_rate": 1.9577166790098797e-05, "loss": 1.0133, "step": 1716 }, { "epoch": 0.2944038407955934, "grad_norm": 1.6875, "learning_rate": 1.9576647044938733e-05, "loss": 1.1458, "step": 1717 }, { "epoch": 0.2945753048845832, "grad_norm": 1.703125, "learning_rate": 1.957612698744815e-05, "loss": 1.1054, "step": 1718 }, { "epoch": 0.2947467689735731, "grad_norm": 1.6640625, "learning_rate": 1.9575606617644e-05, "loss": 1.0095, "step": 1719 }, { "epoch": 0.294918233062563, "grad_norm": 1.6796875, "learning_rate": 1.9575085935543266e-05, "loss": 1.0516, "step": 1720 }, { "epoch": 0.2950896971515528, "grad_norm": 1.65625, "learning_rate": 1.9574564941162918e-05, "loss": 1.0511, "step": 1721 }, { "epoch": 0.2952611612405427, "grad_norm": 1.671875, "learning_rate": 1.9574043634519957e-05, "loss": 1.071, "step": 1722 }, { "epoch": 0.29543262532953257, "grad_norm": 1.6796875, "learning_rate": 1.9573522015631378e-05, "loss": 1.1122, "step": 1723 }, { "epoch": 0.2956040894185224, "grad_norm": 1.6328125, "learning_rate": 1.9573000084514197e-05, "loss": 1.0394, "step": 1724 }, { "epoch": 0.2957755535075123, "grad_norm": 1.703125, "learning_rate": 1.9572477841185435e-05, "loss": 1.1326, "step": 1725 }, { "epoch": 0.29594701759650216, "grad_norm": 1.5859375, "learning_rate": 1.9571955285662124e-05, "loss": 1.0425, "step": 1726 }, { "epoch": 0.296118481685492, "grad_norm": 1.6484375, "learning_rate": 1.9571432417961308e-05, "loss": 1.0776, "step": 1727 }, { "epoch": 0.29628994577448187, "grad_norm": 1.546875, "learning_rate": 1.9570909238100034e-05, "loss": 1.0177, "step": 1728 }, { "epoch": 0.2964614098634717, "grad_norm": 1.6328125, "learning_rate": 1.9570385746095372e-05, "loss": 1.0884, "step": 1729 }, { "epoch": 0.2966328739524616, "grad_norm": 1.640625, "learning_rate": 1.956986194196439e-05, "loss": 1.069, "step": 1730 }, { "epoch": 0.29680433804145145, "grad_norm": 1.640625, "learning_rate": 1.9569337825724174e-05, "loss": 1.0551, "step": 1731 }, { "epoch": 0.2969758021304413, "grad_norm": 1.578125, "learning_rate": 1.9568813397391816e-05, "loss": 1.0309, "step": 1732 }, { "epoch": 0.29714726621943116, "grad_norm": 1.8359375, "learning_rate": 1.9568288656984423e-05, "loss": 1.1874, "step": 1733 }, { "epoch": 0.29731873030842104, "grad_norm": 1.578125, "learning_rate": 1.9567763604519105e-05, "loss": 1.0737, "step": 1734 }, { "epoch": 0.29749019439741087, "grad_norm": 1.609375, "learning_rate": 1.9567238240012988e-05, "loss": 1.0948, "step": 1735 }, { "epoch": 0.29766165848640075, "grad_norm": 1.5703125, "learning_rate": 1.9566712563483203e-05, "loss": 1.0346, "step": 1736 }, { "epoch": 0.29783312257539063, "grad_norm": 1.609375, "learning_rate": 1.9566186574946894e-05, "loss": 1.0707, "step": 1737 }, { "epoch": 0.29800458666438046, "grad_norm": 1.75, "learning_rate": 1.956566027442122e-05, "loss": 1.1326, "step": 1738 }, { "epoch": 0.29817605075337034, "grad_norm": 1.734375, "learning_rate": 1.9565133661923346e-05, "loss": 1.1118, "step": 1739 }, { "epoch": 0.2983475148423602, "grad_norm": 1.6640625, "learning_rate": 1.9564606737470444e-05, "loss": 1.056, "step": 1740 }, { "epoch": 0.29851897893135004, "grad_norm": 1.546875, "learning_rate": 1.9564079501079698e-05, "loss": 1.0614, "step": 1741 }, { "epoch": 0.2986904430203399, "grad_norm": 1.5546875, "learning_rate": 1.9563551952768303e-05, "loss": 1.0242, "step": 1742 }, { "epoch": 0.2988619071093298, "grad_norm": 1.6484375, "learning_rate": 1.9563024092553465e-05, "loss": 0.999, "step": 1743 }, { "epoch": 0.29903337119831963, "grad_norm": 1.6640625, "learning_rate": 1.9562495920452405e-05, "loss": 1.0743, "step": 1744 }, { "epoch": 0.2992048352873095, "grad_norm": 1.6796875, "learning_rate": 1.9561967436482342e-05, "loss": 1.1008, "step": 1745 }, { "epoch": 0.2993762993762994, "grad_norm": 1.7734375, "learning_rate": 1.956143864066051e-05, "loss": 1.0411, "step": 1746 }, { "epoch": 0.2995477634652892, "grad_norm": 1.609375, "learning_rate": 1.9560909533004168e-05, "loss": 1.0089, "step": 1747 }, { "epoch": 0.2997192275542791, "grad_norm": 1.6171875, "learning_rate": 1.9560380113530555e-05, "loss": 1.083, "step": 1748 }, { "epoch": 0.299890691643269, "grad_norm": 1.609375, "learning_rate": 1.955985038225695e-05, "loss": 1.0368, "step": 1749 }, { "epoch": 0.3000621557322588, "grad_norm": 1.6640625, "learning_rate": 1.955932033920062e-05, "loss": 1.1021, "step": 1750 }, { "epoch": 0.3002336198212487, "grad_norm": 1.6796875, "learning_rate": 1.955878998437886e-05, "loss": 1.0446, "step": 1751 }, { "epoch": 0.30040508391023857, "grad_norm": 1.6171875, "learning_rate": 1.9558259317808964e-05, "loss": 1.1077, "step": 1752 }, { "epoch": 0.3005765479992284, "grad_norm": 1.59375, "learning_rate": 1.9557728339508238e-05, "loss": 1.0784, "step": 1753 }, { "epoch": 0.3007480120882183, "grad_norm": 1.5234375, "learning_rate": 1.9557197049493997e-05, "loss": 0.9533, "step": 1754 }, { "epoch": 0.30091947617720816, "grad_norm": 1.6328125, "learning_rate": 1.9556665447783577e-05, "loss": 1.051, "step": 1755 }, { "epoch": 0.301090940266198, "grad_norm": 1.8125, "learning_rate": 1.9556133534394304e-05, "loss": 1.1247, "step": 1756 }, { "epoch": 0.30126240435518786, "grad_norm": 1.6328125, "learning_rate": 1.9555601309343536e-05, "loss": 1.1179, "step": 1757 }, { "epoch": 0.30143386844417774, "grad_norm": 1.609375, "learning_rate": 1.955506877264862e-05, "loss": 1.0374, "step": 1758 }, { "epoch": 0.30160533253316757, "grad_norm": 1.7578125, "learning_rate": 1.9554535924326936e-05, "loss": 1.1062, "step": 1759 }, { "epoch": 0.30177679662215745, "grad_norm": 1.5703125, "learning_rate": 1.9554002764395856e-05, "loss": 0.9826, "step": 1760 }, { "epoch": 0.30194826071114733, "grad_norm": 1.5546875, "learning_rate": 1.9553469292872765e-05, "loss": 0.9971, "step": 1761 }, { "epoch": 0.30211972480013716, "grad_norm": 1.65625, "learning_rate": 1.955293550977507e-05, "loss": 1.0656, "step": 1762 }, { "epoch": 0.30229118888912704, "grad_norm": 1.5625, "learning_rate": 1.955240141512017e-05, "loss": 1.0908, "step": 1763 }, { "epoch": 0.3024626529781169, "grad_norm": 1.6015625, "learning_rate": 1.9551867008925492e-05, "loss": 1.0144, "step": 1764 }, { "epoch": 0.30263411706710674, "grad_norm": 1.6875, "learning_rate": 1.955133229120846e-05, "loss": 1.0528, "step": 1765 }, { "epoch": 0.3028055811560966, "grad_norm": 1.671875, "learning_rate": 1.9550797261986516e-05, "loss": 1.0964, "step": 1766 }, { "epoch": 0.3029770452450865, "grad_norm": 1.546875, "learning_rate": 1.9550261921277108e-05, "loss": 1.0305, "step": 1767 }, { "epoch": 0.30314850933407633, "grad_norm": 1.625, "learning_rate": 1.9549726269097696e-05, "loss": 1.0677, "step": 1768 }, { "epoch": 0.3033199734230662, "grad_norm": 1.703125, "learning_rate": 1.9549190305465754e-05, "loss": 1.0878, "step": 1769 }, { "epoch": 0.3034914375120561, "grad_norm": 1.625, "learning_rate": 1.9548654030398754e-05, "loss": 1.0571, "step": 1770 }, { "epoch": 0.3036629016010459, "grad_norm": 1.5078125, "learning_rate": 1.954811744391419e-05, "loss": 0.9636, "step": 1771 }, { "epoch": 0.3038343656900358, "grad_norm": 1.609375, "learning_rate": 1.9547580546029555e-05, "loss": 1.0493, "step": 1772 }, { "epoch": 0.3040058297790257, "grad_norm": 1.640625, "learning_rate": 1.9547043336762372e-05, "loss": 1.0085, "step": 1773 }, { "epoch": 0.3041772938680155, "grad_norm": 1.6171875, "learning_rate": 1.9546505816130153e-05, "loss": 1.1213, "step": 1774 }, { "epoch": 0.3043487579570054, "grad_norm": 1.671875, "learning_rate": 1.954596798415043e-05, "loss": 1.0836, "step": 1775 }, { "epoch": 0.3045202220459952, "grad_norm": 1.65625, "learning_rate": 1.9545429840840744e-05, "loss": 1.0237, "step": 1776 }, { "epoch": 0.3046916861349851, "grad_norm": 1.6875, "learning_rate": 1.9544891386218647e-05, "loss": 1.0164, "step": 1777 }, { "epoch": 0.304863150223975, "grad_norm": 1.6484375, "learning_rate": 1.95443526203017e-05, "loss": 1.037, "step": 1778 }, { "epoch": 0.3050346143129648, "grad_norm": 1.640625, "learning_rate": 1.9543813543107473e-05, "loss": 1.038, "step": 1779 }, { "epoch": 0.3052060784019547, "grad_norm": 1.6171875, "learning_rate": 1.9543274154653544e-05, "loss": 1.0393, "step": 1780 }, { "epoch": 0.30537754249094456, "grad_norm": 1.5859375, "learning_rate": 1.954273445495751e-05, "loss": 1.0726, "step": 1781 }, { "epoch": 0.3055490065799344, "grad_norm": 1.7734375, "learning_rate": 1.9542194444036976e-05, "loss": 1.157, "step": 1782 }, { "epoch": 0.30572047066892427, "grad_norm": 1.5859375, "learning_rate": 1.9541654121909543e-05, "loss": 1.0675, "step": 1783 }, { "epoch": 0.30589193475791415, "grad_norm": 1.625, "learning_rate": 1.954111348859284e-05, "loss": 1.0843, "step": 1784 }, { "epoch": 0.306063398846904, "grad_norm": 1.7421875, "learning_rate": 1.95405725441045e-05, "loss": 1.1394, "step": 1785 }, { "epoch": 0.30623486293589386, "grad_norm": 1.6953125, "learning_rate": 1.9540031288462157e-05, "loss": 1.0638, "step": 1786 }, { "epoch": 0.30640632702488374, "grad_norm": 1.6015625, "learning_rate": 1.9539489721683477e-05, "loss": 1.0697, "step": 1787 }, { "epoch": 0.30657779111387357, "grad_norm": 1.6484375, "learning_rate": 1.953894784378611e-05, "loss": 1.1335, "step": 1788 }, { "epoch": 0.30674925520286345, "grad_norm": 1.703125, "learning_rate": 1.9538405654787733e-05, "loss": 1.1439, "step": 1789 }, { "epoch": 0.30692071929185333, "grad_norm": 1.609375, "learning_rate": 1.953786315470603e-05, "loss": 1.1355, "step": 1790 }, { "epoch": 0.30709218338084315, "grad_norm": 1.625, "learning_rate": 1.9537320343558696e-05, "loss": 1.0443, "step": 1791 }, { "epoch": 0.30726364746983303, "grad_norm": 1.6640625, "learning_rate": 1.9536777221363427e-05, "loss": 1.0282, "step": 1792 }, { "epoch": 0.3074351115588229, "grad_norm": 1.5390625, "learning_rate": 1.9536233788137945e-05, "loss": 1.0803, "step": 1793 }, { "epoch": 0.30760657564781274, "grad_norm": 1.609375, "learning_rate": 1.9535690043899965e-05, "loss": 1.0731, "step": 1794 }, { "epoch": 0.3077780397368026, "grad_norm": 1.6484375, "learning_rate": 1.953514598866723e-05, "loss": 1.1285, "step": 1795 }, { "epoch": 0.3079495038257925, "grad_norm": 1.625, "learning_rate": 1.9534601622457473e-05, "loss": 1.0675, "step": 1796 }, { "epoch": 0.30812096791478233, "grad_norm": 1.5234375, "learning_rate": 1.9534056945288454e-05, "loss": 0.9412, "step": 1797 }, { "epoch": 0.3082924320037722, "grad_norm": 1.6171875, "learning_rate": 1.953351195717794e-05, "loss": 1.0785, "step": 1798 }, { "epoch": 0.3084638960927621, "grad_norm": 1.640625, "learning_rate": 1.9532966658143697e-05, "loss": 1.1037, "step": 1799 }, { "epoch": 0.3086353601817519, "grad_norm": 1.7265625, "learning_rate": 1.953242104820351e-05, "loss": 1.1528, "step": 1800 }, { "epoch": 0.3088068242707418, "grad_norm": 1.578125, "learning_rate": 1.9531875127375185e-05, "loss": 1.004, "step": 1801 }, { "epoch": 0.3089782883597317, "grad_norm": 1.6171875, "learning_rate": 1.9531328895676515e-05, "loss": 0.9947, "step": 1802 }, { "epoch": 0.3091497524487215, "grad_norm": 1.6796875, "learning_rate": 1.9530782353125315e-05, "loss": 0.9227, "step": 1803 }, { "epoch": 0.3093212165377114, "grad_norm": 1.71875, "learning_rate": 1.9530235499739417e-05, "loss": 0.987, "step": 1804 }, { "epoch": 0.30949268062670127, "grad_norm": 1.7265625, "learning_rate": 1.952968833553665e-05, "loss": 1.1483, "step": 1805 }, { "epoch": 0.3096641447156911, "grad_norm": 1.75, "learning_rate": 1.952914086053486e-05, "loss": 1.0115, "step": 1806 }, { "epoch": 0.309835608804681, "grad_norm": 1.7265625, "learning_rate": 1.9528593074751903e-05, "loss": 1.1096, "step": 1807 }, { "epoch": 0.31000707289367085, "grad_norm": 1.578125, "learning_rate": 1.952804497820565e-05, "loss": 1.0568, "step": 1808 }, { "epoch": 0.3101785369826607, "grad_norm": 1.5859375, "learning_rate": 1.9527496570913964e-05, "loss": 1.1535, "step": 1809 }, { "epoch": 0.31035000107165056, "grad_norm": 1.6171875, "learning_rate": 1.9526947852894743e-05, "loss": 1.0408, "step": 1810 }, { "epoch": 0.31052146516064044, "grad_norm": 1.59375, "learning_rate": 1.9526398824165874e-05, "loss": 1.0993, "step": 1811 }, { "epoch": 0.31069292924963027, "grad_norm": 1.6484375, "learning_rate": 1.9525849484745266e-05, "loss": 1.0322, "step": 1812 }, { "epoch": 0.31086439333862015, "grad_norm": 1.6171875, "learning_rate": 1.9525299834650838e-05, "loss": 1.0349, "step": 1813 }, { "epoch": 0.31103585742761003, "grad_norm": 1.6796875, "learning_rate": 1.9524749873900514e-05, "loss": 1.108, "step": 1814 }, { "epoch": 0.31120732151659986, "grad_norm": 1.703125, "learning_rate": 1.9524199602512227e-05, "loss": 1.1155, "step": 1815 }, { "epoch": 0.31137878560558974, "grad_norm": 1.65625, "learning_rate": 1.9523649020503925e-05, "loss": 0.9738, "step": 1816 }, { "epoch": 0.3115502496945796, "grad_norm": 1.65625, "learning_rate": 1.9523098127893566e-05, "loss": 1.0731, "step": 1817 }, { "epoch": 0.31172171378356944, "grad_norm": 1.671875, "learning_rate": 1.9522546924699117e-05, "loss": 0.9524, "step": 1818 }, { "epoch": 0.3118931778725593, "grad_norm": 1.7421875, "learning_rate": 1.9521995410938556e-05, "loss": 1.0931, "step": 1819 }, { "epoch": 0.3120646419615492, "grad_norm": 1.7109375, "learning_rate": 1.9521443586629866e-05, "loss": 1.0354, "step": 1820 }, { "epoch": 0.31223610605053903, "grad_norm": 1.6171875, "learning_rate": 1.952089145179105e-05, "loss": 1.0416, "step": 1821 }, { "epoch": 0.3124075701395289, "grad_norm": 1.6015625, "learning_rate": 1.9520339006440107e-05, "loss": 1.0128, "step": 1822 }, { "epoch": 0.31257903422851874, "grad_norm": 1.578125, "learning_rate": 1.951978625059506e-05, "loss": 1.0744, "step": 1823 }, { "epoch": 0.3127504983175086, "grad_norm": 1.640625, "learning_rate": 1.9519233184273937e-05, "loss": 1.0984, "step": 1824 }, { "epoch": 0.3129219624064985, "grad_norm": 1.7265625, "learning_rate": 1.951867980749477e-05, "loss": 1.1042, "step": 1825 }, { "epoch": 0.3130934264954883, "grad_norm": 1.625, "learning_rate": 1.9518126120275615e-05, "loss": 1.0636, "step": 1826 }, { "epoch": 0.3132648905844782, "grad_norm": 1.640625, "learning_rate": 1.9517572122634522e-05, "loss": 1.0009, "step": 1827 }, { "epoch": 0.3134363546734681, "grad_norm": 1.6015625, "learning_rate": 1.9517017814589562e-05, "loss": 1.0826, "step": 1828 }, { "epoch": 0.3136078187624579, "grad_norm": 1.6328125, "learning_rate": 1.9516463196158818e-05, "loss": 1.0033, "step": 1829 }, { "epoch": 0.3137792828514478, "grad_norm": 1.546875, "learning_rate": 1.951590826736037e-05, "loss": 0.9962, "step": 1830 }, { "epoch": 0.3139507469404377, "grad_norm": 1.6796875, "learning_rate": 1.9515353028212317e-05, "loss": 1.0731, "step": 1831 }, { "epoch": 0.3141222110294275, "grad_norm": 1.6328125, "learning_rate": 1.9514797478732773e-05, "loss": 1.0536, "step": 1832 }, { "epoch": 0.3142936751184174, "grad_norm": 1.5703125, "learning_rate": 1.9514241618939855e-05, "loss": 0.9774, "step": 1833 }, { "epoch": 0.31446513920740726, "grad_norm": 1.6640625, "learning_rate": 1.9513685448851688e-05, "loss": 1.0468, "step": 1834 }, { "epoch": 0.3146366032963971, "grad_norm": 1.5625, "learning_rate": 1.9513128968486414e-05, "loss": 1.0279, "step": 1835 }, { "epoch": 0.31480806738538697, "grad_norm": 1.5390625, "learning_rate": 1.9512572177862184e-05, "loss": 0.9927, "step": 1836 }, { "epoch": 0.31497953147437685, "grad_norm": 1.546875, "learning_rate": 1.951201507699715e-05, "loss": 1.0225, "step": 1837 }, { "epoch": 0.3151509955633667, "grad_norm": 1.6015625, "learning_rate": 1.951145766590949e-05, "loss": 1.0497, "step": 1838 }, { "epoch": 0.31532245965235656, "grad_norm": 1.59375, "learning_rate": 1.9510899944617377e-05, "loss": 0.9978, "step": 1839 }, { "epoch": 0.31549392374134644, "grad_norm": 1.671875, "learning_rate": 1.9510341913139e-05, "loss": 1.0166, "step": 1840 }, { "epoch": 0.31566538783033626, "grad_norm": 1.7265625, "learning_rate": 1.950978357149256e-05, "loss": 1.1047, "step": 1841 }, { "epoch": 0.31583685191932614, "grad_norm": 1.6328125, "learning_rate": 1.9509224919696274e-05, "loss": 0.9729, "step": 1842 }, { "epoch": 0.316008316008316, "grad_norm": 1.5859375, "learning_rate": 1.9508665957768345e-05, "loss": 1.0282, "step": 1843 }, { "epoch": 0.31617978009730585, "grad_norm": 1.609375, "learning_rate": 1.950810668572702e-05, "loss": 1.0229, "step": 1844 }, { "epoch": 0.31635124418629573, "grad_norm": 1.6953125, "learning_rate": 1.9507547103590528e-05, "loss": 1.1291, "step": 1845 }, { "epoch": 0.3165227082752856, "grad_norm": 1.609375, "learning_rate": 1.9506987211377125e-05, "loss": 1.1058, "step": 1846 }, { "epoch": 0.31669417236427544, "grad_norm": 1.6171875, "learning_rate": 1.950642700910507e-05, "loss": 1.0973, "step": 1847 }, { "epoch": 0.3168656364532653, "grad_norm": 42.25, "learning_rate": 1.950586649679263e-05, "loss": 1.2741, "step": 1848 }, { "epoch": 0.3170371005422552, "grad_norm": 1.703125, "learning_rate": 1.9505305674458087e-05, "loss": 1.0999, "step": 1849 }, { "epoch": 0.317208564631245, "grad_norm": 1.609375, "learning_rate": 1.9504744542119735e-05, "loss": 1.004, "step": 1850 }, { "epoch": 0.3173800287202349, "grad_norm": 1.6171875, "learning_rate": 1.950418309979587e-05, "loss": 1.0514, "step": 1851 }, { "epoch": 0.3175514928092248, "grad_norm": 1.671875, "learning_rate": 1.9503621347504806e-05, "loss": 1.0184, "step": 1852 }, { "epoch": 0.3177229568982146, "grad_norm": 1.7265625, "learning_rate": 1.950305928526486e-05, "loss": 1.0488, "step": 1853 }, { "epoch": 0.3178944209872045, "grad_norm": 1.609375, "learning_rate": 1.950249691309437e-05, "loss": 0.9829, "step": 1854 }, { "epoch": 0.3180658850761944, "grad_norm": 1.546875, "learning_rate": 1.950193423101167e-05, "loss": 0.9648, "step": 1855 }, { "epoch": 0.3182373491651842, "grad_norm": 1.6015625, "learning_rate": 1.9501371239035113e-05, "loss": 0.9902, "step": 1856 }, { "epoch": 0.3184088132541741, "grad_norm": 1.5859375, "learning_rate": 1.950080793718306e-05, "loss": 1.0648, "step": 1857 }, { "epoch": 0.31858027734316396, "grad_norm": 1.578125, "learning_rate": 1.9500244325473888e-05, "loss": 1.0418, "step": 1858 }, { "epoch": 0.3187517414321538, "grad_norm": 1.65625, "learning_rate": 1.949968040392597e-05, "loss": 1.0923, "step": 1859 }, { "epoch": 0.31892320552114367, "grad_norm": 1.671875, "learning_rate": 1.9499116172557703e-05, "loss": 1.1025, "step": 1860 }, { "epoch": 0.31909466961013355, "grad_norm": 1.6328125, "learning_rate": 1.949855163138749e-05, "loss": 1.0503, "step": 1861 }, { "epoch": 0.3192661336991234, "grad_norm": 1.6171875, "learning_rate": 1.9497986780433735e-05, "loss": 1.0073, "step": 1862 }, { "epoch": 0.31943759778811326, "grad_norm": 1.640625, "learning_rate": 1.9497421619714866e-05, "loss": 1.0719, "step": 1863 }, { "epoch": 0.31960906187710314, "grad_norm": 2.03125, "learning_rate": 1.9496856149249315e-05, "loss": 1.0481, "step": 1864 }, { "epoch": 0.31978052596609297, "grad_norm": 1.59375, "learning_rate": 1.9496290369055522e-05, "loss": 1.0105, "step": 1865 }, { "epoch": 0.31995199005508285, "grad_norm": 1.59375, "learning_rate": 1.9495724279151945e-05, "loss": 1.0392, "step": 1866 }, { "epoch": 0.3201234541440727, "grad_norm": 1.53125, "learning_rate": 1.949515787955704e-05, "loss": 1.0413, "step": 1867 }, { "epoch": 0.32029491823306255, "grad_norm": 1.703125, "learning_rate": 1.9494591170289276e-05, "loss": 1.0007, "step": 1868 }, { "epoch": 0.32046638232205243, "grad_norm": 1.59375, "learning_rate": 1.9494024151367145e-05, "loss": 1.0438, "step": 1869 }, { "epoch": 0.32063784641104226, "grad_norm": 1.796875, "learning_rate": 1.9493456822809135e-05, "loss": 1.0791, "step": 1870 }, { "epoch": 0.32080931050003214, "grad_norm": 1.65625, "learning_rate": 1.949288918463375e-05, "loss": 1.0887, "step": 1871 }, { "epoch": 0.320980774589022, "grad_norm": 1.9140625, "learning_rate": 1.9492321236859496e-05, "loss": 1.0292, "step": 1872 }, { "epoch": 0.32115223867801185, "grad_norm": 1.6171875, "learning_rate": 1.9491752979504906e-05, "loss": 1.0404, "step": 1873 }, { "epoch": 0.32132370276700173, "grad_norm": 1.671875, "learning_rate": 1.949118441258851e-05, "loss": 1.0434, "step": 1874 }, { "epoch": 0.3214951668559916, "grad_norm": 1.59375, "learning_rate": 1.9490615536128847e-05, "loss": 0.9624, "step": 1875 }, { "epoch": 0.32166663094498144, "grad_norm": 1.6796875, "learning_rate": 1.9490046350144475e-05, "loss": 1.1028, "step": 1876 }, { "epoch": 0.3218380950339713, "grad_norm": 1.6640625, "learning_rate": 1.948947685465395e-05, "loss": 1.1315, "step": 1877 }, { "epoch": 0.3220095591229612, "grad_norm": 1.5625, "learning_rate": 1.9488907049675858e-05, "loss": 1.0452, "step": 1878 }, { "epoch": 0.322181023211951, "grad_norm": 1.6640625, "learning_rate": 1.9488336935228772e-05, "loss": 1.1242, "step": 1879 }, { "epoch": 0.3223524873009409, "grad_norm": 1.578125, "learning_rate": 1.948776651133129e-05, "loss": 1.0056, "step": 1880 }, { "epoch": 0.3225239513899308, "grad_norm": 1.765625, "learning_rate": 1.9487195778002013e-05, "loss": 1.1483, "step": 1881 }, { "epoch": 0.3226954154789206, "grad_norm": 1.7109375, "learning_rate": 1.9486624735259557e-05, "loss": 1.0993, "step": 1882 }, { "epoch": 0.3228668795679105, "grad_norm": 1.5625, "learning_rate": 1.9486053383122544e-05, "loss": 1.0002, "step": 1883 }, { "epoch": 0.3230383436569004, "grad_norm": 1.7578125, "learning_rate": 1.948548172160961e-05, "loss": 1.0762, "step": 1884 }, { "epoch": 0.3232098077458902, "grad_norm": 1.5703125, "learning_rate": 1.94849097507394e-05, "loss": 1.0094, "step": 1885 }, { "epoch": 0.3233812718348801, "grad_norm": 1.65625, "learning_rate": 1.9484337470530563e-05, "loss": 1.0485, "step": 1886 }, { "epoch": 0.32355273592386996, "grad_norm": 1.75, "learning_rate": 1.9483764881001767e-05, "loss": 1.102, "step": 1887 }, { "epoch": 0.3237242000128598, "grad_norm": 1.6875, "learning_rate": 1.9483191982171686e-05, "loss": 1.0389, "step": 1888 }, { "epoch": 0.32389566410184967, "grad_norm": 1.5078125, "learning_rate": 1.9482618774059007e-05, "loss": 0.9699, "step": 1889 }, { "epoch": 0.32406712819083955, "grad_norm": 1.7109375, "learning_rate": 1.9482045256682415e-05, "loss": 1.1713, "step": 1890 }, { "epoch": 0.3242385922798294, "grad_norm": 1.734375, "learning_rate": 1.9481471430060627e-05, "loss": 1.074, "step": 1891 }, { "epoch": 0.32441005636881926, "grad_norm": 1.7421875, "learning_rate": 1.9480897294212348e-05, "loss": 1.0589, "step": 1892 }, { "epoch": 0.32458152045780914, "grad_norm": 1.6875, "learning_rate": 1.9480322849156307e-05, "loss": 1.0421, "step": 1893 }, { "epoch": 0.32475298454679896, "grad_norm": 1.6015625, "learning_rate": 1.947974809491124e-05, "loss": 1.0185, "step": 1894 }, { "epoch": 0.32492444863578884, "grad_norm": 3.296875, "learning_rate": 1.947917303149589e-05, "loss": 1.1065, "step": 1895 }, { "epoch": 0.3250959127247787, "grad_norm": 1.6171875, "learning_rate": 1.9478597658929012e-05, "loss": 1.0147, "step": 1896 }, { "epoch": 0.32526737681376855, "grad_norm": 2.28125, "learning_rate": 1.947802197722937e-05, "loss": 1.055, "step": 1897 }, { "epoch": 0.32543884090275843, "grad_norm": 1.84375, "learning_rate": 1.9477445986415743e-05, "loss": 1.0291, "step": 1898 }, { "epoch": 0.3256103049917483, "grad_norm": 1.6328125, "learning_rate": 1.947686968650691e-05, "loss": 1.0111, "step": 1899 }, { "epoch": 0.32578176908073814, "grad_norm": 1.59375, "learning_rate": 1.9476293077521674e-05, "loss": 1.0271, "step": 1900 }, { "epoch": 0.325953233169728, "grad_norm": 1.640625, "learning_rate": 1.9475716159478836e-05, "loss": 1.0097, "step": 1901 }, { "epoch": 0.3261246972587179, "grad_norm": 1.7734375, "learning_rate": 1.947513893239721e-05, "loss": 1.0723, "step": 1902 }, { "epoch": 0.3262961613477077, "grad_norm": 1.703125, "learning_rate": 1.947456139629562e-05, "loss": 1.092, "step": 1903 }, { "epoch": 0.3264676254366976, "grad_norm": 1.71875, "learning_rate": 1.947398355119291e-05, "loss": 1.0427, "step": 1904 }, { "epoch": 0.3266390895256875, "grad_norm": 1.453125, "learning_rate": 1.9473405397107917e-05, "loss": 0.9551, "step": 1905 }, { "epoch": 0.3268105536146773, "grad_norm": 1.6328125, "learning_rate": 1.9472826934059506e-05, "loss": 1.0296, "step": 1906 }, { "epoch": 0.3269820177036672, "grad_norm": 1.5859375, "learning_rate": 1.947224816206653e-05, "loss": 1.072, "step": 1907 }, { "epoch": 0.3271534817926571, "grad_norm": 1.6796875, "learning_rate": 1.9471669081147878e-05, "loss": 0.9822, "step": 1908 }, { "epoch": 0.3273249458816469, "grad_norm": 1.640625, "learning_rate": 1.947108969132243e-05, "loss": 1.0869, "step": 1909 }, { "epoch": 0.3274964099706368, "grad_norm": 1.6015625, "learning_rate": 1.947050999260908e-05, "loss": 1.0469, "step": 1910 }, { "epoch": 0.32766787405962666, "grad_norm": 1.671875, "learning_rate": 1.9469929985026738e-05, "loss": 1.1537, "step": 1911 }, { "epoch": 0.3278393381486165, "grad_norm": 1.59375, "learning_rate": 1.9469349668594314e-05, "loss": 1.093, "step": 1912 }, { "epoch": 0.32801080223760637, "grad_norm": 1.6171875, "learning_rate": 1.9468769043330744e-05, "loss": 1.078, "step": 1913 }, { "epoch": 0.32818226632659625, "grad_norm": 1.640625, "learning_rate": 1.946818810925496e-05, "loss": 1.0673, "step": 1914 }, { "epoch": 0.3283537304155861, "grad_norm": 1.671875, "learning_rate": 1.9467606866385905e-05, "loss": 1.105, "step": 1915 }, { "epoch": 0.32852519450457596, "grad_norm": 1.7109375, "learning_rate": 1.946702531474254e-05, "loss": 1.0411, "step": 1916 }, { "epoch": 0.3286966585935658, "grad_norm": 1.578125, "learning_rate": 1.946644345434383e-05, "loss": 0.9658, "step": 1917 }, { "epoch": 0.32886812268255566, "grad_norm": 1.671875, "learning_rate": 1.9465861285208752e-05, "loss": 1.0598, "step": 1918 }, { "epoch": 0.32903958677154554, "grad_norm": 1.6328125, "learning_rate": 1.9465278807356292e-05, "loss": 1.104, "step": 1919 }, { "epoch": 0.32921105086053537, "grad_norm": 1.65625, "learning_rate": 1.9464696020805446e-05, "loss": 1.1023, "step": 1920 }, { "epoch": 0.32938251494952525, "grad_norm": 1.6484375, "learning_rate": 1.9464112925575224e-05, "loss": 1.1455, "step": 1921 }, { "epoch": 0.32955397903851513, "grad_norm": 1.5859375, "learning_rate": 1.9463529521684638e-05, "loss": 1.0643, "step": 1922 }, { "epoch": 0.32972544312750496, "grad_norm": 1.65625, "learning_rate": 1.9462945809152722e-05, "loss": 1.0266, "step": 1923 }, { "epoch": 0.32989690721649484, "grad_norm": 1.6484375, "learning_rate": 1.9462361787998503e-05, "loss": 0.9947, "step": 1924 }, { "epoch": 0.3300683713054847, "grad_norm": 1.625, "learning_rate": 1.9461777458241038e-05, "loss": 1.1242, "step": 1925 }, { "epoch": 0.33023983539447455, "grad_norm": 1.6484375, "learning_rate": 1.9461192819899383e-05, "loss": 1.04, "step": 1926 }, { "epoch": 0.3304112994834644, "grad_norm": 1.6171875, "learning_rate": 1.94606078729926e-05, "loss": 0.9667, "step": 1927 }, { "epoch": 0.3305827635724543, "grad_norm": 1.6328125, "learning_rate": 1.9460022617539765e-05, "loss": 1.0544, "step": 1928 }, { "epoch": 0.33075422766144413, "grad_norm": 1.6328125, "learning_rate": 1.9459437053559974e-05, "loss": 1.0008, "step": 1929 }, { "epoch": 0.330925691750434, "grad_norm": 1.6328125, "learning_rate": 1.9458851181072317e-05, "loss": 1.121, "step": 1930 }, { "epoch": 0.3310971558394239, "grad_norm": 1.6484375, "learning_rate": 1.9458265000095905e-05, "loss": 1.0576, "step": 1931 }, { "epoch": 0.3312686199284137, "grad_norm": 1.6640625, "learning_rate": 1.9457678510649857e-05, "loss": 1.1327, "step": 1932 }, { "epoch": 0.3314400840174036, "grad_norm": 1.53125, "learning_rate": 1.9457091712753298e-05, "loss": 1.0161, "step": 1933 }, { "epoch": 0.3316115481063935, "grad_norm": 1.6328125, "learning_rate": 1.9456504606425365e-05, "loss": 0.9686, "step": 1934 }, { "epoch": 0.3317830121953833, "grad_norm": 1.6015625, "learning_rate": 1.9455917191685207e-05, "loss": 0.9736, "step": 1935 }, { "epoch": 0.3319544762843732, "grad_norm": 1.546875, "learning_rate": 1.945532946855198e-05, "loss": 1.0819, "step": 1936 }, { "epoch": 0.33212594037336307, "grad_norm": 1.609375, "learning_rate": 1.9454741437044858e-05, "loss": 1.1065, "step": 1937 }, { "epoch": 0.3322974044623529, "grad_norm": 1.5859375, "learning_rate": 1.945415309718301e-05, "loss": 0.9961, "step": 1938 }, { "epoch": 0.3324688685513428, "grad_norm": 1.6875, "learning_rate": 1.945356444898563e-05, "loss": 1.1321, "step": 1939 }, { "epoch": 0.33264033264033266, "grad_norm": 1.6484375, "learning_rate": 1.9452975492471915e-05, "loss": 1.0079, "step": 1940 }, { "epoch": 0.3328117967293225, "grad_norm": 1.5390625, "learning_rate": 1.9452386227661076e-05, "loss": 1.0916, "step": 1941 }, { "epoch": 0.33298326081831237, "grad_norm": 1.6875, "learning_rate": 1.945179665457232e-05, "loss": 1.0289, "step": 1942 }, { "epoch": 0.33315472490730225, "grad_norm": 1.7265625, "learning_rate": 1.945120677322489e-05, "loss": 1.1086, "step": 1943 }, { "epoch": 0.33332618899629207, "grad_norm": 1.625, "learning_rate": 1.9450616583638013e-05, "loss": 0.9818, "step": 1944 }, { "epoch": 0.33349765308528195, "grad_norm": 1.5703125, "learning_rate": 1.9450026085830946e-05, "loss": 1.0208, "step": 1945 }, { "epoch": 0.33366911717427183, "grad_norm": 1.6484375, "learning_rate": 1.9449435279822934e-05, "loss": 1.0519, "step": 1946 }, { "epoch": 0.33384058126326166, "grad_norm": 1.609375, "learning_rate": 1.9448844165633264e-05, "loss": 1.0612, "step": 1947 }, { "epoch": 0.33401204535225154, "grad_norm": 1.6328125, "learning_rate": 1.9448252743281196e-05, "loss": 1.0525, "step": 1948 }, { "epoch": 0.3341835094412414, "grad_norm": 1.6875, "learning_rate": 1.9447661012786034e-05, "loss": 1.0689, "step": 1949 }, { "epoch": 0.33435497353023125, "grad_norm": 1.6640625, "learning_rate": 1.9447068974167068e-05, "loss": 1.1055, "step": 1950 }, { "epoch": 0.33452643761922113, "grad_norm": 1.6328125, "learning_rate": 1.9446476627443608e-05, "loss": 0.9484, "step": 1951 }, { "epoch": 0.334697901708211, "grad_norm": 1.5546875, "learning_rate": 1.9445883972634973e-05, "loss": 1.0176, "step": 1952 }, { "epoch": 0.33486936579720084, "grad_norm": 1.5078125, "learning_rate": 1.9445291009760493e-05, "loss": 0.9543, "step": 1953 }, { "epoch": 0.3350408298861907, "grad_norm": 1.609375, "learning_rate": 1.9444697738839503e-05, "loss": 1.065, "step": 1954 }, { "epoch": 0.3352122939751806, "grad_norm": 1.7734375, "learning_rate": 1.9444104159891353e-05, "loss": 1.0328, "step": 1955 }, { "epoch": 0.3353837580641704, "grad_norm": 1.703125, "learning_rate": 1.9443510272935407e-05, "loss": 1.175, "step": 1956 }, { "epoch": 0.3355552221531603, "grad_norm": 1.6640625, "learning_rate": 1.944291607799103e-05, "loss": 0.9973, "step": 1957 }, { "epoch": 0.3357266862421502, "grad_norm": 1.5625, "learning_rate": 1.94423215750776e-05, "loss": 1.0261, "step": 1958 }, { "epoch": 0.33589815033114, "grad_norm": 1.578125, "learning_rate": 1.9441726764214506e-05, "loss": 1.0332, "step": 1959 }, { "epoch": 0.3360696144201299, "grad_norm": 1.7109375, "learning_rate": 1.9441131645421146e-05, "loss": 1.091, "step": 1960 }, { "epoch": 0.3362410785091198, "grad_norm": 1.6796875, "learning_rate": 1.9440536218716934e-05, "loss": 1.0468, "step": 1961 }, { "epoch": 0.3364125425981096, "grad_norm": 1.671875, "learning_rate": 1.9439940484121287e-05, "loss": 1.0747, "step": 1962 }, { "epoch": 0.3365840066870995, "grad_norm": 1.59375, "learning_rate": 1.943934444165363e-05, "loss": 1.0025, "step": 1963 }, { "epoch": 0.3367554707760893, "grad_norm": 1.625, "learning_rate": 1.943874809133341e-05, "loss": 1.0103, "step": 1964 }, { "epoch": 0.3369269348650792, "grad_norm": 1.5859375, "learning_rate": 1.9438151433180068e-05, "loss": 1.0693, "step": 1965 }, { "epoch": 0.33709839895406907, "grad_norm": 1.546875, "learning_rate": 1.9437554467213066e-05, "loss": 1.0221, "step": 1966 }, { "epoch": 0.3372698630430589, "grad_norm": 1.5390625, "learning_rate": 1.943695719345188e-05, "loss": 1.1032, "step": 1967 }, { "epoch": 0.3374413271320488, "grad_norm": 1.578125, "learning_rate": 1.9436359611915978e-05, "loss": 1.0418, "step": 1968 }, { "epoch": 0.33761279122103866, "grad_norm": 1.6640625, "learning_rate": 1.943576172262486e-05, "loss": 1.0459, "step": 1969 }, { "epoch": 0.3377842553100285, "grad_norm": 1.6484375, "learning_rate": 1.9435163525598017e-05, "loss": 0.9038, "step": 1970 }, { "epoch": 0.33795571939901836, "grad_norm": 1.6953125, "learning_rate": 1.943456502085496e-05, "loss": 1.061, "step": 1971 }, { "epoch": 0.33812718348800824, "grad_norm": 1.625, "learning_rate": 1.9433966208415212e-05, "loss": 1.0721, "step": 1972 }, { "epoch": 0.33829864757699807, "grad_norm": 1.5703125, "learning_rate": 1.94333670882983e-05, "loss": 1.0833, "step": 1973 }, { "epoch": 0.33847011166598795, "grad_norm": 1.4921875, "learning_rate": 1.9432767660523768e-05, "loss": 0.9957, "step": 1974 }, { "epoch": 0.33864157575497783, "grad_norm": 1.609375, "learning_rate": 1.943216792511116e-05, "loss": 0.9921, "step": 1975 }, { "epoch": 0.33881303984396766, "grad_norm": 1.6640625, "learning_rate": 1.9431567882080042e-05, "loss": 1.002, "step": 1976 }, { "epoch": 0.33898450393295754, "grad_norm": 1.6953125, "learning_rate": 1.9430967531449973e-05, "loss": 1.1183, "step": 1977 }, { "epoch": 0.3391559680219474, "grad_norm": 1.65625, "learning_rate": 1.943036687324054e-05, "loss": 1.0156, "step": 1978 }, { "epoch": 0.33932743211093724, "grad_norm": 1.671875, "learning_rate": 1.9429765907471336e-05, "loss": 1.1207, "step": 1979 }, { "epoch": 0.3394988961999271, "grad_norm": 1.7265625, "learning_rate": 1.942916463416195e-05, "loss": 1.07, "step": 1980 }, { "epoch": 0.339670360288917, "grad_norm": 1.6015625, "learning_rate": 1.9428563053332004e-05, "loss": 0.9844, "step": 1981 }, { "epoch": 0.33984182437790683, "grad_norm": 1.59375, "learning_rate": 1.942796116500111e-05, "loss": 1.0781, "step": 1982 }, { "epoch": 0.3400132884668967, "grad_norm": 1.6484375, "learning_rate": 1.94273589691889e-05, "loss": 1.078, "step": 1983 }, { "epoch": 0.3401847525558866, "grad_norm": 1.6875, "learning_rate": 1.9426756465915014e-05, "loss": 1.1393, "step": 1984 }, { "epoch": 0.3403562166448764, "grad_norm": 1.6015625, "learning_rate": 1.94261536551991e-05, "loss": 1.0316, "step": 1985 }, { "epoch": 0.3405276807338663, "grad_norm": 1.578125, "learning_rate": 1.9425550537060826e-05, "loss": 1.1531, "step": 1986 }, { "epoch": 0.3406991448228562, "grad_norm": 1.5546875, "learning_rate": 1.942494711151985e-05, "loss": 1.0325, "step": 1987 }, { "epoch": 0.340870608911846, "grad_norm": 1.640625, "learning_rate": 1.9424343378595857e-05, "loss": 1.0903, "step": 1988 }, { "epoch": 0.3410420730008359, "grad_norm": 1.65625, "learning_rate": 1.942373933830854e-05, "loss": 1.092, "step": 1989 }, { "epoch": 0.34121353708982577, "grad_norm": 1.5625, "learning_rate": 1.9423134990677596e-05, "loss": 1.032, "step": 1990 }, { "epoch": 0.3413850011788156, "grad_norm": 1.625, "learning_rate": 1.9422530335722736e-05, "loss": 1.0583, "step": 1991 }, { "epoch": 0.3415564652678055, "grad_norm": 1.6015625, "learning_rate": 1.942192537346368e-05, "loss": 1.0195, "step": 1992 }, { "epoch": 0.34172792935679536, "grad_norm": 1.6640625, "learning_rate": 1.942132010392016e-05, "loss": 1.1263, "step": 1993 }, { "epoch": 0.3418993934457852, "grad_norm": 1.6171875, "learning_rate": 1.9420714527111907e-05, "loss": 1.1151, "step": 1994 }, { "epoch": 0.34207085753477506, "grad_norm": 1.640625, "learning_rate": 1.9420108643058685e-05, "loss": 1.0326, "step": 1995 }, { "epoch": 0.34224232162376494, "grad_norm": 1.578125, "learning_rate": 1.9419502451780243e-05, "loss": 0.9469, "step": 1996 }, { "epoch": 0.34241378571275477, "grad_norm": 1.546875, "learning_rate": 1.941889595329636e-05, "loss": 1.0316, "step": 1997 }, { "epoch": 0.34258524980174465, "grad_norm": 1.6171875, "learning_rate": 1.9418289147626804e-05, "loss": 1.069, "step": 1998 }, { "epoch": 0.34275671389073453, "grad_norm": 1.59375, "learning_rate": 1.941768203479138e-05, "loss": 1.049, "step": 1999 }, { "epoch": 0.34292817797972436, "grad_norm": 1.6171875, "learning_rate": 1.941707461480988e-05, "loss": 1.0452, "step": 2000 }, { "epoch": 0.34309964206871424, "grad_norm": 1.640625, "learning_rate": 1.9416466887702116e-05, "loss": 1.0781, "step": 2001 }, { "epoch": 0.3432711061577041, "grad_norm": 1.671875, "learning_rate": 1.9415858853487904e-05, "loss": 0.991, "step": 2002 }, { "epoch": 0.34344257024669395, "grad_norm": 1.5703125, "learning_rate": 1.941525051218708e-05, "loss": 1.0131, "step": 2003 }, { "epoch": 0.3436140343356838, "grad_norm": 2.25, "learning_rate": 1.9414641863819484e-05, "loss": 1.1457, "step": 2004 }, { "epoch": 0.3437854984246737, "grad_norm": 1.6171875, "learning_rate": 1.9414032908404962e-05, "loss": 1.1031, "step": 2005 }, { "epoch": 0.34395696251366353, "grad_norm": 1.671875, "learning_rate": 1.9413423645963378e-05, "loss": 1.1239, "step": 2006 }, { "epoch": 0.3441284266026534, "grad_norm": 1.578125, "learning_rate": 1.94128140765146e-05, "loss": 1.0762, "step": 2007 }, { "epoch": 0.3442998906916433, "grad_norm": 1.5625, "learning_rate": 1.9412204200078514e-05, "loss": 1.0155, "step": 2008 }, { "epoch": 0.3444713547806331, "grad_norm": 1.6328125, "learning_rate": 1.9411594016675e-05, "loss": 1.1015, "step": 2009 }, { "epoch": 0.344642818869623, "grad_norm": 1.703125, "learning_rate": 1.9410983526323964e-05, "loss": 1.1553, "step": 2010 }, { "epoch": 0.3448142829586129, "grad_norm": 1.6171875, "learning_rate": 1.941037272904532e-05, "loss": 1.1046, "step": 2011 }, { "epoch": 0.3449857470476027, "grad_norm": 1.5234375, "learning_rate": 1.940976162485899e-05, "loss": 1.0013, "step": 2012 }, { "epoch": 0.3451572111365926, "grad_norm": 1.6875, "learning_rate": 1.9409150213784892e-05, "loss": 1.0267, "step": 2013 }, { "epoch": 0.3453286752255824, "grad_norm": 1.546875, "learning_rate": 1.9408538495842973e-05, "loss": 1.0041, "step": 2014 }, { "epoch": 0.3455001393145723, "grad_norm": 1.6015625, "learning_rate": 1.9407926471053188e-05, "loss": 1.02, "step": 2015 }, { "epoch": 0.3456716034035622, "grad_norm": 1.6171875, "learning_rate": 1.9407314139435495e-05, "loss": 1.0167, "step": 2016 }, { "epoch": 0.345843067492552, "grad_norm": 1.7109375, "learning_rate": 1.9406701501009862e-05, "loss": 1.0526, "step": 2017 }, { "epoch": 0.3460145315815419, "grad_norm": 1.6328125, "learning_rate": 1.9406088555796268e-05, "loss": 1.0379, "step": 2018 }, { "epoch": 0.34618599567053177, "grad_norm": 1.625, "learning_rate": 1.940547530381471e-05, "loss": 1.0715, "step": 2019 }, { "epoch": 0.3463574597595216, "grad_norm": 1.6328125, "learning_rate": 1.9404861745085184e-05, "loss": 1.0622, "step": 2020 }, { "epoch": 0.34652892384851147, "grad_norm": 1.5859375, "learning_rate": 1.94042478796277e-05, "loss": 0.9886, "step": 2021 }, { "epoch": 0.34670038793750135, "grad_norm": 1.515625, "learning_rate": 1.9403633707462282e-05, "loss": 1.0259, "step": 2022 }, { "epoch": 0.3468718520264912, "grad_norm": 1.703125, "learning_rate": 1.9403019228608953e-05, "loss": 0.9734, "step": 2023 }, { "epoch": 0.34704331611548106, "grad_norm": 1.59375, "learning_rate": 1.9402404443087763e-05, "loss": 1.0332, "step": 2024 }, { "epoch": 0.34721478020447094, "grad_norm": 1.625, "learning_rate": 1.9401789350918755e-05, "loss": 1.0274, "step": 2025 }, { "epoch": 0.34738624429346077, "grad_norm": 1.7265625, "learning_rate": 1.9401173952121993e-05, "loss": 1.1486, "step": 2026 }, { "epoch": 0.34755770838245065, "grad_norm": 1.5859375, "learning_rate": 1.9400558246717547e-05, "loss": 1.0557, "step": 2027 }, { "epoch": 0.34772917247144053, "grad_norm": 1.578125, "learning_rate": 1.93999422347255e-05, "loss": 0.9885, "step": 2028 }, { "epoch": 0.34790063656043035, "grad_norm": 1.5625, "learning_rate": 1.9399325916165937e-05, "loss": 1.1045, "step": 2029 }, { "epoch": 0.34807210064942024, "grad_norm": 1.53125, "learning_rate": 1.939870929105896e-05, "loss": 0.9841, "step": 2030 }, { "epoch": 0.3482435647384101, "grad_norm": 1.5859375, "learning_rate": 1.9398092359424683e-05, "loss": 0.9914, "step": 2031 }, { "epoch": 0.34841502882739994, "grad_norm": 1.5703125, "learning_rate": 1.9397475121283226e-05, "loss": 1.0192, "step": 2032 }, { "epoch": 0.3485864929163898, "grad_norm": 1.6015625, "learning_rate": 1.9396857576654714e-05, "loss": 1.0977, "step": 2033 }, { "epoch": 0.3487579570053797, "grad_norm": 1.5703125, "learning_rate": 1.9396239725559294e-05, "loss": 1.0574, "step": 2034 }, { "epoch": 0.34892942109436953, "grad_norm": 1.5859375, "learning_rate": 1.939562156801711e-05, "loss": 1.0004, "step": 2035 }, { "epoch": 0.3491008851833594, "grad_norm": 1.6171875, "learning_rate": 1.939500310404833e-05, "loss": 1.0306, "step": 2036 }, { "epoch": 0.3492723492723493, "grad_norm": 1.65625, "learning_rate": 1.9394384333673117e-05, "loss": 1.0474, "step": 2037 }, { "epoch": 0.3494438133613391, "grad_norm": 1.6640625, "learning_rate": 1.9393765256911657e-05, "loss": 1.0378, "step": 2038 }, { "epoch": 0.349615277450329, "grad_norm": 1.671875, "learning_rate": 1.9393145873784137e-05, "loss": 1.0387, "step": 2039 }, { "epoch": 0.3497867415393189, "grad_norm": 1.6953125, "learning_rate": 1.939252618431076e-05, "loss": 1.0389, "step": 2040 }, { "epoch": 0.3499582056283087, "grad_norm": 1.7578125, "learning_rate": 1.9391906188511736e-05, "loss": 1.041, "step": 2041 }, { "epoch": 0.3501296697172986, "grad_norm": 1.6171875, "learning_rate": 1.939128588640728e-05, "loss": 1.1094, "step": 2042 }, { "epoch": 0.35030113380628847, "grad_norm": 1.5859375, "learning_rate": 1.9390665278017635e-05, "loss": 1.0563, "step": 2043 }, { "epoch": 0.3504725978952783, "grad_norm": 1.5625, "learning_rate": 1.939004436336303e-05, "loss": 1.0217, "step": 2044 }, { "epoch": 0.3506440619842682, "grad_norm": 1.6328125, "learning_rate": 1.9389423142463715e-05, "loss": 1.0005, "step": 2045 }, { "epoch": 0.35081552607325805, "grad_norm": 1.7265625, "learning_rate": 1.9388801615339955e-05, "loss": 1.0865, "step": 2046 }, { "epoch": 0.3509869901622479, "grad_norm": 1.6171875, "learning_rate": 1.9388179782012023e-05, "loss": 1.0456, "step": 2047 }, { "epoch": 0.35115845425123776, "grad_norm": 1.5625, "learning_rate": 1.9387557642500192e-05, "loss": 1.0332, "step": 2048 }, { "epoch": 0.35132991834022764, "grad_norm": 1.6875, "learning_rate": 1.9386935196824756e-05, "loss": 1.057, "step": 2049 }, { "epoch": 0.35150138242921747, "grad_norm": 1.71875, "learning_rate": 1.938631244500602e-05, "loss": 1.1701, "step": 2050 }, { "epoch": 0.35167284651820735, "grad_norm": 1.640625, "learning_rate": 1.9385689387064285e-05, "loss": 0.9727, "step": 2051 }, { "epoch": 0.35184431060719723, "grad_norm": 1.6640625, "learning_rate": 1.9385066023019878e-05, "loss": 1.0281, "step": 2052 }, { "epoch": 0.35201577469618706, "grad_norm": 1.6015625, "learning_rate": 1.9384442352893125e-05, "loss": 1.0045, "step": 2053 }, { "epoch": 0.35218723878517694, "grad_norm": 1.5859375, "learning_rate": 1.938381837670437e-05, "loss": 1.0382, "step": 2054 }, { "epoch": 0.3523587028741668, "grad_norm": 1.6484375, "learning_rate": 1.938319409447396e-05, "loss": 1.0064, "step": 2055 }, { "epoch": 0.35253016696315664, "grad_norm": 1.6171875, "learning_rate": 1.938256950622226e-05, "loss": 1.0506, "step": 2056 }, { "epoch": 0.3527016310521465, "grad_norm": 1.5703125, "learning_rate": 1.938194461196963e-05, "loss": 1.0538, "step": 2057 }, { "epoch": 0.3528730951411364, "grad_norm": 1.6015625, "learning_rate": 1.9381319411736467e-05, "loss": 1.0444, "step": 2058 }, { "epoch": 0.35304455923012623, "grad_norm": 1.5390625, "learning_rate": 1.9380693905543144e-05, "loss": 1.0035, "step": 2059 }, { "epoch": 0.3532160233191161, "grad_norm": 1.7578125, "learning_rate": 1.938006809341007e-05, "loss": 1.1376, "step": 2060 }, { "epoch": 0.35338748740810594, "grad_norm": 1.671875, "learning_rate": 1.9379441975357654e-05, "loss": 1.0271, "step": 2061 }, { "epoch": 0.3535589514970958, "grad_norm": 1.609375, "learning_rate": 1.9378815551406315e-05, "loss": 1.0318, "step": 2062 }, { "epoch": 0.3537304155860857, "grad_norm": 1.6015625, "learning_rate": 1.9378188821576484e-05, "loss": 1.1232, "step": 2063 }, { "epoch": 0.3539018796750755, "grad_norm": 1.6328125, "learning_rate": 1.9377561785888602e-05, "loss": 0.9949, "step": 2064 }, { "epoch": 0.3540733437640654, "grad_norm": 1.6328125, "learning_rate": 1.9376934444363114e-05, "loss": 1.1025, "step": 2065 }, { "epoch": 0.3542448078530553, "grad_norm": 1.6484375, "learning_rate": 1.9376306797020486e-05, "loss": 1.1526, "step": 2066 }, { "epoch": 0.3544162719420451, "grad_norm": 20.375, "learning_rate": 1.9375678843881186e-05, "loss": 1.1018, "step": 2067 }, { "epoch": 0.354587736031035, "grad_norm": 1.53125, "learning_rate": 1.937505058496569e-05, "loss": 1.0327, "step": 2068 }, { "epoch": 0.3547592001200249, "grad_norm": 1.5625, "learning_rate": 1.937442202029449e-05, "loss": 1.0052, "step": 2069 }, { "epoch": 0.3549306642090147, "grad_norm": 1.6484375, "learning_rate": 1.9373793149888092e-05, "loss": 1.0494, "step": 2070 }, { "epoch": 0.3551021282980046, "grad_norm": 1.7109375, "learning_rate": 1.9373163973767e-05, "loss": 1.0583, "step": 2071 }, { "epoch": 0.35527359238699446, "grad_norm": 1.8203125, "learning_rate": 1.937253449195174e-05, "loss": 1.0328, "step": 2072 }, { "epoch": 0.3554450564759843, "grad_norm": 1.5703125, "learning_rate": 1.9371904704462826e-05, "loss": 1.0797, "step": 2073 }, { "epoch": 0.35561652056497417, "grad_norm": 1.5625, "learning_rate": 1.9371274611320813e-05, "loss": 1.0437, "step": 2074 }, { "epoch": 0.35578798465396405, "grad_norm": 1.8203125, "learning_rate": 1.9370644212546246e-05, "loss": 1.0953, "step": 2075 }, { "epoch": 0.3559594487429539, "grad_norm": 1.953125, "learning_rate": 1.9370013508159685e-05, "loss": 1.1619, "step": 2076 }, { "epoch": 0.35613091283194376, "grad_norm": 1.7421875, "learning_rate": 1.9369382498181698e-05, "loss": 1.0185, "step": 2077 }, { "epoch": 0.35630237692093364, "grad_norm": 1.75, "learning_rate": 1.936875118263287e-05, "loss": 1.112, "step": 2078 }, { "epoch": 0.35647384100992346, "grad_norm": 1.7109375, "learning_rate": 1.936811956153378e-05, "loss": 1.0548, "step": 2079 }, { "epoch": 0.35664530509891335, "grad_norm": 1.6484375, "learning_rate": 1.936748763490504e-05, "loss": 1.0316, "step": 2080 }, { "epoch": 0.3568167691879032, "grad_norm": 1.6953125, "learning_rate": 1.936685540276725e-05, "loss": 0.9346, "step": 2081 }, { "epoch": 0.35698823327689305, "grad_norm": 1.6953125, "learning_rate": 1.9366222865141032e-05, "loss": 1.0405, "step": 2082 }, { "epoch": 0.35715969736588293, "grad_norm": 1.6015625, "learning_rate": 1.936559002204702e-05, "loss": 0.9518, "step": 2083 }, { "epoch": 0.3573311614548728, "grad_norm": 1.703125, "learning_rate": 1.936495687350585e-05, "loss": 1.0378, "step": 2084 }, { "epoch": 0.35750262554386264, "grad_norm": 1.6015625, "learning_rate": 1.9364323419538166e-05, "loss": 0.9886, "step": 2085 }, { "epoch": 0.3576740896328525, "grad_norm": 1.7578125, "learning_rate": 1.9363689660164637e-05, "loss": 1.0009, "step": 2086 }, { "epoch": 0.3578455537218424, "grad_norm": 1.6015625, "learning_rate": 1.9363055595405928e-05, "loss": 0.9917, "step": 2087 }, { "epoch": 0.3580170178108322, "grad_norm": 1.609375, "learning_rate": 1.9362421225282717e-05, "loss": 1.0459, "step": 2088 }, { "epoch": 0.3581884818998221, "grad_norm": 1.65625, "learning_rate": 1.936178654981569e-05, "loss": 1.1376, "step": 2089 }, { "epoch": 0.358359945988812, "grad_norm": 1.53125, "learning_rate": 1.9361151569025556e-05, "loss": 1.0498, "step": 2090 }, { "epoch": 0.3585314100778018, "grad_norm": 1.6171875, "learning_rate": 1.936051628293302e-05, "loss": 1.0736, "step": 2091 }, { "epoch": 0.3587028741667917, "grad_norm": 1.6171875, "learning_rate": 1.9359880691558796e-05, "loss": 1.1096, "step": 2092 }, { "epoch": 0.3588743382557816, "grad_norm": 1.6640625, "learning_rate": 1.9359244794923615e-05, "loss": 1.001, "step": 2093 }, { "epoch": 0.3590458023447714, "grad_norm": 1.7265625, "learning_rate": 1.935860859304822e-05, "loss": 1.0068, "step": 2094 }, { "epoch": 0.3592172664337613, "grad_norm": 1.6953125, "learning_rate": 1.9357972085953356e-05, "loss": 1.0759, "step": 2095 }, { "epoch": 0.35938873052275117, "grad_norm": 1.5859375, "learning_rate": 1.9357335273659785e-05, "loss": 1.0363, "step": 2096 }, { "epoch": 0.359560194611741, "grad_norm": 1.546875, "learning_rate": 1.9356698156188273e-05, "loss": 0.9934, "step": 2097 }, { "epoch": 0.35973165870073087, "grad_norm": 1.7578125, "learning_rate": 1.9356060733559602e-05, "loss": 1.1027, "step": 2098 }, { "epoch": 0.35990312278972075, "grad_norm": 1.734375, "learning_rate": 1.9355423005794556e-05, "loss": 1.0325, "step": 2099 }, { "epoch": 0.3600745868787106, "grad_norm": 1.6796875, "learning_rate": 1.9354784972913938e-05, "loss": 1.116, "step": 2100 }, { "epoch": 0.3600745868787106, "eval_loss": 0.8986930847167969, "eval_runtime": 837.0926, "eval_samples_per_second": 2.985, "eval_steps_per_second": 2.985, "step": 2100 }, { "epoch": 0.36024605096770046, "grad_norm": 1.625, "learning_rate": 1.9354146634938558e-05, "loss": 1.1254, "step": 2101 }, { "epoch": 0.36041751505669034, "grad_norm": 1.6640625, "learning_rate": 1.9353507991889228e-05, "loss": 1.0357, "step": 2102 }, { "epoch": 0.36058897914568017, "grad_norm": 1.6328125, "learning_rate": 1.935286904378678e-05, "loss": 1.0318, "step": 2103 }, { "epoch": 0.36076044323467005, "grad_norm": 1.640625, "learning_rate": 1.9352229790652056e-05, "loss": 1.0231, "step": 2104 }, { "epoch": 0.36093190732365993, "grad_norm": 1.765625, "learning_rate": 1.9351590232505898e-05, "loss": 1.0675, "step": 2105 }, { "epoch": 0.36110337141264975, "grad_norm": 1.65625, "learning_rate": 1.935095036936917e-05, "loss": 1.0321, "step": 2106 }, { "epoch": 0.36127483550163964, "grad_norm": 1.6796875, "learning_rate": 1.9350310201262737e-05, "loss": 1.0532, "step": 2107 }, { "epoch": 0.36144629959062946, "grad_norm": 1.625, "learning_rate": 1.934966972820748e-05, "loss": 1.0822, "step": 2108 }, { "epoch": 0.36161776367961934, "grad_norm": 1.5859375, "learning_rate": 1.934902895022428e-05, "loss": 1.0376, "step": 2109 }, { "epoch": 0.3617892277686092, "grad_norm": 1.6484375, "learning_rate": 1.9348387867334047e-05, "loss": 1.0499, "step": 2110 }, { "epoch": 0.36196069185759905, "grad_norm": 1.6171875, "learning_rate": 1.934774647955768e-05, "loss": 1.0162, "step": 2111 }, { "epoch": 0.36213215594658893, "grad_norm": 1.734375, "learning_rate": 1.9347104786916103e-05, "loss": 1.042, "step": 2112 }, { "epoch": 0.3623036200355788, "grad_norm": 1.671875, "learning_rate": 1.934646278943024e-05, "loss": 1.057, "step": 2113 }, { "epoch": 0.36247508412456864, "grad_norm": 1.703125, "learning_rate": 1.9345820487121026e-05, "loss": 0.9956, "step": 2114 }, { "epoch": 0.3626465482135585, "grad_norm": 1.5546875, "learning_rate": 1.9345177880009417e-05, "loss": 0.9508, "step": 2115 }, { "epoch": 0.3628180123025484, "grad_norm": 1.8359375, "learning_rate": 1.9344534968116365e-05, "loss": 1.1172, "step": 2116 }, { "epoch": 0.3629894763915382, "grad_norm": 1.6875, "learning_rate": 1.9343891751462838e-05, "loss": 1.1314, "step": 2117 }, { "epoch": 0.3631609404805281, "grad_norm": 1.5703125, "learning_rate": 1.9343248230069815e-05, "loss": 1.0684, "step": 2118 }, { "epoch": 0.363332404569518, "grad_norm": 1.625, "learning_rate": 1.9342604403958287e-05, "loss": 0.9849, "step": 2119 }, { "epoch": 0.3635038686585078, "grad_norm": 1.5390625, "learning_rate": 1.9341960273149246e-05, "loss": 1.0186, "step": 2120 }, { "epoch": 0.3636753327474977, "grad_norm": 1.6640625, "learning_rate": 1.9341315837663704e-05, "loss": 1.0682, "step": 2121 }, { "epoch": 0.3638467968364876, "grad_norm": 1.671875, "learning_rate": 1.9340671097522678e-05, "loss": 1.0753, "step": 2122 }, { "epoch": 0.3640182609254774, "grad_norm": 1.6484375, "learning_rate": 1.934002605274719e-05, "loss": 1.1382, "step": 2123 }, { "epoch": 0.3641897250144673, "grad_norm": 1.5625, "learning_rate": 1.933938070335828e-05, "loss": 1.0476, "step": 2124 }, { "epoch": 0.36436118910345716, "grad_norm": 1.5859375, "learning_rate": 1.9338735049377e-05, "loss": 1.0797, "step": 2125 }, { "epoch": 0.364532653192447, "grad_norm": 1.53125, "learning_rate": 1.9338089090824402e-05, "loss": 1.0387, "step": 2126 }, { "epoch": 0.36470411728143687, "grad_norm": 1.5859375, "learning_rate": 1.9337442827721555e-05, "loss": 1.0245, "step": 2127 }, { "epoch": 0.36487558137042675, "grad_norm": 1.6015625, "learning_rate": 1.9336796260089534e-05, "loss": 1.0181, "step": 2128 }, { "epoch": 0.3650470454594166, "grad_norm": 1.65625, "learning_rate": 1.933614938794943e-05, "loss": 1.0161, "step": 2129 }, { "epoch": 0.36521850954840646, "grad_norm": 1.75, "learning_rate": 1.933550221132234e-05, "loss": 1.1429, "step": 2130 }, { "epoch": 0.36538997363739634, "grad_norm": 1.75, "learning_rate": 1.933485473022936e-05, "loss": 1.0651, "step": 2131 }, { "epoch": 0.36556143772638616, "grad_norm": 1.5703125, "learning_rate": 1.9334206944691626e-05, "loss": 1.0549, "step": 2132 }, { "epoch": 0.36573290181537604, "grad_norm": 1.6796875, "learning_rate": 1.933355885473025e-05, "loss": 1.0231, "step": 2133 }, { "epoch": 0.3659043659043659, "grad_norm": 1.640625, "learning_rate": 1.933291046036637e-05, "loss": 1.0812, "step": 2134 }, { "epoch": 0.36607582999335575, "grad_norm": 1.6953125, "learning_rate": 1.9332261761621138e-05, "loss": 1.1038, "step": 2135 }, { "epoch": 0.36624729408234563, "grad_norm": 1.6328125, "learning_rate": 1.9331612758515706e-05, "loss": 1.0191, "step": 2136 }, { "epoch": 0.3664187581713355, "grad_norm": 1.640625, "learning_rate": 1.933096345107125e-05, "loss": 1.0076, "step": 2137 }, { "epoch": 0.36659022226032534, "grad_norm": 1.53125, "learning_rate": 1.933031383930893e-05, "loss": 1.016, "step": 2138 }, { "epoch": 0.3667616863493152, "grad_norm": 1.8984375, "learning_rate": 1.9329663923249945e-05, "loss": 1.0134, "step": 2139 }, { "epoch": 0.3669331504383051, "grad_norm": 1.8046875, "learning_rate": 1.9329013702915485e-05, "loss": 1.2297, "step": 2140 }, { "epoch": 0.3671046145272949, "grad_norm": 1.609375, "learning_rate": 1.932836317832676e-05, "loss": 0.9963, "step": 2141 }, { "epoch": 0.3672760786162848, "grad_norm": 1.6328125, "learning_rate": 1.9327712349504982e-05, "loss": 1.0519, "step": 2142 }, { "epoch": 0.3674475427052747, "grad_norm": 1.734375, "learning_rate": 1.9327061216471382e-05, "loss": 1.0634, "step": 2143 }, { "epoch": 0.3676190067942645, "grad_norm": 1.609375, "learning_rate": 1.932640977924719e-05, "loss": 1.117, "step": 2144 }, { "epoch": 0.3677904708832544, "grad_norm": 1.609375, "learning_rate": 1.932575803785366e-05, "loss": 1.0166, "step": 2145 }, { "epoch": 0.3679619349722443, "grad_norm": 1.7265625, "learning_rate": 1.932510599231204e-05, "loss": 1.1124, "step": 2146 }, { "epoch": 0.3681333990612341, "grad_norm": 1.6015625, "learning_rate": 1.9324453642643598e-05, "loss": 0.9047, "step": 2147 }, { "epoch": 0.368304863150224, "grad_norm": 1.578125, "learning_rate": 1.932380098886961e-05, "loss": 1.0224, "step": 2148 }, { "epoch": 0.36847632723921386, "grad_norm": 1.734375, "learning_rate": 1.9323148031011363e-05, "loss": 1.0019, "step": 2149 }, { "epoch": 0.3686477913282037, "grad_norm": 1.671875, "learning_rate": 1.932249476909015e-05, "loss": 1.079, "step": 2150 }, { "epoch": 0.36881925541719357, "grad_norm": 1.7109375, "learning_rate": 1.9321841203127277e-05, "loss": 1.0723, "step": 2151 }, { "epoch": 0.36899071950618345, "grad_norm": 1.65625, "learning_rate": 1.9321187333144064e-05, "loss": 0.9337, "step": 2152 }, { "epoch": 0.3691621835951733, "grad_norm": 1.6484375, "learning_rate": 1.9320533159161824e-05, "loss": 1.0433, "step": 2153 }, { "epoch": 0.36933364768416316, "grad_norm": 1.6484375, "learning_rate": 1.9319878681201905e-05, "loss": 1.0609, "step": 2154 }, { "epoch": 0.369505111773153, "grad_norm": 1.703125, "learning_rate": 1.9319223899285643e-05, "loss": 1.0004, "step": 2155 }, { "epoch": 0.36967657586214286, "grad_norm": 1.640625, "learning_rate": 1.93185688134344e-05, "loss": 1.0564, "step": 2156 }, { "epoch": 0.36984803995113275, "grad_norm": 1.8125, "learning_rate": 1.9317913423669535e-05, "loss": 1.1132, "step": 2157 }, { "epoch": 0.37001950404012257, "grad_norm": 1.59375, "learning_rate": 1.931725773001243e-05, "loss": 0.9931, "step": 2158 }, { "epoch": 0.37019096812911245, "grad_norm": 1.6953125, "learning_rate": 1.931660173248446e-05, "loss": 1.121, "step": 2159 }, { "epoch": 0.37036243221810233, "grad_norm": 1.5546875, "learning_rate": 1.931594543110703e-05, "loss": 1.0856, "step": 2160 }, { "epoch": 0.37053389630709216, "grad_norm": 1.6640625, "learning_rate": 1.9315288825901534e-05, "loss": 1.1187, "step": 2161 }, { "epoch": 0.37070536039608204, "grad_norm": 1.7734375, "learning_rate": 1.931463191688939e-05, "loss": 1.0063, "step": 2162 }, { "epoch": 0.3708768244850719, "grad_norm": 1.703125, "learning_rate": 1.9313974704092028e-05, "loss": 1.0472, "step": 2163 }, { "epoch": 0.37104828857406175, "grad_norm": 1.65625, "learning_rate": 1.9313317187530875e-05, "loss": 1.1154, "step": 2164 }, { "epoch": 0.3712197526630516, "grad_norm": 1.7109375, "learning_rate": 1.931265936722738e-05, "loss": 1.0989, "step": 2165 }, { "epoch": 0.3713912167520415, "grad_norm": 1.71875, "learning_rate": 1.931200124320299e-05, "loss": 1.0552, "step": 2166 }, { "epoch": 0.37156268084103133, "grad_norm": 1.6484375, "learning_rate": 1.931134281547918e-05, "loss": 1.1136, "step": 2167 }, { "epoch": 0.3717341449300212, "grad_norm": 1.6953125, "learning_rate": 1.9310684084077413e-05, "loss": 0.9891, "step": 2168 }, { "epoch": 0.3719056090190111, "grad_norm": 1.734375, "learning_rate": 1.931002504901918e-05, "loss": 1.0897, "step": 2169 }, { "epoch": 0.3720770731080009, "grad_norm": 1.6484375, "learning_rate": 1.930936571032597e-05, "loss": 1.0432, "step": 2170 }, { "epoch": 0.3722485371969908, "grad_norm": 1.6484375, "learning_rate": 1.930870606801929e-05, "loss": 0.9868, "step": 2171 }, { "epoch": 0.3724200012859807, "grad_norm": 1.6328125, "learning_rate": 1.9308046122120648e-05, "loss": 1.037, "step": 2172 }, { "epoch": 0.3725914653749705, "grad_norm": 1.59375, "learning_rate": 1.9307385872651574e-05, "loss": 1.0493, "step": 2173 }, { "epoch": 0.3727629294639604, "grad_norm": 1.65625, "learning_rate": 1.9306725319633603e-05, "loss": 1.011, "step": 2174 }, { "epoch": 0.37293439355295027, "grad_norm": 1.578125, "learning_rate": 1.9306064463088267e-05, "loss": 1.0288, "step": 2175 }, { "epoch": 0.3731058576419401, "grad_norm": 1.671875, "learning_rate": 1.9305403303037128e-05, "loss": 1.0481, "step": 2176 }, { "epoch": 0.37327732173093, "grad_norm": 1.6796875, "learning_rate": 1.930474183950174e-05, "loss": 0.994, "step": 2177 }, { "epoch": 0.37344878581991986, "grad_norm": 1.6328125, "learning_rate": 1.930408007250369e-05, "loss": 1.0024, "step": 2178 }, { "epoch": 0.3736202499089097, "grad_norm": 1.609375, "learning_rate": 1.9303418002064553e-05, "loss": 1.0486, "step": 2179 }, { "epoch": 0.37379171399789957, "grad_norm": 1.578125, "learning_rate": 1.9302755628205917e-05, "loss": 1.0745, "step": 2180 }, { "epoch": 0.37396317808688945, "grad_norm": 1.5859375, "learning_rate": 1.9302092950949393e-05, "loss": 1.0946, "step": 2181 }, { "epoch": 0.3741346421758793, "grad_norm": 1.625, "learning_rate": 1.9301429970316588e-05, "loss": 1.0594, "step": 2182 }, { "epoch": 0.37430610626486915, "grad_norm": 1.5234375, "learning_rate": 1.9300766686329124e-05, "loss": 1.0049, "step": 2183 }, { "epoch": 0.37447757035385904, "grad_norm": 1.671875, "learning_rate": 1.9300103099008634e-05, "loss": 1.0796, "step": 2184 }, { "epoch": 0.37464903444284886, "grad_norm": 1.671875, "learning_rate": 1.9299439208376763e-05, "loss": 1.0713, "step": 2185 }, { "epoch": 0.37482049853183874, "grad_norm": 1.59375, "learning_rate": 1.9298775014455163e-05, "loss": 1.1084, "step": 2186 }, { "epoch": 0.3749919626208286, "grad_norm": 1.65625, "learning_rate": 1.9298110517265492e-05, "loss": 0.9409, "step": 2187 }, { "epoch": 0.37516342670981845, "grad_norm": 1.6015625, "learning_rate": 1.9297445716829423e-05, "loss": 0.9509, "step": 2188 }, { "epoch": 0.37533489079880833, "grad_norm": 1.609375, "learning_rate": 1.9296780613168638e-05, "loss": 1.0959, "step": 2189 }, { "epoch": 0.3755063548877982, "grad_norm": 1.59375, "learning_rate": 1.929611520630483e-05, "loss": 1.0904, "step": 2190 }, { "epoch": 0.37567781897678804, "grad_norm": 1.6953125, "learning_rate": 1.92954494962597e-05, "loss": 1.0285, "step": 2191 }, { "epoch": 0.3758492830657779, "grad_norm": 1.5703125, "learning_rate": 1.929478348305496e-05, "loss": 1.0246, "step": 2192 }, { "epoch": 0.3760207471547678, "grad_norm": 1.7578125, "learning_rate": 1.9294117166712322e-05, "loss": 1.1087, "step": 2193 }, { "epoch": 0.3761922112437576, "grad_norm": 1.5859375, "learning_rate": 1.929345054725353e-05, "loss": 1.0125, "step": 2194 }, { "epoch": 0.3763636753327475, "grad_norm": 1.734375, "learning_rate": 1.929278362470032e-05, "loss": 1.0662, "step": 2195 }, { "epoch": 0.3765351394217374, "grad_norm": 1.6171875, "learning_rate": 1.9292116399074442e-05, "loss": 0.9725, "step": 2196 }, { "epoch": 0.3767066035107272, "grad_norm": 1.59375, "learning_rate": 1.9291448870397658e-05, "loss": 0.9811, "step": 2197 }, { "epoch": 0.3768780675997171, "grad_norm": 1.65625, "learning_rate": 1.929078103869174e-05, "loss": 1.0886, "step": 2198 }, { "epoch": 0.377049531688707, "grad_norm": 1.578125, "learning_rate": 1.929011290397846e-05, "loss": 1.0572, "step": 2199 }, { "epoch": 0.3772209957776968, "grad_norm": 1.4765625, "learning_rate": 1.9289444466279622e-05, "loss": 0.9523, "step": 2200 }, { "epoch": 0.3773924598666867, "grad_norm": 1.5390625, "learning_rate": 1.9288775725617014e-05, "loss": 1.0021, "step": 2201 }, { "epoch": 0.3775639239556765, "grad_norm": 1.5546875, "learning_rate": 1.9288106682012456e-05, "loss": 1.1431, "step": 2202 }, { "epoch": 0.3777353880446664, "grad_norm": 1.7578125, "learning_rate": 1.928743733548776e-05, "loss": 1.1734, "step": 2203 }, { "epoch": 0.37790685213365627, "grad_norm": 1.625, "learning_rate": 1.9286767686064757e-05, "loss": 1.0438, "step": 2204 }, { "epoch": 0.3780783162226461, "grad_norm": 1.671875, "learning_rate": 1.9286097733765296e-05, "loss": 1.0226, "step": 2205 }, { "epoch": 0.378249780311636, "grad_norm": 1.7734375, "learning_rate": 1.9285427478611214e-05, "loss": 1.0586, "step": 2206 }, { "epoch": 0.37842124440062586, "grad_norm": 1.609375, "learning_rate": 1.9284756920624378e-05, "loss": 1.0845, "step": 2207 }, { "epoch": 0.3785927084896157, "grad_norm": 1.578125, "learning_rate": 1.9284086059826654e-05, "loss": 1.048, "step": 2208 }, { "epoch": 0.37876417257860556, "grad_norm": 2.296875, "learning_rate": 1.9283414896239924e-05, "loss": 0.9909, "step": 2209 }, { "epoch": 0.37893563666759544, "grad_norm": 1.578125, "learning_rate": 1.928274342988608e-05, "loss": 1.0743, "step": 2210 }, { "epoch": 0.37910710075658527, "grad_norm": 2.671875, "learning_rate": 1.928207166078701e-05, "loss": 1.0515, "step": 2211 }, { "epoch": 0.37927856484557515, "grad_norm": 1.5625, "learning_rate": 1.9281399588964636e-05, "loss": 0.998, "step": 2212 }, { "epoch": 0.37945002893456503, "grad_norm": 1.6640625, "learning_rate": 1.9280727214440865e-05, "loss": 1.0134, "step": 2213 }, { "epoch": 0.37962149302355486, "grad_norm": 1.640625, "learning_rate": 1.928005453723764e-05, "loss": 0.9262, "step": 2214 }, { "epoch": 0.37979295711254474, "grad_norm": 1.7265625, "learning_rate": 1.9279381557376883e-05, "loss": 1.1135, "step": 2215 }, { "epoch": 0.3799644212015346, "grad_norm": 1.59375, "learning_rate": 1.9278708274880556e-05, "loss": 1.1122, "step": 2216 }, { "epoch": 0.38013588529052444, "grad_norm": 1.53125, "learning_rate": 1.927803468977061e-05, "loss": 1.0032, "step": 2217 }, { "epoch": 0.3803073493795143, "grad_norm": 1.6015625, "learning_rate": 1.9277360802069013e-05, "loss": 1.0734, "step": 2218 }, { "epoch": 0.3804788134685042, "grad_norm": 1.6328125, "learning_rate": 1.927668661179775e-05, "loss": 1.0122, "step": 2219 }, { "epoch": 0.38065027755749403, "grad_norm": 1.5, "learning_rate": 1.9276012118978794e-05, "loss": 0.9827, "step": 2220 }, { "epoch": 0.3808217416464839, "grad_norm": 1.6640625, "learning_rate": 1.927533732363416e-05, "loss": 1.1203, "step": 2221 }, { "epoch": 0.3809932057354738, "grad_norm": 1.6171875, "learning_rate": 1.927466222578585e-05, "loss": 1.0789, "step": 2222 }, { "epoch": 0.3811646698244636, "grad_norm": 1.65625, "learning_rate": 1.9273986825455874e-05, "loss": 1.067, "step": 2223 }, { "epoch": 0.3813361339134535, "grad_norm": 1.6171875, "learning_rate": 1.927331112266627e-05, "loss": 1.0309, "step": 2224 }, { "epoch": 0.3815075980024434, "grad_norm": 1.5625, "learning_rate": 1.9272635117439066e-05, "loss": 1.0483, "step": 2225 }, { "epoch": 0.3816790620914332, "grad_norm": 1.5078125, "learning_rate": 1.9271958809796315e-05, "loss": 1.0048, "step": 2226 }, { "epoch": 0.3818505261804231, "grad_norm": 1.625, "learning_rate": 1.9271282199760076e-05, "loss": 1.0654, "step": 2227 }, { "epoch": 0.38202199026941297, "grad_norm": 1.5546875, "learning_rate": 1.9270605287352406e-05, "loss": 1.0612, "step": 2228 }, { "epoch": 0.3821934543584028, "grad_norm": 1.6015625, "learning_rate": 1.9269928072595392e-05, "loss": 1.0226, "step": 2229 }, { "epoch": 0.3823649184473927, "grad_norm": 1.5703125, "learning_rate": 1.926925055551111e-05, "loss": 0.9787, "step": 2230 }, { "epoch": 0.38253638253638256, "grad_norm": 1.53125, "learning_rate": 1.9268572736121668e-05, "loss": 1.0347, "step": 2231 }, { "epoch": 0.3827078466253724, "grad_norm": 1.6015625, "learning_rate": 1.9267894614449168e-05, "loss": 1.1223, "step": 2232 }, { "epoch": 0.38287931071436226, "grad_norm": 1.6015625, "learning_rate": 1.9267216190515726e-05, "loss": 1.0425, "step": 2233 }, { "epoch": 0.38305077480335215, "grad_norm": 1.6484375, "learning_rate": 1.926653746434346e-05, "loss": 1.0454, "step": 2234 }, { "epoch": 0.38322223889234197, "grad_norm": 1.6484375, "learning_rate": 1.9265858435954515e-05, "loss": 0.9754, "step": 2235 }, { "epoch": 0.38339370298133185, "grad_norm": 1.75, "learning_rate": 1.926517910537104e-05, "loss": 1.148, "step": 2236 }, { "epoch": 0.38356516707032173, "grad_norm": 1.7421875, "learning_rate": 1.926449947261518e-05, "loss": 1.041, "step": 2237 }, { "epoch": 0.38373663115931156, "grad_norm": 1.703125, "learning_rate": 1.9263819537709104e-05, "loss": 1.0121, "step": 2238 }, { "epoch": 0.38390809524830144, "grad_norm": 1.703125, "learning_rate": 1.9263139300674994e-05, "loss": 1.0594, "step": 2239 }, { "epoch": 0.3840795593372913, "grad_norm": 1.7265625, "learning_rate": 1.9262458761535026e-05, "loss": 1.0862, "step": 2240 }, { "epoch": 0.38425102342628115, "grad_norm": 1.546875, "learning_rate": 1.92617779203114e-05, "loss": 1.0087, "step": 2241 }, { "epoch": 0.384422487515271, "grad_norm": 1.734375, "learning_rate": 1.9261096777026318e-05, "loss": 1.0964, "step": 2242 }, { "epoch": 0.3845939516042609, "grad_norm": 1.546875, "learning_rate": 1.9260415331701996e-05, "loss": 0.9698, "step": 2243 }, { "epoch": 0.38476541569325073, "grad_norm": 1.609375, "learning_rate": 1.925973358436066e-05, "loss": 0.9941, "step": 2244 }, { "epoch": 0.3849368797822406, "grad_norm": 1.6484375, "learning_rate": 1.9259051535024542e-05, "loss": 1.0449, "step": 2245 }, { "epoch": 0.3851083438712305, "grad_norm": 1.71875, "learning_rate": 1.9258369183715887e-05, "loss": 1.1611, "step": 2246 }, { "epoch": 0.3852798079602203, "grad_norm": 1.6171875, "learning_rate": 1.9257686530456947e-05, "loss": 1.0714, "step": 2247 }, { "epoch": 0.3854512720492102, "grad_norm": 1.5234375, "learning_rate": 1.9257003575269992e-05, "loss": 0.9581, "step": 2248 }, { "epoch": 0.38562273613820003, "grad_norm": 1.578125, "learning_rate": 1.925632031817729e-05, "loss": 1.0036, "step": 2249 }, { "epoch": 0.3857942002271899, "grad_norm": 1.5859375, "learning_rate": 1.9255636759201127e-05, "loss": 1.0602, "step": 2250 }, { "epoch": 0.3859656643161798, "grad_norm": 1.703125, "learning_rate": 1.925495289836379e-05, "loss": 1.1031, "step": 2251 }, { "epoch": 0.3861371284051696, "grad_norm": 1.65625, "learning_rate": 1.9254268735687595e-05, "loss": 1.1401, "step": 2252 }, { "epoch": 0.3863085924941595, "grad_norm": 1.59375, "learning_rate": 1.9253584271194848e-05, "loss": 1.0456, "step": 2253 }, { "epoch": 0.3864800565831494, "grad_norm": 1.6875, "learning_rate": 1.925289950490787e-05, "loss": 1.1156, "step": 2254 }, { "epoch": 0.3866515206721392, "grad_norm": 1.75, "learning_rate": 1.9252214436848996e-05, "loss": 1.0134, "step": 2255 }, { "epoch": 0.3868229847611291, "grad_norm": 1.5859375, "learning_rate": 1.925152906704057e-05, "loss": 1.0402, "step": 2256 }, { "epoch": 0.38699444885011897, "grad_norm": 1.6953125, "learning_rate": 1.925084339550494e-05, "loss": 1.0452, "step": 2257 }, { "epoch": 0.3871659129391088, "grad_norm": 1.671875, "learning_rate": 1.9250157422264477e-05, "loss": 1.0607, "step": 2258 }, { "epoch": 0.3873373770280987, "grad_norm": 1.578125, "learning_rate": 1.9249471147341544e-05, "loss": 1.0349, "step": 2259 }, { "epoch": 0.38750884111708855, "grad_norm": 1.6640625, "learning_rate": 1.9248784570758526e-05, "loss": 0.984, "step": 2260 }, { "epoch": 0.3876803052060784, "grad_norm": 1.671875, "learning_rate": 1.9248097692537815e-05, "loss": 1.0693, "step": 2261 }, { "epoch": 0.38785176929506826, "grad_norm": 1.65625, "learning_rate": 1.9247410512701815e-05, "loss": 1.0794, "step": 2262 }, { "epoch": 0.38802323338405814, "grad_norm": 1.578125, "learning_rate": 1.9246723031272934e-05, "loss": 0.9385, "step": 2263 }, { "epoch": 0.38819469747304797, "grad_norm": 1.578125, "learning_rate": 1.9246035248273598e-05, "loss": 1.04, "step": 2264 }, { "epoch": 0.38836616156203785, "grad_norm": 1.703125, "learning_rate": 1.9245347163726233e-05, "loss": 1.0896, "step": 2265 }, { "epoch": 0.38853762565102773, "grad_norm": 1.625, "learning_rate": 1.9244658777653282e-05, "loss": 1.0744, "step": 2266 }, { "epoch": 0.38870908974001755, "grad_norm": 1.640625, "learning_rate": 1.9243970090077196e-05, "loss": 1.0505, "step": 2267 }, { "epoch": 0.38888055382900744, "grad_norm": 1.5625, "learning_rate": 1.9243281101020436e-05, "loss": 1.0416, "step": 2268 }, { "epoch": 0.3890520179179973, "grad_norm": 1.609375, "learning_rate": 1.9242591810505474e-05, "loss": 0.9873, "step": 2269 }, { "epoch": 0.38922348200698714, "grad_norm": 1.6875, "learning_rate": 1.9241902218554787e-05, "loss": 1.0796, "step": 2270 }, { "epoch": 0.389394946095977, "grad_norm": 1.6640625, "learning_rate": 1.9241212325190867e-05, "loss": 1.1094, "step": 2271 }, { "epoch": 0.3895664101849669, "grad_norm": 1.6484375, "learning_rate": 1.9240522130436213e-05, "loss": 1.0207, "step": 2272 }, { "epoch": 0.38973787427395673, "grad_norm": 1.734375, "learning_rate": 1.9239831634313338e-05, "loss": 0.9764, "step": 2273 }, { "epoch": 0.3899093383629466, "grad_norm": 1.6484375, "learning_rate": 1.923914083684476e-05, "loss": 1.0445, "step": 2274 }, { "epoch": 0.3900808024519365, "grad_norm": 1.6171875, "learning_rate": 1.9238449738053003e-05, "loss": 1.0329, "step": 2275 }, { "epoch": 0.3902522665409263, "grad_norm": 1.671875, "learning_rate": 1.9237758337960616e-05, "loss": 1.0006, "step": 2276 }, { "epoch": 0.3904237306299162, "grad_norm": 1.5625, "learning_rate": 1.9237066636590142e-05, "loss": 0.9341, "step": 2277 }, { "epoch": 0.3905951947189061, "grad_norm": 1.6875, "learning_rate": 1.923637463396414e-05, "loss": 1.0451, "step": 2278 }, { "epoch": 0.3907666588078959, "grad_norm": 1.578125, "learning_rate": 1.9235682330105182e-05, "loss": 1.0136, "step": 2279 }, { "epoch": 0.3909381228968858, "grad_norm": 1.703125, "learning_rate": 1.9234989725035843e-05, "loss": 1.0312, "step": 2280 }, { "epoch": 0.39110958698587567, "grad_norm": 1.6640625, "learning_rate": 1.923429681877872e-05, "loss": 0.9722, "step": 2281 }, { "epoch": 0.3912810510748655, "grad_norm": 1.65625, "learning_rate": 1.9233603611356394e-05, "loss": 1.036, "step": 2282 }, { "epoch": 0.3914525151638554, "grad_norm": 1.671875, "learning_rate": 1.923291010279149e-05, "loss": 1.0469, "step": 2283 }, { "epoch": 0.39162397925284526, "grad_norm": 1.5703125, "learning_rate": 1.923221629310662e-05, "loss": 1.0114, "step": 2284 }, { "epoch": 0.3917954433418351, "grad_norm": 1.6328125, "learning_rate": 1.923152218232441e-05, "loss": 0.9944, "step": 2285 }, { "epoch": 0.39196690743082496, "grad_norm": 1.65625, "learning_rate": 1.9230827770467497e-05, "loss": 0.985, "step": 2286 }, { "epoch": 0.39213837151981484, "grad_norm": 1.6015625, "learning_rate": 1.9230133057558533e-05, "loss": 0.9982, "step": 2287 }, { "epoch": 0.39230983560880467, "grad_norm": 1.671875, "learning_rate": 1.922943804362017e-05, "loss": 1.0644, "step": 2288 }, { "epoch": 0.39248129969779455, "grad_norm": 1.6015625, "learning_rate": 1.922874272867508e-05, "loss": 1.0168, "step": 2289 }, { "epoch": 0.39265276378678443, "grad_norm": 1.5703125, "learning_rate": 1.9228047112745938e-05, "loss": 1.0118, "step": 2290 }, { "epoch": 0.39282422787577426, "grad_norm": 1.53125, "learning_rate": 1.9227351195855428e-05, "loss": 1.0358, "step": 2291 }, { "epoch": 0.39299569196476414, "grad_norm": 1.546875, "learning_rate": 1.9226654978026248e-05, "loss": 0.9959, "step": 2292 }, { "epoch": 0.393167156053754, "grad_norm": 1.578125, "learning_rate": 1.9225958459281105e-05, "loss": 1.0322, "step": 2293 }, { "epoch": 0.39333862014274384, "grad_norm": 1.5703125, "learning_rate": 1.9225261639642717e-05, "loss": 0.9162, "step": 2294 }, { "epoch": 0.3935100842317337, "grad_norm": 1.546875, "learning_rate": 1.922456451913381e-05, "loss": 0.9757, "step": 2295 }, { "epoch": 0.39368154832072355, "grad_norm": 1.6953125, "learning_rate": 1.9223867097777113e-05, "loss": 1.0772, "step": 2296 }, { "epoch": 0.39385301240971343, "grad_norm": 1.5546875, "learning_rate": 1.9223169375595376e-05, "loss": 0.9295, "step": 2297 }, { "epoch": 0.3940244764987033, "grad_norm": 1.6171875, "learning_rate": 1.9222471352611357e-05, "loss": 1.0406, "step": 2298 }, { "epoch": 0.39419594058769314, "grad_norm": 1.578125, "learning_rate": 1.9221773028847817e-05, "loss": 0.9843, "step": 2299 }, { "epoch": 0.394367404676683, "grad_norm": 1.5859375, "learning_rate": 1.9221074404327532e-05, "loss": 1.0198, "step": 2300 }, { "epoch": 0.3945388687656729, "grad_norm": 1.5703125, "learning_rate": 1.922037547907329e-05, "loss": 1.0301, "step": 2301 }, { "epoch": 0.3947103328546627, "grad_norm": 1.9375, "learning_rate": 1.921967625310788e-05, "loss": 0.9537, "step": 2302 }, { "epoch": 0.3948817969436526, "grad_norm": 1.5546875, "learning_rate": 1.9218976726454112e-05, "loss": 0.9385, "step": 2303 }, { "epoch": 0.3950532610326425, "grad_norm": 1.59375, "learning_rate": 1.9218276899134794e-05, "loss": 1.0448, "step": 2304 }, { "epoch": 0.3952247251216323, "grad_norm": 8.9375, "learning_rate": 1.9217576771172756e-05, "loss": 1.1329, "step": 2305 }, { "epoch": 0.3953961892106222, "grad_norm": 1.59375, "learning_rate": 1.9216876342590827e-05, "loss": 0.9946, "step": 2306 }, { "epoch": 0.3955676532996121, "grad_norm": 1.609375, "learning_rate": 1.9216175613411857e-05, "loss": 1.1019, "step": 2307 }, { "epoch": 0.3957391173886019, "grad_norm": 1.59375, "learning_rate": 1.9215474583658693e-05, "loss": 0.9478, "step": 2308 }, { "epoch": 0.3959105814775918, "grad_norm": 1.6875, "learning_rate": 1.92147732533542e-05, "loss": 1.1428, "step": 2309 }, { "epoch": 0.39608204556658166, "grad_norm": 1.5390625, "learning_rate": 1.921407162252125e-05, "loss": 1.1084, "step": 2310 }, { "epoch": 0.3962535096555715, "grad_norm": 1.6796875, "learning_rate": 1.921336969118273e-05, "loss": 1.0258, "step": 2311 }, { "epoch": 0.39642497374456137, "grad_norm": 1.53125, "learning_rate": 1.9212667459361532e-05, "loss": 0.9763, "step": 2312 }, { "epoch": 0.39659643783355125, "grad_norm": 1.6015625, "learning_rate": 1.9211964927080552e-05, "loss": 1.0425, "step": 2313 }, { "epoch": 0.3967679019225411, "grad_norm": 1.640625, "learning_rate": 1.921126209436271e-05, "loss": 1.0274, "step": 2314 }, { "epoch": 0.39693936601153096, "grad_norm": 1.640625, "learning_rate": 1.9210558961230922e-05, "loss": 0.9467, "step": 2315 }, { "epoch": 0.39711083010052084, "grad_norm": 1.65625, "learning_rate": 1.9209855527708127e-05, "loss": 1.0797, "step": 2316 }, { "epoch": 0.39728229418951067, "grad_norm": 1.625, "learning_rate": 1.9209151793817257e-05, "loss": 1.0166, "step": 2317 }, { "epoch": 0.39745375827850055, "grad_norm": 1.6796875, "learning_rate": 1.920844775958127e-05, "loss": 1.0113, "step": 2318 }, { "epoch": 0.3976252223674904, "grad_norm": 1.609375, "learning_rate": 1.9207743425023125e-05, "loss": 1.0353, "step": 2319 }, { "epoch": 0.39779668645648025, "grad_norm": 1.6015625, "learning_rate": 1.9207038790165796e-05, "loss": 1.0908, "step": 2320 }, { "epoch": 0.39796815054547013, "grad_norm": 1.625, "learning_rate": 1.9206333855032262e-05, "loss": 0.9886, "step": 2321 }, { "epoch": 0.39813961463446, "grad_norm": 1.6015625, "learning_rate": 1.9205628619645514e-05, "loss": 1.0451, "step": 2322 }, { "epoch": 0.39831107872344984, "grad_norm": 1.7421875, "learning_rate": 1.9204923084028548e-05, "loss": 1.0668, "step": 2323 }, { "epoch": 0.3984825428124397, "grad_norm": 1.7578125, "learning_rate": 1.920421724820438e-05, "loss": 1.1112, "step": 2324 }, { "epoch": 0.3986540069014296, "grad_norm": 1.59375, "learning_rate": 1.9203511112196026e-05, "loss": 1.0489, "step": 2325 }, { "epoch": 0.39882547099041943, "grad_norm": 1.5390625, "learning_rate": 1.920280467602652e-05, "loss": 0.9801, "step": 2326 }, { "epoch": 0.3989969350794093, "grad_norm": 1.6875, "learning_rate": 1.9202097939718896e-05, "loss": 1.1454, "step": 2327 }, { "epoch": 0.3991683991683992, "grad_norm": 1.59375, "learning_rate": 1.9201390903296208e-05, "loss": 0.9652, "step": 2328 }, { "epoch": 0.399339863257389, "grad_norm": 1.53125, "learning_rate": 1.9200683566781513e-05, "loss": 0.9748, "step": 2329 }, { "epoch": 0.3995113273463789, "grad_norm": 1.578125, "learning_rate": 1.919997593019788e-05, "loss": 1.0532, "step": 2330 }, { "epoch": 0.3996827914353688, "grad_norm": 1.5859375, "learning_rate": 1.9199267993568387e-05, "loss": 0.9792, "step": 2331 }, { "epoch": 0.3998542555243586, "grad_norm": 1.6328125, "learning_rate": 1.9198559756916123e-05, "loss": 1.0428, "step": 2332 }, { "epoch": 0.4000257196133485, "grad_norm": 1.703125, "learning_rate": 1.919785122026419e-05, "loss": 1.038, "step": 2333 }, { "epoch": 0.40019718370233837, "grad_norm": 1.625, "learning_rate": 1.919714238363569e-05, "loss": 1.0486, "step": 2334 }, { "epoch": 0.4003686477913282, "grad_norm": 1.7578125, "learning_rate": 1.9196433247053746e-05, "loss": 1.0839, "step": 2335 }, { "epoch": 0.4005401118803181, "grad_norm": 1.75, "learning_rate": 1.919572381054148e-05, "loss": 1.1026, "step": 2336 }, { "epoch": 0.40071157596930795, "grad_norm": 1.546875, "learning_rate": 1.9195014074122037e-05, "loss": 1.0809, "step": 2337 }, { "epoch": 0.4008830400582978, "grad_norm": 1.703125, "learning_rate": 1.9194304037818555e-05, "loss": 1.1521, "step": 2338 }, { "epoch": 0.40105450414728766, "grad_norm": 1.5859375, "learning_rate": 1.9193593701654202e-05, "loss": 1.0516, "step": 2339 }, { "epoch": 0.40122596823627754, "grad_norm": 1.5546875, "learning_rate": 1.9192883065652132e-05, "loss": 1.058, "step": 2340 }, { "epoch": 0.40139743232526737, "grad_norm": 1.578125, "learning_rate": 1.9192172129835533e-05, "loss": 1.053, "step": 2341 }, { "epoch": 0.40156889641425725, "grad_norm": 1.59375, "learning_rate": 1.919146089422758e-05, "loss": 1.0059, "step": 2342 }, { "epoch": 0.4017403605032471, "grad_norm": 1.546875, "learning_rate": 1.9190749358851485e-05, "loss": 1.0745, "step": 2343 }, { "epoch": 0.40191182459223695, "grad_norm": 1.59375, "learning_rate": 1.9190037523730435e-05, "loss": 0.991, "step": 2344 }, { "epoch": 0.40208328868122684, "grad_norm": 1.6875, "learning_rate": 1.918932538888766e-05, "loss": 0.9776, "step": 2345 }, { "epoch": 0.40225475277021666, "grad_norm": 1.6796875, "learning_rate": 1.918861295434638e-05, "loss": 1.0356, "step": 2346 }, { "epoch": 0.40242621685920654, "grad_norm": 1.6328125, "learning_rate": 1.918790022012983e-05, "loss": 0.9826, "step": 2347 }, { "epoch": 0.4025976809481964, "grad_norm": 1.59375, "learning_rate": 1.9187187186261254e-05, "loss": 1.0368, "step": 2348 }, { "epoch": 0.40276914503718625, "grad_norm": 1.625, "learning_rate": 1.918647385276391e-05, "loss": 1.0812, "step": 2349 }, { "epoch": 0.40294060912617613, "grad_norm": 1.6875, "learning_rate": 1.9185760219661057e-05, "loss": 1.0314, "step": 2350 }, { "epoch": 0.403112073215166, "grad_norm": 1.5390625, "learning_rate": 1.9185046286975978e-05, "loss": 1.0582, "step": 2351 }, { "epoch": 0.40328353730415584, "grad_norm": 1.703125, "learning_rate": 1.9184332054731948e-05, "loss": 1.0616, "step": 2352 }, { "epoch": 0.4034550013931457, "grad_norm": 1.546875, "learning_rate": 1.9183617522952267e-05, "loss": 1.0125, "step": 2353 }, { "epoch": 0.4036264654821356, "grad_norm": 1.7578125, "learning_rate": 1.9182902691660235e-05, "loss": 1.0078, "step": 2354 }, { "epoch": 0.4037979295711254, "grad_norm": 1.671875, "learning_rate": 1.9182187560879165e-05, "loss": 1.0332, "step": 2355 }, { "epoch": 0.4039693936601153, "grad_norm": 1.7109375, "learning_rate": 1.9181472130632385e-05, "loss": 1.0976, "step": 2356 }, { "epoch": 0.4041408577491052, "grad_norm": 1.6796875, "learning_rate": 1.9180756400943222e-05, "loss": 1.0831, "step": 2357 }, { "epoch": 0.404312321838095, "grad_norm": 1.5390625, "learning_rate": 1.9180040371835025e-05, "loss": 1.0052, "step": 2358 }, { "epoch": 0.4044837859270849, "grad_norm": 1.6640625, "learning_rate": 1.917932404333114e-05, "loss": 1.1313, "step": 2359 }, { "epoch": 0.4046552500160748, "grad_norm": 1.65625, "learning_rate": 1.917860741545493e-05, "loss": 1.087, "step": 2360 }, { "epoch": 0.4048267141050646, "grad_norm": 1.6328125, "learning_rate": 1.9177890488229775e-05, "loss": 1.1294, "step": 2361 }, { "epoch": 0.4049981781940545, "grad_norm": 1.5, "learning_rate": 1.9177173261679045e-05, "loss": 0.8989, "step": 2362 }, { "epoch": 0.40516964228304436, "grad_norm": 1.5859375, "learning_rate": 1.9176455735826136e-05, "loss": 1.0715, "step": 2363 }, { "epoch": 0.4053411063720342, "grad_norm": 1.7109375, "learning_rate": 1.917573791069445e-05, "loss": 1.042, "step": 2364 }, { "epoch": 0.40551257046102407, "grad_norm": 1.640625, "learning_rate": 1.9175019786307404e-05, "loss": 1.0684, "step": 2365 }, { "epoch": 0.40568403455001395, "grad_norm": 1.6875, "learning_rate": 1.9174301362688408e-05, "loss": 1.0898, "step": 2366 }, { "epoch": 0.4058554986390038, "grad_norm": 1.6171875, "learning_rate": 1.9173582639860895e-05, "loss": 1.0967, "step": 2367 }, { "epoch": 0.40602696272799366, "grad_norm": 1.7109375, "learning_rate": 1.9172863617848315e-05, "loss": 1.0429, "step": 2368 }, { "epoch": 0.40619842681698354, "grad_norm": 1.6796875, "learning_rate": 1.9172144296674106e-05, "loss": 1.0442, "step": 2369 }, { "epoch": 0.40636989090597336, "grad_norm": 1.625, "learning_rate": 1.917142467636173e-05, "loss": 1.1039, "step": 2370 }, { "epoch": 0.40654135499496324, "grad_norm": 1.625, "learning_rate": 1.9170704756934663e-05, "loss": 0.9351, "step": 2371 }, { "epoch": 0.4067128190839531, "grad_norm": 1.6640625, "learning_rate": 1.9169984538416382e-05, "loss": 1.0478, "step": 2372 }, { "epoch": 0.40688428317294295, "grad_norm": 1.53125, "learning_rate": 1.916926402083037e-05, "loss": 1.0912, "step": 2373 }, { "epoch": 0.40705574726193283, "grad_norm": 1.71875, "learning_rate": 1.916854320420013e-05, "loss": 1.0144, "step": 2374 }, { "epoch": 0.4072272113509227, "grad_norm": 1.640625, "learning_rate": 1.9167822088549177e-05, "loss": 1.1344, "step": 2375 }, { "epoch": 0.40739867543991254, "grad_norm": 1.546875, "learning_rate": 1.9167100673901014e-05, "loss": 0.9543, "step": 2376 }, { "epoch": 0.4075701395289024, "grad_norm": 1.6484375, "learning_rate": 1.9166378960279185e-05, "loss": 1.0708, "step": 2377 }, { "epoch": 0.4077416036178923, "grad_norm": 1.609375, "learning_rate": 1.9165656947707216e-05, "loss": 1.021, "step": 2378 }, { "epoch": 0.4079130677068821, "grad_norm": 1.640625, "learning_rate": 1.916493463620866e-05, "loss": 0.9727, "step": 2379 }, { "epoch": 0.408084531795872, "grad_norm": 1.6796875, "learning_rate": 1.9164212025807073e-05, "loss": 1.0842, "step": 2380 }, { "epoch": 0.4082559958848619, "grad_norm": 1.703125, "learning_rate": 1.9163489116526025e-05, "loss": 1.0958, "step": 2381 }, { "epoch": 0.4084274599738517, "grad_norm": 1.5390625, "learning_rate": 1.916276590838909e-05, "loss": 1.0029, "step": 2382 }, { "epoch": 0.4085989240628416, "grad_norm": 1.8515625, "learning_rate": 1.9162042401419853e-05, "loss": 1.0555, "step": 2383 }, { "epoch": 0.4087703881518315, "grad_norm": 1.5703125, "learning_rate": 1.9161318595641915e-05, "loss": 1.0398, "step": 2384 }, { "epoch": 0.4089418522408213, "grad_norm": 1.765625, "learning_rate": 1.9160594491078875e-05, "loss": 1.0446, "step": 2385 }, { "epoch": 0.4091133163298112, "grad_norm": 1.625, "learning_rate": 1.915987008775436e-05, "loss": 1.0675, "step": 2386 }, { "epoch": 0.40928478041880106, "grad_norm": 1.625, "learning_rate": 1.915914538569198e-05, "loss": 1.1024, "step": 2387 }, { "epoch": 0.4094562445077909, "grad_norm": 1.625, "learning_rate": 1.9158420384915387e-05, "loss": 1.106, "step": 2388 }, { "epoch": 0.40962770859678077, "grad_norm": 1.6015625, "learning_rate": 1.915769508544821e-05, "loss": 1.0341, "step": 2389 }, { "epoch": 0.4097991726857706, "grad_norm": 1.6875, "learning_rate": 1.915696948731412e-05, "loss": 1.062, "step": 2390 }, { "epoch": 0.4099706367747605, "grad_norm": 1.6015625, "learning_rate": 1.9156243590536764e-05, "loss": 1.1496, "step": 2391 }, { "epoch": 0.41014210086375036, "grad_norm": 1.640625, "learning_rate": 1.9155517395139833e-05, "loss": 1.0598, "step": 2392 }, { "epoch": 0.4103135649527402, "grad_norm": 1.65625, "learning_rate": 1.9154790901147e-05, "loss": 0.9867, "step": 2393 }, { "epoch": 0.41048502904173007, "grad_norm": 1.6015625, "learning_rate": 1.915406410858196e-05, "loss": 1.0782, "step": 2394 }, { "epoch": 0.41065649313071995, "grad_norm": 1.65625, "learning_rate": 1.9153337017468424e-05, "loss": 1.0514, "step": 2395 }, { "epoch": 0.41082795721970977, "grad_norm": 1.6484375, "learning_rate": 1.9152609627830095e-05, "loss": 1.0171, "step": 2396 }, { "epoch": 0.41099942130869965, "grad_norm": 1.6484375, "learning_rate": 1.91518819396907e-05, "loss": 1.0764, "step": 2397 }, { "epoch": 0.41117088539768953, "grad_norm": 1.8984375, "learning_rate": 1.9151153953073976e-05, "loss": 1.0801, "step": 2398 }, { "epoch": 0.41134234948667936, "grad_norm": 1.5625, "learning_rate": 1.9150425668003657e-05, "loss": 1.0627, "step": 2399 }, { "epoch": 0.41151381357566924, "grad_norm": 1.59375, "learning_rate": 1.9149697084503502e-05, "loss": 1.0157, "step": 2400 }, { "epoch": 0.4116852776646591, "grad_norm": 1.53125, "learning_rate": 1.9148968202597272e-05, "loss": 0.9235, "step": 2401 }, { "epoch": 0.41185674175364895, "grad_norm": 1.6171875, "learning_rate": 1.9148239022308734e-05, "loss": 1.0204, "step": 2402 }, { "epoch": 0.41202820584263883, "grad_norm": 1.671875, "learning_rate": 1.9147509543661677e-05, "loss": 1.0096, "step": 2403 }, { "epoch": 0.4121996699316287, "grad_norm": 1.703125, "learning_rate": 1.9146779766679885e-05, "loss": 1.0784, "step": 2404 }, { "epoch": 0.41237113402061853, "grad_norm": 1.6953125, "learning_rate": 1.9146049691387157e-05, "loss": 0.9982, "step": 2405 }, { "epoch": 0.4125425981096084, "grad_norm": 1.609375, "learning_rate": 1.9145319317807314e-05, "loss": 1.0323, "step": 2406 }, { "epoch": 0.4127140621985983, "grad_norm": 1.6875, "learning_rate": 1.9144588645964165e-05, "loss": 1.0299, "step": 2407 }, { "epoch": 0.4128855262875881, "grad_norm": 1.6484375, "learning_rate": 1.914385767588155e-05, "loss": 1.052, "step": 2408 }, { "epoch": 0.413056990376578, "grad_norm": 1.7109375, "learning_rate": 1.91431264075833e-05, "loss": 1.0281, "step": 2409 }, { "epoch": 0.4132284544655679, "grad_norm": 1.7109375, "learning_rate": 1.914239484109327e-05, "loss": 1.0676, "step": 2410 }, { "epoch": 0.4133999185545577, "grad_norm": 1.640625, "learning_rate": 1.9141662976435315e-05, "loss": 1.1174, "step": 2411 }, { "epoch": 0.4135713826435476, "grad_norm": 1.6640625, "learning_rate": 1.9140930813633307e-05, "loss": 1.1627, "step": 2412 }, { "epoch": 0.4137428467325375, "grad_norm": 1.8046875, "learning_rate": 1.9140198352711124e-05, "loss": 1.0565, "step": 2413 }, { "epoch": 0.4139143108215273, "grad_norm": 1.6484375, "learning_rate": 1.9139465593692653e-05, "loss": 1.0109, "step": 2414 }, { "epoch": 0.4140857749105172, "grad_norm": 1.625, "learning_rate": 1.9138732536601794e-05, "loss": 1.0394, "step": 2415 }, { "epoch": 0.41425723899950706, "grad_norm": 1.625, "learning_rate": 1.9137999181462453e-05, "loss": 1.023, "step": 2416 }, { "epoch": 0.4144287030884969, "grad_norm": 1.8203125, "learning_rate": 1.913726552829855e-05, "loss": 1.0992, "step": 2417 }, { "epoch": 0.41460016717748677, "grad_norm": 1.6640625, "learning_rate": 1.9136531577134007e-05, "loss": 1.1668, "step": 2418 }, { "epoch": 0.41477163126647665, "grad_norm": 1.6171875, "learning_rate": 1.9135797327992766e-05, "loss": 0.9833, "step": 2419 }, { "epoch": 0.4149430953554665, "grad_norm": 1.6640625, "learning_rate": 1.9135062780898775e-05, "loss": 1.0893, "step": 2420 }, { "epoch": 0.41511455944445635, "grad_norm": 1.640625, "learning_rate": 1.913432793587598e-05, "loss": 1.0264, "step": 2421 }, { "epoch": 0.41528602353344624, "grad_norm": 1.640625, "learning_rate": 1.913359279294836e-05, "loss": 0.9673, "step": 2422 }, { "epoch": 0.41545748762243606, "grad_norm": 1.6171875, "learning_rate": 1.9132857352139884e-05, "loss": 1.0679, "step": 2423 }, { "epoch": 0.41562895171142594, "grad_norm": 1.546875, "learning_rate": 1.913212161347454e-05, "loss": 0.9749, "step": 2424 }, { "epoch": 0.4158004158004158, "grad_norm": 1.6640625, "learning_rate": 1.913138557697632e-05, "loss": 1.1008, "step": 2425 }, { "epoch": 0.41597187988940565, "grad_norm": 1.5703125, "learning_rate": 1.913064924266923e-05, "loss": 1.0338, "step": 2426 }, { "epoch": 0.41614334397839553, "grad_norm": 1.7109375, "learning_rate": 1.9129912610577287e-05, "loss": 1.0881, "step": 2427 }, { "epoch": 0.4163148080673854, "grad_norm": 1.5625, "learning_rate": 1.912917568072451e-05, "loss": 1.1241, "step": 2428 }, { "epoch": 0.41648627215637524, "grad_norm": 1.640625, "learning_rate": 1.912843845313494e-05, "loss": 1.1109, "step": 2429 }, { "epoch": 0.4166577362453651, "grad_norm": 1.59375, "learning_rate": 1.9127700927832616e-05, "loss": 0.9705, "step": 2430 }, { "epoch": 0.416829200334355, "grad_norm": 1.59375, "learning_rate": 1.9126963104841595e-05, "loss": 1.0619, "step": 2431 }, { "epoch": 0.4170006644233448, "grad_norm": 1.6015625, "learning_rate": 1.912622498418594e-05, "loss": 1.0899, "step": 2432 }, { "epoch": 0.4171721285123347, "grad_norm": 1.6484375, "learning_rate": 1.912548656588972e-05, "loss": 1.0203, "step": 2433 }, { "epoch": 0.4173435926013246, "grad_norm": 1.703125, "learning_rate": 1.9124747849977016e-05, "loss": 1.0402, "step": 2434 }, { "epoch": 0.4175150566903144, "grad_norm": 1.6328125, "learning_rate": 1.9124008836471927e-05, "loss": 1.0895, "step": 2435 }, { "epoch": 0.4176865207793043, "grad_norm": 1.625, "learning_rate": 1.9123269525398554e-05, "loss": 1.0637, "step": 2436 }, { "epoch": 0.4178579848682941, "grad_norm": 1.6953125, "learning_rate": 1.9122529916781002e-05, "loss": 0.9888, "step": 2437 }, { "epoch": 0.418029448957284, "grad_norm": 1.671875, "learning_rate": 1.9121790010643402e-05, "loss": 1.0156, "step": 2438 }, { "epoch": 0.4182009130462739, "grad_norm": 1.6328125, "learning_rate": 1.912104980700988e-05, "loss": 1.1407, "step": 2439 }, { "epoch": 0.4183723771352637, "grad_norm": 1.5, "learning_rate": 1.9120309305904575e-05, "loss": 1.0139, "step": 2440 }, { "epoch": 0.4185438412242536, "grad_norm": 1.640625, "learning_rate": 1.911956850735164e-05, "loss": 1.015, "step": 2441 }, { "epoch": 0.41871530531324347, "grad_norm": 1.65625, "learning_rate": 1.9118827411375233e-05, "loss": 1.0958, "step": 2442 }, { "epoch": 0.4188867694022333, "grad_norm": 1.546875, "learning_rate": 1.911808601799953e-05, "loss": 0.9161, "step": 2443 }, { "epoch": 0.4190582334912232, "grad_norm": 1.6640625, "learning_rate": 1.9117344327248704e-05, "loss": 1.0281, "step": 2444 }, { "epoch": 0.41922969758021306, "grad_norm": 1.5625, "learning_rate": 1.9116602339146945e-05, "loss": 1.0395, "step": 2445 }, { "epoch": 0.4194011616692029, "grad_norm": 1.5625, "learning_rate": 1.9115860053718455e-05, "loss": 1.0219, "step": 2446 }, { "epoch": 0.41957262575819276, "grad_norm": 1.609375, "learning_rate": 1.911511747098744e-05, "loss": 1.0217, "step": 2447 }, { "epoch": 0.41974408984718264, "grad_norm": 1.6171875, "learning_rate": 1.9114374590978123e-05, "loss": 0.9609, "step": 2448 }, { "epoch": 0.41991555393617247, "grad_norm": 2.296875, "learning_rate": 1.911363141371473e-05, "loss": 1.0245, "step": 2449 }, { "epoch": 0.42008701802516235, "grad_norm": 1.6015625, "learning_rate": 1.9112887939221495e-05, "loss": 1.0485, "step": 2450 }, { "epoch": 0.42025848211415223, "grad_norm": 1.703125, "learning_rate": 1.911214416752267e-05, "loss": 1.1228, "step": 2451 }, { "epoch": 0.42042994620314206, "grad_norm": 1.953125, "learning_rate": 1.911140009864251e-05, "loss": 1.0937, "step": 2452 }, { "epoch": 0.42060141029213194, "grad_norm": 1.6015625, "learning_rate": 1.911065573260528e-05, "loss": 1.0646, "step": 2453 }, { "epoch": 0.4207728743811218, "grad_norm": 1.609375, "learning_rate": 1.9109911069435263e-05, "loss": 1.0335, "step": 2454 }, { "epoch": 0.42094433847011165, "grad_norm": 1.640625, "learning_rate": 1.910916610915674e-05, "loss": 1.0465, "step": 2455 }, { "epoch": 0.4211158025591015, "grad_norm": 1.5546875, "learning_rate": 1.9108420851794007e-05, "loss": 0.9507, "step": 2456 }, { "epoch": 0.4212872666480914, "grad_norm": 1.6328125, "learning_rate": 1.9107675297371372e-05, "loss": 0.9774, "step": 2457 }, { "epoch": 0.42145873073708123, "grad_norm": 1.6328125, "learning_rate": 1.9106929445913147e-05, "loss": 1.1207, "step": 2458 }, { "epoch": 0.4216301948260711, "grad_norm": 1.5078125, "learning_rate": 1.910618329744366e-05, "loss": 1.0304, "step": 2459 }, { "epoch": 0.421801658915061, "grad_norm": 1.578125, "learning_rate": 1.9105436851987247e-05, "loss": 1.0384, "step": 2460 }, { "epoch": 0.4219731230040508, "grad_norm": 1.6484375, "learning_rate": 1.9104690109568248e-05, "loss": 1.0574, "step": 2461 }, { "epoch": 0.4221445870930407, "grad_norm": 1.609375, "learning_rate": 1.910394307021102e-05, "loss": 0.9608, "step": 2462 }, { "epoch": 0.4223160511820306, "grad_norm": 1.5703125, "learning_rate": 1.910319573393993e-05, "loss": 1.0314, "step": 2463 }, { "epoch": 0.4224875152710204, "grad_norm": 1.6328125, "learning_rate": 1.9102448100779343e-05, "loss": 1.0206, "step": 2464 }, { "epoch": 0.4226589793600103, "grad_norm": 1.6015625, "learning_rate": 1.910170017075365e-05, "loss": 1.0368, "step": 2465 }, { "epoch": 0.42283044344900017, "grad_norm": 1.6796875, "learning_rate": 1.910095194388724e-05, "loss": 1.0978, "step": 2466 }, { "epoch": 0.42300190753799, "grad_norm": 1.640625, "learning_rate": 1.9100203420204516e-05, "loss": 1.1047, "step": 2467 }, { "epoch": 0.4231733716269799, "grad_norm": 1.5859375, "learning_rate": 1.9099454599729887e-05, "loss": 1.028, "step": 2468 }, { "epoch": 0.42334483571596976, "grad_norm": 1.6328125, "learning_rate": 1.909870548248778e-05, "loss": 1.0247, "step": 2469 }, { "epoch": 0.4235162998049596, "grad_norm": 1.5703125, "learning_rate": 1.9097956068502626e-05, "loss": 1.0003, "step": 2470 }, { "epoch": 0.42368776389394947, "grad_norm": 1.546875, "learning_rate": 1.9097206357798864e-05, "loss": 1.086, "step": 2471 }, { "epoch": 0.42385922798293935, "grad_norm": 1.65625, "learning_rate": 1.9096456350400948e-05, "loss": 1.0451, "step": 2472 }, { "epoch": 0.42403069207192917, "grad_norm": 1.6953125, "learning_rate": 1.9095706046333333e-05, "loss": 1.1459, "step": 2473 }, { "epoch": 0.42420215616091905, "grad_norm": 1.6171875, "learning_rate": 1.9094955445620492e-05, "loss": 1.0954, "step": 2474 }, { "epoch": 0.42437362024990893, "grad_norm": 1.6171875, "learning_rate": 1.909420454828691e-05, "loss": 0.9514, "step": 2475 }, { "epoch": 0.42454508433889876, "grad_norm": 1.5703125, "learning_rate": 1.9093453354357064e-05, "loss": 1.0758, "step": 2476 }, { "epoch": 0.42471654842788864, "grad_norm": 1.765625, "learning_rate": 1.909270186385547e-05, "loss": 1.0863, "step": 2477 }, { "epoch": 0.4248880125168785, "grad_norm": 1.609375, "learning_rate": 1.9091950076806626e-05, "loss": 1.0663, "step": 2478 }, { "epoch": 0.42505947660586835, "grad_norm": 1.5390625, "learning_rate": 1.909119799323505e-05, "loss": 0.9911, "step": 2479 }, { "epoch": 0.42523094069485823, "grad_norm": 1.6328125, "learning_rate": 1.9090445613165273e-05, "loss": 1.0238, "step": 2480 }, { "epoch": 0.4254024047838481, "grad_norm": 1.734375, "learning_rate": 1.9089692936621836e-05, "loss": 1.1141, "step": 2481 }, { "epoch": 0.42557386887283793, "grad_norm": 1.4609375, "learning_rate": 1.908893996362928e-05, "loss": 0.9739, "step": 2482 }, { "epoch": 0.4257453329618278, "grad_norm": 1.625, "learning_rate": 1.9088186694212174e-05, "loss": 1.0654, "step": 2483 }, { "epoch": 0.4259167970508177, "grad_norm": 1.6484375, "learning_rate": 1.9087433128395073e-05, "loss": 1.0327, "step": 2484 }, { "epoch": 0.4260882611398075, "grad_norm": 1.671875, "learning_rate": 1.9086679266202554e-05, "loss": 1.0202, "step": 2485 }, { "epoch": 0.4262597252287974, "grad_norm": 1.5546875, "learning_rate": 1.908592510765921e-05, "loss": 1.1057, "step": 2486 }, { "epoch": 0.42643118931778723, "grad_norm": 1.640625, "learning_rate": 1.908517065278963e-05, "loss": 1.0686, "step": 2487 }, { "epoch": 0.4266026534067771, "grad_norm": 1.5390625, "learning_rate": 1.9084415901618428e-05, "loss": 0.9874, "step": 2488 }, { "epoch": 0.426774117495767, "grad_norm": 1.7265625, "learning_rate": 1.9083660854170212e-05, "loss": 1.0316, "step": 2489 }, { "epoch": 0.4269455815847568, "grad_norm": 1.625, "learning_rate": 1.908290551046961e-05, "loss": 1.0549, "step": 2490 }, { "epoch": 0.4271170456737467, "grad_norm": 1.6484375, "learning_rate": 1.9082149870541258e-05, "loss": 1.039, "step": 2491 }, { "epoch": 0.4272885097627366, "grad_norm": 1.59375, "learning_rate": 1.9081393934409797e-05, "loss": 1.0129, "step": 2492 }, { "epoch": 0.4274599738517264, "grad_norm": 1.734375, "learning_rate": 1.9080637702099883e-05, "loss": 1.1138, "step": 2493 }, { "epoch": 0.4276314379407163, "grad_norm": 1.59375, "learning_rate": 1.9079881173636182e-05, "loss": 1.013, "step": 2494 }, { "epoch": 0.42780290202970617, "grad_norm": 1.640625, "learning_rate": 1.907912434904336e-05, "loss": 1.0412, "step": 2495 }, { "epoch": 0.427974366118696, "grad_norm": 1.6328125, "learning_rate": 1.9078367228346106e-05, "loss": 1.0565, "step": 2496 }, { "epoch": 0.4281458302076859, "grad_norm": 1.6171875, "learning_rate": 1.9077609811569107e-05, "loss": 1.0311, "step": 2497 }, { "epoch": 0.42831729429667575, "grad_norm": 1.671875, "learning_rate": 1.9076852098737074e-05, "loss": 1.1392, "step": 2498 }, { "epoch": 0.4284887583856656, "grad_norm": 1.640625, "learning_rate": 1.907609408987471e-05, "loss": 1.0764, "step": 2499 }, { "epoch": 0.42866022247465546, "grad_norm": 1.703125, "learning_rate": 1.9075335785006743e-05, "loss": 1.063, "step": 2500 }, { "epoch": 0.42883168656364534, "grad_norm": 1.6015625, "learning_rate": 1.90745771841579e-05, "loss": 1.1018, "step": 2501 }, { "epoch": 0.42900315065263517, "grad_norm": 1.609375, "learning_rate": 1.9073818287352925e-05, "loss": 1.0077, "step": 2502 }, { "epoch": 0.42917461474162505, "grad_norm": 1.6015625, "learning_rate": 1.9073059094616565e-05, "loss": 0.9708, "step": 2503 }, { "epoch": 0.42934607883061493, "grad_norm": 1.625, "learning_rate": 1.9072299605973582e-05, "loss": 1.068, "step": 2504 }, { "epoch": 0.42951754291960476, "grad_norm": 1.5625, "learning_rate": 1.907153982144875e-05, "loss": 1.0559, "step": 2505 }, { "epoch": 0.42968900700859464, "grad_norm": 1.578125, "learning_rate": 1.907077974106684e-05, "loss": 1.1482, "step": 2506 }, { "epoch": 0.4298604710975845, "grad_norm": 1.546875, "learning_rate": 1.9070019364852646e-05, "loss": 1.071, "step": 2507 }, { "epoch": 0.43003193518657434, "grad_norm": 1.6484375, "learning_rate": 1.9069258692830964e-05, "loss": 1.0705, "step": 2508 }, { "epoch": 0.4302033992755642, "grad_norm": 1.6171875, "learning_rate": 1.9068497725026607e-05, "loss": 1.0141, "step": 2509 }, { "epoch": 0.4303748633645541, "grad_norm": 1.6484375, "learning_rate": 1.906773646146439e-05, "loss": 1.1033, "step": 2510 }, { "epoch": 0.43054632745354393, "grad_norm": 1.71875, "learning_rate": 1.9066974902169142e-05, "loss": 0.9858, "step": 2511 }, { "epoch": 0.4307177915425338, "grad_norm": 1.7109375, "learning_rate": 1.9066213047165698e-05, "loss": 1.0306, "step": 2512 }, { "epoch": 0.4308892556315237, "grad_norm": 1.65625, "learning_rate": 1.9065450896478904e-05, "loss": 1.0003, "step": 2513 }, { "epoch": 0.4310607197205135, "grad_norm": 1.5078125, "learning_rate": 1.906468845013362e-05, "loss": 0.9826, "step": 2514 }, { "epoch": 0.4312321838095034, "grad_norm": 1.671875, "learning_rate": 1.9063925708154713e-05, "loss": 1.0373, "step": 2515 }, { "epoch": 0.4314036478984933, "grad_norm": 1.5546875, "learning_rate": 1.9063162670567057e-05, "loss": 1.072, "step": 2516 }, { "epoch": 0.4315751119874831, "grad_norm": 1.671875, "learning_rate": 1.906239933739554e-05, "loss": 1.0728, "step": 2517 }, { "epoch": 0.431746576076473, "grad_norm": 1.5859375, "learning_rate": 1.9061635708665053e-05, "loss": 1.0666, "step": 2518 }, { "epoch": 0.43191804016546287, "grad_norm": 1.5078125, "learning_rate": 1.90608717844005e-05, "loss": 1.0418, "step": 2519 }, { "epoch": 0.4320895042544527, "grad_norm": 1.6171875, "learning_rate": 1.90601075646268e-05, "loss": 0.9639, "step": 2520 }, { "epoch": 0.4322609683434426, "grad_norm": 1.5390625, "learning_rate": 1.905934304936887e-05, "loss": 0.9816, "step": 2521 }, { "epoch": 0.43243243243243246, "grad_norm": 1.6875, "learning_rate": 1.9058578238651655e-05, "loss": 1.0473, "step": 2522 }, { "epoch": 0.4326038965214223, "grad_norm": 1.609375, "learning_rate": 1.9057813132500095e-05, "loss": 1.105, "step": 2523 }, { "epoch": 0.43277536061041216, "grad_norm": 1.6171875, "learning_rate": 1.9057047730939132e-05, "loss": 1.0386, "step": 2524 }, { "epoch": 0.43294682469940204, "grad_norm": 1.6953125, "learning_rate": 1.905628203399374e-05, "loss": 1.0777, "step": 2525 }, { "epoch": 0.43311828878839187, "grad_norm": 1.6328125, "learning_rate": 1.905551604168889e-05, "loss": 1.0285, "step": 2526 }, { "epoch": 0.43328975287738175, "grad_norm": 1.7421875, "learning_rate": 1.905474975404956e-05, "loss": 1.0409, "step": 2527 }, { "epoch": 0.43346121696637163, "grad_norm": 1.5859375, "learning_rate": 1.9053983171100744e-05, "loss": 1.0576, "step": 2528 }, { "epoch": 0.43363268105536146, "grad_norm": 1.75, "learning_rate": 1.9053216292867445e-05, "loss": 1.1138, "step": 2529 }, { "epoch": 0.43380414514435134, "grad_norm": 1.6171875, "learning_rate": 1.905244911937467e-05, "loss": 1.0694, "step": 2530 }, { "epoch": 0.4339756092333412, "grad_norm": 1.640625, "learning_rate": 1.9051681650647437e-05, "loss": 0.955, "step": 2531 }, { "epoch": 0.43414707332233105, "grad_norm": 1.546875, "learning_rate": 1.9050913886710786e-05, "loss": 1.021, "step": 2532 }, { "epoch": 0.4343185374113209, "grad_norm": 1.703125, "learning_rate": 1.9050145827589745e-05, "loss": 1.098, "step": 2533 }, { "epoch": 0.43449000150031075, "grad_norm": 1.609375, "learning_rate": 1.904937747330937e-05, "loss": 1.0156, "step": 2534 }, { "epoch": 0.43466146558930063, "grad_norm": 1.6640625, "learning_rate": 1.9048608823894722e-05, "loss": 1.0958, "step": 2535 }, { "epoch": 0.4348329296782905, "grad_norm": 1.71875, "learning_rate": 1.9047839879370867e-05, "loss": 1.0483, "step": 2536 }, { "epoch": 0.43500439376728034, "grad_norm": 1.59375, "learning_rate": 1.9047070639762878e-05, "loss": 0.9646, "step": 2537 }, { "epoch": 0.4351758578562702, "grad_norm": 1.5859375, "learning_rate": 1.9046301105095847e-05, "loss": 0.9332, "step": 2538 }, { "epoch": 0.4353473219452601, "grad_norm": 1.671875, "learning_rate": 1.9045531275394878e-05, "loss": 1.0767, "step": 2539 }, { "epoch": 0.4355187860342499, "grad_norm": 1.6171875, "learning_rate": 1.904476115068507e-05, "loss": 1.0839, "step": 2540 }, { "epoch": 0.4356902501232398, "grad_norm": 1.5703125, "learning_rate": 1.9043990730991536e-05, "loss": 1.0746, "step": 2541 }, { "epoch": 0.4358617142122297, "grad_norm": 1.5859375, "learning_rate": 1.9043220016339414e-05, "loss": 0.9487, "step": 2542 }, { "epoch": 0.4360331783012195, "grad_norm": 1.6875, "learning_rate": 1.9042449006753827e-05, "loss": 1.0746, "step": 2543 }, { "epoch": 0.4362046423902094, "grad_norm": 1.5546875, "learning_rate": 1.904167770225993e-05, "loss": 1.0214, "step": 2544 }, { "epoch": 0.4363761064791993, "grad_norm": 1.5703125, "learning_rate": 1.904090610288288e-05, "loss": 1.0514, "step": 2545 }, { "epoch": 0.4365475705681891, "grad_norm": 1.53125, "learning_rate": 1.904013420864783e-05, "loss": 1.0005, "step": 2546 }, { "epoch": 0.436719034657179, "grad_norm": 1.5625, "learning_rate": 1.9039362019579965e-05, "loss": 0.9825, "step": 2547 }, { "epoch": 0.43689049874616886, "grad_norm": 1.75, "learning_rate": 1.9038589535704467e-05, "loss": 1.0655, "step": 2548 }, { "epoch": 0.4370619628351587, "grad_norm": 1.6015625, "learning_rate": 1.9037816757046528e-05, "loss": 0.9854, "step": 2549 }, { "epoch": 0.43723342692414857, "grad_norm": 1.5546875, "learning_rate": 1.903704368363135e-05, "loss": 1.0154, "step": 2550 }, { "epoch": 0.43740489101313845, "grad_norm": 1.578125, "learning_rate": 1.9036270315484144e-05, "loss": 1.0617, "step": 2551 }, { "epoch": 0.4375763551021283, "grad_norm": 1.6640625, "learning_rate": 1.9035496652630138e-05, "loss": 1.0488, "step": 2552 }, { "epoch": 0.43774781919111816, "grad_norm": 1.640625, "learning_rate": 1.9034722695094562e-05, "loss": 1.0497, "step": 2553 }, { "epoch": 0.43791928328010804, "grad_norm": 1.671875, "learning_rate": 1.9033948442902658e-05, "loss": 1.0427, "step": 2554 }, { "epoch": 0.43809074736909787, "grad_norm": 1.625, "learning_rate": 1.9033173896079677e-05, "loss": 1.0454, "step": 2555 }, { "epoch": 0.43826221145808775, "grad_norm": 1.53125, "learning_rate": 1.9032399054650876e-05, "loss": 0.962, "step": 2556 }, { "epoch": 0.43843367554707763, "grad_norm": 1.5625, "learning_rate": 1.903162391864153e-05, "loss": 1.0095, "step": 2557 }, { "epoch": 0.43860513963606745, "grad_norm": 1.75, "learning_rate": 1.9030848488076924e-05, "loss": 1.1543, "step": 2558 }, { "epoch": 0.43877660372505733, "grad_norm": 1.671875, "learning_rate": 1.9030072762982335e-05, "loss": 1.0546, "step": 2559 }, { "epoch": 0.4389480678140472, "grad_norm": 1.671875, "learning_rate": 1.9029296743383074e-05, "loss": 1.1123, "step": 2560 }, { "epoch": 0.43911953190303704, "grad_norm": 1.71875, "learning_rate": 1.902852042930444e-05, "loss": 1.0054, "step": 2561 }, { "epoch": 0.4392909959920269, "grad_norm": 1.6328125, "learning_rate": 1.9027743820771756e-05, "loss": 1.0301, "step": 2562 }, { "epoch": 0.4394624600810168, "grad_norm": 1.671875, "learning_rate": 1.9026966917810356e-05, "loss": 1.0997, "step": 2563 }, { "epoch": 0.43963392417000663, "grad_norm": 1.5390625, "learning_rate": 1.9026189720445568e-05, "loss": 1.0412, "step": 2564 }, { "epoch": 0.4398053882589965, "grad_norm": 1.5078125, "learning_rate": 1.9025412228702747e-05, "loss": 1.0091, "step": 2565 }, { "epoch": 0.4399768523479864, "grad_norm": 1.640625, "learning_rate": 1.9024634442607245e-05, "loss": 1.0701, "step": 2566 }, { "epoch": 0.4401483164369762, "grad_norm": 1.6640625, "learning_rate": 1.9023856362184433e-05, "loss": 0.974, "step": 2567 }, { "epoch": 0.4403197805259661, "grad_norm": 1.7109375, "learning_rate": 1.902307798745968e-05, "loss": 1.1008, "step": 2568 }, { "epoch": 0.440491244614956, "grad_norm": 1.6015625, "learning_rate": 1.9022299318458377e-05, "loss": 1.0568, "step": 2569 }, { "epoch": 0.4406627087039458, "grad_norm": 1.7734375, "learning_rate": 1.902152035520592e-05, "loss": 1.1136, "step": 2570 }, { "epoch": 0.4408341727929357, "grad_norm": 1.6328125, "learning_rate": 1.902074109772771e-05, "loss": 0.989, "step": 2571 }, { "epoch": 0.44100563688192557, "grad_norm": 1.6328125, "learning_rate": 1.9019961546049165e-05, "loss": 0.9976, "step": 2572 }, { "epoch": 0.4411771009709154, "grad_norm": 1.6484375, "learning_rate": 1.9019181700195714e-05, "loss": 1.0227, "step": 2573 }, { "epoch": 0.4413485650599053, "grad_norm": 1.6484375, "learning_rate": 1.9018401560192775e-05, "loss": 1.0334, "step": 2574 }, { "epoch": 0.44152002914889515, "grad_norm": 1.6640625, "learning_rate": 1.9017621126065804e-05, "loss": 1.0257, "step": 2575 }, { "epoch": 0.441691493237885, "grad_norm": 1.546875, "learning_rate": 1.9016840397840256e-05, "loss": 1.0305, "step": 2576 }, { "epoch": 0.44186295732687486, "grad_norm": 1.5859375, "learning_rate": 1.901605937554158e-05, "loss": 1.0111, "step": 2577 }, { "epoch": 0.44203442141586474, "grad_norm": 1.671875, "learning_rate": 1.901527805919526e-05, "loss": 1.0746, "step": 2578 }, { "epoch": 0.44220588550485457, "grad_norm": 1.6484375, "learning_rate": 1.9014496448826775e-05, "loss": 1.08, "step": 2579 }, { "epoch": 0.44237734959384445, "grad_norm": 1.546875, "learning_rate": 1.9013714544461617e-05, "loss": 1.0462, "step": 2580 }, { "epoch": 0.4425488136828343, "grad_norm": 1.625, "learning_rate": 1.9012932346125282e-05, "loss": 1.0524, "step": 2581 }, { "epoch": 0.44272027777182416, "grad_norm": 1.578125, "learning_rate": 1.9012149853843283e-05, "loss": 0.9439, "step": 2582 }, { "epoch": 0.44289174186081404, "grad_norm": 1.7578125, "learning_rate": 1.9011367067641146e-05, "loss": 1.0259, "step": 2583 }, { "epoch": 0.44306320594980386, "grad_norm": 1.5625, "learning_rate": 1.901058398754439e-05, "loss": 1.0868, "step": 2584 }, { "epoch": 0.44323467003879374, "grad_norm": 1.6875, "learning_rate": 1.9009800613578563e-05, "loss": 1.1054, "step": 2585 }, { "epoch": 0.4434061341277836, "grad_norm": 1.578125, "learning_rate": 1.9009016945769207e-05, "loss": 0.9721, "step": 2586 }, { "epoch": 0.44357759821677345, "grad_norm": 1.6875, "learning_rate": 1.9008232984141885e-05, "loss": 1.0474, "step": 2587 }, { "epoch": 0.44374906230576333, "grad_norm": 1.640625, "learning_rate": 1.9007448728722165e-05, "loss": 1.0311, "step": 2588 }, { "epoch": 0.4439205263947532, "grad_norm": 1.71875, "learning_rate": 1.900666417953562e-05, "loss": 1.021, "step": 2589 }, { "epoch": 0.44409199048374304, "grad_norm": 1.65625, "learning_rate": 1.9005879336607844e-05, "loss": 1.0546, "step": 2590 }, { "epoch": 0.4442634545727329, "grad_norm": 1.5390625, "learning_rate": 1.9005094199964427e-05, "loss": 1.0064, "step": 2591 }, { "epoch": 0.4444349186617228, "grad_norm": 1.640625, "learning_rate": 1.900430876963098e-05, "loss": 0.9986, "step": 2592 }, { "epoch": 0.4446063827507126, "grad_norm": 1.5703125, "learning_rate": 1.9003523045633116e-05, "loss": 1.0127, "step": 2593 }, { "epoch": 0.4447778468397025, "grad_norm": 1.6875, "learning_rate": 1.900273702799646e-05, "loss": 1.1392, "step": 2594 }, { "epoch": 0.4449493109286924, "grad_norm": 1.515625, "learning_rate": 1.9001950716746648e-05, "loss": 0.9631, "step": 2595 }, { "epoch": 0.4451207750176822, "grad_norm": 1.609375, "learning_rate": 1.9001164111909327e-05, "loss": 0.9925, "step": 2596 }, { "epoch": 0.4452922391066721, "grad_norm": 1.578125, "learning_rate": 1.9000377213510147e-05, "loss": 1.0275, "step": 2597 }, { "epoch": 0.445463703195662, "grad_norm": 1.7578125, "learning_rate": 1.8999590021574776e-05, "loss": 1.0874, "step": 2598 }, { "epoch": 0.4456351672846518, "grad_norm": 1.546875, "learning_rate": 1.8998802536128885e-05, "loss": 0.979, "step": 2599 }, { "epoch": 0.4458066313736417, "grad_norm": 1.6015625, "learning_rate": 1.8998014757198152e-05, "loss": 1.0238, "step": 2600 }, { "epoch": 0.44597809546263156, "grad_norm": 1.75, "learning_rate": 1.8997226684808282e-05, "loss": 1.1384, "step": 2601 }, { "epoch": 0.4461495595516214, "grad_norm": 1.515625, "learning_rate": 1.8996438318984968e-05, "loss": 1.0545, "step": 2602 }, { "epoch": 0.44632102364061127, "grad_norm": 1.625, "learning_rate": 1.8995649659753917e-05, "loss": 1.0358, "step": 2603 }, { "epoch": 0.44649248772960115, "grad_norm": 1.6015625, "learning_rate": 1.8994860707140862e-05, "loss": 1.0149, "step": 2604 }, { "epoch": 0.446663951818591, "grad_norm": 1.5703125, "learning_rate": 1.8994071461171522e-05, "loss": 0.9884, "step": 2605 }, { "epoch": 0.44683541590758086, "grad_norm": 1.5390625, "learning_rate": 1.899328192187165e-05, "loss": 1.0315, "step": 2606 }, { "epoch": 0.44700687999657074, "grad_norm": 1.5703125, "learning_rate": 1.8992492089266986e-05, "loss": 0.9365, "step": 2607 }, { "epoch": 0.44717834408556056, "grad_norm": 1.5546875, "learning_rate": 1.899170196338329e-05, "loss": 1.0134, "step": 2608 }, { "epoch": 0.44734980817455045, "grad_norm": 1.65625, "learning_rate": 1.8990911544246338e-05, "loss": 1.0205, "step": 2609 }, { "epoch": 0.4475212722635403, "grad_norm": 1.6015625, "learning_rate": 1.89901208318819e-05, "loss": 1.0267, "step": 2610 }, { "epoch": 0.44769273635253015, "grad_norm": 1.5859375, "learning_rate": 1.898932982631577e-05, "loss": 1.1054, "step": 2611 }, { "epoch": 0.44786420044152003, "grad_norm": 1.8203125, "learning_rate": 1.8988538527573743e-05, "loss": 0.9982, "step": 2612 }, { "epoch": 0.4480356645305099, "grad_norm": 1.6015625, "learning_rate": 1.8987746935681627e-05, "loss": 1.0921, "step": 2613 }, { "epoch": 0.44820712861949974, "grad_norm": 1.6484375, "learning_rate": 1.898695505066524e-05, "loss": 1.105, "step": 2614 }, { "epoch": 0.4483785927084896, "grad_norm": 1.84375, "learning_rate": 1.8986162872550405e-05, "loss": 1.1337, "step": 2615 }, { "epoch": 0.4485500567974795, "grad_norm": 1.6484375, "learning_rate": 1.898537040136296e-05, "loss": 1.0623, "step": 2616 }, { "epoch": 0.4487215208864693, "grad_norm": 1.6953125, "learning_rate": 1.8984577637128755e-05, "loss": 1.1215, "step": 2617 }, { "epoch": 0.4488929849754592, "grad_norm": 1.5, "learning_rate": 1.8983784579873635e-05, "loss": 0.9338, "step": 2618 }, { "epoch": 0.4490644490644491, "grad_norm": 1.6484375, "learning_rate": 1.898299122962347e-05, "loss": 1.0079, "step": 2619 }, { "epoch": 0.4492359131534389, "grad_norm": 1.65625, "learning_rate": 1.8982197586404136e-05, "loss": 1.0642, "step": 2620 }, { "epoch": 0.4494073772424288, "grad_norm": 1.625, "learning_rate": 1.8981403650241517e-05, "loss": 1.0029, "step": 2621 }, { "epoch": 0.4495788413314187, "grad_norm": 1.8125, "learning_rate": 1.89806094211615e-05, "loss": 1.0326, "step": 2622 }, { "epoch": 0.4497503054204085, "grad_norm": 1.78125, "learning_rate": 1.8979814899189993e-05, "loss": 1.0617, "step": 2623 }, { "epoch": 0.4499217695093984, "grad_norm": 1.6640625, "learning_rate": 1.897902008435291e-05, "loss": 1.0087, "step": 2624 }, { "epoch": 0.45009323359838826, "grad_norm": 1.609375, "learning_rate": 1.8978224976676168e-05, "loss": 1.0239, "step": 2625 }, { "epoch": 0.4502646976873781, "grad_norm": 1.6484375, "learning_rate": 1.89774295761857e-05, "loss": 1.0068, "step": 2626 }, { "epoch": 0.45043616177636797, "grad_norm": 1.59375, "learning_rate": 1.897663388290745e-05, "loss": 0.9916, "step": 2627 }, { "epoch": 0.4506076258653578, "grad_norm": 1.546875, "learning_rate": 1.8975837896867365e-05, "loss": 1.0105, "step": 2628 }, { "epoch": 0.4507790899543477, "grad_norm": 1.5, "learning_rate": 1.8975041618091406e-05, "loss": 0.964, "step": 2629 }, { "epoch": 0.45095055404333756, "grad_norm": 1.5859375, "learning_rate": 1.8974245046605544e-05, "loss": 1.0457, "step": 2630 }, { "epoch": 0.4511220181323274, "grad_norm": 1.59375, "learning_rate": 1.8973448182435757e-05, "loss": 1.0377, "step": 2631 }, { "epoch": 0.45129348222131727, "grad_norm": 1.640625, "learning_rate": 1.897265102560803e-05, "loss": 1.0343, "step": 2632 }, { "epoch": 0.45146494631030715, "grad_norm": 1.59375, "learning_rate": 1.897185357614837e-05, "loss": 0.9959, "step": 2633 }, { "epoch": 0.451636410399297, "grad_norm": 1.6953125, "learning_rate": 1.8971055834082778e-05, "loss": 1.0973, "step": 2634 }, { "epoch": 0.45180787448828685, "grad_norm": 1.515625, "learning_rate": 1.8970257799437274e-05, "loss": 0.951, "step": 2635 }, { "epoch": 0.45197933857727673, "grad_norm": 1.578125, "learning_rate": 1.8969459472237886e-05, "loss": 1.0957, "step": 2636 }, { "epoch": 0.45215080266626656, "grad_norm": 1.671875, "learning_rate": 1.8968660852510646e-05, "loss": 1.0603, "step": 2637 }, { "epoch": 0.45232226675525644, "grad_norm": 1.5625, "learning_rate": 1.8967861940281603e-05, "loss": 0.9595, "step": 2638 }, { "epoch": 0.4524937308442463, "grad_norm": 1.6640625, "learning_rate": 1.896706273557681e-05, "loss": 1.0628, "step": 2639 }, { "epoch": 0.45266519493323615, "grad_norm": 1.65625, "learning_rate": 1.896626323842234e-05, "loss": 1.0335, "step": 2640 }, { "epoch": 0.45283665902222603, "grad_norm": 1.546875, "learning_rate": 1.896546344884426e-05, "loss": 0.9677, "step": 2641 }, { "epoch": 0.4530081231112159, "grad_norm": 1.5625, "learning_rate": 1.8964663366868655e-05, "loss": 0.9559, "step": 2642 }, { "epoch": 0.45317958720020574, "grad_norm": 1.5703125, "learning_rate": 1.896386299252162e-05, "loss": 1.0433, "step": 2643 }, { "epoch": 0.4533510512891956, "grad_norm": 1.609375, "learning_rate": 1.8963062325829256e-05, "loss": 1.0023, "step": 2644 }, { "epoch": 0.4535225153781855, "grad_norm": 1.6484375, "learning_rate": 1.8962261366817684e-05, "loss": 1.1453, "step": 2645 }, { "epoch": 0.4536939794671753, "grad_norm": 1.59375, "learning_rate": 1.8961460115513012e-05, "loss": 0.9839, "step": 2646 }, { "epoch": 0.4538654435561652, "grad_norm": 1.6015625, "learning_rate": 1.8960658571941388e-05, "loss": 1.1139, "step": 2647 }, { "epoch": 0.4540369076451551, "grad_norm": 1.6484375, "learning_rate": 1.895985673612894e-05, "loss": 1.073, "step": 2648 }, { "epoch": 0.4542083717341449, "grad_norm": 1.5390625, "learning_rate": 1.8959054608101823e-05, "loss": 1.0102, "step": 2649 }, { "epoch": 0.4543798358231348, "grad_norm": 1.5859375, "learning_rate": 1.8958252187886202e-05, "loss": 1.0139, "step": 2650 }, { "epoch": 0.4545512999121247, "grad_norm": 1.5859375, "learning_rate": 1.8957449475508243e-05, "loss": 0.9918, "step": 2651 }, { "epoch": 0.4547227640011145, "grad_norm": 1.6171875, "learning_rate": 1.8956646470994124e-05, "loss": 1.0911, "step": 2652 }, { "epoch": 0.4548942280901044, "grad_norm": 1.6328125, "learning_rate": 1.895584317437004e-05, "loss": 1.1194, "step": 2653 }, { "epoch": 0.45506569217909426, "grad_norm": 1.609375, "learning_rate": 1.8955039585662182e-05, "loss": 1.0762, "step": 2654 }, { "epoch": 0.4552371562680841, "grad_norm": 1.5703125, "learning_rate": 1.895423570489676e-05, "loss": 1.0032, "step": 2655 }, { "epoch": 0.45540862035707397, "grad_norm": 1.6171875, "learning_rate": 1.8953431532099994e-05, "loss": 0.9551, "step": 2656 }, { "epoch": 0.45558008444606385, "grad_norm": 1.59375, "learning_rate": 1.8952627067298115e-05, "loss": 1.0617, "step": 2657 }, { "epoch": 0.4557515485350537, "grad_norm": 1.6953125, "learning_rate": 1.895182231051735e-05, "loss": 1.0103, "step": 2658 }, { "epoch": 0.45592301262404356, "grad_norm": 1.6328125, "learning_rate": 1.8951017261783954e-05, "loss": 1.1338, "step": 2659 }, { "epoch": 0.45609447671303344, "grad_norm": 1.5625, "learning_rate": 1.8950211921124177e-05, "loss": 1.0476, "step": 2660 }, { "epoch": 0.45626594080202326, "grad_norm": 1.5390625, "learning_rate": 1.8949406288564282e-05, "loss": 1.0679, "step": 2661 }, { "epoch": 0.45643740489101314, "grad_norm": 1.609375, "learning_rate": 1.894860036413055e-05, "loss": 1.0398, "step": 2662 }, { "epoch": 0.456608868980003, "grad_norm": 1.6015625, "learning_rate": 1.8947794147849264e-05, "loss": 1.0968, "step": 2663 }, { "epoch": 0.45678033306899285, "grad_norm": 1.609375, "learning_rate": 1.8946987639746717e-05, "loss": 1.0471, "step": 2664 }, { "epoch": 0.45695179715798273, "grad_norm": 1.5703125, "learning_rate": 1.894618083984921e-05, "loss": 1.0101, "step": 2665 }, { "epoch": 0.4571232612469726, "grad_norm": 1.6328125, "learning_rate": 1.894537374818306e-05, "loss": 1.0929, "step": 2666 }, { "epoch": 0.45729472533596244, "grad_norm": 1.578125, "learning_rate": 1.8944566364774585e-05, "loss": 1.0572, "step": 2667 }, { "epoch": 0.4574661894249523, "grad_norm": 1.625, "learning_rate": 1.8943758689650117e-05, "loss": 1.0125, "step": 2668 }, { "epoch": 0.4576376535139422, "grad_norm": 1.5703125, "learning_rate": 1.8942950722836e-05, "loss": 1.0197, "step": 2669 }, { "epoch": 0.457809117602932, "grad_norm": 1.5859375, "learning_rate": 1.8942142464358587e-05, "loss": 1.0973, "step": 2670 }, { "epoch": 0.4579805816919219, "grad_norm": 1.546875, "learning_rate": 1.8941333914244234e-05, "loss": 1.0297, "step": 2671 }, { "epoch": 0.4581520457809118, "grad_norm": 1.6796875, "learning_rate": 1.894052507251931e-05, "loss": 1.0836, "step": 2672 }, { "epoch": 0.4583235098699016, "grad_norm": 1.640625, "learning_rate": 1.8939715939210202e-05, "loss": 1.1439, "step": 2673 }, { "epoch": 0.4584949739588915, "grad_norm": 1.6328125, "learning_rate": 1.8938906514343287e-05, "loss": 1.0319, "step": 2674 }, { "epoch": 0.4586664380478813, "grad_norm": 1.6875, "learning_rate": 1.8938096797944972e-05, "loss": 1.0257, "step": 2675 }, { "epoch": 0.4588379021368712, "grad_norm": 1.5546875, "learning_rate": 1.893728679004166e-05, "loss": 1.0049, "step": 2676 }, { "epoch": 0.4590093662258611, "grad_norm": 1.4921875, "learning_rate": 1.8936476490659778e-05, "loss": 0.9021, "step": 2677 }, { "epoch": 0.4591808303148509, "grad_norm": 1.6484375, "learning_rate": 1.893566589982574e-05, "loss": 0.9815, "step": 2678 }, { "epoch": 0.4593522944038408, "grad_norm": 1.6015625, "learning_rate": 1.893485501756599e-05, "loss": 1.1038, "step": 2679 }, { "epoch": 0.45952375849283067, "grad_norm": 1.5859375, "learning_rate": 1.8934043843906975e-05, "loss": 0.9767, "step": 2680 }, { "epoch": 0.4596952225818205, "grad_norm": 1.625, "learning_rate": 1.8933232378875145e-05, "loss": 1.0821, "step": 2681 }, { "epoch": 0.4598666866708104, "grad_norm": 1.53125, "learning_rate": 1.8932420622496964e-05, "loss": 0.9355, "step": 2682 }, { "epoch": 0.46003815075980026, "grad_norm": 1.5, "learning_rate": 1.8931608574798915e-05, "loss": 0.9788, "step": 2683 }, { "epoch": 0.4602096148487901, "grad_norm": 1.5625, "learning_rate": 1.8930796235807478e-05, "loss": 1.0064, "step": 2684 }, { "epoch": 0.46038107893777996, "grad_norm": 1.578125, "learning_rate": 1.8929983605549146e-05, "loss": 1.1276, "step": 2685 }, { "epoch": 0.46055254302676985, "grad_norm": 1.5625, "learning_rate": 1.8929170684050414e-05, "loss": 1.0831, "step": 2686 }, { "epoch": 0.46072400711575967, "grad_norm": 1.625, "learning_rate": 1.892835747133781e-05, "loss": 0.9837, "step": 2687 }, { "epoch": 0.46089547120474955, "grad_norm": 1.5, "learning_rate": 1.8927543967437846e-05, "loss": 0.9239, "step": 2688 }, { "epoch": 0.46106693529373943, "grad_norm": 1.609375, "learning_rate": 1.892673017237705e-05, "loss": 1.0372, "step": 2689 }, { "epoch": 0.46123839938272926, "grad_norm": 1.640625, "learning_rate": 1.8925916086181975e-05, "loss": 1.1304, "step": 2690 }, { "epoch": 0.46140986347171914, "grad_norm": 1.6875, "learning_rate": 1.8925101708879162e-05, "loss": 1.0963, "step": 2691 }, { "epoch": 0.461581327560709, "grad_norm": 1.5703125, "learning_rate": 1.8924287040495174e-05, "loss": 0.9772, "step": 2692 }, { "epoch": 0.46175279164969885, "grad_norm": 1.5859375, "learning_rate": 1.8923472081056577e-05, "loss": 0.9871, "step": 2693 }, { "epoch": 0.4619242557386887, "grad_norm": 1.734375, "learning_rate": 1.8922656830589955e-05, "loss": 1.1368, "step": 2694 }, { "epoch": 0.4620957198276786, "grad_norm": 1.6484375, "learning_rate": 1.8921841289121894e-05, "loss": 1.1031, "step": 2695 }, { "epoch": 0.46226718391666843, "grad_norm": 1.5859375, "learning_rate": 1.8921025456678988e-05, "loss": 0.9948, "step": 2696 }, { "epoch": 0.4624386480056583, "grad_norm": 1.609375, "learning_rate": 1.8920209333287854e-05, "loss": 0.9736, "step": 2697 }, { "epoch": 0.4626101120946482, "grad_norm": 1.609375, "learning_rate": 1.89193929189751e-05, "loss": 1.0552, "step": 2698 }, { "epoch": 0.462781576183638, "grad_norm": 1.6328125, "learning_rate": 1.8918576213767358e-05, "loss": 1.095, "step": 2699 }, { "epoch": 0.4629530402726279, "grad_norm": 1.6015625, "learning_rate": 1.891775921769126e-05, "loss": 0.9765, "step": 2700 }, { "epoch": 0.4631245043616178, "grad_norm": 1.6484375, "learning_rate": 1.8916941930773457e-05, "loss": 1.0, "step": 2701 }, { "epoch": 0.4632959684506076, "grad_norm": 1.6328125, "learning_rate": 1.8916124353040594e-05, "loss": 1.013, "step": 2702 }, { "epoch": 0.4634674325395975, "grad_norm": 1.6328125, "learning_rate": 1.8915306484519344e-05, "loss": 1.1182, "step": 2703 }, { "epoch": 0.46363889662858737, "grad_norm": 1.6328125, "learning_rate": 1.8914488325236373e-05, "loss": 1.0811, "step": 2704 }, { "epoch": 0.4638103607175772, "grad_norm": 1.6640625, "learning_rate": 1.8913669875218375e-05, "loss": 1.0969, "step": 2705 }, { "epoch": 0.4639818248065671, "grad_norm": 1.546875, "learning_rate": 1.8912851134492033e-05, "loss": 1.0535, "step": 2706 }, { "epoch": 0.46415328889555696, "grad_norm": 1.5703125, "learning_rate": 1.8912032103084054e-05, "loss": 1.0302, "step": 2707 }, { "epoch": 0.4643247529845468, "grad_norm": 1.609375, "learning_rate": 1.8911212781021148e-05, "loss": 1.0506, "step": 2708 }, { "epoch": 0.46449621707353667, "grad_norm": 1.6015625, "learning_rate": 1.8910393168330036e-05, "loss": 1.0178, "step": 2709 }, { "epoch": 0.46466768116252655, "grad_norm": 1.6015625, "learning_rate": 1.8909573265037448e-05, "loss": 1.0261, "step": 2710 }, { "epoch": 0.4648391452515164, "grad_norm": 1.5625, "learning_rate": 1.8908753071170126e-05, "loss": 1.0288, "step": 2711 }, { "epoch": 0.46501060934050625, "grad_norm": 1.7109375, "learning_rate": 1.8907932586754823e-05, "loss": 1.0356, "step": 2712 }, { "epoch": 0.46518207342949613, "grad_norm": 1.671875, "learning_rate": 1.8907111811818292e-05, "loss": 1.0415, "step": 2713 }, { "epoch": 0.46535353751848596, "grad_norm": 1.578125, "learning_rate": 1.89062907463873e-05, "loss": 0.9969, "step": 2714 }, { "epoch": 0.46552500160747584, "grad_norm": 1.65625, "learning_rate": 1.8905469390488635e-05, "loss": 1.0581, "step": 2715 }, { "epoch": 0.4656964656964657, "grad_norm": 1.609375, "learning_rate": 1.8904647744149077e-05, "loss": 0.9877, "step": 2716 }, { "epoch": 0.46586792978545555, "grad_norm": 1.6875, "learning_rate": 1.8903825807395422e-05, "loss": 0.997, "step": 2717 }, { "epoch": 0.46603939387444543, "grad_norm": 1.5390625, "learning_rate": 1.890300358025448e-05, "loss": 1.0222, "step": 2718 }, { "epoch": 0.4662108579634353, "grad_norm": 1.59375, "learning_rate": 1.8902181062753064e-05, "loss": 1.0158, "step": 2719 }, { "epoch": 0.46638232205242514, "grad_norm": 1.609375, "learning_rate": 1.8901358254918005e-05, "loss": 0.9048, "step": 2720 }, { "epoch": 0.466553786141415, "grad_norm": 1.6953125, "learning_rate": 1.8900535156776128e-05, "loss": 1.176, "step": 2721 }, { "epoch": 0.46672525023040484, "grad_norm": 1.5546875, "learning_rate": 1.8899711768354288e-05, "loss": 1.059, "step": 2722 }, { "epoch": 0.4668967143193947, "grad_norm": 1.59375, "learning_rate": 1.8898888089679332e-05, "loss": 0.9411, "step": 2723 }, { "epoch": 0.4670681784083846, "grad_norm": 1.6875, "learning_rate": 1.8898064120778126e-05, "loss": 0.944, "step": 2724 }, { "epoch": 0.46723964249737443, "grad_norm": 1.5625, "learning_rate": 1.889723986167754e-05, "loss": 1.0461, "step": 2725 }, { "epoch": 0.4674111065863643, "grad_norm": 1.59375, "learning_rate": 1.889641531240446e-05, "loss": 1.095, "step": 2726 }, { "epoch": 0.4675825706753542, "grad_norm": 1.640625, "learning_rate": 1.8895590472985775e-05, "loss": 1.0206, "step": 2727 }, { "epoch": 0.467754034764344, "grad_norm": 1.6015625, "learning_rate": 1.889476534344839e-05, "loss": 1.0456, "step": 2728 }, { "epoch": 0.4679254988533339, "grad_norm": 1.5703125, "learning_rate": 1.889393992381921e-05, "loss": 1.0462, "step": 2729 }, { "epoch": 0.4680969629423238, "grad_norm": 1.5546875, "learning_rate": 1.8893114214125154e-05, "loss": 1.0204, "step": 2730 }, { "epoch": 0.4682684270313136, "grad_norm": 1.734375, "learning_rate": 1.889228821439316e-05, "loss": 1.047, "step": 2731 }, { "epoch": 0.4684398911203035, "grad_norm": 1.5703125, "learning_rate": 1.8891461924650165e-05, "loss": 0.9852, "step": 2732 }, { "epoch": 0.46861135520929337, "grad_norm": 1.6328125, "learning_rate": 1.8890635344923106e-05, "loss": 1.0183, "step": 2733 }, { "epoch": 0.4687828192982832, "grad_norm": 1.875, "learning_rate": 1.8889808475238957e-05, "loss": 1.0666, "step": 2734 }, { "epoch": 0.4689542833872731, "grad_norm": 1.578125, "learning_rate": 1.8888981315624672e-05, "loss": 0.9941, "step": 2735 }, { "epoch": 0.46912574747626296, "grad_norm": 1.5859375, "learning_rate": 1.8888153866107236e-05, "loss": 1.0518, "step": 2736 }, { "epoch": 0.4692972115652528, "grad_norm": 1.609375, "learning_rate": 1.888732612671363e-05, "loss": 1.0533, "step": 2737 }, { "epoch": 0.46946867565424266, "grad_norm": 1.609375, "learning_rate": 1.888649809747086e-05, "loss": 1.0669, "step": 2738 }, { "epoch": 0.46964013974323254, "grad_norm": 1.6484375, "learning_rate": 1.8885669778405917e-05, "loss": 1.0136, "step": 2739 }, { "epoch": 0.46981160383222237, "grad_norm": 1.5390625, "learning_rate": 1.8884841169545826e-05, "loss": 1.0767, "step": 2740 }, { "epoch": 0.46998306792121225, "grad_norm": 1.65625, "learning_rate": 1.8884012270917603e-05, "loss": 0.9982, "step": 2741 }, { "epoch": 0.47015453201020213, "grad_norm": 1.5703125, "learning_rate": 1.888318308254829e-05, "loss": 1.0528, "step": 2742 }, { "epoch": 0.47032599609919196, "grad_norm": 1.5859375, "learning_rate": 1.8882353604464923e-05, "loss": 1.0539, "step": 2743 }, { "epoch": 0.47049746018818184, "grad_norm": 1.71875, "learning_rate": 1.8881523836694556e-05, "loss": 1.0387, "step": 2744 }, { "epoch": 0.4706689242771717, "grad_norm": 1.5546875, "learning_rate": 1.8880693779264255e-05, "loss": 0.991, "step": 2745 }, { "epoch": 0.47084038836616154, "grad_norm": 1.53125, "learning_rate": 1.8879863432201086e-05, "loss": 0.9775, "step": 2746 }, { "epoch": 0.4710118524551514, "grad_norm": 1.5703125, "learning_rate": 1.8879032795532132e-05, "loss": 0.9837, "step": 2747 }, { "epoch": 0.4711833165441413, "grad_norm": 1.578125, "learning_rate": 1.8878201869284483e-05, "loss": 0.9901, "step": 2748 }, { "epoch": 0.47135478063313113, "grad_norm": 1.7265625, "learning_rate": 1.8877370653485242e-05, "loss": 1.1065, "step": 2749 }, { "epoch": 0.471526244722121, "grad_norm": 1.5859375, "learning_rate": 1.8876539148161514e-05, "loss": 1.0289, "step": 2750 }, { "epoch": 0.4716977088111109, "grad_norm": 1.5703125, "learning_rate": 1.887570735334042e-05, "loss": 1.0634, "step": 2751 }, { "epoch": 0.4718691729001007, "grad_norm": 1.546875, "learning_rate": 1.887487526904908e-05, "loss": 0.9623, "step": 2752 }, { "epoch": 0.4720406369890906, "grad_norm": 1.671875, "learning_rate": 1.8874042895314643e-05, "loss": 1.0526, "step": 2753 }, { "epoch": 0.4722121010780805, "grad_norm": 1.6796875, "learning_rate": 1.8873210232164248e-05, "loss": 1.0761, "step": 2754 }, { "epoch": 0.4723835651670703, "grad_norm": 1.6484375, "learning_rate": 1.8872377279625057e-05, "loss": 1.0276, "step": 2755 }, { "epoch": 0.4725550292560602, "grad_norm": 1.578125, "learning_rate": 1.887154403772423e-05, "loss": 0.9716, "step": 2756 }, { "epoch": 0.47272649334505007, "grad_norm": 1.5390625, "learning_rate": 1.8870710506488944e-05, "loss": 1.0427, "step": 2757 }, { "epoch": 0.4728979574340399, "grad_norm": 1.578125, "learning_rate": 1.8869876685946388e-05, "loss": 1.0066, "step": 2758 }, { "epoch": 0.4730694215230298, "grad_norm": 1.5625, "learning_rate": 1.886904257612375e-05, "loss": 1.0671, "step": 2759 }, { "epoch": 0.47324088561201966, "grad_norm": 1.6015625, "learning_rate": 1.8868208177048238e-05, "loss": 0.9966, "step": 2760 }, { "epoch": 0.4734123497010095, "grad_norm": 1.6484375, "learning_rate": 1.8867373488747058e-05, "loss": 1.091, "step": 2761 }, { "epoch": 0.47358381378999936, "grad_norm": 1.5546875, "learning_rate": 1.886653851124744e-05, "loss": 1.0605, "step": 2762 }, { "epoch": 0.47375527787898924, "grad_norm": 1.578125, "learning_rate": 1.8865703244576613e-05, "loss": 0.9792, "step": 2763 }, { "epoch": 0.47392674196797907, "grad_norm": 1.6171875, "learning_rate": 1.8864867688761818e-05, "loss": 1.1191, "step": 2764 }, { "epoch": 0.47409820605696895, "grad_norm": 1.59375, "learning_rate": 1.8864031843830305e-05, "loss": 1.0338, "step": 2765 }, { "epoch": 0.47426967014595883, "grad_norm": 1.578125, "learning_rate": 1.8863195709809336e-05, "loss": 1.0051, "step": 2766 }, { "epoch": 0.47444113423494866, "grad_norm": 1.6015625, "learning_rate": 1.886235928672618e-05, "loss": 0.9636, "step": 2767 }, { "epoch": 0.47461259832393854, "grad_norm": 1.6484375, "learning_rate": 1.8861522574608113e-05, "loss": 1.0052, "step": 2768 }, { "epoch": 0.47478406241292836, "grad_norm": 1.5859375, "learning_rate": 1.8860685573482427e-05, "loss": 1.0447, "step": 2769 }, { "epoch": 0.47495552650191825, "grad_norm": 1.65625, "learning_rate": 1.8859848283376417e-05, "loss": 1.098, "step": 2770 }, { "epoch": 0.4751269905909081, "grad_norm": 1.703125, "learning_rate": 1.885901070431739e-05, "loss": 1.0925, "step": 2771 }, { "epoch": 0.47529845467989795, "grad_norm": 1.6015625, "learning_rate": 1.8858172836332667e-05, "loss": 0.9781, "step": 2772 }, { "epoch": 0.47546991876888783, "grad_norm": 1.5859375, "learning_rate": 1.885733467944957e-05, "loss": 1.0624, "step": 2773 }, { "epoch": 0.4756413828578777, "grad_norm": 1.6171875, "learning_rate": 1.8856496233695435e-05, "loss": 1.097, "step": 2774 }, { "epoch": 0.47581284694686754, "grad_norm": 1.6484375, "learning_rate": 1.885565749909761e-05, "loss": 1.0228, "step": 2775 }, { "epoch": 0.4759843110358574, "grad_norm": 1.59375, "learning_rate": 1.885481847568344e-05, "loss": 1.0667, "step": 2776 }, { "epoch": 0.4761557751248473, "grad_norm": 1.5703125, "learning_rate": 1.8853979163480302e-05, "loss": 0.9551, "step": 2777 }, { "epoch": 0.47632723921383713, "grad_norm": 1.6484375, "learning_rate": 1.885313956251556e-05, "loss": 1.0382, "step": 2778 }, { "epoch": 0.476498703302827, "grad_norm": 1.6484375, "learning_rate": 1.8852299672816596e-05, "loss": 1.0474, "step": 2779 }, { "epoch": 0.4766701673918169, "grad_norm": 1.6484375, "learning_rate": 1.885145949441081e-05, "loss": 0.9978, "step": 2780 }, { "epoch": 0.4768416314808067, "grad_norm": 1.5546875, "learning_rate": 1.8850619027325595e-05, "loss": 0.9717, "step": 2781 }, { "epoch": 0.4770130955697966, "grad_norm": 1.625, "learning_rate": 1.884977827158837e-05, "loss": 1.0869, "step": 2782 }, { "epoch": 0.4771845596587865, "grad_norm": 1.578125, "learning_rate": 1.8848937227226542e-05, "loss": 1.065, "step": 2783 }, { "epoch": 0.4773560237477763, "grad_norm": 1.765625, "learning_rate": 1.8848095894267556e-05, "loss": 1.1608, "step": 2784 }, { "epoch": 0.4775274878367662, "grad_norm": 1.7265625, "learning_rate": 1.884725427273884e-05, "loss": 1.1015, "step": 2785 }, { "epoch": 0.47769895192575607, "grad_norm": 1.640625, "learning_rate": 1.884641236266785e-05, "loss": 1.0337, "step": 2786 }, { "epoch": 0.4778704160147459, "grad_norm": 1.515625, "learning_rate": 1.8845570164082038e-05, "loss": 0.9727, "step": 2787 }, { "epoch": 0.4780418801037358, "grad_norm": 1.5859375, "learning_rate": 1.8844727677008877e-05, "loss": 1.0569, "step": 2788 }, { "epoch": 0.47821334419272565, "grad_norm": 1.59375, "learning_rate": 1.8843884901475835e-05, "loss": 1.0511, "step": 2789 }, { "epoch": 0.4783848082817155, "grad_norm": 1.6796875, "learning_rate": 1.8843041837510408e-05, "loss": 1.0175, "step": 2790 }, { "epoch": 0.47855627237070536, "grad_norm": 1.7265625, "learning_rate": 1.8842198485140084e-05, "loss": 1.0046, "step": 2791 }, { "epoch": 0.47872773645969524, "grad_norm": 1.703125, "learning_rate": 1.884135484439237e-05, "loss": 1.0533, "step": 2792 }, { "epoch": 0.47889920054868507, "grad_norm": 1.7109375, "learning_rate": 1.8840510915294785e-05, "loss": 1.0449, "step": 2793 }, { "epoch": 0.47907066463767495, "grad_norm": 1.53125, "learning_rate": 1.8839666697874845e-05, "loss": 0.9818, "step": 2794 }, { "epoch": 0.47924212872666483, "grad_norm": 1.5625, "learning_rate": 1.883882219216009e-05, "loss": 1.0963, "step": 2795 }, { "epoch": 0.47941359281565465, "grad_norm": 1.515625, "learning_rate": 1.8837977398178058e-05, "loss": 1.0716, "step": 2796 }, { "epoch": 0.47958505690464454, "grad_norm": 1.6328125, "learning_rate": 1.88371323159563e-05, "loss": 0.9346, "step": 2797 }, { "epoch": 0.4797565209936344, "grad_norm": 1.546875, "learning_rate": 1.8836286945522384e-05, "loss": 1.0463, "step": 2798 }, { "epoch": 0.47992798508262424, "grad_norm": 1.53125, "learning_rate": 1.8835441286903874e-05, "loss": 1.0739, "step": 2799 }, { "epoch": 0.4800994491716141, "grad_norm": 1.5078125, "learning_rate": 1.883459534012835e-05, "loss": 0.9795, "step": 2800 }, { "epoch": 0.4800994491716141, "eval_loss": 0.8867102265357971, "eval_runtime": 837.1871, "eval_samples_per_second": 2.985, "eval_steps_per_second": 2.985, "step": 2800 }, { "epoch": 0.480270913260604, "grad_norm": 1.578125, "learning_rate": 1.8833749105223406e-05, "loss": 1.0323, "step": 2801 }, { "epoch": 0.48044237734959383, "grad_norm": 1.6171875, "learning_rate": 1.883290258221664e-05, "loss": 1.1122, "step": 2802 }, { "epoch": 0.4806138414385837, "grad_norm": 1.640625, "learning_rate": 1.8832055771135657e-05, "loss": 1.1051, "step": 2803 }, { "epoch": 0.4807853055275736, "grad_norm": 1.546875, "learning_rate": 1.8831208672008082e-05, "loss": 1.0378, "step": 2804 }, { "epoch": 0.4809567696165634, "grad_norm": 1.59375, "learning_rate": 1.8830361284861532e-05, "loss": 1.0658, "step": 2805 }, { "epoch": 0.4811282337055533, "grad_norm": 1.640625, "learning_rate": 1.882951360972365e-05, "loss": 1.0673, "step": 2806 }, { "epoch": 0.4812996977945432, "grad_norm": 1.5703125, "learning_rate": 1.882866564662208e-05, "loss": 0.9835, "step": 2807 }, { "epoch": 0.481471161883533, "grad_norm": 1.5703125, "learning_rate": 1.8827817395584474e-05, "loss": 1.0338, "step": 2808 }, { "epoch": 0.4816426259725229, "grad_norm": 1.6953125, "learning_rate": 1.8826968856638504e-05, "loss": 1.0203, "step": 2809 }, { "epoch": 0.48181409006151277, "grad_norm": 1.7265625, "learning_rate": 1.8826120029811844e-05, "loss": 1.0846, "step": 2810 }, { "epoch": 0.4819855541505026, "grad_norm": 1.609375, "learning_rate": 1.882527091513217e-05, "loss": 1.1038, "step": 2811 }, { "epoch": 0.4821570182394925, "grad_norm": 1.46875, "learning_rate": 1.882442151262718e-05, "loss": 0.9386, "step": 2812 }, { "epoch": 0.48232848232848236, "grad_norm": 1.546875, "learning_rate": 1.882357182232457e-05, "loss": 1.0621, "step": 2813 }, { "epoch": 0.4824999464174722, "grad_norm": 1.59375, "learning_rate": 1.882272184425206e-05, "loss": 1.088, "step": 2814 }, { "epoch": 0.48267141050646206, "grad_norm": 1.6640625, "learning_rate": 1.8821871578437367e-05, "loss": 1.0637, "step": 2815 }, { "epoch": 0.4828428745954519, "grad_norm": 1.578125, "learning_rate": 1.8821021024908223e-05, "loss": 1.0955, "step": 2816 }, { "epoch": 0.48301433868444177, "grad_norm": 1.5625, "learning_rate": 1.8820170183692364e-05, "loss": 0.9826, "step": 2817 }, { "epoch": 0.48318580277343165, "grad_norm": 1.6015625, "learning_rate": 1.8819319054817543e-05, "loss": 1.0452, "step": 2818 }, { "epoch": 0.4833572668624215, "grad_norm": 1.640625, "learning_rate": 1.8818467638311516e-05, "loss": 1.0884, "step": 2819 }, { "epoch": 0.48352873095141136, "grad_norm": 1.6015625, "learning_rate": 1.8817615934202055e-05, "loss": 1.1022, "step": 2820 }, { "epoch": 0.48370019504040124, "grad_norm": 1.5546875, "learning_rate": 1.881676394251693e-05, "loss": 1.0365, "step": 2821 }, { "epoch": 0.48387165912939106, "grad_norm": 1.515625, "learning_rate": 1.8815911663283936e-05, "loss": 0.9715, "step": 2822 }, { "epoch": 0.48404312321838094, "grad_norm": 1.578125, "learning_rate": 1.8815059096530863e-05, "loss": 1.056, "step": 2823 }, { "epoch": 0.4842145873073708, "grad_norm": 1.5546875, "learning_rate": 1.881420624228552e-05, "loss": 1.0723, "step": 2824 }, { "epoch": 0.48438605139636065, "grad_norm": 1.6484375, "learning_rate": 1.8813353100575716e-05, "loss": 0.942, "step": 2825 }, { "epoch": 0.48455751548535053, "grad_norm": 1.5234375, "learning_rate": 1.8812499671429286e-05, "loss": 1.0086, "step": 2826 }, { "epoch": 0.4847289795743404, "grad_norm": 1.6796875, "learning_rate": 1.8811645954874053e-05, "loss": 1.0392, "step": 2827 }, { "epoch": 0.48490044366333024, "grad_norm": 1.4921875, "learning_rate": 1.8810791950937864e-05, "loss": 1.002, "step": 2828 }, { "epoch": 0.4850719077523201, "grad_norm": 1.5546875, "learning_rate": 1.880993765964857e-05, "loss": 0.9703, "step": 2829 }, { "epoch": 0.48524337184131, "grad_norm": 1.640625, "learning_rate": 1.8809083081034035e-05, "loss": 1.1082, "step": 2830 }, { "epoch": 0.4854148359302998, "grad_norm": 1.640625, "learning_rate": 1.8808228215122127e-05, "loss": 0.9924, "step": 2831 }, { "epoch": 0.4855863000192897, "grad_norm": 2.03125, "learning_rate": 1.880737306194073e-05, "loss": 1.0646, "step": 2832 }, { "epoch": 0.4857577641082796, "grad_norm": 1.5625, "learning_rate": 1.8806517621517733e-05, "loss": 1.0148, "step": 2833 }, { "epoch": 0.4859292281972694, "grad_norm": 1.6484375, "learning_rate": 1.880566189388103e-05, "loss": 0.9831, "step": 2834 }, { "epoch": 0.4861006922862593, "grad_norm": 1.5078125, "learning_rate": 1.8804805879058538e-05, "loss": 0.9783, "step": 2835 }, { "epoch": 0.4862721563752492, "grad_norm": 1.546875, "learning_rate": 1.8803949577078172e-05, "loss": 0.9806, "step": 2836 }, { "epoch": 0.486443620464239, "grad_norm": 1.6171875, "learning_rate": 1.8803092987967853e-05, "loss": 1.0607, "step": 2837 }, { "epoch": 0.4866150845532289, "grad_norm": 1.5546875, "learning_rate": 1.8802236111755524e-05, "loss": 0.9994, "step": 2838 }, { "epoch": 0.48678654864221876, "grad_norm": 1.6328125, "learning_rate": 1.880137894846913e-05, "loss": 1.053, "step": 2839 }, { "epoch": 0.4869580127312086, "grad_norm": 1.65625, "learning_rate": 1.8800521498136622e-05, "loss": 1.0909, "step": 2840 }, { "epoch": 0.48712947682019847, "grad_norm": 1.6484375, "learning_rate": 1.8799663760785973e-05, "loss": 1.0357, "step": 2841 }, { "epoch": 0.48730094090918835, "grad_norm": 1.609375, "learning_rate": 1.8798805736445153e-05, "loss": 1.0848, "step": 2842 }, { "epoch": 0.4874724049981782, "grad_norm": 1.578125, "learning_rate": 1.8797947425142137e-05, "loss": 1.0419, "step": 2843 }, { "epoch": 0.48764386908716806, "grad_norm": 1.6875, "learning_rate": 1.879708882690493e-05, "loss": 1.15, "step": 2844 }, { "epoch": 0.48781533317615794, "grad_norm": 1.546875, "learning_rate": 1.879622994176153e-05, "loss": 1.1013, "step": 2845 }, { "epoch": 0.48798679726514776, "grad_norm": 1.6875, "learning_rate": 1.879537076973995e-05, "loss": 1.0203, "step": 2846 }, { "epoch": 0.48815826135413765, "grad_norm": 1.640625, "learning_rate": 1.8794511310868208e-05, "loss": 0.9994, "step": 2847 }, { "epoch": 0.4883297254431275, "grad_norm": 1.6171875, "learning_rate": 1.8793651565174333e-05, "loss": 1.0928, "step": 2848 }, { "epoch": 0.48850118953211735, "grad_norm": 1.671875, "learning_rate": 1.879279153268637e-05, "loss": 1.0635, "step": 2849 }, { "epoch": 0.48867265362110723, "grad_norm": 1.65625, "learning_rate": 1.879193121343236e-05, "loss": 1.1594, "step": 2850 }, { "epoch": 0.4888441177100971, "grad_norm": 1.6640625, "learning_rate": 1.879107060744037e-05, "loss": 0.9951, "step": 2851 }, { "epoch": 0.48901558179908694, "grad_norm": 1.6328125, "learning_rate": 1.8790209714738462e-05, "loss": 1.0426, "step": 2852 }, { "epoch": 0.4891870458880768, "grad_norm": 1.578125, "learning_rate": 1.8789348535354714e-05, "loss": 1.0581, "step": 2853 }, { "epoch": 0.4893585099770667, "grad_norm": 1.6328125, "learning_rate": 1.8788487069317208e-05, "loss": 1.0374, "step": 2854 }, { "epoch": 0.48952997406605653, "grad_norm": 1.640625, "learning_rate": 1.878762531665405e-05, "loss": 1.0132, "step": 2855 }, { "epoch": 0.4897014381550464, "grad_norm": 1.625, "learning_rate": 1.878676327739334e-05, "loss": 1.0167, "step": 2856 }, { "epoch": 0.4898729022440363, "grad_norm": 1.6640625, "learning_rate": 1.878590095156319e-05, "loss": 1.0232, "step": 2857 }, { "epoch": 0.4900443663330261, "grad_norm": 1.546875, "learning_rate": 1.8785038339191723e-05, "loss": 1.0708, "step": 2858 }, { "epoch": 0.490215830422016, "grad_norm": 1.6015625, "learning_rate": 1.8784175440307076e-05, "loss": 1.0704, "step": 2859 }, { "epoch": 0.4903872945110059, "grad_norm": 1.640625, "learning_rate": 1.878331225493739e-05, "loss": 1.0848, "step": 2860 }, { "epoch": 0.4905587585999957, "grad_norm": 1.734375, "learning_rate": 1.878244878311082e-05, "loss": 1.0802, "step": 2861 }, { "epoch": 0.4907302226889856, "grad_norm": 1.5703125, "learning_rate": 1.8781585024855517e-05, "loss": 1.0438, "step": 2862 }, { "epoch": 0.4909016867779754, "grad_norm": 1.640625, "learning_rate": 1.878072098019966e-05, "loss": 0.9577, "step": 2863 }, { "epoch": 0.4910731508669653, "grad_norm": 1.6953125, "learning_rate": 1.8779856649171427e-05, "loss": 1.0269, "step": 2864 }, { "epoch": 0.49124461495595517, "grad_norm": 1.65625, "learning_rate": 1.877899203179901e-05, "loss": 0.978, "step": 2865 }, { "epoch": 0.491416079044945, "grad_norm": 1.7734375, "learning_rate": 1.8778127128110602e-05, "loss": 1.0305, "step": 2866 }, { "epoch": 0.4915875431339349, "grad_norm": 1.5234375, "learning_rate": 1.877726193813441e-05, "loss": 1.0869, "step": 2867 }, { "epoch": 0.49175900722292476, "grad_norm": 1.640625, "learning_rate": 1.877639646189866e-05, "loss": 1.0588, "step": 2868 }, { "epoch": 0.4919304713119146, "grad_norm": 1.5703125, "learning_rate": 1.8775530699431566e-05, "loss": 1.0311, "step": 2869 }, { "epoch": 0.49210193540090447, "grad_norm": 1.578125, "learning_rate": 1.8774664650761373e-05, "loss": 1.0879, "step": 2870 }, { "epoch": 0.49227339948989435, "grad_norm": 1.625, "learning_rate": 1.8773798315916324e-05, "loss": 1.0146, "step": 2871 }, { "epoch": 0.4924448635788842, "grad_norm": 1.609375, "learning_rate": 1.8772931694924677e-05, "loss": 1.0515, "step": 2872 }, { "epoch": 0.49261632766787405, "grad_norm": 1.59375, "learning_rate": 1.8772064787814686e-05, "loss": 1.0498, "step": 2873 }, { "epoch": 0.49278779175686394, "grad_norm": 1.6171875, "learning_rate": 1.877119759461463e-05, "loss": 0.9728, "step": 2874 }, { "epoch": 0.49295925584585376, "grad_norm": 1.609375, "learning_rate": 1.8770330115352797e-05, "loss": 1.0578, "step": 2875 }, { "epoch": 0.49313071993484364, "grad_norm": 1.6015625, "learning_rate": 1.8769462350057467e-05, "loss": 1.0311, "step": 2876 }, { "epoch": 0.4933021840238335, "grad_norm": 1.6484375, "learning_rate": 1.876859429875695e-05, "loss": 1.0546, "step": 2877 }, { "epoch": 0.49347364811282335, "grad_norm": 1.6875, "learning_rate": 1.8767725961479548e-05, "loss": 0.9694, "step": 2878 }, { "epoch": 0.49364511220181323, "grad_norm": 1.5390625, "learning_rate": 1.876685733825359e-05, "loss": 0.9932, "step": 2879 }, { "epoch": 0.4938165762908031, "grad_norm": 1.59375, "learning_rate": 1.87659884291074e-05, "loss": 1.0917, "step": 2880 }, { "epoch": 0.49398804037979294, "grad_norm": 1.6953125, "learning_rate": 1.876511923406932e-05, "loss": 1.0881, "step": 2881 }, { "epoch": 0.4941595044687828, "grad_norm": 1.609375, "learning_rate": 1.8764249753167693e-05, "loss": 1.0479, "step": 2882 }, { "epoch": 0.4943309685577727, "grad_norm": 1.609375, "learning_rate": 1.8763379986430884e-05, "loss": 1.1382, "step": 2883 }, { "epoch": 0.4945024326467625, "grad_norm": 1.6171875, "learning_rate": 1.8762509933887248e-05, "loss": 1.0526, "step": 2884 }, { "epoch": 0.4946738967357524, "grad_norm": 1.5078125, "learning_rate": 1.8761639595565166e-05, "loss": 0.9602, "step": 2885 }, { "epoch": 0.4948453608247423, "grad_norm": 1.7109375, "learning_rate": 1.876076897149303e-05, "loss": 0.9996, "step": 2886 }, { "epoch": 0.4950168249137321, "grad_norm": 1.59375, "learning_rate": 1.8759898061699223e-05, "loss": 1.0105, "step": 2887 }, { "epoch": 0.495188289002722, "grad_norm": 1.625, "learning_rate": 1.875902686621215e-05, "loss": 1.0099, "step": 2888 }, { "epoch": 0.4953597530917119, "grad_norm": 1.578125, "learning_rate": 1.8758155385060232e-05, "loss": 1.0506, "step": 2889 }, { "epoch": 0.4955312171807017, "grad_norm": 1.640625, "learning_rate": 1.8757283618271887e-05, "loss": 1.0378, "step": 2890 }, { "epoch": 0.4957026812696916, "grad_norm": 1.6640625, "learning_rate": 1.8756411565875545e-05, "loss": 1.152, "step": 2891 }, { "epoch": 0.49587414535868146, "grad_norm": 1.6328125, "learning_rate": 1.875553922789965e-05, "loss": 1.0665, "step": 2892 }, { "epoch": 0.4960456094476713, "grad_norm": 1.5390625, "learning_rate": 1.875466660437265e-05, "loss": 1.0447, "step": 2893 }, { "epoch": 0.49621707353666117, "grad_norm": 1.546875, "learning_rate": 1.8753793695323e-05, "loss": 0.9895, "step": 2894 }, { "epoch": 0.49638853762565105, "grad_norm": 1.5859375, "learning_rate": 1.875292050077918e-05, "loss": 0.9806, "step": 2895 }, { "epoch": 0.4965600017146409, "grad_norm": 1.65625, "learning_rate": 1.8752047020769663e-05, "loss": 1.0423, "step": 2896 }, { "epoch": 0.49673146580363076, "grad_norm": 1.8203125, "learning_rate": 1.875117325532293e-05, "loss": 1.0898, "step": 2897 }, { "epoch": 0.49690292989262064, "grad_norm": 1.625, "learning_rate": 1.8750299204467485e-05, "loss": 1.0848, "step": 2898 }, { "epoch": 0.49707439398161046, "grad_norm": 1.53125, "learning_rate": 1.8749424868231837e-05, "loss": 0.9389, "step": 2899 }, { "epoch": 0.49724585807060034, "grad_norm": 1.6953125, "learning_rate": 1.874855024664449e-05, "loss": 1.0633, "step": 2900 }, { "epoch": 0.4974173221595902, "grad_norm": 1.578125, "learning_rate": 1.874767533973398e-05, "loss": 1.0126, "step": 2901 }, { "epoch": 0.49758878624858005, "grad_norm": 1.6328125, "learning_rate": 1.874680014752883e-05, "loss": 1.0707, "step": 2902 }, { "epoch": 0.49776025033756993, "grad_norm": 1.7578125, "learning_rate": 1.87459246700576e-05, "loss": 1.0622, "step": 2903 }, { "epoch": 0.4979317144265598, "grad_norm": 1.6484375, "learning_rate": 1.8745048907348824e-05, "loss": 1.0176, "step": 2904 }, { "epoch": 0.49810317851554964, "grad_norm": 1.5625, "learning_rate": 1.874417285943108e-05, "loss": 1.0011, "step": 2905 }, { "epoch": 0.4982746426045395, "grad_norm": 1.6015625, "learning_rate": 1.8743296526332924e-05, "loss": 1.0018, "step": 2906 }, { "epoch": 0.4984461066935294, "grad_norm": 1.59375, "learning_rate": 1.8742419908082946e-05, "loss": 1.0249, "step": 2907 }, { "epoch": 0.4986175707825192, "grad_norm": 1.5, "learning_rate": 1.8741543004709735e-05, "loss": 0.851, "step": 2908 }, { "epoch": 0.4987890348715091, "grad_norm": 1.6015625, "learning_rate": 1.874066581624189e-05, "loss": 1.0617, "step": 2909 }, { "epoch": 0.498960498960499, "grad_norm": 1.6875, "learning_rate": 1.8739788342708016e-05, "loss": 0.9858, "step": 2910 }, { "epoch": 0.4991319630494888, "grad_norm": 1.546875, "learning_rate": 1.8738910584136735e-05, "loss": 1.0821, "step": 2911 }, { "epoch": 0.4993034271384787, "grad_norm": 1.5625, "learning_rate": 1.873803254055667e-05, "loss": 1.0519, "step": 2912 }, { "epoch": 0.4994748912274685, "grad_norm": 1.515625, "learning_rate": 1.873715421199646e-05, "loss": 0.9833, "step": 2913 }, { "epoch": 0.4996463553164584, "grad_norm": 1.609375, "learning_rate": 1.8736275598484753e-05, "loss": 1.0917, "step": 2914 }, { "epoch": 0.4998178194054483, "grad_norm": 1.5546875, "learning_rate": 1.8735396700050202e-05, "loss": 0.992, "step": 2915 }, { "epoch": 0.4999892834944381, "grad_norm": 1.546875, "learning_rate": 1.8734517516721467e-05, "loss": 0.9666, "step": 2916 }, { "epoch": 0.500160747583428, "grad_norm": 1.5546875, "learning_rate": 1.8733638048527223e-05, "loss": 1.0, "step": 2917 }, { "epoch": 0.5003322116724178, "grad_norm": 1.5546875, "learning_rate": 1.8732758295496158e-05, "loss": 0.9783, "step": 2918 }, { "epoch": 0.5005036757614078, "grad_norm": 1.640625, "learning_rate": 1.8731878257656956e-05, "loss": 1.1255, "step": 2919 }, { "epoch": 0.5006751398503976, "grad_norm": 1.4921875, "learning_rate": 1.8730997935038328e-05, "loss": 1.0204, "step": 2920 }, { "epoch": 0.5008466039393874, "grad_norm": 1.5703125, "learning_rate": 1.8730117327668975e-05, "loss": 0.9479, "step": 2921 }, { "epoch": 0.5010180680283773, "grad_norm": 1.625, "learning_rate": 1.8729236435577625e-05, "loss": 0.9952, "step": 2922 }, { "epoch": 0.5011895321173672, "grad_norm": 1.609375, "learning_rate": 1.8728355258793e-05, "loss": 1.0075, "step": 2923 }, { "epoch": 0.501360996206357, "grad_norm": 1.6796875, "learning_rate": 1.8727473797343846e-05, "loss": 1.0614, "step": 2924 }, { "epoch": 0.5015324602953469, "grad_norm": 1.640625, "learning_rate": 1.8726592051258906e-05, "loss": 0.9874, "step": 2925 }, { "epoch": 0.5017039243843368, "grad_norm": 1.625, "learning_rate": 1.8725710020566936e-05, "loss": 1.0229, "step": 2926 }, { "epoch": 0.5018753884733266, "grad_norm": 1.5703125, "learning_rate": 1.8724827705296706e-05, "loss": 1.0575, "step": 2927 }, { "epoch": 0.5020468525623165, "grad_norm": 1.6328125, "learning_rate": 1.872394510547699e-05, "loss": 1.0114, "step": 2928 }, { "epoch": 0.5022183166513063, "grad_norm": 1.6796875, "learning_rate": 1.872306222113657e-05, "loss": 1.1254, "step": 2929 }, { "epoch": 0.5023897807402962, "grad_norm": 1.59375, "learning_rate": 1.8722179052304245e-05, "loss": 1.0574, "step": 2930 }, { "epoch": 0.5025612448292861, "grad_norm": 1.625, "learning_rate": 1.8721295599008815e-05, "loss": 1.0549, "step": 2931 }, { "epoch": 0.5027327089182759, "grad_norm": 1.5078125, "learning_rate": 1.8720411861279094e-05, "loss": 0.9429, "step": 2932 }, { "epoch": 0.5029041730072658, "grad_norm": 1.5546875, "learning_rate": 1.8719527839143906e-05, "loss": 0.9691, "step": 2933 }, { "epoch": 0.5030756370962557, "grad_norm": 1.5859375, "learning_rate": 1.8718643532632083e-05, "loss": 1.0602, "step": 2934 }, { "epoch": 0.5032471011852455, "grad_norm": 1.6484375, "learning_rate": 1.8717758941772458e-05, "loss": 1.0318, "step": 2935 }, { "epoch": 0.5034185652742353, "grad_norm": 1.6484375, "learning_rate": 1.8716874066593885e-05, "loss": 0.9521, "step": 2936 }, { "epoch": 0.5035900293632253, "grad_norm": 1.609375, "learning_rate": 1.871598890712523e-05, "loss": 1.0152, "step": 2937 }, { "epoch": 0.5037614934522151, "grad_norm": 1.6171875, "learning_rate": 1.8715103463395352e-05, "loss": 1.0642, "step": 2938 }, { "epoch": 0.5039329575412049, "grad_norm": 1.65625, "learning_rate": 1.8714217735433132e-05, "loss": 1.0613, "step": 2939 }, { "epoch": 0.5041044216301949, "grad_norm": 1.703125, "learning_rate": 1.871333172326746e-05, "loss": 1.142, "step": 2940 }, { "epoch": 0.5042758857191847, "grad_norm": 1.65625, "learning_rate": 1.8712445426927225e-05, "loss": 1.0557, "step": 2941 }, { "epoch": 0.5044473498081745, "grad_norm": 1.5859375, "learning_rate": 1.8711558846441336e-05, "loss": 1.0868, "step": 2942 }, { "epoch": 0.5046188138971645, "grad_norm": 1.671875, "learning_rate": 1.871067198183871e-05, "loss": 1.0208, "step": 2943 }, { "epoch": 0.5047902779861543, "grad_norm": 1.65625, "learning_rate": 1.870978483314827e-05, "loss": 1.069, "step": 2944 }, { "epoch": 0.5049617420751441, "grad_norm": 1.6875, "learning_rate": 1.870889740039895e-05, "loss": 1.0765, "step": 2945 }, { "epoch": 0.505133206164134, "grad_norm": 1.5859375, "learning_rate": 1.8708009683619684e-05, "loss": 0.9706, "step": 2946 }, { "epoch": 0.5053046702531239, "grad_norm": 1.65625, "learning_rate": 1.870712168283944e-05, "loss": 1.0681, "step": 2947 }, { "epoch": 0.5054761343421137, "grad_norm": 1.5703125, "learning_rate": 1.8706233398087166e-05, "loss": 0.9911, "step": 2948 }, { "epoch": 0.5056475984311036, "grad_norm": 1.7265625, "learning_rate": 1.8705344829391835e-05, "loss": 1.1497, "step": 2949 }, { "epoch": 0.5058190625200935, "grad_norm": 1.6328125, "learning_rate": 1.8704455976782427e-05, "loss": 0.9842, "step": 2950 }, { "epoch": 0.5059905266090833, "grad_norm": 1.6328125, "learning_rate": 1.8703566840287934e-05, "loss": 1.07, "step": 2951 }, { "epoch": 0.5061619906980732, "grad_norm": 1.5625, "learning_rate": 1.8702677419937353e-05, "loss": 0.9317, "step": 2952 }, { "epoch": 0.506333454787063, "grad_norm": 1.5859375, "learning_rate": 1.870178771575969e-05, "loss": 1.101, "step": 2953 }, { "epoch": 0.5065049188760529, "grad_norm": 1.59375, "learning_rate": 1.8700897727783957e-05, "loss": 1.1308, "step": 2954 }, { "epoch": 0.5066763829650428, "grad_norm": 1.5390625, "learning_rate": 1.8700007456039188e-05, "loss": 0.9863, "step": 2955 }, { "epoch": 0.5068478470540326, "grad_norm": 1.6796875, "learning_rate": 1.8699116900554414e-05, "loss": 1.0991, "step": 2956 }, { "epoch": 0.5070193111430225, "grad_norm": 1.53125, "learning_rate": 1.8698226061358685e-05, "loss": 1.126, "step": 2957 }, { "epoch": 0.5071907752320124, "grad_norm": 1.5390625, "learning_rate": 1.8697334938481044e-05, "loss": 1.0232, "step": 2958 }, { "epoch": 0.5073622393210022, "grad_norm": 1.65625, "learning_rate": 1.869644353195056e-05, "loss": 1.0008, "step": 2959 }, { "epoch": 0.507533703409992, "grad_norm": 1.6171875, "learning_rate": 1.8695551841796305e-05, "loss": 1.014, "step": 2960 }, { "epoch": 0.507705167498982, "grad_norm": 1.578125, "learning_rate": 1.869465986804736e-05, "loss": 0.9823, "step": 2961 }, { "epoch": 0.5078766315879718, "grad_norm": 1.5546875, "learning_rate": 1.8693767610732815e-05, "loss": 0.9592, "step": 2962 }, { "epoch": 0.5080480956769616, "grad_norm": 1.578125, "learning_rate": 1.8692875069881773e-05, "loss": 1.0477, "step": 2963 }, { "epoch": 0.5082195597659516, "grad_norm": 1.671875, "learning_rate": 1.8691982245523336e-05, "loss": 1.0331, "step": 2964 }, { "epoch": 0.5083910238549414, "grad_norm": 1.5859375, "learning_rate": 1.8691089137686633e-05, "loss": 0.9992, "step": 2965 }, { "epoch": 0.5085624879439312, "grad_norm": 1.5546875, "learning_rate": 1.869019574640078e-05, "loss": 1.0193, "step": 2966 }, { "epoch": 0.5087339520329212, "grad_norm": 1.5546875, "learning_rate": 1.8689302071694925e-05, "loss": 1.021, "step": 2967 }, { "epoch": 0.508905416121911, "grad_norm": 1.6640625, "learning_rate": 1.8688408113598205e-05, "loss": 1.0477, "step": 2968 }, { "epoch": 0.5090768802109008, "grad_norm": 1.546875, "learning_rate": 1.868751387213978e-05, "loss": 0.9475, "step": 2969 }, { "epoch": 0.5092483442998907, "grad_norm": 1.6015625, "learning_rate": 1.868661934734881e-05, "loss": 1.0664, "step": 2970 }, { "epoch": 0.5094198083888806, "grad_norm": 1.609375, "learning_rate": 1.8685724539254478e-05, "loss": 1.0631, "step": 2971 }, { "epoch": 0.5095912724778704, "grad_norm": 1.578125, "learning_rate": 1.8684829447885958e-05, "loss": 0.9959, "step": 2972 }, { "epoch": 0.5097627365668603, "grad_norm": 1.640625, "learning_rate": 1.868393407327245e-05, "loss": 1.026, "step": 2973 }, { "epoch": 0.5099342006558502, "grad_norm": 1.6015625, "learning_rate": 1.8683038415443143e-05, "loss": 0.9877, "step": 2974 }, { "epoch": 0.51010566474484, "grad_norm": 1.5703125, "learning_rate": 1.8682142474427264e-05, "loss": 1.0109, "step": 2975 }, { "epoch": 0.5102771288338299, "grad_norm": 1.671875, "learning_rate": 1.868124625025402e-05, "loss": 1.0571, "step": 2976 }, { "epoch": 0.5104485929228197, "grad_norm": 1.6328125, "learning_rate": 1.8680349742952648e-05, "loss": 1.0695, "step": 2977 }, { "epoch": 0.5106200570118096, "grad_norm": 1.5546875, "learning_rate": 1.867945295255238e-05, "loss": 1.0911, "step": 2978 }, { "epoch": 0.5107915211007995, "grad_norm": 1.53125, "learning_rate": 1.8678555879082473e-05, "loss": 1.0142, "step": 2979 }, { "epoch": 0.5109629851897893, "grad_norm": 1.6015625, "learning_rate": 1.8677658522572173e-05, "loss": 1.0067, "step": 2980 }, { "epoch": 0.5111344492787792, "grad_norm": 1.609375, "learning_rate": 1.8676760883050754e-05, "loss": 0.9649, "step": 2981 }, { "epoch": 0.511305913367769, "grad_norm": 1.5390625, "learning_rate": 1.867586296054749e-05, "loss": 1.0444, "step": 2982 }, { "epoch": 0.5114773774567589, "grad_norm": 1.578125, "learning_rate": 1.8674964755091663e-05, "loss": 0.9835, "step": 2983 }, { "epoch": 0.5116488415457487, "grad_norm": 1.5546875, "learning_rate": 1.8674066266712567e-05, "loss": 1.0934, "step": 2984 }, { "epoch": 0.5118203056347386, "grad_norm": 1.5703125, "learning_rate": 1.8673167495439507e-05, "loss": 1.061, "step": 2985 }, { "epoch": 0.5119917697237285, "grad_norm": 1.59375, "learning_rate": 1.8672268441301797e-05, "loss": 1.0825, "step": 2986 }, { "epoch": 0.5121632338127183, "grad_norm": 1.640625, "learning_rate": 1.8671369104328757e-05, "loss": 1.0551, "step": 2987 }, { "epoch": 0.5123346979017082, "grad_norm": 1.5625, "learning_rate": 1.8670469484549716e-05, "loss": 1.0457, "step": 2988 }, { "epoch": 0.5125061619906981, "grad_norm": 1.578125, "learning_rate": 1.8669569581994014e-05, "loss": 1.0619, "step": 2989 }, { "epoch": 0.5126776260796879, "grad_norm": 1.640625, "learning_rate": 1.8668669396691003e-05, "loss": 1.0387, "step": 2990 }, { "epoch": 0.5128490901686777, "grad_norm": 1.546875, "learning_rate": 1.8667768928670037e-05, "loss": 0.9911, "step": 2991 }, { "epoch": 0.5130205542576677, "grad_norm": 1.6875, "learning_rate": 1.8666868177960492e-05, "loss": 1.0664, "step": 2992 }, { "epoch": 0.5131920183466575, "grad_norm": 1.5859375, "learning_rate": 1.8665967144591733e-05, "loss": 1.076, "step": 2993 }, { "epoch": 0.5133634824356473, "grad_norm": 1.609375, "learning_rate": 1.8665065828593155e-05, "loss": 1.0758, "step": 2994 }, { "epoch": 0.5135349465246373, "grad_norm": 1.609375, "learning_rate": 1.8664164229994153e-05, "loss": 1.1051, "step": 2995 }, { "epoch": 0.5137064106136271, "grad_norm": 1.6328125, "learning_rate": 1.8663262348824127e-05, "loss": 1.0632, "step": 2996 }, { "epoch": 0.5138778747026169, "grad_norm": 1.609375, "learning_rate": 1.8662360185112495e-05, "loss": 1.0605, "step": 2997 }, { "epoch": 0.5140493387916069, "grad_norm": 1.53125, "learning_rate": 1.8661457738888673e-05, "loss": 1.0795, "step": 2998 }, { "epoch": 0.5142208028805967, "grad_norm": 1.6328125, "learning_rate": 1.86605550101821e-05, "loss": 0.9883, "step": 2999 }, { "epoch": 0.5143922669695865, "grad_norm": 1.5546875, "learning_rate": 1.8659651999022218e-05, "loss": 1.006, "step": 3000 }, { "epoch": 0.5145637310585764, "grad_norm": 1.59375, "learning_rate": 1.8658748705438474e-05, "loss": 1.0278, "step": 3001 }, { "epoch": 0.5147351951475663, "grad_norm": 1.5546875, "learning_rate": 1.865784512946033e-05, "loss": 0.9796, "step": 3002 }, { "epoch": 0.5149066592365561, "grad_norm": 1.5625, "learning_rate": 1.8656941271117252e-05, "loss": 0.9706, "step": 3003 }, { "epoch": 0.515078123325546, "grad_norm": 1.5625, "learning_rate": 1.865603713043872e-05, "loss": 0.9346, "step": 3004 }, { "epoch": 0.5152495874145359, "grad_norm": 1.609375, "learning_rate": 1.865513270745422e-05, "loss": 1.0387, "step": 3005 }, { "epoch": 0.5154210515035257, "grad_norm": 1.7109375, "learning_rate": 1.8654228002193255e-05, "loss": 1.0278, "step": 3006 }, { "epoch": 0.5155925155925156, "grad_norm": 1.609375, "learning_rate": 1.865332301468532e-05, "loss": 1.0926, "step": 3007 }, { "epoch": 0.5157639796815054, "grad_norm": 1.59375, "learning_rate": 1.8652417744959942e-05, "loss": 1.0507, "step": 3008 }, { "epoch": 0.5159354437704953, "grad_norm": 1.59375, "learning_rate": 1.865151219304664e-05, "loss": 1.0022, "step": 3009 }, { "epoch": 0.5161069078594852, "grad_norm": 1.5390625, "learning_rate": 1.8650606358974942e-05, "loss": 1.0055, "step": 3010 }, { "epoch": 0.516278371948475, "grad_norm": 1.625, "learning_rate": 1.8649700242774397e-05, "loss": 1.0461, "step": 3011 }, { "epoch": 0.5164498360374649, "grad_norm": 1.5859375, "learning_rate": 1.8648793844474556e-05, "loss": 1.0554, "step": 3012 }, { "epoch": 0.5166213001264548, "grad_norm": 1.6328125, "learning_rate": 1.8647887164104976e-05, "loss": 1.0602, "step": 3013 }, { "epoch": 0.5167927642154446, "grad_norm": 1.6328125, "learning_rate": 1.8646980201695236e-05, "loss": 1.0584, "step": 3014 }, { "epoch": 0.5169642283044344, "grad_norm": 1.625, "learning_rate": 1.8646072957274906e-05, "loss": 1.0742, "step": 3015 }, { "epoch": 0.5171356923934244, "grad_norm": 1.59375, "learning_rate": 1.8645165430873578e-05, "loss": 1.0544, "step": 3016 }, { "epoch": 0.5173071564824142, "grad_norm": 1.671875, "learning_rate": 1.8644257622520857e-05, "loss": 1.0175, "step": 3017 }, { "epoch": 0.517478620571404, "grad_norm": 1.6640625, "learning_rate": 1.864334953224634e-05, "loss": 0.9962, "step": 3018 }, { "epoch": 0.517650084660394, "grad_norm": 1.5703125, "learning_rate": 1.8642441160079644e-05, "loss": 0.9973, "step": 3019 }, { "epoch": 0.5178215487493838, "grad_norm": 1.6328125, "learning_rate": 1.86415325060504e-05, "loss": 1.0568, "step": 3020 }, { "epoch": 0.5179930128383736, "grad_norm": 1.5390625, "learning_rate": 1.864062357018824e-05, "loss": 0.9947, "step": 3021 }, { "epoch": 0.5181644769273636, "grad_norm": 1.5703125, "learning_rate": 1.863971435252281e-05, "loss": 0.9692, "step": 3022 }, { "epoch": 0.5183359410163534, "grad_norm": 1.59375, "learning_rate": 1.8638804853083757e-05, "loss": 1.0223, "step": 3023 }, { "epoch": 0.5185074051053432, "grad_norm": 1.5234375, "learning_rate": 1.863789507190075e-05, "loss": 0.9897, "step": 3024 }, { "epoch": 0.5186788691943331, "grad_norm": 1.59375, "learning_rate": 1.8636985009003456e-05, "loss": 1.0149, "step": 3025 }, { "epoch": 0.518850333283323, "grad_norm": 1.5390625, "learning_rate": 1.8636074664421556e-05, "loss": 1.0892, "step": 3026 }, { "epoch": 0.5190217973723128, "grad_norm": 1.5625, "learning_rate": 1.8635164038184742e-05, "loss": 1.1259, "step": 3027 }, { "epoch": 0.5191932614613027, "grad_norm": 1.5546875, "learning_rate": 1.8634253130322714e-05, "loss": 1.0054, "step": 3028 }, { "epoch": 0.5193647255502926, "grad_norm": 1.5546875, "learning_rate": 1.8633341940865172e-05, "loss": 1.0098, "step": 3029 }, { "epoch": 0.5195361896392824, "grad_norm": 1.6953125, "learning_rate": 1.8632430469841844e-05, "loss": 1.0955, "step": 3030 }, { "epoch": 0.5197076537282723, "grad_norm": 1.640625, "learning_rate": 1.863151871728245e-05, "loss": 1.0311, "step": 3031 }, { "epoch": 0.5198791178172621, "grad_norm": 1.5, "learning_rate": 1.863060668321673e-05, "loss": 0.9735, "step": 3032 }, { "epoch": 0.520050581906252, "grad_norm": 1.6953125, "learning_rate": 1.862969436767442e-05, "loss": 1.166, "step": 3033 }, { "epoch": 0.5202220459952419, "grad_norm": 1.6015625, "learning_rate": 1.8628781770685282e-05, "loss": 0.9992, "step": 3034 }, { "epoch": 0.5203935100842317, "grad_norm": 1.625, "learning_rate": 1.8627868892279083e-05, "loss": 0.9968, "step": 3035 }, { "epoch": 0.5205649741732216, "grad_norm": 1.7734375, "learning_rate": 1.8626955732485585e-05, "loss": 1.084, "step": 3036 }, { "epoch": 0.5207364382622115, "grad_norm": 1.5546875, "learning_rate": 1.8626042291334572e-05, "loss": 0.9351, "step": 3037 }, { "epoch": 0.5209079023512013, "grad_norm": 1.7109375, "learning_rate": 1.8625128568855844e-05, "loss": 1.0052, "step": 3038 }, { "epoch": 0.5210793664401911, "grad_norm": 1.6015625, "learning_rate": 1.862421456507919e-05, "loss": 1.0152, "step": 3039 }, { "epoch": 0.5212508305291811, "grad_norm": 1.515625, "learning_rate": 1.8623300280034424e-05, "loss": 0.9943, "step": 3040 }, { "epoch": 0.5214222946181709, "grad_norm": 1.6484375, "learning_rate": 1.8622385713751363e-05, "loss": 1.0114, "step": 3041 }, { "epoch": 0.5215937587071607, "grad_norm": 1.640625, "learning_rate": 1.8621470866259835e-05, "loss": 1.1507, "step": 3042 }, { "epoch": 0.5217652227961507, "grad_norm": 1.6171875, "learning_rate": 1.862055573758968e-05, "loss": 0.9616, "step": 3043 }, { "epoch": 0.5219366868851405, "grad_norm": 1.5546875, "learning_rate": 1.8619640327770735e-05, "loss": 0.9324, "step": 3044 }, { "epoch": 0.5221081509741303, "grad_norm": 1.625, "learning_rate": 1.8618724636832866e-05, "loss": 0.9974, "step": 3045 }, { "epoch": 0.5222796150631203, "grad_norm": 1.609375, "learning_rate": 1.8617808664805926e-05, "loss": 1.0601, "step": 3046 }, { "epoch": 0.5224510791521101, "grad_norm": 1.6640625, "learning_rate": 1.8616892411719798e-05, "loss": 0.9951, "step": 3047 }, { "epoch": 0.5226225432410999, "grad_norm": 1.734375, "learning_rate": 1.8615975877604356e-05, "loss": 1.1189, "step": 3048 }, { "epoch": 0.5227940073300898, "grad_norm": 1.5625, "learning_rate": 1.86150590624895e-05, "loss": 1.0502, "step": 3049 }, { "epoch": 0.5229654714190797, "grad_norm": 1.515625, "learning_rate": 1.8614141966405125e-05, "loss": 1.0211, "step": 3050 }, { "epoch": 0.5231369355080695, "grad_norm": 1.640625, "learning_rate": 1.8613224589381143e-05, "loss": 1.0333, "step": 3051 }, { "epoch": 0.5233083995970594, "grad_norm": 1.5390625, "learning_rate": 1.8612306931447473e-05, "loss": 1.0176, "step": 3052 }, { "epoch": 0.5234798636860493, "grad_norm": 1.578125, "learning_rate": 1.861138899263404e-05, "loss": 1.1212, "step": 3053 }, { "epoch": 0.5236513277750391, "grad_norm": 1.5390625, "learning_rate": 1.8610470772970787e-05, "loss": 1.0371, "step": 3054 }, { "epoch": 0.523822791864029, "grad_norm": 1.5625, "learning_rate": 1.860955227248766e-05, "loss": 1.0394, "step": 3055 }, { "epoch": 0.5239942559530189, "grad_norm": 1.609375, "learning_rate": 1.8608633491214613e-05, "loss": 1.0387, "step": 3056 }, { "epoch": 0.5241657200420087, "grad_norm": 1.6796875, "learning_rate": 1.8607714429181606e-05, "loss": 1.1197, "step": 3057 }, { "epoch": 0.5243371841309986, "grad_norm": 1.65625, "learning_rate": 1.860679508641862e-05, "loss": 1.1022, "step": 3058 }, { "epoch": 0.5245086482199884, "grad_norm": 1.6171875, "learning_rate": 1.8605875462955638e-05, "loss": 1.0624, "step": 3059 }, { "epoch": 0.5246801123089783, "grad_norm": 1.546875, "learning_rate": 1.8604955558822647e-05, "loss": 1.0418, "step": 3060 }, { "epoch": 0.5248515763979682, "grad_norm": 1.6171875, "learning_rate": 1.8604035374049654e-05, "loss": 1.0223, "step": 3061 }, { "epoch": 0.525023040486958, "grad_norm": 1.6171875, "learning_rate": 1.860311490866667e-05, "loss": 1.0532, "step": 3062 }, { "epoch": 0.5251945045759479, "grad_norm": 1.5234375, "learning_rate": 1.860219416270371e-05, "loss": 0.9409, "step": 3063 }, { "epoch": 0.5253659686649378, "grad_norm": 1.6328125, "learning_rate": 1.8601273136190806e-05, "loss": 1.0213, "step": 3064 }, { "epoch": 0.5255374327539276, "grad_norm": 1.546875, "learning_rate": 1.8600351829157996e-05, "loss": 1.0198, "step": 3065 }, { "epoch": 0.5257088968429174, "grad_norm": 1.546875, "learning_rate": 1.859943024163532e-05, "loss": 1.0321, "step": 3066 }, { "epoch": 0.5258803609319074, "grad_norm": 1.609375, "learning_rate": 1.859850837365285e-05, "loss": 1.0562, "step": 3067 }, { "epoch": 0.5260518250208972, "grad_norm": 1.6015625, "learning_rate": 1.859758622524064e-05, "loss": 1.0236, "step": 3068 }, { "epoch": 0.526223289109887, "grad_norm": 1.5703125, "learning_rate": 1.8596663796428766e-05, "loss": 1.0014, "step": 3069 }, { "epoch": 0.526394753198877, "grad_norm": 1.5234375, "learning_rate": 1.859574108724732e-05, "loss": 1.032, "step": 3070 }, { "epoch": 0.5265662172878668, "grad_norm": 1.65625, "learning_rate": 1.8594818097726382e-05, "loss": 1.0493, "step": 3071 }, { "epoch": 0.5267376813768566, "grad_norm": 1.5859375, "learning_rate": 1.8593894827896063e-05, "loss": 1.0415, "step": 3072 }, { "epoch": 0.5269091454658466, "grad_norm": 1.59375, "learning_rate": 1.859297127778647e-05, "loss": 0.9893, "step": 3073 }, { "epoch": 0.5270806095548364, "grad_norm": 1.5859375, "learning_rate": 1.859204744742773e-05, "loss": 1.0089, "step": 3074 }, { "epoch": 0.5272520736438262, "grad_norm": 1.625, "learning_rate": 1.8591123336849962e-05, "loss": 1.0668, "step": 3075 }, { "epoch": 0.527423537732816, "grad_norm": 1.703125, "learning_rate": 1.8590198946083315e-05, "loss": 1.1142, "step": 3076 }, { "epoch": 0.527595001821806, "grad_norm": 1.6171875, "learning_rate": 1.8589274275157935e-05, "loss": 1.0525, "step": 3077 }, { "epoch": 0.5277664659107958, "grad_norm": 1.5859375, "learning_rate": 1.8588349324103974e-05, "loss": 1.0156, "step": 3078 }, { "epoch": 0.5279379299997856, "grad_norm": 1.5078125, "learning_rate": 1.85874240929516e-05, "loss": 1.02, "step": 3079 }, { "epoch": 0.5281093940887756, "grad_norm": 1.5390625, "learning_rate": 1.8586498581730987e-05, "loss": 1.0628, "step": 3080 }, { "epoch": 0.5282808581777654, "grad_norm": 1.4765625, "learning_rate": 1.8585572790472326e-05, "loss": 1.092, "step": 3081 }, { "epoch": 0.5284523222667552, "grad_norm": 1.609375, "learning_rate": 1.8584646719205803e-05, "loss": 1.0836, "step": 3082 }, { "epoch": 0.5286237863557451, "grad_norm": 1.53125, "learning_rate": 1.8583720367961623e-05, "loss": 0.9917, "step": 3083 }, { "epoch": 0.528795250444735, "grad_norm": 1.609375, "learning_rate": 1.858279373677e-05, "loss": 1.0639, "step": 3084 }, { "epoch": 0.5289667145337248, "grad_norm": 1.71875, "learning_rate": 1.8581866825661157e-05, "loss": 1.0244, "step": 3085 }, { "epoch": 0.5291381786227147, "grad_norm": 1.6171875, "learning_rate": 1.8580939634665315e-05, "loss": 1.0353, "step": 3086 }, { "epoch": 0.5293096427117046, "grad_norm": 1.5390625, "learning_rate": 1.8580012163812724e-05, "loss": 0.9599, "step": 3087 }, { "epoch": 0.5294811068006944, "grad_norm": 1.5546875, "learning_rate": 1.857908441313362e-05, "loss": 1.024, "step": 3088 }, { "epoch": 0.5296525708896843, "grad_norm": 1.6484375, "learning_rate": 1.8578156382658275e-05, "loss": 1.0846, "step": 3089 }, { "epoch": 0.5298240349786741, "grad_norm": 1.78125, "learning_rate": 1.8577228072416945e-05, "loss": 1.1081, "step": 3090 }, { "epoch": 0.529995499067664, "grad_norm": 1.59375, "learning_rate": 1.8576299482439905e-05, "loss": 1.0505, "step": 3091 }, { "epoch": 0.5301669631566539, "grad_norm": 1.59375, "learning_rate": 1.8575370612757447e-05, "loss": 1.0444, "step": 3092 }, { "epoch": 0.5303384272456437, "grad_norm": 1.609375, "learning_rate": 1.8574441463399862e-05, "loss": 1.0852, "step": 3093 }, { "epoch": 0.5305098913346336, "grad_norm": 1.6015625, "learning_rate": 1.8573512034397453e-05, "loss": 1.0687, "step": 3094 }, { "epoch": 0.5306813554236235, "grad_norm": 1.6015625, "learning_rate": 1.857258232578053e-05, "loss": 0.9603, "step": 3095 }, { "epoch": 0.5308528195126133, "grad_norm": 1.625, "learning_rate": 1.8571652337579414e-05, "loss": 0.9849, "step": 3096 }, { "epoch": 0.5310242836016031, "grad_norm": 1.6015625, "learning_rate": 1.857072206982444e-05, "loss": 0.9789, "step": 3097 }, { "epoch": 0.5311957476905931, "grad_norm": 1.65625, "learning_rate": 1.8569791522545943e-05, "loss": 1.0004, "step": 3098 }, { "epoch": 0.5313672117795829, "grad_norm": 1.546875, "learning_rate": 1.8568860695774278e-05, "loss": 0.9379, "step": 3099 }, { "epoch": 0.5315386758685727, "grad_norm": 1.6640625, "learning_rate": 1.8567929589539795e-05, "loss": 0.9668, "step": 3100 }, { "epoch": 0.5317101399575627, "grad_norm": 1.59375, "learning_rate": 1.8566998203872866e-05, "loss": 0.9812, "step": 3101 }, { "epoch": 0.5318816040465525, "grad_norm": 1.578125, "learning_rate": 1.8566066538803863e-05, "loss": 1.0347, "step": 3102 }, { "epoch": 0.5320530681355423, "grad_norm": 1.59375, "learning_rate": 1.8565134594363174e-05, "loss": 1.0362, "step": 3103 }, { "epoch": 0.5322245322245323, "grad_norm": 1.5859375, "learning_rate": 1.8564202370581193e-05, "loss": 0.993, "step": 3104 }, { "epoch": 0.5323959963135221, "grad_norm": 1.609375, "learning_rate": 1.8563269867488323e-05, "loss": 1.0241, "step": 3105 }, { "epoch": 0.5325674604025119, "grad_norm": 1.6953125, "learning_rate": 1.8562337085114978e-05, "loss": 1.0542, "step": 3106 }, { "epoch": 0.5327389244915018, "grad_norm": 1.5, "learning_rate": 1.8561404023491577e-05, "loss": 0.9356, "step": 3107 }, { "epoch": 0.5329103885804917, "grad_norm": 1.515625, "learning_rate": 1.856047068264855e-05, "loss": 0.9925, "step": 3108 }, { "epoch": 0.5330818526694815, "grad_norm": 1.5390625, "learning_rate": 1.8559537062616337e-05, "loss": 0.9417, "step": 3109 }, { "epoch": 0.5332533167584714, "grad_norm": 1.640625, "learning_rate": 1.8558603163425392e-05, "loss": 1.0574, "step": 3110 }, { "epoch": 0.5334247808474613, "grad_norm": 1.6328125, "learning_rate": 1.8557668985106164e-05, "loss": 1.0717, "step": 3111 }, { "epoch": 0.5335962449364511, "grad_norm": 1.640625, "learning_rate": 1.855673452768913e-05, "loss": 1.0645, "step": 3112 }, { "epoch": 0.533767709025441, "grad_norm": 1.5859375, "learning_rate": 1.8555799791204762e-05, "loss": 0.9766, "step": 3113 }, { "epoch": 0.5339391731144308, "grad_norm": 1.7734375, "learning_rate": 1.855486477568354e-05, "loss": 1.0897, "step": 3114 }, { "epoch": 0.5341106372034207, "grad_norm": 1.625, "learning_rate": 1.8553929481155966e-05, "loss": 1.0915, "step": 3115 }, { "epoch": 0.5342821012924106, "grad_norm": 1.421875, "learning_rate": 1.8552993907652538e-05, "loss": 0.907, "step": 3116 }, { "epoch": 0.5344535653814004, "grad_norm": 1.6015625, "learning_rate": 1.8552058055203773e-05, "loss": 1.0162, "step": 3117 }, { "epoch": 0.5346250294703903, "grad_norm": 1.5625, "learning_rate": 1.855112192384019e-05, "loss": 1.0393, "step": 3118 }, { "epoch": 0.5347964935593802, "grad_norm": 1.546875, "learning_rate": 1.8550185513592325e-05, "loss": 0.9972, "step": 3119 }, { "epoch": 0.53496795764837, "grad_norm": 1.5546875, "learning_rate": 1.854924882449071e-05, "loss": 1.0278, "step": 3120 }, { "epoch": 0.5351394217373598, "grad_norm": 1.6171875, "learning_rate": 1.8548311856565897e-05, "loss": 0.98, "step": 3121 }, { "epoch": 0.5353108858263498, "grad_norm": 1.609375, "learning_rate": 1.8547374609848442e-05, "loss": 1.089, "step": 3122 }, { "epoch": 0.5354823499153396, "grad_norm": 1.671875, "learning_rate": 1.8546437084368917e-05, "loss": 1.009, "step": 3123 }, { "epoch": 0.5356538140043294, "grad_norm": 1.609375, "learning_rate": 1.8545499280157897e-05, "loss": 0.9897, "step": 3124 }, { "epoch": 0.5358252780933194, "grad_norm": 1.6171875, "learning_rate": 1.8544561197245966e-05, "loss": 0.9395, "step": 3125 }, { "epoch": 0.5359967421823092, "grad_norm": 1.5859375, "learning_rate": 1.8543622835663715e-05, "loss": 1.027, "step": 3126 }, { "epoch": 0.536168206271299, "grad_norm": 1.5859375, "learning_rate": 1.8542684195441754e-05, "loss": 1.1007, "step": 3127 }, { "epoch": 0.536339670360289, "grad_norm": 1.6015625, "learning_rate": 1.8541745276610693e-05, "loss": 1.0017, "step": 3128 }, { "epoch": 0.5365111344492788, "grad_norm": 1.453125, "learning_rate": 1.8540806079201152e-05, "loss": 1.008, "step": 3129 }, { "epoch": 0.5366825985382686, "grad_norm": 1.5390625, "learning_rate": 1.8539866603243762e-05, "loss": 1.0767, "step": 3130 }, { "epoch": 0.5368540626272585, "grad_norm": 1.6015625, "learning_rate": 1.8538926848769166e-05, "loss": 1.1019, "step": 3131 }, { "epoch": 0.5370255267162484, "grad_norm": 1.65625, "learning_rate": 1.853798681580801e-05, "loss": 1.0994, "step": 3132 }, { "epoch": 0.5371969908052382, "grad_norm": 1.59375, "learning_rate": 1.853704650439095e-05, "loss": 1.0356, "step": 3133 }, { "epoch": 0.5373684548942281, "grad_norm": 1.6953125, "learning_rate": 1.853610591454866e-05, "loss": 1.0899, "step": 3134 }, { "epoch": 0.537539918983218, "grad_norm": 1.65625, "learning_rate": 1.8535165046311814e-05, "loss": 1.0018, "step": 3135 }, { "epoch": 0.5377113830722078, "grad_norm": 1.546875, "learning_rate": 1.853422389971109e-05, "loss": 1.0659, "step": 3136 }, { "epoch": 0.5378828471611977, "grad_norm": 1.5390625, "learning_rate": 1.853328247477719e-05, "loss": 0.9889, "step": 3137 }, { "epoch": 0.5380543112501875, "grad_norm": 1.6171875, "learning_rate": 1.8532340771540815e-05, "loss": 1.2005, "step": 3138 }, { "epoch": 0.5382257753391774, "grad_norm": 1.5234375, "learning_rate": 1.853139879003268e-05, "loss": 0.9841, "step": 3139 }, { "epoch": 0.5383972394281673, "grad_norm": 1.609375, "learning_rate": 1.85304565302835e-05, "loss": 0.9865, "step": 3140 }, { "epoch": 0.5385687035171571, "grad_norm": 1.578125, "learning_rate": 1.8529513992324012e-05, "loss": 1.0674, "step": 3141 }, { "epoch": 0.538740167606147, "grad_norm": 1.515625, "learning_rate": 1.8528571176184952e-05, "loss": 1.0455, "step": 3142 }, { "epoch": 0.5389116316951369, "grad_norm": 1.59375, "learning_rate": 1.8527628081897076e-05, "loss": 1.1141, "step": 3143 }, { "epoch": 0.5390830957841267, "grad_norm": 1.484375, "learning_rate": 1.852668470949113e-05, "loss": 0.9851, "step": 3144 }, { "epoch": 0.5392545598731165, "grad_norm": 1.703125, "learning_rate": 1.8525741058997888e-05, "loss": 1.08, "step": 3145 }, { "epoch": 0.5394260239621065, "grad_norm": 1.6171875, "learning_rate": 1.8524797130448127e-05, "loss": 0.9748, "step": 3146 }, { "epoch": 0.5395974880510963, "grad_norm": 1.5703125, "learning_rate": 1.8523852923872628e-05, "loss": 0.9932, "step": 3147 }, { "epoch": 0.5397689521400861, "grad_norm": 1.6171875, "learning_rate": 1.8522908439302193e-05, "loss": 1.0925, "step": 3148 }, { "epoch": 0.5399404162290761, "grad_norm": 1.515625, "learning_rate": 1.8521963676767614e-05, "loss": 1.0554, "step": 3149 }, { "epoch": 0.5401118803180659, "grad_norm": 1.6875, "learning_rate": 1.852101863629971e-05, "loss": 1.0495, "step": 3150 }, { "epoch": 0.5402833444070557, "grad_norm": 1.484375, "learning_rate": 1.85200733179293e-05, "loss": 1.043, "step": 3151 }, { "epoch": 0.5404548084960457, "grad_norm": 1.5078125, "learning_rate": 1.8519127721687217e-05, "loss": 1.0885, "step": 3152 }, { "epoch": 0.5406262725850355, "grad_norm": 1.4765625, "learning_rate": 1.8518181847604298e-05, "loss": 0.8962, "step": 3153 }, { "epoch": 0.5407977366740253, "grad_norm": 1.6484375, "learning_rate": 1.851723569571139e-05, "loss": 1.0868, "step": 3154 }, { "epoch": 0.5409692007630152, "grad_norm": 1.53125, "learning_rate": 1.8516289266039358e-05, "loss": 1.0079, "step": 3155 }, { "epoch": 0.5411406648520051, "grad_norm": 1.6640625, "learning_rate": 1.851534255861906e-05, "loss": 1.0173, "step": 3156 }, { "epoch": 0.5413121289409949, "grad_norm": 1.5859375, "learning_rate": 1.8514395573481377e-05, "loss": 1.0323, "step": 3157 }, { "epoch": 0.5414835930299848, "grad_norm": 1.5390625, "learning_rate": 1.8513448310657193e-05, "loss": 0.9832, "step": 3158 }, { "epoch": 0.5416550571189747, "grad_norm": 1.59375, "learning_rate": 1.85125007701774e-05, "loss": 1.0065, "step": 3159 }, { "epoch": 0.5418265212079645, "grad_norm": 1.5859375, "learning_rate": 1.85115529520729e-05, "loss": 1.1247, "step": 3160 }, { "epoch": 0.5419979852969544, "grad_norm": 1.5625, "learning_rate": 1.851060485637461e-05, "loss": 1.0102, "step": 3161 }, { "epoch": 0.5421694493859442, "grad_norm": 1.5859375, "learning_rate": 1.8509656483113444e-05, "loss": 1.0124, "step": 3162 }, { "epoch": 0.5423409134749341, "grad_norm": 1.5234375, "learning_rate": 1.8508707832320338e-05, "loss": 1.0461, "step": 3163 }, { "epoch": 0.542512377563924, "grad_norm": 1.5, "learning_rate": 1.850775890402623e-05, "loss": 1.027, "step": 3164 }, { "epoch": 0.5426838416529138, "grad_norm": 1.515625, "learning_rate": 1.8506809698262064e-05, "loss": 0.899, "step": 3165 }, { "epoch": 0.5428553057419037, "grad_norm": 1.5703125, "learning_rate": 1.85058602150588e-05, "loss": 0.9396, "step": 3166 }, { "epoch": 0.5430267698308936, "grad_norm": 1.5859375, "learning_rate": 1.8504910454447407e-05, "loss": 1.0611, "step": 3167 }, { "epoch": 0.5431982339198834, "grad_norm": 1.5390625, "learning_rate": 1.8503960416458854e-05, "loss": 1.0304, "step": 3168 }, { "epoch": 0.5433696980088732, "grad_norm": 1.609375, "learning_rate": 1.8503010101124132e-05, "loss": 0.9509, "step": 3169 }, { "epoch": 0.5435411620978632, "grad_norm": 1.53125, "learning_rate": 1.8502059508474232e-05, "loss": 0.9562, "step": 3170 }, { "epoch": 0.543712626186853, "grad_norm": 1.5546875, "learning_rate": 1.850110863854015e-05, "loss": 0.9525, "step": 3171 }, { "epoch": 0.5438840902758428, "grad_norm": 1.671875, "learning_rate": 1.850015749135291e-05, "loss": 0.9873, "step": 3172 }, { "epoch": 0.5440555543648327, "grad_norm": 1.6015625, "learning_rate": 1.849920606694352e-05, "loss": 1.0424, "step": 3173 }, { "epoch": 0.5442270184538226, "grad_norm": 1.6015625, "learning_rate": 1.849825436534302e-05, "loss": 1.0221, "step": 3174 }, { "epoch": 0.5443984825428124, "grad_norm": 1.5859375, "learning_rate": 1.849730238658244e-05, "loss": 1.0602, "step": 3175 }, { "epoch": 0.5445699466318022, "grad_norm": 1.625, "learning_rate": 1.849635013069283e-05, "loss": 1.0207, "step": 3176 }, { "epoch": 0.5447414107207922, "grad_norm": 1.625, "learning_rate": 1.8495397597705253e-05, "loss": 1.1356, "step": 3177 }, { "epoch": 0.544912874809782, "grad_norm": 1.578125, "learning_rate": 1.8494444787650768e-05, "loss": 0.9768, "step": 3178 }, { "epoch": 0.5450843388987718, "grad_norm": 1.671875, "learning_rate": 1.849349170056045e-05, "loss": 1.0797, "step": 3179 }, { "epoch": 0.5452558029877618, "grad_norm": 1.4765625, "learning_rate": 1.8492538336465387e-05, "loss": 0.9896, "step": 3180 }, { "epoch": 0.5454272670767516, "grad_norm": 1.609375, "learning_rate": 1.8491584695396666e-05, "loss": 0.9773, "step": 3181 }, { "epoch": 0.5455987311657414, "grad_norm": 1.5390625, "learning_rate": 1.8490630777385393e-05, "loss": 1.0454, "step": 3182 }, { "epoch": 0.5457701952547314, "grad_norm": 1.640625, "learning_rate": 1.8489676582462675e-05, "loss": 1.0276, "step": 3183 }, { "epoch": 0.5459416593437212, "grad_norm": 1.609375, "learning_rate": 1.8488722110659635e-05, "loss": 1.0258, "step": 3184 }, { "epoch": 0.546113123432711, "grad_norm": 1.5078125, "learning_rate": 1.8487767362007403e-05, "loss": 0.9235, "step": 3185 }, { "epoch": 0.546284587521701, "grad_norm": 1.578125, "learning_rate": 1.8486812336537117e-05, "loss": 1.0482, "step": 3186 }, { "epoch": 0.5464560516106908, "grad_norm": 1.546875, "learning_rate": 1.848585703427992e-05, "loss": 0.9701, "step": 3187 }, { "epoch": 0.5466275156996806, "grad_norm": 1.484375, "learning_rate": 1.848490145526697e-05, "loss": 0.9514, "step": 3188 }, { "epoch": 0.5467989797886705, "grad_norm": 1.578125, "learning_rate": 1.8483945599529436e-05, "loss": 1.0126, "step": 3189 }, { "epoch": 0.5469704438776604, "grad_norm": 1.59375, "learning_rate": 1.8482989467098483e-05, "loss": 1.0683, "step": 3190 }, { "epoch": 0.5471419079666502, "grad_norm": 1.7109375, "learning_rate": 1.8482033058005305e-05, "loss": 1.0875, "step": 3191 }, { "epoch": 0.5473133720556401, "grad_norm": 1.625, "learning_rate": 1.8481076372281083e-05, "loss": 0.9823, "step": 3192 }, { "epoch": 0.54748483614463, "grad_norm": 1.6015625, "learning_rate": 1.8480119409957026e-05, "loss": 1.0217, "step": 3193 }, { "epoch": 0.5476563002336198, "grad_norm": 1.640625, "learning_rate": 1.8479162171064345e-05, "loss": 1.0225, "step": 3194 }, { "epoch": 0.5478277643226097, "grad_norm": 1.609375, "learning_rate": 1.847820465563425e-05, "loss": 1.0182, "step": 3195 }, { "epoch": 0.5479992284115995, "grad_norm": 1.734375, "learning_rate": 1.847724686369798e-05, "loss": 1.0994, "step": 3196 }, { "epoch": 0.5481706925005894, "grad_norm": 1.625, "learning_rate": 1.8476288795286764e-05, "loss": 1.125, "step": 3197 }, { "epoch": 0.5483421565895793, "grad_norm": 1.5859375, "learning_rate": 1.8475330450431858e-05, "loss": 1.0885, "step": 3198 }, { "epoch": 0.5485136206785691, "grad_norm": 1.7421875, "learning_rate": 1.8474371829164504e-05, "loss": 1.0607, "step": 3199 }, { "epoch": 0.548685084767559, "grad_norm": 1.609375, "learning_rate": 1.8473412931515974e-05, "loss": 1.0336, "step": 3200 }, { "epoch": 0.5488565488565489, "grad_norm": 1.609375, "learning_rate": 1.8472453757517544e-05, "loss": 1.0461, "step": 3201 }, { "epoch": 0.5490280129455387, "grad_norm": 1.6484375, "learning_rate": 1.847149430720049e-05, "loss": 1.0338, "step": 3202 }, { "epoch": 0.5491994770345285, "grad_norm": 1.6171875, "learning_rate": 1.8470534580596106e-05, "loss": 1.0947, "step": 3203 }, { "epoch": 0.5493709411235185, "grad_norm": 1.6484375, "learning_rate": 1.8469574577735694e-05, "loss": 1.0113, "step": 3204 }, { "epoch": 0.5495424052125083, "grad_norm": 1.71875, "learning_rate": 1.8468614298650562e-05, "loss": 1.0102, "step": 3205 }, { "epoch": 0.5497138693014981, "grad_norm": 1.6328125, "learning_rate": 1.8467653743372026e-05, "loss": 1.0543, "step": 3206 }, { "epoch": 0.5498853333904881, "grad_norm": 1.6484375, "learning_rate": 1.8466692911931414e-05, "loss": 1.0869, "step": 3207 }, { "epoch": 0.5500567974794779, "grad_norm": 1.59375, "learning_rate": 1.8465731804360064e-05, "loss": 1.0763, "step": 3208 }, { "epoch": 0.5502282615684677, "grad_norm": 1.5078125, "learning_rate": 1.8464770420689323e-05, "loss": 0.967, "step": 3209 }, { "epoch": 0.5503997256574577, "grad_norm": 1.6015625, "learning_rate": 1.8463808760950544e-05, "loss": 0.9791, "step": 3210 }, { "epoch": 0.5505711897464475, "grad_norm": 1.6015625, "learning_rate": 1.8462846825175087e-05, "loss": 0.9765, "step": 3211 }, { "epoch": 0.5507426538354373, "grad_norm": 1.53125, "learning_rate": 1.8461884613394328e-05, "loss": 1.0778, "step": 3212 }, { "epoch": 0.5509141179244272, "grad_norm": 1.5390625, "learning_rate": 1.8460922125639643e-05, "loss": 1.0572, "step": 3213 }, { "epoch": 0.5510855820134171, "grad_norm": 1.6640625, "learning_rate": 1.845995936194243e-05, "loss": 1.0458, "step": 3214 }, { "epoch": 0.5512570461024069, "grad_norm": 1.640625, "learning_rate": 1.8458996322334085e-05, "loss": 1.0645, "step": 3215 }, { "epoch": 0.5514285101913968, "grad_norm": 1.5703125, "learning_rate": 1.845803300684602e-05, "loss": 0.9418, "step": 3216 }, { "epoch": 0.5515999742803867, "grad_norm": 1.5625, "learning_rate": 1.8457069415509642e-05, "loss": 1.0402, "step": 3217 }, { "epoch": 0.5517714383693765, "grad_norm": 1.828125, "learning_rate": 1.8456105548356392e-05, "loss": 0.9247, "step": 3218 }, { "epoch": 0.5519429024583664, "grad_norm": 1.5625, "learning_rate": 1.845514140541769e-05, "loss": 1.0088, "step": 3219 }, { "epoch": 0.5521143665473562, "grad_norm": 1.6171875, "learning_rate": 1.845417698672499e-05, "loss": 1.0644, "step": 3220 }, { "epoch": 0.5522858306363461, "grad_norm": 1.6328125, "learning_rate": 1.8453212292309743e-05, "loss": 1.1305, "step": 3221 }, { "epoch": 0.552457294725336, "grad_norm": 1.5625, "learning_rate": 1.8452247322203413e-05, "loss": 1.0003, "step": 3222 }, { "epoch": 0.5526287588143258, "grad_norm": 1.671875, "learning_rate": 1.8451282076437468e-05, "loss": 0.9957, "step": 3223 }, { "epoch": 0.5528002229033157, "grad_norm": 1.65625, "learning_rate": 1.845031655504339e-05, "loss": 1.0345, "step": 3224 }, { "epoch": 0.5529716869923056, "grad_norm": 1.515625, "learning_rate": 1.8449350758052668e-05, "loss": 0.9799, "step": 3225 }, { "epoch": 0.5531431510812954, "grad_norm": 1.5703125, "learning_rate": 1.84483846854968e-05, "loss": 1.0793, "step": 3226 }, { "epoch": 0.5533146151702852, "grad_norm": 1.59375, "learning_rate": 1.8447418337407295e-05, "loss": 1.0526, "step": 3227 }, { "epoch": 0.5534860792592752, "grad_norm": 1.7421875, "learning_rate": 1.8446451713815667e-05, "loss": 1.1667, "step": 3228 }, { "epoch": 0.553657543348265, "grad_norm": 1.59375, "learning_rate": 1.8445484814753444e-05, "loss": 1.0139, "step": 3229 }, { "epoch": 0.5538290074372548, "grad_norm": 1.578125, "learning_rate": 1.8444517640252156e-05, "loss": 1.0396, "step": 3230 }, { "epoch": 0.5540004715262448, "grad_norm": 1.7421875, "learning_rate": 1.844355019034335e-05, "loss": 0.9346, "step": 3231 }, { "epoch": 0.5541719356152346, "grad_norm": 1.578125, "learning_rate": 1.8442582465058577e-05, "loss": 0.9973, "step": 3232 }, { "epoch": 0.5543433997042244, "grad_norm": 1.5859375, "learning_rate": 1.8441614464429396e-05, "loss": 1.0392, "step": 3233 }, { "epoch": 0.5545148637932144, "grad_norm": 1.5234375, "learning_rate": 1.8440646188487382e-05, "loss": 0.9467, "step": 3234 }, { "epoch": 0.5546863278822042, "grad_norm": 1.625, "learning_rate": 1.843967763726411e-05, "loss": 1.0302, "step": 3235 }, { "epoch": 0.554857791971194, "grad_norm": 1.5234375, "learning_rate": 1.8438708810791167e-05, "loss": 1.0771, "step": 3236 }, { "epoch": 0.5550292560601839, "grad_norm": 1.640625, "learning_rate": 1.8437739709100155e-05, "loss": 1.0512, "step": 3237 }, { "epoch": 0.5552007201491738, "grad_norm": 1.546875, "learning_rate": 1.8436770332222677e-05, "loss": 1.0086, "step": 3238 }, { "epoch": 0.5553721842381636, "grad_norm": 1.4921875, "learning_rate": 1.8435800680190347e-05, "loss": 0.9962, "step": 3239 }, { "epoch": 0.5555436483271535, "grad_norm": 1.6015625, "learning_rate": 1.8434830753034795e-05, "loss": 0.9521, "step": 3240 }, { "epoch": 0.5557151124161434, "grad_norm": 1.5859375, "learning_rate": 1.8433860550787647e-05, "loss": 0.9977, "step": 3241 }, { "epoch": 0.5558865765051332, "grad_norm": 1.6796875, "learning_rate": 1.8432890073480546e-05, "loss": 1.011, "step": 3242 }, { "epoch": 0.5560580405941231, "grad_norm": 1.5859375, "learning_rate": 1.8431919321145146e-05, "loss": 1.0422, "step": 3243 }, { "epoch": 0.5562295046831129, "grad_norm": 1.4765625, "learning_rate": 1.8430948293813105e-05, "loss": 0.9491, "step": 3244 }, { "epoch": 0.5564009687721028, "grad_norm": 1.65625, "learning_rate": 1.8429976991516093e-05, "loss": 1.0425, "step": 3245 }, { "epoch": 0.5565724328610927, "grad_norm": 1.640625, "learning_rate": 1.8429005414285783e-05, "loss": 1.0996, "step": 3246 }, { "epoch": 0.5567438969500825, "grad_norm": 1.59375, "learning_rate": 1.842803356215387e-05, "loss": 0.9526, "step": 3247 }, { "epoch": 0.5569153610390724, "grad_norm": 1.6796875, "learning_rate": 1.8427061435152045e-05, "loss": 0.9868, "step": 3248 }, { "epoch": 0.5570868251280623, "grad_norm": 1.546875, "learning_rate": 1.8426089033312013e-05, "loss": 0.9752, "step": 3249 }, { "epoch": 0.5572582892170521, "grad_norm": 1.6328125, "learning_rate": 1.8425116356665484e-05, "loss": 1.0016, "step": 3250 }, { "epoch": 0.5574297533060419, "grad_norm": 1.6328125, "learning_rate": 1.842414340524419e-05, "loss": 1.0322, "step": 3251 }, { "epoch": 0.5576012173950319, "grad_norm": 1.6171875, "learning_rate": 1.8423170179079856e-05, "loss": 0.9072, "step": 3252 }, { "epoch": 0.5577726814840217, "grad_norm": 1.6953125, "learning_rate": 1.8422196678204224e-05, "loss": 1.0137, "step": 3253 }, { "epoch": 0.5579441455730115, "grad_norm": 1.5625, "learning_rate": 1.8421222902649042e-05, "loss": 1.0086, "step": 3254 }, { "epoch": 0.5581156096620015, "grad_norm": 1.5703125, "learning_rate": 1.842024885244607e-05, "loss": 1.0747, "step": 3255 }, { "epoch": 0.5582870737509913, "grad_norm": 1.640625, "learning_rate": 1.8419274527627072e-05, "loss": 0.9744, "step": 3256 }, { "epoch": 0.5584585378399811, "grad_norm": 1.53125, "learning_rate": 1.8418299928223834e-05, "loss": 1.0025, "step": 3257 }, { "epoch": 0.558630001928971, "grad_norm": 1.6328125, "learning_rate": 1.841732505426813e-05, "loss": 1.051, "step": 3258 }, { "epoch": 0.5588014660179609, "grad_norm": 1.5703125, "learning_rate": 1.841634990579176e-05, "loss": 0.9937, "step": 3259 }, { "epoch": 0.5589729301069507, "grad_norm": 1.546875, "learning_rate": 1.8415374482826526e-05, "loss": 0.9564, "step": 3260 }, { "epoch": 0.5591443941959406, "grad_norm": 1.59375, "learning_rate": 1.8414398785404245e-05, "loss": 1.1329, "step": 3261 }, { "epoch": 0.5593158582849305, "grad_norm": 1.6328125, "learning_rate": 1.8413422813556727e-05, "loss": 0.9782, "step": 3262 }, { "epoch": 0.5594873223739203, "grad_norm": 1.6171875, "learning_rate": 1.8412446567315816e-05, "loss": 1.0287, "step": 3263 }, { "epoch": 0.5596587864629102, "grad_norm": 1.625, "learning_rate": 1.841147004671334e-05, "loss": 1.0996, "step": 3264 }, { "epoch": 0.5598302505519, "grad_norm": 1.6015625, "learning_rate": 1.841049325178115e-05, "loss": 1.0127, "step": 3265 }, { "epoch": 0.5600017146408899, "grad_norm": 1.578125, "learning_rate": 1.840951618255111e-05, "loss": 0.9634, "step": 3266 }, { "epoch": 0.5601731787298797, "grad_norm": 1.6171875, "learning_rate": 1.8408538839055078e-05, "loss": 1.0352, "step": 3267 }, { "epoch": 0.5603446428188696, "grad_norm": 1.515625, "learning_rate": 1.8407561221324925e-05, "loss": 0.9594, "step": 3268 }, { "epoch": 0.5605161069078595, "grad_norm": 1.609375, "learning_rate": 1.8406583329392547e-05, "loss": 1.0399, "step": 3269 }, { "epoch": 0.5606875709968493, "grad_norm": 1.578125, "learning_rate": 1.8405605163289828e-05, "loss": 1.0761, "step": 3270 }, { "epoch": 0.5608590350858392, "grad_norm": 1.6875, "learning_rate": 1.840462672304867e-05, "loss": 0.9835, "step": 3271 }, { "epoch": 0.5610304991748291, "grad_norm": 1.546875, "learning_rate": 1.840364800870099e-05, "loss": 1.0436, "step": 3272 }, { "epoch": 0.5612019632638189, "grad_norm": 1.6484375, "learning_rate": 1.84026690202787e-05, "loss": 1.0471, "step": 3273 }, { "epoch": 0.5613734273528088, "grad_norm": 1.46875, "learning_rate": 1.8401689757813734e-05, "loss": 0.98, "step": 3274 }, { "epoch": 0.5615448914417986, "grad_norm": 1.6328125, "learning_rate": 1.8400710221338026e-05, "loss": 1.0721, "step": 3275 }, { "epoch": 0.5617163555307885, "grad_norm": 1.6484375, "learning_rate": 1.8399730410883524e-05, "loss": 1.0973, "step": 3276 }, { "epoch": 0.5618878196197784, "grad_norm": 1.578125, "learning_rate": 1.8398750326482182e-05, "loss": 0.991, "step": 3277 }, { "epoch": 0.5620592837087682, "grad_norm": 1.5546875, "learning_rate": 1.8397769968165967e-05, "loss": 1.0093, "step": 3278 }, { "epoch": 0.5622307477977581, "grad_norm": 1.6171875, "learning_rate": 1.8396789335966846e-05, "loss": 1.0551, "step": 3279 }, { "epoch": 0.562402211886748, "grad_norm": 1.65625, "learning_rate": 1.839580842991681e-05, "loss": 0.969, "step": 3280 }, { "epoch": 0.5625736759757378, "grad_norm": 1.5546875, "learning_rate": 1.8394827250047842e-05, "loss": 0.929, "step": 3281 }, { "epoch": 0.5627451400647276, "grad_norm": 1.625, "learning_rate": 1.8393845796391947e-05, "loss": 1.0078, "step": 3282 }, { "epoch": 0.5629166041537176, "grad_norm": 1.5703125, "learning_rate": 1.839286406898113e-05, "loss": 1.012, "step": 3283 }, { "epoch": 0.5630880682427074, "grad_norm": 1.59375, "learning_rate": 1.8391882067847414e-05, "loss": 0.9506, "step": 3284 }, { "epoch": 0.5632595323316972, "grad_norm": 1.640625, "learning_rate": 1.8390899793022824e-05, "loss": 1.0795, "step": 3285 }, { "epoch": 0.5634309964206872, "grad_norm": 1.6484375, "learning_rate": 1.8389917244539392e-05, "loss": 1.0464, "step": 3286 }, { "epoch": 0.563602460509677, "grad_norm": 1.640625, "learning_rate": 1.8388934422429167e-05, "loss": 1.1251, "step": 3287 }, { "epoch": 0.5637739245986668, "grad_norm": 1.5859375, "learning_rate": 1.83879513267242e-05, "loss": 0.9987, "step": 3288 }, { "epoch": 0.5639453886876568, "grad_norm": 1.6484375, "learning_rate": 1.8386967957456555e-05, "loss": 1.0367, "step": 3289 }, { "epoch": 0.5641168527766466, "grad_norm": 1.6015625, "learning_rate": 1.83859843146583e-05, "loss": 1.0065, "step": 3290 }, { "epoch": 0.5642883168656364, "grad_norm": 1.546875, "learning_rate": 1.8385000398361517e-05, "loss": 0.9502, "step": 3291 }, { "epoch": 0.5644597809546263, "grad_norm": 1.640625, "learning_rate": 1.83840162085983e-05, "loss": 0.9974, "step": 3292 }, { "epoch": 0.5646312450436162, "grad_norm": 1.5703125, "learning_rate": 1.838303174540074e-05, "loss": 1.1014, "step": 3293 }, { "epoch": 0.564802709132606, "grad_norm": 1.5703125, "learning_rate": 1.8382047008800947e-05, "loss": 0.9488, "step": 3294 }, { "epoch": 0.5649741732215959, "grad_norm": 1.6015625, "learning_rate": 1.838106199883104e-05, "loss": 1.0501, "step": 3295 }, { "epoch": 0.5651456373105858, "grad_norm": 1.5625, "learning_rate": 1.838007671552314e-05, "loss": 0.9358, "step": 3296 }, { "epoch": 0.5653171013995756, "grad_norm": 1.5859375, "learning_rate": 1.837909115890938e-05, "loss": 1.0654, "step": 3297 }, { "epoch": 0.5654885654885655, "grad_norm": 1.5, "learning_rate": 1.8378105329021908e-05, "loss": 0.9435, "step": 3298 }, { "epoch": 0.5656600295775553, "grad_norm": 1.65625, "learning_rate": 1.8377119225892868e-05, "loss": 0.9904, "step": 3299 }, { "epoch": 0.5658314936665452, "grad_norm": 1.6015625, "learning_rate": 1.8376132849554428e-05, "loss": 1.0634, "step": 3300 }, { "epoch": 0.5660029577555351, "grad_norm": 1.6015625, "learning_rate": 1.8375146200038752e-05, "loss": 1.0184, "step": 3301 }, { "epoch": 0.5661744218445249, "grad_norm": 1.65625, "learning_rate": 1.8374159277378024e-05, "loss": 1.0845, "step": 3302 }, { "epoch": 0.5663458859335148, "grad_norm": 1.578125, "learning_rate": 1.8373172081604424e-05, "loss": 0.9841, "step": 3303 }, { "epoch": 0.5665173500225047, "grad_norm": 1.5859375, "learning_rate": 1.8372184612750152e-05, "loss": 1.0651, "step": 3304 }, { "epoch": 0.5666888141114945, "grad_norm": 1.7265625, "learning_rate": 1.8371196870847418e-05, "loss": 1.0452, "step": 3305 }, { "epoch": 0.5668602782004843, "grad_norm": 1.59375, "learning_rate": 1.8370208855928427e-05, "loss": 1.0319, "step": 3306 }, { "epoch": 0.5670317422894743, "grad_norm": 1.6953125, "learning_rate": 1.8369220568025405e-05, "loss": 1.0709, "step": 3307 }, { "epoch": 0.5672032063784641, "grad_norm": 1.6015625, "learning_rate": 1.8368232007170587e-05, "loss": 1.0725, "step": 3308 }, { "epoch": 0.5673746704674539, "grad_norm": 1.5390625, "learning_rate": 1.836724317339621e-05, "loss": 1.0185, "step": 3309 }, { "epoch": 0.5675461345564439, "grad_norm": 1.640625, "learning_rate": 1.8366254066734526e-05, "loss": 1.073, "step": 3310 }, { "epoch": 0.5677175986454337, "grad_norm": 1.625, "learning_rate": 1.836526468721779e-05, "loss": 1.0662, "step": 3311 }, { "epoch": 0.5678890627344235, "grad_norm": 1.5078125, "learning_rate": 1.8364275034878277e-05, "loss": 1.0078, "step": 3312 }, { "epoch": 0.5680605268234135, "grad_norm": 1.5390625, "learning_rate": 1.8363285109748253e-05, "loss": 1.0112, "step": 3313 }, { "epoch": 0.5682319909124033, "grad_norm": 1.6171875, "learning_rate": 1.836229491186001e-05, "loss": 1.0086, "step": 3314 }, { "epoch": 0.5684034550013931, "grad_norm": 1.5625, "learning_rate": 1.8361304441245843e-05, "loss": 1.0831, "step": 3315 }, { "epoch": 0.568574919090383, "grad_norm": 1.59375, "learning_rate": 1.836031369793805e-05, "loss": 1.0802, "step": 3316 }, { "epoch": 0.5687463831793729, "grad_norm": 1.53125, "learning_rate": 1.8359322681968942e-05, "loss": 1.0011, "step": 3317 }, { "epoch": 0.5689178472683627, "grad_norm": 1.5234375, "learning_rate": 1.8358331393370847e-05, "loss": 1.0521, "step": 3318 }, { "epoch": 0.5690893113573526, "grad_norm": 1.6484375, "learning_rate": 1.8357339832176092e-05, "loss": 1.0235, "step": 3319 }, { "epoch": 0.5692607754463425, "grad_norm": 1.5859375, "learning_rate": 1.835634799841701e-05, "loss": 1.0332, "step": 3320 }, { "epoch": 0.5694322395353323, "grad_norm": 1.578125, "learning_rate": 1.835535589212596e-05, "loss": 1.0653, "step": 3321 }, { "epoch": 0.5696037036243222, "grad_norm": 1.6484375, "learning_rate": 1.8354363513335282e-05, "loss": 1.1728, "step": 3322 }, { "epoch": 0.569775167713312, "grad_norm": 1.53125, "learning_rate": 1.8353370862077358e-05, "loss": 1.0377, "step": 3323 }, { "epoch": 0.5699466318023019, "grad_norm": 1.65625, "learning_rate": 1.835237793838455e-05, "loss": 1.0604, "step": 3324 }, { "epoch": 0.5701180958912918, "grad_norm": 1.609375, "learning_rate": 1.8351384742289244e-05, "loss": 1.0777, "step": 3325 }, { "epoch": 0.5702895599802816, "grad_norm": 1.53125, "learning_rate": 1.8350391273823836e-05, "loss": 1.0222, "step": 3326 }, { "epoch": 0.5704610240692715, "grad_norm": 1.6015625, "learning_rate": 1.8349397533020723e-05, "loss": 1.0333, "step": 3327 }, { "epoch": 0.5706324881582614, "grad_norm": 1.578125, "learning_rate": 1.8348403519912316e-05, "loss": 1.0752, "step": 3328 }, { "epoch": 0.5708039522472512, "grad_norm": 1.5625, "learning_rate": 1.8347409234531034e-05, "loss": 1.0549, "step": 3329 }, { "epoch": 0.570975416336241, "grad_norm": 1.6015625, "learning_rate": 1.83464146769093e-05, "loss": 1.0061, "step": 3330 }, { "epoch": 0.571146880425231, "grad_norm": 1.6953125, "learning_rate": 1.8345419847079555e-05, "loss": 0.9759, "step": 3331 }, { "epoch": 0.5713183445142208, "grad_norm": 1.5390625, "learning_rate": 1.8344424745074245e-05, "loss": 1.0317, "step": 3332 }, { "epoch": 0.5714898086032106, "grad_norm": 1.6015625, "learning_rate": 1.834342937092582e-05, "loss": 1.0236, "step": 3333 }, { "epoch": 0.5716612726922006, "grad_norm": 1.5859375, "learning_rate": 1.8342433724666745e-05, "loss": 1.032, "step": 3334 }, { "epoch": 0.5718327367811904, "grad_norm": 1.5703125, "learning_rate": 1.8341437806329492e-05, "loss": 1.0583, "step": 3335 }, { "epoch": 0.5720042008701802, "grad_norm": 1.5703125, "learning_rate": 1.8340441615946544e-05, "loss": 1.0017, "step": 3336 }, { "epoch": 0.5721756649591702, "grad_norm": 1.6875, "learning_rate": 1.8339445153550382e-05, "loss": 1.1914, "step": 3337 }, { "epoch": 0.57234712904816, "grad_norm": 1.65625, "learning_rate": 1.8338448419173514e-05, "loss": 1.0421, "step": 3338 }, { "epoch": 0.5725185931371498, "grad_norm": 1.578125, "learning_rate": 1.833745141284844e-05, "loss": 1.0538, "step": 3339 }, { "epoch": 0.5726900572261397, "grad_norm": 1.5546875, "learning_rate": 1.833645413460768e-05, "loss": 1.019, "step": 3340 }, { "epoch": 0.5728615213151296, "grad_norm": 1.6796875, "learning_rate": 1.8335456584483763e-05, "loss": 1.0864, "step": 3341 }, { "epoch": 0.5730329854041194, "grad_norm": 1.5234375, "learning_rate": 1.8334458762509214e-05, "loss": 0.988, "step": 3342 }, { "epoch": 0.5732044494931093, "grad_norm": 1.65625, "learning_rate": 1.833346066871658e-05, "loss": 1.0151, "step": 3343 }, { "epoch": 0.5733759135820992, "grad_norm": 1.6953125, "learning_rate": 1.8332462303138418e-05, "loss": 1.0402, "step": 3344 }, { "epoch": 0.573547377671089, "grad_norm": 1.9375, "learning_rate": 1.8331463665807278e-05, "loss": 1.0646, "step": 3345 }, { "epoch": 0.5737188417600789, "grad_norm": 1.5390625, "learning_rate": 1.8330464756755737e-05, "loss": 0.9698, "step": 3346 }, { "epoch": 0.5738903058490687, "grad_norm": 1.484375, "learning_rate": 1.832946557601637e-05, "loss": 0.9967, "step": 3347 }, { "epoch": 0.5740617699380586, "grad_norm": 1.6015625, "learning_rate": 1.832846612362176e-05, "loss": 0.9096, "step": 3348 }, { "epoch": 0.5742332340270485, "grad_norm": 1.5546875, "learning_rate": 1.8327466399604517e-05, "loss": 1.0488, "step": 3349 }, { "epoch": 0.5744046981160383, "grad_norm": 1.578125, "learning_rate": 1.8326466403997228e-05, "loss": 1.0233, "step": 3350 }, { "epoch": 0.5745761622050282, "grad_norm": 1.65625, "learning_rate": 1.832546613683252e-05, "loss": 0.9991, "step": 3351 }, { "epoch": 0.5747476262940181, "grad_norm": 1.6640625, "learning_rate": 1.832446559814301e-05, "loss": 1.12, "step": 3352 }, { "epoch": 0.5749190903830079, "grad_norm": 1.7109375, "learning_rate": 1.8323464787961327e-05, "loss": 1.0985, "step": 3353 }, { "epoch": 0.5750905544719978, "grad_norm": 1.4609375, "learning_rate": 1.8322463706320116e-05, "loss": 0.9235, "step": 3354 }, { "epoch": 0.5752620185609877, "grad_norm": 1.5078125, "learning_rate": 1.8321462353252024e-05, "loss": 0.9686, "step": 3355 }, { "epoch": 0.5754334826499775, "grad_norm": 1.609375, "learning_rate": 1.8320460728789705e-05, "loss": 0.991, "step": 3356 }, { "epoch": 0.5756049467389673, "grad_norm": 1.625, "learning_rate": 1.8319458832965836e-05, "loss": 1.0094, "step": 3357 }, { "epoch": 0.5757764108279573, "grad_norm": 1.625, "learning_rate": 1.831845666581308e-05, "loss": 1.0868, "step": 3358 }, { "epoch": 0.5759478749169471, "grad_norm": 1.5703125, "learning_rate": 1.8317454227364134e-05, "loss": 0.9983, "step": 3359 }, { "epoch": 0.5761193390059369, "grad_norm": 1.5703125, "learning_rate": 1.8316451517651683e-05, "loss": 1.0399, "step": 3360 }, { "epoch": 0.5762908030949268, "grad_norm": 1.484375, "learning_rate": 1.831544853670843e-05, "loss": 0.9766, "step": 3361 }, { "epoch": 0.5764622671839167, "grad_norm": 1.5859375, "learning_rate": 1.8314445284567087e-05, "loss": 0.9866, "step": 3362 }, { "epoch": 0.5766337312729065, "grad_norm": 1.5703125, "learning_rate": 1.8313441761260374e-05, "loss": 1.0635, "step": 3363 }, { "epoch": 0.5768051953618963, "grad_norm": 1.6015625, "learning_rate": 1.831243796682102e-05, "loss": 0.9684, "step": 3364 }, { "epoch": 0.5769766594508863, "grad_norm": 1.5625, "learning_rate": 1.831143390128176e-05, "loss": 1.0406, "step": 3365 }, { "epoch": 0.5771481235398761, "grad_norm": 1.6015625, "learning_rate": 1.8310429564675347e-05, "loss": 0.943, "step": 3366 }, { "epoch": 0.5773195876288659, "grad_norm": 1.625, "learning_rate": 1.8309424957034528e-05, "loss": 1.0568, "step": 3367 }, { "epoch": 0.5774910517178559, "grad_norm": 1.5390625, "learning_rate": 1.8308420078392073e-05, "loss": 1.0687, "step": 3368 }, { "epoch": 0.5776625158068457, "grad_norm": 1.71875, "learning_rate": 1.8307414928780753e-05, "loss": 1.098, "step": 3369 }, { "epoch": 0.5778339798958355, "grad_norm": 1.7109375, "learning_rate": 1.8306409508233347e-05, "loss": 1.1567, "step": 3370 }, { "epoch": 0.5780054439848255, "grad_norm": 1.59375, "learning_rate": 1.830540381678265e-05, "loss": 1.0479, "step": 3371 }, { "epoch": 0.5781769080738153, "grad_norm": 1.5546875, "learning_rate": 1.830439785446146e-05, "loss": 1.0058, "step": 3372 }, { "epoch": 0.5783483721628051, "grad_norm": 1.65625, "learning_rate": 1.830339162130258e-05, "loss": 1.0528, "step": 3373 }, { "epoch": 0.578519836251795, "grad_norm": 1.5859375, "learning_rate": 1.8302385117338836e-05, "loss": 1.0247, "step": 3374 }, { "epoch": 0.5786913003407849, "grad_norm": 1.46875, "learning_rate": 1.8301378342603045e-05, "loss": 1.0035, "step": 3375 }, { "epoch": 0.5788627644297747, "grad_norm": 1.5390625, "learning_rate": 1.830037129712805e-05, "loss": 1.0463, "step": 3376 }, { "epoch": 0.5790342285187646, "grad_norm": 1.5859375, "learning_rate": 1.829936398094669e-05, "loss": 1.0622, "step": 3377 }, { "epoch": 0.5792056926077545, "grad_norm": 1.59375, "learning_rate": 1.829835639409182e-05, "loss": 1.0306, "step": 3378 }, { "epoch": 0.5793771566967443, "grad_norm": 1.578125, "learning_rate": 1.8297348536596297e-05, "loss": 1.0617, "step": 3379 }, { "epoch": 0.5795486207857342, "grad_norm": 1.5390625, "learning_rate": 1.829634040849299e-05, "loss": 1.0848, "step": 3380 }, { "epoch": 0.579720084874724, "grad_norm": 1.5546875, "learning_rate": 1.829533200981479e-05, "loss": 0.9907, "step": 3381 }, { "epoch": 0.5798915489637139, "grad_norm": 1.578125, "learning_rate": 1.8294323340594565e-05, "loss": 1.0193, "step": 3382 }, { "epoch": 0.5800630130527038, "grad_norm": 1.5390625, "learning_rate": 1.829331440086523e-05, "loss": 0.9932, "step": 3383 }, { "epoch": 0.5802344771416936, "grad_norm": 1.5390625, "learning_rate": 1.829230519065968e-05, "loss": 0.9571, "step": 3384 }, { "epoch": 0.5804059412306835, "grad_norm": 1.4296875, "learning_rate": 1.829129571001083e-05, "loss": 0.969, "step": 3385 }, { "epoch": 0.5805774053196734, "grad_norm": 1.5, "learning_rate": 1.829028595895161e-05, "loss": 0.963, "step": 3386 }, { "epoch": 0.5807488694086632, "grad_norm": 1.5390625, "learning_rate": 1.828927593751494e-05, "loss": 0.9628, "step": 3387 }, { "epoch": 0.580920333497653, "grad_norm": 1.5234375, "learning_rate": 1.828826564573377e-05, "loss": 0.9359, "step": 3388 }, { "epoch": 0.581091797586643, "grad_norm": 1.546875, "learning_rate": 1.8287255083641048e-05, "loss": 1.0194, "step": 3389 }, { "epoch": 0.5812632616756328, "grad_norm": 1.515625, "learning_rate": 1.828624425126973e-05, "loss": 1.0237, "step": 3390 }, { "epoch": 0.5814347257646226, "grad_norm": 1.6328125, "learning_rate": 1.8285233148652784e-05, "loss": 1.0385, "step": 3391 }, { "epoch": 0.5816061898536126, "grad_norm": 1.5078125, "learning_rate": 1.8284221775823187e-05, "loss": 1.0283, "step": 3392 }, { "epoch": 0.5817776539426024, "grad_norm": 1.5859375, "learning_rate": 1.828321013281392e-05, "loss": 1.0483, "step": 3393 }, { "epoch": 0.5819491180315922, "grad_norm": 1.609375, "learning_rate": 1.828219821965798e-05, "loss": 1.0926, "step": 3394 }, { "epoch": 0.5821205821205822, "grad_norm": 1.609375, "learning_rate": 1.8281186036388368e-05, "loss": 1.0863, "step": 3395 }, { "epoch": 0.582292046209572, "grad_norm": 1.5390625, "learning_rate": 1.8280173583038095e-05, "loss": 1.0118, "step": 3396 }, { "epoch": 0.5824635102985618, "grad_norm": 1.5859375, "learning_rate": 1.8279160859640182e-05, "loss": 0.978, "step": 3397 }, { "epoch": 0.5826349743875517, "grad_norm": 1.4609375, "learning_rate": 1.8278147866227658e-05, "loss": 1.029, "step": 3398 }, { "epoch": 0.5828064384765416, "grad_norm": 1.53125, "learning_rate": 1.8277134602833556e-05, "loss": 1.0615, "step": 3399 }, { "epoch": 0.5829779025655314, "grad_norm": 1.515625, "learning_rate": 1.8276121069490925e-05, "loss": 1.0119, "step": 3400 }, { "epoch": 0.5831493666545213, "grad_norm": 1.6171875, "learning_rate": 1.8275107266232826e-05, "loss": 1.0311, "step": 3401 }, { "epoch": 0.5833208307435112, "grad_norm": 1.578125, "learning_rate": 1.8274093193092313e-05, "loss": 1.0166, "step": 3402 }, { "epoch": 0.583492294832501, "grad_norm": 1.6015625, "learning_rate": 1.827307885010247e-05, "loss": 1.0905, "step": 3403 }, { "epoch": 0.5836637589214909, "grad_norm": 1.75, "learning_rate": 1.8272064237296367e-05, "loss": 1.0299, "step": 3404 }, { "epoch": 0.5838352230104807, "grad_norm": 1.546875, "learning_rate": 1.82710493547071e-05, "loss": 0.9962, "step": 3405 }, { "epoch": 0.5840066870994706, "grad_norm": 1.625, "learning_rate": 1.827003420236777e-05, "loss": 0.9978, "step": 3406 }, { "epoch": 0.5841781511884605, "grad_norm": 1.6171875, "learning_rate": 1.826901878031148e-05, "loss": 1.0804, "step": 3407 }, { "epoch": 0.5843496152774503, "grad_norm": 1.6171875, "learning_rate": 1.8268003088571352e-05, "loss": 1.0453, "step": 3408 }, { "epoch": 0.5845210793664402, "grad_norm": 1.5, "learning_rate": 1.8266987127180507e-05, "loss": 0.9565, "step": 3409 }, { "epoch": 0.5846925434554301, "grad_norm": 1.671875, "learning_rate": 1.8265970896172082e-05, "loss": 1.0409, "step": 3410 }, { "epoch": 0.5848640075444199, "grad_norm": 1.65625, "learning_rate": 1.8264954395579216e-05, "loss": 1.0311, "step": 3411 }, { "epoch": 0.5850354716334097, "grad_norm": 1.7265625, "learning_rate": 1.826393762543507e-05, "loss": 1.0295, "step": 3412 }, { "epoch": 0.5852069357223997, "grad_norm": 1.546875, "learning_rate": 1.8262920585772793e-05, "loss": 0.9599, "step": 3413 }, { "epoch": 0.5853783998113895, "grad_norm": 1.5703125, "learning_rate": 1.8261903276625562e-05, "loss": 0.9765, "step": 3414 }, { "epoch": 0.5855498639003793, "grad_norm": 1.546875, "learning_rate": 1.826088569802656e-05, "loss": 1.0553, "step": 3415 }, { "epoch": 0.5857213279893693, "grad_norm": 1.6328125, "learning_rate": 1.825986785000896e-05, "loss": 0.9898, "step": 3416 }, { "epoch": 0.5858927920783591, "grad_norm": 1.640625, "learning_rate": 1.825884973260597e-05, "loss": 1.1863, "step": 3417 }, { "epoch": 0.5860642561673489, "grad_norm": 1.5625, "learning_rate": 1.8257831345850787e-05, "loss": 0.9711, "step": 3418 }, { "epoch": 0.5862357202563389, "grad_norm": 1.5859375, "learning_rate": 1.825681268977663e-05, "loss": 1.0413, "step": 3419 }, { "epoch": 0.5864071843453287, "grad_norm": 1.5546875, "learning_rate": 1.8255793764416718e-05, "loss": 1.0048, "step": 3420 }, { "epoch": 0.5865786484343185, "grad_norm": 1.5625, "learning_rate": 1.8254774569804284e-05, "loss": 1.0406, "step": 3421 }, { "epoch": 0.5867501125233084, "grad_norm": 1.609375, "learning_rate": 1.8253755105972565e-05, "loss": 1.0598, "step": 3422 }, { "epoch": 0.5869215766122983, "grad_norm": 1.6640625, "learning_rate": 1.8252735372954812e-05, "loss": 1.0493, "step": 3423 }, { "epoch": 0.5870930407012881, "grad_norm": 1.71875, "learning_rate": 1.8251715370784283e-05, "loss": 1.0204, "step": 3424 }, { "epoch": 0.587264504790278, "grad_norm": 1.5390625, "learning_rate": 1.825069509949424e-05, "loss": 1.0498, "step": 3425 }, { "epoch": 0.5874359688792679, "grad_norm": 1.6875, "learning_rate": 1.8249674559117963e-05, "loss": 1.0432, "step": 3426 }, { "epoch": 0.5876074329682577, "grad_norm": 1.5546875, "learning_rate": 1.824865374968873e-05, "loss": 1.0371, "step": 3427 }, { "epoch": 0.5877788970572476, "grad_norm": 1.5859375, "learning_rate": 1.824763267123984e-05, "loss": 1.0738, "step": 3428 }, { "epoch": 0.5879503611462374, "grad_norm": 1.4921875, "learning_rate": 1.824661132380459e-05, "loss": 0.9195, "step": 3429 }, { "epoch": 0.5881218252352273, "grad_norm": 1.5703125, "learning_rate": 1.824558970741629e-05, "loss": 1.043, "step": 3430 }, { "epoch": 0.5882932893242172, "grad_norm": 1.546875, "learning_rate": 1.8244567822108262e-05, "loss": 0.9982, "step": 3431 }, { "epoch": 0.588464753413207, "grad_norm": 1.546875, "learning_rate": 1.824354566791383e-05, "loss": 1.0171, "step": 3432 }, { "epoch": 0.5886362175021969, "grad_norm": 1.5703125, "learning_rate": 1.824252324486633e-05, "loss": 0.9613, "step": 3433 }, { "epoch": 0.5888076815911868, "grad_norm": 1.65625, "learning_rate": 1.824150055299911e-05, "loss": 1.1034, "step": 3434 }, { "epoch": 0.5889791456801766, "grad_norm": 1.5546875, "learning_rate": 1.8240477592345522e-05, "loss": 0.9503, "step": 3435 }, { "epoch": 0.5891506097691664, "grad_norm": 1.5703125, "learning_rate": 1.8239454362938925e-05, "loss": 1.0519, "step": 3436 }, { "epoch": 0.5893220738581564, "grad_norm": 1.59375, "learning_rate": 1.82384308648127e-05, "loss": 1.0554, "step": 3437 }, { "epoch": 0.5894935379471462, "grad_norm": 1.578125, "learning_rate": 1.823740709800022e-05, "loss": 0.9954, "step": 3438 }, { "epoch": 0.589665002036136, "grad_norm": 1.8359375, "learning_rate": 1.8236383062534874e-05, "loss": 0.961, "step": 3439 }, { "epoch": 0.589836466125126, "grad_norm": 1.7265625, "learning_rate": 1.8235358758450062e-05, "loss": 1.0856, "step": 3440 }, { "epoch": 0.5900079302141158, "grad_norm": 1.5546875, "learning_rate": 1.8234334185779186e-05, "loss": 1.0408, "step": 3441 }, { "epoch": 0.5901793943031056, "grad_norm": 1.5859375, "learning_rate": 1.823330934455567e-05, "loss": 1.0096, "step": 3442 }, { "epoch": 0.5903508583920956, "grad_norm": 1.640625, "learning_rate": 1.823228423481293e-05, "loss": 1.0386, "step": 3443 }, { "epoch": 0.5905223224810854, "grad_norm": 1.6171875, "learning_rate": 1.82312588565844e-05, "loss": 1.0921, "step": 3444 }, { "epoch": 0.5906937865700752, "grad_norm": 1.6484375, "learning_rate": 1.8230233209903527e-05, "loss": 1.108, "step": 3445 }, { "epoch": 0.5908652506590651, "grad_norm": 1.5859375, "learning_rate": 1.8229207294803753e-05, "loss": 0.9812, "step": 3446 }, { "epoch": 0.591036714748055, "grad_norm": 1.5390625, "learning_rate": 1.8228181111318538e-05, "loss": 1.0402, "step": 3447 }, { "epoch": 0.5912081788370448, "grad_norm": 1.578125, "learning_rate": 1.822715465948136e-05, "loss": 0.9975, "step": 3448 }, { "epoch": 0.5913796429260347, "grad_norm": 1.5859375, "learning_rate": 1.8226127939325683e-05, "loss": 1.0768, "step": 3449 }, { "epoch": 0.5915511070150246, "grad_norm": 1.5, "learning_rate": 1.8225100950884997e-05, "loss": 1.0326, "step": 3450 }, { "epoch": 0.5917225711040144, "grad_norm": 1.5625, "learning_rate": 1.8224073694192798e-05, "loss": 1.0055, "step": 3451 }, { "epoch": 0.5918940351930043, "grad_norm": 1.71875, "learning_rate": 1.8223046169282585e-05, "loss": 1.0598, "step": 3452 }, { "epoch": 0.5920654992819941, "grad_norm": 1.6171875, "learning_rate": 1.8222018376187873e-05, "loss": 1.0051, "step": 3453 }, { "epoch": 0.592236963370984, "grad_norm": 1.4765625, "learning_rate": 1.822099031494218e-05, "loss": 0.937, "step": 3454 }, { "epoch": 0.5924084274599738, "grad_norm": 1.515625, "learning_rate": 1.8219961985579035e-05, "loss": 0.9945, "step": 3455 }, { "epoch": 0.5925798915489637, "grad_norm": 1.6328125, "learning_rate": 1.821893338813198e-05, "loss": 1.1318, "step": 3456 }, { "epoch": 0.5927513556379536, "grad_norm": 1.6171875, "learning_rate": 1.8217904522634553e-05, "loss": 1.0298, "step": 3457 }, { "epoch": 0.5929228197269434, "grad_norm": 1.5546875, "learning_rate": 1.8216875389120316e-05, "loss": 0.9916, "step": 3458 }, { "epoch": 0.5930942838159333, "grad_norm": 1.5390625, "learning_rate": 1.8215845987622828e-05, "loss": 1.0565, "step": 3459 }, { "epoch": 0.5932657479049231, "grad_norm": 1.546875, "learning_rate": 1.8214816318175663e-05, "loss": 0.9459, "step": 3460 }, { "epoch": 0.593437211993913, "grad_norm": 1.6015625, "learning_rate": 1.821378638081241e-05, "loss": 1.0692, "step": 3461 }, { "epoch": 0.5936086760829029, "grad_norm": 1.6015625, "learning_rate": 1.8212756175566646e-05, "loss": 1.0501, "step": 3462 }, { "epoch": 0.5937801401718927, "grad_norm": 1.734375, "learning_rate": 1.8211725702471984e-05, "loss": 1.0388, "step": 3463 }, { "epoch": 0.5939516042608826, "grad_norm": 1.5859375, "learning_rate": 1.8210694961562015e-05, "loss": 1.0025, "step": 3464 }, { "epoch": 0.5941230683498725, "grad_norm": 1.6796875, "learning_rate": 1.8209663952870373e-05, "loss": 1.046, "step": 3465 }, { "epoch": 0.5942945324388623, "grad_norm": 1.59375, "learning_rate": 1.8208632676430674e-05, "loss": 1.0592, "step": 3466 }, { "epoch": 0.5944659965278521, "grad_norm": 1.515625, "learning_rate": 1.820760113227655e-05, "loss": 0.9688, "step": 3467 }, { "epoch": 0.5946374606168421, "grad_norm": 1.65625, "learning_rate": 1.8206569320441645e-05, "loss": 0.9846, "step": 3468 }, { "epoch": 0.5948089247058319, "grad_norm": 1.59375, "learning_rate": 1.8205537240959615e-05, "loss": 0.9957, "step": 3469 }, { "epoch": 0.5949803887948217, "grad_norm": 1.640625, "learning_rate": 1.820450489386411e-05, "loss": 1.0043, "step": 3470 }, { "epoch": 0.5951518528838117, "grad_norm": 1.5859375, "learning_rate": 1.820347227918881e-05, "loss": 1.0168, "step": 3471 }, { "epoch": 0.5953233169728015, "grad_norm": 1.6484375, "learning_rate": 1.820243939696739e-05, "loss": 1.0769, "step": 3472 }, { "epoch": 0.5954947810617913, "grad_norm": 1.5546875, "learning_rate": 1.820140624723353e-05, "loss": 1.0118, "step": 3473 }, { "epoch": 0.5956662451507813, "grad_norm": 1.578125, "learning_rate": 1.820037283002093e-05, "loss": 1.0421, "step": 3474 }, { "epoch": 0.5958377092397711, "grad_norm": 1.6015625, "learning_rate": 1.8199339145363292e-05, "loss": 1.061, "step": 3475 }, { "epoch": 0.5960091733287609, "grad_norm": 1.625, "learning_rate": 1.819830519329433e-05, "loss": 1.0801, "step": 3476 }, { "epoch": 0.5961806374177508, "grad_norm": 1.671875, "learning_rate": 1.819727097384776e-05, "loss": 1.0314, "step": 3477 }, { "epoch": 0.5963521015067407, "grad_norm": 1.625, "learning_rate": 1.819623648705732e-05, "loss": 0.9982, "step": 3478 }, { "epoch": 0.5965235655957305, "grad_norm": 1.515625, "learning_rate": 1.819520173295674e-05, "loss": 1.0165, "step": 3479 }, { "epoch": 0.5966950296847204, "grad_norm": 1.6484375, "learning_rate": 1.819416671157977e-05, "loss": 1.0337, "step": 3480 }, { "epoch": 0.5968664937737103, "grad_norm": 1.5703125, "learning_rate": 1.819313142296017e-05, "loss": 1.0332, "step": 3481 }, { "epoch": 0.5970379578627001, "grad_norm": 1.625, "learning_rate": 1.8192095867131705e-05, "loss": 1.0734, "step": 3482 }, { "epoch": 0.59720942195169, "grad_norm": 1.5859375, "learning_rate": 1.819106004412814e-05, "loss": 1.1012, "step": 3483 }, { "epoch": 0.5973808860406798, "grad_norm": 1.625, "learning_rate": 1.8190023953983264e-05, "loss": 0.9893, "step": 3484 }, { "epoch": 0.5975523501296697, "grad_norm": 1.6171875, "learning_rate": 1.818898759673087e-05, "loss": 1.0762, "step": 3485 }, { "epoch": 0.5977238142186596, "grad_norm": 1.640625, "learning_rate": 1.8187950972404746e-05, "loss": 1.093, "step": 3486 }, { "epoch": 0.5978952783076494, "grad_norm": 1.59375, "learning_rate": 1.818691408103871e-05, "loss": 0.9958, "step": 3487 }, { "epoch": 0.5980667423966393, "grad_norm": 1.5859375, "learning_rate": 1.818587692266658e-05, "loss": 1.0037, "step": 3488 }, { "epoch": 0.5982382064856292, "grad_norm": 1.625, "learning_rate": 1.818483949732218e-05, "loss": 1.0488, "step": 3489 }, { "epoch": 0.598409670574619, "grad_norm": 1.5703125, "learning_rate": 1.8183801805039337e-05, "loss": 1.027, "step": 3490 }, { "epoch": 0.5985811346636088, "grad_norm": 1.734375, "learning_rate": 1.8182763845851902e-05, "loss": 1.053, "step": 3491 }, { "epoch": 0.5987525987525988, "grad_norm": 1.7109375, "learning_rate": 1.8181725619793727e-05, "loss": 1.0962, "step": 3492 }, { "epoch": 0.5989240628415886, "grad_norm": 1.5390625, "learning_rate": 1.8180687126898666e-05, "loss": 0.9581, "step": 3493 }, { "epoch": 0.5990955269305784, "grad_norm": 1.5703125, "learning_rate": 1.8179648367200596e-05, "loss": 0.9726, "step": 3494 }, { "epoch": 0.5992669910195684, "grad_norm": 1.515625, "learning_rate": 1.817860934073339e-05, "loss": 1.0337, "step": 3495 }, { "epoch": 0.5994384551085582, "grad_norm": 1.59375, "learning_rate": 1.8177570047530933e-05, "loss": 1.0834, "step": 3496 }, { "epoch": 0.599609919197548, "grad_norm": 1.546875, "learning_rate": 1.8176530487627123e-05, "loss": 0.9813, "step": 3497 }, { "epoch": 0.599781383286538, "grad_norm": 1.5390625, "learning_rate": 1.8175490661055864e-05, "loss": 0.9932, "step": 3498 }, { "epoch": 0.5999528473755278, "grad_norm": 1.609375, "learning_rate": 1.817445056785107e-05, "loss": 1.056, "step": 3499 }, { "epoch": 0.6001243114645176, "grad_norm": 1.5703125, "learning_rate": 1.8173410208046666e-05, "loss": 1.021, "step": 3500 }, { "epoch": 0.6001243114645176, "eval_loss": 0.8751522302627563, "eval_runtime": 837.1447, "eval_samples_per_second": 2.985, "eval_steps_per_second": 2.985, "step": 3500 }, { "epoch": 0.6002957755535075, "grad_norm": 1.6875, "learning_rate": 1.8172369581676567e-05, "loss": 1.0605, "step": 3501 }, { "epoch": 0.6004672396424974, "grad_norm": 1.5625, "learning_rate": 1.817132868877473e-05, "loss": 0.9984, "step": 3502 }, { "epoch": 0.6006387037314872, "grad_norm": 1.5390625, "learning_rate": 1.817028752937509e-05, "loss": 0.9996, "step": 3503 }, { "epoch": 0.6008101678204771, "grad_norm": 1.578125, "learning_rate": 1.8169246103511605e-05, "loss": 1.0918, "step": 3504 }, { "epoch": 0.600981631909467, "grad_norm": 1.6171875, "learning_rate": 1.8168204411218247e-05, "loss": 1.115, "step": 3505 }, { "epoch": 0.6011530959984568, "grad_norm": 1.625, "learning_rate": 1.8167162452528982e-05, "loss": 0.9894, "step": 3506 }, { "epoch": 0.6013245600874467, "grad_norm": 1.6015625, "learning_rate": 1.8166120227477796e-05, "loss": 1.0847, "step": 3507 }, { "epoch": 0.6014960241764366, "grad_norm": 1.515625, "learning_rate": 1.816507773609868e-05, "loss": 0.9875, "step": 3508 }, { "epoch": 0.6016674882654264, "grad_norm": 1.6875, "learning_rate": 1.8164034978425627e-05, "loss": 1.0064, "step": 3509 }, { "epoch": 0.6018389523544163, "grad_norm": 1.734375, "learning_rate": 1.8162991954492654e-05, "loss": 1.1553, "step": 3510 }, { "epoch": 0.6020104164434061, "grad_norm": 1.515625, "learning_rate": 1.8161948664333773e-05, "loss": 0.9663, "step": 3511 }, { "epoch": 0.602181880532396, "grad_norm": 1.5625, "learning_rate": 1.816090510798301e-05, "loss": 0.9988, "step": 3512 }, { "epoch": 0.6023533446213859, "grad_norm": 1.5234375, "learning_rate": 1.81598612854744e-05, "loss": 0.9419, "step": 3513 }, { "epoch": 0.6025248087103757, "grad_norm": 1.578125, "learning_rate": 1.8158817196841993e-05, "loss": 1.1279, "step": 3514 }, { "epoch": 0.6026962727993656, "grad_norm": 1.59375, "learning_rate": 1.815777284211983e-05, "loss": 1.0023, "step": 3515 }, { "epoch": 0.6028677368883555, "grad_norm": 1.6015625, "learning_rate": 1.815672822134197e-05, "loss": 1.1439, "step": 3516 }, { "epoch": 0.6030392009773453, "grad_norm": 1.6953125, "learning_rate": 1.8155683334542493e-05, "loss": 1.0355, "step": 3517 }, { "epoch": 0.6032106650663351, "grad_norm": 1.4921875, "learning_rate": 1.8154638181755467e-05, "loss": 1.0313, "step": 3518 }, { "epoch": 0.6033821291553251, "grad_norm": 1.734375, "learning_rate": 1.8153592763014987e-05, "loss": 1.1347, "step": 3519 }, { "epoch": 0.6035535932443149, "grad_norm": 1.4921875, "learning_rate": 1.8152547078355143e-05, "loss": 0.9597, "step": 3520 }, { "epoch": 0.6037250573333047, "grad_norm": 1.546875, "learning_rate": 1.8151501127810038e-05, "loss": 1.0531, "step": 3521 }, { "epoch": 0.6038965214222947, "grad_norm": 1.5078125, "learning_rate": 1.8150454911413783e-05, "loss": 0.9631, "step": 3522 }, { "epoch": 0.6040679855112845, "grad_norm": 1.5390625, "learning_rate": 1.8149408429200503e-05, "loss": 1.0816, "step": 3523 }, { "epoch": 0.6042394496002743, "grad_norm": 1.6015625, "learning_rate": 1.814836168120433e-05, "loss": 0.9617, "step": 3524 }, { "epoch": 0.6044109136892643, "grad_norm": 1.53125, "learning_rate": 1.8147314667459394e-05, "loss": 0.9863, "step": 3525 }, { "epoch": 0.6045823777782541, "grad_norm": 1.5859375, "learning_rate": 1.814626738799985e-05, "loss": 1.0777, "step": 3526 }, { "epoch": 0.6047538418672439, "grad_norm": 1.5703125, "learning_rate": 1.814521984285985e-05, "loss": 1.0113, "step": 3527 }, { "epoch": 0.6049253059562338, "grad_norm": 1.5859375, "learning_rate": 1.8144172032073558e-05, "loss": 0.9739, "step": 3528 }, { "epoch": 0.6050967700452237, "grad_norm": 1.6015625, "learning_rate": 1.8143123955675147e-05, "loss": 1.0028, "step": 3529 }, { "epoch": 0.6052682341342135, "grad_norm": 1.671875, "learning_rate": 1.8142075613698798e-05, "loss": 1.0616, "step": 3530 }, { "epoch": 0.6054396982232034, "grad_norm": 1.578125, "learning_rate": 1.8141027006178707e-05, "loss": 1.0612, "step": 3531 }, { "epoch": 0.6056111623121933, "grad_norm": 1.46875, "learning_rate": 1.813997813314907e-05, "loss": 0.9675, "step": 3532 }, { "epoch": 0.6057826264011831, "grad_norm": 1.5546875, "learning_rate": 1.8138928994644088e-05, "loss": 1.0159, "step": 3533 }, { "epoch": 0.605954090490173, "grad_norm": 1.6015625, "learning_rate": 1.8137879590697986e-05, "loss": 1.0209, "step": 3534 }, { "epoch": 0.6061255545791628, "grad_norm": 1.53125, "learning_rate": 1.8136829921344986e-05, "loss": 1.0564, "step": 3535 }, { "epoch": 0.6062970186681527, "grad_norm": 1.6484375, "learning_rate": 1.8135779986619324e-05, "loss": 1.0058, "step": 3536 }, { "epoch": 0.6064684827571426, "grad_norm": 1.578125, "learning_rate": 1.813472978655524e-05, "loss": 1.0321, "step": 3537 }, { "epoch": 0.6066399468461324, "grad_norm": 1.59375, "learning_rate": 1.813367932118698e-05, "loss": 1.0533, "step": 3538 }, { "epoch": 0.6068114109351223, "grad_norm": 1.59375, "learning_rate": 1.8132628590548813e-05, "loss": 0.9243, "step": 3539 }, { "epoch": 0.6069828750241122, "grad_norm": 1.609375, "learning_rate": 1.8131577594675003e-05, "loss": 1.053, "step": 3540 }, { "epoch": 0.607154339113102, "grad_norm": 1.6484375, "learning_rate": 1.8130526333599826e-05, "loss": 1.0014, "step": 3541 }, { "epoch": 0.6073258032020918, "grad_norm": 1.8359375, "learning_rate": 1.8129474807357567e-05, "loss": 0.9082, "step": 3542 }, { "epoch": 0.6074972672910818, "grad_norm": 1.546875, "learning_rate": 1.8128423015982526e-05, "loss": 1.0552, "step": 3543 }, { "epoch": 0.6076687313800716, "grad_norm": 1.625, "learning_rate": 1.8127370959508995e-05, "loss": 1.1296, "step": 3544 }, { "epoch": 0.6078401954690614, "grad_norm": 1.5703125, "learning_rate": 1.81263186379713e-05, "loss": 0.9598, "step": 3545 }, { "epoch": 0.6080116595580514, "grad_norm": 1.59375, "learning_rate": 1.812526605140375e-05, "loss": 1.0185, "step": 3546 }, { "epoch": 0.6081831236470412, "grad_norm": 1.6015625, "learning_rate": 1.8124213199840674e-05, "loss": 0.9668, "step": 3547 }, { "epoch": 0.608354587736031, "grad_norm": 1.546875, "learning_rate": 1.8123160083316417e-05, "loss": 1.0008, "step": 3548 }, { "epoch": 0.6085260518250208, "grad_norm": 1.5859375, "learning_rate": 1.8122106701865317e-05, "loss": 0.9254, "step": 3549 }, { "epoch": 0.6086975159140108, "grad_norm": 1.5859375, "learning_rate": 1.8121053055521736e-05, "loss": 1.021, "step": 3550 }, { "epoch": 0.6088689800030006, "grad_norm": 1.59375, "learning_rate": 1.811999914432003e-05, "loss": 1.0135, "step": 3551 }, { "epoch": 0.6090404440919904, "grad_norm": 1.515625, "learning_rate": 1.8118944968294578e-05, "loss": 0.9984, "step": 3552 }, { "epoch": 0.6092119081809804, "grad_norm": 1.4609375, "learning_rate": 1.8117890527479757e-05, "loss": 0.8945, "step": 3553 }, { "epoch": 0.6093833722699702, "grad_norm": 1.4921875, "learning_rate": 1.811683582190996e-05, "loss": 0.9852, "step": 3554 }, { "epoch": 0.60955483635896, "grad_norm": 1.578125, "learning_rate": 1.8115780851619575e-05, "loss": 1.0574, "step": 3555 }, { "epoch": 0.60972630044795, "grad_norm": 1.65625, "learning_rate": 1.8114725616643018e-05, "loss": 1.0476, "step": 3556 }, { "epoch": 0.6098977645369398, "grad_norm": 1.546875, "learning_rate": 1.8113670117014702e-05, "loss": 1.0345, "step": 3557 }, { "epoch": 0.6100692286259296, "grad_norm": 1.484375, "learning_rate": 1.8112614352769048e-05, "loss": 0.9556, "step": 3558 }, { "epoch": 0.6102406927149195, "grad_norm": 1.5859375, "learning_rate": 1.8111558323940493e-05, "loss": 1.0069, "step": 3559 }, { "epoch": 0.6104121568039094, "grad_norm": 1.640625, "learning_rate": 1.8110502030563477e-05, "loss": 1.0486, "step": 3560 }, { "epoch": 0.6105836208928992, "grad_norm": 1.6171875, "learning_rate": 1.8109445472672448e-05, "loss": 0.9778, "step": 3561 }, { "epoch": 0.6107550849818891, "grad_norm": 1.5859375, "learning_rate": 1.8108388650301862e-05, "loss": 1.0094, "step": 3562 }, { "epoch": 0.610926549070879, "grad_norm": 1.5859375, "learning_rate": 1.810733156348619e-05, "loss": 0.9897, "step": 3563 }, { "epoch": 0.6110980131598688, "grad_norm": 1.625, "learning_rate": 1.8106274212259906e-05, "loss": 1.0612, "step": 3564 }, { "epoch": 0.6112694772488587, "grad_norm": 1.6015625, "learning_rate": 1.8105216596657497e-05, "loss": 0.9924, "step": 3565 }, { "epoch": 0.6114409413378485, "grad_norm": 1.5546875, "learning_rate": 1.810415871671345e-05, "loss": 1.0056, "step": 3566 }, { "epoch": 0.6116124054268384, "grad_norm": 1.671875, "learning_rate": 1.810310057246227e-05, "loss": 1.1138, "step": 3567 }, { "epoch": 0.6117838695158283, "grad_norm": 1.6328125, "learning_rate": 1.8102042163938472e-05, "loss": 1.1251, "step": 3568 }, { "epoch": 0.6119553336048181, "grad_norm": 1.5703125, "learning_rate": 1.8100983491176563e-05, "loss": 0.981, "step": 3569 }, { "epoch": 0.612126797693808, "grad_norm": 1.53125, "learning_rate": 1.809992455421108e-05, "loss": 1.0471, "step": 3570 }, { "epoch": 0.6122982617827979, "grad_norm": 1.6328125, "learning_rate": 1.8098865353076554e-05, "loss": 1.0886, "step": 3571 }, { "epoch": 0.6124697258717877, "grad_norm": 1.7109375, "learning_rate": 1.8097805887807536e-05, "loss": 1.1298, "step": 3572 }, { "epoch": 0.6126411899607775, "grad_norm": 1.625, "learning_rate": 1.8096746158438568e-05, "loss": 1.0555, "step": 3573 }, { "epoch": 0.6128126540497675, "grad_norm": 1.6171875, "learning_rate": 1.8095686165004222e-05, "loss": 1.0145, "step": 3574 }, { "epoch": 0.6129841181387573, "grad_norm": 1.5390625, "learning_rate": 1.8094625907539065e-05, "loss": 0.9968, "step": 3575 }, { "epoch": 0.6131555822277471, "grad_norm": 1.5625, "learning_rate": 1.8093565386077675e-05, "loss": 0.9829, "step": 3576 }, { "epoch": 0.6133270463167371, "grad_norm": 1.609375, "learning_rate": 1.8092504600654642e-05, "loss": 1.142, "step": 3577 }, { "epoch": 0.6134985104057269, "grad_norm": 1.5234375, "learning_rate": 1.809144355130456e-05, "loss": 1.0224, "step": 3578 }, { "epoch": 0.6136699744947167, "grad_norm": 1.6171875, "learning_rate": 1.8090382238062028e-05, "loss": 1.0181, "step": 3579 }, { "epoch": 0.6138414385837067, "grad_norm": 1.5234375, "learning_rate": 1.808932066096167e-05, "loss": 1.0756, "step": 3580 }, { "epoch": 0.6140129026726965, "grad_norm": 1.5546875, "learning_rate": 1.8088258820038105e-05, "loss": 1.0064, "step": 3581 }, { "epoch": 0.6141843667616863, "grad_norm": 1.5078125, "learning_rate": 1.8087196715325962e-05, "loss": 0.9854, "step": 3582 }, { "epoch": 0.6143558308506762, "grad_norm": 1.625, "learning_rate": 1.808613434685988e-05, "loss": 0.9552, "step": 3583 }, { "epoch": 0.6145272949396661, "grad_norm": 1.515625, "learning_rate": 1.8085071714674506e-05, "loss": 0.979, "step": 3584 }, { "epoch": 0.6146987590286559, "grad_norm": 1.5859375, "learning_rate": 1.80840088188045e-05, "loss": 1.1012, "step": 3585 }, { "epoch": 0.6148702231176458, "grad_norm": 1.546875, "learning_rate": 1.8082945659284522e-05, "loss": 1.0108, "step": 3586 }, { "epoch": 0.6150416872066357, "grad_norm": 1.5390625, "learning_rate": 1.8081882236149253e-05, "loss": 0.9735, "step": 3587 }, { "epoch": 0.6152131512956255, "grad_norm": 1.59375, "learning_rate": 1.8080818549433367e-05, "loss": 1.0742, "step": 3588 }, { "epoch": 0.6153846153846154, "grad_norm": 1.59375, "learning_rate": 1.807975459917156e-05, "loss": 1.1254, "step": 3589 }, { "epoch": 0.6155560794736052, "grad_norm": 1.5625, "learning_rate": 1.807869038539853e-05, "loss": 1.0363, "step": 3590 }, { "epoch": 0.6157275435625951, "grad_norm": 1.578125, "learning_rate": 1.8077625908148983e-05, "loss": 1.0375, "step": 3591 }, { "epoch": 0.615899007651585, "grad_norm": 1.5, "learning_rate": 1.807656116745764e-05, "loss": 0.9396, "step": 3592 }, { "epoch": 0.6160704717405748, "grad_norm": 1.6015625, "learning_rate": 1.8075496163359224e-05, "loss": 1.0285, "step": 3593 }, { "epoch": 0.6162419358295647, "grad_norm": 1.5625, "learning_rate": 1.8074430895888466e-05, "loss": 0.9645, "step": 3594 }, { "epoch": 0.6164133999185546, "grad_norm": 1.625, "learning_rate": 1.8073365365080114e-05, "loss": 1.091, "step": 3595 }, { "epoch": 0.6165848640075444, "grad_norm": 1.6875, "learning_rate": 1.8072299570968915e-05, "loss": 1.0924, "step": 3596 }, { "epoch": 0.6167563280965342, "grad_norm": 1.65625, "learning_rate": 1.8071233513589626e-05, "loss": 1.0404, "step": 3597 }, { "epoch": 0.6169277921855242, "grad_norm": 1.59375, "learning_rate": 1.8070167192977024e-05, "loss": 1.0805, "step": 3598 }, { "epoch": 0.617099256274514, "grad_norm": 1.5390625, "learning_rate": 1.8069100609165878e-05, "loss": 1.0391, "step": 3599 }, { "epoch": 0.6172707203635038, "grad_norm": 1.6015625, "learning_rate": 1.8068033762190977e-05, "loss": 1.083, "step": 3600 }, { "epoch": 0.6174421844524938, "grad_norm": 1.6328125, "learning_rate": 1.8066966652087114e-05, "loss": 0.9962, "step": 3601 }, { "epoch": 0.6176136485414836, "grad_norm": 1.75, "learning_rate": 1.8065899278889088e-05, "loss": 1.0419, "step": 3602 }, { "epoch": 0.6177851126304734, "grad_norm": 1.53125, "learning_rate": 1.8064831642631712e-05, "loss": 1.1506, "step": 3603 }, { "epoch": 0.6179565767194634, "grad_norm": 1.640625, "learning_rate": 1.8063763743349813e-05, "loss": 1.063, "step": 3604 }, { "epoch": 0.6181280408084532, "grad_norm": 1.515625, "learning_rate": 1.806269558107821e-05, "loss": 0.9929, "step": 3605 }, { "epoch": 0.618299504897443, "grad_norm": 1.53125, "learning_rate": 1.806162715585174e-05, "loss": 0.9639, "step": 3606 }, { "epoch": 0.618470968986433, "grad_norm": 1.5859375, "learning_rate": 1.8060558467705254e-05, "loss": 0.9289, "step": 3607 }, { "epoch": 0.6186424330754228, "grad_norm": 1.5703125, "learning_rate": 1.8059489516673603e-05, "loss": 0.9864, "step": 3608 }, { "epoch": 0.6188138971644126, "grad_norm": 1.6328125, "learning_rate": 1.8058420302791647e-05, "loss": 1.0511, "step": 3609 }, { "epoch": 0.6189853612534025, "grad_norm": 1.5859375, "learning_rate": 1.8057350826094263e-05, "loss": 0.9789, "step": 3610 }, { "epoch": 0.6191568253423924, "grad_norm": 1.515625, "learning_rate": 1.805628108661633e-05, "loss": 1.0056, "step": 3611 }, { "epoch": 0.6193282894313822, "grad_norm": 1.625, "learning_rate": 1.8055211084392728e-05, "loss": 0.9905, "step": 3612 }, { "epoch": 0.6194997535203721, "grad_norm": 1.6015625, "learning_rate": 1.8054140819458362e-05, "loss": 0.9457, "step": 3613 }, { "epoch": 0.619671217609362, "grad_norm": 2.078125, "learning_rate": 1.8053070291848132e-05, "loss": 1.0677, "step": 3614 }, { "epoch": 0.6198426816983518, "grad_norm": 1.453125, "learning_rate": 1.805199950159696e-05, "loss": 0.9021, "step": 3615 }, { "epoch": 0.6200141457873417, "grad_norm": 1.6484375, "learning_rate": 1.805092844873976e-05, "loss": 1.0903, "step": 3616 }, { "epoch": 0.6201856098763315, "grad_norm": 1.59375, "learning_rate": 1.8049857133311467e-05, "loss": 1.01, "step": 3617 }, { "epoch": 0.6203570739653214, "grad_norm": 1.640625, "learning_rate": 1.804878555534702e-05, "loss": 0.9704, "step": 3618 }, { "epoch": 0.6205285380543113, "grad_norm": 1.609375, "learning_rate": 1.8047713714881366e-05, "loss": 0.9934, "step": 3619 }, { "epoch": 0.6207000021433011, "grad_norm": 1.6015625, "learning_rate": 1.8046641611949463e-05, "loss": 1.0609, "step": 3620 }, { "epoch": 0.620871466232291, "grad_norm": 2.3125, "learning_rate": 1.8045569246586274e-05, "loss": 1.0085, "step": 3621 }, { "epoch": 0.6210429303212809, "grad_norm": 1.609375, "learning_rate": 1.804449661882678e-05, "loss": 1.0796, "step": 3622 }, { "epoch": 0.6212143944102707, "grad_norm": 1.6484375, "learning_rate": 1.8043423728705955e-05, "loss": 1.0437, "step": 3623 }, { "epoch": 0.6213858584992605, "grad_norm": 1.53125, "learning_rate": 1.8042350576258796e-05, "loss": 1.0536, "step": 3624 }, { "epoch": 0.6215573225882505, "grad_norm": 1.5546875, "learning_rate": 1.8041277161520296e-05, "loss": 0.9373, "step": 3625 }, { "epoch": 0.6217287866772403, "grad_norm": 2.59375, "learning_rate": 1.804020348452547e-05, "loss": 1.0301, "step": 3626 }, { "epoch": 0.6219002507662301, "grad_norm": 1.5703125, "learning_rate": 1.8039129545309333e-05, "loss": 1.0713, "step": 3627 }, { "epoch": 0.6220717148552201, "grad_norm": 1.546875, "learning_rate": 1.8038055343906906e-05, "loss": 0.9714, "step": 3628 }, { "epoch": 0.6222431789442099, "grad_norm": 1.515625, "learning_rate": 1.803698088035323e-05, "loss": 1.069, "step": 3629 }, { "epoch": 0.6224146430331997, "grad_norm": 1.6875, "learning_rate": 1.8035906154683342e-05, "loss": 1.0507, "step": 3630 }, { "epoch": 0.6225861071221896, "grad_norm": 1.8359375, "learning_rate": 1.8034831166932288e-05, "loss": 1.0842, "step": 3631 }, { "epoch": 0.6227575712111795, "grad_norm": 1.609375, "learning_rate": 1.803375591713514e-05, "loss": 0.9876, "step": 3632 }, { "epoch": 0.6229290353001693, "grad_norm": 1.5390625, "learning_rate": 1.8032680405326956e-05, "loss": 0.9642, "step": 3633 }, { "epoch": 0.6231004993891592, "grad_norm": 1.7421875, "learning_rate": 1.8031604631542815e-05, "loss": 1.0226, "step": 3634 }, { "epoch": 0.6232719634781491, "grad_norm": 1.5859375, "learning_rate": 1.8030528595817804e-05, "loss": 1.0634, "step": 3635 }, { "epoch": 0.6234434275671389, "grad_norm": 1.4453125, "learning_rate": 1.8029452298187015e-05, "loss": 0.9581, "step": 3636 }, { "epoch": 0.6236148916561288, "grad_norm": 1.6953125, "learning_rate": 1.802837573868555e-05, "loss": 1.0147, "step": 3637 }, { "epoch": 0.6237863557451186, "grad_norm": 1.7109375, "learning_rate": 1.802729891734852e-05, "loss": 0.9945, "step": 3638 }, { "epoch": 0.6239578198341085, "grad_norm": 1.578125, "learning_rate": 1.8026221834211045e-05, "loss": 0.9873, "step": 3639 }, { "epoch": 0.6241292839230984, "grad_norm": 2.296875, "learning_rate": 1.8025144489308255e-05, "loss": 1.0331, "step": 3640 }, { "epoch": 0.6243007480120882, "grad_norm": 1.5625, "learning_rate": 1.802406688267528e-05, "loss": 0.9741, "step": 3641 }, { "epoch": 0.6244722121010781, "grad_norm": 1.5703125, "learning_rate": 1.802298901434727e-05, "loss": 1.0698, "step": 3642 }, { "epoch": 0.624643676190068, "grad_norm": 1.71875, "learning_rate": 1.8021910884359373e-05, "loss": 1.0824, "step": 3643 }, { "epoch": 0.6248151402790578, "grad_norm": 1.546875, "learning_rate": 1.802083249274676e-05, "loss": 0.9868, "step": 3644 }, { "epoch": 0.6249866043680476, "grad_norm": 1.90625, "learning_rate": 1.8019753839544588e-05, "loss": 0.9885, "step": 3645 }, { "epoch": 0.6251580684570375, "grad_norm": 1.5546875, "learning_rate": 1.801867492478805e-05, "loss": 0.9759, "step": 3646 }, { "epoch": 0.6253295325460274, "grad_norm": 1.53125, "learning_rate": 1.8017595748512327e-05, "loss": 0.9293, "step": 3647 }, { "epoch": 0.6255009966350172, "grad_norm": 1.625, "learning_rate": 1.8016516310752614e-05, "loss": 1.0215, "step": 3648 }, { "epoch": 0.6256724607240071, "grad_norm": 1.5390625, "learning_rate": 1.8015436611544112e-05, "loss": 1.0435, "step": 3649 }, { "epoch": 0.625843924812997, "grad_norm": 1.5859375, "learning_rate": 1.8014356650922043e-05, "loss": 1.016, "step": 3650 }, { "epoch": 0.6260153889019868, "grad_norm": 1.6171875, "learning_rate": 1.8013276428921625e-05, "loss": 1.0187, "step": 3651 }, { "epoch": 0.6261868529909767, "grad_norm": 1.6484375, "learning_rate": 1.8012195945578085e-05, "loss": 1.0304, "step": 3652 }, { "epoch": 0.6263583170799666, "grad_norm": 1.6328125, "learning_rate": 1.8011115200926666e-05, "loss": 1.1196, "step": 3653 }, { "epoch": 0.6265297811689564, "grad_norm": 1.5390625, "learning_rate": 1.8010034195002616e-05, "loss": 0.9716, "step": 3654 }, { "epoch": 0.6267012452579462, "grad_norm": 1.5703125, "learning_rate": 1.800895292784118e-05, "loss": 1.0112, "step": 3655 }, { "epoch": 0.6268727093469362, "grad_norm": 1.6640625, "learning_rate": 1.800787139947764e-05, "loss": 1.0518, "step": 3656 }, { "epoch": 0.627044173435926, "grad_norm": 1.5546875, "learning_rate": 1.800678960994725e-05, "loss": 0.9209, "step": 3657 }, { "epoch": 0.6272156375249158, "grad_norm": 1.640625, "learning_rate": 1.8005707559285306e-05, "loss": 1.0306, "step": 3658 }, { "epoch": 0.6273871016139058, "grad_norm": 1.5625, "learning_rate": 1.800462524752709e-05, "loss": 1.0427, "step": 3659 }, { "epoch": 0.6275585657028956, "grad_norm": 1.609375, "learning_rate": 1.80035426747079e-05, "loss": 1.0499, "step": 3660 }, { "epoch": 0.6277300297918854, "grad_norm": 1.5234375, "learning_rate": 1.8002459840863044e-05, "loss": 0.9191, "step": 3661 }, { "epoch": 0.6279014938808753, "grad_norm": 1.6875, "learning_rate": 1.8001376746027844e-05, "loss": 1.1145, "step": 3662 }, { "epoch": 0.6280729579698652, "grad_norm": 1.5390625, "learning_rate": 1.800029339023761e-05, "loss": 0.9926, "step": 3663 }, { "epoch": 0.628244422058855, "grad_norm": 1.609375, "learning_rate": 1.799920977352769e-05, "loss": 1.0446, "step": 3664 }, { "epoch": 0.6284158861478449, "grad_norm": 1.6328125, "learning_rate": 1.799812589593341e-05, "loss": 1.0364, "step": 3665 }, { "epoch": 0.6285873502368348, "grad_norm": 1.4765625, "learning_rate": 1.7997041757490133e-05, "loss": 0.954, "step": 3666 }, { "epoch": 0.6287588143258246, "grad_norm": 1.671875, "learning_rate": 1.7995957358233206e-05, "loss": 1.0657, "step": 3667 }, { "epoch": 0.6289302784148145, "grad_norm": 1.6015625, "learning_rate": 1.7994872698198e-05, "loss": 1.0128, "step": 3668 }, { "epoch": 0.6291017425038044, "grad_norm": 1.5234375, "learning_rate": 1.799378777741989e-05, "loss": 1.011, "step": 3669 }, { "epoch": 0.6292732065927942, "grad_norm": 1.625, "learning_rate": 1.7992702595934262e-05, "loss": 1.1297, "step": 3670 }, { "epoch": 0.6294446706817841, "grad_norm": 1.578125, "learning_rate": 1.79916171537765e-05, "loss": 1.0064, "step": 3671 }, { "epoch": 0.6296161347707739, "grad_norm": 1.5703125, "learning_rate": 1.7990531450982013e-05, "loss": 0.9921, "step": 3672 }, { "epoch": 0.6297875988597638, "grad_norm": 1.4765625, "learning_rate": 1.7989445487586202e-05, "loss": 1.0384, "step": 3673 }, { "epoch": 0.6299590629487537, "grad_norm": 1.5546875, "learning_rate": 1.798835926362449e-05, "loss": 1.1058, "step": 3674 }, { "epoch": 0.6301305270377435, "grad_norm": 1.515625, "learning_rate": 1.79872727791323e-05, "loss": 0.9876, "step": 3675 }, { "epoch": 0.6303019911267334, "grad_norm": 1.6171875, "learning_rate": 1.798618603414507e-05, "loss": 1.0731, "step": 3676 }, { "epoch": 0.6304734552157233, "grad_norm": 1.6953125, "learning_rate": 1.7985099028698237e-05, "loss": 1.0111, "step": 3677 }, { "epoch": 0.6306449193047131, "grad_norm": 1.5625, "learning_rate": 1.7984011762827257e-05, "loss": 0.9985, "step": 3678 }, { "epoch": 0.6308163833937029, "grad_norm": 1.609375, "learning_rate": 1.7982924236567587e-05, "loss": 1.0882, "step": 3679 }, { "epoch": 0.6309878474826929, "grad_norm": 1.609375, "learning_rate": 1.79818364499547e-05, "loss": 1.0011, "step": 3680 }, { "epoch": 0.6311593115716827, "grad_norm": 1.6875, "learning_rate": 1.7980748403024067e-05, "loss": 1.0398, "step": 3681 }, { "epoch": 0.6313307756606725, "grad_norm": 1.59375, "learning_rate": 1.7979660095811177e-05, "loss": 1.0896, "step": 3682 }, { "epoch": 0.6315022397496625, "grad_norm": 1.7109375, "learning_rate": 1.7978571528351517e-05, "loss": 1.0367, "step": 3683 }, { "epoch": 0.6316737038386523, "grad_norm": 1.6015625, "learning_rate": 1.7977482700680605e-05, "loss": 1.0867, "step": 3684 }, { "epoch": 0.6318451679276421, "grad_norm": 1.640625, "learning_rate": 1.7976393612833935e-05, "loss": 1.028, "step": 3685 }, { "epoch": 0.632016632016632, "grad_norm": 1.6328125, "learning_rate": 1.7975304264847035e-05, "loss": 1.0476, "step": 3686 }, { "epoch": 0.6321880961056219, "grad_norm": 1.578125, "learning_rate": 1.7974214656755432e-05, "loss": 0.9904, "step": 3687 }, { "epoch": 0.6323595601946117, "grad_norm": 1.6484375, "learning_rate": 1.7973124788594658e-05, "loss": 1.0644, "step": 3688 }, { "epoch": 0.6325310242836016, "grad_norm": 1.5234375, "learning_rate": 1.797203466040026e-05, "loss": 1.0607, "step": 3689 }, { "epoch": 0.6327024883725915, "grad_norm": 1.53125, "learning_rate": 1.7970944272207795e-05, "loss": 0.9432, "step": 3690 }, { "epoch": 0.6328739524615813, "grad_norm": 1.53125, "learning_rate": 1.796985362405282e-05, "loss": 1.0377, "step": 3691 }, { "epoch": 0.6330454165505712, "grad_norm": 1.7265625, "learning_rate": 1.7968762715970905e-05, "loss": 1.1736, "step": 3692 }, { "epoch": 0.633216880639561, "grad_norm": 1.578125, "learning_rate": 1.7967671547997633e-05, "loss": 1.0287, "step": 3693 }, { "epoch": 0.6333883447285509, "grad_norm": 1.5390625, "learning_rate": 1.7966580120168588e-05, "loss": 1.0058, "step": 3694 }, { "epoch": 0.6335598088175408, "grad_norm": 1.7109375, "learning_rate": 1.7965488432519363e-05, "loss": 1.0454, "step": 3695 }, { "epoch": 0.6337312729065306, "grad_norm": 1.640625, "learning_rate": 1.796439648508557e-05, "loss": 0.9996, "step": 3696 }, { "epoch": 0.6339027369955205, "grad_norm": 1.5625, "learning_rate": 1.7963304277902815e-05, "loss": 0.9916, "step": 3697 }, { "epoch": 0.6340742010845104, "grad_norm": 1.5390625, "learning_rate": 1.7962211811006717e-05, "loss": 1.0748, "step": 3698 }, { "epoch": 0.6342456651735002, "grad_norm": 1.578125, "learning_rate": 1.7961119084432914e-05, "loss": 0.9708, "step": 3699 }, { "epoch": 0.63441712926249, "grad_norm": 1.4921875, "learning_rate": 1.7960026098217033e-05, "loss": 1.0117, "step": 3700 }, { "epoch": 0.63458859335148, "grad_norm": 1.625, "learning_rate": 1.795893285239473e-05, "loss": 1.0854, "step": 3701 }, { "epoch": 0.6347600574404698, "grad_norm": 1.5625, "learning_rate": 1.7957839347001656e-05, "loss": 1.0339, "step": 3702 }, { "epoch": 0.6349315215294596, "grad_norm": 1.578125, "learning_rate": 1.7956745582073472e-05, "loss": 1.0549, "step": 3703 }, { "epoch": 0.6351029856184496, "grad_norm": 1.65625, "learning_rate": 1.7955651557645856e-05, "loss": 1.0122, "step": 3704 }, { "epoch": 0.6352744497074394, "grad_norm": 1.5234375, "learning_rate": 1.795455727375448e-05, "loss": 0.9592, "step": 3705 }, { "epoch": 0.6354459137964292, "grad_norm": 1.6484375, "learning_rate": 1.7953462730435043e-05, "loss": 1.1369, "step": 3706 }, { "epoch": 0.6356173778854192, "grad_norm": 1.53125, "learning_rate": 1.795236792772323e-05, "loss": 0.9327, "step": 3707 }, { "epoch": 0.635788841974409, "grad_norm": 1.546875, "learning_rate": 1.795127286565476e-05, "loss": 1.0152, "step": 3708 }, { "epoch": 0.6359603060633988, "grad_norm": 1.6015625, "learning_rate": 1.7950177544265333e-05, "loss": 1.049, "step": 3709 }, { "epoch": 0.6361317701523888, "grad_norm": 1.5390625, "learning_rate": 1.7949081963590683e-05, "loss": 1.0378, "step": 3710 }, { "epoch": 0.6363032342413786, "grad_norm": 1.609375, "learning_rate": 1.7947986123666536e-05, "loss": 1.0444, "step": 3711 }, { "epoch": 0.6364746983303684, "grad_norm": 1.5546875, "learning_rate": 1.7946890024528633e-05, "loss": 1.0106, "step": 3712 }, { "epoch": 0.6366461624193583, "grad_norm": 1.5625, "learning_rate": 1.794579366621272e-05, "loss": 1.0274, "step": 3713 }, { "epoch": 0.6368176265083482, "grad_norm": 1.5546875, "learning_rate": 1.7944697048754552e-05, "loss": 0.9483, "step": 3714 }, { "epoch": 0.636989090597338, "grad_norm": 1.5859375, "learning_rate": 1.79436001721899e-05, "loss": 1.096, "step": 3715 }, { "epoch": 0.6371605546863279, "grad_norm": 1.5234375, "learning_rate": 1.794250303655453e-05, "loss": 1.0592, "step": 3716 }, { "epoch": 0.6373320187753178, "grad_norm": 1.5703125, "learning_rate": 1.794140564188423e-05, "loss": 1.0463, "step": 3717 }, { "epoch": 0.6375034828643076, "grad_norm": 1.546875, "learning_rate": 1.794030798821479e-05, "loss": 0.9117, "step": 3718 }, { "epoch": 0.6376749469532975, "grad_norm": 1.671875, "learning_rate": 1.7939210075582003e-05, "loss": 1.051, "step": 3719 }, { "epoch": 0.6378464110422873, "grad_norm": 1.5703125, "learning_rate": 1.7938111904021677e-05, "loss": 1.0476, "step": 3720 }, { "epoch": 0.6380178751312772, "grad_norm": 1.609375, "learning_rate": 1.7937013473569633e-05, "loss": 0.9729, "step": 3721 }, { "epoch": 0.6381893392202671, "grad_norm": 1.625, "learning_rate": 1.793591478426169e-05, "loss": 1.0366, "step": 3722 }, { "epoch": 0.6383608033092569, "grad_norm": 1.5625, "learning_rate": 1.7934815836133684e-05, "loss": 1.0052, "step": 3723 }, { "epoch": 0.6385322673982468, "grad_norm": 1.6328125, "learning_rate": 1.7933716629221455e-05, "loss": 1.0623, "step": 3724 }, { "epoch": 0.6387037314872367, "grad_norm": 1.5546875, "learning_rate": 1.7932617163560847e-05, "loss": 0.9928, "step": 3725 }, { "epoch": 0.6388751955762265, "grad_norm": 1.5625, "learning_rate": 1.793151743918772e-05, "loss": 1.0026, "step": 3726 }, { "epoch": 0.6390466596652163, "grad_norm": 1.5234375, "learning_rate": 1.793041745613795e-05, "loss": 1.0157, "step": 3727 }, { "epoch": 0.6392181237542063, "grad_norm": 1.46875, "learning_rate": 1.7929317214447395e-05, "loss": 1.0197, "step": 3728 }, { "epoch": 0.6393895878431961, "grad_norm": 1.515625, "learning_rate": 1.7928216714151953e-05, "loss": 0.9981, "step": 3729 }, { "epoch": 0.6395610519321859, "grad_norm": 1.5546875, "learning_rate": 1.7927115955287506e-05, "loss": 1.009, "step": 3730 }, { "epoch": 0.6397325160211759, "grad_norm": 1.6171875, "learning_rate": 1.7926014937889956e-05, "loss": 1.0699, "step": 3731 }, { "epoch": 0.6399039801101657, "grad_norm": 1.609375, "learning_rate": 1.7924913661995212e-05, "loss": 1.0051, "step": 3732 }, { "epoch": 0.6400754441991555, "grad_norm": 1.7265625, "learning_rate": 1.7923812127639194e-05, "loss": 1.063, "step": 3733 }, { "epoch": 0.6402469082881455, "grad_norm": 1.6484375, "learning_rate": 1.7922710334857824e-05, "loss": 0.9803, "step": 3734 }, { "epoch": 0.6404183723771353, "grad_norm": 1.609375, "learning_rate": 1.7921608283687035e-05, "loss": 1.0446, "step": 3735 }, { "epoch": 0.6405898364661251, "grad_norm": 1.640625, "learning_rate": 1.7920505974162766e-05, "loss": 1.02, "step": 3736 }, { "epoch": 0.640761300555115, "grad_norm": 1.6015625, "learning_rate": 1.7919403406320978e-05, "loss": 1.007, "step": 3737 }, { "epoch": 0.6409327646441049, "grad_norm": 1.609375, "learning_rate": 1.7918300580197617e-05, "loss": 1.0234, "step": 3738 }, { "epoch": 0.6411042287330947, "grad_norm": 1.578125, "learning_rate": 1.7917197495828663e-05, "loss": 0.9882, "step": 3739 }, { "epoch": 0.6412756928220845, "grad_norm": 1.640625, "learning_rate": 1.791609415325008e-05, "loss": 1.0269, "step": 3740 }, { "epoch": 0.6414471569110745, "grad_norm": 1.6015625, "learning_rate": 1.7914990552497863e-05, "loss": 1.018, "step": 3741 }, { "epoch": 0.6416186210000643, "grad_norm": 1.5625, "learning_rate": 1.7913886693607992e-05, "loss": 1.0189, "step": 3742 }, { "epoch": 0.6417900850890541, "grad_norm": 1.5234375, "learning_rate": 1.791278257661648e-05, "loss": 1.0041, "step": 3743 }, { "epoch": 0.641961549178044, "grad_norm": 1.5703125, "learning_rate": 1.791167820155933e-05, "loss": 1.0526, "step": 3744 }, { "epoch": 0.6421330132670339, "grad_norm": 1.515625, "learning_rate": 1.7910573568472556e-05, "loss": 1.0612, "step": 3745 }, { "epoch": 0.6423044773560237, "grad_norm": 1.5390625, "learning_rate": 1.7909468677392198e-05, "loss": 1.0807, "step": 3746 }, { "epoch": 0.6424759414450136, "grad_norm": 1.5390625, "learning_rate": 1.790836352835428e-05, "loss": 1.0416, "step": 3747 }, { "epoch": 0.6426474055340035, "grad_norm": 1.5234375, "learning_rate": 1.7907258121394843e-05, "loss": 0.9954, "step": 3748 }, { "epoch": 0.6428188696229933, "grad_norm": 1.515625, "learning_rate": 1.7906152456549946e-05, "loss": 0.9858, "step": 3749 }, { "epoch": 0.6429903337119832, "grad_norm": 1.546875, "learning_rate": 1.7905046533855642e-05, "loss": 1.0425, "step": 3750 }, { "epoch": 0.643161797800973, "grad_norm": 1.5234375, "learning_rate": 1.790394035334801e-05, "loss": 0.9905, "step": 3751 }, { "epoch": 0.6433332618899629, "grad_norm": 1.609375, "learning_rate": 1.790283391506311e-05, "loss": 0.9942, "step": 3752 }, { "epoch": 0.6435047259789528, "grad_norm": 1.59375, "learning_rate": 1.7901727219037047e-05, "loss": 1.0426, "step": 3753 }, { "epoch": 0.6436761900679426, "grad_norm": 1.5546875, "learning_rate": 1.7900620265305895e-05, "loss": 1.1176, "step": 3754 }, { "epoch": 0.6438476541569325, "grad_norm": 1.6171875, "learning_rate": 1.7899513053905772e-05, "loss": 1.0297, "step": 3755 }, { "epoch": 0.6440191182459224, "grad_norm": 1.6484375, "learning_rate": 1.789840558487278e-05, "loss": 1.0504, "step": 3756 }, { "epoch": 0.6441905823349122, "grad_norm": 1.609375, "learning_rate": 1.7897297858243036e-05, "loss": 1.021, "step": 3757 }, { "epoch": 0.644362046423902, "grad_norm": 1.640625, "learning_rate": 1.7896189874052675e-05, "loss": 1.0672, "step": 3758 }, { "epoch": 0.644533510512892, "grad_norm": 1.5234375, "learning_rate": 1.7895081632337826e-05, "loss": 0.9723, "step": 3759 }, { "epoch": 0.6447049746018818, "grad_norm": 1.5234375, "learning_rate": 1.7893973133134634e-05, "loss": 0.946, "step": 3760 }, { "epoch": 0.6448764386908716, "grad_norm": 1.6484375, "learning_rate": 1.7892864376479254e-05, "loss": 0.978, "step": 3761 }, { "epoch": 0.6450479027798616, "grad_norm": 1.5859375, "learning_rate": 1.789175536240784e-05, "loss": 1.0083, "step": 3762 }, { "epoch": 0.6452193668688514, "grad_norm": 1.5078125, "learning_rate": 1.789064609095657e-05, "loss": 1.0333, "step": 3763 }, { "epoch": 0.6453908309578412, "grad_norm": 1.6328125, "learning_rate": 1.788953656216162e-05, "loss": 1.0297, "step": 3764 }, { "epoch": 0.6455622950468312, "grad_norm": 1.5625, "learning_rate": 1.7888426776059166e-05, "loss": 0.9605, "step": 3765 }, { "epoch": 0.645733759135821, "grad_norm": 1.6640625, "learning_rate": 1.7887316732685415e-05, "loss": 1.0634, "step": 3766 }, { "epoch": 0.6459052232248108, "grad_norm": 1.65625, "learning_rate": 1.7886206432076566e-05, "loss": 1.0777, "step": 3767 }, { "epoch": 0.6460766873138007, "grad_norm": 1.6328125, "learning_rate": 1.7885095874268826e-05, "loss": 1.0468, "step": 3768 }, { "epoch": 0.6462481514027906, "grad_norm": 1.546875, "learning_rate": 1.7883985059298418e-05, "loss": 0.9256, "step": 3769 }, { "epoch": 0.6464196154917804, "grad_norm": 1.671875, "learning_rate": 1.7882873987201566e-05, "loss": 1.0545, "step": 3770 }, { "epoch": 0.6465910795807703, "grad_norm": 1.546875, "learning_rate": 1.788176265801451e-05, "loss": 0.9954, "step": 3771 }, { "epoch": 0.6467625436697602, "grad_norm": 1.5390625, "learning_rate": 1.7880651071773495e-05, "loss": 0.9356, "step": 3772 }, { "epoch": 0.64693400775875, "grad_norm": 1.6875, "learning_rate": 1.7879539228514775e-05, "loss": 1.0697, "step": 3773 }, { "epoch": 0.6471054718477399, "grad_norm": 1.59375, "learning_rate": 1.7878427128274607e-05, "loss": 0.9816, "step": 3774 }, { "epoch": 0.6472769359367297, "grad_norm": 1.5234375, "learning_rate": 1.787731477108926e-05, "loss": 0.9684, "step": 3775 }, { "epoch": 0.6474484000257196, "grad_norm": 1.609375, "learning_rate": 1.7876202156995018e-05, "loss": 1.05, "step": 3776 }, { "epoch": 0.6476198641147095, "grad_norm": 1.5859375, "learning_rate": 1.7875089286028167e-05, "loss": 0.9508, "step": 3777 }, { "epoch": 0.6477913282036993, "grad_norm": 1.5859375, "learning_rate": 1.7873976158225e-05, "loss": 0.9991, "step": 3778 }, { "epoch": 0.6479627922926892, "grad_norm": 1.5, "learning_rate": 1.7872862773621814e-05, "loss": 0.9117, "step": 3779 }, { "epoch": 0.6481342563816791, "grad_norm": 1.6484375, "learning_rate": 1.787174913225493e-05, "loss": 0.9993, "step": 3780 }, { "epoch": 0.6483057204706689, "grad_norm": 45.5, "learning_rate": 1.7870635234160663e-05, "loss": 1.1466, "step": 3781 }, { "epoch": 0.6484771845596587, "grad_norm": 1.65625, "learning_rate": 1.7869521079375345e-05, "loss": 0.9891, "step": 3782 }, { "epoch": 0.6486486486486487, "grad_norm": 1.5390625, "learning_rate": 1.786840666793531e-05, "loss": 0.9753, "step": 3783 }, { "epoch": 0.6488201127376385, "grad_norm": 1.6171875, "learning_rate": 1.7867291999876905e-05, "loss": 1.0144, "step": 3784 }, { "epoch": 0.6489915768266283, "grad_norm": 1.625, "learning_rate": 1.786617707523648e-05, "loss": 1.1283, "step": 3785 }, { "epoch": 0.6491630409156183, "grad_norm": 1.6171875, "learning_rate": 1.78650618940504e-05, "loss": 0.996, "step": 3786 }, { "epoch": 0.6493345050046081, "grad_norm": 1.6328125, "learning_rate": 1.7863946456355036e-05, "loss": 1.0807, "step": 3787 }, { "epoch": 0.6495059690935979, "grad_norm": 1.5390625, "learning_rate": 1.786283076218676e-05, "loss": 1.0865, "step": 3788 }, { "epoch": 0.6496774331825879, "grad_norm": 1.484375, "learning_rate": 1.786171481158197e-05, "loss": 1.019, "step": 3789 }, { "epoch": 0.6498488972715777, "grad_norm": 1.4921875, "learning_rate": 1.786059860457705e-05, "loss": 0.8982, "step": 3790 }, { "epoch": 0.6500203613605675, "grad_norm": 1.5234375, "learning_rate": 1.7859482141208413e-05, "loss": 0.9797, "step": 3791 }, { "epoch": 0.6501918254495574, "grad_norm": 1.5234375, "learning_rate": 1.7858365421512467e-05, "loss": 0.9876, "step": 3792 }, { "epoch": 0.6503632895385473, "grad_norm": 1.546875, "learning_rate": 1.7857248445525636e-05, "loss": 1.0003, "step": 3793 }, { "epoch": 0.6505347536275371, "grad_norm": 1.6015625, "learning_rate": 1.785613121328434e-05, "loss": 0.9599, "step": 3794 }, { "epoch": 0.650706217716527, "grad_norm": 1.59375, "learning_rate": 1.785501372482503e-05, "loss": 1.0637, "step": 3795 }, { "epoch": 0.6508776818055169, "grad_norm": 1.546875, "learning_rate": 1.7853895980184136e-05, "loss": 0.9595, "step": 3796 }, { "epoch": 0.6510491458945067, "grad_norm": 1.546875, "learning_rate": 1.785277797939812e-05, "loss": 0.9918, "step": 3797 }, { "epoch": 0.6512206099834966, "grad_norm": 1.5859375, "learning_rate": 1.7851659722503442e-05, "loss": 0.9836, "step": 3798 }, { "epoch": 0.6513920740724864, "grad_norm": 1.5859375, "learning_rate": 1.7850541209536575e-05, "loss": 1.044, "step": 3799 }, { "epoch": 0.6515635381614763, "grad_norm": 1.5234375, "learning_rate": 1.7849422440533998e-05, "loss": 0.9397, "step": 3800 }, { "epoch": 0.6517350022504662, "grad_norm": 1.59375, "learning_rate": 1.7848303415532197e-05, "loss": 0.9828, "step": 3801 }, { "epoch": 0.651906466339456, "grad_norm": 1.640625, "learning_rate": 1.7847184134567664e-05, "loss": 1.0753, "step": 3802 }, { "epoch": 0.6520779304284459, "grad_norm": 1.5859375, "learning_rate": 1.7846064597676913e-05, "loss": 1.0568, "step": 3803 }, { "epoch": 0.6522493945174358, "grad_norm": 1.7265625, "learning_rate": 1.7844944804896446e-05, "loss": 0.978, "step": 3804 }, { "epoch": 0.6524208586064256, "grad_norm": 1.53125, "learning_rate": 1.784382475626279e-05, "loss": 1.0221, "step": 3805 }, { "epoch": 0.6525923226954154, "grad_norm": 1.6015625, "learning_rate": 1.784270445181247e-05, "loss": 1.0098, "step": 3806 }, { "epoch": 0.6527637867844054, "grad_norm": 1.6015625, "learning_rate": 1.784158389158202e-05, "loss": 1.0093, "step": 3807 }, { "epoch": 0.6529352508733952, "grad_norm": 1.6796875, "learning_rate": 1.7840463075607996e-05, "loss": 1.0862, "step": 3808 }, { "epoch": 0.653106714962385, "grad_norm": 1.53125, "learning_rate": 1.7839342003926945e-05, "loss": 1.0648, "step": 3809 }, { "epoch": 0.653278179051375, "grad_norm": 1.5234375, "learning_rate": 1.7838220676575433e-05, "loss": 1.0024, "step": 3810 }, { "epoch": 0.6534496431403648, "grad_norm": 1.5859375, "learning_rate": 1.783709909359003e-05, "loss": 1.1038, "step": 3811 }, { "epoch": 0.6536211072293546, "grad_norm": 1.5546875, "learning_rate": 1.783597725500731e-05, "loss": 0.9757, "step": 3812 }, { "epoch": 0.6537925713183446, "grad_norm": 1.6796875, "learning_rate": 1.7834855160863864e-05, "loss": 1.0116, "step": 3813 }, { "epoch": 0.6539640354073344, "grad_norm": 1.625, "learning_rate": 1.783373281119629e-05, "loss": 1.0175, "step": 3814 }, { "epoch": 0.6541354994963242, "grad_norm": 1.625, "learning_rate": 1.783261020604119e-05, "loss": 0.991, "step": 3815 }, { "epoch": 0.6543069635853141, "grad_norm": 1.7421875, "learning_rate": 1.7831487345435175e-05, "loss": 1.0568, "step": 3816 }, { "epoch": 0.654478427674304, "grad_norm": 1.5625, "learning_rate": 1.783036422941487e-05, "loss": 0.976, "step": 3817 }, { "epoch": 0.6546498917632938, "grad_norm": 1.5859375, "learning_rate": 1.7829240858016896e-05, "loss": 0.9601, "step": 3818 }, { "epoch": 0.6548213558522837, "grad_norm": 1.6015625, "learning_rate": 1.78281172312779e-05, "loss": 1.0032, "step": 3819 }, { "epoch": 0.6549928199412736, "grad_norm": 1.59375, "learning_rate": 1.782699334923452e-05, "loss": 1.0149, "step": 3820 }, { "epoch": 0.6551642840302634, "grad_norm": 1.5234375, "learning_rate": 1.7825869211923415e-05, "loss": 0.9881, "step": 3821 }, { "epoch": 0.6553357481192533, "grad_norm": 1.6640625, "learning_rate": 1.7824744819381244e-05, "loss": 1.096, "step": 3822 }, { "epoch": 0.6555072122082432, "grad_norm": 1.6328125, "learning_rate": 1.782362017164468e-05, "loss": 1.0724, "step": 3823 }, { "epoch": 0.655678676297233, "grad_norm": 1.6328125, "learning_rate": 1.7822495268750402e-05, "loss": 0.9867, "step": 3824 }, { "epoch": 0.6558501403862229, "grad_norm": 1.640625, "learning_rate": 1.7821370110735094e-05, "loss": 1.0498, "step": 3825 }, { "epoch": 0.6560216044752127, "grad_norm": 1.5625, "learning_rate": 1.7820244697635458e-05, "loss": 1.0758, "step": 3826 }, { "epoch": 0.6561930685642026, "grad_norm": 1.6171875, "learning_rate": 1.781911902948819e-05, "loss": 1.0126, "step": 3827 }, { "epoch": 0.6563645326531925, "grad_norm": 1.609375, "learning_rate": 1.781799310633001e-05, "loss": 1.1322, "step": 3828 }, { "epoch": 0.6565359967421823, "grad_norm": 1.578125, "learning_rate": 1.7816866928197632e-05, "loss": 1.1316, "step": 3829 }, { "epoch": 0.6567074608311722, "grad_norm": 1.5625, "learning_rate": 1.781574049512779e-05, "loss": 0.9376, "step": 3830 }, { "epoch": 0.6568789249201621, "grad_norm": 1.609375, "learning_rate": 1.781461380715722e-05, "loss": 0.9902, "step": 3831 }, { "epoch": 0.6570503890091519, "grad_norm": 1.609375, "learning_rate": 1.781348686432266e-05, "loss": 1.0493, "step": 3832 }, { "epoch": 0.6572218530981417, "grad_norm": 1.515625, "learning_rate": 1.781235966666088e-05, "loss": 0.9787, "step": 3833 }, { "epoch": 0.6573933171871316, "grad_norm": 1.6328125, "learning_rate": 1.7811232214208626e-05, "loss": 0.9926, "step": 3834 }, { "epoch": 0.6575647812761215, "grad_norm": 1.65625, "learning_rate": 1.7810104507002675e-05, "loss": 1.0555, "step": 3835 }, { "epoch": 0.6577362453651113, "grad_norm": 1.640625, "learning_rate": 1.780897654507981e-05, "loss": 0.9789, "step": 3836 }, { "epoch": 0.6579077094541012, "grad_norm": 1.7109375, "learning_rate": 1.7807848328476813e-05, "loss": 1.0648, "step": 3837 }, { "epoch": 0.6580791735430911, "grad_norm": 1.5859375, "learning_rate": 1.780671985723048e-05, "loss": 0.8998, "step": 3838 }, { "epoch": 0.6582506376320809, "grad_norm": 3.859375, "learning_rate": 1.7805591131377612e-05, "loss": 1.0997, "step": 3839 }, { "epoch": 0.6584221017210707, "grad_norm": 1.6875, "learning_rate": 1.780446215095503e-05, "loss": 1.0947, "step": 3840 }, { "epoch": 0.6585935658100607, "grad_norm": 1.5390625, "learning_rate": 1.7803332915999542e-05, "loss": 1.0082, "step": 3841 }, { "epoch": 0.6587650298990505, "grad_norm": 1.640625, "learning_rate": 1.780220342654799e-05, "loss": 1.0477, "step": 3842 }, { "epoch": 0.6589364939880403, "grad_norm": 1.625, "learning_rate": 1.78010736826372e-05, "loss": 1.1199, "step": 3843 }, { "epoch": 0.6591079580770303, "grad_norm": 1.515625, "learning_rate": 1.7799943684304016e-05, "loss": 0.9562, "step": 3844 }, { "epoch": 0.6592794221660201, "grad_norm": 1.7421875, "learning_rate": 1.77988134315853e-05, "loss": 1.0783, "step": 3845 }, { "epoch": 0.6594508862550099, "grad_norm": 1.4765625, "learning_rate": 1.7797682924517917e-05, "loss": 0.8665, "step": 3846 }, { "epoch": 0.6596223503439999, "grad_norm": 1.6015625, "learning_rate": 1.7796552163138722e-05, "loss": 1.1352, "step": 3847 }, { "epoch": 0.6597938144329897, "grad_norm": 1.5078125, "learning_rate": 1.7795421147484608e-05, "loss": 1.0046, "step": 3848 }, { "epoch": 0.6599652785219795, "grad_norm": 1.515625, "learning_rate": 1.7794289877592453e-05, "loss": 1.0031, "step": 3849 }, { "epoch": 0.6601367426109694, "grad_norm": 1.4609375, "learning_rate": 1.779315835349915e-05, "loss": 0.9268, "step": 3850 }, { "epoch": 0.6603082066999593, "grad_norm": 1.625, "learning_rate": 1.7792026575241616e-05, "loss": 0.9385, "step": 3851 }, { "epoch": 0.6604796707889491, "grad_norm": 1.6640625, "learning_rate": 1.7790894542856748e-05, "loss": 1.0262, "step": 3852 }, { "epoch": 0.660651134877939, "grad_norm": 1.609375, "learning_rate": 1.778976225638147e-05, "loss": 1.0465, "step": 3853 }, { "epoch": 0.6608225989669289, "grad_norm": 1.7265625, "learning_rate": 1.778862971585271e-05, "loss": 1.0516, "step": 3854 }, { "epoch": 0.6609940630559187, "grad_norm": 1.6171875, "learning_rate": 1.778749692130741e-05, "loss": 0.9965, "step": 3855 }, { "epoch": 0.6611655271449086, "grad_norm": 1.65625, "learning_rate": 1.778636387278251e-05, "loss": 1.0474, "step": 3856 }, { "epoch": 0.6613369912338984, "grad_norm": 1.65625, "learning_rate": 1.7785230570314963e-05, "loss": 1.0767, "step": 3857 }, { "epoch": 0.6615084553228883, "grad_norm": 1.5625, "learning_rate": 1.7784097013941732e-05, "loss": 1.0504, "step": 3858 }, { "epoch": 0.6616799194118782, "grad_norm": 1.578125, "learning_rate": 1.778296320369978e-05, "loss": 1.009, "step": 3859 }, { "epoch": 0.661851383500868, "grad_norm": 1.59375, "learning_rate": 1.7781829139626096e-05, "loss": 1.0706, "step": 3860 }, { "epoch": 0.6620228475898579, "grad_norm": 1.65625, "learning_rate": 1.7780694821757654e-05, "loss": 0.9951, "step": 3861 }, { "epoch": 0.6621943116788478, "grad_norm": 1.6171875, "learning_rate": 1.7779560250131457e-05, "loss": 1.0454, "step": 3862 }, { "epoch": 0.6623657757678376, "grad_norm": 1.609375, "learning_rate": 1.777842542478451e-05, "loss": 1.1059, "step": 3863 }, { "epoch": 0.6625372398568274, "grad_norm": 1.5390625, "learning_rate": 1.7777290345753814e-05, "loss": 1.0174, "step": 3864 }, { "epoch": 0.6627087039458174, "grad_norm": 1.609375, "learning_rate": 1.7776155013076394e-05, "loss": 1.0829, "step": 3865 }, { "epoch": 0.6628801680348072, "grad_norm": 1.515625, "learning_rate": 1.7775019426789274e-05, "loss": 0.997, "step": 3866 }, { "epoch": 0.663051632123797, "grad_norm": 1.546875, "learning_rate": 1.7773883586929498e-05, "loss": 0.9961, "step": 3867 }, { "epoch": 0.663223096212787, "grad_norm": 1.4765625, "learning_rate": 1.77727474935341e-05, "loss": 0.9763, "step": 3868 }, { "epoch": 0.6633945603017768, "grad_norm": 1.65625, "learning_rate": 1.777161114664014e-05, "loss": 1.0266, "step": 3869 }, { "epoch": 0.6635660243907666, "grad_norm": 1.6171875, "learning_rate": 1.7770474546284674e-05, "loss": 1.0637, "step": 3870 }, { "epoch": 0.6637374884797566, "grad_norm": 1.6171875, "learning_rate": 1.776933769250477e-05, "loss": 1.0638, "step": 3871 }, { "epoch": 0.6639089525687464, "grad_norm": 1.5625, "learning_rate": 1.776820058533751e-05, "loss": 1.0038, "step": 3872 }, { "epoch": 0.6640804166577362, "grad_norm": 1.6171875, "learning_rate": 1.7767063224819976e-05, "loss": 1.0904, "step": 3873 }, { "epoch": 0.6642518807467261, "grad_norm": 1.6640625, "learning_rate": 1.7765925610989263e-05, "loss": 0.968, "step": 3874 }, { "epoch": 0.664423344835716, "grad_norm": 1.6953125, "learning_rate": 1.776478774388247e-05, "loss": 1.1252, "step": 3875 }, { "epoch": 0.6645948089247058, "grad_norm": 1.5546875, "learning_rate": 1.7763649623536712e-05, "loss": 1.0071, "step": 3876 }, { "epoch": 0.6647662730136957, "grad_norm": 1.6640625, "learning_rate": 1.7762511249989104e-05, "loss": 1.214, "step": 3877 }, { "epoch": 0.6649377371026856, "grad_norm": 1.65625, "learning_rate": 1.776137262327677e-05, "loss": 1.0238, "step": 3878 }, { "epoch": 0.6651092011916754, "grad_norm": 1.5390625, "learning_rate": 1.776023374343685e-05, "loss": 0.9687, "step": 3879 }, { "epoch": 0.6652806652806653, "grad_norm": 1.5859375, "learning_rate": 1.7759094610506486e-05, "loss": 0.9881, "step": 3880 }, { "epoch": 0.6654521293696551, "grad_norm": 1.46875, "learning_rate": 1.775795522452283e-05, "loss": 1.0175, "step": 3881 }, { "epoch": 0.665623593458645, "grad_norm": 1.578125, "learning_rate": 1.7756815585523038e-05, "loss": 1.0085, "step": 3882 }, { "epoch": 0.6657950575476349, "grad_norm": 1.578125, "learning_rate": 1.7755675693544277e-05, "loss": 0.9782, "step": 3883 }, { "epoch": 0.6659665216366247, "grad_norm": 1.578125, "learning_rate": 1.775453554862373e-05, "loss": 0.9616, "step": 3884 }, { "epoch": 0.6661379857256146, "grad_norm": 1.6796875, "learning_rate": 1.775339515079858e-05, "loss": 1.0437, "step": 3885 }, { "epoch": 0.6663094498146045, "grad_norm": 1.65625, "learning_rate": 1.7752254500106016e-05, "loss": 1.146, "step": 3886 }, { "epoch": 0.6664809139035943, "grad_norm": 1.7265625, "learning_rate": 1.775111359658324e-05, "loss": 1.0646, "step": 3887 }, { "epoch": 0.6666523779925841, "grad_norm": 1.640625, "learning_rate": 1.7749972440267463e-05, "loss": 1.0783, "step": 3888 }, { "epoch": 0.6668238420815741, "grad_norm": 1.609375, "learning_rate": 1.7748831031195898e-05, "loss": 1.0681, "step": 3889 }, { "epoch": 0.6669953061705639, "grad_norm": 1.609375, "learning_rate": 1.774768936940578e-05, "loss": 1.0427, "step": 3890 }, { "epoch": 0.6671667702595537, "grad_norm": 1.609375, "learning_rate": 1.7746547454934327e-05, "loss": 1.064, "step": 3891 }, { "epoch": 0.6673382343485437, "grad_norm": 1.578125, "learning_rate": 1.77454052878188e-05, "loss": 0.9948, "step": 3892 }, { "epoch": 0.6675096984375335, "grad_norm": 1.6640625, "learning_rate": 1.7744262868096432e-05, "loss": 1.0956, "step": 3893 }, { "epoch": 0.6676811625265233, "grad_norm": 1.609375, "learning_rate": 1.7743120195804498e-05, "loss": 0.9624, "step": 3894 }, { "epoch": 0.6678526266155133, "grad_norm": 1.7890625, "learning_rate": 1.774197727098025e-05, "loss": 1.0827, "step": 3895 }, { "epoch": 0.6680240907045031, "grad_norm": 1.5859375, "learning_rate": 1.7740834093660974e-05, "loss": 0.8746, "step": 3896 }, { "epoch": 0.6681955547934929, "grad_norm": 1.5703125, "learning_rate": 1.7739690663883948e-05, "loss": 1.0361, "step": 3897 }, { "epoch": 0.6683670188824828, "grad_norm": 1.6484375, "learning_rate": 1.7738546981686458e-05, "loss": 0.9436, "step": 3898 }, { "epoch": 0.6685384829714727, "grad_norm": 1.46875, "learning_rate": 1.773740304710582e-05, "loss": 0.9772, "step": 3899 }, { "epoch": 0.6687099470604625, "grad_norm": 1.546875, "learning_rate": 1.7736258860179326e-05, "loss": 1.015, "step": 3900 }, { "epoch": 0.6688814111494524, "grad_norm": 1.59375, "learning_rate": 1.77351144209443e-05, "loss": 0.9838, "step": 3901 }, { "epoch": 0.6690528752384423, "grad_norm": 1.65625, "learning_rate": 1.7733969729438064e-05, "loss": 1.0158, "step": 3902 }, { "epoch": 0.6692243393274321, "grad_norm": 1.5234375, "learning_rate": 1.7732824785697956e-05, "loss": 0.9799, "step": 3903 }, { "epoch": 0.669395803416422, "grad_norm": 1.625, "learning_rate": 1.7731679589761307e-05, "loss": 0.9674, "step": 3904 }, { "epoch": 0.6695672675054118, "grad_norm": 1.625, "learning_rate": 1.7730534141665473e-05, "loss": 1.0857, "step": 3905 }, { "epoch": 0.6697387315944017, "grad_norm": 1.5546875, "learning_rate": 1.7729388441447813e-05, "loss": 1.0258, "step": 3906 }, { "epoch": 0.6699101956833916, "grad_norm": 1.5625, "learning_rate": 1.7728242489145687e-05, "loss": 1.0592, "step": 3907 }, { "epoch": 0.6700816597723814, "grad_norm": 1.5546875, "learning_rate": 1.7727096284796476e-05, "loss": 1.0646, "step": 3908 }, { "epoch": 0.6702531238613713, "grad_norm": 1.5390625, "learning_rate": 1.7725949828437552e-05, "loss": 0.9314, "step": 3909 }, { "epoch": 0.6704245879503612, "grad_norm": 1.5078125, "learning_rate": 1.7724803120106312e-05, "loss": 1.0248, "step": 3910 }, { "epoch": 0.670596052039351, "grad_norm": 1.515625, "learning_rate": 1.7723656159840156e-05, "loss": 0.9268, "step": 3911 }, { "epoch": 0.6707675161283408, "grad_norm": 1.5, "learning_rate": 1.7722508947676488e-05, "loss": 0.9512, "step": 3912 }, { "epoch": 0.6709389802173308, "grad_norm": 1.5859375, "learning_rate": 1.772136148365272e-05, "loss": 0.9944, "step": 3913 }, { "epoch": 0.6711104443063206, "grad_norm": 1.5390625, "learning_rate": 1.772021376780628e-05, "loss": 1.0312, "step": 3914 }, { "epoch": 0.6712819083953104, "grad_norm": 1.6875, "learning_rate": 1.7719065800174595e-05, "loss": 1.0387, "step": 3915 }, { "epoch": 0.6714533724843004, "grad_norm": 1.578125, "learning_rate": 1.7717917580795108e-05, "loss": 1.0055, "step": 3916 }, { "epoch": 0.6716248365732902, "grad_norm": 1.515625, "learning_rate": 1.771676910970527e-05, "loss": 1.0512, "step": 3917 }, { "epoch": 0.67179630066228, "grad_norm": 1.5390625, "learning_rate": 1.7715620386942528e-05, "loss": 1.0302, "step": 3918 }, { "epoch": 0.67196776475127, "grad_norm": 1.578125, "learning_rate": 1.7714471412544353e-05, "loss": 1.0381, "step": 3919 }, { "epoch": 0.6721392288402598, "grad_norm": 1.59375, "learning_rate": 1.771332218654821e-05, "loss": 1.0399, "step": 3920 }, { "epoch": 0.6723106929292496, "grad_norm": 1.4921875, "learning_rate": 1.7712172708991594e-05, "loss": 1.0186, "step": 3921 }, { "epoch": 0.6724821570182395, "grad_norm": 1.5546875, "learning_rate": 1.7711022979911977e-05, "loss": 1.0577, "step": 3922 }, { "epoch": 0.6726536211072294, "grad_norm": 1.578125, "learning_rate": 1.7709872999346867e-05, "loss": 1.1428, "step": 3923 }, { "epoch": 0.6728250851962192, "grad_norm": 1.625, "learning_rate": 1.7708722767333766e-05, "loss": 0.9536, "step": 3924 }, { "epoch": 0.6729965492852091, "grad_norm": 1.6953125, "learning_rate": 1.7707572283910184e-05, "loss": 1.112, "step": 3925 }, { "epoch": 0.673168013374199, "grad_norm": 1.4921875, "learning_rate": 1.770642154911365e-05, "loss": 0.9273, "step": 3926 }, { "epoch": 0.6733394774631888, "grad_norm": 1.5, "learning_rate": 1.7705270562981688e-05, "loss": 1.0, "step": 3927 }, { "epoch": 0.6735109415521786, "grad_norm": 1.5859375, "learning_rate": 1.7704119325551835e-05, "loss": 0.9893, "step": 3928 }, { "epoch": 0.6736824056411685, "grad_norm": 1.609375, "learning_rate": 1.7702967836861643e-05, "loss": 1.0211, "step": 3929 }, { "epoch": 0.6738538697301584, "grad_norm": 1.5625, "learning_rate": 1.7701816096948665e-05, "loss": 1.0401, "step": 3930 }, { "epoch": 0.6740253338191482, "grad_norm": 1.546875, "learning_rate": 1.7700664105850454e-05, "loss": 1.0329, "step": 3931 }, { "epoch": 0.6741967979081381, "grad_norm": 1.625, "learning_rate": 1.7699511863604597e-05, "loss": 1.0191, "step": 3932 }, { "epoch": 0.674368261997128, "grad_norm": 1.671875, "learning_rate": 1.769835937024866e-05, "loss": 0.9552, "step": 3933 }, { "epoch": 0.6745397260861178, "grad_norm": 1.59375, "learning_rate": 1.7697206625820237e-05, "loss": 1.0417, "step": 3934 }, { "epoch": 0.6747111901751077, "grad_norm": 1.5703125, "learning_rate": 1.7696053630356918e-05, "loss": 1.0664, "step": 3935 }, { "epoch": 0.6748826542640975, "grad_norm": 1.640625, "learning_rate": 1.769490038389631e-05, "loss": 1.1068, "step": 3936 }, { "epoch": 0.6750541183530874, "grad_norm": 1.5234375, "learning_rate": 1.7693746886476028e-05, "loss": 1.0836, "step": 3937 }, { "epoch": 0.6752255824420773, "grad_norm": 1.671875, "learning_rate": 1.7692593138133684e-05, "loss": 0.974, "step": 3938 }, { "epoch": 0.6753970465310671, "grad_norm": 1.59375, "learning_rate": 1.769143913890691e-05, "loss": 1.0171, "step": 3939 }, { "epoch": 0.675568510620057, "grad_norm": 1.5546875, "learning_rate": 1.7690284888833344e-05, "loss": 1.1185, "step": 3940 }, { "epoch": 0.6757399747090469, "grad_norm": 1.515625, "learning_rate": 1.768913038795063e-05, "loss": 1.0596, "step": 3941 }, { "epoch": 0.6759114387980367, "grad_norm": 1.4765625, "learning_rate": 1.7687975636296414e-05, "loss": 1.0241, "step": 3942 }, { "epoch": 0.6760829028870265, "grad_norm": 1.625, "learning_rate": 1.7686820633908368e-05, "loss": 1.1086, "step": 3943 }, { "epoch": 0.6762543669760165, "grad_norm": 1.53125, "learning_rate": 1.7685665380824156e-05, "loss": 0.9996, "step": 3944 }, { "epoch": 0.6764258310650063, "grad_norm": 1.5234375, "learning_rate": 1.768450987708145e-05, "loss": 0.9955, "step": 3945 }, { "epoch": 0.6765972951539961, "grad_norm": 1.4375, "learning_rate": 1.7683354122717942e-05, "loss": 0.937, "step": 3946 }, { "epoch": 0.6767687592429861, "grad_norm": 1.546875, "learning_rate": 1.768219811777132e-05, "loss": 1.054, "step": 3947 }, { "epoch": 0.6769402233319759, "grad_norm": 1.625, "learning_rate": 1.7681041862279294e-05, "loss": 0.9462, "step": 3948 }, { "epoch": 0.6771116874209657, "grad_norm": 1.546875, "learning_rate": 1.7679885356279566e-05, "loss": 0.97, "step": 3949 }, { "epoch": 0.6772831515099557, "grad_norm": 1.5390625, "learning_rate": 1.7678728599809858e-05, "loss": 0.9661, "step": 3950 }, { "epoch": 0.6774546155989455, "grad_norm": 1.5703125, "learning_rate": 1.7677571592907893e-05, "loss": 1.0832, "step": 3951 }, { "epoch": 0.6776260796879353, "grad_norm": 1.546875, "learning_rate": 1.767641433561141e-05, "loss": 1.0797, "step": 3952 }, { "epoch": 0.6777975437769252, "grad_norm": 1.6484375, "learning_rate": 1.7675256827958148e-05, "loss": 1.0648, "step": 3953 }, { "epoch": 0.6779690078659151, "grad_norm": 1.5, "learning_rate": 1.7674099069985855e-05, "loss": 0.9913, "step": 3954 }, { "epoch": 0.6781404719549049, "grad_norm": 1.5703125, "learning_rate": 1.7672941061732297e-05, "loss": 1.0111, "step": 3955 }, { "epoch": 0.6783119360438948, "grad_norm": 1.5546875, "learning_rate": 1.7671782803235235e-05, "loss": 0.9879, "step": 3956 }, { "epoch": 0.6784834001328847, "grad_norm": 1.625, "learning_rate": 1.767062429453245e-05, "loss": 1.0341, "step": 3957 }, { "epoch": 0.6786548642218745, "grad_norm": 1.5703125, "learning_rate": 1.766946553566172e-05, "loss": 1.0252, "step": 3958 }, { "epoch": 0.6788263283108644, "grad_norm": 1.5703125, "learning_rate": 1.7668306526660836e-05, "loss": 1.0166, "step": 3959 }, { "epoch": 0.6789977923998542, "grad_norm": 1.53125, "learning_rate": 1.7667147267567602e-05, "loss": 0.9378, "step": 3960 }, { "epoch": 0.6791692564888441, "grad_norm": 1.5859375, "learning_rate": 1.7665987758419824e-05, "loss": 1.0179, "step": 3961 }, { "epoch": 0.679340720577834, "grad_norm": 1.578125, "learning_rate": 1.766482799925532e-05, "loss": 1.0277, "step": 3962 }, { "epoch": 0.6795121846668238, "grad_norm": 1.5234375, "learning_rate": 1.7663667990111908e-05, "loss": 1.0449, "step": 3963 }, { "epoch": 0.6796836487558137, "grad_norm": 1.7109375, "learning_rate": 1.7662507731027426e-05, "loss": 1.0957, "step": 3964 }, { "epoch": 0.6798551128448036, "grad_norm": 1.6953125, "learning_rate": 1.7661347222039714e-05, "loss": 1.0475, "step": 3965 }, { "epoch": 0.6800265769337934, "grad_norm": 1.5703125, "learning_rate": 1.7660186463186617e-05, "loss": 0.8924, "step": 3966 }, { "epoch": 0.6801980410227833, "grad_norm": 1.5859375, "learning_rate": 1.7659025454505994e-05, "loss": 1.0355, "step": 3967 }, { "epoch": 0.6803695051117732, "grad_norm": 1.5234375, "learning_rate": 1.765786419603571e-05, "loss": 0.9575, "step": 3968 }, { "epoch": 0.680540969200763, "grad_norm": 1.7109375, "learning_rate": 1.765670268781364e-05, "loss": 1.0357, "step": 3969 }, { "epoch": 0.6807124332897528, "grad_norm": 1.5703125, "learning_rate": 1.765554092987766e-05, "loss": 0.9695, "step": 3970 }, { "epoch": 0.6808838973787428, "grad_norm": 1.5546875, "learning_rate": 1.765437892226566e-05, "loss": 1.1055, "step": 3971 }, { "epoch": 0.6810553614677326, "grad_norm": 1.5546875, "learning_rate": 1.765321666501554e-05, "loss": 0.951, "step": 3972 }, { "epoch": 0.6812268255567224, "grad_norm": 1.515625, "learning_rate": 1.765205415816521e-05, "loss": 0.9672, "step": 3973 }, { "epoch": 0.6813982896457124, "grad_norm": 1.6015625, "learning_rate": 1.7650891401752578e-05, "loss": 1.0185, "step": 3974 }, { "epoch": 0.6815697537347022, "grad_norm": 1.6953125, "learning_rate": 1.7649728395815567e-05, "loss": 1.0638, "step": 3975 }, { "epoch": 0.681741217823692, "grad_norm": 1.4609375, "learning_rate": 1.7648565140392103e-05, "loss": 0.9564, "step": 3976 }, { "epoch": 0.681912681912682, "grad_norm": 1.515625, "learning_rate": 1.764740163552013e-05, "loss": 1.0024, "step": 3977 }, { "epoch": 0.6820841460016718, "grad_norm": 1.53125, "learning_rate": 1.764623788123759e-05, "loss": 1.0701, "step": 3978 }, { "epoch": 0.6822556100906616, "grad_norm": 1.515625, "learning_rate": 1.7645073877582445e-05, "loss": 1.0215, "step": 3979 }, { "epoch": 0.6824270741796515, "grad_norm": 1.5703125, "learning_rate": 1.764390962459265e-05, "loss": 1.0535, "step": 3980 }, { "epoch": 0.6825985382686414, "grad_norm": 1.5859375, "learning_rate": 1.7642745122306177e-05, "loss": 0.9549, "step": 3981 }, { "epoch": 0.6827700023576312, "grad_norm": 1.5234375, "learning_rate": 1.7641580370761e-05, "loss": 0.9616, "step": 3982 }, { "epoch": 0.6829414664466211, "grad_norm": 1.6171875, "learning_rate": 1.764041536999512e-05, "loss": 1.0469, "step": 3983 }, { "epoch": 0.683112930535611, "grad_norm": 1.5703125, "learning_rate": 1.763925012004652e-05, "loss": 1.0718, "step": 3984 }, { "epoch": 0.6832843946246008, "grad_norm": 1.5859375, "learning_rate": 1.763808462095321e-05, "loss": 1.0762, "step": 3985 }, { "epoch": 0.6834558587135907, "grad_norm": 1.484375, "learning_rate": 1.7636918872753194e-05, "loss": 0.9343, "step": 3986 }, { "epoch": 0.6836273228025805, "grad_norm": 1.6171875, "learning_rate": 1.76357528754845e-05, "loss": 1.0311, "step": 3987 }, { "epoch": 0.6837987868915704, "grad_norm": 1.59375, "learning_rate": 1.7634586629185147e-05, "loss": 1.0325, "step": 3988 }, { "epoch": 0.6839702509805603, "grad_norm": 1.53125, "learning_rate": 1.7633420133893176e-05, "loss": 0.9296, "step": 3989 }, { "epoch": 0.6841417150695501, "grad_norm": 1.6171875, "learning_rate": 1.763225338964663e-05, "loss": 1.0297, "step": 3990 }, { "epoch": 0.68431317915854, "grad_norm": 1.8046875, "learning_rate": 1.7631086396483562e-05, "loss": 1.0326, "step": 3991 }, { "epoch": 0.6844846432475299, "grad_norm": 1.5703125, "learning_rate": 1.762991915444203e-05, "loss": 0.9755, "step": 3992 }, { "epoch": 0.6846561073365197, "grad_norm": 1.515625, "learning_rate": 1.76287516635601e-05, "loss": 0.9867, "step": 3993 }, { "epoch": 0.6848275714255095, "grad_norm": 1.59375, "learning_rate": 1.762758392387585e-05, "loss": 1.0919, "step": 3994 }, { "epoch": 0.6849990355144995, "grad_norm": 1.484375, "learning_rate": 1.7626415935427373e-05, "loss": 0.9688, "step": 3995 }, { "epoch": 0.6851704996034893, "grad_norm": 1.5625, "learning_rate": 1.762524769825275e-05, "loss": 1.0599, "step": 3996 }, { "epoch": 0.6853419636924791, "grad_norm": 1.6328125, "learning_rate": 1.762407921239008e-05, "loss": 1.0304, "step": 3997 }, { "epoch": 0.6855134277814691, "grad_norm": 1.6484375, "learning_rate": 1.7622910477877484e-05, "loss": 1.0556, "step": 3998 }, { "epoch": 0.6856848918704589, "grad_norm": 1.6015625, "learning_rate": 1.762174149475307e-05, "loss": 1.007, "step": 3999 }, { "epoch": 0.6858563559594487, "grad_norm": 1.578125, "learning_rate": 1.7620572263054964e-05, "loss": 0.9835, "step": 4000 }, { "epoch": 0.6860278200484387, "grad_norm": 1.609375, "learning_rate": 1.7619402782821306e-05, "loss": 0.9957, "step": 4001 }, { "epoch": 0.6861992841374285, "grad_norm": 1.59375, "learning_rate": 1.7618233054090223e-05, "loss": 1.0368, "step": 4002 }, { "epoch": 0.6863707482264183, "grad_norm": 1.5546875, "learning_rate": 1.7617063076899875e-05, "loss": 1.0038, "step": 4003 }, { "epoch": 0.6865422123154082, "grad_norm": 1.6484375, "learning_rate": 1.7615892851288417e-05, "loss": 1.0597, "step": 4004 }, { "epoch": 0.6867136764043981, "grad_norm": 1.59375, "learning_rate": 1.7614722377294017e-05, "loss": 0.9894, "step": 4005 }, { "epoch": 0.6868851404933879, "grad_norm": 1.5234375, "learning_rate": 1.7613551654954846e-05, "loss": 1.0048, "step": 4006 }, { "epoch": 0.6870566045823778, "grad_norm": 1.7109375, "learning_rate": 1.761238068430908e-05, "loss": 1.1029, "step": 4007 }, { "epoch": 0.6872280686713677, "grad_norm": 1.734375, "learning_rate": 1.761120946539492e-05, "loss": 1.1666, "step": 4008 }, { "epoch": 0.6873995327603575, "grad_norm": 1.5625, "learning_rate": 1.7610037998250555e-05, "loss": 1.0329, "step": 4009 }, { "epoch": 0.6875709968493474, "grad_norm": 1.5703125, "learning_rate": 1.7608866282914195e-05, "loss": 0.9206, "step": 4010 }, { "epoch": 0.6877424609383372, "grad_norm": 1.484375, "learning_rate": 1.7607694319424054e-05, "loss": 0.9154, "step": 4011 }, { "epoch": 0.6879139250273271, "grad_norm": 1.53125, "learning_rate": 1.7606522107818355e-05, "loss": 1.0722, "step": 4012 }, { "epoch": 0.688085389116317, "grad_norm": 1.6328125, "learning_rate": 1.760534964813532e-05, "loss": 1.0418, "step": 4013 }, { "epoch": 0.6882568532053068, "grad_norm": 1.5859375, "learning_rate": 1.76041769404132e-05, "loss": 0.9458, "step": 4014 }, { "epoch": 0.6884283172942967, "grad_norm": 1.6953125, "learning_rate": 1.7603003984690234e-05, "loss": 1.0555, "step": 4015 }, { "epoch": 0.6885997813832866, "grad_norm": 1.546875, "learning_rate": 1.7601830781004676e-05, "loss": 1.0789, "step": 4016 }, { "epoch": 0.6887712454722764, "grad_norm": 1.5703125, "learning_rate": 1.7600657329394794e-05, "loss": 0.9962, "step": 4017 }, { "epoch": 0.6889427095612662, "grad_norm": 1.6328125, "learning_rate": 1.759948362989885e-05, "loss": 1.0139, "step": 4018 }, { "epoch": 0.6891141736502562, "grad_norm": 1.6171875, "learning_rate": 1.7598309682555133e-05, "loss": 1.0394, "step": 4019 }, { "epoch": 0.689285637739246, "grad_norm": 1.515625, "learning_rate": 1.759713548740192e-05, "loss": 0.9697, "step": 4020 }, { "epoch": 0.6894571018282358, "grad_norm": 1.5859375, "learning_rate": 1.7595961044477516e-05, "loss": 1.0688, "step": 4021 }, { "epoch": 0.6896285659172258, "grad_norm": 1.6171875, "learning_rate": 1.7594786353820215e-05, "loss": 0.9847, "step": 4022 }, { "epoch": 0.6898000300062156, "grad_norm": 1.6875, "learning_rate": 1.759361141546833e-05, "loss": 1.0528, "step": 4023 }, { "epoch": 0.6899714940952054, "grad_norm": 1.578125, "learning_rate": 1.7592436229460185e-05, "loss": 1.0234, "step": 4024 }, { "epoch": 0.6901429581841952, "grad_norm": 1.6171875, "learning_rate": 1.7591260795834104e-05, "loss": 1.1078, "step": 4025 }, { "epoch": 0.6903144222731852, "grad_norm": 1.6796875, "learning_rate": 1.7590085114628422e-05, "loss": 1.1082, "step": 4026 }, { "epoch": 0.690485886362175, "grad_norm": 1.53125, "learning_rate": 1.758890918588148e-05, "loss": 1.0392, "step": 4027 }, { "epoch": 0.6906573504511648, "grad_norm": 1.5859375, "learning_rate": 1.7587733009631637e-05, "loss": 0.9816, "step": 4028 }, { "epoch": 0.6908288145401548, "grad_norm": 1.5390625, "learning_rate": 1.758655658591724e-05, "loss": 1.0778, "step": 4029 }, { "epoch": 0.6910002786291446, "grad_norm": 1.5078125, "learning_rate": 1.7585379914776672e-05, "loss": 1.0351, "step": 4030 }, { "epoch": 0.6911717427181344, "grad_norm": 1.609375, "learning_rate": 1.75842029962483e-05, "loss": 1.0163, "step": 4031 }, { "epoch": 0.6913432068071244, "grad_norm": 1.46875, "learning_rate": 1.7583025830370507e-05, "loss": 0.9156, "step": 4032 }, { "epoch": 0.6915146708961142, "grad_norm": 1.8203125, "learning_rate": 1.7581848417181687e-05, "loss": 1.0033, "step": 4033 }, { "epoch": 0.691686134985104, "grad_norm": 1.59375, "learning_rate": 1.758067075672024e-05, "loss": 1.0246, "step": 4034 }, { "epoch": 0.6918575990740939, "grad_norm": 1.5234375, "learning_rate": 1.7579492849024574e-05, "loss": 0.9565, "step": 4035 }, { "epoch": 0.6920290631630838, "grad_norm": 1.546875, "learning_rate": 1.7578314694133105e-05, "loss": 1.0043, "step": 4036 }, { "epoch": 0.6922005272520736, "grad_norm": 1.5390625, "learning_rate": 1.7577136292084255e-05, "loss": 0.931, "step": 4037 }, { "epoch": 0.6923719913410635, "grad_norm": 1.6015625, "learning_rate": 1.7575957642916453e-05, "loss": 1.0022, "step": 4038 }, { "epoch": 0.6925434554300534, "grad_norm": 1.6015625, "learning_rate": 1.7574778746668152e-05, "loss": 0.9138, "step": 4039 }, { "epoch": 0.6927149195190432, "grad_norm": 1.5390625, "learning_rate": 1.757359960337779e-05, "loss": 0.957, "step": 4040 }, { "epoch": 0.6928863836080331, "grad_norm": 1.578125, "learning_rate": 1.7572420213083822e-05, "loss": 1.0356, "step": 4041 }, { "epoch": 0.6930578476970229, "grad_norm": 1.6015625, "learning_rate": 1.757124057582472e-05, "loss": 1.0043, "step": 4042 }, { "epoch": 0.6932293117860128, "grad_norm": 1.671875, "learning_rate": 1.757006069163895e-05, "loss": 1.0444, "step": 4043 }, { "epoch": 0.6934007758750027, "grad_norm": 1.5625, "learning_rate": 1.7568880560564994e-05, "loss": 1.0236, "step": 4044 }, { "epoch": 0.6935722399639925, "grad_norm": 1.5390625, "learning_rate": 1.756770018264134e-05, "loss": 0.9874, "step": 4045 }, { "epoch": 0.6937437040529824, "grad_norm": 1.46875, "learning_rate": 1.7566519557906488e-05, "loss": 1.0284, "step": 4046 }, { "epoch": 0.6939151681419723, "grad_norm": 1.671875, "learning_rate": 1.756533868639894e-05, "loss": 1.1392, "step": 4047 }, { "epoch": 0.6940866322309621, "grad_norm": 1.5859375, "learning_rate": 1.7564157568157208e-05, "loss": 0.9929, "step": 4048 }, { "epoch": 0.694258096319952, "grad_norm": 1.578125, "learning_rate": 1.7562976203219815e-05, "loss": 1.0021, "step": 4049 }, { "epoch": 0.6944295604089419, "grad_norm": 1.5703125, "learning_rate": 1.756179459162529e-05, "loss": 0.9882, "step": 4050 }, { "epoch": 0.6946010244979317, "grad_norm": 1.53125, "learning_rate": 1.7560612733412167e-05, "loss": 1.0035, "step": 4051 }, { "epoch": 0.6947724885869215, "grad_norm": 1.609375, "learning_rate": 1.755943062861899e-05, "loss": 1.0026, "step": 4052 }, { "epoch": 0.6949439526759115, "grad_norm": 1.609375, "learning_rate": 1.7558248277284318e-05, "loss": 0.9078, "step": 4053 }, { "epoch": 0.6951154167649013, "grad_norm": 1.546875, "learning_rate": 1.7557065679446705e-05, "loss": 1.0097, "step": 4054 }, { "epoch": 0.6952868808538911, "grad_norm": 1.5703125, "learning_rate": 1.755588283514472e-05, "loss": 1.1547, "step": 4055 }, { "epoch": 0.6954583449428811, "grad_norm": 1.5625, "learning_rate": 1.755469974441695e-05, "loss": 0.9205, "step": 4056 }, { "epoch": 0.6956298090318709, "grad_norm": 1.703125, "learning_rate": 1.7553516407301967e-05, "loss": 1.0198, "step": 4057 }, { "epoch": 0.6958012731208607, "grad_norm": 1.5, "learning_rate": 1.7552332823838375e-05, "loss": 0.9808, "step": 4058 }, { "epoch": 0.6959727372098506, "grad_norm": 1.5546875, "learning_rate": 1.7551148994064765e-05, "loss": 0.9738, "step": 4059 }, { "epoch": 0.6961442012988405, "grad_norm": 1.4375, "learning_rate": 1.7549964918019754e-05, "loss": 0.9615, "step": 4060 }, { "epoch": 0.6963156653878303, "grad_norm": 1.6171875, "learning_rate": 1.7548780595741957e-05, "loss": 1.0502, "step": 4061 }, { "epoch": 0.6964871294768202, "grad_norm": 1.5390625, "learning_rate": 1.754759602727e-05, "loss": 1.0078, "step": 4062 }, { "epoch": 0.6966585935658101, "grad_norm": 1.5390625, "learning_rate": 1.754641121264251e-05, "loss": 0.9771, "step": 4063 }, { "epoch": 0.6968300576547999, "grad_norm": 1.6328125, "learning_rate": 1.7545226151898134e-05, "loss": 1.1078, "step": 4064 }, { "epoch": 0.6970015217437898, "grad_norm": 1.671875, "learning_rate": 1.7544040845075528e-05, "loss": 1.0964, "step": 4065 }, { "epoch": 0.6971729858327796, "grad_norm": 1.5546875, "learning_rate": 1.7542855292213334e-05, "loss": 1.0188, "step": 4066 }, { "epoch": 0.6973444499217695, "grad_norm": 1.515625, "learning_rate": 1.7541669493350227e-05, "loss": 0.9677, "step": 4067 }, { "epoch": 0.6975159140107594, "grad_norm": 1.640625, "learning_rate": 1.754048344852488e-05, "loss": 1.0634, "step": 4068 }, { "epoch": 0.6976873780997492, "grad_norm": 1.6015625, "learning_rate": 1.753929715777597e-05, "loss": 1.0472, "step": 4069 }, { "epoch": 0.6978588421887391, "grad_norm": 1.6796875, "learning_rate": 1.7538110621142194e-05, "loss": 1.0953, "step": 4070 }, { "epoch": 0.698030306277729, "grad_norm": 1.609375, "learning_rate": 1.7536923838662243e-05, "loss": 1.0702, "step": 4071 }, { "epoch": 0.6982017703667188, "grad_norm": 1.4921875, "learning_rate": 1.7535736810374822e-05, "loss": 1.0504, "step": 4072 }, { "epoch": 0.6983732344557086, "grad_norm": 1.5078125, "learning_rate": 1.7534549536318647e-05, "loss": 0.9602, "step": 4073 }, { "epoch": 0.6985446985446986, "grad_norm": 1.5859375, "learning_rate": 1.753336201653244e-05, "loss": 1.083, "step": 4074 }, { "epoch": 0.6987161626336884, "grad_norm": 1.5859375, "learning_rate": 1.753217425105493e-05, "loss": 0.9854, "step": 4075 }, { "epoch": 0.6988876267226782, "grad_norm": 1.5625, "learning_rate": 1.7530986239924858e-05, "loss": 0.9757, "step": 4076 }, { "epoch": 0.6990590908116682, "grad_norm": 1.5703125, "learning_rate": 1.7529797983180962e-05, "loss": 1.0032, "step": 4077 }, { "epoch": 0.699230554900658, "grad_norm": 1.6015625, "learning_rate": 1.7528609480861995e-05, "loss": 0.9975, "step": 4078 }, { "epoch": 0.6994020189896478, "grad_norm": 1.5859375, "learning_rate": 1.752742073300673e-05, "loss": 1.0549, "step": 4079 }, { "epoch": 0.6995734830786378, "grad_norm": 1.5625, "learning_rate": 1.7526231739653923e-05, "loss": 1.051, "step": 4080 }, { "epoch": 0.6997449471676276, "grad_norm": 1.5234375, "learning_rate": 1.7525042500842363e-05, "loss": 1.0099, "step": 4081 }, { "epoch": 0.6999164112566174, "grad_norm": 1.5546875, "learning_rate": 1.7523853016610826e-05, "loss": 1.0453, "step": 4082 }, { "epoch": 0.7000878753456073, "grad_norm": 1.7421875, "learning_rate": 1.7522663286998113e-05, "loss": 1.1041, "step": 4083 }, { "epoch": 0.7002593394345972, "grad_norm": 1.546875, "learning_rate": 1.752147331204302e-05, "loss": 0.9947, "step": 4084 }, { "epoch": 0.700430803523587, "grad_norm": 1.53125, "learning_rate": 1.752028309178436e-05, "loss": 1.012, "step": 4085 }, { "epoch": 0.7006022676125769, "grad_norm": 1.5234375, "learning_rate": 1.751909262626095e-05, "loss": 1.0268, "step": 4086 }, { "epoch": 0.7007737317015668, "grad_norm": 1.59375, "learning_rate": 1.7517901915511614e-05, "loss": 0.9548, "step": 4087 }, { "epoch": 0.7009451957905566, "grad_norm": 1.6484375, "learning_rate": 1.7516710959575188e-05, "loss": 1.0622, "step": 4088 }, { "epoch": 0.7011166598795465, "grad_norm": 1.5234375, "learning_rate": 1.751551975849051e-05, "loss": 0.9667, "step": 4089 }, { "epoch": 0.7012881239685363, "grad_norm": 1.625, "learning_rate": 1.751432831229644e-05, "loss": 1.0628, "step": 4090 }, { "epoch": 0.7014595880575262, "grad_norm": 1.7578125, "learning_rate": 1.751313662103182e-05, "loss": 1.0634, "step": 4091 }, { "epoch": 0.7016310521465161, "grad_norm": 1.515625, "learning_rate": 1.7511944684735523e-05, "loss": 1.0062, "step": 4092 }, { "epoch": 0.7018025162355059, "grad_norm": 1.609375, "learning_rate": 1.7510752503446423e-05, "loss": 1.0688, "step": 4093 }, { "epoch": 0.7019739803244958, "grad_norm": 1.6015625, "learning_rate": 1.7509560077203404e-05, "loss": 0.9527, "step": 4094 }, { "epoch": 0.7021454444134857, "grad_norm": 1.578125, "learning_rate": 1.7508367406045348e-05, "loss": 1.0027, "step": 4095 }, { "epoch": 0.7023169085024755, "grad_norm": 1.46875, "learning_rate": 1.750717449001116e-05, "loss": 0.9678, "step": 4096 }, { "epoch": 0.7024883725914653, "grad_norm": 1.484375, "learning_rate": 1.750598132913974e-05, "loss": 0.9813, "step": 4097 }, { "epoch": 0.7026598366804553, "grad_norm": 1.59375, "learning_rate": 1.7504787923470007e-05, "loss": 1.0437, "step": 4098 }, { "epoch": 0.7028313007694451, "grad_norm": 1.46875, "learning_rate": 1.7503594273040877e-05, "loss": 0.981, "step": 4099 }, { "epoch": 0.7030027648584349, "grad_norm": 1.53125, "learning_rate": 1.7502400377891285e-05, "loss": 0.9728, "step": 4100 }, { "epoch": 0.7031742289474249, "grad_norm": 1.6484375, "learning_rate": 1.7501206238060162e-05, "loss": 1.0596, "step": 4101 }, { "epoch": 0.7033456930364147, "grad_norm": 1.4921875, "learning_rate": 1.7500011853586457e-05, "loss": 0.9799, "step": 4102 }, { "epoch": 0.7035171571254045, "grad_norm": 1.5859375, "learning_rate": 1.7498817224509124e-05, "loss": 0.9499, "step": 4103 }, { "epoch": 0.7036886212143945, "grad_norm": 1.6171875, "learning_rate": 1.7497622350867125e-05, "loss": 1.1101, "step": 4104 }, { "epoch": 0.7038600853033843, "grad_norm": 1.578125, "learning_rate": 1.7496427232699423e-05, "loss": 1.0013, "step": 4105 }, { "epoch": 0.7040315493923741, "grad_norm": 1.53125, "learning_rate": 1.7495231870045002e-05, "loss": 0.9436, "step": 4106 }, { "epoch": 0.704203013481364, "grad_norm": 1.6875, "learning_rate": 1.749403626294285e-05, "loss": 1.1108, "step": 4107 }, { "epoch": 0.7043744775703539, "grad_norm": 1.578125, "learning_rate": 1.7492840411431952e-05, "loss": 0.9605, "step": 4108 }, { "epoch": 0.7045459416593437, "grad_norm": 1.578125, "learning_rate": 1.7491644315551314e-05, "loss": 1.0315, "step": 4109 }, { "epoch": 0.7047174057483336, "grad_norm": 1.5, "learning_rate": 1.7490447975339943e-05, "loss": 0.9563, "step": 4110 }, { "epoch": 0.7048888698373235, "grad_norm": 1.546875, "learning_rate": 1.7489251390836853e-05, "loss": 0.9553, "step": 4111 }, { "epoch": 0.7050603339263133, "grad_norm": 1.609375, "learning_rate": 1.748805456208108e-05, "loss": 1.0408, "step": 4112 }, { "epoch": 0.7052317980153032, "grad_norm": 1.5390625, "learning_rate": 1.7486857489111643e-05, "loss": 1.0166, "step": 4113 }, { "epoch": 0.705403262104293, "grad_norm": 1.4921875, "learning_rate": 1.7485660171967595e-05, "loss": 0.9863, "step": 4114 }, { "epoch": 0.7055747261932829, "grad_norm": 1.5625, "learning_rate": 1.748446261068798e-05, "loss": 0.9943, "step": 4115 }, { "epoch": 0.7057461902822728, "grad_norm": 1.5546875, "learning_rate": 1.7483264805311856e-05, "loss": 0.9912, "step": 4116 }, { "epoch": 0.7059176543712626, "grad_norm": 1.5, "learning_rate": 1.7482066755878287e-05, "loss": 1.0156, "step": 4117 }, { "epoch": 0.7060891184602525, "grad_norm": 1.6015625, "learning_rate": 1.7480868462426345e-05, "loss": 1.0942, "step": 4118 }, { "epoch": 0.7062605825492423, "grad_norm": 1.4921875, "learning_rate": 1.7479669924995117e-05, "loss": 0.9926, "step": 4119 }, { "epoch": 0.7064320466382322, "grad_norm": 1.5390625, "learning_rate": 1.747847114362368e-05, "loss": 1.0409, "step": 4120 }, { "epoch": 0.706603510727222, "grad_norm": 1.5859375, "learning_rate": 1.747727211835114e-05, "loss": 1.0482, "step": 4121 }, { "epoch": 0.7067749748162119, "grad_norm": 1.5625, "learning_rate": 1.7476072849216602e-05, "loss": 1.0351, "step": 4122 }, { "epoch": 0.7069464389052018, "grad_norm": 1.6171875, "learning_rate": 1.7474873336259172e-05, "loss": 1.0221, "step": 4123 }, { "epoch": 0.7071179029941916, "grad_norm": 1.59375, "learning_rate": 1.747367357951798e-05, "loss": 0.9936, "step": 4124 }, { "epoch": 0.7072893670831815, "grad_norm": 1.6640625, "learning_rate": 1.747247357903214e-05, "loss": 1.1066, "step": 4125 }, { "epoch": 0.7074608311721714, "grad_norm": 1.546875, "learning_rate": 1.7471273334840807e-05, "loss": 1.0051, "step": 4126 }, { "epoch": 0.7076322952611612, "grad_norm": 1.546875, "learning_rate": 1.7470072846983112e-05, "loss": 1.006, "step": 4127 }, { "epoch": 0.707803759350151, "grad_norm": 1.6171875, "learning_rate": 1.7468872115498216e-05, "loss": 0.9908, "step": 4128 }, { "epoch": 0.707975223439141, "grad_norm": 1.5625, "learning_rate": 1.7467671140425272e-05, "loss": 0.9295, "step": 4129 }, { "epoch": 0.7081466875281308, "grad_norm": 1.5234375, "learning_rate": 1.746646992180345e-05, "loss": 0.9709, "step": 4130 }, { "epoch": 0.7083181516171206, "grad_norm": 1.5234375, "learning_rate": 1.7465268459671932e-05, "loss": 1.0124, "step": 4131 }, { "epoch": 0.7084896157061106, "grad_norm": 1.5390625, "learning_rate": 1.7464066754069893e-05, "loss": 0.966, "step": 4132 }, { "epoch": 0.7086610797951004, "grad_norm": 1.53125, "learning_rate": 1.7462864805036535e-05, "loss": 0.9522, "step": 4133 }, { "epoch": 0.7088325438840902, "grad_norm": 1.4921875, "learning_rate": 1.746166261261105e-05, "loss": 0.9749, "step": 4134 }, { "epoch": 0.7090040079730802, "grad_norm": 1.5625, "learning_rate": 1.746046017683265e-05, "loss": 0.9992, "step": 4135 }, { "epoch": 0.70917547206207, "grad_norm": 1.6015625, "learning_rate": 1.7459257497740548e-05, "loss": 1.0595, "step": 4136 }, { "epoch": 0.7093469361510598, "grad_norm": 1.5390625, "learning_rate": 1.7458054575373973e-05, "loss": 1.0378, "step": 4137 }, { "epoch": 0.7095184002400498, "grad_norm": 1.4375, "learning_rate": 1.7456851409772153e-05, "loss": 0.9598, "step": 4138 }, { "epoch": 0.7096898643290396, "grad_norm": 1.609375, "learning_rate": 1.745564800097433e-05, "loss": 1.0255, "step": 4139 }, { "epoch": 0.7098613284180294, "grad_norm": 1.5390625, "learning_rate": 1.7454444349019747e-05, "loss": 1.005, "step": 4140 }, { "epoch": 0.7100327925070193, "grad_norm": 1.6953125, "learning_rate": 1.7453240453947664e-05, "loss": 1.0376, "step": 4141 }, { "epoch": 0.7102042565960092, "grad_norm": 1.53125, "learning_rate": 1.7452036315797346e-05, "loss": 0.9216, "step": 4142 }, { "epoch": 0.710375720684999, "grad_norm": 1.703125, "learning_rate": 1.745083193460806e-05, "loss": 1.0864, "step": 4143 }, { "epoch": 0.7105471847739889, "grad_norm": 1.5546875, "learning_rate": 1.7449627310419086e-05, "loss": 0.9865, "step": 4144 }, { "epoch": 0.7107186488629788, "grad_norm": 1.640625, "learning_rate": 1.7448422443269716e-05, "loss": 1.0528, "step": 4145 }, { "epoch": 0.7108901129519686, "grad_norm": 1.640625, "learning_rate": 1.744721733319924e-05, "loss": 1.0079, "step": 4146 }, { "epoch": 0.7110615770409585, "grad_norm": 1.5078125, "learning_rate": 1.7446011980246963e-05, "loss": 0.9296, "step": 4147 }, { "epoch": 0.7112330411299483, "grad_norm": 1.578125, "learning_rate": 1.7444806384452198e-05, "loss": 1.0596, "step": 4148 }, { "epoch": 0.7114045052189382, "grad_norm": 1.5859375, "learning_rate": 1.744360054585426e-05, "loss": 0.9702, "step": 4149 }, { "epoch": 0.7115759693079281, "grad_norm": 1.5078125, "learning_rate": 1.744239446449248e-05, "loss": 0.933, "step": 4150 }, { "epoch": 0.7117474333969179, "grad_norm": 1.5859375, "learning_rate": 1.744118814040619e-05, "loss": 1.1285, "step": 4151 }, { "epoch": 0.7119188974859078, "grad_norm": 1.5625, "learning_rate": 1.7439981573634734e-05, "loss": 1.0722, "step": 4152 }, { "epoch": 0.7120903615748977, "grad_norm": 1.609375, "learning_rate": 1.743877476421746e-05, "loss": 1.0448, "step": 4153 }, { "epoch": 0.7122618256638875, "grad_norm": 1.5390625, "learning_rate": 1.7437567712193732e-05, "loss": 0.93, "step": 4154 }, { "epoch": 0.7124332897528773, "grad_norm": 1.5625, "learning_rate": 1.7436360417602915e-05, "loss": 0.989, "step": 4155 }, { "epoch": 0.7126047538418673, "grad_norm": 1.53125, "learning_rate": 1.743515288048438e-05, "loss": 1.0188, "step": 4156 }, { "epoch": 0.7127762179308571, "grad_norm": 1.6171875, "learning_rate": 1.7433945100877513e-05, "loss": 1.0177, "step": 4157 }, { "epoch": 0.7129476820198469, "grad_norm": 1.5234375, "learning_rate": 1.74327370788217e-05, "loss": 0.9703, "step": 4158 }, { "epoch": 0.7131191461088369, "grad_norm": 1.6015625, "learning_rate": 1.743152881435634e-05, "loss": 1.0018, "step": 4159 }, { "epoch": 0.7132906101978267, "grad_norm": 1.65625, "learning_rate": 1.7430320307520844e-05, "loss": 1.0156, "step": 4160 }, { "epoch": 0.7134620742868165, "grad_norm": 1.6015625, "learning_rate": 1.7429111558354624e-05, "loss": 1.0338, "step": 4161 }, { "epoch": 0.7136335383758065, "grad_norm": 1.6640625, "learning_rate": 1.74279025668971e-05, "loss": 1.0291, "step": 4162 }, { "epoch": 0.7138050024647963, "grad_norm": 1.640625, "learning_rate": 1.7426693333187702e-05, "loss": 1.1897, "step": 4163 }, { "epoch": 0.7139764665537861, "grad_norm": 1.6328125, "learning_rate": 1.7425483857265865e-05, "loss": 1.0438, "step": 4164 }, { "epoch": 0.714147930642776, "grad_norm": 1.5390625, "learning_rate": 1.742427413917104e-05, "loss": 1.0547, "step": 4165 }, { "epoch": 0.7143193947317659, "grad_norm": 1.546875, "learning_rate": 1.742306417894268e-05, "loss": 1.0058, "step": 4166 }, { "epoch": 0.7144908588207557, "grad_norm": 1.6015625, "learning_rate": 1.7421853976620245e-05, "loss": 1.0136, "step": 4167 }, { "epoch": 0.7146623229097456, "grad_norm": 1.515625, "learning_rate": 1.74206435322432e-05, "loss": 1.0062, "step": 4168 }, { "epoch": 0.7148337869987355, "grad_norm": 1.6328125, "learning_rate": 1.7419432845851027e-05, "loss": 1.1097, "step": 4169 }, { "epoch": 0.7150052510877253, "grad_norm": 1.6640625, "learning_rate": 1.741822191748321e-05, "loss": 1.017, "step": 4170 }, { "epoch": 0.7151767151767152, "grad_norm": 1.609375, "learning_rate": 1.7417010747179245e-05, "loss": 0.9241, "step": 4171 }, { "epoch": 0.715348179265705, "grad_norm": 1.5859375, "learning_rate": 1.7415799334978624e-05, "loss": 0.9328, "step": 4172 }, { "epoch": 0.7155196433546949, "grad_norm": 1.5859375, "learning_rate": 1.7414587680920864e-05, "loss": 0.9638, "step": 4173 }, { "epoch": 0.7156911074436848, "grad_norm": 1.5625, "learning_rate": 1.741337578504548e-05, "loss": 1.0168, "step": 4174 }, { "epoch": 0.7158625715326746, "grad_norm": 1.609375, "learning_rate": 1.7412163647391996e-05, "loss": 1.0322, "step": 4175 }, { "epoch": 0.7160340356216645, "grad_norm": 1.59375, "learning_rate": 1.7410951267999943e-05, "loss": 1.0177, "step": 4176 }, { "epoch": 0.7162054997106544, "grad_norm": 1.5625, "learning_rate": 1.740973864690886e-05, "loss": 1.0237, "step": 4177 }, { "epoch": 0.7163769637996442, "grad_norm": 1.5546875, "learning_rate": 1.7408525784158298e-05, "loss": 0.9636, "step": 4178 }, { "epoch": 0.716548427888634, "grad_norm": 1.6171875, "learning_rate": 1.7407312679787814e-05, "loss": 0.9987, "step": 4179 }, { "epoch": 0.716719891977624, "grad_norm": 1.5234375, "learning_rate": 1.740609933383697e-05, "loss": 0.9429, "step": 4180 }, { "epoch": 0.7168913560666138, "grad_norm": 1.625, "learning_rate": 1.7404885746345333e-05, "loss": 1.0416, "step": 4181 }, { "epoch": 0.7170628201556036, "grad_norm": 1.6171875, "learning_rate": 1.7403671917352492e-05, "loss": 1.0059, "step": 4182 }, { "epoch": 0.7172342842445936, "grad_norm": 1.6796875, "learning_rate": 1.7402457846898032e-05, "loss": 1.0392, "step": 4183 }, { "epoch": 0.7174057483335834, "grad_norm": 1.5390625, "learning_rate": 1.7401243535021547e-05, "loss": 1.034, "step": 4184 }, { "epoch": 0.7175772124225732, "grad_norm": 1.5703125, "learning_rate": 1.7400028981762634e-05, "loss": 1.0569, "step": 4185 }, { "epoch": 0.7177486765115632, "grad_norm": 1.53125, "learning_rate": 1.7398814187160913e-05, "loss": 0.996, "step": 4186 }, { "epoch": 0.717920140600553, "grad_norm": 1.59375, "learning_rate": 1.7397599151256003e-05, "loss": 1.0013, "step": 4187 }, { "epoch": 0.7180916046895428, "grad_norm": 1.578125, "learning_rate": 1.739638387408752e-05, "loss": 0.9945, "step": 4188 }, { "epoch": 0.7182630687785327, "grad_norm": 1.578125, "learning_rate": 1.7395168355695116e-05, "loss": 1.0584, "step": 4189 }, { "epoch": 0.7184345328675226, "grad_norm": 1.5390625, "learning_rate": 1.739395259611842e-05, "loss": 0.9664, "step": 4190 }, { "epoch": 0.7186059969565124, "grad_norm": 1.578125, "learning_rate": 1.7392736595397086e-05, "loss": 1.0509, "step": 4191 }, { "epoch": 0.7187774610455023, "grad_norm": 1.5546875, "learning_rate": 1.7391520353570772e-05, "loss": 0.9658, "step": 4192 }, { "epoch": 0.7189489251344922, "grad_norm": 1.5546875, "learning_rate": 1.739030387067915e-05, "loss": 1.0161, "step": 4193 }, { "epoch": 0.719120389223482, "grad_norm": 1.5625, "learning_rate": 1.7389087146761886e-05, "loss": 0.998, "step": 4194 }, { "epoch": 0.7192918533124719, "grad_norm": 1.6640625, "learning_rate": 1.7387870181858666e-05, "loss": 1.0322, "step": 4195 }, { "epoch": 0.7194633174014617, "grad_norm": 1.65625, "learning_rate": 1.738665297600918e-05, "loss": 1.0163, "step": 4196 }, { "epoch": 0.7196347814904516, "grad_norm": 1.59375, "learning_rate": 1.738543552925312e-05, "loss": 0.9975, "step": 4197 }, { "epoch": 0.7198062455794415, "grad_norm": 1.5, "learning_rate": 1.7384217841630207e-05, "loss": 1.0121, "step": 4198 }, { "epoch": 0.7199777096684313, "grad_norm": 1.6171875, "learning_rate": 1.7382999913180135e-05, "loss": 1.0565, "step": 4199 }, { "epoch": 0.7201491737574212, "grad_norm": 1.671875, "learning_rate": 1.7381781743942636e-05, "loss": 0.9933, "step": 4200 }, { "epoch": 0.7201491737574212, "eval_loss": 0.8648467063903809, "eval_runtime": 837.3272, "eval_samples_per_second": 2.984, "eval_steps_per_second": 2.984, "step": 4200 }, { "epoch": 0.7203206378464111, "grad_norm": 1.5625, "learning_rate": 1.738056333395744e-05, "loss": 1.0291, "step": 4201 }, { "epoch": 0.7204921019354009, "grad_norm": 1.5625, "learning_rate": 1.7379344683264275e-05, "loss": 1.0251, "step": 4202 }, { "epoch": 0.7206635660243907, "grad_norm": 1.5, "learning_rate": 1.7378125791902897e-05, "loss": 0.9141, "step": 4203 }, { "epoch": 0.7208350301133807, "grad_norm": 1.5625, "learning_rate": 1.737690665991305e-05, "loss": 0.9753, "step": 4204 }, { "epoch": 0.7210064942023705, "grad_norm": 1.5703125, "learning_rate": 1.7375687287334502e-05, "loss": 1.0581, "step": 4205 }, { "epoch": 0.7211779582913603, "grad_norm": 1.6640625, "learning_rate": 1.7374467674207015e-05, "loss": 1.0378, "step": 4206 }, { "epoch": 0.7213494223803503, "grad_norm": 1.5859375, "learning_rate": 1.7373247820570368e-05, "loss": 1.0007, "step": 4207 }, { "epoch": 0.7215208864693401, "grad_norm": 1.640625, "learning_rate": 1.7372027726464346e-05, "loss": 0.9991, "step": 4208 }, { "epoch": 0.7216923505583299, "grad_norm": 1.515625, "learning_rate": 1.7370807391928737e-05, "loss": 0.9788, "step": 4209 }, { "epoch": 0.7218638146473199, "grad_norm": 1.5625, "learning_rate": 1.7369586817003345e-05, "loss": 0.9624, "step": 4210 }, { "epoch": 0.7220352787363097, "grad_norm": 1.6171875, "learning_rate": 1.7368366001727973e-05, "loss": 0.9637, "step": 4211 }, { "epoch": 0.7222067428252995, "grad_norm": 1.65625, "learning_rate": 1.736714494614244e-05, "loss": 1.0159, "step": 4212 }, { "epoch": 0.7223782069142893, "grad_norm": 1.640625, "learning_rate": 1.736592365028657e-05, "loss": 0.9424, "step": 4213 }, { "epoch": 0.7225496710032793, "grad_norm": 1.59375, "learning_rate": 1.736470211420019e-05, "loss": 1.0695, "step": 4214 }, { "epoch": 0.7227211350922691, "grad_norm": 1.5625, "learning_rate": 1.7363480337923143e-05, "loss": 0.9998, "step": 4215 }, { "epoch": 0.7228925991812589, "grad_norm": 1.5546875, "learning_rate": 1.736225832149527e-05, "loss": 0.9964, "step": 4216 }, { "epoch": 0.7230640632702489, "grad_norm": 1.640625, "learning_rate": 1.7361036064956433e-05, "loss": 0.9818, "step": 4217 }, { "epoch": 0.7232355273592387, "grad_norm": 1.8046875, "learning_rate": 1.7359813568346492e-05, "loss": 1.0565, "step": 4218 }, { "epoch": 0.7234069914482285, "grad_norm": 1.546875, "learning_rate": 1.735859083170531e-05, "loss": 0.9854, "step": 4219 }, { "epoch": 0.7235784555372184, "grad_norm": 1.625, "learning_rate": 1.7357367855072778e-05, "loss": 1.0865, "step": 4220 }, { "epoch": 0.7237499196262083, "grad_norm": 1.609375, "learning_rate": 1.7356144638488772e-05, "loss": 0.9946, "step": 4221 }, { "epoch": 0.7239213837151981, "grad_norm": 1.5859375, "learning_rate": 1.7354921181993187e-05, "loss": 0.948, "step": 4222 }, { "epoch": 0.724092847804188, "grad_norm": 1.5703125, "learning_rate": 1.7353697485625928e-05, "loss": 1.0151, "step": 4223 }, { "epoch": 0.7242643118931779, "grad_norm": 1.5, "learning_rate": 1.73524735494269e-05, "loss": 0.953, "step": 4224 }, { "epoch": 0.7244357759821677, "grad_norm": 1.734375, "learning_rate": 1.7351249373436028e-05, "loss": 1.007, "step": 4225 }, { "epoch": 0.7246072400711576, "grad_norm": 1.71875, "learning_rate": 1.7350024957693232e-05, "loss": 1.072, "step": 4226 }, { "epoch": 0.7247787041601474, "grad_norm": 1.6171875, "learning_rate": 1.734880030223844e-05, "loss": 1.1049, "step": 4227 }, { "epoch": 0.7249501682491373, "grad_norm": 1.6015625, "learning_rate": 1.7347575407111595e-05, "loss": 1.0501, "step": 4228 }, { "epoch": 0.7251216323381272, "grad_norm": 1.6796875, "learning_rate": 1.7346350272352653e-05, "loss": 1.1176, "step": 4229 }, { "epoch": 0.725293096427117, "grad_norm": 1.59375, "learning_rate": 1.7345124898001562e-05, "loss": 1.0401, "step": 4230 }, { "epoch": 0.7254645605161069, "grad_norm": 1.6015625, "learning_rate": 1.7343899284098292e-05, "loss": 1.073, "step": 4231 }, { "epoch": 0.7256360246050968, "grad_norm": 1.671875, "learning_rate": 1.734267343068281e-05, "loss": 1.0733, "step": 4232 }, { "epoch": 0.7258074886940866, "grad_norm": 1.609375, "learning_rate": 1.7341447337795098e-05, "loss": 1.0307, "step": 4233 }, { "epoch": 0.7259789527830764, "grad_norm": 1.6328125, "learning_rate": 1.734022100547514e-05, "loss": 1.0375, "step": 4234 }, { "epoch": 0.7261504168720664, "grad_norm": 1.46875, "learning_rate": 1.7338994433762938e-05, "loss": 0.8565, "step": 4235 }, { "epoch": 0.7263218809610562, "grad_norm": 1.578125, "learning_rate": 1.733776762269849e-05, "loss": 1.0315, "step": 4236 }, { "epoch": 0.726493345050046, "grad_norm": 1.6171875, "learning_rate": 1.7336540572321807e-05, "loss": 0.9393, "step": 4237 }, { "epoch": 0.726664809139036, "grad_norm": 1.59375, "learning_rate": 1.733531328267291e-05, "loss": 1.0198, "step": 4238 }, { "epoch": 0.7268362732280258, "grad_norm": 1.5078125, "learning_rate": 1.7334085753791824e-05, "loss": 0.9681, "step": 4239 }, { "epoch": 0.7270077373170156, "grad_norm": 1.59375, "learning_rate": 1.7332857985718584e-05, "loss": 1.0287, "step": 4240 }, { "epoch": 0.7271792014060056, "grad_norm": 1.640625, "learning_rate": 1.7331629978493234e-05, "loss": 1.0395, "step": 4241 }, { "epoch": 0.7273506654949954, "grad_norm": 1.6953125, "learning_rate": 1.733040173215582e-05, "loss": 1.0084, "step": 4242 }, { "epoch": 0.7275221295839852, "grad_norm": 1.5703125, "learning_rate": 1.7329173246746406e-05, "loss": 1.0966, "step": 4243 }, { "epoch": 0.7276935936729751, "grad_norm": 1.6953125, "learning_rate": 1.732794452230505e-05, "loss": 1.1253, "step": 4244 }, { "epoch": 0.727865057761965, "grad_norm": 1.5, "learning_rate": 1.7326715558871826e-05, "loss": 0.9047, "step": 4245 }, { "epoch": 0.7280365218509548, "grad_norm": 1.6328125, "learning_rate": 1.7325486356486823e-05, "loss": 1.1244, "step": 4246 }, { "epoch": 0.7282079859399447, "grad_norm": 1.6953125, "learning_rate": 1.732425691519012e-05, "loss": 1.0456, "step": 4247 }, { "epoch": 0.7283794500289346, "grad_norm": 1.59375, "learning_rate": 1.732302723502182e-05, "loss": 0.9747, "step": 4248 }, { "epoch": 0.7285509141179244, "grad_norm": 1.5, "learning_rate": 1.732179731602203e-05, "loss": 1.0131, "step": 4249 }, { "epoch": 0.7287223782069143, "grad_norm": 1.46875, "learning_rate": 1.7320567158230855e-05, "loss": 0.988, "step": 4250 }, { "epoch": 0.7288938422959041, "grad_norm": 1.609375, "learning_rate": 1.7319336761688415e-05, "loss": 1.0683, "step": 4251 }, { "epoch": 0.729065306384894, "grad_norm": 1.5703125, "learning_rate": 1.7318106126434845e-05, "loss": 1.0612, "step": 4252 }, { "epoch": 0.7292367704738839, "grad_norm": 1.4765625, "learning_rate": 1.7316875252510274e-05, "loss": 0.9791, "step": 4253 }, { "epoch": 0.7294082345628737, "grad_norm": 1.4609375, "learning_rate": 1.7315644139954852e-05, "loss": 0.9713, "step": 4254 }, { "epoch": 0.7295796986518636, "grad_norm": 1.5546875, "learning_rate": 1.7314412788808727e-05, "loss": 1.002, "step": 4255 }, { "epoch": 0.7297511627408535, "grad_norm": 1.671875, "learning_rate": 1.731318119911205e-05, "loss": 1.0398, "step": 4256 }, { "epoch": 0.7299226268298433, "grad_norm": 1.4921875, "learning_rate": 1.7311949370905e-05, "loss": 0.9147, "step": 4257 }, { "epoch": 0.7300940909188331, "grad_norm": 1.4921875, "learning_rate": 1.731071730422775e-05, "loss": 1.0034, "step": 4258 }, { "epoch": 0.7302655550078231, "grad_norm": 1.5, "learning_rate": 1.7309484999120475e-05, "loss": 0.9908, "step": 4259 }, { "epoch": 0.7304370190968129, "grad_norm": 1.59375, "learning_rate": 1.730825245562337e-05, "loss": 1.0843, "step": 4260 }, { "epoch": 0.7306084831858027, "grad_norm": 1.703125, "learning_rate": 1.730701967377663e-05, "loss": 1.0963, "step": 4261 }, { "epoch": 0.7307799472747927, "grad_norm": 1.515625, "learning_rate": 1.7305786653620466e-05, "loss": 0.9635, "step": 4262 }, { "epoch": 0.7309514113637825, "grad_norm": 1.515625, "learning_rate": 1.730455339519509e-05, "loss": 1.0101, "step": 4263 }, { "epoch": 0.7311228754527723, "grad_norm": 1.640625, "learning_rate": 1.730331989854072e-05, "loss": 1.0204, "step": 4264 }, { "epoch": 0.7312943395417623, "grad_norm": 1.609375, "learning_rate": 1.730208616369758e-05, "loss": 1.019, "step": 4265 }, { "epoch": 0.7314658036307521, "grad_norm": 1.4609375, "learning_rate": 1.730085219070592e-05, "loss": 0.928, "step": 4266 }, { "epoch": 0.7316372677197419, "grad_norm": 1.6015625, "learning_rate": 1.7299617979605976e-05, "loss": 1.0206, "step": 4267 }, { "epoch": 0.7318087318087318, "grad_norm": 1.5546875, "learning_rate": 1.7298383530438002e-05, "loss": 0.9391, "step": 4268 }, { "epoch": 0.7319801958977217, "grad_norm": 1.4921875, "learning_rate": 1.7297148843242257e-05, "loss": 0.9757, "step": 4269 }, { "epoch": 0.7321516599867115, "grad_norm": 1.5546875, "learning_rate": 1.7295913918059008e-05, "loss": 0.993, "step": 4270 }, { "epoch": 0.7323231240757014, "grad_norm": 1.59375, "learning_rate": 1.7294678754928535e-05, "loss": 0.9771, "step": 4271 }, { "epoch": 0.7324945881646913, "grad_norm": 1.5625, "learning_rate": 1.7293443353891118e-05, "loss": 1.0116, "step": 4272 }, { "epoch": 0.7326660522536811, "grad_norm": 1.4921875, "learning_rate": 1.729220771498705e-05, "loss": 0.9867, "step": 4273 }, { "epoch": 0.732837516342671, "grad_norm": 1.59375, "learning_rate": 1.7290971838256624e-05, "loss": 1.0323, "step": 4274 }, { "epoch": 0.7330089804316609, "grad_norm": 1.6171875, "learning_rate": 1.7289735723740157e-05, "loss": 0.9824, "step": 4275 }, { "epoch": 0.7331804445206507, "grad_norm": 1.578125, "learning_rate": 1.7288499371477954e-05, "loss": 0.9785, "step": 4276 }, { "epoch": 0.7333519086096406, "grad_norm": 1.625, "learning_rate": 1.728726278151034e-05, "loss": 1.0296, "step": 4277 }, { "epoch": 0.7335233726986304, "grad_norm": 1.5078125, "learning_rate": 1.7286025953877644e-05, "loss": 1.1321, "step": 4278 }, { "epoch": 0.7336948367876203, "grad_norm": 1.5703125, "learning_rate": 1.728478888862021e-05, "loss": 1.0588, "step": 4279 }, { "epoch": 0.7338663008766102, "grad_norm": 1.609375, "learning_rate": 1.7283551585778375e-05, "loss": 1.0378, "step": 4280 }, { "epoch": 0.7340377649656, "grad_norm": 1.875, "learning_rate": 1.72823140453925e-05, "loss": 0.9893, "step": 4281 }, { "epoch": 0.7342092290545899, "grad_norm": 1.625, "learning_rate": 1.7281076267502936e-05, "loss": 1.0624, "step": 4282 }, { "epoch": 0.7343806931435798, "grad_norm": 1.5859375, "learning_rate": 1.7279838252150057e-05, "loss": 1.083, "step": 4283 }, { "epoch": 0.7345521572325696, "grad_norm": 1.546875, "learning_rate": 1.727859999937424e-05, "loss": 1.0401, "step": 4284 }, { "epoch": 0.7347236213215594, "grad_norm": 1.546875, "learning_rate": 1.727736150921587e-05, "loss": 1.0169, "step": 4285 }, { "epoch": 0.7348950854105494, "grad_norm": 1.546875, "learning_rate": 1.7276122781715335e-05, "loss": 1.0907, "step": 4286 }, { "epoch": 0.7350665494995392, "grad_norm": 1.6015625, "learning_rate": 1.727488381691304e-05, "loss": 1.038, "step": 4287 }, { "epoch": 0.735238013588529, "grad_norm": 1.53125, "learning_rate": 1.727364461484939e-05, "loss": 1.0173, "step": 4288 }, { "epoch": 0.735409477677519, "grad_norm": 1.6328125, "learning_rate": 1.727240517556479e-05, "loss": 1.078, "step": 4289 }, { "epoch": 0.7355809417665088, "grad_norm": 1.59375, "learning_rate": 1.7271165499099682e-05, "loss": 1.0485, "step": 4290 }, { "epoch": 0.7357524058554986, "grad_norm": 1.5625, "learning_rate": 1.7269925585494483e-05, "loss": 0.9896, "step": 4291 }, { "epoch": 0.7359238699444886, "grad_norm": 1.59375, "learning_rate": 1.7268685434789634e-05, "loss": 1.0272, "step": 4292 }, { "epoch": 0.7360953340334784, "grad_norm": 1.546875, "learning_rate": 1.7267445047025582e-05, "loss": 0.9564, "step": 4293 }, { "epoch": 0.7362667981224682, "grad_norm": 1.5390625, "learning_rate": 1.726620442224278e-05, "loss": 1.0263, "step": 4294 }, { "epoch": 0.7364382622114581, "grad_norm": 1.59375, "learning_rate": 1.7264963560481688e-05, "loss": 1.0433, "step": 4295 }, { "epoch": 0.736609726300448, "grad_norm": 1.65625, "learning_rate": 1.7263722461782782e-05, "loss": 0.9827, "step": 4296 }, { "epoch": 0.7367811903894378, "grad_norm": 1.6015625, "learning_rate": 1.726248112618653e-05, "loss": 1.0504, "step": 4297 }, { "epoch": 0.7369526544784277, "grad_norm": 1.5546875, "learning_rate": 1.7261239553733424e-05, "loss": 0.9999, "step": 4298 }, { "epoch": 0.7371241185674176, "grad_norm": 1.5234375, "learning_rate": 1.7259997744463948e-05, "loss": 0.9925, "step": 4299 }, { "epoch": 0.7372955826564074, "grad_norm": 1.59375, "learning_rate": 1.7258755698418606e-05, "loss": 0.9823, "step": 4300 }, { "epoch": 0.7374670467453973, "grad_norm": 1.640625, "learning_rate": 1.7257513415637913e-05, "loss": 1.0207, "step": 4301 }, { "epoch": 0.7376385108343871, "grad_norm": 1.671875, "learning_rate": 1.7256270896162373e-05, "loss": 0.9931, "step": 4302 }, { "epoch": 0.737809974923377, "grad_norm": 1.5234375, "learning_rate": 1.7255028140032517e-05, "loss": 0.985, "step": 4303 }, { "epoch": 0.7379814390123669, "grad_norm": 1.625, "learning_rate": 1.7253785147288875e-05, "loss": 1.0894, "step": 4304 }, { "epoch": 0.7381529031013567, "grad_norm": 1.671875, "learning_rate": 1.7252541917971984e-05, "loss": 1.0508, "step": 4305 }, { "epoch": 0.7383243671903466, "grad_norm": 1.6484375, "learning_rate": 1.7251298452122385e-05, "loss": 1.0104, "step": 4306 }, { "epoch": 0.7384958312793364, "grad_norm": 1.625, "learning_rate": 1.7250054749780643e-05, "loss": 1.0856, "step": 4307 }, { "epoch": 0.7386672953683263, "grad_norm": 1.609375, "learning_rate": 1.7248810810987312e-05, "loss": 0.968, "step": 4308 }, { "epoch": 0.7388387594573161, "grad_norm": 1.59375, "learning_rate": 1.7247566635782965e-05, "loss": 1.0389, "step": 4309 }, { "epoch": 0.739010223546306, "grad_norm": 1.578125, "learning_rate": 1.724632222420818e-05, "loss": 0.9872, "step": 4310 }, { "epoch": 0.7391816876352959, "grad_norm": 1.59375, "learning_rate": 1.7245077576303537e-05, "loss": 1.0246, "step": 4311 }, { "epoch": 0.7393531517242857, "grad_norm": 1.7421875, "learning_rate": 1.7243832692109632e-05, "loss": 0.9628, "step": 4312 }, { "epoch": 0.7395246158132756, "grad_norm": 1.5390625, "learning_rate": 1.7242587571667067e-05, "loss": 1.0645, "step": 4313 }, { "epoch": 0.7396960799022655, "grad_norm": 1.53125, "learning_rate": 1.7241342215016446e-05, "loss": 0.9326, "step": 4314 }, { "epoch": 0.7398675439912553, "grad_norm": 1.65625, "learning_rate": 1.724009662219839e-05, "loss": 1.1156, "step": 4315 }, { "epoch": 0.7400390080802451, "grad_norm": 1.5390625, "learning_rate": 1.7238850793253516e-05, "loss": 0.974, "step": 4316 }, { "epoch": 0.7402104721692351, "grad_norm": 1.5234375, "learning_rate": 1.723760472822246e-05, "loss": 0.9032, "step": 4317 }, { "epoch": 0.7403819362582249, "grad_norm": 1.8671875, "learning_rate": 1.723635842714586e-05, "loss": 0.9871, "step": 4318 }, { "epoch": 0.7405534003472147, "grad_norm": 1.7734375, "learning_rate": 1.7235111890064363e-05, "loss": 1.0518, "step": 4319 }, { "epoch": 0.7407248644362047, "grad_norm": 1.6875, "learning_rate": 1.723386511701862e-05, "loss": 0.998, "step": 4320 }, { "epoch": 0.7408963285251945, "grad_norm": 3.078125, "learning_rate": 1.72326181080493e-05, "loss": 1.138, "step": 4321 }, { "epoch": 0.7410677926141843, "grad_norm": 2.921875, "learning_rate": 1.723137086319706e-05, "loss": 0.8734, "step": 4322 }, { "epoch": 0.7412392567031743, "grad_norm": 4.34375, "learning_rate": 1.7230123382502592e-05, "loss": 1.0155, "step": 4323 }, { "epoch": 0.7414107207921641, "grad_norm": 2.140625, "learning_rate": 1.7228875666006572e-05, "loss": 1.0432, "step": 4324 }, { "epoch": 0.7415821848811539, "grad_norm": 1.765625, "learning_rate": 1.72276277137497e-05, "loss": 1.0208, "step": 4325 }, { "epoch": 0.7417536489701438, "grad_norm": 1.6796875, "learning_rate": 1.7226379525772664e-05, "loss": 1.0444, "step": 4326 }, { "epoch": 0.7419251130591337, "grad_norm": 1.5625, "learning_rate": 1.7225131102116185e-05, "loss": 1.0232, "step": 4327 }, { "epoch": 0.7420965771481235, "grad_norm": 1.5546875, "learning_rate": 1.7223882442820968e-05, "loss": 1.0519, "step": 4328 }, { "epoch": 0.7422680412371134, "grad_norm": 1.671875, "learning_rate": 1.7222633547927745e-05, "loss": 1.0556, "step": 4329 }, { "epoch": 0.7424395053261033, "grad_norm": 1.6796875, "learning_rate": 1.7221384417477244e-05, "loss": 1.0376, "step": 4330 }, { "epoch": 0.7426109694150931, "grad_norm": 1.5859375, "learning_rate": 1.7220135051510202e-05, "loss": 1.0469, "step": 4331 }, { "epoch": 0.742782433504083, "grad_norm": 1.53125, "learning_rate": 1.721888545006737e-05, "loss": 0.9343, "step": 4332 }, { "epoch": 0.7429538975930728, "grad_norm": 2.390625, "learning_rate": 1.72176356131895e-05, "loss": 1.045, "step": 4333 }, { "epoch": 0.7431253616820627, "grad_norm": 1.828125, "learning_rate": 1.7216385540917353e-05, "loss": 1.0854, "step": 4334 }, { "epoch": 0.7432968257710526, "grad_norm": 1.765625, "learning_rate": 1.7215135233291698e-05, "loss": 1.0709, "step": 4335 }, { "epoch": 0.7434682898600424, "grad_norm": 1.625, "learning_rate": 1.7213884690353317e-05, "loss": 1.0443, "step": 4336 }, { "epoch": 0.7436397539490323, "grad_norm": 1.953125, "learning_rate": 1.7212633912142986e-05, "loss": 1.0119, "step": 4337 }, { "epoch": 0.7438112180380222, "grad_norm": 1.4921875, "learning_rate": 1.7211382898701506e-05, "loss": 0.9788, "step": 4338 }, { "epoch": 0.743982682127012, "grad_norm": 1.625, "learning_rate": 1.7210131650069674e-05, "loss": 0.9576, "step": 4339 }, { "epoch": 0.7441541462160018, "grad_norm": 1.546875, "learning_rate": 1.7208880166288294e-05, "loss": 0.9501, "step": 4340 }, { "epoch": 0.7443256103049918, "grad_norm": 1.671875, "learning_rate": 1.7207628447398192e-05, "loss": 1.085, "step": 4341 }, { "epoch": 0.7444970743939816, "grad_norm": 1.625, "learning_rate": 1.720637649344018e-05, "loss": 0.9839, "step": 4342 }, { "epoch": 0.7446685384829714, "grad_norm": 1.6875, "learning_rate": 1.7205124304455098e-05, "loss": 1.0275, "step": 4343 }, { "epoch": 0.7448400025719614, "grad_norm": 1.5859375, "learning_rate": 1.7203871880483776e-05, "loss": 0.9456, "step": 4344 }, { "epoch": 0.7450114666609512, "grad_norm": 1.515625, "learning_rate": 1.720261922156707e-05, "loss": 0.978, "step": 4345 }, { "epoch": 0.745182930749941, "grad_norm": 1.59375, "learning_rate": 1.7201366327745824e-05, "loss": 0.9333, "step": 4346 }, { "epoch": 0.745354394838931, "grad_norm": 1.515625, "learning_rate": 1.7200113199060905e-05, "loss": 1.0612, "step": 4347 }, { "epoch": 0.7455258589279208, "grad_norm": 1.5859375, "learning_rate": 1.7198859835553184e-05, "loss": 1.0532, "step": 4348 }, { "epoch": 0.7456973230169106, "grad_norm": 1.59375, "learning_rate": 1.7197606237263533e-05, "loss": 1.0471, "step": 4349 }, { "epoch": 0.7458687871059005, "grad_norm": 1.5078125, "learning_rate": 1.719635240423284e-05, "loss": 0.9832, "step": 4350 }, { "epoch": 0.7460402511948904, "grad_norm": 1.5546875, "learning_rate": 1.7195098336501997e-05, "loss": 0.962, "step": 4351 }, { "epoch": 0.7462117152838802, "grad_norm": 1.640625, "learning_rate": 1.71938440341119e-05, "loss": 1.0084, "step": 4352 }, { "epoch": 0.7463831793728701, "grad_norm": 1.546875, "learning_rate": 1.7192589497103462e-05, "loss": 1.0764, "step": 4353 }, { "epoch": 0.74655464346186, "grad_norm": 1.5703125, "learning_rate": 1.7191334725517593e-05, "loss": 0.95, "step": 4354 }, { "epoch": 0.7467261075508498, "grad_norm": 1.609375, "learning_rate": 1.7190079719395222e-05, "loss": 0.9804, "step": 4355 }, { "epoch": 0.7468975716398397, "grad_norm": 1.5078125, "learning_rate": 1.7188824478777275e-05, "loss": 0.9459, "step": 4356 }, { "epoch": 0.7470690357288295, "grad_norm": 1.4765625, "learning_rate": 1.7187569003704688e-05, "loss": 0.9771, "step": 4357 }, { "epoch": 0.7472404998178194, "grad_norm": 1.5703125, "learning_rate": 1.7186313294218416e-05, "loss": 0.9737, "step": 4358 }, { "epoch": 0.7474119639068093, "grad_norm": 1.6015625, "learning_rate": 1.71850573503594e-05, "loss": 1.0141, "step": 4359 }, { "epoch": 0.7475834279957991, "grad_norm": 1.515625, "learning_rate": 1.718380117216861e-05, "loss": 0.9638, "step": 4360 }, { "epoch": 0.747754892084789, "grad_norm": 1.5078125, "learning_rate": 1.7182544759687015e-05, "loss": 0.9517, "step": 4361 }, { "epoch": 0.7479263561737789, "grad_norm": 1.5546875, "learning_rate": 1.7181288112955586e-05, "loss": 1.0122, "step": 4362 }, { "epoch": 0.7480978202627687, "grad_norm": 1.4609375, "learning_rate": 1.7180031232015307e-05, "loss": 0.9521, "step": 4363 }, { "epoch": 0.7482692843517585, "grad_norm": 1.65625, "learning_rate": 1.7178774116907177e-05, "loss": 0.994, "step": 4364 }, { "epoch": 0.7484407484407485, "grad_norm": 1.5546875, "learning_rate": 1.7177516767672187e-05, "loss": 1.0372, "step": 4365 }, { "epoch": 0.7486122125297383, "grad_norm": 1.5078125, "learning_rate": 1.717625918435135e-05, "loss": 0.9872, "step": 4366 }, { "epoch": 0.7487836766187281, "grad_norm": 1.625, "learning_rate": 1.717500136698567e-05, "loss": 1.1055, "step": 4367 }, { "epoch": 0.7489551407077181, "grad_norm": 1.6640625, "learning_rate": 1.7173743315616184e-05, "loss": 1.1213, "step": 4368 }, { "epoch": 0.7491266047967079, "grad_norm": 1.6015625, "learning_rate": 1.7172485030283912e-05, "loss": 1.0286, "step": 4369 }, { "epoch": 0.7492980688856977, "grad_norm": 1.609375, "learning_rate": 1.7171226511029895e-05, "loss": 1.0856, "step": 4370 }, { "epoch": 0.7494695329746877, "grad_norm": 1.53125, "learning_rate": 1.716996775789518e-05, "loss": 1.0403, "step": 4371 }, { "epoch": 0.7496409970636775, "grad_norm": 1.609375, "learning_rate": 1.7168708770920815e-05, "loss": 1.0312, "step": 4372 }, { "epoch": 0.7498124611526673, "grad_norm": 1.484375, "learning_rate": 1.716744955014786e-05, "loss": 0.9761, "step": 4373 }, { "epoch": 0.7499839252416572, "grad_norm": 1.5625, "learning_rate": 1.7166190095617386e-05, "loss": 1.0441, "step": 4374 }, { "epoch": 0.7501553893306471, "grad_norm": 1.5390625, "learning_rate": 1.7164930407370465e-05, "loss": 0.9607, "step": 4375 }, { "epoch": 0.7503268534196369, "grad_norm": 1.6796875, "learning_rate": 1.716367048544819e-05, "loss": 1.0862, "step": 4376 }, { "epoch": 0.7504983175086268, "grad_norm": 1.53125, "learning_rate": 1.7162410329891636e-05, "loss": 1.0016, "step": 4377 }, { "epoch": 0.7506697815976167, "grad_norm": 1.5078125, "learning_rate": 1.7161149940741918e-05, "loss": 1.0165, "step": 4378 }, { "epoch": 0.7508412456866065, "grad_norm": 1.5390625, "learning_rate": 1.7159889318040128e-05, "loss": 1.0394, "step": 4379 }, { "epoch": 0.7510127097755964, "grad_norm": 1.5859375, "learning_rate": 1.7158628461827386e-05, "loss": 0.9978, "step": 4380 }, { "epoch": 0.7511841738645862, "grad_norm": 1.5, "learning_rate": 1.715736737214482e-05, "loss": 0.9515, "step": 4381 }, { "epoch": 0.7513556379535761, "grad_norm": 1.671875, "learning_rate": 1.7156106049033544e-05, "loss": 1.0019, "step": 4382 }, { "epoch": 0.751527102042566, "grad_norm": 1.7890625, "learning_rate": 1.7154844492534704e-05, "loss": 1.1323, "step": 4383 }, { "epoch": 0.7516985661315558, "grad_norm": 1.5546875, "learning_rate": 1.7153582702689445e-05, "loss": 1.0557, "step": 4384 }, { "epoch": 0.7518700302205457, "grad_norm": 1.5703125, "learning_rate": 1.7152320679538914e-05, "loss": 1.0072, "step": 4385 }, { "epoch": 0.7520414943095356, "grad_norm": 1.4453125, "learning_rate": 1.7151058423124275e-05, "loss": 0.889, "step": 4386 }, { "epoch": 0.7522129583985254, "grad_norm": 1.5703125, "learning_rate": 1.714979593348669e-05, "loss": 1.0677, "step": 4387 }, { "epoch": 0.7523844224875152, "grad_norm": 1.609375, "learning_rate": 1.7148533210667337e-05, "loss": 1.0883, "step": 4388 }, { "epoch": 0.7525558865765052, "grad_norm": 1.5, "learning_rate": 1.7147270254707394e-05, "loss": 0.9649, "step": 4389 }, { "epoch": 0.752727350665495, "grad_norm": 1.5859375, "learning_rate": 1.714600706564806e-05, "loss": 1.0362, "step": 4390 }, { "epoch": 0.7528988147544848, "grad_norm": 1.59375, "learning_rate": 1.714474364353052e-05, "loss": 1.0016, "step": 4391 }, { "epoch": 0.7530702788434748, "grad_norm": 1.53125, "learning_rate": 1.714347998839599e-05, "loss": 0.9474, "step": 4392 }, { "epoch": 0.7532417429324646, "grad_norm": 1.6171875, "learning_rate": 1.7142216100285672e-05, "loss": 1.0142, "step": 4393 }, { "epoch": 0.7534132070214544, "grad_norm": 1.6796875, "learning_rate": 1.7140951979240797e-05, "loss": 1.0681, "step": 4394 }, { "epoch": 0.7535846711104444, "grad_norm": 1.6328125, "learning_rate": 1.7139687625302587e-05, "loss": 1.041, "step": 4395 }, { "epoch": 0.7537561351994342, "grad_norm": 1.6015625, "learning_rate": 1.7138423038512275e-05, "loss": 0.9334, "step": 4396 }, { "epoch": 0.753927599288424, "grad_norm": 1.5390625, "learning_rate": 1.713715821891111e-05, "loss": 0.9708, "step": 4397 }, { "epoch": 0.754099063377414, "grad_norm": 1.6015625, "learning_rate": 1.713589316654034e-05, "loss": 0.9977, "step": 4398 }, { "epoch": 0.7542705274664038, "grad_norm": 1.53125, "learning_rate": 1.713462788144122e-05, "loss": 0.9526, "step": 4399 }, { "epoch": 0.7544419915553936, "grad_norm": 1.5390625, "learning_rate": 1.713336236365502e-05, "loss": 1.0819, "step": 4400 }, { "epoch": 0.7546134556443834, "grad_norm": 1.5078125, "learning_rate": 1.713209661322301e-05, "loss": 0.9711, "step": 4401 }, { "epoch": 0.7547849197333734, "grad_norm": 1.640625, "learning_rate": 1.7130830630186476e-05, "loss": 1.0617, "step": 4402 }, { "epoch": 0.7549563838223632, "grad_norm": 1.5546875, "learning_rate": 1.7129564414586698e-05, "loss": 1.0486, "step": 4403 }, { "epoch": 0.755127847911353, "grad_norm": 1.671875, "learning_rate": 1.712829796646498e-05, "loss": 1.0216, "step": 4404 }, { "epoch": 0.755299312000343, "grad_norm": 1.546875, "learning_rate": 1.712703128586262e-05, "loss": 0.9582, "step": 4405 }, { "epoch": 0.7554707760893328, "grad_norm": 1.625, "learning_rate": 1.712576437282093e-05, "loss": 1.029, "step": 4406 }, { "epoch": 0.7556422401783226, "grad_norm": 1.546875, "learning_rate": 1.7124497227381238e-05, "loss": 1.0258, "step": 4407 }, { "epoch": 0.7558137042673125, "grad_norm": 1.4921875, "learning_rate": 1.712322984958486e-05, "loss": 0.9825, "step": 4408 }, { "epoch": 0.7559851683563024, "grad_norm": 1.6875, "learning_rate": 1.7121962239473134e-05, "loss": 1.0222, "step": 4409 }, { "epoch": 0.7561566324452922, "grad_norm": 1.5703125, "learning_rate": 1.7120694397087396e-05, "loss": 0.9514, "step": 4410 }, { "epoch": 0.7563280965342821, "grad_norm": 1.546875, "learning_rate": 1.7119426322469002e-05, "loss": 0.9866, "step": 4411 }, { "epoch": 0.756499560623272, "grad_norm": 1.5703125, "learning_rate": 1.7118158015659308e-05, "loss": 0.9585, "step": 4412 }, { "epoch": 0.7566710247122618, "grad_norm": 1.5390625, "learning_rate": 1.7116889476699675e-05, "loss": 0.985, "step": 4413 }, { "epoch": 0.7568424888012517, "grad_norm": 1.5625, "learning_rate": 1.7115620705631477e-05, "loss": 0.9986, "step": 4414 }, { "epoch": 0.7570139528902415, "grad_norm": 1.5234375, "learning_rate": 1.711435170249609e-05, "loss": 0.9748, "step": 4415 }, { "epoch": 0.7571854169792314, "grad_norm": 1.5703125, "learning_rate": 1.711308246733491e-05, "loss": 0.9782, "step": 4416 }, { "epoch": 0.7573568810682213, "grad_norm": 1.625, "learning_rate": 1.7111813000189322e-05, "loss": 0.9702, "step": 4417 }, { "epoch": 0.7575283451572111, "grad_norm": 1.5, "learning_rate": 1.711054330110073e-05, "loss": 0.9542, "step": 4418 }, { "epoch": 0.757699809246201, "grad_norm": 1.7109375, "learning_rate": 1.7109273370110547e-05, "loss": 1.0391, "step": 4419 }, { "epoch": 0.7578712733351909, "grad_norm": 1.5859375, "learning_rate": 1.7108003207260188e-05, "loss": 0.9986, "step": 4420 }, { "epoch": 0.7580427374241807, "grad_norm": 1.6171875, "learning_rate": 1.7106732812591077e-05, "loss": 1.0372, "step": 4421 }, { "epoch": 0.7582142015131705, "grad_norm": 1.578125, "learning_rate": 1.7105462186144652e-05, "loss": 1.0716, "step": 4422 }, { "epoch": 0.7583856656021605, "grad_norm": 1.6328125, "learning_rate": 1.7104191327962345e-05, "loss": 1.0082, "step": 4423 }, { "epoch": 0.7585571296911503, "grad_norm": 1.703125, "learning_rate": 1.7102920238085606e-05, "loss": 0.9464, "step": 4424 }, { "epoch": 0.7587285937801401, "grad_norm": 1.6015625, "learning_rate": 1.710164891655589e-05, "loss": 1.0328, "step": 4425 }, { "epoch": 0.7589000578691301, "grad_norm": 1.59375, "learning_rate": 1.7100377363414665e-05, "loss": 0.9934, "step": 4426 }, { "epoch": 0.7590715219581199, "grad_norm": 1.5703125, "learning_rate": 1.7099105578703393e-05, "loss": 1.0205, "step": 4427 }, { "epoch": 0.7592429860471097, "grad_norm": 1.53125, "learning_rate": 1.7097833562463556e-05, "loss": 0.9942, "step": 4428 }, { "epoch": 0.7594144501360997, "grad_norm": 1.5546875, "learning_rate": 1.7096561314736638e-05, "loss": 0.9955, "step": 4429 }, { "epoch": 0.7595859142250895, "grad_norm": 1.5078125, "learning_rate": 1.709528883556413e-05, "loss": 0.936, "step": 4430 }, { "epoch": 0.7597573783140793, "grad_norm": 1.65625, "learning_rate": 1.709401612498754e-05, "loss": 0.9424, "step": 4431 }, { "epoch": 0.7599288424030692, "grad_norm": 1.6875, "learning_rate": 1.709274318304837e-05, "loss": 1.1093, "step": 4432 }, { "epoch": 0.7601003064920591, "grad_norm": 1.5546875, "learning_rate": 1.7091470009788135e-05, "loss": 0.9639, "step": 4433 }, { "epoch": 0.7602717705810489, "grad_norm": 1.5625, "learning_rate": 1.7090196605248358e-05, "loss": 0.974, "step": 4434 }, { "epoch": 0.7604432346700388, "grad_norm": 1.578125, "learning_rate": 1.708892296947057e-05, "loss": 1.0629, "step": 4435 }, { "epoch": 0.7606146987590287, "grad_norm": 1.609375, "learning_rate": 1.708764910249631e-05, "loss": 1.1132, "step": 4436 }, { "epoch": 0.7607861628480185, "grad_norm": 1.546875, "learning_rate": 1.7086375004367124e-05, "loss": 0.994, "step": 4437 }, { "epoch": 0.7609576269370084, "grad_norm": 1.640625, "learning_rate": 1.708510067512456e-05, "loss": 1.0587, "step": 4438 }, { "epoch": 0.7611290910259982, "grad_norm": 1.5078125, "learning_rate": 1.708382611481019e-05, "loss": 0.9997, "step": 4439 }, { "epoch": 0.7613005551149881, "grad_norm": 1.5703125, "learning_rate": 1.7082551323465573e-05, "loss": 1.0589, "step": 4440 }, { "epoch": 0.761472019203978, "grad_norm": 1.6171875, "learning_rate": 1.7081276301132284e-05, "loss": 1.0245, "step": 4441 }, { "epoch": 0.7616434832929678, "grad_norm": 1.484375, "learning_rate": 1.7080001047851912e-05, "loss": 0.9763, "step": 4442 }, { "epoch": 0.7618149473819577, "grad_norm": 1.5234375, "learning_rate": 1.7078725563666045e-05, "loss": 0.8622, "step": 4443 }, { "epoch": 0.7619864114709476, "grad_norm": 1.6484375, "learning_rate": 1.707744984861628e-05, "loss": 1.0999, "step": 4444 }, { "epoch": 0.7621578755599374, "grad_norm": 1.609375, "learning_rate": 1.7076173902744226e-05, "loss": 1.0719, "step": 4445 }, { "epoch": 0.7623293396489272, "grad_norm": 1.6171875, "learning_rate": 1.7074897726091492e-05, "loss": 1.0189, "step": 4446 }, { "epoch": 0.7625008037379172, "grad_norm": 1.5859375, "learning_rate": 1.7073621318699703e-05, "loss": 0.9854, "step": 4447 }, { "epoch": 0.762672267826907, "grad_norm": 1.5859375, "learning_rate": 1.7072344680610485e-05, "loss": 1.007, "step": 4448 }, { "epoch": 0.7628437319158968, "grad_norm": 1.6015625, "learning_rate": 1.7071067811865477e-05, "loss": 1.0101, "step": 4449 }, { "epoch": 0.7630151960048868, "grad_norm": 1.625, "learning_rate": 1.7069790712506317e-05, "loss": 0.9622, "step": 4450 }, { "epoch": 0.7631866600938766, "grad_norm": 1.546875, "learning_rate": 1.7068513382574665e-05, "loss": 0.9194, "step": 4451 }, { "epoch": 0.7633581241828664, "grad_norm": 1.546875, "learning_rate": 1.7067235822112168e-05, "loss": 1.0135, "step": 4452 }, { "epoch": 0.7635295882718564, "grad_norm": 1.671875, "learning_rate": 1.7065958031160503e-05, "loss": 0.9483, "step": 4453 }, { "epoch": 0.7637010523608462, "grad_norm": 1.6875, "learning_rate": 1.7064680009761338e-05, "loss": 1.0235, "step": 4454 }, { "epoch": 0.763872516449836, "grad_norm": 1.5859375, "learning_rate": 1.7063401757956353e-05, "loss": 1.05, "step": 4455 }, { "epoch": 0.7640439805388259, "grad_norm": 1.546875, "learning_rate": 1.7062123275787236e-05, "loss": 0.9476, "step": 4456 }, { "epoch": 0.7642154446278158, "grad_norm": 1.6171875, "learning_rate": 1.706084456329569e-05, "loss": 1.05, "step": 4457 }, { "epoch": 0.7643869087168056, "grad_norm": 1.5234375, "learning_rate": 1.7059565620523414e-05, "loss": 1.0445, "step": 4458 }, { "epoch": 0.7645583728057955, "grad_norm": 1.625, "learning_rate": 1.7058286447512115e-05, "loss": 1.0614, "step": 4459 }, { "epoch": 0.7647298368947854, "grad_norm": 1.484375, "learning_rate": 1.7057007044303518e-05, "loss": 1.0182, "step": 4460 }, { "epoch": 0.7649013009837752, "grad_norm": 1.578125, "learning_rate": 1.705572741093935e-05, "loss": 0.9285, "step": 4461 }, { "epoch": 0.7650727650727651, "grad_norm": 1.5625, "learning_rate": 1.7054447547461337e-05, "loss": 0.9481, "step": 4462 }, { "epoch": 0.7652442291617549, "grad_norm": 1.515625, "learning_rate": 1.705316745391123e-05, "loss": 1.0469, "step": 4463 }, { "epoch": 0.7654156932507448, "grad_norm": 1.5703125, "learning_rate": 1.7051887130330767e-05, "loss": 1.0112, "step": 4464 }, { "epoch": 0.7655871573397347, "grad_norm": 1.5234375, "learning_rate": 1.7050606576761714e-05, "loss": 0.9453, "step": 4465 }, { "epoch": 0.7657586214287245, "grad_norm": 1.5234375, "learning_rate": 1.704932579324583e-05, "loss": 0.9819, "step": 4466 }, { "epoch": 0.7659300855177144, "grad_norm": 1.5703125, "learning_rate": 1.7048044779824885e-05, "loss": 1.0449, "step": 4467 }, { "epoch": 0.7661015496067043, "grad_norm": 1.640625, "learning_rate": 1.7046763536540657e-05, "loss": 0.9743, "step": 4468 }, { "epoch": 0.7662730136956941, "grad_norm": 1.75, "learning_rate": 1.704548206343494e-05, "loss": 1.0509, "step": 4469 }, { "epoch": 0.7664444777846839, "grad_norm": 1.6171875, "learning_rate": 1.704420036054952e-05, "loss": 0.9419, "step": 4470 }, { "epoch": 0.7666159418736739, "grad_norm": 1.5625, "learning_rate": 1.70429184279262e-05, "loss": 0.9786, "step": 4471 }, { "epoch": 0.7667874059626637, "grad_norm": 1.4609375, "learning_rate": 1.7041636265606786e-05, "loss": 0.9696, "step": 4472 }, { "epoch": 0.7669588700516535, "grad_norm": 1.5625, "learning_rate": 1.7040353873633097e-05, "loss": 1.0663, "step": 4473 }, { "epoch": 0.7671303341406435, "grad_norm": 1.5390625, "learning_rate": 1.703907125204696e-05, "loss": 0.975, "step": 4474 }, { "epoch": 0.7673017982296333, "grad_norm": 1.5, "learning_rate": 1.7037788400890206e-05, "loss": 0.963, "step": 4475 }, { "epoch": 0.7674732623186231, "grad_norm": 1.65625, "learning_rate": 1.7036505320204664e-05, "loss": 1.0571, "step": 4476 }, { "epoch": 0.767644726407613, "grad_norm": 1.5703125, "learning_rate": 1.703522201003219e-05, "loss": 0.9805, "step": 4477 }, { "epoch": 0.7678161904966029, "grad_norm": 1.5703125, "learning_rate": 1.703393847041463e-05, "loss": 1.014, "step": 4478 }, { "epoch": 0.7679876545855927, "grad_norm": 1.546875, "learning_rate": 1.7032654701393853e-05, "loss": 1.036, "step": 4479 }, { "epoch": 0.7681591186745826, "grad_norm": 1.5859375, "learning_rate": 1.7031370703011724e-05, "loss": 1.0157, "step": 4480 }, { "epoch": 0.7683305827635725, "grad_norm": 1.5625, "learning_rate": 1.7030086475310116e-05, "loss": 0.9388, "step": 4481 }, { "epoch": 0.7685020468525623, "grad_norm": 1.578125, "learning_rate": 1.7028802018330915e-05, "loss": 0.9365, "step": 4482 }, { "epoch": 0.7686735109415522, "grad_norm": 1.6171875, "learning_rate": 1.7027517332116014e-05, "loss": 1.0533, "step": 4483 }, { "epoch": 0.768844975030542, "grad_norm": 1.4921875, "learning_rate": 1.7026232416707312e-05, "loss": 0.9399, "step": 4484 }, { "epoch": 0.7690164391195319, "grad_norm": 1.515625, "learning_rate": 1.7024947272146704e-05, "loss": 1.0067, "step": 4485 }, { "epoch": 0.7691879032085218, "grad_norm": 1.546875, "learning_rate": 1.702366189847612e-05, "loss": 0.9739, "step": 4486 }, { "epoch": 0.7693593672975116, "grad_norm": 1.578125, "learning_rate": 1.7022376295737473e-05, "loss": 1.0058, "step": 4487 }, { "epoch": 0.7695308313865015, "grad_norm": 1.6875, "learning_rate": 1.702109046397269e-05, "loss": 0.9879, "step": 4488 }, { "epoch": 0.7697022954754914, "grad_norm": 1.5546875, "learning_rate": 1.7019804403223702e-05, "loss": 0.9565, "step": 4489 }, { "epoch": 0.7698737595644812, "grad_norm": 1.625, "learning_rate": 1.7018518113532467e-05, "loss": 1.0858, "step": 4490 }, { "epoch": 0.7700452236534711, "grad_norm": 1.4921875, "learning_rate": 1.701723159494092e-05, "loss": 1.0049, "step": 4491 }, { "epoch": 0.770216687742461, "grad_norm": 1.578125, "learning_rate": 1.7015944847491027e-05, "loss": 1.0759, "step": 4492 }, { "epoch": 0.7703881518314508, "grad_norm": 1.5625, "learning_rate": 1.7014657871224758e-05, "loss": 0.9866, "step": 4493 }, { "epoch": 0.7705596159204406, "grad_norm": 1.515625, "learning_rate": 1.7013370666184078e-05, "loss": 0.9232, "step": 4494 }, { "epoch": 0.7707310800094306, "grad_norm": 1.5859375, "learning_rate": 1.701208323241097e-05, "loss": 0.9587, "step": 4495 }, { "epoch": 0.7709025440984204, "grad_norm": 1.53125, "learning_rate": 1.7010795569947426e-05, "loss": 0.9278, "step": 4496 }, { "epoch": 0.7710740081874102, "grad_norm": 1.5859375, "learning_rate": 1.7009507678835432e-05, "loss": 1.004, "step": 4497 }, { "epoch": 0.7712454722764001, "grad_norm": 1.65625, "learning_rate": 1.7008219559117002e-05, "loss": 1.0233, "step": 4498 }, { "epoch": 0.77141693636539, "grad_norm": 1.4296875, "learning_rate": 1.7006931210834144e-05, "loss": 0.929, "step": 4499 }, { "epoch": 0.7715884004543798, "grad_norm": 1.5859375, "learning_rate": 1.700564263402887e-05, "loss": 0.9735, "step": 4500 }, { "epoch": 0.7717598645433696, "grad_norm": 1.578125, "learning_rate": 1.7004353828743206e-05, "loss": 1.009, "step": 4501 }, { "epoch": 0.7719313286323596, "grad_norm": 1.5625, "learning_rate": 1.7003064795019193e-05, "loss": 1.008, "step": 4502 }, { "epoch": 0.7721027927213494, "grad_norm": 1.6015625, "learning_rate": 1.7001775532898865e-05, "loss": 0.9422, "step": 4503 }, { "epoch": 0.7722742568103392, "grad_norm": 1.6171875, "learning_rate": 1.7000486042424268e-05, "loss": 1.0274, "step": 4504 }, { "epoch": 0.7724457208993292, "grad_norm": 1.5859375, "learning_rate": 1.6999196323637463e-05, "loss": 0.9829, "step": 4505 }, { "epoch": 0.772617184988319, "grad_norm": 1.59375, "learning_rate": 1.6997906376580506e-05, "loss": 1.1202, "step": 4506 }, { "epoch": 0.7727886490773088, "grad_norm": 1.4765625, "learning_rate": 1.6996616201295472e-05, "loss": 0.9832, "step": 4507 }, { "epoch": 0.7729601131662988, "grad_norm": 1.546875, "learning_rate": 1.6995325797824434e-05, "loss": 0.9579, "step": 4508 }, { "epoch": 0.7731315772552886, "grad_norm": 1.59375, "learning_rate": 1.6994035166209483e-05, "loss": 1.0081, "step": 4509 }, { "epoch": 0.7733030413442784, "grad_norm": 1.4765625, "learning_rate": 1.699274430649271e-05, "loss": 0.9441, "step": 4510 }, { "epoch": 0.7734745054332683, "grad_norm": 1.609375, "learning_rate": 1.6991453218716212e-05, "loss": 0.9585, "step": 4511 }, { "epoch": 0.7736459695222582, "grad_norm": 1.5234375, "learning_rate": 1.6990161902922094e-05, "loss": 0.9768, "step": 4512 }, { "epoch": 0.773817433611248, "grad_norm": 1.6015625, "learning_rate": 1.6988870359152473e-05, "loss": 1.0389, "step": 4513 }, { "epoch": 0.7739888977002379, "grad_norm": 1.6640625, "learning_rate": 1.6987578587449478e-05, "loss": 0.9938, "step": 4514 }, { "epoch": 0.7741603617892278, "grad_norm": 1.515625, "learning_rate": 1.6986286587855226e-05, "loss": 0.9872, "step": 4515 }, { "epoch": 0.7743318258782176, "grad_norm": 1.546875, "learning_rate": 1.6984994360411866e-05, "loss": 0.9156, "step": 4516 }, { "epoch": 0.7745032899672075, "grad_norm": 1.5546875, "learning_rate": 1.6983701905161533e-05, "loss": 0.9448, "step": 4517 }, { "epoch": 0.7746747540561973, "grad_norm": 1.5, "learning_rate": 1.6982409222146384e-05, "loss": 0.9568, "step": 4518 }, { "epoch": 0.7748462181451872, "grad_norm": 1.5234375, "learning_rate": 1.698111631140858e-05, "loss": 1.0083, "step": 4519 }, { "epoch": 0.7750176822341771, "grad_norm": 1.578125, "learning_rate": 1.697982317299028e-05, "loss": 1.0531, "step": 4520 }, { "epoch": 0.7751891463231669, "grad_norm": 1.5703125, "learning_rate": 1.6978529806933665e-05, "loss": 0.9687, "step": 4521 }, { "epoch": 0.7753606104121568, "grad_norm": 1.46875, "learning_rate": 1.6977236213280913e-05, "loss": 0.9364, "step": 4522 }, { "epoch": 0.7755320745011467, "grad_norm": 1.609375, "learning_rate": 1.6975942392074217e-05, "loss": 1.0638, "step": 4523 }, { "epoch": 0.7757035385901365, "grad_norm": 1.5703125, "learning_rate": 1.6974648343355765e-05, "loss": 1.0329, "step": 4524 }, { "epoch": 0.7758750026791263, "grad_norm": 1.5703125, "learning_rate": 1.697335406716777e-05, "loss": 0.9509, "step": 4525 }, { "epoch": 0.7760464667681163, "grad_norm": 1.515625, "learning_rate": 1.697205956355244e-05, "loss": 0.9671, "step": 4526 }, { "epoch": 0.7762179308571061, "grad_norm": 1.65625, "learning_rate": 1.6970764832551996e-05, "loss": 1.0382, "step": 4527 }, { "epoch": 0.7763893949460959, "grad_norm": 1.5625, "learning_rate": 1.6969469874208657e-05, "loss": 0.992, "step": 4528 }, { "epoch": 0.7765608590350859, "grad_norm": 1.5078125, "learning_rate": 1.696817468856466e-05, "loss": 0.9358, "step": 4529 }, { "epoch": 0.7767323231240757, "grad_norm": 1.625, "learning_rate": 1.6966879275662252e-05, "loss": 1.0602, "step": 4530 }, { "epoch": 0.7769037872130655, "grad_norm": 1.65625, "learning_rate": 1.6965583635543673e-05, "loss": 1.0713, "step": 4531 }, { "epoch": 0.7770752513020555, "grad_norm": 1.5390625, "learning_rate": 1.696428776825118e-05, "loss": 1.0382, "step": 4532 }, { "epoch": 0.7772467153910453, "grad_norm": 1.6875, "learning_rate": 1.6962991673827038e-05, "loss": 1.0826, "step": 4533 }, { "epoch": 0.7774181794800351, "grad_norm": 1.6484375, "learning_rate": 1.696169535231352e-05, "loss": 1.0855, "step": 4534 }, { "epoch": 0.777589643569025, "grad_norm": 1.59375, "learning_rate": 1.69603988037529e-05, "loss": 0.9902, "step": 4535 }, { "epoch": 0.7777611076580149, "grad_norm": 1.609375, "learning_rate": 1.695910202818746e-05, "loss": 1.0447, "step": 4536 }, { "epoch": 0.7779325717470047, "grad_norm": 1.6484375, "learning_rate": 1.6957805025659504e-05, "loss": 1.1, "step": 4537 }, { "epoch": 0.7781040358359946, "grad_norm": 1.5390625, "learning_rate": 1.6956507796211325e-05, "loss": 0.9557, "step": 4538 }, { "epoch": 0.7782754999249845, "grad_norm": 1.53125, "learning_rate": 1.695521033988523e-05, "loss": 0.9928, "step": 4539 }, { "epoch": 0.7784469640139743, "grad_norm": 1.59375, "learning_rate": 1.695391265672353e-05, "loss": 0.9859, "step": 4540 }, { "epoch": 0.7786184281029642, "grad_norm": 1.5390625, "learning_rate": 1.6952614746768555e-05, "loss": 1.0547, "step": 4541 }, { "epoch": 0.778789892191954, "grad_norm": 1.625, "learning_rate": 1.6951316610062634e-05, "loss": 1.0959, "step": 4542 }, { "epoch": 0.7789613562809439, "grad_norm": 1.59375, "learning_rate": 1.6950018246648105e-05, "loss": 1.0866, "step": 4543 }, { "epoch": 0.7791328203699338, "grad_norm": 1.5546875, "learning_rate": 1.6948719656567304e-05, "loss": 0.9895, "step": 4544 }, { "epoch": 0.7793042844589236, "grad_norm": 1.59375, "learning_rate": 1.694742083986259e-05, "loss": 0.9399, "step": 4545 }, { "epoch": 0.7794757485479135, "grad_norm": 1.59375, "learning_rate": 1.6946121796576324e-05, "loss": 1.1096, "step": 4546 }, { "epoch": 0.7796472126369034, "grad_norm": 1.5078125, "learning_rate": 1.6944822526750865e-05, "loss": 0.9154, "step": 4547 }, { "epoch": 0.7798186767258932, "grad_norm": 1.5234375, "learning_rate": 1.6943523030428592e-05, "loss": 0.9304, "step": 4548 }, { "epoch": 0.779990140814883, "grad_norm": 1.671875, "learning_rate": 1.694222330765189e-05, "loss": 1.0091, "step": 4549 }, { "epoch": 0.780161604903873, "grad_norm": 1.5, "learning_rate": 1.694092335846314e-05, "loss": 1.0471, "step": 4550 }, { "epoch": 0.7803330689928628, "grad_norm": 1.6484375, "learning_rate": 1.6939623182904746e-05, "loss": 1.0572, "step": 4551 }, { "epoch": 0.7805045330818526, "grad_norm": 1.5078125, "learning_rate": 1.6938322781019106e-05, "loss": 0.8963, "step": 4552 }, { "epoch": 0.7806759971708426, "grad_norm": 1.65625, "learning_rate": 1.693702215284863e-05, "loss": 1.0282, "step": 4553 }, { "epoch": 0.7808474612598324, "grad_norm": 1.6484375, "learning_rate": 1.693572129843574e-05, "loss": 0.9697, "step": 4554 }, { "epoch": 0.7810189253488222, "grad_norm": 1.6796875, "learning_rate": 1.6934420217822864e-05, "loss": 1.0408, "step": 4555 }, { "epoch": 0.7811903894378122, "grad_norm": 1.515625, "learning_rate": 1.693311891105243e-05, "loss": 0.9593, "step": 4556 }, { "epoch": 0.781361853526802, "grad_norm": 1.5625, "learning_rate": 1.6931817378166885e-05, "loss": 0.9947, "step": 4557 }, { "epoch": 0.7815333176157918, "grad_norm": 1.734375, "learning_rate": 1.6930515619208665e-05, "loss": 1.0501, "step": 4558 }, { "epoch": 0.7817047817047817, "grad_norm": 1.6171875, "learning_rate": 1.6929213634220235e-05, "loss": 1.0618, "step": 4559 }, { "epoch": 0.7818762457937716, "grad_norm": 1.59375, "learning_rate": 1.692791142324406e-05, "loss": 1.0571, "step": 4560 }, { "epoch": 0.7820477098827614, "grad_norm": 1.5390625, "learning_rate": 1.69266089863226e-05, "loss": 0.9615, "step": 4561 }, { "epoch": 0.7822191739717513, "grad_norm": 1.6640625, "learning_rate": 1.692530632349834e-05, "loss": 1.072, "step": 4562 }, { "epoch": 0.7823906380607412, "grad_norm": 1.5546875, "learning_rate": 1.6924003434813763e-05, "loss": 0.9741, "step": 4563 }, { "epoch": 0.782562102149731, "grad_norm": 1.6015625, "learning_rate": 1.6922700320311357e-05, "loss": 1.0453, "step": 4564 }, { "epoch": 0.7827335662387209, "grad_norm": 1.5546875, "learning_rate": 1.692139698003363e-05, "loss": 1.0435, "step": 4565 }, { "epoch": 0.7829050303277107, "grad_norm": 1.4765625, "learning_rate": 1.6920093414023083e-05, "loss": 1.017, "step": 4566 }, { "epoch": 0.7830764944167006, "grad_norm": 1.578125, "learning_rate": 1.691878962232223e-05, "loss": 1.0075, "step": 4567 }, { "epoch": 0.7832479585056905, "grad_norm": 1.6171875, "learning_rate": 1.6917485604973595e-05, "loss": 0.9461, "step": 4568 }, { "epoch": 0.7834194225946803, "grad_norm": 1.640625, "learning_rate": 1.6916181362019704e-05, "loss": 1.0606, "step": 4569 }, { "epoch": 0.7835908866836702, "grad_norm": 1.578125, "learning_rate": 1.6914876893503093e-05, "loss": 1.0641, "step": 4570 }, { "epoch": 0.7837623507726601, "grad_norm": 1.5625, "learning_rate": 1.6913572199466312e-05, "loss": 0.9939, "step": 4571 }, { "epoch": 0.7839338148616499, "grad_norm": 1.5703125, "learning_rate": 1.6912267279951904e-05, "loss": 0.9261, "step": 4572 }, { "epoch": 0.7841052789506398, "grad_norm": 1.5546875, "learning_rate": 1.6910962135002433e-05, "loss": 0.9663, "step": 4573 }, { "epoch": 0.7842767430396297, "grad_norm": 1.5703125, "learning_rate": 1.690965676466046e-05, "loss": 1.0686, "step": 4574 }, { "epoch": 0.7844482071286195, "grad_norm": 1.53125, "learning_rate": 1.6908351168968563e-05, "loss": 1.02, "step": 4575 }, { "epoch": 0.7846196712176093, "grad_norm": 1.6484375, "learning_rate": 1.690704534796932e-05, "loss": 1.0676, "step": 4576 }, { "epoch": 0.7847911353065993, "grad_norm": 1.640625, "learning_rate": 1.6905739301705316e-05, "loss": 1.0048, "step": 4577 }, { "epoch": 0.7849625993955891, "grad_norm": 1.609375, "learning_rate": 1.690443303021915e-05, "loss": 1.0309, "step": 4578 }, { "epoch": 0.7851340634845789, "grad_norm": 1.46875, "learning_rate": 1.6903126533553425e-05, "loss": 1.0769, "step": 4579 }, { "epoch": 0.7853055275735689, "grad_norm": 1.5, "learning_rate": 1.690181981175075e-05, "loss": 0.9724, "step": 4580 }, { "epoch": 0.7854769916625587, "grad_norm": 1.453125, "learning_rate": 1.690051286485374e-05, "loss": 1.0019, "step": 4581 }, { "epoch": 0.7856484557515485, "grad_norm": 1.609375, "learning_rate": 1.6899205692905016e-05, "loss": 0.9549, "step": 4582 }, { "epoch": 0.7858199198405385, "grad_norm": 1.578125, "learning_rate": 1.6897898295947217e-05, "loss": 0.9591, "step": 4583 }, { "epoch": 0.7859913839295283, "grad_norm": 1.5234375, "learning_rate": 1.6896590674022977e-05, "loss": 0.9805, "step": 4584 }, { "epoch": 0.7861628480185181, "grad_norm": 1.546875, "learning_rate": 1.6895282827174952e-05, "loss": 0.9356, "step": 4585 }, { "epoch": 0.786334312107508, "grad_norm": 1.5546875, "learning_rate": 1.6893974755445785e-05, "loss": 1.0637, "step": 4586 }, { "epoch": 0.7865057761964979, "grad_norm": 1.515625, "learning_rate": 1.689266645887814e-05, "loss": 0.9466, "step": 4587 }, { "epoch": 0.7866772402854877, "grad_norm": 1.5859375, "learning_rate": 1.6891357937514685e-05, "loss": 0.9378, "step": 4588 }, { "epoch": 0.7868487043744776, "grad_norm": 1.5546875, "learning_rate": 1.6890049191398102e-05, "loss": 0.9389, "step": 4589 }, { "epoch": 0.7870201684634675, "grad_norm": 1.59375, "learning_rate": 1.688874022057107e-05, "loss": 1.0351, "step": 4590 }, { "epoch": 0.7871916325524573, "grad_norm": 1.6328125, "learning_rate": 1.688743102507627e-05, "loss": 1.0766, "step": 4591 }, { "epoch": 0.7873630966414471, "grad_norm": 1.609375, "learning_rate": 1.6886121604956415e-05, "loss": 1.1055, "step": 4592 }, { "epoch": 0.787534560730437, "grad_norm": 1.5390625, "learning_rate": 1.6884811960254203e-05, "loss": 1.0666, "step": 4593 }, { "epoch": 0.7877060248194269, "grad_norm": 1.6328125, "learning_rate": 1.6883502091012346e-05, "loss": 1.036, "step": 4594 }, { "epoch": 0.7878774889084167, "grad_norm": 1.5625, "learning_rate": 1.6882191997273567e-05, "loss": 1.0623, "step": 4595 }, { "epoch": 0.7880489529974066, "grad_norm": 1.578125, "learning_rate": 1.6880881679080592e-05, "loss": 0.9519, "step": 4596 }, { "epoch": 0.7882204170863965, "grad_norm": 1.5546875, "learning_rate": 1.687957113647615e-05, "loss": 1.1319, "step": 4597 }, { "epoch": 0.7883918811753863, "grad_norm": 1.5078125, "learning_rate": 1.6878260369502993e-05, "loss": 0.984, "step": 4598 }, { "epoch": 0.7885633452643762, "grad_norm": 1.5703125, "learning_rate": 1.6876949378203858e-05, "loss": 1.0901, "step": 4599 }, { "epoch": 0.788734809353366, "grad_norm": 1.5703125, "learning_rate": 1.6875638162621512e-05, "loss": 0.9269, "step": 4600 }, { "epoch": 0.7889062734423559, "grad_norm": 1.5234375, "learning_rate": 1.6874326722798713e-05, "loss": 1.0038, "step": 4601 }, { "epoch": 0.7890777375313458, "grad_norm": 1.6171875, "learning_rate": 1.6873015058778232e-05, "loss": 1.0358, "step": 4602 }, { "epoch": 0.7892492016203356, "grad_norm": 1.6484375, "learning_rate": 1.687170317060285e-05, "loss": 1.0957, "step": 4603 }, { "epoch": 0.7894206657093255, "grad_norm": 1.515625, "learning_rate": 1.687039105831535e-05, "loss": 0.9912, "step": 4604 }, { "epoch": 0.7895921297983154, "grad_norm": 1.453125, "learning_rate": 1.6869078721958528e-05, "loss": 0.982, "step": 4605 }, { "epoch": 0.7897635938873052, "grad_norm": 1.546875, "learning_rate": 1.686776616157518e-05, "loss": 0.9703, "step": 4606 }, { "epoch": 0.789935057976295, "grad_norm": 1.5234375, "learning_rate": 1.6866453377208115e-05, "loss": 1.0642, "step": 4607 }, { "epoch": 0.790106522065285, "grad_norm": 1.65625, "learning_rate": 1.6865140368900153e-05, "loss": 1.0153, "step": 4608 }, { "epoch": 0.7902779861542748, "grad_norm": 1.59375, "learning_rate": 1.686382713669411e-05, "loss": 0.9728, "step": 4609 }, { "epoch": 0.7904494502432646, "grad_norm": 1.578125, "learning_rate": 1.6862513680632815e-05, "loss": 0.9923, "step": 4610 }, { "epoch": 0.7906209143322546, "grad_norm": 1.53125, "learning_rate": 1.6861200000759108e-05, "loss": 1.0171, "step": 4611 }, { "epoch": 0.7907923784212444, "grad_norm": 1.6953125, "learning_rate": 1.6859886097115833e-05, "loss": 1.075, "step": 4612 }, { "epoch": 0.7909638425102342, "grad_norm": 1.671875, "learning_rate": 1.685857196974584e-05, "loss": 0.9838, "step": 4613 }, { "epoch": 0.7911353065992242, "grad_norm": 1.6328125, "learning_rate": 1.6857257618691992e-05, "loss": 1.0337, "step": 4614 }, { "epoch": 0.791306770688214, "grad_norm": 1.609375, "learning_rate": 1.6855943043997144e-05, "loss": 0.9971, "step": 4615 }, { "epoch": 0.7914782347772038, "grad_norm": 1.5703125, "learning_rate": 1.685462824570418e-05, "loss": 1.028, "step": 4616 }, { "epoch": 0.7916496988661937, "grad_norm": 1.6796875, "learning_rate": 1.6853313223855977e-05, "loss": 1.0616, "step": 4617 }, { "epoch": 0.7918211629551836, "grad_norm": 1.53125, "learning_rate": 1.685199797849542e-05, "loss": 1.0246, "step": 4618 }, { "epoch": 0.7919926270441734, "grad_norm": 1.578125, "learning_rate": 1.685068250966541e-05, "loss": 1.0009, "step": 4619 }, { "epoch": 0.7921640911331633, "grad_norm": 1.6015625, "learning_rate": 1.6849366817408846e-05, "loss": 1.0326, "step": 4620 }, { "epoch": 0.7923355552221532, "grad_norm": 1.5625, "learning_rate": 1.684805090176864e-05, "loss": 0.9854, "step": 4621 }, { "epoch": 0.792507019311143, "grad_norm": 1.4921875, "learning_rate": 1.68467347627877e-05, "loss": 0.9683, "step": 4622 }, { "epoch": 0.7926784834001329, "grad_norm": 1.515625, "learning_rate": 1.684541840050896e-05, "loss": 0.9217, "step": 4623 }, { "epoch": 0.7928499474891227, "grad_norm": 1.53125, "learning_rate": 1.6844101814975345e-05, "loss": 0.9986, "step": 4624 }, { "epoch": 0.7930214115781126, "grad_norm": 1.671875, "learning_rate": 1.6842785006229805e-05, "loss": 1.0345, "step": 4625 }, { "epoch": 0.7931928756671025, "grad_norm": 1.609375, "learning_rate": 1.6841467974315275e-05, "loss": 1.059, "step": 4626 }, { "epoch": 0.7933643397560923, "grad_norm": 1.6015625, "learning_rate": 1.6840150719274712e-05, "loss": 1.0887, "step": 4627 }, { "epoch": 0.7935358038450822, "grad_norm": 1.609375, "learning_rate": 1.6838833241151072e-05, "loss": 0.9706, "step": 4628 }, { "epoch": 0.7937072679340721, "grad_norm": 1.6328125, "learning_rate": 1.6837515539987333e-05, "loss": 1.0665, "step": 4629 }, { "epoch": 0.7938787320230619, "grad_norm": 1.546875, "learning_rate": 1.6836197615826463e-05, "loss": 0.9952, "step": 4630 }, { "epoch": 0.7940501961120517, "grad_norm": 1.5546875, "learning_rate": 1.6834879468711444e-05, "loss": 1.026, "step": 4631 }, { "epoch": 0.7942216602010417, "grad_norm": 1.59375, "learning_rate": 1.6833561098685272e-05, "loss": 1.0303, "step": 4632 }, { "epoch": 0.7943931242900315, "grad_norm": 1.5078125, "learning_rate": 1.6832242505790938e-05, "loss": 0.94, "step": 4633 }, { "epoch": 0.7945645883790213, "grad_norm": 1.6328125, "learning_rate": 1.6830923690071445e-05, "loss": 1.047, "step": 4634 }, { "epoch": 0.7947360524680113, "grad_norm": 1.6328125, "learning_rate": 1.6829604651569805e-05, "loss": 1.0085, "step": 4635 }, { "epoch": 0.7949075165570011, "grad_norm": 1.640625, "learning_rate": 1.6828285390329047e-05, "loss": 0.9228, "step": 4636 }, { "epoch": 0.7950789806459909, "grad_norm": 1.5546875, "learning_rate": 1.6826965906392187e-05, "loss": 1.0364, "step": 4637 }, { "epoch": 0.7952504447349809, "grad_norm": 1.5078125, "learning_rate": 1.682564619980226e-05, "loss": 0.965, "step": 4638 }, { "epoch": 0.7954219088239707, "grad_norm": 1.5546875, "learning_rate": 1.682432627060231e-05, "loss": 1.1089, "step": 4639 }, { "epoch": 0.7955933729129605, "grad_norm": 1.578125, "learning_rate": 1.682300611883538e-05, "loss": 0.9678, "step": 4640 }, { "epoch": 0.7957648370019504, "grad_norm": 1.59375, "learning_rate": 1.6821685744544526e-05, "loss": 0.9893, "step": 4641 }, { "epoch": 0.7959363010909403, "grad_norm": 1.6640625, "learning_rate": 1.6820365147772812e-05, "loss": 1.008, "step": 4642 }, { "epoch": 0.7961077651799301, "grad_norm": 1.6015625, "learning_rate": 1.681904432856331e-05, "loss": 1.0865, "step": 4643 }, { "epoch": 0.79627922926892, "grad_norm": 1.5703125, "learning_rate": 1.6817723286959092e-05, "loss": 1.0863, "step": 4644 }, { "epoch": 0.7964506933579099, "grad_norm": 1.5546875, "learning_rate": 1.6816402023003246e-05, "loss": 1.0579, "step": 4645 }, { "epoch": 0.7966221574468997, "grad_norm": 1.609375, "learning_rate": 1.6815080536738862e-05, "loss": 1.0737, "step": 4646 }, { "epoch": 0.7967936215358896, "grad_norm": 1.5078125, "learning_rate": 1.6813758828209036e-05, "loss": 1.0252, "step": 4647 }, { "epoch": 0.7969650856248794, "grad_norm": 1.59375, "learning_rate": 1.681243689745688e-05, "loss": 1.0197, "step": 4648 }, { "epoch": 0.7971365497138693, "grad_norm": 1.578125, "learning_rate": 1.6811114744525502e-05, "loss": 1.0342, "step": 4649 }, { "epoch": 0.7973080138028592, "grad_norm": 1.5546875, "learning_rate": 1.6809792369458022e-05, "loss": 0.9819, "step": 4650 }, { "epoch": 0.797479477891849, "grad_norm": 1.5859375, "learning_rate": 1.6808469772297572e-05, "loss": 1.0754, "step": 4651 }, { "epoch": 0.7976509419808389, "grad_norm": 1.515625, "learning_rate": 1.6807146953087282e-05, "loss": 0.925, "step": 4652 }, { "epoch": 0.7978224060698288, "grad_norm": 1.6015625, "learning_rate": 1.6805823911870298e-05, "loss": 1.0692, "step": 4653 }, { "epoch": 0.7979938701588186, "grad_norm": 1.609375, "learning_rate": 1.680450064868977e-05, "loss": 1.0828, "step": 4654 }, { "epoch": 0.7981653342478084, "grad_norm": 1.6015625, "learning_rate": 1.6803177163588848e-05, "loss": 1.0006, "step": 4655 }, { "epoch": 0.7983367983367984, "grad_norm": 1.5234375, "learning_rate": 1.6801853456610705e-05, "loss": 0.9446, "step": 4656 }, { "epoch": 0.7985082624257882, "grad_norm": 1.65625, "learning_rate": 1.68005295277985e-05, "loss": 0.9129, "step": 4657 }, { "epoch": 0.798679726514778, "grad_norm": 1.578125, "learning_rate": 1.6799205377195427e-05, "loss": 0.971, "step": 4658 }, { "epoch": 0.798851190603768, "grad_norm": 1.5546875, "learning_rate": 1.6797881004844658e-05, "loss": 0.9304, "step": 4659 }, { "epoch": 0.7990226546927578, "grad_norm": 1.5859375, "learning_rate": 1.6796556410789394e-05, "loss": 1.0222, "step": 4660 }, { "epoch": 0.7991941187817476, "grad_norm": 1.6640625, "learning_rate": 1.6795231595072832e-05, "loss": 0.969, "step": 4661 }, { "epoch": 0.7993655828707376, "grad_norm": 1.59375, "learning_rate": 1.6793906557738177e-05, "loss": 0.9851, "step": 4662 }, { "epoch": 0.7995370469597274, "grad_norm": 1.765625, "learning_rate": 1.6792581298828644e-05, "loss": 0.9575, "step": 4663 }, { "epoch": 0.7997085110487172, "grad_norm": 1.625, "learning_rate": 1.6791255818387462e-05, "loss": 0.9878, "step": 4664 }, { "epoch": 0.7998799751377071, "grad_norm": 1.5078125, "learning_rate": 1.678993011645785e-05, "loss": 0.9399, "step": 4665 }, { "epoch": 0.800051439226697, "grad_norm": 1.6640625, "learning_rate": 1.6788604193083052e-05, "loss": 0.9622, "step": 4666 }, { "epoch": 0.8002229033156868, "grad_norm": 1.5546875, "learning_rate": 1.6787278048306307e-05, "loss": 0.98, "step": 4667 }, { "epoch": 0.8003943674046767, "grad_norm": 1.640625, "learning_rate": 1.6785951682170863e-05, "loss": 1.1163, "step": 4668 }, { "epoch": 0.8005658314936666, "grad_norm": 1.546875, "learning_rate": 1.678462509471998e-05, "loss": 1.0644, "step": 4669 }, { "epoch": 0.8007372955826564, "grad_norm": 1.53125, "learning_rate": 1.6783298285996928e-05, "loss": 0.9675, "step": 4670 }, { "epoch": 0.8009087596716463, "grad_norm": 1.6640625, "learning_rate": 1.6781971256044975e-05, "loss": 0.9141, "step": 4671 }, { "epoch": 0.8010802237606361, "grad_norm": 1.53125, "learning_rate": 1.67806440049074e-05, "loss": 0.971, "step": 4672 }, { "epoch": 0.801251687849626, "grad_norm": 1.5625, "learning_rate": 1.677931653262749e-05, "loss": 0.9711, "step": 4673 }, { "epoch": 0.8014231519386159, "grad_norm": 1.5703125, "learning_rate": 1.677798883924854e-05, "loss": 0.9369, "step": 4674 }, { "epoch": 0.8015946160276057, "grad_norm": 1.5625, "learning_rate": 1.6776660924813854e-05, "loss": 0.9912, "step": 4675 }, { "epoch": 0.8017660801165956, "grad_norm": 1.6328125, "learning_rate": 1.677533278936673e-05, "loss": 1.1182, "step": 4676 }, { "epoch": 0.8019375442055855, "grad_norm": 1.6328125, "learning_rate": 1.6774004432950496e-05, "loss": 1.0211, "step": 4677 }, { "epoch": 0.8021090082945753, "grad_norm": 1.671875, "learning_rate": 1.6772675855608466e-05, "loss": 0.9907, "step": 4678 }, { "epoch": 0.8022804723835651, "grad_norm": 1.5625, "learning_rate": 1.677134705738397e-05, "loss": 0.9965, "step": 4679 }, { "epoch": 0.8024519364725551, "grad_norm": 1.6171875, "learning_rate": 1.6770018038320353e-05, "loss": 1.0059, "step": 4680 }, { "epoch": 0.8026234005615449, "grad_norm": 1.765625, "learning_rate": 1.676868879846095e-05, "loss": 1.092, "step": 4681 }, { "epoch": 0.8027948646505347, "grad_norm": 1.625, "learning_rate": 1.676735933784912e-05, "loss": 1.0292, "step": 4682 }, { "epoch": 0.8029663287395247, "grad_norm": 1.703125, "learning_rate": 1.6766029656528218e-05, "loss": 1.0185, "step": 4683 }, { "epoch": 0.8031377928285145, "grad_norm": 2.484375, "learning_rate": 1.6764699754541608e-05, "loss": 1.0001, "step": 4684 }, { "epoch": 0.8033092569175043, "grad_norm": 1.8125, "learning_rate": 1.6763369631932665e-05, "loss": 1.0169, "step": 4685 }, { "epoch": 0.8034807210064941, "grad_norm": 1.5859375, "learning_rate": 1.6762039288744777e-05, "loss": 0.9109, "step": 4686 }, { "epoch": 0.8036521850954841, "grad_norm": 3.171875, "learning_rate": 1.6760708725021316e-05, "loss": 0.9967, "step": 4687 }, { "epoch": 0.8038236491844739, "grad_norm": 1.5, "learning_rate": 1.675937794080569e-05, "loss": 0.963, "step": 4688 }, { "epoch": 0.8039951132734637, "grad_norm": 1.6015625, "learning_rate": 1.6758046936141295e-05, "loss": 1.0488, "step": 4689 }, { "epoch": 0.8041665773624537, "grad_norm": 1.5, "learning_rate": 1.675671571107154e-05, "loss": 0.9804, "step": 4690 }, { "epoch": 0.8043380414514435, "grad_norm": 1.671875, "learning_rate": 1.675538426563984e-05, "loss": 1.0073, "step": 4691 }, { "epoch": 0.8045095055404333, "grad_norm": 2.015625, "learning_rate": 1.6754052599889626e-05, "loss": 1.0464, "step": 4692 }, { "epoch": 0.8046809696294233, "grad_norm": 1.59375, "learning_rate": 1.675272071386432e-05, "loss": 1.0297, "step": 4693 }, { "epoch": 0.8048524337184131, "grad_norm": 1.5859375, "learning_rate": 1.6751388607607363e-05, "loss": 1.1082, "step": 4694 }, { "epoch": 0.8050238978074029, "grad_norm": 1.5859375, "learning_rate": 1.6750056281162203e-05, "loss": 0.9478, "step": 4695 }, { "epoch": 0.8051953618963928, "grad_norm": 1.671875, "learning_rate": 1.6748723734572285e-05, "loss": 1.0545, "step": 4696 }, { "epoch": 0.8053668259853827, "grad_norm": 1.5703125, "learning_rate": 1.6747390967881073e-05, "loss": 1.0008, "step": 4697 }, { "epoch": 0.8055382900743725, "grad_norm": 1.515625, "learning_rate": 1.6746057981132037e-05, "loss": 0.9503, "step": 4698 }, { "epoch": 0.8057097541633624, "grad_norm": 1.671875, "learning_rate": 1.6744724774368645e-05, "loss": 1.0142, "step": 4699 }, { "epoch": 0.8058812182523523, "grad_norm": 1.546875, "learning_rate": 1.674339134763438e-05, "loss": 0.9319, "step": 4700 }, { "epoch": 0.8060526823413421, "grad_norm": 1.5703125, "learning_rate": 1.674205770097273e-05, "loss": 1.079, "step": 4701 }, { "epoch": 0.806224146430332, "grad_norm": 2.1875, "learning_rate": 1.674072383442719e-05, "loss": 1.0402, "step": 4702 }, { "epoch": 0.8063956105193218, "grad_norm": 1.7578125, "learning_rate": 1.673938974804126e-05, "loss": 1.0822, "step": 4703 }, { "epoch": 0.8065670746083117, "grad_norm": 1.6328125, "learning_rate": 1.6738055441858456e-05, "loss": 1.075, "step": 4704 }, { "epoch": 0.8067385386973016, "grad_norm": 1.65625, "learning_rate": 1.673672091592229e-05, "loss": 1.077, "step": 4705 }, { "epoch": 0.8069100027862914, "grad_norm": 1.5078125, "learning_rate": 1.6735386170276283e-05, "loss": 1.0157, "step": 4706 }, { "epoch": 0.8070814668752813, "grad_norm": 1.6640625, "learning_rate": 1.6734051204963974e-05, "loss": 1.1408, "step": 4707 }, { "epoch": 0.8072529309642712, "grad_norm": 1.609375, "learning_rate": 1.6732716020028897e-05, "loss": 0.9205, "step": 4708 }, { "epoch": 0.807424395053261, "grad_norm": 1.5078125, "learning_rate": 1.6731380615514593e-05, "loss": 0.9743, "step": 4709 }, { "epoch": 0.8075958591422508, "grad_norm": 1.5703125, "learning_rate": 1.6730044991464626e-05, "loss": 1.0179, "step": 4710 }, { "epoch": 0.8077673232312408, "grad_norm": 1.59375, "learning_rate": 1.6728709147922543e-05, "loss": 0.9116, "step": 4711 }, { "epoch": 0.8079387873202306, "grad_norm": 1.6171875, "learning_rate": 1.672737308493192e-05, "loss": 0.9609, "step": 4712 }, { "epoch": 0.8081102514092204, "grad_norm": 1.5625, "learning_rate": 1.672603680253633e-05, "loss": 0.9789, "step": 4713 }, { "epoch": 0.8082817154982104, "grad_norm": 1.6640625, "learning_rate": 1.6724700300779346e-05, "loss": 1.0335, "step": 4714 }, { "epoch": 0.8084531795872002, "grad_norm": 1.625, "learning_rate": 1.6723363579704567e-05, "loss": 1.0538, "step": 4715 }, { "epoch": 0.80862464367619, "grad_norm": 1.640625, "learning_rate": 1.672202663935558e-05, "loss": 1.092, "step": 4716 }, { "epoch": 0.80879610776518, "grad_norm": 1.53125, "learning_rate": 1.6720689479775996e-05, "loss": 0.9346, "step": 4717 }, { "epoch": 0.8089675718541698, "grad_norm": 1.5546875, "learning_rate": 1.6719352101009417e-05, "loss": 0.9112, "step": 4718 }, { "epoch": 0.8091390359431596, "grad_norm": 1.484375, "learning_rate": 1.671801450309946e-05, "loss": 0.9348, "step": 4719 }, { "epoch": 0.8093105000321495, "grad_norm": 1.5, "learning_rate": 1.671667668608976e-05, "loss": 0.9716, "step": 4720 }, { "epoch": 0.8094819641211394, "grad_norm": 1.53125, "learning_rate": 1.6715338650023936e-05, "loss": 0.9984, "step": 4721 }, { "epoch": 0.8096534282101292, "grad_norm": 1.5703125, "learning_rate": 1.6714000394945632e-05, "loss": 0.9771, "step": 4722 }, { "epoch": 0.8098248922991191, "grad_norm": 1.5859375, "learning_rate": 1.6712661920898493e-05, "loss": 0.9918, "step": 4723 }, { "epoch": 0.809996356388109, "grad_norm": 1.4921875, "learning_rate": 1.671132322792617e-05, "loss": 0.9857, "step": 4724 }, { "epoch": 0.8101678204770988, "grad_norm": 1.4921875, "learning_rate": 1.6709984316072327e-05, "loss": 0.9775, "step": 4725 }, { "epoch": 0.8103392845660887, "grad_norm": 1.6015625, "learning_rate": 1.6708645185380622e-05, "loss": 1.0013, "step": 4726 }, { "epoch": 0.8105107486550786, "grad_norm": 1.5, "learning_rate": 1.670730583589474e-05, "loss": 0.9176, "step": 4727 }, { "epoch": 0.8106822127440684, "grad_norm": 1.6328125, "learning_rate": 1.6705966267658355e-05, "loss": 0.948, "step": 4728 }, { "epoch": 0.8108536768330583, "grad_norm": 1.5078125, "learning_rate": 1.6704626480715162e-05, "loss": 0.9791, "step": 4729 }, { "epoch": 0.8110251409220481, "grad_norm": 1.875, "learning_rate": 1.670328647510885e-05, "loss": 1.0124, "step": 4730 }, { "epoch": 0.811196605011038, "grad_norm": 1.6640625, "learning_rate": 1.670194625088312e-05, "loss": 1.1446, "step": 4731 }, { "epoch": 0.8113680691000279, "grad_norm": 1.546875, "learning_rate": 1.6700605808081688e-05, "loss": 1.0095, "step": 4732 }, { "epoch": 0.8115395331890177, "grad_norm": 1.546875, "learning_rate": 1.669926514674827e-05, "loss": 0.9893, "step": 4733 }, { "epoch": 0.8117109972780076, "grad_norm": 1.484375, "learning_rate": 1.6697924266926588e-05, "loss": 1.0897, "step": 4734 }, { "epoch": 0.8118824613669975, "grad_norm": 1.6171875, "learning_rate": 1.6696583168660373e-05, "loss": 0.9558, "step": 4735 }, { "epoch": 0.8120539254559873, "grad_norm": 1.515625, "learning_rate": 1.6695241851993363e-05, "loss": 0.952, "step": 4736 }, { "epoch": 0.8122253895449771, "grad_norm": 1.9765625, "learning_rate": 1.6693900316969303e-05, "loss": 1.0141, "step": 4737 }, { "epoch": 0.8123968536339671, "grad_norm": 1.5, "learning_rate": 1.669255856363195e-05, "loss": 0.9915, "step": 4738 }, { "epoch": 0.8125683177229569, "grad_norm": 1.8828125, "learning_rate": 1.669121659202506e-05, "loss": 1.0528, "step": 4739 }, { "epoch": 0.8127397818119467, "grad_norm": 1.546875, "learning_rate": 1.66898744021924e-05, "loss": 1.0535, "step": 4740 }, { "epoch": 0.8129112459009367, "grad_norm": 1.5859375, "learning_rate": 1.6688531994177745e-05, "loss": 0.9725, "step": 4741 }, { "epoch": 0.8130827099899265, "grad_norm": 1.578125, "learning_rate": 1.6687189368024874e-05, "loss": 1.026, "step": 4742 }, { "epoch": 0.8132541740789163, "grad_norm": 1.609375, "learning_rate": 1.6685846523777573e-05, "loss": 1.0988, "step": 4743 }, { "epoch": 0.8134256381679063, "grad_norm": 1.4765625, "learning_rate": 1.668450346147964e-05, "loss": 0.8971, "step": 4744 }, { "epoch": 0.8135971022568961, "grad_norm": 1.6328125, "learning_rate": 1.6683160181174886e-05, "loss": 1.0287, "step": 4745 }, { "epoch": 0.8137685663458859, "grad_norm": 1.53125, "learning_rate": 1.6681816682907104e-05, "loss": 1.0021, "step": 4746 }, { "epoch": 0.8139400304348758, "grad_norm": 1.515625, "learning_rate": 1.668047296672012e-05, "loss": 0.9952, "step": 4747 }, { "epoch": 0.8141114945238657, "grad_norm": 1.59375, "learning_rate": 1.667912903265776e-05, "loss": 0.9552, "step": 4748 }, { "epoch": 0.8142829586128555, "grad_norm": 1.6171875, "learning_rate": 1.6677784880763847e-05, "loss": 1.0447, "step": 4749 }, { "epoch": 0.8144544227018454, "grad_norm": 1.5546875, "learning_rate": 1.6676440511082223e-05, "loss": 1.0072, "step": 4750 }, { "epoch": 0.8146258867908353, "grad_norm": 1.6328125, "learning_rate": 1.6675095923656736e-05, "loss": 1.0373, "step": 4751 }, { "epoch": 0.8147973508798251, "grad_norm": 1.5390625, "learning_rate": 1.667375111853123e-05, "loss": 1.0665, "step": 4752 }, { "epoch": 0.814968814968815, "grad_norm": 1.6171875, "learning_rate": 1.6672406095749573e-05, "loss": 0.9888, "step": 4753 }, { "epoch": 0.8151402790578048, "grad_norm": 1.5546875, "learning_rate": 1.6671060855355626e-05, "loss": 1.0541, "step": 4754 }, { "epoch": 0.8153117431467947, "grad_norm": 1.5703125, "learning_rate": 1.666971539739326e-05, "loss": 1.0444, "step": 4755 }, { "epoch": 0.8154832072357846, "grad_norm": 1.6171875, "learning_rate": 1.6668369721906367e-05, "loss": 1.0401, "step": 4756 }, { "epoch": 0.8156546713247744, "grad_norm": 1.578125, "learning_rate": 1.666702382893882e-05, "loss": 1.0007, "step": 4757 }, { "epoch": 0.8158261354137643, "grad_norm": 1.5703125, "learning_rate": 1.6665677718534525e-05, "loss": 1.074, "step": 4758 }, { "epoch": 0.8159975995027542, "grad_norm": 1.4609375, "learning_rate": 1.6664331390737373e-05, "loss": 0.9096, "step": 4759 }, { "epoch": 0.816169063591744, "grad_norm": 1.5625, "learning_rate": 1.6662984845591283e-05, "loss": 1.0082, "step": 4760 }, { "epoch": 0.8163405276807338, "grad_norm": 1.515625, "learning_rate": 1.6661638083140167e-05, "loss": 0.9651, "step": 4761 }, { "epoch": 0.8165119917697238, "grad_norm": 1.5234375, "learning_rate": 1.6660291103427946e-05, "loss": 0.9039, "step": 4762 }, { "epoch": 0.8166834558587136, "grad_norm": 1.6015625, "learning_rate": 1.665894390649855e-05, "loss": 0.9503, "step": 4763 }, { "epoch": 0.8168549199477034, "grad_norm": 1.5234375, "learning_rate": 1.6657596492395926e-05, "loss": 0.9711, "step": 4764 }, { "epoch": 0.8170263840366934, "grad_norm": 1.5859375, "learning_rate": 1.6656248861164e-05, "loss": 1.0314, "step": 4765 }, { "epoch": 0.8171978481256832, "grad_norm": 1.53125, "learning_rate": 1.6654901012846737e-05, "loss": 0.948, "step": 4766 }, { "epoch": 0.817369312214673, "grad_norm": 1.515625, "learning_rate": 1.6653552947488095e-05, "loss": 0.9148, "step": 4767 }, { "epoch": 0.817540776303663, "grad_norm": 1.671875, "learning_rate": 1.6652204665132036e-05, "loss": 1.0365, "step": 4768 }, { "epoch": 0.8177122403926528, "grad_norm": 1.4921875, "learning_rate": 1.665085616582253e-05, "loss": 0.9708, "step": 4769 }, { "epoch": 0.8178837044816426, "grad_norm": 1.484375, "learning_rate": 1.664950744960356e-05, "loss": 1.0439, "step": 4770 }, { "epoch": 0.8180551685706325, "grad_norm": 1.546875, "learning_rate": 1.6648158516519115e-05, "loss": 0.9592, "step": 4771 }, { "epoch": 0.8182266326596224, "grad_norm": 1.6328125, "learning_rate": 1.6646809366613186e-05, "loss": 1.0561, "step": 4772 }, { "epoch": 0.8183980967486122, "grad_norm": 1.59375, "learning_rate": 1.664545999992977e-05, "loss": 1.0024, "step": 4773 }, { "epoch": 0.8185695608376021, "grad_norm": 1.5546875, "learning_rate": 1.6644110416512883e-05, "loss": 1.0269, "step": 4774 }, { "epoch": 0.818741024926592, "grad_norm": 1.640625, "learning_rate": 1.6642760616406536e-05, "loss": 1.0261, "step": 4775 }, { "epoch": 0.8189124890155818, "grad_norm": 1.5078125, "learning_rate": 1.664141059965475e-05, "loss": 1.0186, "step": 4776 }, { "epoch": 0.8190839531045717, "grad_norm": 1.59375, "learning_rate": 1.664006036630155e-05, "loss": 0.9797, "step": 4777 }, { "epoch": 0.8192554171935615, "grad_norm": 1.5078125, "learning_rate": 1.663870991639098e-05, "loss": 0.9059, "step": 4778 }, { "epoch": 0.8194268812825514, "grad_norm": 1.5625, "learning_rate": 1.6637359249967085e-05, "loss": 0.9974, "step": 4779 }, { "epoch": 0.8195983453715412, "grad_norm": 1.546875, "learning_rate": 1.6636008367073908e-05, "loss": 0.9284, "step": 4780 }, { "epoch": 0.8197698094605311, "grad_norm": 1.546875, "learning_rate": 1.6634657267755508e-05, "loss": 0.9575, "step": 4781 }, { "epoch": 0.819941273549521, "grad_norm": 1.53125, "learning_rate": 1.6633305952055955e-05, "loss": 1.0132, "step": 4782 }, { "epoch": 0.8201127376385108, "grad_norm": 1.625, "learning_rate": 1.6631954420019312e-05, "loss": 1.03, "step": 4783 }, { "epoch": 0.8202842017275007, "grad_norm": 1.578125, "learning_rate": 1.6630602671689665e-05, "loss": 0.9498, "step": 4784 }, { "epoch": 0.8204556658164905, "grad_norm": 1.5703125, "learning_rate": 1.662925070711109e-05, "loss": 0.9973, "step": 4785 }, { "epoch": 0.8206271299054804, "grad_norm": 1.5234375, "learning_rate": 1.662789852632769e-05, "loss": 1.1094, "step": 4786 }, { "epoch": 0.8207985939944703, "grad_norm": 1.546875, "learning_rate": 1.6626546129383564e-05, "loss": 0.9227, "step": 4787 }, { "epoch": 0.8209700580834601, "grad_norm": 1.625, "learning_rate": 1.6625193516322813e-05, "loss": 1.1013, "step": 4788 }, { "epoch": 0.82114152217245, "grad_norm": 1.53125, "learning_rate": 1.6623840687189554e-05, "loss": 0.9486, "step": 4789 }, { "epoch": 0.8213129862614399, "grad_norm": 1.5390625, "learning_rate": 1.6622487642027904e-05, "loss": 0.9604, "step": 4790 }, { "epoch": 0.8214844503504297, "grad_norm": 1.4765625, "learning_rate": 1.6621134380881997e-05, "loss": 0.955, "step": 4791 }, { "epoch": 0.8216559144394195, "grad_norm": 1.625, "learning_rate": 1.6619780903795964e-05, "loss": 1.0258, "step": 4792 }, { "epoch": 0.8218273785284095, "grad_norm": 1.6015625, "learning_rate": 1.6618427210813948e-05, "loss": 1.0394, "step": 4793 }, { "epoch": 0.8219988426173993, "grad_norm": 1.5390625, "learning_rate": 1.6617073301980097e-05, "loss": 0.9275, "step": 4794 }, { "epoch": 0.8221703067063891, "grad_norm": 1.5234375, "learning_rate": 1.6615719177338568e-05, "loss": 1.0212, "step": 4795 }, { "epoch": 0.8223417707953791, "grad_norm": 1.4375, "learning_rate": 1.6614364836933526e-05, "loss": 0.9618, "step": 4796 }, { "epoch": 0.8225132348843689, "grad_norm": 1.4765625, "learning_rate": 1.6613010280809138e-05, "loss": 1.0033, "step": 4797 }, { "epoch": 0.8226846989733587, "grad_norm": 1.5234375, "learning_rate": 1.661165550900958e-05, "loss": 1.0089, "step": 4798 }, { "epoch": 0.8228561630623487, "grad_norm": 1.515625, "learning_rate": 1.6610300521579046e-05, "loss": 1.0124, "step": 4799 }, { "epoch": 0.8230276271513385, "grad_norm": 1.6171875, "learning_rate": 1.6608945318561715e-05, "loss": 0.9778, "step": 4800 }, { "epoch": 0.8231990912403283, "grad_norm": 1.6015625, "learning_rate": 1.6607589900001787e-05, "loss": 0.9307, "step": 4801 }, { "epoch": 0.8233705553293182, "grad_norm": 1.6484375, "learning_rate": 1.6606234265943477e-05, "loss": 0.9979, "step": 4802 }, { "epoch": 0.8235420194183081, "grad_norm": 1.609375, "learning_rate": 1.6604878416430986e-05, "loss": 1.032, "step": 4803 }, { "epoch": 0.8237134835072979, "grad_norm": 1.640625, "learning_rate": 1.660352235150854e-05, "loss": 0.9481, "step": 4804 }, { "epoch": 0.8238849475962878, "grad_norm": 1.9921875, "learning_rate": 1.6602166071220365e-05, "loss": 0.9735, "step": 4805 }, { "epoch": 0.8240564116852777, "grad_norm": 1.5625, "learning_rate": 1.660080957561069e-05, "loss": 1.0189, "step": 4806 }, { "epoch": 0.8242278757742675, "grad_norm": 1.75, "learning_rate": 1.659945286472376e-05, "loss": 0.9874, "step": 4807 }, { "epoch": 0.8243993398632574, "grad_norm": 1.7109375, "learning_rate": 1.6598095938603818e-05, "loss": 1.0096, "step": 4808 }, { "epoch": 0.8245708039522472, "grad_norm": 1.6015625, "learning_rate": 1.6596738797295126e-05, "loss": 0.9903, "step": 4809 }, { "epoch": 0.8247422680412371, "grad_norm": 1.5390625, "learning_rate": 1.659538144084194e-05, "loss": 1.0101, "step": 4810 }, { "epoch": 0.824913732130227, "grad_norm": 1.65625, "learning_rate": 1.6594023869288528e-05, "loss": 0.928, "step": 4811 }, { "epoch": 0.8250851962192168, "grad_norm": 1.6015625, "learning_rate": 1.6592666082679165e-05, "loss": 0.9943, "step": 4812 }, { "epoch": 0.8252566603082067, "grad_norm": 1.671875, "learning_rate": 1.6591308081058136e-05, "loss": 1.0472, "step": 4813 }, { "epoch": 0.8254281243971966, "grad_norm": 1.609375, "learning_rate": 1.6589949864469733e-05, "loss": 0.9844, "step": 4814 }, { "epoch": 0.8255995884861864, "grad_norm": 1.6015625, "learning_rate": 1.6588591432958246e-05, "loss": 1.0661, "step": 4815 }, { "epoch": 0.8257710525751762, "grad_norm": 1.5546875, "learning_rate": 1.6587232786567985e-05, "loss": 1.0042, "step": 4816 }, { "epoch": 0.8259425166641662, "grad_norm": 1.5703125, "learning_rate": 1.6585873925343257e-05, "loss": 1.0843, "step": 4817 }, { "epoch": 0.826113980753156, "grad_norm": 1.5625, "learning_rate": 1.6584514849328378e-05, "loss": 1.0532, "step": 4818 }, { "epoch": 0.8262854448421458, "grad_norm": 2.015625, "learning_rate": 1.658315555856768e-05, "loss": 0.8471, "step": 4819 }, { "epoch": 0.8264569089311358, "grad_norm": 1.5859375, "learning_rate": 1.6581796053105483e-05, "loss": 0.9945, "step": 4820 }, { "epoch": 0.8266283730201256, "grad_norm": 1.578125, "learning_rate": 1.6580436332986134e-05, "loss": 0.9524, "step": 4821 }, { "epoch": 0.8267998371091154, "grad_norm": 1.640625, "learning_rate": 1.657907639825398e-05, "loss": 1.1008, "step": 4822 }, { "epoch": 0.8269713011981054, "grad_norm": 1.640625, "learning_rate": 1.6577716248953364e-05, "loss": 1.1223, "step": 4823 }, { "epoch": 0.8271427652870952, "grad_norm": 1.6328125, "learning_rate": 1.6576355885128657e-05, "loss": 1.0223, "step": 4824 }, { "epoch": 0.827314229376085, "grad_norm": 1.484375, "learning_rate": 1.6574995306824217e-05, "loss": 0.9231, "step": 4825 }, { "epoch": 0.827485693465075, "grad_norm": 1.5390625, "learning_rate": 1.657363451408442e-05, "loss": 1.0, "step": 4826 }, { "epoch": 0.8276571575540648, "grad_norm": 1.59375, "learning_rate": 1.657227350695365e-05, "loss": 1.0248, "step": 4827 }, { "epoch": 0.8278286216430546, "grad_norm": 1.640625, "learning_rate": 1.6570912285476294e-05, "loss": 0.9989, "step": 4828 }, { "epoch": 0.8280000857320445, "grad_norm": 1.515625, "learning_rate": 1.6569550849696742e-05, "loss": 1.0243, "step": 4829 }, { "epoch": 0.8281715498210344, "grad_norm": 1.453125, "learning_rate": 1.65681891996594e-05, "loss": 0.9558, "step": 4830 }, { "epoch": 0.8283430139100242, "grad_norm": 1.53125, "learning_rate": 1.656682733540867e-05, "loss": 1.0354, "step": 4831 }, { "epoch": 0.8285144779990141, "grad_norm": 1.5546875, "learning_rate": 1.6565465256988976e-05, "loss": 1.0249, "step": 4832 }, { "epoch": 0.828685942088004, "grad_norm": 1.484375, "learning_rate": 1.6564102964444736e-05, "loss": 0.8995, "step": 4833 }, { "epoch": 0.8288574061769938, "grad_norm": 1.5625, "learning_rate": 1.6562740457820382e-05, "loss": 0.9886, "step": 4834 }, { "epoch": 0.8290288702659837, "grad_norm": 1.59375, "learning_rate": 1.6561377737160347e-05, "loss": 1.084, "step": 4835 }, { "epoch": 0.8292003343549735, "grad_norm": 1.6484375, "learning_rate": 1.6560014802509076e-05, "loss": 0.9952, "step": 4836 }, { "epoch": 0.8293717984439634, "grad_norm": 1.5625, "learning_rate": 1.6558651653911018e-05, "loss": 0.981, "step": 4837 }, { "epoch": 0.8295432625329533, "grad_norm": 1.6484375, "learning_rate": 1.6557288291410632e-05, "loss": 1.0717, "step": 4838 }, { "epoch": 0.8297147266219431, "grad_norm": 1.546875, "learning_rate": 1.6555924715052387e-05, "loss": 1.0198, "step": 4839 }, { "epoch": 0.829886190710933, "grad_norm": 1.6015625, "learning_rate": 1.6554560924880747e-05, "loss": 1.0254, "step": 4840 }, { "epoch": 0.8300576547999229, "grad_norm": 1.734375, "learning_rate": 1.6553196920940192e-05, "loss": 1.043, "step": 4841 }, { "epoch": 0.8302291188889127, "grad_norm": 1.6875, "learning_rate": 1.655183270327521e-05, "loss": 1.0715, "step": 4842 }, { "epoch": 0.8304005829779025, "grad_norm": 1.6015625, "learning_rate": 1.6550468271930288e-05, "loss": 1.0748, "step": 4843 }, { "epoch": 0.8305720470668925, "grad_norm": 1.6015625, "learning_rate": 1.6549103626949934e-05, "loss": 0.9705, "step": 4844 }, { "epoch": 0.8307435111558823, "grad_norm": 1.5546875, "learning_rate": 1.6547738768378648e-05, "loss": 0.9233, "step": 4845 }, { "epoch": 0.8309149752448721, "grad_norm": 1.5234375, "learning_rate": 1.654637369626094e-05, "loss": 1.0039, "step": 4846 }, { "epoch": 0.8310864393338621, "grad_norm": 1.6796875, "learning_rate": 1.654500841064134e-05, "loss": 1.0685, "step": 4847 }, { "epoch": 0.8312579034228519, "grad_norm": 1.5703125, "learning_rate": 1.6543642911564366e-05, "loss": 0.9914, "step": 4848 }, { "epoch": 0.8314293675118417, "grad_norm": 1.5625, "learning_rate": 1.654227719907456e-05, "loss": 1.0263, "step": 4849 }, { "epoch": 0.8316008316008316, "grad_norm": 1.640625, "learning_rate": 1.654091127321645e-05, "loss": 0.9904, "step": 4850 }, { "epoch": 0.8317722956898215, "grad_norm": 1.6171875, "learning_rate": 1.65395451340346e-05, "loss": 0.9814, "step": 4851 }, { "epoch": 0.8319437597788113, "grad_norm": 1.546875, "learning_rate": 1.6538178781573553e-05, "loss": 1.0059, "step": 4852 }, { "epoch": 0.8321152238678012, "grad_norm": 1.5546875, "learning_rate": 1.6536812215877877e-05, "loss": 1.0505, "step": 4853 }, { "epoch": 0.8322866879567911, "grad_norm": 1.5390625, "learning_rate": 1.6535445436992142e-05, "loss": 0.9706, "step": 4854 }, { "epoch": 0.8324581520457809, "grad_norm": 1.53125, "learning_rate": 1.653407844496092e-05, "loss": 0.9743, "step": 4855 }, { "epoch": 0.8326296161347708, "grad_norm": 1.53125, "learning_rate": 1.6532711239828792e-05, "loss": 1.0592, "step": 4856 }, { "epoch": 0.8328010802237606, "grad_norm": 1.671875, "learning_rate": 1.6531343821640353e-05, "loss": 1.1352, "step": 4857 }, { "epoch": 0.8329725443127505, "grad_norm": 1.6171875, "learning_rate": 1.6529976190440197e-05, "loss": 1.0237, "step": 4858 }, { "epoch": 0.8331440084017404, "grad_norm": 1.625, "learning_rate": 1.652860834627293e-05, "loss": 1.0109, "step": 4859 }, { "epoch": 0.8333154724907302, "grad_norm": 1.5859375, "learning_rate": 1.6527240289183156e-05, "loss": 0.9852, "step": 4860 }, { "epoch": 0.8334869365797201, "grad_norm": 1.5625, "learning_rate": 1.6525872019215495e-05, "loss": 1.0334, "step": 4861 }, { "epoch": 0.83365840066871, "grad_norm": 1.515625, "learning_rate": 1.652450353641458e-05, "loss": 1.0522, "step": 4862 }, { "epoch": 0.8338298647576998, "grad_norm": 1.7578125, "learning_rate": 1.652313484082503e-05, "loss": 1.0455, "step": 4863 }, { "epoch": 0.8340013288466896, "grad_norm": 1.6328125, "learning_rate": 1.6521765932491493e-05, "loss": 1.0399, "step": 4864 }, { "epoch": 0.8341727929356796, "grad_norm": 1.5859375, "learning_rate": 1.652039681145861e-05, "loss": 0.9762, "step": 4865 }, { "epoch": 0.8343442570246694, "grad_norm": 1.5859375, "learning_rate": 1.6519027477771034e-05, "loss": 0.9692, "step": 4866 }, { "epoch": 0.8345157211136592, "grad_norm": 1.640625, "learning_rate": 1.651765793147342e-05, "loss": 1.0545, "step": 4867 }, { "epoch": 0.8346871852026492, "grad_norm": 1.546875, "learning_rate": 1.651628817261044e-05, "loss": 1.0397, "step": 4868 }, { "epoch": 0.834858649291639, "grad_norm": 1.609375, "learning_rate": 1.651491820122676e-05, "loss": 1.059, "step": 4869 }, { "epoch": 0.8350301133806288, "grad_norm": 1.59375, "learning_rate": 1.651354801736707e-05, "loss": 1.0279, "step": 4870 }, { "epoch": 0.8352015774696188, "grad_norm": 1.5078125, "learning_rate": 1.651217762107605e-05, "loss": 0.9188, "step": 4871 }, { "epoch": 0.8353730415586086, "grad_norm": 1.5234375, "learning_rate": 1.6510807012398394e-05, "loss": 1.0087, "step": 4872 }, { "epoch": 0.8355445056475984, "grad_norm": 1.625, "learning_rate": 1.650943619137881e-05, "loss": 1.0444, "step": 4873 }, { "epoch": 0.8357159697365882, "grad_norm": 1.5078125, "learning_rate": 1.6508065158061992e-05, "loss": 1.052, "step": 4874 }, { "epoch": 0.8358874338255782, "grad_norm": 1.53125, "learning_rate": 1.6506693912492666e-05, "loss": 1.0841, "step": 4875 }, { "epoch": 0.836058897914568, "grad_norm": 1.515625, "learning_rate": 1.650532245471555e-05, "loss": 0.9297, "step": 4876 }, { "epoch": 0.8362303620035578, "grad_norm": 1.5546875, "learning_rate": 1.6503950784775368e-05, "loss": 0.9995, "step": 4877 }, { "epoch": 0.8364018260925478, "grad_norm": 1.703125, "learning_rate": 1.6502578902716863e-05, "loss": 0.9401, "step": 4878 }, { "epoch": 0.8365732901815376, "grad_norm": 1.546875, "learning_rate": 1.650120680858477e-05, "loss": 1.0145, "step": 4879 }, { "epoch": 0.8367447542705274, "grad_norm": 1.578125, "learning_rate": 1.6499834502423848e-05, "loss": 1.0129, "step": 4880 }, { "epoch": 0.8369162183595173, "grad_norm": 1.6640625, "learning_rate": 1.649846198427884e-05, "loss": 1.1061, "step": 4881 }, { "epoch": 0.8370876824485072, "grad_norm": 1.5234375, "learning_rate": 1.6497089254194515e-05, "loss": 1.017, "step": 4882 }, { "epoch": 0.837259146537497, "grad_norm": 1.6328125, "learning_rate": 1.649571631221565e-05, "loss": 0.9935, "step": 4883 }, { "epoch": 0.8374306106264869, "grad_norm": 1.6484375, "learning_rate": 1.649434315838701e-05, "loss": 1.0117, "step": 4884 }, { "epoch": 0.8376020747154768, "grad_norm": 1.515625, "learning_rate": 1.6492969792753387e-05, "loss": 0.9968, "step": 4885 }, { "epoch": 0.8377735388044666, "grad_norm": 1.5234375, "learning_rate": 1.649159621535957e-05, "loss": 1.0011, "step": 4886 }, { "epoch": 0.8379450028934565, "grad_norm": 1.5546875, "learning_rate": 1.6490222426250354e-05, "loss": 1.019, "step": 4887 }, { "epoch": 0.8381164669824464, "grad_norm": 1.578125, "learning_rate": 1.6488848425470543e-05, "loss": 1.0191, "step": 4888 }, { "epoch": 0.8382879310714362, "grad_norm": 1.5703125, "learning_rate": 1.648747421306495e-05, "loss": 0.9839, "step": 4889 }, { "epoch": 0.8384593951604261, "grad_norm": 1.53125, "learning_rate": 1.64860997890784e-05, "loss": 0.9773, "step": 4890 }, { "epoch": 0.8386308592494159, "grad_norm": 1.546875, "learning_rate": 1.6484725153555703e-05, "loss": 1.0045, "step": 4891 }, { "epoch": 0.8388023233384058, "grad_norm": 1.6015625, "learning_rate": 1.6483350306541703e-05, "loss": 1.0592, "step": 4892 }, { "epoch": 0.8389737874273957, "grad_norm": 1.5625, "learning_rate": 1.6481975248081233e-05, "loss": 0.9609, "step": 4893 }, { "epoch": 0.8391452515163855, "grad_norm": 1.5078125, "learning_rate": 1.6480599978219143e-05, "loss": 0.9875, "step": 4894 }, { "epoch": 0.8393167156053754, "grad_norm": 1.5703125, "learning_rate": 1.6479224497000287e-05, "loss": 1.0036, "step": 4895 }, { "epoch": 0.8394881796943653, "grad_norm": 1.5625, "learning_rate": 1.647784880446952e-05, "loss": 0.9743, "step": 4896 }, { "epoch": 0.8396596437833551, "grad_norm": 1.46875, "learning_rate": 1.647647290067171e-05, "loss": 0.9982, "step": 4897 }, { "epoch": 0.8398311078723449, "grad_norm": 1.5234375, "learning_rate": 1.647509678565173e-05, "loss": 1.1047, "step": 4898 }, { "epoch": 0.8400025719613349, "grad_norm": 1.6015625, "learning_rate": 1.647372045945446e-05, "loss": 1.0122, "step": 4899 }, { "epoch": 0.8401740360503247, "grad_norm": 1.5546875, "learning_rate": 1.647234392212479e-05, "loss": 0.9983, "step": 4900 }, { "epoch": 0.8401740360503247, "eval_loss": 0.8566790223121643, "eval_runtime": 836.9111, "eval_samples_per_second": 2.986, "eval_steps_per_second": 2.986, "step": 4900 }, { "epoch": 0.8403455001393145, "grad_norm": 1.5859375, "learning_rate": 1.647096717370761e-05, "loss": 1.0025, "step": 4901 }, { "epoch": 0.8405169642283045, "grad_norm": 1.5625, "learning_rate": 1.6469590214247828e-05, "loss": 1.0119, "step": 4902 }, { "epoch": 0.8406884283172943, "grad_norm": 1.578125, "learning_rate": 1.646821304379034e-05, "loss": 1.0166, "step": 4903 }, { "epoch": 0.8408598924062841, "grad_norm": 1.7109375, "learning_rate": 1.6466835662380072e-05, "loss": 1.0238, "step": 4904 }, { "epoch": 0.841031356495274, "grad_norm": 1.5078125, "learning_rate": 1.646545807006194e-05, "loss": 0.9224, "step": 4905 }, { "epoch": 0.8412028205842639, "grad_norm": 1.6015625, "learning_rate": 1.646408026688087e-05, "loss": 1.0507, "step": 4906 }, { "epoch": 0.8413742846732537, "grad_norm": 1.5703125, "learning_rate": 1.6462702252881805e-05, "loss": 1.0593, "step": 4907 }, { "epoch": 0.8415457487622436, "grad_norm": 1.46875, "learning_rate": 1.6461324028109683e-05, "loss": 0.9803, "step": 4908 }, { "epoch": 0.8417172128512335, "grad_norm": 1.4765625, "learning_rate": 1.6459945592609455e-05, "loss": 0.9086, "step": 4909 }, { "epoch": 0.8418886769402233, "grad_norm": 1.578125, "learning_rate": 1.6458566946426072e-05, "loss": 1.0014, "step": 4910 }, { "epoch": 0.8420601410292132, "grad_norm": 1.609375, "learning_rate": 1.6457188089604505e-05, "loss": 0.9632, "step": 4911 }, { "epoch": 0.842231605118203, "grad_norm": 1.640625, "learning_rate": 1.6455809022189716e-05, "loss": 1.0341, "step": 4912 }, { "epoch": 0.8424030692071929, "grad_norm": 1.6328125, "learning_rate": 1.645442974422668e-05, "loss": 0.962, "step": 4913 }, { "epoch": 0.8425745332961828, "grad_norm": 1.5625, "learning_rate": 1.6453050255760392e-05, "loss": 0.9729, "step": 4914 }, { "epoch": 0.8427459973851726, "grad_norm": 1.671875, "learning_rate": 1.645167055683583e-05, "loss": 1.027, "step": 4915 }, { "epoch": 0.8429174614741625, "grad_norm": 1.59375, "learning_rate": 1.6450290647498e-05, "loss": 1.1018, "step": 4916 }, { "epoch": 0.8430889255631524, "grad_norm": 1.59375, "learning_rate": 1.64489105277919e-05, "loss": 0.9738, "step": 4917 }, { "epoch": 0.8432603896521422, "grad_norm": 1.53125, "learning_rate": 1.6447530197762545e-05, "loss": 1.0227, "step": 4918 }, { "epoch": 0.843431853741132, "grad_norm": 1.5390625, "learning_rate": 1.644614965745495e-05, "loss": 1.0394, "step": 4919 }, { "epoch": 0.843603317830122, "grad_norm": 1.59375, "learning_rate": 1.6444768906914143e-05, "loss": 1.0459, "step": 4920 }, { "epoch": 0.8437747819191118, "grad_norm": 1.5234375, "learning_rate": 1.644338794618515e-05, "loss": 0.9538, "step": 4921 }, { "epoch": 0.8439462460081016, "grad_norm": 1.4296875, "learning_rate": 1.6442006775313017e-05, "loss": 0.9086, "step": 4922 }, { "epoch": 0.8441177100970916, "grad_norm": 1.4609375, "learning_rate": 1.6440625394342777e-05, "loss": 0.9426, "step": 4923 }, { "epoch": 0.8442891741860814, "grad_norm": 1.4921875, "learning_rate": 1.6439243803319492e-05, "loss": 0.908, "step": 4924 }, { "epoch": 0.8444606382750712, "grad_norm": 1.5390625, "learning_rate": 1.6437862002288226e-05, "loss": 1.0451, "step": 4925 }, { "epoch": 0.8446321023640612, "grad_norm": 1.5234375, "learning_rate": 1.6436479991294032e-05, "loss": 1.0059, "step": 4926 }, { "epoch": 0.844803566453051, "grad_norm": 1.4921875, "learning_rate": 1.6435097770381988e-05, "loss": 1.0136, "step": 4927 }, { "epoch": 0.8449750305420408, "grad_norm": 1.65625, "learning_rate": 1.643371533959717e-05, "loss": 1.049, "step": 4928 }, { "epoch": 0.8451464946310308, "grad_norm": 1.5078125, "learning_rate": 1.6432332698984672e-05, "loss": 0.9991, "step": 4929 }, { "epoch": 0.8453179587200206, "grad_norm": 1.5234375, "learning_rate": 1.6430949848589583e-05, "loss": 0.9708, "step": 4930 }, { "epoch": 0.8454894228090104, "grad_norm": 1.5234375, "learning_rate": 1.6429566788457007e-05, "loss": 0.9411, "step": 4931 }, { "epoch": 0.8456608868980003, "grad_norm": 1.7421875, "learning_rate": 1.6428183518632036e-05, "loss": 1.0671, "step": 4932 }, { "epoch": 0.8458323509869902, "grad_norm": 1.5, "learning_rate": 1.64268000391598e-05, "loss": 0.9859, "step": 4933 }, { "epoch": 0.84600381507598, "grad_norm": 1.46875, "learning_rate": 1.6425416350085416e-05, "loss": 0.9405, "step": 4934 }, { "epoch": 0.8461752791649699, "grad_norm": 1.5, "learning_rate": 1.6424032451454004e-05, "loss": 0.9694, "step": 4935 }, { "epoch": 0.8463467432539598, "grad_norm": 2.28125, "learning_rate": 1.6422648343310706e-05, "loss": 1.0205, "step": 4936 }, { "epoch": 0.8465182073429496, "grad_norm": 1.546875, "learning_rate": 1.642126402570066e-05, "loss": 0.9597, "step": 4937 }, { "epoch": 0.8466896714319395, "grad_norm": 1.484375, "learning_rate": 1.6419879498669012e-05, "loss": 1.0294, "step": 4938 }, { "epoch": 0.8468611355209293, "grad_norm": 1.578125, "learning_rate": 1.641849476226092e-05, "loss": 1.0367, "step": 4939 }, { "epoch": 0.8470325996099192, "grad_norm": 1.5546875, "learning_rate": 1.641710981652154e-05, "loss": 1.0519, "step": 4940 }, { "epoch": 0.8472040636989091, "grad_norm": 1.6015625, "learning_rate": 1.641572466149605e-05, "loss": 0.9819, "step": 4941 }, { "epoch": 0.8473755277878989, "grad_norm": 1.5625, "learning_rate": 1.6414339297229614e-05, "loss": 0.9086, "step": 4942 }, { "epoch": 0.8475469918768888, "grad_norm": 1.625, "learning_rate": 1.6412953723767426e-05, "loss": 1.0298, "step": 4943 }, { "epoch": 0.8477184559658787, "grad_norm": 1.578125, "learning_rate": 1.6411567941154666e-05, "loss": 0.9991, "step": 4944 }, { "epoch": 0.8478899200548685, "grad_norm": 1.6328125, "learning_rate": 1.641018194943653e-05, "loss": 1.0165, "step": 4945 }, { "epoch": 0.8480613841438583, "grad_norm": 2.640625, "learning_rate": 1.6408795748658223e-05, "loss": 1.0844, "step": 4946 }, { "epoch": 0.8482328482328483, "grad_norm": 1.5, "learning_rate": 1.640740933886495e-05, "loss": 0.9461, "step": 4947 }, { "epoch": 0.8484043123218381, "grad_norm": 1.5703125, "learning_rate": 1.6406022720101935e-05, "loss": 0.9793, "step": 4948 }, { "epoch": 0.8485757764108279, "grad_norm": 1.5703125, "learning_rate": 1.6404635892414392e-05, "loss": 1.0438, "step": 4949 }, { "epoch": 0.8487472404998179, "grad_norm": 1.5234375, "learning_rate": 1.640324885584756e-05, "loss": 0.9791, "step": 4950 }, { "epoch": 0.8489187045888077, "grad_norm": 1.5546875, "learning_rate": 1.6401861610446667e-05, "loss": 0.9802, "step": 4951 }, { "epoch": 0.8490901686777975, "grad_norm": 1.5546875, "learning_rate": 1.640047415625696e-05, "loss": 0.9295, "step": 4952 }, { "epoch": 0.8492616327667875, "grad_norm": 1.65625, "learning_rate": 1.639908649332369e-05, "loss": 1.0223, "step": 4953 }, { "epoch": 0.8494330968557773, "grad_norm": 1.5546875, "learning_rate": 1.6397698621692112e-05, "loss": 0.9678, "step": 4954 }, { "epoch": 0.8496045609447671, "grad_norm": 1.6171875, "learning_rate": 1.639631054140749e-05, "loss": 1.0231, "step": 4955 }, { "epoch": 0.849776025033757, "grad_norm": 1.5703125, "learning_rate": 1.63949222525151e-05, "loss": 1.0565, "step": 4956 }, { "epoch": 0.8499474891227469, "grad_norm": 1.71875, "learning_rate": 1.6393533755060204e-05, "loss": 1.0848, "step": 4957 }, { "epoch": 0.8501189532117367, "grad_norm": 1.703125, "learning_rate": 1.6392145049088105e-05, "loss": 1.1402, "step": 4958 }, { "epoch": 0.8502904173007266, "grad_norm": 1.5546875, "learning_rate": 1.639075613464408e-05, "loss": 0.9722, "step": 4959 }, { "epoch": 0.8504618813897165, "grad_norm": 1.515625, "learning_rate": 1.6389367011773435e-05, "loss": 1.0092, "step": 4960 }, { "epoch": 0.8506333454787063, "grad_norm": 1.46875, "learning_rate": 1.6387977680521472e-05, "loss": 0.9036, "step": 4961 }, { "epoch": 0.8508048095676962, "grad_norm": 1.5703125, "learning_rate": 1.6386588140933503e-05, "loss": 1.0088, "step": 4962 }, { "epoch": 0.850976273656686, "grad_norm": 1.515625, "learning_rate": 1.6385198393054843e-05, "loss": 0.9637, "step": 4963 }, { "epoch": 0.8511477377456759, "grad_norm": 1.546875, "learning_rate": 1.638380843693082e-05, "loss": 0.9548, "step": 4964 }, { "epoch": 0.8513192018346658, "grad_norm": 1.5390625, "learning_rate": 1.6382418272606763e-05, "loss": 0.9837, "step": 4965 }, { "epoch": 0.8514906659236556, "grad_norm": 1.578125, "learning_rate": 1.6381027900128013e-05, "loss": 1.1239, "step": 4966 }, { "epoch": 0.8516621300126455, "grad_norm": 1.5390625, "learning_rate": 1.6379637319539913e-05, "loss": 0.9881, "step": 4967 }, { "epoch": 0.8518335941016354, "grad_norm": 1.6015625, "learning_rate": 1.637824653088782e-05, "loss": 0.9896, "step": 4968 }, { "epoch": 0.8520050581906252, "grad_norm": 1.6640625, "learning_rate": 1.637685553421709e-05, "loss": 1.053, "step": 4969 }, { "epoch": 0.852176522279615, "grad_norm": 1.484375, "learning_rate": 1.6375464329573084e-05, "loss": 1.0277, "step": 4970 }, { "epoch": 0.8523479863686049, "grad_norm": 1.6015625, "learning_rate": 1.637407291700118e-05, "loss": 0.9823, "step": 4971 }, { "epoch": 0.8525194504575948, "grad_norm": 1.5859375, "learning_rate": 1.6372681296546755e-05, "loss": 0.9795, "step": 4972 }, { "epoch": 0.8526909145465846, "grad_norm": 1.5703125, "learning_rate": 1.6371289468255196e-05, "loss": 1.042, "step": 4973 }, { "epoch": 0.8528623786355745, "grad_norm": 1.5859375, "learning_rate": 1.6369897432171896e-05, "loss": 1.0476, "step": 4974 }, { "epoch": 0.8530338427245644, "grad_norm": 1.671875, "learning_rate": 1.6368505188342253e-05, "loss": 1.0198, "step": 4975 }, { "epoch": 0.8532053068135542, "grad_norm": 1.6796875, "learning_rate": 1.636711273681167e-05, "loss": 1.0091, "step": 4976 }, { "epoch": 0.853376770902544, "grad_norm": 1.5234375, "learning_rate": 1.6365720077625575e-05, "loss": 1.0143, "step": 4977 }, { "epoch": 0.853548234991534, "grad_norm": 1.5859375, "learning_rate": 1.6364327210829373e-05, "loss": 0.8933, "step": 4978 }, { "epoch": 0.8537196990805238, "grad_norm": 1.5546875, "learning_rate": 1.636293413646849e-05, "loss": 0.9725, "step": 4979 }, { "epoch": 0.8538911631695136, "grad_norm": 1.578125, "learning_rate": 1.6361540854588364e-05, "loss": 1.0688, "step": 4980 }, { "epoch": 0.8540626272585036, "grad_norm": 1.6171875, "learning_rate": 1.636014736523444e-05, "loss": 1.0411, "step": 4981 }, { "epoch": 0.8542340913474934, "grad_norm": 1.59375, "learning_rate": 1.6358753668452162e-05, "loss": 1.0243, "step": 4982 }, { "epoch": 0.8544055554364832, "grad_norm": 1.6015625, "learning_rate": 1.6357359764286978e-05, "loss": 1.0091, "step": 4983 }, { "epoch": 0.8545770195254732, "grad_norm": 1.5390625, "learning_rate": 1.6355965652784353e-05, "loss": 0.9835, "step": 4984 }, { "epoch": 0.854748483614463, "grad_norm": 1.5859375, "learning_rate": 1.635457133398975e-05, "loss": 0.9444, "step": 4985 }, { "epoch": 0.8549199477034528, "grad_norm": 1.5078125, "learning_rate": 1.6353176807948654e-05, "loss": 1.0489, "step": 4986 }, { "epoch": 0.8550914117924427, "grad_norm": 1.4453125, "learning_rate": 1.6351782074706536e-05, "loss": 0.8774, "step": 4987 }, { "epoch": 0.8552628758814326, "grad_norm": 1.5546875, "learning_rate": 1.6350387134308884e-05, "loss": 1.002, "step": 4988 }, { "epoch": 0.8554343399704224, "grad_norm": 1.46875, "learning_rate": 1.6348991986801197e-05, "loss": 0.9584, "step": 4989 }, { "epoch": 0.8556058040594123, "grad_norm": 1.484375, "learning_rate": 1.634759663222897e-05, "loss": 0.9562, "step": 4990 }, { "epoch": 0.8557772681484022, "grad_norm": 1.7578125, "learning_rate": 1.634620107063772e-05, "loss": 0.9777, "step": 4991 }, { "epoch": 0.855948732237392, "grad_norm": 1.5625, "learning_rate": 1.634480530207295e-05, "loss": 0.9741, "step": 4992 }, { "epoch": 0.8561201963263819, "grad_norm": 1.59375, "learning_rate": 1.6343409326580185e-05, "loss": 1.0333, "step": 4993 }, { "epoch": 0.8562916604153717, "grad_norm": 1.640625, "learning_rate": 1.6342013144204956e-05, "loss": 1.0337, "step": 4994 }, { "epoch": 0.8564631245043616, "grad_norm": 1.625, "learning_rate": 1.6340616754992803e-05, "loss": 0.9508, "step": 4995 }, { "epoch": 0.8566345885933515, "grad_norm": 1.4609375, "learning_rate": 1.6339220158989256e-05, "loss": 0.9595, "step": 4996 }, { "epoch": 0.8568060526823413, "grad_norm": 1.5390625, "learning_rate": 1.633782335623987e-05, "loss": 0.9851, "step": 4997 }, { "epoch": 0.8569775167713312, "grad_norm": 1.5703125, "learning_rate": 1.6336426346790192e-05, "loss": 1.0379, "step": 4998 }, { "epoch": 0.8571489808603211, "grad_norm": 1.6015625, "learning_rate": 1.6335029130685794e-05, "loss": 1.1435, "step": 4999 }, { "epoch": 0.8573204449493109, "grad_norm": 1.515625, "learning_rate": 1.6333631707972245e-05, "loss": 0.9131, "step": 5000 }, { "epoch": 0.8574919090383007, "grad_norm": 1.4921875, "learning_rate": 1.6332234078695113e-05, "loss": 0.9825, "step": 5001 }, { "epoch": 0.8576633731272907, "grad_norm": 1.625, "learning_rate": 1.633083624289998e-05, "loss": 1.008, "step": 5002 }, { "epoch": 0.8578348372162805, "grad_norm": 1.671875, "learning_rate": 1.6329438200632443e-05, "loss": 1.1064, "step": 5003 }, { "epoch": 0.8580063013052703, "grad_norm": 1.5078125, "learning_rate": 1.6328039951938086e-05, "loss": 0.9704, "step": 5004 }, { "epoch": 0.8581777653942603, "grad_norm": 1.5234375, "learning_rate": 1.632664149686252e-05, "loss": 1.0046, "step": 5005 }, { "epoch": 0.8583492294832501, "grad_norm": 1.4453125, "learning_rate": 1.6325242835451353e-05, "loss": 0.9914, "step": 5006 }, { "epoch": 0.8585206935722399, "grad_norm": 1.5546875, "learning_rate": 1.632384396775019e-05, "loss": 1.0318, "step": 5007 }, { "epoch": 0.8586921576612299, "grad_norm": 1.609375, "learning_rate": 1.632244489380467e-05, "loss": 1.0568, "step": 5008 }, { "epoch": 0.8588636217502197, "grad_norm": 1.578125, "learning_rate": 1.632104561366041e-05, "loss": 1.0481, "step": 5009 }, { "epoch": 0.8590350858392095, "grad_norm": 1.609375, "learning_rate": 1.6319646127363053e-05, "loss": 1.0812, "step": 5010 }, { "epoch": 0.8592065499281994, "grad_norm": 1.5859375, "learning_rate": 1.6318246434958234e-05, "loss": 1.0274, "step": 5011 }, { "epoch": 0.8593780140171893, "grad_norm": 1.5703125, "learning_rate": 1.631684653649161e-05, "loss": 0.9391, "step": 5012 }, { "epoch": 0.8595494781061791, "grad_norm": 1.53125, "learning_rate": 1.631544643200883e-05, "loss": 0.9186, "step": 5013 }, { "epoch": 0.859720942195169, "grad_norm": 1.671875, "learning_rate": 1.6314046121555563e-05, "loss": 0.9996, "step": 5014 }, { "epoch": 0.8598924062841589, "grad_norm": 1.5078125, "learning_rate": 1.631264560517748e-05, "loss": 0.9689, "step": 5015 }, { "epoch": 0.8600638703731487, "grad_norm": 1.5390625, "learning_rate": 1.6311244882920243e-05, "loss": 1.0294, "step": 5016 }, { "epoch": 0.8602353344621386, "grad_norm": 1.46875, "learning_rate": 1.6309843954829547e-05, "loss": 0.9928, "step": 5017 }, { "epoch": 0.8604067985511284, "grad_norm": 1.5390625, "learning_rate": 1.630844282095108e-05, "loss": 0.968, "step": 5018 }, { "epoch": 0.8605782626401183, "grad_norm": 1.5, "learning_rate": 1.6307041481330542e-05, "loss": 1.0326, "step": 5019 }, { "epoch": 0.8607497267291082, "grad_norm": 1.5234375, "learning_rate": 1.6305639936013625e-05, "loss": 1.0035, "step": 5020 }, { "epoch": 0.860921190818098, "grad_norm": 1.515625, "learning_rate": 1.6304238185046046e-05, "loss": 0.9567, "step": 5021 }, { "epoch": 0.8610926549070879, "grad_norm": 1.53125, "learning_rate": 1.6302836228473518e-05, "loss": 1.0195, "step": 5022 }, { "epoch": 0.8612641189960778, "grad_norm": 1.5, "learning_rate": 1.6301434066341774e-05, "loss": 1.0022, "step": 5023 }, { "epoch": 0.8614355830850676, "grad_norm": 1.5546875, "learning_rate": 1.630003169869653e-05, "loss": 1.0037, "step": 5024 }, { "epoch": 0.8616070471740574, "grad_norm": 1.5703125, "learning_rate": 1.629862912558353e-05, "loss": 0.91, "step": 5025 }, { "epoch": 0.8617785112630474, "grad_norm": 1.5234375, "learning_rate": 1.6297226347048516e-05, "loss": 0.9843, "step": 5026 }, { "epoch": 0.8619499753520372, "grad_norm": 1.6171875, "learning_rate": 1.6295823363137238e-05, "loss": 0.9593, "step": 5027 }, { "epoch": 0.862121439441027, "grad_norm": 1.5625, "learning_rate": 1.629442017389545e-05, "loss": 0.9987, "step": 5028 }, { "epoch": 0.862292903530017, "grad_norm": 1.5703125, "learning_rate": 1.629301677936892e-05, "loss": 0.9576, "step": 5029 }, { "epoch": 0.8624643676190068, "grad_norm": 1.4921875, "learning_rate": 1.6291613179603417e-05, "loss": 0.9212, "step": 5030 }, { "epoch": 0.8626358317079966, "grad_norm": 1.609375, "learning_rate": 1.6290209374644712e-05, "loss": 1.0911, "step": 5031 }, { "epoch": 0.8628072957969866, "grad_norm": 1.6171875, "learning_rate": 1.6288805364538592e-05, "loss": 1.0831, "step": 5032 }, { "epoch": 0.8629787598859764, "grad_norm": 1.5546875, "learning_rate": 1.6287401149330854e-05, "loss": 0.9477, "step": 5033 }, { "epoch": 0.8631502239749662, "grad_norm": 1.6171875, "learning_rate": 1.6285996729067284e-05, "loss": 0.993, "step": 5034 }, { "epoch": 0.8633216880639561, "grad_norm": 1.6171875, "learning_rate": 1.628459210379369e-05, "loss": 0.9876, "step": 5035 }, { "epoch": 0.863493152152946, "grad_norm": 1.5390625, "learning_rate": 1.628318727355588e-05, "loss": 0.8755, "step": 5036 }, { "epoch": 0.8636646162419358, "grad_norm": 1.640625, "learning_rate": 1.6281782238399677e-05, "loss": 1.0774, "step": 5037 }, { "epoch": 0.8638360803309257, "grad_norm": 1.5703125, "learning_rate": 1.6280376998370896e-05, "loss": 0.9763, "step": 5038 }, { "epoch": 0.8640075444199156, "grad_norm": 1.578125, "learning_rate": 1.6278971553515375e-05, "loss": 1.0475, "step": 5039 }, { "epoch": 0.8641790085089054, "grad_norm": 1.6484375, "learning_rate": 1.6277565903878947e-05, "loss": 1.0331, "step": 5040 }, { "epoch": 0.8643504725978953, "grad_norm": 1.5390625, "learning_rate": 1.6276160049507455e-05, "loss": 1.018, "step": 5041 }, { "epoch": 0.8645219366868852, "grad_norm": 1.5625, "learning_rate": 1.627475399044675e-05, "loss": 0.9472, "step": 5042 }, { "epoch": 0.864693400775875, "grad_norm": 1.609375, "learning_rate": 1.6273347726742685e-05, "loss": 1.0766, "step": 5043 }, { "epoch": 0.8648648648648649, "grad_norm": 1.6171875, "learning_rate": 1.6271941258441134e-05, "loss": 0.9634, "step": 5044 }, { "epoch": 0.8650363289538547, "grad_norm": 1.625, "learning_rate": 1.6270534585587957e-05, "loss": 0.971, "step": 5045 }, { "epoch": 0.8652077930428446, "grad_norm": 1.4453125, "learning_rate": 1.6269127708229032e-05, "loss": 0.9035, "step": 5046 }, { "epoch": 0.8653792571318345, "grad_norm": 1.5390625, "learning_rate": 1.6267720626410248e-05, "loss": 1.032, "step": 5047 }, { "epoch": 0.8655507212208243, "grad_norm": 1.5546875, "learning_rate": 1.6266313340177494e-05, "loss": 1.0, "step": 5048 }, { "epoch": 0.8657221853098142, "grad_norm": 1.5390625, "learning_rate": 1.6264905849576663e-05, "loss": 1.0141, "step": 5049 }, { "epoch": 0.8658936493988041, "grad_norm": 1.5703125, "learning_rate": 1.626349815465366e-05, "loss": 0.9323, "step": 5050 }, { "epoch": 0.8660651134877939, "grad_norm": 1.8125, "learning_rate": 1.62620902554544e-05, "loss": 1.0563, "step": 5051 }, { "epoch": 0.8662365775767837, "grad_norm": 1.5625, "learning_rate": 1.626068215202479e-05, "loss": 1.0467, "step": 5052 }, { "epoch": 0.8664080416657737, "grad_norm": 1.5859375, "learning_rate": 1.6259273844410762e-05, "loss": 0.9192, "step": 5053 }, { "epoch": 0.8665795057547635, "grad_norm": 1.59375, "learning_rate": 1.6257865332658248e-05, "loss": 0.9773, "step": 5054 }, { "epoch": 0.8667509698437533, "grad_norm": 1.5703125, "learning_rate": 1.6256456616813176e-05, "loss": 0.9858, "step": 5055 }, { "epoch": 0.8669224339327433, "grad_norm": 1.5859375, "learning_rate": 1.6255047696921492e-05, "loss": 0.9744, "step": 5056 }, { "epoch": 0.8670938980217331, "grad_norm": 1.59375, "learning_rate": 1.6253638573029153e-05, "loss": 1.067, "step": 5057 }, { "epoch": 0.8672653621107229, "grad_norm": 1.5625, "learning_rate": 1.625222924518211e-05, "loss": 1.0363, "step": 5058 }, { "epoch": 0.8674368261997129, "grad_norm": 1.4921875, "learning_rate": 1.6250819713426326e-05, "loss": 0.9379, "step": 5059 }, { "epoch": 0.8676082902887027, "grad_norm": 1.53125, "learning_rate": 1.6249409977807772e-05, "loss": 1.0446, "step": 5060 }, { "epoch": 0.8677797543776925, "grad_norm": 1.5234375, "learning_rate": 1.624800003837243e-05, "loss": 1.0111, "step": 5061 }, { "epoch": 0.8679512184666824, "grad_norm": 1.5546875, "learning_rate": 1.6246589895166277e-05, "loss": 0.9468, "step": 5062 }, { "epoch": 0.8681226825556723, "grad_norm": 1.6796875, "learning_rate": 1.6245179548235303e-05, "loss": 0.9303, "step": 5063 }, { "epoch": 0.8682941466446621, "grad_norm": 1.59375, "learning_rate": 1.624376899762551e-05, "loss": 0.983, "step": 5064 }, { "epoch": 0.8684656107336519, "grad_norm": 1.5625, "learning_rate": 1.6242358243382894e-05, "loss": 0.9681, "step": 5065 }, { "epoch": 0.8686370748226419, "grad_norm": 1.6015625, "learning_rate": 1.6240947285553473e-05, "loss": 1.0484, "step": 5066 }, { "epoch": 0.8688085389116317, "grad_norm": 1.5234375, "learning_rate": 1.6239536124183258e-05, "loss": 1.0441, "step": 5067 }, { "epoch": 0.8689800030006215, "grad_norm": 1.59375, "learning_rate": 1.623812475931827e-05, "loss": 0.9782, "step": 5068 }, { "epoch": 0.8691514670896114, "grad_norm": 1.46875, "learning_rate": 1.623671319100455e-05, "loss": 0.9758, "step": 5069 }, { "epoch": 0.8693229311786013, "grad_norm": 1.515625, "learning_rate": 1.6235301419288125e-05, "loss": 0.924, "step": 5070 }, { "epoch": 0.8694943952675911, "grad_norm": 1.5390625, "learning_rate": 1.6233889444215042e-05, "loss": 0.9122, "step": 5071 }, { "epoch": 0.869665859356581, "grad_norm": 1.515625, "learning_rate": 1.6232477265831344e-05, "loss": 1.0136, "step": 5072 }, { "epoch": 0.8698373234455709, "grad_norm": 1.640625, "learning_rate": 1.62310648841831e-05, "loss": 1.0216, "step": 5073 }, { "epoch": 0.8700087875345607, "grad_norm": 1.4765625, "learning_rate": 1.622965229931636e-05, "loss": 0.9828, "step": 5074 }, { "epoch": 0.8701802516235506, "grad_norm": 1.671875, "learning_rate": 1.6228239511277207e-05, "loss": 0.9817, "step": 5075 }, { "epoch": 0.8703517157125404, "grad_norm": 1.6328125, "learning_rate": 1.6226826520111704e-05, "loss": 1.0222, "step": 5076 }, { "epoch": 0.8705231798015303, "grad_norm": 1.6171875, "learning_rate": 1.622541332586594e-05, "loss": 1.089, "step": 5077 }, { "epoch": 0.8706946438905202, "grad_norm": 1.5859375, "learning_rate": 1.622399992858601e-05, "loss": 0.9925, "step": 5078 }, { "epoch": 0.87086610797951, "grad_norm": 1.546875, "learning_rate": 1.6222586328317998e-05, "loss": 1.0387, "step": 5079 }, { "epoch": 0.8710375720684999, "grad_norm": 1.640625, "learning_rate": 1.622117252510802e-05, "loss": 1.0318, "step": 5080 }, { "epoch": 0.8712090361574898, "grad_norm": 1.65625, "learning_rate": 1.6219758519002174e-05, "loss": 1.0202, "step": 5081 }, { "epoch": 0.8713805002464796, "grad_norm": 1.5625, "learning_rate": 1.6218344310046585e-05, "loss": 0.9705, "step": 5082 }, { "epoch": 0.8715519643354694, "grad_norm": 1.5234375, "learning_rate": 1.621692989828737e-05, "loss": 1.0089, "step": 5083 }, { "epoch": 0.8717234284244594, "grad_norm": 1.578125, "learning_rate": 1.6215515283770666e-05, "loss": 0.9512, "step": 5084 }, { "epoch": 0.8718948925134492, "grad_norm": 1.515625, "learning_rate": 1.6214100466542598e-05, "loss": 1.0778, "step": 5085 }, { "epoch": 0.872066356602439, "grad_norm": 1.5546875, "learning_rate": 1.6212685446649313e-05, "loss": 1.0466, "step": 5086 }, { "epoch": 0.872237820691429, "grad_norm": 1.5078125, "learning_rate": 1.6211270224136962e-05, "loss": 0.9975, "step": 5087 }, { "epoch": 0.8724092847804188, "grad_norm": 1.5, "learning_rate": 1.6209854799051695e-05, "loss": 0.9996, "step": 5088 }, { "epoch": 0.8725807488694086, "grad_norm": 1.6484375, "learning_rate": 1.6208439171439688e-05, "loss": 1.0921, "step": 5089 }, { "epoch": 0.8727522129583986, "grad_norm": 1.65625, "learning_rate": 1.6207023341347094e-05, "loss": 0.9706, "step": 5090 }, { "epoch": 0.8729236770473884, "grad_norm": 1.5703125, "learning_rate": 1.6205607308820097e-05, "loss": 1.0387, "step": 5091 }, { "epoch": 0.8730951411363782, "grad_norm": 1.6015625, "learning_rate": 1.6204191073904877e-05, "loss": 1.0514, "step": 5092 }, { "epoch": 0.8732666052253681, "grad_norm": 1.6875, "learning_rate": 1.6202774636647624e-05, "loss": 1.0053, "step": 5093 }, { "epoch": 0.873438069314358, "grad_norm": 1.5859375, "learning_rate": 1.6201357997094534e-05, "loss": 0.9727, "step": 5094 }, { "epoch": 0.8736095334033478, "grad_norm": 1.515625, "learning_rate": 1.6199941155291805e-05, "loss": 1.0132, "step": 5095 }, { "epoch": 0.8737809974923377, "grad_norm": 1.609375, "learning_rate": 1.6198524111285648e-05, "loss": 1.0101, "step": 5096 }, { "epoch": 0.8739524615813276, "grad_norm": 1.5390625, "learning_rate": 1.6197106865122282e-05, "loss": 1.0265, "step": 5097 }, { "epoch": 0.8741239256703174, "grad_norm": 1.546875, "learning_rate": 1.619568941684792e-05, "loss": 1.0285, "step": 5098 }, { "epoch": 0.8742953897593073, "grad_norm": 1.4765625, "learning_rate": 1.61942717665088e-05, "loss": 0.9578, "step": 5099 }, { "epoch": 0.8744668538482971, "grad_norm": 1.5078125, "learning_rate": 1.6192853914151147e-05, "loss": 1.0192, "step": 5100 }, { "epoch": 0.874638317937287, "grad_norm": 1.5390625, "learning_rate": 1.6191435859821215e-05, "loss": 0.9872, "step": 5101 }, { "epoch": 0.8748097820262769, "grad_norm": 1.59375, "learning_rate": 1.6190017603565238e-05, "loss": 1.0022, "step": 5102 }, { "epoch": 0.8749812461152667, "grad_norm": 1.5234375, "learning_rate": 1.618859914542948e-05, "loss": 0.9653, "step": 5103 }, { "epoch": 0.8751527102042566, "grad_norm": 1.578125, "learning_rate": 1.61871804854602e-05, "loss": 1.0534, "step": 5104 }, { "epoch": 0.8753241742932465, "grad_norm": 1.6171875, "learning_rate": 1.6185761623703668e-05, "loss": 0.9797, "step": 5105 }, { "epoch": 0.8754956383822363, "grad_norm": 1.6015625, "learning_rate": 1.6184342560206153e-05, "loss": 1.0583, "step": 5106 }, { "epoch": 0.8756671024712261, "grad_norm": 1.5703125, "learning_rate": 1.6182923295013935e-05, "loss": 1.0972, "step": 5107 }, { "epoch": 0.8758385665602161, "grad_norm": 1.5859375, "learning_rate": 1.6181503828173314e-05, "loss": 0.9856, "step": 5108 }, { "epoch": 0.8760100306492059, "grad_norm": 1.53125, "learning_rate": 1.618008415973057e-05, "loss": 1.0702, "step": 5109 }, { "epoch": 0.8761814947381957, "grad_norm": 1.640625, "learning_rate": 1.6178664289732014e-05, "loss": 1.0591, "step": 5110 }, { "epoch": 0.8763529588271857, "grad_norm": 1.6328125, "learning_rate": 1.6177244218223943e-05, "loss": 0.9781, "step": 5111 }, { "epoch": 0.8765244229161755, "grad_norm": 1.640625, "learning_rate": 1.6175823945252684e-05, "loss": 0.9926, "step": 5112 }, { "epoch": 0.8766958870051653, "grad_norm": 1.46875, "learning_rate": 1.6174403470864545e-05, "loss": 0.951, "step": 5113 }, { "epoch": 0.8768673510941553, "grad_norm": 1.4375, "learning_rate": 1.6172982795105857e-05, "loss": 0.9442, "step": 5114 }, { "epoch": 0.8770388151831451, "grad_norm": 1.5703125, "learning_rate": 1.6171561918022954e-05, "loss": 0.9973, "step": 5115 }, { "epoch": 0.8772102792721349, "grad_norm": 1.7109375, "learning_rate": 1.6170140839662184e-05, "loss": 0.9902, "step": 5116 }, { "epoch": 0.8773817433611248, "grad_norm": 1.5859375, "learning_rate": 1.6168719560069882e-05, "loss": 1.1159, "step": 5117 }, { "epoch": 0.8775532074501147, "grad_norm": 1.578125, "learning_rate": 1.616729807929241e-05, "loss": 0.9024, "step": 5118 }, { "epoch": 0.8777246715391045, "grad_norm": 1.5234375, "learning_rate": 1.6165876397376117e-05, "loss": 0.9801, "step": 5119 }, { "epoch": 0.8778961356280944, "grad_norm": 1.6953125, "learning_rate": 1.616445451436738e-05, "loss": 1.106, "step": 5120 }, { "epoch": 0.8780675997170843, "grad_norm": 1.6875, "learning_rate": 1.616303243031257e-05, "loss": 0.9608, "step": 5121 }, { "epoch": 0.8782390638060741, "grad_norm": 1.5390625, "learning_rate": 1.6161610145258058e-05, "loss": 1.0416, "step": 5122 }, { "epoch": 0.878410527895064, "grad_norm": 1.6484375, "learning_rate": 1.616018765925024e-05, "loss": 1.0368, "step": 5123 }, { "epoch": 0.8785819919840538, "grad_norm": 1.6640625, "learning_rate": 1.6158764972335507e-05, "loss": 0.9011, "step": 5124 }, { "epoch": 0.8787534560730437, "grad_norm": 1.53125, "learning_rate": 1.6157342084560258e-05, "loss": 0.9417, "step": 5125 }, { "epoch": 0.8789249201620336, "grad_norm": 1.5546875, "learning_rate": 1.6155918995970892e-05, "loss": 1.0066, "step": 5126 }, { "epoch": 0.8790963842510234, "grad_norm": 1.5859375, "learning_rate": 1.615449570661383e-05, "loss": 1.0111, "step": 5127 }, { "epoch": 0.8792678483400133, "grad_norm": 1.5546875, "learning_rate": 1.6153072216535485e-05, "loss": 1.0012, "step": 5128 }, { "epoch": 0.8794393124290032, "grad_norm": 1.6640625, "learning_rate": 1.6151648525782286e-05, "loss": 1.0302, "step": 5129 }, { "epoch": 0.879610776517993, "grad_norm": 1.6328125, "learning_rate": 1.6150224634400666e-05, "loss": 1.0521, "step": 5130 }, { "epoch": 0.8797822406069828, "grad_norm": 1.625, "learning_rate": 1.614880054243706e-05, "loss": 1.0379, "step": 5131 }, { "epoch": 0.8799537046959728, "grad_norm": 1.484375, "learning_rate": 1.614737624993791e-05, "loss": 0.9418, "step": 5132 }, { "epoch": 0.8801251687849626, "grad_norm": 1.6796875, "learning_rate": 1.6145951756949676e-05, "loss": 1.0277, "step": 5133 }, { "epoch": 0.8802966328739524, "grad_norm": 1.5859375, "learning_rate": 1.6144527063518813e-05, "loss": 0.9721, "step": 5134 }, { "epoch": 0.8804680969629424, "grad_norm": 1.515625, "learning_rate": 1.614310216969178e-05, "loss": 1.0545, "step": 5135 }, { "epoch": 0.8806395610519322, "grad_norm": 1.5859375, "learning_rate": 1.6141677075515053e-05, "loss": 1.0213, "step": 5136 }, { "epoch": 0.880811025140922, "grad_norm": 1.5625, "learning_rate": 1.614025178103511e-05, "loss": 0.9924, "step": 5137 }, { "epoch": 0.880982489229912, "grad_norm": 1.5546875, "learning_rate": 1.6138826286298434e-05, "loss": 1.0368, "step": 5138 }, { "epoch": 0.8811539533189018, "grad_norm": 1.6328125, "learning_rate": 1.6137400591351518e-05, "loss": 1.032, "step": 5139 }, { "epoch": 0.8813254174078916, "grad_norm": 1.53125, "learning_rate": 1.6135974696240854e-05, "loss": 1.0255, "step": 5140 }, { "epoch": 0.8814968814968815, "grad_norm": 1.6015625, "learning_rate": 1.6134548601012954e-05, "loss": 0.9538, "step": 5141 }, { "epoch": 0.8816683455858714, "grad_norm": 1.53125, "learning_rate": 1.613312230571432e-05, "loss": 0.9656, "step": 5142 }, { "epoch": 0.8818398096748612, "grad_norm": 1.515625, "learning_rate": 1.613169581039147e-05, "loss": 1.0057, "step": 5143 }, { "epoch": 0.8820112737638511, "grad_norm": 1.5859375, "learning_rate": 1.6130269115090936e-05, "loss": 1.0336, "step": 5144 }, { "epoch": 0.882182737852841, "grad_norm": 1.6328125, "learning_rate": 1.6128842219859238e-05, "loss": 1.1094, "step": 5145 }, { "epoch": 0.8823542019418308, "grad_norm": 1.609375, "learning_rate": 1.6127415124742917e-05, "loss": 1.0314, "step": 5146 }, { "epoch": 0.8825256660308207, "grad_norm": 1.59375, "learning_rate": 1.6125987829788514e-05, "loss": 0.9689, "step": 5147 }, { "epoch": 0.8826971301198105, "grad_norm": 1.5390625, "learning_rate": 1.612456033504258e-05, "loss": 0.9291, "step": 5148 }, { "epoch": 0.8828685942088004, "grad_norm": 1.59375, "learning_rate": 1.6123132640551674e-05, "loss": 0.9894, "step": 5149 }, { "epoch": 0.8830400582977903, "grad_norm": 1.5703125, "learning_rate": 1.6121704746362352e-05, "loss": 0.965, "step": 5150 }, { "epoch": 0.8832115223867801, "grad_norm": 39.5, "learning_rate": 1.6120276652521185e-05, "loss": 1.0894, "step": 5151 }, { "epoch": 0.88338298647577, "grad_norm": 1.5859375, "learning_rate": 1.6118848359074753e-05, "loss": 1.0246, "step": 5152 }, { "epoch": 0.8835544505647599, "grad_norm": 1.5234375, "learning_rate": 1.611741986606963e-05, "loss": 0.974, "step": 5153 }, { "epoch": 0.8837259146537497, "grad_norm": 1.65625, "learning_rate": 1.611599117355241e-05, "loss": 1.0874, "step": 5154 }, { "epoch": 0.8838973787427395, "grad_norm": 1.640625, "learning_rate": 1.611456228156969e-05, "loss": 1.0439, "step": 5155 }, { "epoch": 0.8840688428317295, "grad_norm": 1.65625, "learning_rate": 1.611313319016807e-05, "loss": 0.9967, "step": 5156 }, { "epoch": 0.8842403069207193, "grad_norm": 1.5703125, "learning_rate": 1.611170389939415e-05, "loss": 0.9404, "step": 5157 }, { "epoch": 0.8844117710097091, "grad_norm": 1.5703125, "learning_rate": 1.6110274409294556e-05, "loss": 0.9975, "step": 5158 }, { "epoch": 0.884583235098699, "grad_norm": 1.625, "learning_rate": 1.6108844719915905e-05, "loss": 1.0937, "step": 5159 }, { "epoch": 0.8847546991876889, "grad_norm": 1.53125, "learning_rate": 1.610741483130482e-05, "loss": 1.0137, "step": 5160 }, { "epoch": 0.8849261632766787, "grad_norm": 1.59375, "learning_rate": 1.6105984743507944e-05, "loss": 1.0278, "step": 5161 }, { "epoch": 0.8850976273656685, "grad_norm": 1.5078125, "learning_rate": 1.610455445657191e-05, "loss": 0.9889, "step": 5162 }, { "epoch": 0.8852690914546585, "grad_norm": 1.6328125, "learning_rate": 1.6103123970543366e-05, "loss": 0.9304, "step": 5163 }, { "epoch": 0.8854405555436483, "grad_norm": 1.6328125, "learning_rate": 1.6101693285468968e-05, "loss": 1.0058, "step": 5164 }, { "epoch": 0.8856120196326381, "grad_norm": 1.4921875, "learning_rate": 1.6100262401395376e-05, "loss": 1.0408, "step": 5165 }, { "epoch": 0.8857834837216281, "grad_norm": 1.5234375, "learning_rate": 1.6098831318369253e-05, "loss": 1.0108, "step": 5166 }, { "epoch": 0.8859549478106179, "grad_norm": 1.4921875, "learning_rate": 1.6097400036437276e-05, "loss": 0.9301, "step": 5167 }, { "epoch": 0.8861264118996077, "grad_norm": 1.53125, "learning_rate": 1.6095968555646128e-05, "loss": 0.9718, "step": 5168 }, { "epoch": 0.8862978759885977, "grad_norm": 1.7421875, "learning_rate": 1.6094536876042486e-05, "loss": 1.1003, "step": 5169 }, { "epoch": 0.8864693400775875, "grad_norm": 1.6015625, "learning_rate": 1.6093104997673045e-05, "loss": 1.0531, "step": 5170 }, { "epoch": 0.8866408041665773, "grad_norm": 1.609375, "learning_rate": 1.6091672920584508e-05, "loss": 1.0346, "step": 5171 }, { "epoch": 0.8868122682555672, "grad_norm": 1.5625, "learning_rate": 1.6090240644823577e-05, "loss": 0.9871, "step": 5172 }, { "epoch": 0.8869837323445571, "grad_norm": 1.6015625, "learning_rate": 1.6088808170436964e-05, "loss": 0.9438, "step": 5173 }, { "epoch": 0.8871551964335469, "grad_norm": 1.578125, "learning_rate": 1.608737549747139e-05, "loss": 1.0102, "step": 5174 }, { "epoch": 0.8873266605225368, "grad_norm": 1.59375, "learning_rate": 1.6085942625973577e-05, "loss": 1.0018, "step": 5175 }, { "epoch": 0.8874981246115267, "grad_norm": 1.4609375, "learning_rate": 1.6084509555990258e-05, "loss": 0.9204, "step": 5176 }, { "epoch": 0.8876695887005165, "grad_norm": 1.59375, "learning_rate": 1.608307628756817e-05, "loss": 1.0178, "step": 5177 }, { "epoch": 0.8878410527895064, "grad_norm": 1.5625, "learning_rate": 1.608164282075406e-05, "loss": 0.9534, "step": 5178 }, { "epoch": 0.8880125168784962, "grad_norm": 1.4921875, "learning_rate": 1.608020915559467e-05, "loss": 1.0153, "step": 5179 }, { "epoch": 0.8881839809674861, "grad_norm": 1.6953125, "learning_rate": 1.607877529213677e-05, "loss": 1.0366, "step": 5180 }, { "epoch": 0.888355445056476, "grad_norm": 1.5703125, "learning_rate": 1.607734123042711e-05, "loss": 1.0342, "step": 5181 }, { "epoch": 0.8885269091454658, "grad_norm": 1.484375, "learning_rate": 1.6075906970512475e-05, "loss": 0.9779, "step": 5182 }, { "epoch": 0.8886983732344557, "grad_norm": 1.578125, "learning_rate": 1.607447251243963e-05, "loss": 0.994, "step": 5183 }, { "epoch": 0.8888698373234456, "grad_norm": 1.59375, "learning_rate": 1.6073037856255362e-05, "loss": 0.9917, "step": 5184 }, { "epoch": 0.8890413014124354, "grad_norm": 1.5703125, "learning_rate": 1.607160300200646e-05, "loss": 1.0062, "step": 5185 }, { "epoch": 0.8892127655014253, "grad_norm": 1.609375, "learning_rate": 1.6070167949739724e-05, "loss": 1.039, "step": 5186 }, { "epoch": 0.8893842295904152, "grad_norm": 1.5234375, "learning_rate": 1.606873269950195e-05, "loss": 0.9748, "step": 5187 }, { "epoch": 0.889555693679405, "grad_norm": 1.59375, "learning_rate": 1.606729725133995e-05, "loss": 0.9604, "step": 5188 }, { "epoch": 0.8897271577683948, "grad_norm": 1.5390625, "learning_rate": 1.606586160530054e-05, "loss": 0.9808, "step": 5189 }, { "epoch": 0.8898986218573848, "grad_norm": 1.5703125, "learning_rate": 1.606442576143054e-05, "loss": 0.894, "step": 5190 }, { "epoch": 0.8900700859463746, "grad_norm": 1.546875, "learning_rate": 1.6062989719776782e-05, "loss": 0.9603, "step": 5191 }, { "epoch": 0.8902415500353644, "grad_norm": 1.5, "learning_rate": 1.60615534803861e-05, "loss": 0.9645, "step": 5192 }, { "epoch": 0.8904130141243544, "grad_norm": 1.5, "learning_rate": 1.6060117043305327e-05, "loss": 0.9169, "step": 5193 }, { "epoch": 0.8905844782133442, "grad_norm": 1.4765625, "learning_rate": 1.605868040858132e-05, "loss": 0.9474, "step": 5194 }, { "epoch": 0.890755942302334, "grad_norm": 1.625, "learning_rate": 1.605724357626093e-05, "loss": 1.0531, "step": 5195 }, { "epoch": 0.890927406391324, "grad_norm": 1.6328125, "learning_rate": 1.605580654639102e-05, "loss": 1.035, "step": 5196 }, { "epoch": 0.8910988704803138, "grad_norm": 1.53125, "learning_rate": 1.605436931901845e-05, "loss": 1.0524, "step": 5197 }, { "epoch": 0.8912703345693036, "grad_norm": 1.578125, "learning_rate": 1.60529318941901e-05, "loss": 1.1239, "step": 5198 }, { "epoch": 0.8914417986582935, "grad_norm": 1.5078125, "learning_rate": 1.6051494271952844e-05, "loss": 1.0065, "step": 5199 }, { "epoch": 0.8916132627472834, "grad_norm": 1.578125, "learning_rate": 1.605005645235358e-05, "loss": 0.9343, "step": 5200 }, { "epoch": 0.8917847268362732, "grad_norm": 1.484375, "learning_rate": 1.6048618435439184e-05, "loss": 0.9976, "step": 5201 }, { "epoch": 0.8919561909252631, "grad_norm": 1.53125, "learning_rate": 1.604718022125657e-05, "loss": 0.986, "step": 5202 }, { "epoch": 0.892127655014253, "grad_norm": 1.4921875, "learning_rate": 1.6045741809852636e-05, "loss": 0.9586, "step": 5203 }, { "epoch": 0.8922991191032428, "grad_norm": 1.625, "learning_rate": 1.6044303201274293e-05, "loss": 1.0265, "step": 5204 }, { "epoch": 0.8924705831922327, "grad_norm": 1.515625, "learning_rate": 1.6042864395568466e-05, "loss": 0.9714, "step": 5205 }, { "epoch": 0.8926420472812225, "grad_norm": 1.59375, "learning_rate": 1.6041425392782073e-05, "loss": 1.0083, "step": 5206 }, { "epoch": 0.8928135113702124, "grad_norm": 1.5859375, "learning_rate": 1.6039986192962048e-05, "loss": 1.0321, "step": 5207 }, { "epoch": 0.8929849754592023, "grad_norm": 1.6640625, "learning_rate": 1.603854679615533e-05, "loss": 1.0479, "step": 5208 }, { "epoch": 0.8931564395481921, "grad_norm": 1.6875, "learning_rate": 1.6037107202408862e-05, "loss": 1.0948, "step": 5209 }, { "epoch": 0.893327903637182, "grad_norm": 1.6015625, "learning_rate": 1.6035667411769593e-05, "loss": 1.0308, "step": 5210 }, { "epoch": 0.8934993677261719, "grad_norm": 1.7578125, "learning_rate": 1.6034227424284482e-05, "loss": 1.0654, "step": 5211 }, { "epoch": 0.8936708318151617, "grad_norm": 1.609375, "learning_rate": 1.603278724000049e-05, "loss": 0.976, "step": 5212 }, { "epoch": 0.8938422959041515, "grad_norm": 1.6015625, "learning_rate": 1.603134685896459e-05, "loss": 1.0447, "step": 5213 }, { "epoch": 0.8940137599931415, "grad_norm": 1.5390625, "learning_rate": 1.602990628122376e-05, "loss": 1.0081, "step": 5214 }, { "epoch": 0.8941852240821313, "grad_norm": 1.6796875, "learning_rate": 1.6028465506824978e-05, "loss": 1.143, "step": 5215 }, { "epoch": 0.8943566881711211, "grad_norm": 1.625, "learning_rate": 1.6027024535815234e-05, "loss": 1.0891, "step": 5216 }, { "epoch": 0.8945281522601111, "grad_norm": 1.5078125, "learning_rate": 1.6025583368241524e-05, "loss": 0.9675, "step": 5217 }, { "epoch": 0.8946996163491009, "grad_norm": 1.4609375, "learning_rate": 1.602414200415085e-05, "loss": 0.9697, "step": 5218 }, { "epoch": 0.8948710804380907, "grad_norm": 1.5546875, "learning_rate": 1.602270044359022e-05, "loss": 1.0339, "step": 5219 }, { "epoch": 0.8950425445270807, "grad_norm": 1.515625, "learning_rate": 1.6021258686606652e-05, "loss": 1.018, "step": 5220 }, { "epoch": 0.8952140086160705, "grad_norm": 1.78125, "learning_rate": 1.6019816733247158e-05, "loss": 0.9383, "step": 5221 }, { "epoch": 0.8953854727050603, "grad_norm": 1.53125, "learning_rate": 1.601837458355878e-05, "loss": 0.9684, "step": 5222 }, { "epoch": 0.8955569367940502, "grad_norm": 1.453125, "learning_rate": 1.6016932237588537e-05, "loss": 1.0065, "step": 5223 }, { "epoch": 0.8957284008830401, "grad_norm": 1.6171875, "learning_rate": 1.601548969538348e-05, "loss": 0.9799, "step": 5224 }, { "epoch": 0.8958998649720299, "grad_norm": 1.5625, "learning_rate": 1.6014046956990653e-05, "loss": 1.0463, "step": 5225 }, { "epoch": 0.8960713290610198, "grad_norm": 1.5390625, "learning_rate": 1.6012604022457104e-05, "loss": 1.0254, "step": 5226 }, { "epoch": 0.8962427931500097, "grad_norm": 1.5703125, "learning_rate": 1.6011160891829898e-05, "loss": 1.0115, "step": 5227 }, { "epoch": 0.8964142572389995, "grad_norm": 1.578125, "learning_rate": 1.60097175651561e-05, "loss": 0.982, "step": 5228 }, { "epoch": 0.8965857213279894, "grad_norm": 1.5234375, "learning_rate": 1.600827404248278e-05, "loss": 0.9934, "step": 5229 }, { "epoch": 0.8967571854169792, "grad_norm": 1.5859375, "learning_rate": 1.600683032385702e-05, "loss": 1.0717, "step": 5230 }, { "epoch": 0.8969286495059691, "grad_norm": 1.578125, "learning_rate": 1.6005386409325906e-05, "loss": 1.0007, "step": 5231 }, { "epoch": 0.897100113594959, "grad_norm": 1.53125, "learning_rate": 1.6003942298936524e-05, "loss": 0.9075, "step": 5232 }, { "epoch": 0.8972715776839488, "grad_norm": 1.515625, "learning_rate": 1.6002497992735973e-05, "loss": 0.9794, "step": 5233 }, { "epoch": 0.8974430417729387, "grad_norm": 1.4765625, "learning_rate": 1.600105349077136e-05, "loss": 0.9492, "step": 5234 }, { "epoch": 0.8976145058619286, "grad_norm": 1.546875, "learning_rate": 1.5999608793089797e-05, "loss": 0.9647, "step": 5235 }, { "epoch": 0.8977859699509184, "grad_norm": 1.640625, "learning_rate": 1.5998163899738398e-05, "loss": 1.0268, "step": 5236 }, { "epoch": 0.8979574340399082, "grad_norm": 1.546875, "learning_rate": 1.5996718810764285e-05, "loss": 1.0403, "step": 5237 }, { "epoch": 0.8981288981288982, "grad_norm": 1.6328125, "learning_rate": 1.5995273526214596e-05, "loss": 0.9826, "step": 5238 }, { "epoch": 0.898300362217888, "grad_norm": 1.5234375, "learning_rate": 1.5993828046136454e-05, "loss": 0.9027, "step": 5239 }, { "epoch": 0.8984718263068778, "grad_norm": 1.578125, "learning_rate": 1.5992382370577013e-05, "loss": 0.9631, "step": 5240 }, { "epoch": 0.8986432903958678, "grad_norm": 1.578125, "learning_rate": 1.5990936499583415e-05, "loss": 1.037, "step": 5241 }, { "epoch": 0.8988147544848576, "grad_norm": 1.53125, "learning_rate": 1.598949043320282e-05, "loss": 1.0115, "step": 5242 }, { "epoch": 0.8989862185738474, "grad_norm": 1.578125, "learning_rate": 1.598804417148239e-05, "loss": 0.9891, "step": 5243 }, { "epoch": 0.8991576826628374, "grad_norm": 1.4609375, "learning_rate": 1.5986597714469288e-05, "loss": 1.0014, "step": 5244 }, { "epoch": 0.8993291467518272, "grad_norm": 1.53125, "learning_rate": 1.5985151062210687e-05, "loss": 0.9914, "step": 5245 }, { "epoch": 0.899500610840817, "grad_norm": 1.625, "learning_rate": 1.5983704214753777e-05, "loss": 1.0197, "step": 5246 }, { "epoch": 0.8996720749298069, "grad_norm": 1.5, "learning_rate": 1.5982257172145738e-05, "loss": 0.9477, "step": 5247 }, { "epoch": 0.8998435390187968, "grad_norm": 1.5703125, "learning_rate": 1.598080993443377e-05, "loss": 0.9749, "step": 5248 }, { "epoch": 0.9000150031077866, "grad_norm": 1.5234375, "learning_rate": 1.5979362501665062e-05, "loss": 0.9667, "step": 5249 }, { "epoch": 0.9001864671967765, "grad_norm": 1.5078125, "learning_rate": 1.597791487388683e-05, "loss": 1.0182, "step": 5250 }, { "epoch": 0.9003579312857664, "grad_norm": 1.5234375, "learning_rate": 1.5976467051146284e-05, "loss": 1.0425, "step": 5251 }, { "epoch": 0.9005293953747562, "grad_norm": 1.546875, "learning_rate": 1.597501903349064e-05, "loss": 1.0029, "step": 5252 }, { "epoch": 0.900700859463746, "grad_norm": 1.484375, "learning_rate": 1.5973570820967125e-05, "loss": 1.0055, "step": 5253 }, { "epoch": 0.9008723235527359, "grad_norm": 1.640625, "learning_rate": 1.5972122413622972e-05, "loss": 1.0582, "step": 5254 }, { "epoch": 0.9010437876417258, "grad_norm": 1.578125, "learning_rate": 1.597067381150542e-05, "loss": 1.0042, "step": 5255 }, { "epoch": 0.9012152517307156, "grad_norm": 1.7421875, "learning_rate": 1.5969225014661708e-05, "loss": 1.101, "step": 5256 }, { "epoch": 0.9013867158197055, "grad_norm": 1.484375, "learning_rate": 1.5967776023139094e-05, "loss": 0.9062, "step": 5257 }, { "epoch": 0.9015581799086954, "grad_norm": 1.5859375, "learning_rate": 1.596632683698483e-05, "loss": 1.0323, "step": 5258 }, { "epoch": 0.9017296439976852, "grad_norm": 1.6171875, "learning_rate": 1.596487745624618e-05, "loss": 1.0726, "step": 5259 }, { "epoch": 0.9019011080866751, "grad_norm": 1.5859375, "learning_rate": 1.5963427880970414e-05, "loss": 1.0093, "step": 5260 }, { "epoch": 0.9020725721756649, "grad_norm": 1.53125, "learning_rate": 1.596197811120481e-05, "loss": 0.9529, "step": 5261 }, { "epoch": 0.9022440362646548, "grad_norm": 1.546875, "learning_rate": 1.596052814699665e-05, "loss": 0.9538, "step": 5262 }, { "epoch": 0.9024155003536447, "grad_norm": 1.546875, "learning_rate": 1.595907798839322e-05, "loss": 1.0119, "step": 5263 }, { "epoch": 0.9025869644426345, "grad_norm": 1.5390625, "learning_rate": 1.5957627635441815e-05, "loss": 0.9586, "step": 5264 }, { "epoch": 0.9027584285316244, "grad_norm": 1.5625, "learning_rate": 1.5956177088189742e-05, "loss": 0.9865, "step": 5265 }, { "epoch": 0.9029298926206143, "grad_norm": 1.6171875, "learning_rate": 1.5954726346684303e-05, "loss": 1.0195, "step": 5266 }, { "epoch": 0.9031013567096041, "grad_norm": 1.5703125, "learning_rate": 1.5953275410972817e-05, "loss": 1.0341, "step": 5267 }, { "epoch": 0.903272820798594, "grad_norm": 1.4765625, "learning_rate": 1.59518242811026e-05, "loss": 0.974, "step": 5268 }, { "epoch": 0.9034442848875839, "grad_norm": 1.5, "learning_rate": 1.595037295712098e-05, "loss": 0.9357, "step": 5269 }, { "epoch": 0.9036157489765737, "grad_norm": 1.546875, "learning_rate": 1.5948921439075293e-05, "loss": 1.0212, "step": 5270 }, { "epoch": 0.9037872130655635, "grad_norm": 1.578125, "learning_rate": 1.5947469727012876e-05, "loss": 0.9645, "step": 5271 }, { "epoch": 0.9039586771545535, "grad_norm": 1.5078125, "learning_rate": 1.5946017820981073e-05, "loss": 0.9818, "step": 5272 }, { "epoch": 0.9041301412435433, "grad_norm": 1.5390625, "learning_rate": 1.5944565721027243e-05, "loss": 1.0271, "step": 5273 }, { "epoch": 0.9043016053325331, "grad_norm": 1.5234375, "learning_rate": 1.5943113427198735e-05, "loss": 0.9747, "step": 5274 }, { "epoch": 0.9044730694215231, "grad_norm": 1.546875, "learning_rate": 1.594166093954292e-05, "loss": 0.9688, "step": 5275 }, { "epoch": 0.9046445335105129, "grad_norm": 1.625, "learning_rate": 1.5940208258107164e-05, "loss": 0.9406, "step": 5276 }, { "epoch": 0.9048159975995027, "grad_norm": 1.5546875, "learning_rate": 1.5938755382938853e-05, "loss": 0.9435, "step": 5277 }, { "epoch": 0.9049874616884926, "grad_norm": 1.5390625, "learning_rate": 1.5937302314085363e-05, "loss": 0.9882, "step": 5278 }, { "epoch": 0.9051589257774825, "grad_norm": 1.5546875, "learning_rate": 1.5935849051594086e-05, "loss": 0.9744, "step": 5279 }, { "epoch": 0.9053303898664723, "grad_norm": 1.515625, "learning_rate": 1.5934395595512423e-05, "loss": 0.9818, "step": 5280 }, { "epoch": 0.9055018539554622, "grad_norm": 1.671875, "learning_rate": 1.593294194588777e-05, "loss": 1.0195, "step": 5281 }, { "epoch": 0.9056733180444521, "grad_norm": 1.6015625, "learning_rate": 1.593148810276754e-05, "loss": 1.0049, "step": 5282 }, { "epoch": 0.9058447821334419, "grad_norm": 1.71875, "learning_rate": 1.5930034066199146e-05, "loss": 1.0413, "step": 5283 }, { "epoch": 0.9060162462224318, "grad_norm": 1.4921875, "learning_rate": 1.5928579836230012e-05, "loss": 0.9894, "step": 5284 }, { "epoch": 0.9061877103114216, "grad_norm": 1.546875, "learning_rate": 1.5927125412907563e-05, "loss": 0.9478, "step": 5285 }, { "epoch": 0.9063591744004115, "grad_norm": 1.5234375, "learning_rate": 1.5925670796279235e-05, "loss": 0.9541, "step": 5286 }, { "epoch": 0.9065306384894014, "grad_norm": 1.5703125, "learning_rate": 1.592421598639247e-05, "loss": 0.969, "step": 5287 }, { "epoch": 0.9067021025783912, "grad_norm": 1.5078125, "learning_rate": 1.592276098329471e-05, "loss": 0.9962, "step": 5288 }, { "epoch": 0.9068735666673811, "grad_norm": 1.5390625, "learning_rate": 1.5921305787033416e-05, "loss": 1.0632, "step": 5289 }, { "epoch": 0.907045030756371, "grad_norm": 1.609375, "learning_rate": 1.5919850397656036e-05, "loss": 0.9742, "step": 5290 }, { "epoch": 0.9072164948453608, "grad_norm": 1.515625, "learning_rate": 1.591839481521005e-05, "loss": 1.0608, "step": 5291 }, { "epoch": 0.9073879589343506, "grad_norm": 1.609375, "learning_rate": 1.5916939039742915e-05, "loss": 0.9504, "step": 5292 }, { "epoch": 0.9075594230233406, "grad_norm": 1.609375, "learning_rate": 1.591548307130212e-05, "loss": 0.9848, "step": 5293 }, { "epoch": 0.9077308871123304, "grad_norm": 1.578125, "learning_rate": 1.591402690993515e-05, "loss": 1.0109, "step": 5294 }, { "epoch": 0.9079023512013202, "grad_norm": 1.5625, "learning_rate": 1.591257055568949e-05, "loss": 1.0148, "step": 5295 }, { "epoch": 0.9080738152903102, "grad_norm": 1.5546875, "learning_rate": 1.5911114008612637e-05, "loss": 1.0152, "step": 5296 }, { "epoch": 0.9082452793793, "grad_norm": 1.578125, "learning_rate": 1.5909657268752096e-05, "loss": 1.0008, "step": 5297 }, { "epoch": 0.9084167434682898, "grad_norm": 1.65625, "learning_rate": 1.590820033615538e-05, "loss": 1.0524, "step": 5298 }, { "epoch": 0.9085882075572798, "grad_norm": 1.546875, "learning_rate": 1.5906743210870007e-05, "loss": 0.9841, "step": 5299 }, { "epoch": 0.9087596716462696, "grad_norm": 1.6171875, "learning_rate": 1.590528589294349e-05, "loss": 0.9474, "step": 5300 }, { "epoch": 0.9089311357352594, "grad_norm": 1.8046875, "learning_rate": 1.5903828382423366e-05, "loss": 1.0609, "step": 5301 }, { "epoch": 0.9091025998242493, "grad_norm": 1.546875, "learning_rate": 1.5902370679357165e-05, "loss": 0.9618, "step": 5302 }, { "epoch": 0.9092740639132392, "grad_norm": 1.5, "learning_rate": 1.5900912783792426e-05, "loss": 0.9268, "step": 5303 }, { "epoch": 0.909445528002229, "grad_norm": 1.578125, "learning_rate": 1.5899454695776705e-05, "loss": 1.0258, "step": 5304 }, { "epoch": 0.9096169920912189, "grad_norm": 1.546875, "learning_rate": 1.589799641535755e-05, "loss": 0.9598, "step": 5305 }, { "epoch": 0.9097884561802088, "grad_norm": 1.703125, "learning_rate": 1.5896537942582523e-05, "loss": 1.0599, "step": 5306 }, { "epoch": 0.9099599202691986, "grad_norm": 1.5703125, "learning_rate": 1.589507927749919e-05, "loss": 0.9962, "step": 5307 }, { "epoch": 0.9101313843581885, "grad_norm": 1.5703125, "learning_rate": 1.589362042015512e-05, "loss": 1.0258, "step": 5308 }, { "epoch": 0.9103028484471783, "grad_norm": 1.5625, "learning_rate": 1.5892161370597897e-05, "loss": 0.9494, "step": 5309 }, { "epoch": 0.9104743125361682, "grad_norm": 1.5625, "learning_rate": 1.5890702128875103e-05, "loss": 1.0038, "step": 5310 }, { "epoch": 0.9106457766251581, "grad_norm": 1.6484375, "learning_rate": 1.588924269503433e-05, "loss": 0.9653, "step": 5311 }, { "epoch": 0.9108172407141479, "grad_norm": 1.6640625, "learning_rate": 1.5887783069123178e-05, "loss": 1.0415, "step": 5312 }, { "epoch": 0.9109887048031378, "grad_norm": 1.4921875, "learning_rate": 1.5886323251189247e-05, "loss": 0.9358, "step": 5313 }, { "epoch": 0.9111601688921277, "grad_norm": 1.5546875, "learning_rate": 1.5884863241280147e-05, "loss": 0.9164, "step": 5314 }, { "epoch": 0.9113316329811175, "grad_norm": 1.5234375, "learning_rate": 1.5883403039443498e-05, "loss": 0.9586, "step": 5315 }, { "epoch": 0.9115030970701073, "grad_norm": 1.5546875, "learning_rate": 1.5881942645726924e-05, "loss": 1.0247, "step": 5316 }, { "epoch": 0.9116745611590973, "grad_norm": 1.4765625, "learning_rate": 1.5880482060178048e-05, "loss": 0.9309, "step": 5317 }, { "epoch": 0.9118460252480871, "grad_norm": 1.53125, "learning_rate": 1.587902128284451e-05, "loss": 0.9456, "step": 5318 }, { "epoch": 0.9120174893370769, "grad_norm": 1.6328125, "learning_rate": 1.5877560313773946e-05, "loss": 0.9501, "step": 5319 }, { "epoch": 0.9121889534260669, "grad_norm": 1.609375, "learning_rate": 1.5876099153014008e-05, "loss": 0.9963, "step": 5320 }, { "epoch": 0.9123604175150567, "grad_norm": 1.53125, "learning_rate": 1.587463780061235e-05, "loss": 0.9477, "step": 5321 }, { "epoch": 0.9125318816040465, "grad_norm": 1.5078125, "learning_rate": 1.5873176256616634e-05, "loss": 0.9406, "step": 5322 }, { "epoch": 0.9127033456930365, "grad_norm": 1.5625, "learning_rate": 1.587171452107452e-05, "loss": 0.9296, "step": 5323 }, { "epoch": 0.9128748097820263, "grad_norm": 1.578125, "learning_rate": 1.5870252594033687e-05, "loss": 1.0263, "step": 5324 }, { "epoch": 0.9130462738710161, "grad_norm": 1.4921875, "learning_rate": 1.5868790475541812e-05, "loss": 0.934, "step": 5325 }, { "epoch": 0.913217737960006, "grad_norm": 1.59375, "learning_rate": 1.5867328165646583e-05, "loss": 1.0729, "step": 5326 }, { "epoch": 0.9133892020489959, "grad_norm": 1.546875, "learning_rate": 1.5865865664395684e-05, "loss": 0.9704, "step": 5327 }, { "epoch": 0.9135606661379857, "grad_norm": 1.65625, "learning_rate": 1.5864402971836813e-05, "loss": 1.025, "step": 5328 }, { "epoch": 0.9137321302269756, "grad_norm": 1.5546875, "learning_rate": 1.5862940088017683e-05, "loss": 1.0173, "step": 5329 }, { "epoch": 0.9139035943159655, "grad_norm": 1.5703125, "learning_rate": 1.5861477012986002e-05, "loss": 1.0344, "step": 5330 }, { "epoch": 0.9140750584049553, "grad_norm": 1.6171875, "learning_rate": 1.5860013746789477e-05, "loss": 0.9556, "step": 5331 }, { "epoch": 0.9142465224939452, "grad_norm": 1.6171875, "learning_rate": 1.585855028947584e-05, "loss": 0.9806, "step": 5332 }, { "epoch": 0.914417986582935, "grad_norm": 1.625, "learning_rate": 1.5857086641092817e-05, "loss": 0.9937, "step": 5333 }, { "epoch": 0.9145894506719249, "grad_norm": 1.6171875, "learning_rate": 1.585562280168814e-05, "loss": 1.0304, "step": 5334 }, { "epoch": 0.9147609147609148, "grad_norm": 1.578125, "learning_rate": 1.5854158771309557e-05, "loss": 1.0163, "step": 5335 }, { "epoch": 0.9149323788499046, "grad_norm": 1.5703125, "learning_rate": 1.585269455000481e-05, "loss": 0.9672, "step": 5336 }, { "epoch": 0.9151038429388945, "grad_norm": 1.5625, "learning_rate": 1.5851230137821656e-05, "loss": 1.0051, "step": 5337 }, { "epoch": 0.9152753070278844, "grad_norm": 1.625, "learning_rate": 1.584976553480785e-05, "loss": 1.0918, "step": 5338 }, { "epoch": 0.9154467711168742, "grad_norm": 1.5390625, "learning_rate": 1.5848300741011163e-05, "loss": 0.9945, "step": 5339 }, { "epoch": 0.915618235205864, "grad_norm": 1.5703125, "learning_rate": 1.5846835756479367e-05, "loss": 0.9911, "step": 5340 }, { "epoch": 0.915789699294854, "grad_norm": 1.5859375, "learning_rate": 1.5845370581260243e-05, "loss": 1.0362, "step": 5341 }, { "epoch": 0.9159611633838438, "grad_norm": 1.703125, "learning_rate": 1.5843905215401566e-05, "loss": 1.0358, "step": 5342 }, { "epoch": 0.9161326274728336, "grad_norm": 1.5234375, "learning_rate": 1.5842439658951137e-05, "loss": 1.0377, "step": 5343 }, { "epoch": 0.9163040915618236, "grad_norm": 1.671875, "learning_rate": 1.584097391195675e-05, "loss": 1.104, "step": 5344 }, { "epoch": 0.9164755556508134, "grad_norm": 1.515625, "learning_rate": 1.583950797446621e-05, "loss": 0.8953, "step": 5345 }, { "epoch": 0.9166470197398032, "grad_norm": 1.6796875, "learning_rate": 1.5838041846527325e-05, "loss": 1.0442, "step": 5346 }, { "epoch": 0.9168184838287932, "grad_norm": 1.5703125, "learning_rate": 1.583657552818791e-05, "loss": 0.9812, "step": 5347 }, { "epoch": 0.916989947917783, "grad_norm": 1.640625, "learning_rate": 1.583510901949579e-05, "loss": 1.0907, "step": 5348 }, { "epoch": 0.9171614120067728, "grad_norm": 1.6171875, "learning_rate": 1.583364232049879e-05, "loss": 0.9746, "step": 5349 }, { "epoch": 0.9173328760957626, "grad_norm": 1.546875, "learning_rate": 1.583217543124475e-05, "loss": 1.0124, "step": 5350 }, { "epoch": 0.9175043401847526, "grad_norm": 1.5546875, "learning_rate": 1.5830708351781507e-05, "loss": 1.044, "step": 5351 }, { "epoch": 0.9176758042737424, "grad_norm": 1.5859375, "learning_rate": 1.5829241082156907e-05, "loss": 1.047, "step": 5352 }, { "epoch": 0.9178472683627322, "grad_norm": 1.484375, "learning_rate": 1.58277736224188e-05, "loss": 0.946, "step": 5353 }, { "epoch": 0.9180187324517222, "grad_norm": 1.6328125, "learning_rate": 1.5826305972615053e-05, "loss": 1.0134, "step": 5354 }, { "epoch": 0.918190196540712, "grad_norm": 1.609375, "learning_rate": 1.582483813279353e-05, "loss": 1.0489, "step": 5355 }, { "epoch": 0.9183616606297018, "grad_norm": 1.6796875, "learning_rate": 1.58233701030021e-05, "loss": 1.0232, "step": 5356 }, { "epoch": 0.9185331247186918, "grad_norm": 1.5703125, "learning_rate": 1.5821901883288642e-05, "loss": 1.0153, "step": 5357 }, { "epoch": 0.9187045888076816, "grad_norm": 1.625, "learning_rate": 1.582043347370104e-05, "loss": 1.0195, "step": 5358 }, { "epoch": 0.9188760528966714, "grad_norm": 1.5859375, "learning_rate": 1.5818964874287185e-05, "loss": 1.0454, "step": 5359 }, { "epoch": 0.9190475169856613, "grad_norm": 1.65625, "learning_rate": 1.5817496085094974e-05, "loss": 1.0176, "step": 5360 }, { "epoch": 0.9192189810746512, "grad_norm": 1.6328125, "learning_rate": 1.5816027106172307e-05, "loss": 0.9667, "step": 5361 }, { "epoch": 0.919390445163641, "grad_norm": 1.515625, "learning_rate": 1.5814557937567097e-05, "loss": 1.0203, "step": 5362 }, { "epoch": 0.9195619092526309, "grad_norm": 1.59375, "learning_rate": 1.5813088579327256e-05, "loss": 1.0232, "step": 5363 }, { "epoch": 0.9197333733416208, "grad_norm": 1.5234375, "learning_rate": 1.5811619031500706e-05, "loss": 0.9786, "step": 5364 }, { "epoch": 0.9199048374306106, "grad_norm": 1.703125, "learning_rate": 1.5810149294135376e-05, "loss": 1.0583, "step": 5365 }, { "epoch": 0.9200763015196005, "grad_norm": 1.625, "learning_rate": 1.5808679367279197e-05, "loss": 1.0685, "step": 5366 }, { "epoch": 0.9202477656085903, "grad_norm": 1.6171875, "learning_rate": 1.5807209250980112e-05, "loss": 1.0277, "step": 5367 }, { "epoch": 0.9204192296975802, "grad_norm": 1.5234375, "learning_rate": 1.5805738945286066e-05, "loss": 0.9964, "step": 5368 }, { "epoch": 0.9205906937865701, "grad_norm": 1.640625, "learning_rate": 1.580426845024501e-05, "loss": 1.0182, "step": 5369 }, { "epoch": 0.9207621578755599, "grad_norm": 1.6015625, "learning_rate": 1.5802797765904903e-05, "loss": 1.051, "step": 5370 }, { "epoch": 0.9209336219645498, "grad_norm": 1.515625, "learning_rate": 1.5801326892313707e-05, "loss": 1.0433, "step": 5371 }, { "epoch": 0.9211050860535397, "grad_norm": 1.5625, "learning_rate": 1.57998558295194e-05, "loss": 0.9663, "step": 5372 }, { "epoch": 0.9212765501425295, "grad_norm": 1.65625, "learning_rate": 1.579838457756995e-05, "loss": 1.055, "step": 5373 }, { "epoch": 0.9214480142315193, "grad_norm": 1.4921875, "learning_rate": 1.579691313651335e-05, "loss": 1.0098, "step": 5374 }, { "epoch": 0.9216194783205093, "grad_norm": 1.46875, "learning_rate": 1.5795441506397574e-05, "loss": 0.9559, "step": 5375 }, { "epoch": 0.9217909424094991, "grad_norm": 1.5390625, "learning_rate": 1.5793969687270634e-05, "loss": 0.9153, "step": 5376 }, { "epoch": 0.9219624064984889, "grad_norm": 1.4921875, "learning_rate": 1.5792497679180524e-05, "loss": 0.9999, "step": 5377 }, { "epoch": 0.9221338705874789, "grad_norm": 1.5078125, "learning_rate": 1.5791025482175247e-05, "loss": 0.9207, "step": 5378 }, { "epoch": 0.9223053346764687, "grad_norm": 1.59375, "learning_rate": 1.578955309630283e-05, "loss": 0.9692, "step": 5379 }, { "epoch": 0.9224767987654585, "grad_norm": 1.609375, "learning_rate": 1.578808052161128e-05, "loss": 1.0302, "step": 5380 }, { "epoch": 0.9226482628544485, "grad_norm": 1.65625, "learning_rate": 1.5786607758148628e-05, "loss": 1.007, "step": 5381 }, { "epoch": 0.9228197269434383, "grad_norm": 1.546875, "learning_rate": 1.5785134805962907e-05, "loss": 0.9924, "step": 5382 }, { "epoch": 0.9229911910324281, "grad_norm": 1.5625, "learning_rate": 1.578366166510216e-05, "loss": 1.0786, "step": 5383 }, { "epoch": 0.923162655121418, "grad_norm": 1.6875, "learning_rate": 1.578218833561442e-05, "loss": 1.1266, "step": 5384 }, { "epoch": 0.9233341192104079, "grad_norm": 1.609375, "learning_rate": 1.5780714817547745e-05, "loss": 0.9992, "step": 5385 }, { "epoch": 0.9235055832993977, "grad_norm": 1.6015625, "learning_rate": 1.5779241110950195e-05, "loss": 1.0309, "step": 5386 }, { "epoch": 0.9236770473883876, "grad_norm": 1.5, "learning_rate": 1.577776721586983e-05, "loss": 0.9404, "step": 5387 }, { "epoch": 0.9238485114773775, "grad_norm": 1.671875, "learning_rate": 1.577629313235472e-05, "loss": 0.9784, "step": 5388 }, { "epoch": 0.9240199755663673, "grad_norm": 1.546875, "learning_rate": 1.5774818860452933e-05, "loss": 0.9775, "step": 5389 }, { "epoch": 0.9241914396553572, "grad_norm": 1.6640625, "learning_rate": 1.577334440021256e-05, "loss": 0.9955, "step": 5390 }, { "epoch": 0.924362903744347, "grad_norm": 1.65625, "learning_rate": 1.577186975168169e-05, "loss": 0.9753, "step": 5391 }, { "epoch": 0.9245343678333369, "grad_norm": 1.546875, "learning_rate": 1.577039491490841e-05, "loss": 1.0302, "step": 5392 }, { "epoch": 0.9247058319223268, "grad_norm": 1.5234375, "learning_rate": 1.576891988994082e-05, "loss": 0.9805, "step": 5393 }, { "epoch": 0.9248772960113166, "grad_norm": 1.640625, "learning_rate": 1.576744467682703e-05, "loss": 1.0489, "step": 5394 }, { "epoch": 0.9250487601003065, "grad_norm": 1.640625, "learning_rate": 1.5765969275615153e-05, "loss": 0.9113, "step": 5395 }, { "epoch": 0.9252202241892964, "grad_norm": 1.484375, "learning_rate": 1.5764493686353307e-05, "loss": 1.0295, "step": 5396 }, { "epoch": 0.9253916882782862, "grad_norm": 1.53125, "learning_rate": 1.5763017909089608e-05, "loss": 0.9964, "step": 5397 }, { "epoch": 0.925563152367276, "grad_norm": 1.65625, "learning_rate": 1.57615419438722e-05, "loss": 0.958, "step": 5398 }, { "epoch": 0.925734616456266, "grad_norm": 1.5859375, "learning_rate": 1.576006579074921e-05, "loss": 0.9351, "step": 5399 }, { "epoch": 0.9259060805452558, "grad_norm": 1.5703125, "learning_rate": 1.5758589449768784e-05, "loss": 0.9385, "step": 5400 }, { "epoch": 0.9260775446342456, "grad_norm": 1.578125, "learning_rate": 1.575711292097907e-05, "loss": 1.0104, "step": 5401 }, { "epoch": 0.9262490087232356, "grad_norm": 1.71875, "learning_rate": 1.5755636204428228e-05, "loss": 1.0488, "step": 5402 }, { "epoch": 0.9264204728122254, "grad_norm": 1.5390625, "learning_rate": 1.5754159300164414e-05, "loss": 0.9892, "step": 5403 }, { "epoch": 0.9265919369012152, "grad_norm": 1.546875, "learning_rate": 1.57526822082358e-05, "loss": 1.0528, "step": 5404 }, { "epoch": 0.9267634009902052, "grad_norm": 1.6640625, "learning_rate": 1.5751204928690552e-05, "loss": 1.0059, "step": 5405 }, { "epoch": 0.926934865079195, "grad_norm": 1.4140625, "learning_rate": 1.5749727461576852e-05, "loss": 0.9006, "step": 5406 }, { "epoch": 0.9271063291681848, "grad_norm": 1.578125, "learning_rate": 1.5748249806942895e-05, "loss": 1.0689, "step": 5407 }, { "epoch": 0.9272777932571747, "grad_norm": 1.7734375, "learning_rate": 1.5746771964836864e-05, "loss": 0.9228, "step": 5408 }, { "epoch": 0.9274492573461646, "grad_norm": 1.6328125, "learning_rate": 1.5745293935306954e-05, "loss": 1.0405, "step": 5409 }, { "epoch": 0.9276207214351544, "grad_norm": 1.640625, "learning_rate": 1.574381571840138e-05, "loss": 1.0065, "step": 5410 }, { "epoch": 0.9277921855241443, "grad_norm": 1.5546875, "learning_rate": 1.5742337314168343e-05, "loss": 1.0412, "step": 5411 }, { "epoch": 0.9279636496131342, "grad_norm": 1.5234375, "learning_rate": 1.574085872265606e-05, "loss": 1.0683, "step": 5412 }, { "epoch": 0.928135113702124, "grad_norm": 1.5625, "learning_rate": 1.573937994391276e-05, "loss": 1.0458, "step": 5413 }, { "epoch": 0.9283065777911139, "grad_norm": 1.5390625, "learning_rate": 1.5737900977986667e-05, "loss": 0.9001, "step": 5414 }, { "epoch": 0.9284780418801037, "grad_norm": 1.59375, "learning_rate": 1.5736421824926016e-05, "loss": 1.0702, "step": 5415 }, { "epoch": 0.9286495059690936, "grad_norm": 1.46875, "learning_rate": 1.573494248477905e-05, "loss": 0.9452, "step": 5416 }, { "epoch": 0.9288209700580835, "grad_norm": 1.5703125, "learning_rate": 1.573346295759401e-05, "loss": 0.9938, "step": 5417 }, { "epoch": 0.9289924341470733, "grad_norm": 1.640625, "learning_rate": 1.5731983243419154e-05, "loss": 1.0848, "step": 5418 }, { "epoch": 0.9291638982360632, "grad_norm": 1.5390625, "learning_rate": 1.573050334230274e-05, "loss": 1.0467, "step": 5419 }, { "epoch": 0.9293353623250531, "grad_norm": 1.625, "learning_rate": 1.5729023254293034e-05, "loss": 0.9948, "step": 5420 }, { "epoch": 0.9295068264140429, "grad_norm": 1.6171875, "learning_rate": 1.5727542979438305e-05, "loss": 0.968, "step": 5421 }, { "epoch": 0.9296782905030327, "grad_norm": 1.5625, "learning_rate": 1.5726062517786834e-05, "loss": 1.0831, "step": 5422 }, { "epoch": 0.9298497545920227, "grad_norm": 1.5234375, "learning_rate": 1.57245818693869e-05, "loss": 1.0289, "step": 5423 }, { "epoch": 0.9300212186810125, "grad_norm": 1.5546875, "learning_rate": 1.5723101034286794e-05, "loss": 1.0607, "step": 5424 }, { "epoch": 0.9301926827700023, "grad_norm": 1.5234375, "learning_rate": 1.572162001253481e-05, "loss": 1.0166, "step": 5425 }, { "epoch": 0.9303641468589923, "grad_norm": 1.6640625, "learning_rate": 1.5720138804179256e-05, "loss": 1.0684, "step": 5426 }, { "epoch": 0.9305356109479821, "grad_norm": 1.625, "learning_rate": 1.5718657409268436e-05, "loss": 1.0084, "step": 5427 }, { "epoch": 0.9307070750369719, "grad_norm": 1.46875, "learning_rate": 1.5717175827850657e-05, "loss": 1.0336, "step": 5428 }, { "epoch": 0.9308785391259619, "grad_norm": 1.5234375, "learning_rate": 1.571569405997425e-05, "loss": 1.0188, "step": 5429 }, { "epoch": 0.9310500032149517, "grad_norm": 1.5, "learning_rate": 1.5714212105687535e-05, "loss": 0.9354, "step": 5430 }, { "epoch": 0.9312214673039415, "grad_norm": 1.5390625, "learning_rate": 1.571272996503885e-05, "loss": 0.9608, "step": 5431 }, { "epoch": 0.9313929313929314, "grad_norm": 1.5859375, "learning_rate": 1.571124763807652e-05, "loss": 1.0545, "step": 5432 }, { "epoch": 0.9315643954819213, "grad_norm": 1.5546875, "learning_rate": 1.5709765124848907e-05, "loss": 0.9436, "step": 5433 }, { "epoch": 0.9317358595709111, "grad_norm": 1.5390625, "learning_rate": 1.5708282425404345e-05, "loss": 0.9691, "step": 5434 }, { "epoch": 0.931907323659901, "grad_norm": 1.515625, "learning_rate": 1.5706799539791204e-05, "loss": 0.9409, "step": 5435 }, { "epoch": 0.9320787877488909, "grad_norm": 1.5859375, "learning_rate": 1.5705316468057837e-05, "loss": 1.0345, "step": 5436 }, { "epoch": 0.9322502518378807, "grad_norm": 1.671875, "learning_rate": 1.5703833210252613e-05, "loss": 0.9729, "step": 5437 }, { "epoch": 0.9324217159268706, "grad_norm": 1.4921875, "learning_rate": 1.5702349766423913e-05, "loss": 0.9774, "step": 5438 }, { "epoch": 0.9325931800158604, "grad_norm": 1.5234375, "learning_rate": 1.5700866136620114e-05, "loss": 0.9663, "step": 5439 }, { "epoch": 0.9327646441048503, "grad_norm": 1.5625, "learning_rate": 1.56993823208896e-05, "loss": 0.9804, "step": 5440 }, { "epoch": 0.9329361081938402, "grad_norm": 1.5078125, "learning_rate": 1.5697898319280767e-05, "loss": 0.9341, "step": 5441 }, { "epoch": 0.93310757228283, "grad_norm": 1.546875, "learning_rate": 1.569641413184202e-05, "loss": 1.0012, "step": 5442 }, { "epoch": 0.9332790363718199, "grad_norm": 1.46875, "learning_rate": 1.569492975862175e-05, "loss": 1.0137, "step": 5443 }, { "epoch": 0.9334505004608097, "grad_norm": 1.7109375, "learning_rate": 1.569344519966838e-05, "loss": 1.0661, "step": 5444 }, { "epoch": 0.9336219645497996, "grad_norm": 1.4921875, "learning_rate": 1.569196045503032e-05, "loss": 0.9675, "step": 5445 }, { "epoch": 0.9337934286387894, "grad_norm": 1.609375, "learning_rate": 1.5690475524755996e-05, "loss": 1.1199, "step": 5446 }, { "epoch": 0.9339648927277793, "grad_norm": 1.5078125, "learning_rate": 1.5688990408893837e-05, "loss": 0.973, "step": 5447 }, { "epoch": 0.9341363568167692, "grad_norm": 1.546875, "learning_rate": 1.5687505107492278e-05, "loss": 1.0056, "step": 5448 }, { "epoch": 0.934307820905759, "grad_norm": 1.6171875, "learning_rate": 1.568601962059976e-05, "loss": 1.0917, "step": 5449 }, { "epoch": 0.9344792849947489, "grad_norm": 1.5, "learning_rate": 1.5684533948264734e-05, "loss": 0.9369, "step": 5450 }, { "epoch": 0.9346507490837388, "grad_norm": 1.5625, "learning_rate": 1.5683048090535645e-05, "loss": 1.0045, "step": 5451 }, { "epoch": 0.9348222131727286, "grad_norm": 1.5546875, "learning_rate": 1.568156204746096e-05, "loss": 0.9499, "step": 5452 }, { "epoch": 0.9349936772617184, "grad_norm": 1.6328125, "learning_rate": 1.568007581908914e-05, "loss": 1.0495, "step": 5453 }, { "epoch": 0.9351651413507084, "grad_norm": 1.5625, "learning_rate": 1.567858940546866e-05, "loss": 1.0209, "step": 5454 }, { "epoch": 0.9353366054396982, "grad_norm": 1.5234375, "learning_rate": 1.5677102806647993e-05, "loss": 1.0253, "step": 5455 }, { "epoch": 0.935508069528688, "grad_norm": 1.625, "learning_rate": 1.567561602267563e-05, "loss": 1.0378, "step": 5456 }, { "epoch": 0.935679533617678, "grad_norm": 1.5390625, "learning_rate": 1.5674129053600054e-05, "loss": 1.0162, "step": 5457 }, { "epoch": 0.9358509977066678, "grad_norm": 1.59375, "learning_rate": 1.5672641899469764e-05, "loss": 0.9873, "step": 5458 }, { "epoch": 0.9360224617956576, "grad_norm": 1.5703125, "learning_rate": 1.5671154560333258e-05, "loss": 1.0778, "step": 5459 }, { "epoch": 0.9361939258846476, "grad_norm": 1.546875, "learning_rate": 1.5669667036239047e-05, "loss": 1.0814, "step": 5460 }, { "epoch": 0.9363653899736374, "grad_norm": 1.5078125, "learning_rate": 1.5668179327235644e-05, "loss": 1.015, "step": 5461 }, { "epoch": 0.9365368540626272, "grad_norm": 1.5546875, "learning_rate": 1.566669143337157e-05, "loss": 0.9743, "step": 5462 }, { "epoch": 0.9367083181516171, "grad_norm": 1.5546875, "learning_rate": 1.566520335469535e-05, "loss": 0.9868, "step": 5463 }, { "epoch": 0.936879782240607, "grad_norm": 1.625, "learning_rate": 1.5663715091255513e-05, "loss": 1.0205, "step": 5464 }, { "epoch": 0.9370512463295968, "grad_norm": 1.4609375, "learning_rate": 1.56622266431006e-05, "loss": 0.9398, "step": 5465 }, { "epoch": 0.9372227104185867, "grad_norm": 1.53125, "learning_rate": 1.5660738010279156e-05, "loss": 0.9776, "step": 5466 }, { "epoch": 0.9373941745075766, "grad_norm": 1.6484375, "learning_rate": 1.5659249192839724e-05, "loss": 0.958, "step": 5467 }, { "epoch": 0.9375656385965664, "grad_norm": 1.7265625, "learning_rate": 1.565776019083087e-05, "loss": 0.9683, "step": 5468 }, { "epoch": 0.9377371026855563, "grad_norm": 1.703125, "learning_rate": 1.565627100430115e-05, "loss": 1.0691, "step": 5469 }, { "epoch": 0.9379085667745461, "grad_norm": 1.546875, "learning_rate": 1.5654781633299134e-05, "loss": 0.9959, "step": 5470 }, { "epoch": 0.938080030863536, "grad_norm": 1.640625, "learning_rate": 1.5653292077873393e-05, "loss": 1.0553, "step": 5471 }, { "epoch": 0.9382514949525259, "grad_norm": 1.4765625, "learning_rate": 1.565180233807251e-05, "loss": 1.0085, "step": 5472 }, { "epoch": 0.9384229590415157, "grad_norm": 1.609375, "learning_rate": 1.565031241394507e-05, "loss": 1.0366, "step": 5473 }, { "epoch": 0.9385944231305056, "grad_norm": 1.4765625, "learning_rate": 1.5648822305539667e-05, "loss": 0.8648, "step": 5474 }, { "epoch": 0.9387658872194955, "grad_norm": 1.5703125, "learning_rate": 1.5647332012904892e-05, "loss": 0.9774, "step": 5475 }, { "epoch": 0.9389373513084853, "grad_norm": 1.5625, "learning_rate": 1.564584153608936e-05, "loss": 0.9816, "step": 5476 }, { "epoch": 0.9391088153974751, "grad_norm": 1.609375, "learning_rate": 1.5644350875141673e-05, "loss": 1.0061, "step": 5477 }, { "epoch": 0.9392802794864651, "grad_norm": 1.484375, "learning_rate": 1.564286003011045e-05, "loss": 0.9747, "step": 5478 }, { "epoch": 0.9394517435754549, "grad_norm": 1.4921875, "learning_rate": 1.5641369001044308e-05, "loss": 0.8715, "step": 5479 }, { "epoch": 0.9396232076644447, "grad_norm": 1.5, "learning_rate": 1.5639877787991884e-05, "loss": 0.9639, "step": 5480 }, { "epoch": 0.9397946717534347, "grad_norm": 1.5390625, "learning_rate": 1.5638386391001804e-05, "loss": 1.0736, "step": 5481 }, { "epoch": 0.9399661358424245, "grad_norm": 1.515625, "learning_rate": 1.5636894810122717e-05, "loss": 1.0084, "step": 5482 }, { "epoch": 0.9401375999314143, "grad_norm": 1.609375, "learning_rate": 1.5635403045403257e-05, "loss": 1.0358, "step": 5483 }, { "epoch": 0.9403090640204043, "grad_norm": 1.4609375, "learning_rate": 1.5633911096892088e-05, "loss": 0.9374, "step": 5484 }, { "epoch": 0.9404805281093941, "grad_norm": 1.5859375, "learning_rate": 1.563241896463786e-05, "loss": 1.019, "step": 5485 }, { "epoch": 0.9406519921983839, "grad_norm": 1.578125, "learning_rate": 1.5630926648689243e-05, "loss": 1.0141, "step": 5486 }, { "epoch": 0.9408234562873738, "grad_norm": 1.546875, "learning_rate": 1.5629434149094898e-05, "loss": 1.0172, "step": 5487 }, { "epoch": 0.9409949203763637, "grad_norm": 1.484375, "learning_rate": 1.5627941465903512e-05, "loss": 0.941, "step": 5488 }, { "epoch": 0.9411663844653535, "grad_norm": 1.625, "learning_rate": 1.562644859916376e-05, "loss": 1.0477, "step": 5489 }, { "epoch": 0.9413378485543434, "grad_norm": 1.59375, "learning_rate": 1.5624955548924334e-05, "loss": 0.9985, "step": 5490 }, { "epoch": 0.9415093126433333, "grad_norm": 1.5, "learning_rate": 1.5623462315233923e-05, "loss": 0.9092, "step": 5491 }, { "epoch": 0.9416807767323231, "grad_norm": 1.5859375, "learning_rate": 1.562196889814123e-05, "loss": 1.0168, "step": 5492 }, { "epoch": 0.941852240821313, "grad_norm": 1.6796875, "learning_rate": 1.5620475297694963e-05, "loss": 1.0219, "step": 5493 }, { "epoch": 0.9420237049103029, "grad_norm": 1.796875, "learning_rate": 1.5618981513943833e-05, "loss": 1.0164, "step": 5494 }, { "epoch": 0.9421951689992927, "grad_norm": 1.640625, "learning_rate": 1.5617487546936555e-05, "loss": 1.0886, "step": 5495 }, { "epoch": 0.9423666330882826, "grad_norm": 1.5078125, "learning_rate": 1.5615993396721852e-05, "loss": 0.9554, "step": 5496 }, { "epoch": 0.9425380971772724, "grad_norm": 1.5859375, "learning_rate": 1.5614499063348457e-05, "loss": 1.0361, "step": 5497 }, { "epoch": 0.9427095612662623, "grad_norm": 1.578125, "learning_rate": 1.561300454686511e-05, "loss": 1.0378, "step": 5498 }, { "epoch": 0.9428810253552522, "grad_norm": 1.5390625, "learning_rate": 1.561150984732054e-05, "loss": 0.975, "step": 5499 }, { "epoch": 0.943052489444242, "grad_norm": 1.546875, "learning_rate": 1.561001496476351e-05, "loss": 0.9535, "step": 5500 }, { "epoch": 0.9432239535332319, "grad_norm": 1.53125, "learning_rate": 1.5608519899242764e-05, "loss": 1.0368, "step": 5501 }, { "epoch": 0.9433954176222218, "grad_norm": 1.609375, "learning_rate": 1.560702465080707e-05, "loss": 0.9941, "step": 5502 }, { "epoch": 0.9435668817112116, "grad_norm": 1.5234375, "learning_rate": 1.560552921950518e-05, "loss": 0.8931, "step": 5503 }, { "epoch": 0.9437383458002014, "grad_norm": 1.5234375, "learning_rate": 1.5604033605385874e-05, "loss": 1.0116, "step": 5504 }, { "epoch": 0.9439098098891914, "grad_norm": 1.3984375, "learning_rate": 1.5602537808497935e-05, "loss": 0.9978, "step": 5505 }, { "epoch": 0.9440812739781812, "grad_norm": 1.5546875, "learning_rate": 1.560104182889014e-05, "loss": 0.941, "step": 5506 }, { "epoch": 0.944252738067171, "grad_norm": 1.6875, "learning_rate": 1.5599545666611272e-05, "loss": 1.0056, "step": 5507 }, { "epoch": 0.944424202156161, "grad_norm": 1.578125, "learning_rate": 1.5598049321710143e-05, "loss": 0.9915, "step": 5508 }, { "epoch": 0.9445956662451508, "grad_norm": 1.546875, "learning_rate": 1.559655279423554e-05, "loss": 0.9526, "step": 5509 }, { "epoch": 0.9447671303341406, "grad_norm": 1.671875, "learning_rate": 1.5595056084236277e-05, "loss": 1.0734, "step": 5510 }, { "epoch": 0.9449385944231306, "grad_norm": 1.5390625, "learning_rate": 1.5593559191761165e-05, "loss": 0.9538, "step": 5511 }, { "epoch": 0.9451100585121204, "grad_norm": 1.6015625, "learning_rate": 1.5592062116859026e-05, "loss": 1.0745, "step": 5512 }, { "epoch": 0.9452815226011102, "grad_norm": 1.546875, "learning_rate": 1.5590564859578682e-05, "loss": 0.9885, "step": 5513 }, { "epoch": 0.9454529866901001, "grad_norm": 1.5859375, "learning_rate": 1.5589067419968967e-05, "loss": 0.9589, "step": 5514 }, { "epoch": 0.94562445077909, "grad_norm": 1.5390625, "learning_rate": 1.558756979807872e-05, "loss": 1.0317, "step": 5515 }, { "epoch": 0.9457959148680798, "grad_norm": 1.4453125, "learning_rate": 1.558607199395678e-05, "loss": 0.959, "step": 5516 }, { "epoch": 0.9459673789570697, "grad_norm": 1.5703125, "learning_rate": 1.5584574007651993e-05, "loss": 1.029, "step": 5517 }, { "epoch": 0.9461388430460596, "grad_norm": 1.5546875, "learning_rate": 1.558307583921322e-05, "loss": 0.9865, "step": 5518 }, { "epoch": 0.9463103071350494, "grad_norm": 1.5625, "learning_rate": 1.558157748868932e-05, "loss": 1.0517, "step": 5519 }, { "epoch": 0.9464817712240393, "grad_norm": 1.5703125, "learning_rate": 1.5580078956129158e-05, "loss": 1.0287, "step": 5520 }, { "epoch": 0.9466532353130291, "grad_norm": 1.5625, "learning_rate": 1.5578580241581612e-05, "loss": 0.966, "step": 5521 }, { "epoch": 0.946824699402019, "grad_norm": 1.703125, "learning_rate": 1.5577081345095556e-05, "loss": 1.0418, "step": 5522 }, { "epoch": 0.9469961634910089, "grad_norm": 1.6796875, "learning_rate": 1.557558226671987e-05, "loss": 0.9901, "step": 5523 }, { "epoch": 0.9471676275799987, "grad_norm": 1.578125, "learning_rate": 1.5574083006503462e-05, "loss": 1.0459, "step": 5524 }, { "epoch": 0.9473390916689886, "grad_norm": 1.46875, "learning_rate": 1.5572583564495212e-05, "loss": 0.9744, "step": 5525 }, { "epoch": 0.9475105557579785, "grad_norm": 1.59375, "learning_rate": 1.5571083940744025e-05, "loss": 0.9907, "step": 5526 }, { "epoch": 0.9476820198469683, "grad_norm": 1.4375, "learning_rate": 1.5569584135298813e-05, "loss": 0.8529, "step": 5527 }, { "epoch": 0.9478534839359581, "grad_norm": 1.5, "learning_rate": 1.5568084148208493e-05, "loss": 0.9824, "step": 5528 }, { "epoch": 0.9480249480249481, "grad_norm": 1.671875, "learning_rate": 1.5566583979521976e-05, "loss": 0.9589, "step": 5529 }, { "epoch": 0.9481964121139379, "grad_norm": 1.546875, "learning_rate": 1.5565083629288195e-05, "loss": 0.9606, "step": 5530 }, { "epoch": 0.9483678762029277, "grad_norm": 1.5703125, "learning_rate": 1.5563583097556086e-05, "loss": 1.0618, "step": 5531 }, { "epoch": 0.9485393402919177, "grad_norm": 1.5859375, "learning_rate": 1.556208238437458e-05, "loss": 0.965, "step": 5532 }, { "epoch": 0.9487108043809075, "grad_norm": 1.546875, "learning_rate": 1.5560581489792617e-05, "loss": 0.992, "step": 5533 }, { "epoch": 0.9488822684698973, "grad_norm": 1.4453125, "learning_rate": 1.5559080413859153e-05, "loss": 0.9641, "step": 5534 }, { "epoch": 0.9490537325588873, "grad_norm": 1.578125, "learning_rate": 1.555757915662315e-05, "loss": 0.9974, "step": 5535 }, { "epoch": 0.9492251966478771, "grad_norm": 1.5078125, "learning_rate": 1.5556077718133556e-05, "loss": 0.9771, "step": 5536 }, { "epoch": 0.9493966607368669, "grad_norm": 1.5703125, "learning_rate": 1.555457609843935e-05, "loss": 0.9597, "step": 5537 }, { "epoch": 0.9495681248258567, "grad_norm": 1.6484375, "learning_rate": 1.55530742975895e-05, "loss": 1.0468, "step": 5538 }, { "epoch": 0.9497395889148467, "grad_norm": 1.5859375, "learning_rate": 1.5551572315632983e-05, "loss": 1.0865, "step": 5539 }, { "epoch": 0.9499110530038365, "grad_norm": 1.5703125, "learning_rate": 1.555007015261879e-05, "loss": 1.0402, "step": 5540 }, { "epoch": 0.9500825170928263, "grad_norm": 1.5703125, "learning_rate": 1.5548567808595905e-05, "loss": 0.9962, "step": 5541 }, { "epoch": 0.9502539811818163, "grad_norm": 1.515625, "learning_rate": 1.5547065283613332e-05, "loss": 1.0879, "step": 5542 }, { "epoch": 0.9504254452708061, "grad_norm": 1.625, "learning_rate": 1.5545562577720077e-05, "loss": 1.0379, "step": 5543 }, { "epoch": 0.9505969093597959, "grad_norm": 1.53125, "learning_rate": 1.554405969096514e-05, "loss": 1.0111, "step": 5544 }, { "epoch": 0.9507683734487858, "grad_norm": 1.53125, "learning_rate": 1.5542556623397542e-05, "loss": 0.9819, "step": 5545 }, { "epoch": 0.9509398375377757, "grad_norm": 1.5078125, "learning_rate": 1.5541053375066298e-05, "loss": 0.9977, "step": 5546 }, { "epoch": 0.9511113016267655, "grad_norm": 1.5625, "learning_rate": 1.553954994602044e-05, "loss": 1.124, "step": 5547 }, { "epoch": 0.9512827657157554, "grad_norm": 1.4765625, "learning_rate": 1.5538046336308996e-05, "loss": 0.9165, "step": 5548 }, { "epoch": 0.9514542298047453, "grad_norm": 1.5625, "learning_rate": 1.553654254598101e-05, "loss": 0.9962, "step": 5549 }, { "epoch": 0.9516256938937351, "grad_norm": 1.609375, "learning_rate": 1.553503857508552e-05, "loss": 1.0265, "step": 5550 }, { "epoch": 0.951797157982725, "grad_norm": 1.625, "learning_rate": 1.5533534423671578e-05, "loss": 1.0501, "step": 5551 }, { "epoch": 0.9519686220717148, "grad_norm": 1.484375, "learning_rate": 1.5532030091788243e-05, "loss": 0.8984, "step": 5552 }, { "epoch": 0.9521400861607047, "grad_norm": 1.5703125, "learning_rate": 1.5530525579484575e-05, "loss": 1.0077, "step": 5553 }, { "epoch": 0.9523115502496946, "grad_norm": 1.578125, "learning_rate": 1.552902088680964e-05, "loss": 1.0534, "step": 5554 }, { "epoch": 0.9524830143386844, "grad_norm": 1.5390625, "learning_rate": 1.552751601381252e-05, "loss": 0.9774, "step": 5555 }, { "epoch": 0.9526544784276743, "grad_norm": 1.5859375, "learning_rate": 1.5526010960542278e-05, "loss": 1.0769, "step": 5556 }, { "epoch": 0.9528259425166642, "grad_norm": 1.6328125, "learning_rate": 1.5524505727048017e-05, "loss": 1.0191, "step": 5557 }, { "epoch": 0.952997406605654, "grad_norm": 1.5078125, "learning_rate": 1.5523000313378816e-05, "loss": 0.9485, "step": 5558 }, { "epoch": 0.9531688706946438, "grad_norm": 1.5546875, "learning_rate": 1.552149471958378e-05, "loss": 1.0232, "step": 5559 }, { "epoch": 0.9533403347836338, "grad_norm": 1.6015625, "learning_rate": 1.551998894571201e-05, "loss": 1.0259, "step": 5560 }, { "epoch": 0.9535117988726236, "grad_norm": 1.5859375, "learning_rate": 1.5518482991812614e-05, "loss": 0.9999, "step": 5561 }, { "epoch": 0.9536832629616134, "grad_norm": 1.5, "learning_rate": 1.5516976857934703e-05, "loss": 1.0025, "step": 5562 }, { "epoch": 0.9538547270506034, "grad_norm": 1.5390625, "learning_rate": 1.5515470544127405e-05, "loss": 1.0224, "step": 5563 }, { "epoch": 0.9540261911395932, "grad_norm": 1.5078125, "learning_rate": 1.5513964050439842e-05, "loss": 1.0007, "step": 5564 }, { "epoch": 0.954197655228583, "grad_norm": 1.5234375, "learning_rate": 1.5512457376921147e-05, "loss": 0.9864, "step": 5565 }, { "epoch": 0.954369119317573, "grad_norm": 1.5234375, "learning_rate": 1.5510950523620465e-05, "loss": 1.0098, "step": 5566 }, { "epoch": 0.9545405834065628, "grad_norm": 1.6328125, "learning_rate": 1.550944349058693e-05, "loss": 0.9862, "step": 5567 }, { "epoch": 0.9547120474955526, "grad_norm": 1.5, "learning_rate": 1.5507936277869693e-05, "loss": 1.0668, "step": 5568 }, { "epoch": 0.9548835115845425, "grad_norm": 1.5625, "learning_rate": 1.5506428885517917e-05, "loss": 0.9649, "step": 5569 }, { "epoch": 0.9550549756735324, "grad_norm": 1.4921875, "learning_rate": 1.5504921313580757e-05, "loss": 0.9597, "step": 5570 }, { "epoch": 0.9552264397625222, "grad_norm": 1.5234375, "learning_rate": 1.5503413562107385e-05, "loss": 0.9652, "step": 5571 }, { "epoch": 0.9553979038515121, "grad_norm": 1.5, "learning_rate": 1.5501905631146975e-05, "loss": 0.9575, "step": 5572 }, { "epoch": 0.955569367940502, "grad_norm": 1.515625, "learning_rate": 1.55003975207487e-05, "loss": 0.9843, "step": 5573 }, { "epoch": 0.9557408320294918, "grad_norm": 1.6796875, "learning_rate": 1.5498889230961753e-05, "loss": 1.0935, "step": 5574 }, { "epoch": 0.9559122961184817, "grad_norm": 1.5546875, "learning_rate": 1.5497380761835318e-05, "loss": 0.9589, "step": 5575 }, { "epoch": 0.9560837602074715, "grad_norm": 1.609375, "learning_rate": 1.5495872113418594e-05, "loss": 0.9689, "step": 5576 }, { "epoch": 0.9562552242964614, "grad_norm": 1.625, "learning_rate": 1.549436328576079e-05, "loss": 0.9995, "step": 5577 }, { "epoch": 0.9564266883854513, "grad_norm": 1.53125, "learning_rate": 1.5492854278911103e-05, "loss": 0.9451, "step": 5578 }, { "epoch": 0.9565981524744411, "grad_norm": 1.5546875, "learning_rate": 1.549134509291876e-05, "loss": 0.974, "step": 5579 }, { "epoch": 0.956769616563431, "grad_norm": 1.578125, "learning_rate": 1.5489835727832973e-05, "loss": 0.9706, "step": 5580 }, { "epoch": 0.9569410806524209, "grad_norm": 1.4921875, "learning_rate": 1.5488326183702967e-05, "loss": 0.9371, "step": 5581 }, { "epoch": 0.9571125447414107, "grad_norm": 1.6875, "learning_rate": 1.548681646057798e-05, "loss": 1.0169, "step": 5582 }, { "epoch": 0.9572840088304005, "grad_norm": 1.6015625, "learning_rate": 1.548530655850725e-05, "loss": 1.0346, "step": 5583 }, { "epoch": 0.9574554729193905, "grad_norm": 1.578125, "learning_rate": 1.548379647754001e-05, "loss": 0.9309, "step": 5584 }, { "epoch": 0.9576269370083803, "grad_norm": 1.59375, "learning_rate": 1.548228621772552e-05, "loss": 0.9644, "step": 5585 }, { "epoch": 0.9577984010973701, "grad_norm": 1.546875, "learning_rate": 1.5480775779113032e-05, "loss": 0.9528, "step": 5586 }, { "epoch": 0.9579698651863601, "grad_norm": 1.5, "learning_rate": 1.5479265161751807e-05, "loss": 0.9366, "step": 5587 }, { "epoch": 0.9581413292753499, "grad_norm": 1.578125, "learning_rate": 1.5477754365691113e-05, "loss": 0.9291, "step": 5588 }, { "epoch": 0.9583127933643397, "grad_norm": 1.53125, "learning_rate": 1.5476243390980224e-05, "loss": 1.0644, "step": 5589 }, { "epoch": 0.9584842574533297, "grad_norm": 1.515625, "learning_rate": 1.5474732237668413e-05, "loss": 0.9302, "step": 5590 }, { "epoch": 0.9586557215423195, "grad_norm": 1.5546875, "learning_rate": 1.547322090580497e-05, "loss": 0.956, "step": 5591 }, { "epoch": 0.9588271856313093, "grad_norm": 1.59375, "learning_rate": 1.5471709395439182e-05, "loss": 1.0559, "step": 5592 }, { "epoch": 0.9589986497202992, "grad_norm": 1.5234375, "learning_rate": 1.5470197706620345e-05, "loss": 0.9851, "step": 5593 }, { "epoch": 0.9591701138092891, "grad_norm": 1.4296875, "learning_rate": 1.5468685839397764e-05, "loss": 0.9395, "step": 5594 }, { "epoch": 0.9593415778982789, "grad_norm": 1.640625, "learning_rate": 1.546717379382074e-05, "loss": 1.025, "step": 5595 }, { "epoch": 0.9595130419872688, "grad_norm": 1.59375, "learning_rate": 1.5465661569938597e-05, "loss": 1.0304, "step": 5596 }, { "epoch": 0.9596845060762587, "grad_norm": 1.5546875, "learning_rate": 1.5464149167800643e-05, "loss": 0.9483, "step": 5597 }, { "epoch": 0.9598559701652485, "grad_norm": 1.546875, "learning_rate": 1.5462636587456216e-05, "loss": 0.9549, "step": 5598 }, { "epoch": 0.9600274342542384, "grad_norm": 1.546875, "learning_rate": 1.5461123828954635e-05, "loss": 0.9554, "step": 5599 }, { "epoch": 0.9601988983432282, "grad_norm": 1.65625, "learning_rate": 1.545961089234524e-05, "loss": 1.1681, "step": 5600 }, { "epoch": 0.9601988983432282, "eval_loss": 0.8496836423873901, "eval_runtime": 836.7427, "eval_samples_per_second": 2.987, "eval_steps_per_second": 2.987, "step": 5600 }, { "epoch": 0.9603703624322181, "grad_norm": 1.5078125, "learning_rate": 1.5458097777677377e-05, "loss": 1.0117, "step": 5601 }, { "epoch": 0.960541826521208, "grad_norm": 1.6015625, "learning_rate": 1.545658448500039e-05, "loss": 1.0554, "step": 5602 }, { "epoch": 0.9607132906101978, "grad_norm": 1.5859375, "learning_rate": 1.5455071014363637e-05, "loss": 0.9915, "step": 5603 }, { "epoch": 0.9608847546991877, "grad_norm": 1.6328125, "learning_rate": 1.5453557365816477e-05, "loss": 1.0956, "step": 5604 }, { "epoch": 0.9610562187881776, "grad_norm": 1.53125, "learning_rate": 1.545204353940827e-05, "loss": 0.9851, "step": 5605 }, { "epoch": 0.9612276828771674, "grad_norm": 1.5234375, "learning_rate": 1.54505295351884e-05, "loss": 0.9669, "step": 5606 }, { "epoch": 0.9613991469661572, "grad_norm": 1.515625, "learning_rate": 1.5449015353206232e-05, "loss": 0.9688, "step": 5607 }, { "epoch": 0.9615706110551472, "grad_norm": 1.5625, "learning_rate": 1.5447500993511155e-05, "loss": 0.9418, "step": 5608 }, { "epoch": 0.961742075144137, "grad_norm": 1.453125, "learning_rate": 1.5445986456152557e-05, "loss": 1.0225, "step": 5609 }, { "epoch": 0.9619135392331268, "grad_norm": 1.5390625, "learning_rate": 1.5444471741179838e-05, "loss": 1.0441, "step": 5610 }, { "epoch": 0.9620850033221168, "grad_norm": 1.6484375, "learning_rate": 1.544295684864239e-05, "loss": 1.0412, "step": 5611 }, { "epoch": 0.9622564674111066, "grad_norm": 1.5859375, "learning_rate": 1.5441441778589622e-05, "loss": 1.0482, "step": 5612 }, { "epoch": 0.9624279315000964, "grad_norm": 1.53125, "learning_rate": 1.5439926531070944e-05, "loss": 1.0165, "step": 5613 }, { "epoch": 0.9625993955890864, "grad_norm": 1.5078125, "learning_rate": 1.5438411106135784e-05, "loss": 0.9765, "step": 5614 }, { "epoch": 0.9627708596780762, "grad_norm": 1.6015625, "learning_rate": 1.5436895503833555e-05, "loss": 1.0007, "step": 5615 }, { "epoch": 0.962942323767066, "grad_norm": 1.6640625, "learning_rate": 1.543537972421369e-05, "loss": 0.9622, "step": 5616 }, { "epoch": 0.963113787856056, "grad_norm": 1.59375, "learning_rate": 1.5433863767325626e-05, "loss": 1.1012, "step": 5617 }, { "epoch": 0.9632852519450458, "grad_norm": 1.609375, "learning_rate": 1.5432347633218802e-05, "loss": 1.0813, "step": 5618 }, { "epoch": 0.9634567160340356, "grad_norm": 1.796875, "learning_rate": 1.5430831321942664e-05, "loss": 1.0686, "step": 5619 }, { "epoch": 0.9636281801230255, "grad_norm": 1.625, "learning_rate": 1.5429314833546665e-05, "loss": 1.0027, "step": 5620 }, { "epoch": 0.9637996442120154, "grad_norm": 1.6328125, "learning_rate": 1.5427798168080267e-05, "loss": 1.0444, "step": 5621 }, { "epoch": 0.9639711083010052, "grad_norm": 1.5078125, "learning_rate": 1.5426281325592932e-05, "loss": 0.9338, "step": 5622 }, { "epoch": 0.9641425723899951, "grad_norm": 1.640625, "learning_rate": 1.542476430613413e-05, "loss": 1.0205, "step": 5623 }, { "epoch": 0.964314036478985, "grad_norm": 1.515625, "learning_rate": 1.5423247109753332e-05, "loss": 0.9084, "step": 5624 }, { "epoch": 0.9644855005679748, "grad_norm": 1.609375, "learning_rate": 1.5421729736500024e-05, "loss": 1.0294, "step": 5625 }, { "epoch": 0.9646569646569647, "grad_norm": 1.53125, "learning_rate": 1.5420212186423696e-05, "loss": 0.9791, "step": 5626 }, { "epoch": 0.9648284287459545, "grad_norm": 1.59375, "learning_rate": 1.541869445957384e-05, "loss": 1.0413, "step": 5627 }, { "epoch": 0.9649998928349444, "grad_norm": 1.5859375, "learning_rate": 1.5417176555999948e-05, "loss": 0.9642, "step": 5628 }, { "epoch": 0.9651713569239343, "grad_norm": 1.6015625, "learning_rate": 1.541565847575153e-05, "loss": 0.9477, "step": 5629 }, { "epoch": 0.9653428210129241, "grad_norm": 1.5078125, "learning_rate": 1.5414140218878096e-05, "loss": 0.9937, "step": 5630 }, { "epoch": 0.965514285101914, "grad_norm": 1.7578125, "learning_rate": 1.5412621785429162e-05, "loss": 1.0819, "step": 5631 }, { "epoch": 0.9656857491909038, "grad_norm": 1.515625, "learning_rate": 1.541110317545425e-05, "loss": 0.9953, "step": 5632 }, { "epoch": 0.9658572132798937, "grad_norm": 1.5546875, "learning_rate": 1.5409584389002885e-05, "loss": 0.9715, "step": 5633 }, { "epoch": 0.9660286773688835, "grad_norm": 1.484375, "learning_rate": 1.5408065426124607e-05, "loss": 0.916, "step": 5634 }, { "epoch": 0.9662001414578734, "grad_norm": 1.578125, "learning_rate": 1.5406546286868946e-05, "loss": 0.9839, "step": 5635 }, { "epoch": 0.9663716055468633, "grad_norm": 1.484375, "learning_rate": 1.5405026971285454e-05, "loss": 0.9272, "step": 5636 }, { "epoch": 0.9665430696358531, "grad_norm": 1.546875, "learning_rate": 1.540350747942368e-05, "loss": 1.017, "step": 5637 }, { "epoch": 0.966714533724843, "grad_norm": 1.6171875, "learning_rate": 1.540198781133318e-05, "loss": 1.0335, "step": 5638 }, { "epoch": 0.9668859978138329, "grad_norm": 1.5, "learning_rate": 1.5400467967063512e-05, "loss": 1.0387, "step": 5639 }, { "epoch": 0.9670574619028227, "grad_norm": 1.6015625, "learning_rate": 1.5398947946664247e-05, "loss": 1.0063, "step": 5640 }, { "epoch": 0.9672289259918125, "grad_norm": 1.5390625, "learning_rate": 1.5397427750184962e-05, "loss": 0.9206, "step": 5641 }, { "epoch": 0.9674003900808025, "grad_norm": 1.578125, "learning_rate": 1.539590737767523e-05, "loss": 1.0224, "step": 5642 }, { "epoch": 0.9675718541697923, "grad_norm": 1.5625, "learning_rate": 1.5394386829184643e-05, "loss": 1.0046, "step": 5643 }, { "epoch": 0.9677433182587821, "grad_norm": 1.578125, "learning_rate": 1.5392866104762783e-05, "loss": 1.0165, "step": 5644 }, { "epoch": 0.9679147823477721, "grad_norm": 1.546875, "learning_rate": 1.5391345204459255e-05, "loss": 1.051, "step": 5645 }, { "epoch": 0.9680862464367619, "grad_norm": 1.578125, "learning_rate": 1.538982412832366e-05, "loss": 0.9658, "step": 5646 }, { "epoch": 0.9682577105257517, "grad_norm": 1.609375, "learning_rate": 1.5388302876405602e-05, "loss": 0.9937, "step": 5647 }, { "epoch": 0.9684291746147417, "grad_norm": 1.65625, "learning_rate": 1.5386781448754696e-05, "loss": 0.9847, "step": 5648 }, { "epoch": 0.9686006387037315, "grad_norm": 1.4765625, "learning_rate": 1.538525984542056e-05, "loss": 0.9305, "step": 5649 }, { "epoch": 0.9687721027927213, "grad_norm": 1.4609375, "learning_rate": 1.5383738066452825e-05, "loss": 0.9352, "step": 5650 }, { "epoch": 0.9689435668817112, "grad_norm": 1.7265625, "learning_rate": 1.5382216111901116e-05, "loss": 0.9158, "step": 5651 }, { "epoch": 0.9691150309707011, "grad_norm": 1.6328125, "learning_rate": 1.5380693981815077e-05, "loss": 0.9998, "step": 5652 }, { "epoch": 0.9692864950596909, "grad_norm": 1.6015625, "learning_rate": 1.5379171676244343e-05, "loss": 1.1242, "step": 5653 }, { "epoch": 0.9694579591486808, "grad_norm": 1.4765625, "learning_rate": 1.537764919523856e-05, "loss": 0.9611, "step": 5654 }, { "epoch": 0.9696294232376707, "grad_norm": 1.609375, "learning_rate": 1.537612653884739e-05, "loss": 0.9709, "step": 5655 }, { "epoch": 0.9698008873266605, "grad_norm": 1.609375, "learning_rate": 1.537460370712049e-05, "loss": 1.0086, "step": 5656 }, { "epoch": 0.9699723514156504, "grad_norm": 1.6015625, "learning_rate": 1.5373080700107522e-05, "loss": 0.9748, "step": 5657 }, { "epoch": 0.9701438155046402, "grad_norm": 1.625, "learning_rate": 1.5371557517858162e-05, "loss": 0.9927, "step": 5658 }, { "epoch": 0.9703152795936301, "grad_norm": 1.5078125, "learning_rate": 1.5370034160422084e-05, "loss": 1.0601, "step": 5659 }, { "epoch": 0.97048674368262, "grad_norm": 1.6875, "learning_rate": 1.5368510627848963e-05, "loss": 1.0856, "step": 5660 }, { "epoch": 0.9706582077716098, "grad_norm": 1.59375, "learning_rate": 1.53669869201885e-05, "loss": 1.0443, "step": 5661 }, { "epoch": 0.9708296718605997, "grad_norm": 1.5, "learning_rate": 1.5365463037490386e-05, "loss": 0.9502, "step": 5662 }, { "epoch": 0.9710011359495896, "grad_norm": 1.6171875, "learning_rate": 1.5363938979804312e-05, "loss": 0.9061, "step": 5663 }, { "epoch": 0.9711726000385794, "grad_norm": 1.6953125, "learning_rate": 1.5362414747179996e-05, "loss": 0.9255, "step": 5664 }, { "epoch": 0.9713440641275692, "grad_norm": 1.59375, "learning_rate": 1.536089033966714e-05, "loss": 1.0111, "step": 5665 }, { "epoch": 0.9715155282165592, "grad_norm": 1.59375, "learning_rate": 1.535936575731546e-05, "loss": 0.9452, "step": 5666 }, { "epoch": 0.971686992305549, "grad_norm": 1.4609375, "learning_rate": 1.535784100017468e-05, "loss": 0.9667, "step": 5667 }, { "epoch": 0.9718584563945388, "grad_norm": 1.5859375, "learning_rate": 1.5356316068294533e-05, "loss": 1.0044, "step": 5668 }, { "epoch": 0.9720299204835288, "grad_norm": 1.6171875, "learning_rate": 1.535479096172475e-05, "loss": 1.051, "step": 5669 }, { "epoch": 0.9722013845725186, "grad_norm": 1.59375, "learning_rate": 1.5353265680515067e-05, "loss": 1.0812, "step": 5670 }, { "epoch": 0.9723728486615084, "grad_norm": 1.578125, "learning_rate": 1.535174022471523e-05, "loss": 0.9584, "step": 5671 }, { "epoch": 0.9725443127504984, "grad_norm": 1.6328125, "learning_rate": 1.5350214594374995e-05, "loss": 0.9947, "step": 5672 }, { "epoch": 0.9727157768394882, "grad_norm": 1.5859375, "learning_rate": 1.5348688789544114e-05, "loss": 0.9427, "step": 5673 }, { "epoch": 0.972887240928478, "grad_norm": 1.53125, "learning_rate": 1.5347162810272348e-05, "loss": 0.974, "step": 5674 }, { "epoch": 0.9730587050174679, "grad_norm": 1.4921875, "learning_rate": 1.534563665660947e-05, "loss": 1.0238, "step": 5675 }, { "epoch": 0.9732301691064578, "grad_norm": 1.4921875, "learning_rate": 1.5344110328605248e-05, "loss": 0.9584, "step": 5676 }, { "epoch": 0.9734016331954476, "grad_norm": 1.578125, "learning_rate": 1.5342583826309464e-05, "loss": 1.0112, "step": 5677 }, { "epoch": 0.9735730972844375, "grad_norm": 1.65625, "learning_rate": 1.534105714977191e-05, "loss": 1.0449, "step": 5678 }, { "epoch": 0.9737445613734274, "grad_norm": 1.46875, "learning_rate": 1.533953029904236e-05, "loss": 0.9855, "step": 5679 }, { "epoch": 0.9739160254624172, "grad_norm": 1.53125, "learning_rate": 1.5338003274170626e-05, "loss": 0.9667, "step": 5680 }, { "epoch": 0.9740874895514071, "grad_norm": 1.59375, "learning_rate": 1.5336476075206506e-05, "loss": 1.02, "step": 5681 }, { "epoch": 0.9742589536403969, "grad_norm": 1.46875, "learning_rate": 1.5334948702199803e-05, "loss": 1.0029, "step": 5682 }, { "epoch": 0.9744304177293868, "grad_norm": 1.5546875, "learning_rate": 1.533342115520033e-05, "loss": 0.9316, "step": 5683 }, { "epoch": 0.9746018818183767, "grad_norm": 1.5625, "learning_rate": 1.533189343425791e-05, "loss": 0.8841, "step": 5684 }, { "epoch": 0.9747733459073665, "grad_norm": 1.546875, "learning_rate": 1.533036553942237e-05, "loss": 0.9879, "step": 5685 }, { "epoch": 0.9749448099963564, "grad_norm": 1.5390625, "learning_rate": 1.532883747074354e-05, "loss": 1.0564, "step": 5686 }, { "epoch": 0.9751162740853463, "grad_norm": 1.5546875, "learning_rate": 1.532730922827125e-05, "loss": 0.9616, "step": 5687 }, { "epoch": 0.9752877381743361, "grad_norm": 1.5625, "learning_rate": 1.5325780812055345e-05, "loss": 0.9662, "step": 5688 }, { "epoch": 0.9754592022633259, "grad_norm": 1.6953125, "learning_rate": 1.5324252222145673e-05, "loss": 0.9497, "step": 5689 }, { "epoch": 0.9756306663523159, "grad_norm": 1.5234375, "learning_rate": 1.532272345859209e-05, "loss": 1.0239, "step": 5690 }, { "epoch": 0.9758021304413057, "grad_norm": 1.578125, "learning_rate": 1.5321194521444445e-05, "loss": 0.9976, "step": 5691 }, { "epoch": 0.9759735945302955, "grad_norm": 1.5546875, "learning_rate": 1.5319665410752615e-05, "loss": 1.005, "step": 5692 }, { "epoch": 0.9761450586192855, "grad_norm": 1.515625, "learning_rate": 1.531813612656646e-05, "loss": 0.9203, "step": 5693 }, { "epoch": 0.9763165227082753, "grad_norm": 1.6796875, "learning_rate": 1.5316606668935862e-05, "loss": 1.037, "step": 5694 }, { "epoch": 0.9764879867972651, "grad_norm": 1.4921875, "learning_rate": 1.53150770379107e-05, "loss": 0.929, "step": 5695 }, { "epoch": 0.976659450886255, "grad_norm": 1.5703125, "learning_rate": 1.5313547233540858e-05, "loss": 1.0949, "step": 5696 }, { "epoch": 0.9768309149752449, "grad_norm": 1.5625, "learning_rate": 1.5312017255876234e-05, "loss": 0.9868, "step": 5697 }, { "epoch": 0.9770023790642347, "grad_norm": 1.5078125, "learning_rate": 1.5310487104966725e-05, "loss": 0.933, "step": 5698 }, { "epoch": 0.9771738431532246, "grad_norm": 1.5625, "learning_rate": 1.5308956780862234e-05, "loss": 0.9512, "step": 5699 }, { "epoch": 0.9773453072422145, "grad_norm": 1.5234375, "learning_rate": 1.5307426283612668e-05, "loss": 0.9092, "step": 5700 }, { "epoch": 0.9775167713312043, "grad_norm": 1.5703125, "learning_rate": 1.530589561326795e-05, "loss": 0.9629, "step": 5701 }, { "epoch": 0.9776882354201942, "grad_norm": 1.6796875, "learning_rate": 1.530436476987799e-05, "loss": 1.0289, "step": 5702 }, { "epoch": 0.977859699509184, "grad_norm": 1.5, "learning_rate": 1.5302833753492726e-05, "loss": 0.8838, "step": 5703 }, { "epoch": 0.9780311635981739, "grad_norm": 1.5390625, "learning_rate": 1.530130256416208e-05, "loss": 0.9919, "step": 5704 }, { "epoch": 0.9782026276871638, "grad_norm": 1.5390625, "learning_rate": 1.5299771201935998e-05, "loss": 0.9795, "step": 5705 }, { "epoch": 0.9783740917761536, "grad_norm": 1.6328125, "learning_rate": 1.5298239666864417e-05, "loss": 0.9597, "step": 5706 }, { "epoch": 0.9785455558651435, "grad_norm": 1.5859375, "learning_rate": 1.529670795899729e-05, "loss": 0.9669, "step": 5707 }, { "epoch": 0.9787170199541334, "grad_norm": 1.546875, "learning_rate": 1.529517607838457e-05, "loss": 1.0354, "step": 5708 }, { "epoch": 0.9788884840431232, "grad_norm": 1.5625, "learning_rate": 1.5293644025076223e-05, "loss": 1.0429, "step": 5709 }, { "epoch": 0.9790599481321131, "grad_norm": 1.5625, "learning_rate": 1.5292111799122208e-05, "loss": 0.98, "step": 5710 }, { "epoch": 0.979231412221103, "grad_norm": 1.65625, "learning_rate": 1.5290579400572497e-05, "loss": 1.0995, "step": 5711 }, { "epoch": 0.9794028763100928, "grad_norm": 1.515625, "learning_rate": 1.528904682947707e-05, "loss": 0.9783, "step": 5712 }, { "epoch": 0.9795743403990826, "grad_norm": 1.4921875, "learning_rate": 1.528751408588591e-05, "loss": 0.9386, "step": 5713 }, { "epoch": 0.9797458044880726, "grad_norm": 1.578125, "learning_rate": 1.5285981169849002e-05, "loss": 1.0191, "step": 5714 }, { "epoch": 0.9799172685770624, "grad_norm": 1.5, "learning_rate": 1.5284448081416346e-05, "loss": 0.8976, "step": 5715 }, { "epoch": 0.9800887326660522, "grad_norm": 1.4921875, "learning_rate": 1.5282914820637938e-05, "loss": 0.8958, "step": 5716 }, { "epoch": 0.9802601967550422, "grad_norm": 1.59375, "learning_rate": 1.5281381387563785e-05, "loss": 1.0732, "step": 5717 }, { "epoch": 0.980431660844032, "grad_norm": 1.53125, "learning_rate": 1.5279847782243896e-05, "loss": 0.9982, "step": 5718 }, { "epoch": 0.9806031249330218, "grad_norm": 1.625, "learning_rate": 1.5278314004728288e-05, "loss": 1.0392, "step": 5719 }, { "epoch": 0.9807745890220118, "grad_norm": 1.5546875, "learning_rate": 1.5276780055066985e-05, "loss": 1.02, "step": 5720 }, { "epoch": 0.9809460531110016, "grad_norm": 1.578125, "learning_rate": 1.5275245933310014e-05, "loss": 0.9646, "step": 5721 }, { "epoch": 0.9811175171999914, "grad_norm": 1.546875, "learning_rate": 1.5273711639507408e-05, "loss": 0.9817, "step": 5722 }, { "epoch": 0.9812889812889813, "grad_norm": 1.5625, "learning_rate": 1.5272177173709205e-05, "loss": 1.0012, "step": 5723 }, { "epoch": 0.9814604453779712, "grad_norm": 1.7265625, "learning_rate": 1.5270642535965455e-05, "loss": 1.0409, "step": 5724 }, { "epoch": 0.981631909466961, "grad_norm": 1.484375, "learning_rate": 1.52691077263262e-05, "loss": 0.8934, "step": 5725 }, { "epoch": 0.9818033735559508, "grad_norm": 1.46875, "learning_rate": 1.52675727448415e-05, "loss": 0.9786, "step": 5726 }, { "epoch": 0.9819748376449408, "grad_norm": 1.515625, "learning_rate": 1.526603759156142e-05, "loss": 1.0649, "step": 5727 }, { "epoch": 0.9821463017339306, "grad_norm": 1.4765625, "learning_rate": 1.526450226653603e-05, "loss": 0.9111, "step": 5728 }, { "epoch": 0.9823177658229204, "grad_norm": 1.6015625, "learning_rate": 1.5262966769815387e-05, "loss": 1.0624, "step": 5729 }, { "epoch": 0.9824892299119103, "grad_norm": 1.46875, "learning_rate": 1.5261431101449584e-05, "loss": 0.9538, "step": 5730 }, { "epoch": 0.9826606940009002, "grad_norm": 1.546875, "learning_rate": 1.52598952614887e-05, "loss": 1.0416, "step": 5731 }, { "epoch": 0.98283215808989, "grad_norm": 1.4921875, "learning_rate": 1.5258359249982818e-05, "loss": 0.9478, "step": 5732 }, { "epoch": 0.9830036221788799, "grad_norm": 1.5703125, "learning_rate": 1.5256823066982046e-05, "loss": 1.006, "step": 5733 }, { "epoch": 0.9831750862678698, "grad_norm": 1.515625, "learning_rate": 1.525528671253648e-05, "loss": 1.0836, "step": 5734 }, { "epoch": 0.9833465503568596, "grad_norm": 1.546875, "learning_rate": 1.5253750186696219e-05, "loss": 1.029, "step": 5735 }, { "epoch": 0.9835180144458495, "grad_norm": 1.5390625, "learning_rate": 1.5252213489511386e-05, "loss": 1.0368, "step": 5736 }, { "epoch": 0.9836894785348393, "grad_norm": 1.6171875, "learning_rate": 1.5250676621032091e-05, "loss": 0.9558, "step": 5737 }, { "epoch": 0.9838609426238292, "grad_norm": 1.546875, "learning_rate": 1.5249139581308457e-05, "loss": 1.0056, "step": 5738 }, { "epoch": 0.9840324067128191, "grad_norm": 1.5078125, "learning_rate": 1.524760237039062e-05, "loss": 0.8954, "step": 5739 }, { "epoch": 0.9842038708018089, "grad_norm": 1.5859375, "learning_rate": 1.5246064988328706e-05, "loss": 1.0487, "step": 5740 }, { "epoch": 0.9843753348907988, "grad_norm": 1.5234375, "learning_rate": 1.5244527435172858e-05, "loss": 0.9816, "step": 5741 }, { "epoch": 0.9845467989797887, "grad_norm": 1.5625, "learning_rate": 1.5242989710973218e-05, "loss": 0.9851, "step": 5742 }, { "epoch": 0.9847182630687785, "grad_norm": 1.5703125, "learning_rate": 1.5241451815779944e-05, "loss": 1.0613, "step": 5743 }, { "epoch": 0.9848897271577683, "grad_norm": 1.6328125, "learning_rate": 1.523991374964319e-05, "loss": 1.0254, "step": 5744 }, { "epoch": 0.9850611912467583, "grad_norm": 1.5390625, "learning_rate": 1.5238375512613113e-05, "loss": 0.9928, "step": 5745 }, { "epoch": 0.9852326553357481, "grad_norm": 1.6015625, "learning_rate": 1.5236837104739887e-05, "loss": 1.0757, "step": 5746 }, { "epoch": 0.9854041194247379, "grad_norm": 1.5234375, "learning_rate": 1.5235298526073684e-05, "loss": 0.9682, "step": 5747 }, { "epoch": 0.9855755835137279, "grad_norm": 1.65625, "learning_rate": 1.523375977666468e-05, "loss": 1.0222, "step": 5748 }, { "epoch": 0.9857470476027177, "grad_norm": 1.65625, "learning_rate": 1.523222085656306e-05, "loss": 1.0489, "step": 5749 }, { "epoch": 0.9859185116917075, "grad_norm": 1.5703125, "learning_rate": 1.523068176581902e-05, "loss": 1.0337, "step": 5750 }, { "epoch": 0.9860899757806975, "grad_norm": 1.5703125, "learning_rate": 1.5229142504482743e-05, "loss": 0.987, "step": 5751 }, { "epoch": 0.9862614398696873, "grad_norm": 1.5625, "learning_rate": 1.5227603072604442e-05, "loss": 1.0087, "step": 5752 }, { "epoch": 0.9864329039586771, "grad_norm": 1.453125, "learning_rate": 1.522606347023432e-05, "loss": 0.9602, "step": 5753 }, { "epoch": 0.986604368047667, "grad_norm": 1.6015625, "learning_rate": 1.5224523697422588e-05, "loss": 1.0332, "step": 5754 }, { "epoch": 0.9867758321366569, "grad_norm": 1.4765625, "learning_rate": 1.5222983754219466e-05, "loss": 0.9192, "step": 5755 }, { "epoch": 0.9869472962256467, "grad_norm": 1.5390625, "learning_rate": 1.5221443640675175e-05, "loss": 0.9666, "step": 5756 }, { "epoch": 0.9871187603146366, "grad_norm": 1.53125, "learning_rate": 1.5219903356839947e-05, "loss": 0.9095, "step": 5757 }, { "epoch": 0.9872902244036265, "grad_norm": 1.53125, "learning_rate": 1.521836290276401e-05, "loss": 0.9543, "step": 5758 }, { "epoch": 0.9874616884926163, "grad_norm": 1.5703125, "learning_rate": 1.521682227849761e-05, "loss": 1.002, "step": 5759 }, { "epoch": 0.9876331525816062, "grad_norm": 1.5, "learning_rate": 1.5215281484090989e-05, "loss": 0.9872, "step": 5760 }, { "epoch": 0.987804616670596, "grad_norm": 1.4765625, "learning_rate": 1.5213740519594402e-05, "loss": 0.9745, "step": 5761 }, { "epoch": 0.9879760807595859, "grad_norm": 2.0, "learning_rate": 1.5212199385058104e-05, "loss": 0.9453, "step": 5762 }, { "epoch": 0.9881475448485758, "grad_norm": 1.484375, "learning_rate": 1.5210658080532354e-05, "loss": 0.9648, "step": 5763 }, { "epoch": 0.9883190089375656, "grad_norm": 1.640625, "learning_rate": 1.5209116606067424e-05, "loss": 1.0302, "step": 5764 }, { "epoch": 0.9884904730265555, "grad_norm": 1.5703125, "learning_rate": 1.5207574961713585e-05, "loss": 1.0159, "step": 5765 }, { "epoch": 0.9886619371155454, "grad_norm": 1.578125, "learning_rate": 1.520603314752112e-05, "loss": 0.9792, "step": 5766 }, { "epoch": 0.9888334012045352, "grad_norm": 1.6015625, "learning_rate": 1.5204491163540307e-05, "loss": 0.9606, "step": 5767 }, { "epoch": 0.989004865293525, "grad_norm": 1.6171875, "learning_rate": 1.5202949009821439e-05, "loss": 1.0483, "step": 5768 }, { "epoch": 0.989176329382515, "grad_norm": 1.5390625, "learning_rate": 1.5201406686414812e-05, "loss": 1.0426, "step": 5769 }, { "epoch": 0.9893477934715048, "grad_norm": 1.46875, "learning_rate": 1.5199864193370725e-05, "loss": 0.9885, "step": 5770 }, { "epoch": 0.9895192575604946, "grad_norm": 1.5390625, "learning_rate": 1.5198321530739487e-05, "loss": 1.0593, "step": 5771 }, { "epoch": 0.9896907216494846, "grad_norm": 1.5625, "learning_rate": 1.5196778698571409e-05, "loss": 1.0358, "step": 5772 }, { "epoch": 0.9898621857384744, "grad_norm": 1.6640625, "learning_rate": 1.5195235696916809e-05, "loss": 0.9692, "step": 5773 }, { "epoch": 0.9900336498274642, "grad_norm": 1.5703125, "learning_rate": 1.5193692525826007e-05, "loss": 0.8914, "step": 5774 }, { "epoch": 0.9902051139164542, "grad_norm": 1.5625, "learning_rate": 1.5192149185349334e-05, "loss": 0.9372, "step": 5775 }, { "epoch": 0.990376578005444, "grad_norm": 1.4921875, "learning_rate": 1.5190605675537128e-05, "loss": 0.8688, "step": 5776 }, { "epoch": 0.9905480420944338, "grad_norm": 1.5, "learning_rate": 1.518906199643972e-05, "loss": 0.9517, "step": 5777 }, { "epoch": 0.9907195061834237, "grad_norm": 1.46875, "learning_rate": 1.5187518148107466e-05, "loss": 0.8902, "step": 5778 }, { "epoch": 0.9908909702724136, "grad_norm": 1.5703125, "learning_rate": 1.5185974130590704e-05, "loss": 1.0075, "step": 5779 }, { "epoch": 0.9910624343614034, "grad_norm": 1.625, "learning_rate": 1.51844299439398e-05, "loss": 1.0397, "step": 5780 }, { "epoch": 0.9912338984503933, "grad_norm": 1.5546875, "learning_rate": 1.5182885588205114e-05, "loss": 0.9898, "step": 5781 }, { "epoch": 0.9914053625393832, "grad_norm": 1.53125, "learning_rate": 1.5181341063437012e-05, "loss": 0.9545, "step": 5782 }, { "epoch": 0.991576826628373, "grad_norm": 1.65625, "learning_rate": 1.5179796369685867e-05, "loss": 1.1139, "step": 5783 }, { "epoch": 0.9917482907173629, "grad_norm": 1.46875, "learning_rate": 1.5178251507002056e-05, "loss": 0.883, "step": 5784 }, { "epoch": 0.9919197548063527, "grad_norm": 1.5234375, "learning_rate": 1.5176706475435964e-05, "loss": 0.9783, "step": 5785 }, { "epoch": 0.9920912188953426, "grad_norm": 1.640625, "learning_rate": 1.517516127503798e-05, "loss": 1.1041, "step": 5786 }, { "epoch": 0.9922626829843325, "grad_norm": 1.453125, "learning_rate": 1.5173615905858499e-05, "loss": 0.9326, "step": 5787 }, { "epoch": 0.9924341470733223, "grad_norm": 1.640625, "learning_rate": 1.5172070367947922e-05, "loss": 1.0264, "step": 5788 }, { "epoch": 0.9926056111623122, "grad_norm": 1.53125, "learning_rate": 1.5170524661356654e-05, "loss": 0.9666, "step": 5789 }, { "epoch": 0.9927770752513021, "grad_norm": 1.5078125, "learning_rate": 1.5168978786135102e-05, "loss": 0.9852, "step": 5790 }, { "epoch": 0.9929485393402919, "grad_norm": 1.6015625, "learning_rate": 1.5167432742333694e-05, "loss": 1.0273, "step": 5791 }, { "epoch": 0.9931200034292818, "grad_norm": 1.6015625, "learning_rate": 1.5165886530002842e-05, "loss": 1.0046, "step": 5792 }, { "epoch": 0.9932914675182717, "grad_norm": 1.578125, "learning_rate": 1.516434014919298e-05, "loss": 0.9921, "step": 5793 }, { "epoch": 0.9934629316072615, "grad_norm": 1.59375, "learning_rate": 1.5162793599954535e-05, "loss": 1.0535, "step": 5794 }, { "epoch": 0.9936343956962513, "grad_norm": 1.59375, "learning_rate": 1.5161246882337952e-05, "loss": 1.0442, "step": 5795 }, { "epoch": 0.9938058597852413, "grad_norm": 1.453125, "learning_rate": 1.5159699996393672e-05, "loss": 0.94, "step": 5796 }, { "epoch": 0.9939773238742311, "grad_norm": 1.59375, "learning_rate": 1.5158152942172144e-05, "loss": 1.0225, "step": 5797 }, { "epoch": 0.9941487879632209, "grad_norm": 1.546875, "learning_rate": 1.5156605719723824e-05, "loss": 0.9897, "step": 5798 }, { "epoch": 0.9943202520522109, "grad_norm": 1.5703125, "learning_rate": 1.5155058329099176e-05, "loss": 1.0447, "step": 5799 }, { "epoch": 0.9944917161412007, "grad_norm": 1.578125, "learning_rate": 1.5153510770348665e-05, "loss": 1.0434, "step": 5800 }, { "epoch": 0.9946631802301905, "grad_norm": 1.6015625, "learning_rate": 1.5151963043522759e-05, "loss": 0.934, "step": 5801 }, { "epoch": 0.9948346443191805, "grad_norm": 1.46875, "learning_rate": 1.515041514867194e-05, "loss": 0.9631, "step": 5802 }, { "epoch": 0.9950061084081703, "grad_norm": 1.546875, "learning_rate": 1.5148867085846686e-05, "loss": 0.9318, "step": 5803 }, { "epoch": 0.9951775724971601, "grad_norm": 1.5, "learning_rate": 1.5147318855097491e-05, "loss": 0.954, "step": 5804 }, { "epoch": 0.99534903658615, "grad_norm": 1.53125, "learning_rate": 1.5145770456474842e-05, "loss": 1.044, "step": 5805 }, { "epoch": 0.9955205006751399, "grad_norm": 1.625, "learning_rate": 1.514422189002924e-05, "loss": 1.0305, "step": 5806 }, { "epoch": 0.9956919647641297, "grad_norm": 1.59375, "learning_rate": 1.5142673155811192e-05, "loss": 1.0378, "step": 5807 }, { "epoch": 0.9958634288531196, "grad_norm": 1.5625, "learning_rate": 1.5141124253871206e-05, "loss": 1.0143, "step": 5808 }, { "epoch": 0.9960348929421095, "grad_norm": 1.7578125, "learning_rate": 1.51395751842598e-05, "loss": 1.0365, "step": 5809 }, { "epoch": 0.9962063570310993, "grad_norm": 1.6171875, "learning_rate": 1.5138025947027495e-05, "loss": 0.9591, "step": 5810 }, { "epoch": 0.9963778211200892, "grad_norm": 1.6171875, "learning_rate": 1.5136476542224813e-05, "loss": 1.0616, "step": 5811 }, { "epoch": 0.996549285209079, "grad_norm": 1.5546875, "learning_rate": 1.513492696990229e-05, "loss": 0.9995, "step": 5812 }, { "epoch": 0.9967207492980689, "grad_norm": 1.5234375, "learning_rate": 1.5133377230110461e-05, "loss": 0.9262, "step": 5813 }, { "epoch": 0.9968922133870588, "grad_norm": 1.6171875, "learning_rate": 1.5131827322899873e-05, "loss": 1.0377, "step": 5814 }, { "epoch": 0.9970636774760486, "grad_norm": 1.5859375, "learning_rate": 1.5130277248321068e-05, "loss": 1.0408, "step": 5815 }, { "epoch": 0.9972351415650385, "grad_norm": 1.6953125, "learning_rate": 1.5128727006424605e-05, "loss": 0.9927, "step": 5816 }, { "epoch": 0.9974066056540284, "grad_norm": 1.59375, "learning_rate": 1.512717659726104e-05, "loss": 0.9785, "step": 5817 }, { "epoch": 0.9975780697430182, "grad_norm": 1.6015625, "learning_rate": 1.5125626020880937e-05, "loss": 1.0031, "step": 5818 }, { "epoch": 0.997749533832008, "grad_norm": 1.6171875, "learning_rate": 1.5124075277334873e-05, "loss": 1.0179, "step": 5819 }, { "epoch": 0.997920997920998, "grad_norm": 1.5, "learning_rate": 1.5122524366673416e-05, "loss": 1.0134, "step": 5820 }, { "epoch": 0.9980924620099878, "grad_norm": 1.5390625, "learning_rate": 1.5120973288947149e-05, "loss": 1.0051, "step": 5821 }, { "epoch": 0.9982639260989776, "grad_norm": 1.53125, "learning_rate": 1.5119422044206661e-05, "loss": 1.0071, "step": 5822 }, { "epoch": 0.9984353901879675, "grad_norm": 1.578125, "learning_rate": 1.5117870632502542e-05, "loss": 0.9875, "step": 5823 }, { "epoch": 0.9986068542769574, "grad_norm": 1.6171875, "learning_rate": 1.5116319053885388e-05, "loss": 1.0501, "step": 5824 }, { "epoch": 0.9987783183659472, "grad_norm": 1.53125, "learning_rate": 1.5114767308405807e-05, "loss": 0.9723, "step": 5825 }, { "epoch": 0.998949782454937, "grad_norm": 1.5703125, "learning_rate": 1.51132153961144e-05, "loss": 1.0194, "step": 5826 }, { "epoch": 0.999121246543927, "grad_norm": 1.4296875, "learning_rate": 1.5111663317061785e-05, "loss": 1.0033, "step": 5827 }, { "epoch": 0.9992927106329168, "grad_norm": 1.6328125, "learning_rate": 1.5110111071298581e-05, "loss": 0.9697, "step": 5828 }, { "epoch": 0.9994641747219066, "grad_norm": 1.4765625, "learning_rate": 1.5108558658875411e-05, "loss": 0.9253, "step": 5829 }, { "epoch": 0.9996356388108966, "grad_norm": 1.4765625, "learning_rate": 1.510700607984291e-05, "loss": 0.9752, "step": 5830 }, { "epoch": 0.9998071028998864, "grad_norm": 1.6015625, "learning_rate": 1.5105453334251708e-05, "loss": 0.9819, "step": 5831 }, { "epoch": 0.9999785669888762, "grad_norm": 1.5078125, "learning_rate": 1.5103900422152445e-05, "loss": 0.9919, "step": 5832 }, { "epoch": 1.0001500310778662, "grad_norm": 1.5390625, "learning_rate": 1.510234734359577e-05, "loss": 0.9498, "step": 5833 }, { "epoch": 1.000321495166856, "grad_norm": 1.5390625, "learning_rate": 1.5100794098632336e-05, "loss": 0.9506, "step": 5834 }, { "epoch": 1.0004929592558458, "grad_norm": 1.5546875, "learning_rate": 1.5099240687312797e-05, "loss": 1.128, "step": 5835 }, { "epoch": 1.0006644233448356, "grad_norm": 1.46875, "learning_rate": 1.5097687109687818e-05, "loss": 0.9603, "step": 5836 }, { "epoch": 1.0008358874338257, "grad_norm": 1.625, "learning_rate": 1.5096133365808068e-05, "loss": 0.935, "step": 5837 }, { "epoch": 1.0010073515228155, "grad_norm": 1.5625, "learning_rate": 1.5094579455724216e-05, "loss": 1.0625, "step": 5838 }, { "epoch": 1.0011788156118053, "grad_norm": 1.5703125, "learning_rate": 1.5093025379486945e-05, "loss": 1.0261, "step": 5839 }, { "epoch": 1.0013502797007952, "grad_norm": 1.65625, "learning_rate": 1.5091471137146938e-05, "loss": 1.0069, "step": 5840 }, { "epoch": 1.001521743789785, "grad_norm": 1.5859375, "learning_rate": 1.5089916728754882e-05, "loss": 0.9623, "step": 5841 }, { "epoch": 1.0016932078787748, "grad_norm": 1.5390625, "learning_rate": 1.5088362154361477e-05, "loss": 0.9904, "step": 5842 }, { "epoch": 1.0018646719677649, "grad_norm": 1.4296875, "learning_rate": 1.5086807414017424e-05, "loss": 0.8465, "step": 5843 }, { "epoch": 1.0020361360567547, "grad_norm": 1.6171875, "learning_rate": 1.508525250777342e-05, "loss": 1.0136, "step": 5844 }, { "epoch": 1.0022076001457445, "grad_norm": 1.6015625, "learning_rate": 1.5083697435680185e-05, "loss": 1.001, "step": 5845 }, { "epoch": 1.0023790642347343, "grad_norm": 1.5703125, "learning_rate": 1.5082142197788435e-05, "loss": 1.0401, "step": 5846 }, { "epoch": 1.0025505283237242, "grad_norm": 1.5078125, "learning_rate": 1.5080586794148887e-05, "loss": 1.0249, "step": 5847 }, { "epoch": 1.002721992412714, "grad_norm": 1.5859375, "learning_rate": 1.5079031224812276e-05, "loss": 0.9805, "step": 5848 }, { "epoch": 1.002893456501704, "grad_norm": 1.484375, "learning_rate": 1.5077475489829327e-05, "loss": 0.9111, "step": 5849 }, { "epoch": 1.0030649205906939, "grad_norm": 1.5078125, "learning_rate": 1.5075919589250785e-05, "loss": 0.9759, "step": 5850 }, { "epoch": 1.0032363846796837, "grad_norm": 1.546875, "learning_rate": 1.5074363523127389e-05, "loss": 0.9997, "step": 5851 }, { "epoch": 1.0034078487686735, "grad_norm": 1.5546875, "learning_rate": 1.5072807291509892e-05, "loss": 0.9388, "step": 5852 }, { "epoch": 1.0035793128576633, "grad_norm": 1.453125, "learning_rate": 1.5071250894449046e-05, "loss": 0.9081, "step": 5853 }, { "epoch": 1.0037507769466532, "grad_norm": 1.5, "learning_rate": 1.506969433199561e-05, "loss": 0.9728, "step": 5854 }, { "epoch": 1.0039222410356432, "grad_norm": 1.5078125, "learning_rate": 1.5068137604200353e-05, "loss": 0.9584, "step": 5855 }, { "epoch": 1.004093705124633, "grad_norm": 1.546875, "learning_rate": 1.5066580711114045e-05, "loss": 1.0177, "step": 5856 }, { "epoch": 1.0042651692136229, "grad_norm": 1.5546875, "learning_rate": 1.5065023652787459e-05, "loss": 1.0031, "step": 5857 }, { "epoch": 1.0044366333026127, "grad_norm": 1.6171875, "learning_rate": 1.506346642927138e-05, "loss": 0.9219, "step": 5858 }, { "epoch": 1.0046080973916025, "grad_norm": 1.546875, "learning_rate": 1.5061909040616592e-05, "loss": 0.9689, "step": 5859 }, { "epoch": 1.0047795614805923, "grad_norm": 1.6171875, "learning_rate": 1.5060351486873893e-05, "loss": 1.0111, "step": 5860 }, { "epoch": 1.0049510255695824, "grad_norm": 1.6484375, "learning_rate": 1.505879376809407e-05, "loss": 0.9455, "step": 5861 }, { "epoch": 1.0051224896585722, "grad_norm": 1.65625, "learning_rate": 1.5057235884327938e-05, "loss": 0.9665, "step": 5862 }, { "epoch": 1.005293953747562, "grad_norm": 1.53125, "learning_rate": 1.5055677835626296e-05, "loss": 0.9387, "step": 5863 }, { "epoch": 1.0054654178365519, "grad_norm": 1.5546875, "learning_rate": 1.5054119622039962e-05, "loss": 0.998, "step": 5864 }, { "epoch": 1.0056368819255417, "grad_norm": 1.578125, "learning_rate": 1.5052561243619756e-05, "loss": 1.047, "step": 5865 }, { "epoch": 1.0058083460145315, "grad_norm": 1.5390625, "learning_rate": 1.5051002700416505e-05, "loss": 0.9357, "step": 5866 }, { "epoch": 1.0059798101035216, "grad_norm": 1.6171875, "learning_rate": 1.5049443992481031e-05, "loss": 1.1065, "step": 5867 }, { "epoch": 1.0061512741925114, "grad_norm": 1.5390625, "learning_rate": 1.5047885119864179e-05, "loss": 1.032, "step": 5868 }, { "epoch": 1.0063227382815012, "grad_norm": 2.53125, "learning_rate": 1.504632608261678e-05, "loss": 0.9967, "step": 5869 }, { "epoch": 1.006494202370491, "grad_norm": 1.6015625, "learning_rate": 1.5044766880789686e-05, "loss": 0.9772, "step": 5870 }, { "epoch": 1.0066656664594809, "grad_norm": 1.5234375, "learning_rate": 1.504320751443375e-05, "loss": 0.9998, "step": 5871 }, { "epoch": 1.0068371305484707, "grad_norm": 1.46875, "learning_rate": 1.504164798359982e-05, "loss": 0.9344, "step": 5872 }, { "epoch": 1.0070085946374605, "grad_norm": 1.578125, "learning_rate": 1.504008828833877e-05, "loss": 1.1216, "step": 5873 }, { "epoch": 1.0071800587264506, "grad_norm": 1.59375, "learning_rate": 1.503852842870146e-05, "loss": 1.004, "step": 5874 }, { "epoch": 1.0073515228154404, "grad_norm": 1.5078125, "learning_rate": 1.503696840473876e-05, "loss": 0.9362, "step": 5875 }, { "epoch": 1.0075229869044302, "grad_norm": 1.46875, "learning_rate": 1.5035408216501559e-05, "loss": 0.9073, "step": 5876 }, { "epoch": 1.00769445099342, "grad_norm": 1.4609375, "learning_rate": 1.5033847864040734e-05, "loss": 0.9681, "step": 5877 }, { "epoch": 1.0078659150824099, "grad_norm": 1.5625, "learning_rate": 1.5032287347407173e-05, "loss": 0.9663, "step": 5878 }, { "epoch": 1.0080373791713997, "grad_norm": 1.5234375, "learning_rate": 1.5030726666651771e-05, "loss": 0.9741, "step": 5879 }, { "epoch": 1.0082088432603897, "grad_norm": 1.53125, "learning_rate": 1.5029165821825429e-05, "loss": 1.0372, "step": 5880 }, { "epoch": 1.0083803073493796, "grad_norm": 1.5625, "learning_rate": 1.502760481297905e-05, "loss": 1.0015, "step": 5881 }, { "epoch": 1.0085517714383694, "grad_norm": 1.6953125, "learning_rate": 1.5026043640163548e-05, "loss": 1.0011, "step": 5882 }, { "epoch": 1.0087232355273592, "grad_norm": 1.6640625, "learning_rate": 1.5024482303429837e-05, "loss": 1.0835, "step": 5883 }, { "epoch": 1.008894699616349, "grad_norm": 1.5390625, "learning_rate": 1.5022920802828835e-05, "loss": 0.9673, "step": 5884 }, { "epoch": 1.0090661637053389, "grad_norm": 2.34375, "learning_rate": 1.5021359138411473e-05, "loss": 1.0423, "step": 5885 }, { "epoch": 1.009237627794329, "grad_norm": 1.53125, "learning_rate": 1.5019797310228681e-05, "loss": 0.9883, "step": 5886 }, { "epoch": 1.0094090918833187, "grad_norm": 1.6484375, "learning_rate": 1.5018235318331394e-05, "loss": 1.0063, "step": 5887 }, { "epoch": 1.0095805559723086, "grad_norm": 1.53125, "learning_rate": 1.5016673162770556e-05, "loss": 1.0686, "step": 5888 }, { "epoch": 1.0097520200612984, "grad_norm": 1.5625, "learning_rate": 1.5015110843597121e-05, "loss": 0.997, "step": 5889 }, { "epoch": 1.0099234841502882, "grad_norm": 1.6328125, "learning_rate": 1.5013548360862032e-05, "loss": 0.9651, "step": 5890 }, { "epoch": 1.010094948239278, "grad_norm": 1.796875, "learning_rate": 1.5011985714616248e-05, "loss": 1.0072, "step": 5891 }, { "epoch": 1.010266412328268, "grad_norm": 1.6328125, "learning_rate": 1.501042290491074e-05, "loss": 1.0448, "step": 5892 }, { "epoch": 1.010437876417258, "grad_norm": 1.640625, "learning_rate": 1.5008859931796474e-05, "loss": 1.0269, "step": 5893 }, { "epoch": 1.0106093405062477, "grad_norm": 1.5078125, "learning_rate": 1.5007296795324427e-05, "loss": 1.0092, "step": 5894 }, { "epoch": 1.0107808045952376, "grad_norm": 1.609375, "learning_rate": 1.5005733495545572e-05, "loss": 0.9801, "step": 5895 }, { "epoch": 1.0109522686842274, "grad_norm": 1.53125, "learning_rate": 1.5004170032510896e-05, "loss": 0.9324, "step": 5896 }, { "epoch": 1.0111237327732172, "grad_norm": 1.625, "learning_rate": 1.5002606406271393e-05, "loss": 1.0021, "step": 5897 }, { "epoch": 1.0112951968622073, "grad_norm": 1.5078125, "learning_rate": 1.5001042616878057e-05, "loss": 0.9742, "step": 5898 }, { "epoch": 1.011466660951197, "grad_norm": 1.5625, "learning_rate": 1.499947866438189e-05, "loss": 0.9153, "step": 5899 }, { "epoch": 1.011638125040187, "grad_norm": 1.5625, "learning_rate": 1.4997914548833898e-05, "loss": 1.026, "step": 5900 }, { "epoch": 1.0118095891291767, "grad_norm": 1.625, "learning_rate": 1.4996350270285089e-05, "loss": 0.9569, "step": 5901 }, { "epoch": 1.0119810532181666, "grad_norm": 1.8359375, "learning_rate": 1.4994785828786487e-05, "loss": 0.9064, "step": 5902 }, { "epoch": 1.0001714640889898, "grad_norm": 1.75, "learning_rate": 1.4993221224389109e-05, "loss": 0.9062, "step": 5903 }, { "epoch": 1.0003429281779797, "grad_norm": 1.7109375, "learning_rate": 1.4991656457143984e-05, "loss": 0.8624, "step": 5904 }, { "epoch": 1.0005143922669695, "grad_norm": 1.515625, "learning_rate": 1.4990091527102145e-05, "loss": 0.8074, "step": 5905 }, { "epoch": 1.0006858563559595, "grad_norm": 1.625, "learning_rate": 1.4988526434314633e-05, "loss": 0.8797, "step": 5906 }, { "epoch": 1.0008573204449493, "grad_norm": 1.5625, "learning_rate": 1.4986961178832485e-05, "loss": 0.9083, "step": 5907 }, { "epoch": 1.0010287845339392, "grad_norm": 1.6171875, "learning_rate": 1.4985395760706756e-05, "loss": 0.8394, "step": 5908 }, { "epoch": 1.001200248622929, "grad_norm": 1.7109375, "learning_rate": 1.4983830179988497e-05, "loss": 0.7831, "step": 5909 }, { "epoch": 1.0013717127119188, "grad_norm": 1.8359375, "learning_rate": 1.4982264436728768e-05, "loss": 0.9046, "step": 5910 }, { "epoch": 1.0015431768009087, "grad_norm": 1.84375, "learning_rate": 1.4980698530978632e-05, "loss": 0.8875, "step": 5911 }, { "epoch": 1.0017146408898987, "grad_norm": 1.703125, "learning_rate": 1.4979132462789168e-05, "loss": 0.7999, "step": 5912 }, { "epoch": 1.0018861049788885, "grad_norm": 1.6015625, "learning_rate": 1.497756623221144e-05, "loss": 0.8818, "step": 5913 }, { "epoch": 1.0020575690678784, "grad_norm": 1.609375, "learning_rate": 1.4975999839296534e-05, "loss": 0.8981, "step": 5914 }, { "epoch": 1.0022290331568682, "grad_norm": 1.71875, "learning_rate": 1.4974433284095535e-05, "loss": 0.9054, "step": 5915 }, { "epoch": 1.002400497245858, "grad_norm": 1.6953125, "learning_rate": 1.4972866566659537e-05, "loss": 0.8543, "step": 5916 }, { "epoch": 1.0025719613348478, "grad_norm": 2.0, "learning_rate": 1.4971299687039634e-05, "loss": 0.9116, "step": 5917 }, { "epoch": 1.0027434254238379, "grad_norm": 1.6015625, "learning_rate": 1.4969732645286925e-05, "loss": 0.8365, "step": 5918 }, { "epoch": 1.0029148895128277, "grad_norm": 1.71875, "learning_rate": 1.496816544145252e-05, "loss": 0.9019, "step": 5919 }, { "epoch": 1.0030863536018175, "grad_norm": 1.609375, "learning_rate": 1.4966598075587532e-05, "loss": 0.8775, "step": 5920 }, { "epoch": 1.0032578176908074, "grad_norm": 1.59375, "learning_rate": 1.496503054774308e-05, "loss": 0.7859, "step": 5921 }, { "epoch": 1.0034292817797972, "grad_norm": 1.703125, "learning_rate": 1.4963462857970285e-05, "loss": 0.8872, "step": 5922 }, { "epoch": 1.003600745868787, "grad_norm": 1.7109375, "learning_rate": 1.4961895006320276e-05, "loss": 0.9129, "step": 5923 }, { "epoch": 1.003772209957777, "grad_norm": 1.5703125, "learning_rate": 1.4960326992844188e-05, "loss": 0.8566, "step": 5924 }, { "epoch": 1.0039436740467669, "grad_norm": 1.7578125, "learning_rate": 1.4958758817593153e-05, "loss": 0.9195, "step": 5925 }, { "epoch": 1.0041151381357567, "grad_norm": 1.75, "learning_rate": 1.4957190480618323e-05, "loss": 0.9396, "step": 5926 }, { "epoch": 1.0042866022247465, "grad_norm": 1.6953125, "learning_rate": 1.4955621981970844e-05, "loss": 0.8806, "step": 5927 }, { "epoch": 1.0044580663137364, "grad_norm": 1.578125, "learning_rate": 1.4954053321701871e-05, "loss": 0.8617, "step": 5928 }, { "epoch": 1.0046295304027262, "grad_norm": 1.609375, "learning_rate": 1.4952484499862563e-05, "loss": 0.8603, "step": 5929 }, { "epoch": 1.0048009944917162, "grad_norm": 1.6953125, "learning_rate": 1.4950915516504088e-05, "loss": 0.8906, "step": 5930 }, { "epoch": 1.004972458580706, "grad_norm": 1.6953125, "learning_rate": 1.4949346371677612e-05, "loss": 0.9146, "step": 5931 }, { "epoch": 1.0051439226696959, "grad_norm": 1.625, "learning_rate": 1.4947777065434315e-05, "loss": 0.9114, "step": 5932 }, { "epoch": 1.0053153867586857, "grad_norm": 1.71875, "learning_rate": 1.4946207597825375e-05, "loss": 0.8995, "step": 5933 }, { "epoch": 1.0054868508476755, "grad_norm": 1.671875, "learning_rate": 1.494463796890198e-05, "loss": 0.8308, "step": 5934 }, { "epoch": 1.0056583149366654, "grad_norm": 1.65625, "learning_rate": 1.4943068178715322e-05, "loss": 0.8965, "step": 5935 }, { "epoch": 1.0058297790256554, "grad_norm": 1.7109375, "learning_rate": 1.49414982273166e-05, "loss": 0.8661, "step": 5936 }, { "epoch": 1.0060012431146452, "grad_norm": 1.671875, "learning_rate": 1.4939928114757007e-05, "loss": 0.863, "step": 5937 }, { "epoch": 1.006172707203635, "grad_norm": 1.71875, "learning_rate": 1.4938357841087757e-05, "loss": 0.8777, "step": 5938 }, { "epoch": 1.0063441712926249, "grad_norm": 1.6640625, "learning_rate": 1.4936787406360062e-05, "loss": 0.8211, "step": 5939 }, { "epoch": 1.0065156353816147, "grad_norm": 1.6484375, "learning_rate": 1.4935216810625141e-05, "loss": 0.9032, "step": 5940 }, { "epoch": 1.0066870994706045, "grad_norm": 1.59375, "learning_rate": 1.4933646053934216e-05, "loss": 0.8251, "step": 5941 }, { "epoch": 1.0068585635595946, "grad_norm": 1.6796875, "learning_rate": 1.4932075136338515e-05, "loss": 0.8489, "step": 5942 }, { "epoch": 1.0070300276485844, "grad_norm": 1.6015625, "learning_rate": 1.493050405788927e-05, "loss": 0.8285, "step": 5943 }, { "epoch": 1.0072014917375742, "grad_norm": 1.4921875, "learning_rate": 1.492893281863772e-05, "loss": 0.8343, "step": 5944 }, { "epoch": 1.007372955826564, "grad_norm": 1.5703125, "learning_rate": 1.4927361418635111e-05, "loss": 0.8195, "step": 5945 }, { "epoch": 1.0075444199155539, "grad_norm": 1.609375, "learning_rate": 1.4925789857932693e-05, "loss": 0.8629, "step": 5946 }, { "epoch": 1.0077158840045437, "grad_norm": 1.5859375, "learning_rate": 1.492421813658172e-05, "loss": 0.7921, "step": 5947 }, { "epoch": 1.0078873480935338, "grad_norm": 1.53125, "learning_rate": 1.4922646254633446e-05, "loss": 0.8804, "step": 5948 }, { "epoch": 1.0080588121825236, "grad_norm": 1.640625, "learning_rate": 1.4921074212139141e-05, "loss": 0.8934, "step": 5949 }, { "epoch": 1.0082302762715134, "grad_norm": 1.6171875, "learning_rate": 1.4919502009150077e-05, "loss": 0.8651, "step": 5950 }, { "epoch": 1.0084017403605032, "grad_norm": 1.5546875, "learning_rate": 1.4917929645717528e-05, "loss": 0.8723, "step": 5951 }, { "epoch": 1.008573204449493, "grad_norm": 1.6328125, "learning_rate": 1.4916357121892775e-05, "loss": 0.8532, "step": 5952 }, { "epoch": 1.0087446685384829, "grad_norm": 1.609375, "learning_rate": 1.49147844377271e-05, "loss": 0.8857, "step": 5953 }, { "epoch": 1.008916132627473, "grad_norm": 1.65625, "learning_rate": 1.4913211593271802e-05, "loss": 0.8886, "step": 5954 }, { "epoch": 1.0090875967164628, "grad_norm": 1.7734375, "learning_rate": 1.4911638588578167e-05, "loss": 0.8719, "step": 5955 }, { "epoch": 1.0092590608054526, "grad_norm": 1.65625, "learning_rate": 1.4910065423697504e-05, "loss": 0.9048, "step": 5956 }, { "epoch": 1.0094305248944424, "grad_norm": 1.5859375, "learning_rate": 1.4908492098681118e-05, "loss": 0.7576, "step": 5957 }, { "epoch": 1.0096019889834322, "grad_norm": 1.671875, "learning_rate": 1.4906918613580322e-05, "loss": 0.8747, "step": 5958 }, { "epoch": 1.009773453072422, "grad_norm": 1.546875, "learning_rate": 1.490534496844643e-05, "loss": 0.8154, "step": 5959 }, { "epoch": 1.009944917161412, "grad_norm": 1.6640625, "learning_rate": 1.4903771163330766e-05, "loss": 0.7974, "step": 5960 }, { "epoch": 1.010116381250402, "grad_norm": 1.7890625, "learning_rate": 1.4902197198284663e-05, "loss": 0.8984, "step": 5961 }, { "epoch": 1.0102878453393918, "grad_norm": 1.5234375, "learning_rate": 1.4900623073359445e-05, "loss": 0.8233, "step": 5962 }, { "epoch": 1.0104593094283816, "grad_norm": 1.5234375, "learning_rate": 1.4899048788606456e-05, "loss": 0.8013, "step": 5963 }, { "epoch": 1.0106307735173714, "grad_norm": 1.6015625, "learning_rate": 1.4897474344077041e-05, "loss": 0.8674, "step": 5964 }, { "epoch": 1.0108022376063612, "grad_norm": 1.6171875, "learning_rate": 1.4895899739822542e-05, "loss": 0.8388, "step": 5965 }, { "epoch": 1.0109737016953513, "grad_norm": 1.5234375, "learning_rate": 1.4894324975894314e-05, "loss": 0.8599, "step": 5966 }, { "epoch": 1.011145165784341, "grad_norm": 1.6796875, "learning_rate": 1.4892750052343723e-05, "loss": 0.8238, "step": 5967 }, { "epoch": 1.011316629873331, "grad_norm": 1.5625, "learning_rate": 1.4891174969222127e-05, "loss": 0.8735, "step": 5968 }, { "epoch": 1.0114880939623208, "grad_norm": 1.546875, "learning_rate": 1.4889599726580898e-05, "loss": 0.7963, "step": 5969 }, { "epoch": 1.0116595580513106, "grad_norm": 1.5703125, "learning_rate": 1.4888024324471406e-05, "loss": 0.9024, "step": 5970 }, { "epoch": 1.0118310221403004, "grad_norm": 1.546875, "learning_rate": 1.4886448762945035e-05, "loss": 0.8448, "step": 5971 }, { "epoch": 1.0120024862292905, "grad_norm": 1.578125, "learning_rate": 1.4884873042053171e-05, "loss": 0.8464, "step": 5972 }, { "epoch": 1.0121739503182803, "grad_norm": 1.5546875, "learning_rate": 1.4883297161847204e-05, "loss": 0.8066, "step": 5973 }, { "epoch": 1.01234541440727, "grad_norm": 1.625, "learning_rate": 1.4881721122378523e-05, "loss": 0.8588, "step": 5974 }, { "epoch": 1.01251687849626, "grad_norm": 1.640625, "learning_rate": 1.4880144923698537e-05, "loss": 0.8857, "step": 5975 }, { "epoch": 1.0126883425852498, "grad_norm": 1.6171875, "learning_rate": 1.4878568565858644e-05, "loss": 0.8952, "step": 5976 }, { "epoch": 1.0128598066742396, "grad_norm": 1.6875, "learning_rate": 1.4876992048910264e-05, "loss": 0.8991, "step": 5977 }, { "epoch": 1.0130312707632296, "grad_norm": 1.6171875, "learning_rate": 1.4875415372904804e-05, "loss": 0.842, "step": 5978 }, { "epoch": 1.0132027348522195, "grad_norm": 1.5625, "learning_rate": 1.4873838537893693e-05, "loss": 0.9149, "step": 5979 }, { "epoch": 1.0133741989412093, "grad_norm": 1.609375, "learning_rate": 1.4872261543928353e-05, "loss": 0.8517, "step": 5980 }, { "epoch": 1.013545663030199, "grad_norm": 1.6015625, "learning_rate": 1.4870684391060217e-05, "loss": 0.8273, "step": 5981 }, { "epoch": 1.013717127119189, "grad_norm": 1.5703125, "learning_rate": 1.4869107079340721e-05, "loss": 0.8642, "step": 5982 }, { "epoch": 1.0138885912081788, "grad_norm": 1.625, "learning_rate": 1.486752960882131e-05, "loss": 0.8797, "step": 5983 }, { "epoch": 1.0140600552971688, "grad_norm": 1.6328125, "learning_rate": 1.4865951979553426e-05, "loss": 0.8991, "step": 5984 }, { "epoch": 1.0142315193861586, "grad_norm": 1.65625, "learning_rate": 1.4864374191588522e-05, "loss": 0.8193, "step": 5985 }, { "epoch": 1.0144029834751485, "grad_norm": 1.6640625, "learning_rate": 1.4862796244978061e-05, "loss": 0.8228, "step": 5986 }, { "epoch": 1.0145744475641383, "grad_norm": 1.5546875, "learning_rate": 1.4861218139773506e-05, "loss": 0.7662, "step": 5987 }, { "epoch": 1.014745911653128, "grad_norm": 1.609375, "learning_rate": 1.485963987602632e-05, "loss": 0.9135, "step": 5988 }, { "epoch": 1.014917375742118, "grad_norm": 1.6015625, "learning_rate": 1.4858061453787977e-05, "loss": 0.8718, "step": 5989 }, { "epoch": 1.015088839831108, "grad_norm": 1.640625, "learning_rate": 1.4856482873109956e-05, "loss": 0.8989, "step": 5990 }, { "epoch": 1.0152603039200978, "grad_norm": 1.6484375, "learning_rate": 1.4854904134043741e-05, "loss": 0.7878, "step": 5991 }, { "epoch": 1.0154317680090876, "grad_norm": 1.7265625, "learning_rate": 1.4853325236640821e-05, "loss": 0.886, "step": 5992 }, { "epoch": 1.0156032320980775, "grad_norm": 1.6796875, "learning_rate": 1.4851746180952687e-05, "loss": 0.849, "step": 5993 }, { "epoch": 1.0157746961870673, "grad_norm": 1.5859375, "learning_rate": 1.4850166967030842e-05, "loss": 0.8621, "step": 5994 }, { "epoch": 1.015946160276057, "grad_norm": 1.6484375, "learning_rate": 1.4848587594926787e-05, "loss": 0.9694, "step": 5995 }, { "epoch": 1.0161176243650472, "grad_norm": 1.6328125, "learning_rate": 1.4847008064692035e-05, "loss": 0.932, "step": 5996 }, { "epoch": 1.016289088454037, "grad_norm": 1.6796875, "learning_rate": 1.4845428376378094e-05, "loss": 0.8344, "step": 5997 }, { "epoch": 1.0164605525430268, "grad_norm": 1.59375, "learning_rate": 1.484384853003649e-05, "loss": 0.8891, "step": 5998 }, { "epoch": 1.0166320166320166, "grad_norm": 1.6640625, "learning_rate": 1.4842268525718747e-05, "loss": 0.9256, "step": 5999 }, { "epoch": 1.0168034807210065, "grad_norm": 1.6484375, "learning_rate": 1.484068836347639e-05, "loss": 0.822, "step": 6000 }, { "epoch": 1.0169749448099963, "grad_norm": 1.5859375, "learning_rate": 1.4839108043360958e-05, "loss": 0.8274, "step": 6001 }, { "epoch": 1.017146408898986, "grad_norm": 1.6328125, "learning_rate": 1.483752756542399e-05, "loss": 0.8475, "step": 6002 }, { "epoch": 1.0173178729879762, "grad_norm": 1.546875, "learning_rate": 1.4835946929717031e-05, "loss": 0.797, "step": 6003 }, { "epoch": 1.017489337076966, "grad_norm": 1.6640625, "learning_rate": 1.4834366136291633e-05, "loss": 0.8124, "step": 6004 }, { "epoch": 1.0176608011659558, "grad_norm": 1.7265625, "learning_rate": 1.483278518519935e-05, "loss": 0.8464, "step": 6005 }, { "epoch": 1.0178322652549456, "grad_norm": 1.6796875, "learning_rate": 1.4831204076491744e-05, "loss": 0.8662, "step": 6006 }, { "epoch": 1.0180037293439355, "grad_norm": 1.6953125, "learning_rate": 1.4829622810220383e-05, "loss": 0.8657, "step": 6007 }, { "epoch": 1.0181751934329253, "grad_norm": 1.625, "learning_rate": 1.482804138643683e-05, "loss": 0.8258, "step": 6008 }, { "epoch": 1.0183466575219153, "grad_norm": 1.6875, "learning_rate": 1.4826459805192673e-05, "loss": 0.8241, "step": 6009 }, { "epoch": 1.0185181216109052, "grad_norm": 1.703125, "learning_rate": 1.4824878066539485e-05, "loss": 0.8901, "step": 6010 }, { "epoch": 1.018689585699895, "grad_norm": 1.7578125, "learning_rate": 1.4823296170528854e-05, "loss": 0.8794, "step": 6011 }, { "epoch": 1.0188610497888848, "grad_norm": 1.6015625, "learning_rate": 1.4821714117212369e-05, "loss": 0.8573, "step": 6012 }, { "epoch": 1.0190325138778746, "grad_norm": 1.625, "learning_rate": 1.4820131906641631e-05, "loss": 0.7691, "step": 6013 }, { "epoch": 1.0192039779668645, "grad_norm": 1.8125, "learning_rate": 1.481854953886824e-05, "loss": 0.9168, "step": 6014 }, { "epoch": 1.0193754420558545, "grad_norm": 1.65625, "learning_rate": 1.4816967013943808e-05, "loss": 0.8843, "step": 6015 }, { "epoch": 1.0195469061448443, "grad_norm": 1.546875, "learning_rate": 1.481538433191994e-05, "loss": 0.8351, "step": 6016 }, { "epoch": 1.0197183702338342, "grad_norm": 1.578125, "learning_rate": 1.4813801492848256e-05, "loss": 0.8354, "step": 6017 }, { "epoch": 1.019889834322824, "grad_norm": 1.65625, "learning_rate": 1.4812218496780378e-05, "loss": 1.0101, "step": 6018 }, { "epoch": 1.0200612984118138, "grad_norm": 1.6640625, "learning_rate": 1.4810635343767935e-05, "loss": 0.8163, "step": 6019 }, { "epoch": 1.0202327625008036, "grad_norm": 1.5703125, "learning_rate": 1.4809052033862555e-05, "loss": 0.781, "step": 6020 }, { "epoch": 1.0204042265897937, "grad_norm": 1.609375, "learning_rate": 1.480746856711588e-05, "loss": 0.8783, "step": 6021 }, { "epoch": 1.0205756906787835, "grad_norm": 1.5859375, "learning_rate": 1.4805884943579551e-05, "loss": 0.8485, "step": 6022 }, { "epoch": 1.0207471547677733, "grad_norm": 1.7265625, "learning_rate": 1.4804301163305219e-05, "loss": 0.9251, "step": 6023 }, { "epoch": 1.0209186188567632, "grad_norm": 1.6953125, "learning_rate": 1.4802717226344533e-05, "loss": 0.8207, "step": 6024 }, { "epoch": 1.021090082945753, "grad_norm": 1.6953125, "learning_rate": 1.4801133132749157e-05, "loss": 0.8953, "step": 6025 }, { "epoch": 1.0212615470347428, "grad_norm": 2.15625, "learning_rate": 1.4799548882570744e-05, "loss": 0.8914, "step": 6026 }, { "epoch": 1.0214330111237329, "grad_norm": 1.6796875, "learning_rate": 1.4797964475860975e-05, "loss": 0.8625, "step": 6027 }, { "epoch": 1.0216044752127227, "grad_norm": 1.6328125, "learning_rate": 1.4796379912671514e-05, "loss": 0.8708, "step": 6028 }, { "epoch": 1.0217759393017125, "grad_norm": 1.6875, "learning_rate": 1.479479519305404e-05, "loss": 0.8928, "step": 6029 }, { "epoch": 1.0219474033907023, "grad_norm": 1.5078125, "learning_rate": 1.4793210317060243e-05, "loss": 0.8637, "step": 6030 }, { "epoch": 1.0221188674796922, "grad_norm": 1.625, "learning_rate": 1.4791625284741807e-05, "loss": 0.8429, "step": 6031 }, { "epoch": 1.022290331568682, "grad_norm": 1.53125, "learning_rate": 1.4790040096150425e-05, "loss": 0.7881, "step": 6032 }, { "epoch": 1.022461795657672, "grad_norm": 1.6484375, "learning_rate": 1.4788454751337799e-05, "loss": 0.9203, "step": 6033 }, { "epoch": 1.0226332597466619, "grad_norm": 1.6328125, "learning_rate": 1.4786869250355632e-05, "loss": 0.8146, "step": 6034 }, { "epoch": 1.0228047238356517, "grad_norm": 1.5859375, "learning_rate": 1.4785283593255633e-05, "loss": 0.7602, "step": 6035 }, { "epoch": 1.0229761879246415, "grad_norm": 1.6484375, "learning_rate": 1.4783697780089517e-05, "loss": 0.8346, "step": 6036 }, { "epoch": 1.0231476520136313, "grad_norm": 1.671875, "learning_rate": 1.4782111810909002e-05, "loss": 0.9469, "step": 6037 }, { "epoch": 1.0233191161026212, "grad_norm": 1.6640625, "learning_rate": 1.4780525685765813e-05, "loss": 0.9122, "step": 6038 }, { "epoch": 1.0234905801916112, "grad_norm": 1.734375, "learning_rate": 1.4778939404711682e-05, "loss": 0.8712, "step": 6039 }, { "epoch": 1.023662044280601, "grad_norm": 1.7734375, "learning_rate": 1.4777352967798335e-05, "loss": 0.9272, "step": 6040 }, { "epoch": 1.0238335083695909, "grad_norm": 1.6796875, "learning_rate": 1.4775766375077523e-05, "loss": 0.885, "step": 6041 }, { "epoch": 1.0240049724585807, "grad_norm": 1.6953125, "learning_rate": 1.4774179626600984e-05, "loss": 0.8927, "step": 6042 }, { "epoch": 1.0241764365475705, "grad_norm": 1.578125, "learning_rate": 1.4772592722420468e-05, "loss": 0.7589, "step": 6043 }, { "epoch": 1.0243479006365603, "grad_norm": 1.7890625, "learning_rate": 1.4771005662587737e-05, "loss": 0.7592, "step": 6044 }, { "epoch": 1.0245193647255504, "grad_norm": 1.609375, "learning_rate": 1.4769418447154541e-05, "loss": 0.7909, "step": 6045 }, { "epoch": 1.0246908288145402, "grad_norm": 1.609375, "learning_rate": 1.4767831076172649e-05, "loss": 0.8449, "step": 6046 }, { "epoch": 1.02486229290353, "grad_norm": 1.5390625, "learning_rate": 1.4766243549693836e-05, "loss": 0.7959, "step": 6047 }, { "epoch": 1.0250337569925199, "grad_norm": 1.6796875, "learning_rate": 1.4764655867769869e-05, "loss": 0.8639, "step": 6048 }, { "epoch": 1.0252052210815097, "grad_norm": 1.6328125, "learning_rate": 1.4763068030452532e-05, "loss": 0.8417, "step": 6049 }, { "epoch": 1.0253766851704995, "grad_norm": 1.546875, "learning_rate": 1.476148003779361e-05, "loss": 0.8112, "step": 6050 }, { "epoch": 1.0255481492594896, "grad_norm": 1.6640625, "learning_rate": 1.4759891889844895e-05, "loss": 0.845, "step": 6051 }, { "epoch": 1.0257196133484794, "grad_norm": 1.5703125, "learning_rate": 1.4758303586658183e-05, "loss": 0.7849, "step": 6052 }, { "epoch": 1.0258910774374692, "grad_norm": 1.609375, "learning_rate": 1.4756715128285271e-05, "loss": 0.7499, "step": 6053 }, { "epoch": 1.026062541526459, "grad_norm": 1.609375, "learning_rate": 1.4755126514777965e-05, "loss": 0.8702, "step": 6054 }, { "epoch": 1.0262340056154489, "grad_norm": 1.7578125, "learning_rate": 1.4753537746188081e-05, "loss": 0.8347, "step": 6055 }, { "epoch": 1.0264054697044387, "grad_norm": 1.59375, "learning_rate": 1.475194882256743e-05, "loss": 0.8415, "step": 6056 }, { "epoch": 1.0265769337934287, "grad_norm": 1.6640625, "learning_rate": 1.4750359743967833e-05, "loss": 0.8358, "step": 6057 }, { "epoch": 1.0267483978824186, "grad_norm": 1.6484375, "learning_rate": 1.4748770510441115e-05, "loss": 0.8432, "step": 6058 }, { "epoch": 1.0269198619714084, "grad_norm": 1.6328125, "learning_rate": 1.4747181122039109e-05, "loss": 0.9186, "step": 6059 }, { "epoch": 1.0270913260603982, "grad_norm": 1.671875, "learning_rate": 1.4745591578813651e-05, "loss": 0.8913, "step": 6060 }, { "epoch": 1.027262790149388, "grad_norm": 1.671875, "learning_rate": 1.4744001880816581e-05, "loss": 0.8778, "step": 6061 }, { "epoch": 1.0274342542383779, "grad_norm": 1.78125, "learning_rate": 1.4742412028099748e-05, "loss": 0.9405, "step": 6062 }, { "epoch": 1.027605718327368, "grad_norm": 1.6953125, "learning_rate": 1.4740822020714999e-05, "loss": 0.8893, "step": 6063 }, { "epoch": 1.0277771824163577, "grad_norm": 1.6328125, "learning_rate": 1.473923185871419e-05, "loss": 0.8459, "step": 6064 }, { "epoch": 1.0279486465053476, "grad_norm": 1.6953125, "learning_rate": 1.4737641542149184e-05, "loss": 0.867, "step": 6065 }, { "epoch": 1.0281201105943374, "grad_norm": 1.6875, "learning_rate": 1.4736051071071848e-05, "loss": 0.8569, "step": 6066 }, { "epoch": 1.0282915746833272, "grad_norm": 1.6640625, "learning_rate": 1.4734460445534053e-05, "loss": 0.8905, "step": 6067 }, { "epoch": 1.028463038772317, "grad_norm": 1.5546875, "learning_rate": 1.4732869665587673e-05, "loss": 0.8276, "step": 6068 }, { "epoch": 1.028634502861307, "grad_norm": 1.7265625, "learning_rate": 1.4731278731284591e-05, "loss": 0.904, "step": 6069 }, { "epoch": 1.028805966950297, "grad_norm": 1.6171875, "learning_rate": 1.4729687642676693e-05, "loss": 0.8826, "step": 6070 }, { "epoch": 1.0289774310392867, "grad_norm": 1.6875, "learning_rate": 1.4728096399815873e-05, "loss": 0.8896, "step": 6071 }, { "epoch": 1.0291488951282766, "grad_norm": 1.8046875, "learning_rate": 1.4726505002754023e-05, "loss": 0.8389, "step": 6072 }, { "epoch": 1.0293203592172664, "grad_norm": 1.6171875, "learning_rate": 1.4724913451543048e-05, "loss": 0.8424, "step": 6073 }, { "epoch": 1.0294918233062562, "grad_norm": 1.7421875, "learning_rate": 1.4723321746234852e-05, "loss": 0.9061, "step": 6074 }, { "epoch": 1.0296632873952463, "grad_norm": 1.59375, "learning_rate": 1.4721729886881346e-05, "loss": 0.8773, "step": 6075 }, { "epoch": 1.029834751484236, "grad_norm": 1.640625, "learning_rate": 1.4720137873534451e-05, "loss": 0.8438, "step": 6076 }, { "epoch": 1.030006215573226, "grad_norm": 1.6640625, "learning_rate": 1.4718545706246083e-05, "loss": 0.8368, "step": 6077 }, { "epoch": 1.0301776796622157, "grad_norm": 1.6171875, "learning_rate": 1.471695338506817e-05, "loss": 0.9028, "step": 6078 }, { "epoch": 1.0303491437512056, "grad_norm": 1.625, "learning_rate": 1.4715360910052648e-05, "loss": 0.8155, "step": 6079 }, { "epoch": 1.0305206078401954, "grad_norm": 1.6640625, "learning_rate": 1.4713768281251448e-05, "loss": 0.9443, "step": 6080 }, { "epoch": 1.0306920719291854, "grad_norm": 1.609375, "learning_rate": 1.4712175498716517e-05, "loss": 0.8505, "step": 6081 }, { "epoch": 1.0308635360181753, "grad_norm": 1.5625, "learning_rate": 1.4710582562499796e-05, "loss": 0.8387, "step": 6082 }, { "epoch": 1.031035000107165, "grad_norm": 1.5703125, "learning_rate": 1.4708989472653239e-05, "loss": 0.7899, "step": 6083 }, { "epoch": 1.031206464196155, "grad_norm": 1.546875, "learning_rate": 1.4707396229228803e-05, "loss": 0.8112, "step": 6084 }, { "epoch": 1.0313779282851447, "grad_norm": 1.625, "learning_rate": 1.4705802832278453e-05, "loss": 0.9203, "step": 6085 }, { "epoch": 1.0315493923741346, "grad_norm": 1.625, "learning_rate": 1.4704209281854149e-05, "loss": 0.8632, "step": 6086 }, { "epoch": 1.0317208564631244, "grad_norm": 1.703125, "learning_rate": 1.4702615578007863e-05, "loss": 0.9001, "step": 6087 }, { "epoch": 1.0318923205521144, "grad_norm": 1.625, "learning_rate": 1.470102172079158e-05, "loss": 0.7713, "step": 6088 }, { "epoch": 1.0320637846411043, "grad_norm": 1.625, "learning_rate": 1.4699427710257275e-05, "loss": 0.8201, "step": 6089 }, { "epoch": 1.032235248730094, "grad_norm": 1.671875, "learning_rate": 1.4697833546456935e-05, "loss": 0.915, "step": 6090 }, { "epoch": 1.032406712819084, "grad_norm": 1.6015625, "learning_rate": 1.4696239229442553e-05, "loss": 0.8307, "step": 6091 }, { "epoch": 1.0325781769080737, "grad_norm": 1.6171875, "learning_rate": 1.4694644759266126e-05, "loss": 0.8525, "step": 6092 }, { "epoch": 1.0327496409970638, "grad_norm": 1.625, "learning_rate": 1.4693050135979654e-05, "loss": 0.784, "step": 6093 }, { "epoch": 1.0329211050860536, "grad_norm": 1.578125, "learning_rate": 1.4691455359635143e-05, "loss": 0.8912, "step": 6094 }, { "epoch": 1.0330925691750434, "grad_norm": 1.6796875, "learning_rate": 1.4689860430284608e-05, "loss": 0.8415, "step": 6095 }, { "epoch": 1.0332640332640333, "grad_norm": 1.640625, "learning_rate": 1.4688265347980065e-05, "loss": 0.9884, "step": 6096 }, { "epoch": 1.033435497353023, "grad_norm": 1.609375, "learning_rate": 1.4686670112773534e-05, "loss": 0.8707, "step": 6097 }, { "epoch": 1.033606961442013, "grad_norm": 1.4765625, "learning_rate": 1.4685074724717041e-05, "loss": 0.7489, "step": 6098 }, { "epoch": 1.0337784255310027, "grad_norm": 1.640625, "learning_rate": 1.468347918386262e-05, "loss": 0.866, "step": 6099 }, { "epoch": 1.0339498896199928, "grad_norm": 1.6328125, "learning_rate": 1.4681883490262308e-05, "loss": 0.8207, "step": 6100 }, { "epoch": 1.0341213537089826, "grad_norm": 1.609375, "learning_rate": 1.4680287643968143e-05, "loss": 0.7684, "step": 6101 }, { "epoch": 1.0342928177979724, "grad_norm": 1.7890625, "learning_rate": 1.4678691645032175e-05, "loss": 0.9001, "step": 6102 }, { "epoch": 1.0344642818869623, "grad_norm": 1.6484375, "learning_rate": 1.4677095493506452e-05, "loss": 0.9167, "step": 6103 }, { "epoch": 1.034635745975952, "grad_norm": 1.640625, "learning_rate": 1.4675499189443035e-05, "loss": 0.8546, "step": 6104 }, { "epoch": 1.034807210064942, "grad_norm": 1.6015625, "learning_rate": 1.4673902732893981e-05, "loss": 0.924, "step": 6105 }, { "epoch": 1.034978674153932, "grad_norm": 1.703125, "learning_rate": 1.4672306123911359e-05, "loss": 0.8848, "step": 6106 }, { "epoch": 1.0351501382429218, "grad_norm": 1.6640625, "learning_rate": 1.467070936254724e-05, "loss": 0.8747, "step": 6107 }, { "epoch": 1.0353216023319116, "grad_norm": 1.65625, "learning_rate": 1.4669112448853702e-05, "loss": 0.8458, "step": 6108 }, { "epoch": 1.0354930664209014, "grad_norm": 1.6015625, "learning_rate": 1.4667515382882825e-05, "loss": 0.899, "step": 6109 }, { "epoch": 1.0356645305098913, "grad_norm": 1.5078125, "learning_rate": 1.4665918164686692e-05, "loss": 0.8748, "step": 6110 }, { "epoch": 1.035835994598881, "grad_norm": 1.6875, "learning_rate": 1.4664320794317399e-05, "loss": 0.8575, "step": 6111 }, { "epoch": 1.0360074586878711, "grad_norm": 1.6875, "learning_rate": 1.4662723271827043e-05, "loss": 0.8943, "step": 6112 }, { "epoch": 1.036178922776861, "grad_norm": 1.640625, "learning_rate": 1.466112559726772e-05, "loss": 0.8982, "step": 6113 }, { "epoch": 1.0363503868658508, "grad_norm": 1.703125, "learning_rate": 1.4659527770691541e-05, "loss": 0.9443, "step": 6114 }, { "epoch": 1.0365218509548406, "grad_norm": 1.71875, "learning_rate": 1.4657929792150611e-05, "loss": 0.8187, "step": 6115 }, { "epoch": 1.0366933150438304, "grad_norm": 1.6015625, "learning_rate": 1.4656331661697055e-05, "loss": 0.8568, "step": 6116 }, { "epoch": 1.0368647791328203, "grad_norm": 1.5703125, "learning_rate": 1.4654733379382986e-05, "loss": 0.8306, "step": 6117 }, { "epoch": 1.0370362432218103, "grad_norm": 1.6953125, "learning_rate": 1.4653134945260536e-05, "loss": 0.9314, "step": 6118 }, { "epoch": 1.0372077073108001, "grad_norm": 1.71875, "learning_rate": 1.4651536359381835e-05, "loss": 0.8781, "step": 6119 }, { "epoch": 1.03737917139979, "grad_norm": 1.7109375, "learning_rate": 1.4649937621799016e-05, "loss": 0.9768, "step": 6120 }, { "epoch": 1.0375506354887798, "grad_norm": 1.59375, "learning_rate": 1.464833873256422e-05, "loss": 0.7535, "step": 6121 }, { "epoch": 1.0377220995777696, "grad_norm": 1.546875, "learning_rate": 1.4646739691729592e-05, "loss": 0.7748, "step": 6122 }, { "epoch": 1.0378935636667594, "grad_norm": 1.671875, "learning_rate": 1.4645140499347287e-05, "loss": 0.8655, "step": 6123 }, { "epoch": 1.0380650277557495, "grad_norm": 1.5625, "learning_rate": 1.4643541155469459e-05, "loss": 0.8241, "step": 6124 }, { "epoch": 1.0382364918447393, "grad_norm": 1.515625, "learning_rate": 1.4641941660148267e-05, "loss": 0.8163, "step": 6125 }, { "epoch": 1.0384079559337291, "grad_norm": 1.71875, "learning_rate": 1.4640342013435875e-05, "loss": 0.9213, "step": 6126 }, { "epoch": 1.038579420022719, "grad_norm": 1.625, "learning_rate": 1.463874221538446e-05, "loss": 0.7863, "step": 6127 }, { "epoch": 1.0387508841117088, "grad_norm": 1.7890625, "learning_rate": 1.4637142266046192e-05, "loss": 0.8414, "step": 6128 }, { "epoch": 1.0389223482006986, "grad_norm": 1.6640625, "learning_rate": 1.4635542165473254e-05, "loss": 0.9, "step": 6129 }, { "epoch": 1.0390938122896887, "grad_norm": 1.703125, "learning_rate": 1.4633941913717826e-05, "loss": 0.8092, "step": 6130 }, { "epoch": 1.0392652763786785, "grad_norm": 1.6484375, "learning_rate": 1.4632341510832109e-05, "loss": 0.9552, "step": 6131 }, { "epoch": 1.0394367404676683, "grad_norm": 1.6015625, "learning_rate": 1.4630740956868289e-05, "loss": 0.7918, "step": 6132 }, { "epoch": 1.0396082045566581, "grad_norm": 1.640625, "learning_rate": 1.4629140251878567e-05, "loss": 0.8943, "step": 6133 }, { "epoch": 1.039779668645648, "grad_norm": 1.6640625, "learning_rate": 1.4627539395915146e-05, "loss": 0.8804, "step": 6134 }, { "epoch": 1.0399511327346378, "grad_norm": 1.7578125, "learning_rate": 1.4625938389030246e-05, "loss": 0.8774, "step": 6135 }, { "epoch": 1.0401225968236278, "grad_norm": 1.6171875, "learning_rate": 1.4624337231276071e-05, "loss": 0.7932, "step": 6136 }, { "epoch": 1.0402940609126177, "grad_norm": 1.59375, "learning_rate": 1.4622735922704849e-05, "loss": 0.8117, "step": 6137 }, { "epoch": 1.0404655250016075, "grad_norm": 1.703125, "learning_rate": 1.46211344633688e-05, "loss": 0.9195, "step": 6138 }, { "epoch": 1.0406369890905973, "grad_norm": 1.734375, "learning_rate": 1.4619532853320153e-05, "loss": 0.8734, "step": 6139 }, { "epoch": 1.0408084531795871, "grad_norm": 1.5859375, "learning_rate": 1.4617931092611146e-05, "loss": 0.7903, "step": 6140 }, { "epoch": 1.040979917268577, "grad_norm": 1.8359375, "learning_rate": 1.4616329181294016e-05, "loss": 0.8192, "step": 6141 }, { "epoch": 1.041151381357567, "grad_norm": 1.609375, "learning_rate": 1.4614727119421007e-05, "loss": 0.8648, "step": 6142 }, { "epoch": 1.0413228454465568, "grad_norm": 1.7265625, "learning_rate": 1.4613124907044369e-05, "loss": 0.8881, "step": 6143 }, { "epoch": 1.0414943095355467, "grad_norm": 1.71875, "learning_rate": 1.4611522544216357e-05, "loss": 0.8722, "step": 6144 }, { "epoch": 1.0416657736245365, "grad_norm": 1.59375, "learning_rate": 1.460992003098923e-05, "loss": 0.81, "step": 6145 }, { "epoch": 1.0418372377135263, "grad_norm": 1.6484375, "learning_rate": 1.460831736741525e-05, "loss": 0.8484, "step": 6146 }, { "epoch": 1.0420087018025161, "grad_norm": 1.6953125, "learning_rate": 1.4606714553546689e-05, "loss": 0.8397, "step": 6147 }, { "epoch": 1.0421801658915062, "grad_norm": 1.671875, "learning_rate": 1.460511158943582e-05, "loss": 0.8013, "step": 6148 }, { "epoch": 1.042351629980496, "grad_norm": 1.734375, "learning_rate": 1.460350847513492e-05, "loss": 0.9249, "step": 6149 }, { "epoch": 1.0425230940694858, "grad_norm": 1.5625, "learning_rate": 1.4601905210696273e-05, "loss": 0.843, "step": 6150 }, { "epoch": 1.0426945581584757, "grad_norm": 1.8984375, "learning_rate": 1.4600301796172167e-05, "loss": 0.9169, "step": 6151 }, { "epoch": 1.0428660222474655, "grad_norm": 1.6640625, "learning_rate": 1.4598698231614896e-05, "loss": 0.8661, "step": 6152 }, { "epoch": 1.0430374863364553, "grad_norm": 1.625, "learning_rate": 1.459709451707676e-05, "loss": 0.8222, "step": 6153 }, { "epoch": 1.0432089504254454, "grad_norm": 1.578125, "learning_rate": 1.459549065261006e-05, "loss": 0.8195, "step": 6154 }, { "epoch": 1.0433804145144352, "grad_norm": 1.6328125, "learning_rate": 1.4593886638267104e-05, "loss": 0.9094, "step": 6155 }, { "epoch": 1.043551878603425, "grad_norm": 1.5859375, "learning_rate": 1.4592282474100207e-05, "loss": 0.8695, "step": 6156 }, { "epoch": 1.0437233426924148, "grad_norm": 1.609375, "learning_rate": 1.4590678160161686e-05, "loss": 0.8661, "step": 6157 }, { "epoch": 1.0438948067814047, "grad_norm": 1.5625, "learning_rate": 1.4589073696503864e-05, "loss": 0.8281, "step": 6158 }, { "epoch": 1.0440662708703945, "grad_norm": 1.75, "learning_rate": 1.4587469083179065e-05, "loss": 0.9914, "step": 6159 }, { "epoch": 1.0442377349593845, "grad_norm": 1.6796875, "learning_rate": 1.4585864320239629e-05, "loss": 0.8431, "step": 6160 }, { "epoch": 1.0444091990483744, "grad_norm": 1.6953125, "learning_rate": 1.4584259407737884e-05, "loss": 0.8542, "step": 6161 }, { "epoch": 1.0445806631373642, "grad_norm": 1.609375, "learning_rate": 1.4582654345726177e-05, "loss": 0.8523, "step": 6162 }, { "epoch": 1.044752127226354, "grad_norm": 1.625, "learning_rate": 1.4581049134256857e-05, "loss": 0.9015, "step": 6163 }, { "epoch": 1.0449235913153438, "grad_norm": 1.640625, "learning_rate": 1.4579443773382273e-05, "loss": 0.8365, "step": 6164 }, { "epoch": 1.0450950554043337, "grad_norm": 1.609375, "learning_rate": 1.4577838263154787e-05, "loss": 0.8479, "step": 6165 }, { "epoch": 1.0452665194933237, "grad_norm": 1.65625, "learning_rate": 1.4576232603626754e-05, "loss": 0.8198, "step": 6166 }, { "epoch": 1.0454379835823135, "grad_norm": 1.65625, "learning_rate": 1.4574626794850541e-05, "loss": 0.8248, "step": 6167 }, { "epoch": 1.0456094476713034, "grad_norm": 1.625, "learning_rate": 1.4573020836878524e-05, "loss": 0.8216, "step": 6168 }, { "epoch": 1.0457809117602932, "grad_norm": 1.609375, "learning_rate": 1.4571414729763076e-05, "loss": 0.8822, "step": 6169 }, { "epoch": 1.045952375849283, "grad_norm": 1.6484375, "learning_rate": 1.456980847355658e-05, "loss": 0.9406, "step": 6170 }, { "epoch": 1.0461238399382728, "grad_norm": 1.5234375, "learning_rate": 1.4568202068311421e-05, "loss": 0.7995, "step": 6171 }, { "epoch": 1.046295304027263, "grad_norm": 1.6640625, "learning_rate": 1.4566595514079991e-05, "loss": 0.7696, "step": 6172 }, { "epoch": 1.0464667681162527, "grad_norm": 1.515625, "learning_rate": 1.4564988810914686e-05, "loss": 0.8146, "step": 6173 }, { "epoch": 1.0466382322052425, "grad_norm": 1.6171875, "learning_rate": 1.4563381958867903e-05, "loss": 0.8455, "step": 6174 }, { "epoch": 1.0468096962942324, "grad_norm": 1.59375, "learning_rate": 1.4561774957992052e-05, "loss": 0.8198, "step": 6175 }, { "epoch": 1.0469811603832222, "grad_norm": 1.609375, "learning_rate": 1.456016780833954e-05, "loss": 0.8871, "step": 6176 }, { "epoch": 1.047152624472212, "grad_norm": 1.6640625, "learning_rate": 1.455856050996279e-05, "loss": 0.8185, "step": 6177 }, { "epoch": 1.047324088561202, "grad_norm": 1.546875, "learning_rate": 1.455695306291421e-05, "loss": 0.8382, "step": 6178 }, { "epoch": 1.047495552650192, "grad_norm": 1.625, "learning_rate": 1.4555345467246231e-05, "loss": 0.9098, "step": 6179 }, { "epoch": 1.0476670167391817, "grad_norm": 1.609375, "learning_rate": 1.4553737723011283e-05, "loss": 0.89, "step": 6180 }, { "epoch": 1.0478384808281715, "grad_norm": 1.671875, "learning_rate": 1.4552129830261797e-05, "loss": 0.8876, "step": 6181 }, { "epoch": 1.0480099449171614, "grad_norm": 1.671875, "learning_rate": 1.4550521789050218e-05, "loss": 0.9873, "step": 6182 }, { "epoch": 1.0481814090061512, "grad_norm": 1.6015625, "learning_rate": 1.454891359942899e-05, "loss": 0.8154, "step": 6183 }, { "epoch": 1.048352873095141, "grad_norm": 1.59375, "learning_rate": 1.4547305261450559e-05, "loss": 0.9423, "step": 6184 }, { "epoch": 1.048524337184131, "grad_norm": 1.671875, "learning_rate": 1.454569677516738e-05, "loss": 0.8704, "step": 6185 }, { "epoch": 1.048695801273121, "grad_norm": 1.6796875, "learning_rate": 1.4544088140631912e-05, "loss": 0.8998, "step": 6186 }, { "epoch": 1.0488672653621107, "grad_norm": 1.6328125, "learning_rate": 1.4542479357896615e-05, "loss": 0.9563, "step": 6187 }, { "epoch": 1.0490387294511005, "grad_norm": 1.6015625, "learning_rate": 1.4540870427013962e-05, "loss": 0.8739, "step": 6188 }, { "epoch": 1.0492101935400904, "grad_norm": 1.6953125, "learning_rate": 1.4539261348036426e-05, "loss": 0.8912, "step": 6189 }, { "epoch": 1.0493816576290804, "grad_norm": 1.671875, "learning_rate": 1.4537652121016485e-05, "loss": 0.8597, "step": 6190 }, { "epoch": 1.0495531217180702, "grad_norm": 1.671875, "learning_rate": 1.453604274600662e-05, "loss": 0.9169, "step": 6191 }, { "epoch": 1.04972458580706, "grad_norm": 1.796875, "learning_rate": 1.453443322305932e-05, "loss": 0.8305, "step": 6192 }, { "epoch": 1.04989604989605, "grad_norm": 1.6015625, "learning_rate": 1.4532823552227078e-05, "loss": 0.832, "step": 6193 }, { "epoch": 1.0500675139850397, "grad_norm": 1.7890625, "learning_rate": 1.4531213733562392e-05, "loss": 0.9607, "step": 6194 }, { "epoch": 1.0502389780740295, "grad_norm": 1.6640625, "learning_rate": 1.4529603767117761e-05, "loss": 0.8243, "step": 6195 }, { "epoch": 1.0504104421630194, "grad_norm": 1.625, "learning_rate": 1.4527993652945696e-05, "loss": 0.8031, "step": 6196 }, { "epoch": 1.0505819062520094, "grad_norm": 1.625, "learning_rate": 1.4526383391098704e-05, "loss": 0.9309, "step": 6197 }, { "epoch": 1.0507533703409992, "grad_norm": 1.640625, "learning_rate": 1.4524772981629309e-05, "loss": 0.8646, "step": 6198 }, { "epoch": 1.050924834429989, "grad_norm": 1.734375, "learning_rate": 1.4523162424590025e-05, "loss": 0.9192, "step": 6199 }, { "epoch": 1.051096298518979, "grad_norm": 1.5, "learning_rate": 1.4521551720033382e-05, "loss": 0.8055, "step": 6200 }, { "epoch": 1.0512677626079687, "grad_norm": 1.6640625, "learning_rate": 1.4519940868011913e-05, "loss": 0.8371, "step": 6201 }, { "epoch": 1.0514392266969586, "grad_norm": 1.546875, "learning_rate": 1.4518329868578149e-05, "loss": 0.8334, "step": 6202 }, { "epoch": 1.0516106907859486, "grad_norm": 1.671875, "learning_rate": 1.4516718721784635e-05, "loss": 0.8649, "step": 6203 }, { "epoch": 1.0517821548749384, "grad_norm": 1.703125, "learning_rate": 1.4515107427683917e-05, "loss": 0.9058, "step": 6204 }, { "epoch": 1.0519536189639282, "grad_norm": 1.6328125, "learning_rate": 1.4513495986328541e-05, "loss": 0.8435, "step": 6205 }, { "epoch": 1.052125083052918, "grad_norm": 1.734375, "learning_rate": 1.4511884397771065e-05, "loss": 0.846, "step": 6206 }, { "epoch": 1.052296547141908, "grad_norm": 1.6875, "learning_rate": 1.451027266206405e-05, "loss": 0.8909, "step": 6207 }, { "epoch": 1.0524680112308977, "grad_norm": 1.6328125, "learning_rate": 1.4508660779260057e-05, "loss": 0.9091, "step": 6208 }, { "epoch": 1.0526394753198878, "grad_norm": 1.5859375, "learning_rate": 1.4507048749411658e-05, "loss": 0.8144, "step": 6209 }, { "epoch": 1.0528109394088776, "grad_norm": 1.71875, "learning_rate": 1.4505436572571428e-05, "loss": 0.8372, "step": 6210 }, { "epoch": 1.0529824034978674, "grad_norm": 1.5703125, "learning_rate": 1.4503824248791946e-05, "loss": 0.8087, "step": 6211 }, { "epoch": 1.0531538675868573, "grad_norm": 1.671875, "learning_rate": 1.4502211778125799e-05, "loss": 0.9126, "step": 6212 }, { "epoch": 1.053325331675847, "grad_norm": 1.7421875, "learning_rate": 1.4500599160625565e-05, "loss": 0.8595, "step": 6213 }, { "epoch": 1.053496795764837, "grad_norm": 1.578125, "learning_rate": 1.4498986396343851e-05, "loss": 0.8395, "step": 6214 }, { "epoch": 1.053668259853827, "grad_norm": 1.6015625, "learning_rate": 1.4497373485333245e-05, "loss": 0.8365, "step": 6215 }, { "epoch": 1.0538397239428168, "grad_norm": 1.703125, "learning_rate": 1.4495760427646355e-05, "loss": 0.9233, "step": 6216 }, { "epoch": 1.0540111880318066, "grad_norm": 1.640625, "learning_rate": 1.4494147223335789e-05, "loss": 0.7995, "step": 6217 }, { "epoch": 1.0541826521207964, "grad_norm": 1.8125, "learning_rate": 1.4492533872454157e-05, "loss": 0.9579, "step": 6218 }, { "epoch": 1.0543541162097863, "grad_norm": 1.7734375, "learning_rate": 1.449092037505408e-05, "loss": 0.9061, "step": 6219 }, { "epoch": 1.054525580298776, "grad_norm": 1.78125, "learning_rate": 1.4489306731188178e-05, "loss": 0.8286, "step": 6220 }, { "epoch": 1.0546970443877661, "grad_norm": 1.734375, "learning_rate": 1.4487692940909077e-05, "loss": 0.9238, "step": 6221 }, { "epoch": 1.054868508476756, "grad_norm": 1.7265625, "learning_rate": 1.4486079004269411e-05, "loss": 0.8596, "step": 6222 }, { "epoch": 1.0550399725657458, "grad_norm": 1.6796875, "learning_rate": 1.4484464921321818e-05, "loss": 0.8859, "step": 6223 }, { "epoch": 1.0552114366547356, "grad_norm": 1.640625, "learning_rate": 1.4482850692118932e-05, "loss": 0.8388, "step": 6224 }, { "epoch": 1.0553829007437254, "grad_norm": 1.625, "learning_rate": 1.4481236316713408e-05, "loss": 0.8139, "step": 6225 }, { "epoch": 1.0555543648327153, "grad_norm": 1.7265625, "learning_rate": 1.447962179515789e-05, "loss": 0.9119, "step": 6226 }, { "epoch": 1.0557258289217053, "grad_norm": 1.6875, "learning_rate": 1.4478007127505039e-05, "loss": 0.8191, "step": 6227 }, { "epoch": 1.0558972930106951, "grad_norm": 1.703125, "learning_rate": 1.4476392313807508e-05, "loss": 0.8268, "step": 6228 }, { "epoch": 1.056068757099685, "grad_norm": 1.703125, "learning_rate": 1.4474777354117974e-05, "loss": 0.8519, "step": 6229 }, { "epoch": 1.0562402211886748, "grad_norm": 1.6015625, "learning_rate": 1.4473162248489097e-05, "loss": 0.8713, "step": 6230 }, { "epoch": 1.0564116852776646, "grad_norm": 1.703125, "learning_rate": 1.4471546996973555e-05, "loss": 0.8951, "step": 6231 }, { "epoch": 1.0565831493666544, "grad_norm": 1.6171875, "learning_rate": 1.4469931599624027e-05, "loss": 0.806, "step": 6232 }, { "epoch": 1.0567546134556445, "grad_norm": 1.6328125, "learning_rate": 1.4468316056493197e-05, "loss": 0.8072, "step": 6233 }, { "epoch": 1.0569260775446343, "grad_norm": 1.6875, "learning_rate": 1.4466700367633754e-05, "loss": 0.8183, "step": 6234 }, { "epoch": 1.0570975416336241, "grad_norm": 1.6328125, "learning_rate": 1.4465084533098394e-05, "loss": 0.7764, "step": 6235 }, { "epoch": 1.057269005722614, "grad_norm": 1.6875, "learning_rate": 1.446346855293981e-05, "loss": 0.9423, "step": 6236 }, { "epoch": 1.0574404698116038, "grad_norm": 1.5078125, "learning_rate": 1.4461852427210711e-05, "loss": 0.7811, "step": 6237 }, { "epoch": 1.0576119339005936, "grad_norm": 1.6953125, "learning_rate": 1.4460236155963803e-05, "loss": 0.8399, "step": 6238 }, { "epoch": 1.0577833979895837, "grad_norm": 1.6953125, "learning_rate": 1.4458619739251795e-05, "loss": 0.8339, "step": 6239 }, { "epoch": 1.0579548620785735, "grad_norm": 1.59375, "learning_rate": 1.4457003177127414e-05, "loss": 0.8646, "step": 6240 }, { "epoch": 1.0581263261675633, "grad_norm": 1.609375, "learning_rate": 1.4455386469643371e-05, "loss": 0.7481, "step": 6241 }, { "epoch": 1.0582977902565531, "grad_norm": 1.6640625, "learning_rate": 1.44537696168524e-05, "loss": 0.8451, "step": 6242 }, { "epoch": 1.058469254345543, "grad_norm": 1.609375, "learning_rate": 1.4452152618807228e-05, "loss": 0.8559, "step": 6243 }, { "epoch": 1.0586407184345328, "grad_norm": 1.6171875, "learning_rate": 1.4450535475560594e-05, "loss": 0.8876, "step": 6244 }, { "epoch": 1.0588121825235228, "grad_norm": 1.6171875, "learning_rate": 1.4448918187165242e-05, "loss": 0.8205, "step": 6245 }, { "epoch": 1.0589836466125127, "grad_norm": 1.7109375, "learning_rate": 1.4447300753673912e-05, "loss": 0.8484, "step": 6246 }, { "epoch": 1.0591551107015025, "grad_norm": 1.6015625, "learning_rate": 1.4445683175139357e-05, "loss": 0.8203, "step": 6247 }, { "epoch": 1.0593265747904923, "grad_norm": 1.609375, "learning_rate": 1.4444065451614336e-05, "loss": 0.8436, "step": 6248 }, { "epoch": 1.0594980388794821, "grad_norm": 1.65625, "learning_rate": 1.4442447583151604e-05, "loss": 0.8511, "step": 6249 }, { "epoch": 1.059669502968472, "grad_norm": 1.6328125, "learning_rate": 1.4440829569803927e-05, "loss": 0.8101, "step": 6250 }, { "epoch": 1.059840967057462, "grad_norm": 1.6328125, "learning_rate": 1.4439211411624074e-05, "loss": 0.8623, "step": 6251 }, { "epoch": 1.0600124311464518, "grad_norm": 1.7421875, "learning_rate": 1.4437593108664825e-05, "loss": 0.8703, "step": 6252 }, { "epoch": 1.0601838952354417, "grad_norm": 1.6328125, "learning_rate": 1.4435974660978951e-05, "loss": 0.8501, "step": 6253 }, { "epoch": 1.0603553593244315, "grad_norm": 1.71875, "learning_rate": 1.443435606861924e-05, "loss": 0.9815, "step": 6254 }, { "epoch": 1.0605268234134213, "grad_norm": 1.6015625, "learning_rate": 1.4432737331638477e-05, "loss": 0.8542, "step": 6255 }, { "epoch": 1.0606982875024111, "grad_norm": 1.6171875, "learning_rate": 1.4431118450089458e-05, "loss": 0.8357, "step": 6256 }, { "epoch": 1.0608697515914012, "grad_norm": 1.5546875, "learning_rate": 1.4429499424024982e-05, "loss": 0.8416, "step": 6257 }, { "epoch": 1.061041215680391, "grad_norm": 1.625, "learning_rate": 1.442788025349785e-05, "loss": 0.8575, "step": 6258 }, { "epoch": 1.0612126797693808, "grad_norm": 1.59375, "learning_rate": 1.4426260938560868e-05, "loss": 0.8339, "step": 6259 }, { "epoch": 1.0613841438583707, "grad_norm": 1.75, "learning_rate": 1.442464147926685e-05, "loss": 0.8274, "step": 6260 }, { "epoch": 1.0615556079473605, "grad_norm": 1.578125, "learning_rate": 1.442302187566861e-05, "loss": 0.796, "step": 6261 }, { "epoch": 1.0617270720363503, "grad_norm": 1.6640625, "learning_rate": 1.4421402127818974e-05, "loss": 0.8236, "step": 6262 }, { "epoch": 1.0618985361253404, "grad_norm": 1.6875, "learning_rate": 1.4419782235770763e-05, "loss": 0.8753, "step": 6263 }, { "epoch": 1.0620700002143302, "grad_norm": 1.6953125, "learning_rate": 1.4418162199576808e-05, "loss": 0.9048, "step": 6264 }, { "epoch": 1.06224146430332, "grad_norm": 1.703125, "learning_rate": 1.441654201928995e-05, "loss": 0.8329, "step": 6265 }, { "epoch": 1.0624129283923098, "grad_norm": 1.6875, "learning_rate": 1.4414921694963024e-05, "loss": 0.8477, "step": 6266 }, { "epoch": 1.0625843924812997, "grad_norm": 1.6171875, "learning_rate": 1.4413301226648877e-05, "loss": 0.8548, "step": 6267 }, { "epoch": 1.0627558565702895, "grad_norm": 1.75, "learning_rate": 1.4411680614400356e-05, "loss": 0.861, "step": 6268 }, { "epoch": 1.0629273206592793, "grad_norm": 1.625, "learning_rate": 1.4410059858270322e-05, "loss": 0.7642, "step": 6269 }, { "epoch": 1.0630987847482694, "grad_norm": 1.53125, "learning_rate": 1.4408438958311626e-05, "loss": 0.7788, "step": 6270 }, { "epoch": 1.0632702488372592, "grad_norm": 1.578125, "learning_rate": 1.4406817914577135e-05, "loss": 0.799, "step": 6271 }, { "epoch": 1.063441712926249, "grad_norm": 1.6640625, "learning_rate": 1.4405196727119717e-05, "loss": 0.8563, "step": 6272 }, { "epoch": 1.0636131770152388, "grad_norm": 1.578125, "learning_rate": 1.4403575395992247e-05, "loss": 0.7594, "step": 6273 }, { "epoch": 1.0637846411042287, "grad_norm": 1.671875, "learning_rate": 1.44019539212476e-05, "loss": 0.8161, "step": 6274 }, { "epoch": 1.0639561051932187, "grad_norm": 1.6640625, "learning_rate": 1.4400332302938658e-05, "loss": 0.8292, "step": 6275 }, { "epoch": 1.0641275692822085, "grad_norm": 1.7109375, "learning_rate": 1.439871054111831e-05, "loss": 0.808, "step": 6276 }, { "epoch": 1.0642990333711984, "grad_norm": 1.6171875, "learning_rate": 1.439708863583945e-05, "loss": 0.8312, "step": 6277 }, { "epoch": 1.0644704974601882, "grad_norm": 1.6484375, "learning_rate": 1.4395466587154969e-05, "loss": 0.8997, "step": 6278 }, { "epoch": 1.064641961549178, "grad_norm": 1.609375, "learning_rate": 1.4393844395117771e-05, "loss": 0.7794, "step": 6279 }, { "epoch": 1.0648134256381678, "grad_norm": 1.6328125, "learning_rate": 1.439222205978076e-05, "loss": 0.9294, "step": 6280 }, { "epoch": 1.0649848897271577, "grad_norm": 1.5625, "learning_rate": 1.4390599581196854e-05, "loss": 0.8536, "step": 6281 }, { "epoch": 1.0651563538161477, "grad_norm": 1.7265625, "learning_rate": 1.4388976959418956e-05, "loss": 0.8836, "step": 6282 }, { "epoch": 1.0653278179051375, "grad_norm": 1.6953125, "learning_rate": 1.4387354194499994e-05, "loss": 0.8734, "step": 6283 }, { "epoch": 1.0654992819941274, "grad_norm": 1.5625, "learning_rate": 1.438573128649289e-05, "loss": 0.7896, "step": 6284 }, { "epoch": 1.0656707460831172, "grad_norm": 1.734375, "learning_rate": 1.4384108235450574e-05, "loss": 0.9422, "step": 6285 }, { "epoch": 1.065842210172107, "grad_norm": 1.6015625, "learning_rate": 1.438248504142598e-05, "loss": 0.8169, "step": 6286 }, { "epoch": 1.066013674261097, "grad_norm": 1.7421875, "learning_rate": 1.4380861704472047e-05, "loss": 0.8534, "step": 6287 }, { "epoch": 1.0661851383500869, "grad_norm": 1.6171875, "learning_rate": 1.4379238224641716e-05, "loss": 0.8926, "step": 6288 }, { "epoch": 1.0663566024390767, "grad_norm": 1.5546875, "learning_rate": 1.4377614601987934e-05, "loss": 0.8646, "step": 6289 }, { "epoch": 1.0665280665280665, "grad_norm": 1.546875, "learning_rate": 1.4375990836563658e-05, "loss": 0.8611, "step": 6290 }, { "epoch": 1.0666995306170564, "grad_norm": 1.6875, "learning_rate": 1.437436692842184e-05, "loss": 0.7749, "step": 6291 }, { "epoch": 1.0668709947060462, "grad_norm": 1.671875, "learning_rate": 1.4372742877615447e-05, "loss": 0.8207, "step": 6292 }, { "epoch": 1.067042458795036, "grad_norm": 1.7265625, "learning_rate": 1.437111868419744e-05, "loss": 0.9227, "step": 6293 }, { "epoch": 1.067213922884026, "grad_norm": 1.6015625, "learning_rate": 1.4369494348220791e-05, "loss": 0.8164, "step": 6294 }, { "epoch": 1.0673853869730159, "grad_norm": 1.578125, "learning_rate": 1.4367869869738482e-05, "loss": 0.8078, "step": 6295 }, { "epoch": 1.0675568510620057, "grad_norm": 1.703125, "learning_rate": 1.4366245248803485e-05, "loss": 0.8588, "step": 6296 }, { "epoch": 1.0677283151509955, "grad_norm": 1.734375, "learning_rate": 1.436462048546879e-05, "loss": 0.9065, "step": 6297 }, { "epoch": 1.0678997792399854, "grad_norm": 1.734375, "learning_rate": 1.4362995579787389e-05, "loss": 0.8279, "step": 6298 }, { "epoch": 1.0680712433289752, "grad_norm": 1.6328125, "learning_rate": 1.4361370531812266e-05, "loss": 0.8336, "step": 6299 }, { "epoch": 1.0682427074179652, "grad_norm": 1.7421875, "learning_rate": 1.4359745341596431e-05, "loss": 0.9098, "step": 6300 }, { "epoch": 1.0682427074179652, "eval_loss": 0.8528460264205933, "eval_runtime": 837.1005, "eval_samples_per_second": 2.985, "eval_steps_per_second": 2.985, "step": 6300 }, { "epoch": 1.068414171506955, "grad_norm": 1.6640625, "learning_rate": 1.4358120009192881e-05, "loss": 0.8324, "step": 6301 }, { "epoch": 1.0685856355959449, "grad_norm": 1.7109375, "learning_rate": 1.4356494534654627e-05, "loss": 0.9125, "step": 6302 }, { "epoch": 1.0687570996849347, "grad_norm": 1.921875, "learning_rate": 1.4354868918034679e-05, "loss": 0.7998, "step": 6303 }, { "epoch": 1.0689285637739245, "grad_norm": 1.7265625, "learning_rate": 1.435324315938606e-05, "loss": 0.9459, "step": 6304 }, { "epoch": 1.0691000278629144, "grad_norm": 1.6953125, "learning_rate": 1.4351617258761787e-05, "loss": 0.874, "step": 6305 }, { "epoch": 1.0692714919519044, "grad_norm": 1.65625, "learning_rate": 1.434999121621489e-05, "loss": 0.8602, "step": 6306 }, { "epoch": 1.0694429560408942, "grad_norm": 1.6796875, "learning_rate": 1.4348365031798398e-05, "loss": 0.881, "step": 6307 }, { "epoch": 1.069614420129884, "grad_norm": 1.734375, "learning_rate": 1.4346738705565348e-05, "loss": 0.8294, "step": 6308 }, { "epoch": 1.0697858842188739, "grad_norm": 1.7265625, "learning_rate": 1.4345112237568781e-05, "loss": 0.8579, "step": 6309 }, { "epoch": 1.0699573483078637, "grad_norm": 1.6171875, "learning_rate": 1.4343485627861742e-05, "loss": 0.8984, "step": 6310 }, { "epoch": 1.0701288123968535, "grad_norm": 1.640625, "learning_rate": 1.4341858876497279e-05, "loss": 0.7913, "step": 6311 }, { "epoch": 1.0703002764858436, "grad_norm": 1.703125, "learning_rate": 1.4340231983528448e-05, "loss": 0.8667, "step": 6312 }, { "epoch": 1.0704717405748334, "grad_norm": 1.65625, "learning_rate": 1.433860494900831e-05, "loss": 0.8287, "step": 6313 }, { "epoch": 1.0706432046638232, "grad_norm": 1.6484375, "learning_rate": 1.4336977772989924e-05, "loss": 0.8595, "step": 6314 }, { "epoch": 1.070814668752813, "grad_norm": 1.640625, "learning_rate": 1.4335350455526367e-05, "loss": 0.8163, "step": 6315 }, { "epoch": 1.0709861328418029, "grad_norm": 1.734375, "learning_rate": 1.4333722996670702e-05, "loss": 0.8464, "step": 6316 }, { "epoch": 1.0711575969307927, "grad_norm": 1.7265625, "learning_rate": 1.4332095396476012e-05, "loss": 0.8942, "step": 6317 }, { "epoch": 1.0713290610197828, "grad_norm": 1.640625, "learning_rate": 1.4330467654995376e-05, "loss": 0.8218, "step": 6318 }, { "epoch": 1.0715005251087726, "grad_norm": 1.6953125, "learning_rate": 1.4328839772281884e-05, "loss": 0.8912, "step": 6319 }, { "epoch": 1.0716719891977624, "grad_norm": 1.6328125, "learning_rate": 1.4327211748388626e-05, "loss": 0.8496, "step": 6320 }, { "epoch": 1.0718434532867522, "grad_norm": 1.7734375, "learning_rate": 1.4325583583368698e-05, "loss": 0.9215, "step": 6321 }, { "epoch": 1.072014917375742, "grad_norm": 1.59375, "learning_rate": 1.4323955277275201e-05, "loss": 0.824, "step": 6322 }, { "epoch": 1.0721863814647319, "grad_norm": 1.6015625, "learning_rate": 1.432232683016124e-05, "loss": 0.903, "step": 6323 }, { "epoch": 1.072357845553722, "grad_norm": 1.8046875, "learning_rate": 1.4320698242079925e-05, "loss": 0.8151, "step": 6324 }, { "epoch": 1.0725293096427118, "grad_norm": 1.5234375, "learning_rate": 1.431906951308437e-05, "loss": 0.7505, "step": 6325 }, { "epoch": 1.0727007737317016, "grad_norm": 1.59375, "learning_rate": 1.4317440643227693e-05, "loss": 0.84, "step": 6326 }, { "epoch": 1.0728722378206914, "grad_norm": 1.6640625, "learning_rate": 1.4315811632563022e-05, "loss": 0.8941, "step": 6327 }, { "epoch": 1.0730437019096812, "grad_norm": 1.6171875, "learning_rate": 1.4314182481143478e-05, "loss": 0.9537, "step": 6328 }, { "epoch": 1.073215165998671, "grad_norm": 1.5546875, "learning_rate": 1.4312553189022201e-05, "loss": 0.8015, "step": 6329 }, { "epoch": 1.073386630087661, "grad_norm": 1.7109375, "learning_rate": 1.431092375625232e-05, "loss": 0.8483, "step": 6330 }, { "epoch": 1.073558094176651, "grad_norm": 1.7265625, "learning_rate": 1.4309294182886984e-05, "loss": 0.874, "step": 6331 }, { "epoch": 1.0737295582656408, "grad_norm": 1.6171875, "learning_rate": 1.4307664468979336e-05, "loss": 0.9018, "step": 6332 }, { "epoch": 1.0739010223546306, "grad_norm": 1.5625, "learning_rate": 1.4306034614582534e-05, "loss": 0.7619, "step": 6333 }, { "epoch": 1.0740724864436204, "grad_norm": 1.6484375, "learning_rate": 1.4304404619749724e-05, "loss": 0.7982, "step": 6334 }, { "epoch": 1.0742439505326102, "grad_norm": 1.578125, "learning_rate": 1.4302774484534073e-05, "loss": 0.8727, "step": 6335 }, { "epoch": 1.0744154146216003, "grad_norm": 1.8125, "learning_rate": 1.430114420898874e-05, "loss": 0.8913, "step": 6336 }, { "epoch": 1.07458687871059, "grad_norm": 1.6171875, "learning_rate": 1.4299513793166896e-05, "loss": 0.9844, "step": 6337 }, { "epoch": 1.07475834279958, "grad_norm": 1.671875, "learning_rate": 1.4297883237121721e-05, "loss": 0.8231, "step": 6338 }, { "epoch": 1.0749298068885698, "grad_norm": 1.640625, "learning_rate": 1.429625254090639e-05, "loss": 0.878, "step": 6339 }, { "epoch": 1.0751012709775596, "grad_norm": 1.703125, "learning_rate": 1.4294621704574084e-05, "loss": 0.8635, "step": 6340 }, { "epoch": 1.0752727350665494, "grad_norm": 1.734375, "learning_rate": 1.4292990728177992e-05, "loss": 0.8971, "step": 6341 }, { "epoch": 1.0754441991555395, "grad_norm": 1.59375, "learning_rate": 1.4291359611771304e-05, "loss": 0.8481, "step": 6342 }, { "epoch": 1.0756156632445293, "grad_norm": 1.6953125, "learning_rate": 1.4289728355407221e-05, "loss": 0.8761, "step": 6343 }, { "epoch": 1.075787127333519, "grad_norm": 1.5078125, "learning_rate": 1.4288096959138946e-05, "loss": 0.7794, "step": 6344 }, { "epoch": 1.075958591422509, "grad_norm": 1.78125, "learning_rate": 1.4286465423019679e-05, "loss": 0.9239, "step": 6345 }, { "epoch": 1.0761300555114988, "grad_norm": 1.6484375, "learning_rate": 1.4284833747102634e-05, "loss": 0.8863, "step": 6346 }, { "epoch": 1.0763015196004886, "grad_norm": 1.6953125, "learning_rate": 1.4283201931441024e-05, "loss": 0.9214, "step": 6347 }, { "epoch": 1.0764729836894786, "grad_norm": 1.671875, "learning_rate": 1.428156997608807e-05, "loss": 0.8686, "step": 6348 }, { "epoch": 1.0766444477784685, "grad_norm": 1.703125, "learning_rate": 1.4279937881096997e-05, "loss": 0.8189, "step": 6349 }, { "epoch": 1.0768159118674583, "grad_norm": 1.65625, "learning_rate": 1.4278305646521032e-05, "loss": 0.8565, "step": 6350 }, { "epoch": 1.0769873759564481, "grad_norm": 1.6015625, "learning_rate": 1.427667327241341e-05, "loss": 0.7881, "step": 6351 }, { "epoch": 1.077158840045438, "grad_norm": 1.5390625, "learning_rate": 1.4275040758827367e-05, "loss": 0.8265, "step": 6352 }, { "epoch": 1.0773303041344278, "grad_norm": 1.71875, "learning_rate": 1.4273408105816148e-05, "loss": 0.8923, "step": 6353 }, { "epoch": 1.0775017682234178, "grad_norm": 1.6484375, "learning_rate": 1.4271775313432998e-05, "loss": 0.8301, "step": 6354 }, { "epoch": 1.0776732323124076, "grad_norm": 1.6640625, "learning_rate": 1.4270142381731168e-05, "loss": 0.7962, "step": 6355 }, { "epoch": 1.0778446964013975, "grad_norm": 1.65625, "learning_rate": 1.426850931076392e-05, "loss": 0.856, "step": 6356 }, { "epoch": 1.0780161604903873, "grad_norm": 1.7109375, "learning_rate": 1.4266876100584505e-05, "loss": 0.9114, "step": 6357 }, { "epoch": 1.0781876245793771, "grad_norm": 1.6953125, "learning_rate": 1.4265242751246191e-05, "loss": 0.8902, "step": 6358 }, { "epoch": 1.078359088668367, "grad_norm": 1.6953125, "learning_rate": 1.4263609262802255e-05, "loss": 0.9172, "step": 6359 }, { "epoch": 1.078530552757357, "grad_norm": 1.6484375, "learning_rate": 1.426197563530596e-05, "loss": 0.9066, "step": 6360 }, { "epoch": 1.0787020168463468, "grad_norm": 1.6171875, "learning_rate": 1.4260341868810591e-05, "loss": 0.7365, "step": 6361 }, { "epoch": 1.0788734809353366, "grad_norm": 1.7109375, "learning_rate": 1.4258707963369438e-05, "loss": 0.8886, "step": 6362 }, { "epoch": 1.0790449450243265, "grad_norm": 1.640625, "learning_rate": 1.4257073919035775e-05, "loss": 0.8961, "step": 6363 }, { "epoch": 1.0792164091133163, "grad_norm": 1.609375, "learning_rate": 1.4255439735862901e-05, "loss": 0.782, "step": 6364 }, { "epoch": 1.0793878732023061, "grad_norm": 1.671875, "learning_rate": 1.4253805413904114e-05, "loss": 0.8904, "step": 6365 }, { "epoch": 1.079559337291296, "grad_norm": 1.75, "learning_rate": 1.4252170953212713e-05, "loss": 0.8601, "step": 6366 }, { "epoch": 1.079730801380286, "grad_norm": 1.78125, "learning_rate": 1.4250536353842009e-05, "loss": 0.8834, "step": 6367 }, { "epoch": 1.0799022654692758, "grad_norm": 1.625, "learning_rate": 1.4248901615845304e-05, "loss": 0.8273, "step": 6368 }, { "epoch": 1.0800737295582656, "grad_norm": 1.5859375, "learning_rate": 1.4247266739275918e-05, "loss": 0.8262, "step": 6369 }, { "epoch": 1.0802451936472555, "grad_norm": 1.6328125, "learning_rate": 1.4245631724187172e-05, "loss": 0.8552, "step": 6370 }, { "epoch": 1.0804166577362453, "grad_norm": 1.578125, "learning_rate": 1.4243996570632385e-05, "loss": 0.7704, "step": 6371 }, { "epoch": 1.0805881218252353, "grad_norm": 1.59375, "learning_rate": 1.4242361278664891e-05, "loss": 0.8284, "step": 6372 }, { "epoch": 1.0807595859142252, "grad_norm": 1.765625, "learning_rate": 1.4240725848338023e-05, "loss": 0.8723, "step": 6373 }, { "epoch": 1.080931050003215, "grad_norm": 1.7265625, "learning_rate": 1.423909027970511e-05, "loss": 0.8555, "step": 6374 }, { "epoch": 1.0811025140922048, "grad_norm": 1.6796875, "learning_rate": 1.4237454572819505e-05, "loss": 0.9163, "step": 6375 }, { "epoch": 1.0812739781811946, "grad_norm": 1.6953125, "learning_rate": 1.4235818727734544e-05, "loss": 0.8108, "step": 6376 }, { "epoch": 1.0814454422701845, "grad_norm": 1.6015625, "learning_rate": 1.4234182744503586e-05, "loss": 0.8376, "step": 6377 }, { "epoch": 1.0816169063591743, "grad_norm": 1.640625, "learning_rate": 1.4232546623179985e-05, "loss": 0.7456, "step": 6378 }, { "epoch": 1.0817883704481643, "grad_norm": 1.7265625, "learning_rate": 1.4230910363817104e-05, "loss": 0.8937, "step": 6379 }, { "epoch": 1.0819598345371542, "grad_norm": 1.6484375, "learning_rate": 1.4229273966468298e-05, "loss": 0.8592, "step": 6380 }, { "epoch": 1.082131298626144, "grad_norm": 1.6171875, "learning_rate": 1.4227637431186945e-05, "loss": 0.8733, "step": 6381 }, { "epoch": 1.0823027627151338, "grad_norm": 1.7265625, "learning_rate": 1.4226000758026414e-05, "loss": 0.9073, "step": 6382 }, { "epoch": 1.0824742268041236, "grad_norm": 1.59375, "learning_rate": 1.4224363947040085e-05, "loss": 0.7889, "step": 6383 }, { "epoch": 1.0826456908931137, "grad_norm": 1.71875, "learning_rate": 1.4222726998281342e-05, "loss": 0.8612, "step": 6384 }, { "epoch": 1.0828171549821035, "grad_norm": 1.6796875, "learning_rate": 1.422108991180357e-05, "loss": 0.8388, "step": 6385 }, { "epoch": 1.0829886190710933, "grad_norm": 1.6875, "learning_rate": 1.4219452687660158e-05, "loss": 0.8867, "step": 6386 }, { "epoch": 1.0831600831600832, "grad_norm": 1.640625, "learning_rate": 1.4217815325904508e-05, "loss": 0.8568, "step": 6387 }, { "epoch": 1.083331547249073, "grad_norm": 1.71875, "learning_rate": 1.4216177826590017e-05, "loss": 0.9192, "step": 6388 }, { "epoch": 1.0835030113380628, "grad_norm": 1.6875, "learning_rate": 1.4214540189770087e-05, "loss": 0.9198, "step": 6389 }, { "epoch": 1.0836744754270526, "grad_norm": 1.6015625, "learning_rate": 1.4212902415498136e-05, "loss": 0.8886, "step": 6390 }, { "epoch": 1.0838459395160427, "grad_norm": 1.640625, "learning_rate": 1.4211264503827571e-05, "loss": 0.8625, "step": 6391 }, { "epoch": 1.0840174036050325, "grad_norm": 1.6484375, "learning_rate": 1.4209626454811812e-05, "loss": 0.7927, "step": 6392 }, { "epoch": 1.0841888676940223, "grad_norm": 1.6640625, "learning_rate": 1.420798826850428e-05, "loss": 0.8718, "step": 6393 }, { "epoch": 1.0843603317830122, "grad_norm": 1.75, "learning_rate": 1.4206349944958407e-05, "loss": 0.9271, "step": 6394 }, { "epoch": 1.084531795872002, "grad_norm": 1.7265625, "learning_rate": 1.4204711484227623e-05, "loss": 0.8981, "step": 6395 }, { "epoch": 1.0847032599609918, "grad_norm": 1.7109375, "learning_rate": 1.4203072886365364e-05, "loss": 0.8554, "step": 6396 }, { "epoch": 1.0848747240499819, "grad_norm": 1.7265625, "learning_rate": 1.4201434151425072e-05, "loss": 0.9236, "step": 6397 }, { "epoch": 1.0850461881389717, "grad_norm": 1.6953125, "learning_rate": 1.4199795279460187e-05, "loss": 0.8479, "step": 6398 }, { "epoch": 1.0852176522279615, "grad_norm": 1.671875, "learning_rate": 1.4198156270524167e-05, "loss": 0.8149, "step": 6399 }, { "epoch": 1.0853891163169513, "grad_norm": 1.65625, "learning_rate": 1.419651712467046e-05, "loss": 0.8815, "step": 6400 }, { "epoch": 1.0855605804059412, "grad_norm": 1.7109375, "learning_rate": 1.4194877841952526e-05, "loss": 0.9174, "step": 6401 }, { "epoch": 1.085732044494931, "grad_norm": 1.609375, "learning_rate": 1.4193238422423834e-05, "loss": 0.8742, "step": 6402 }, { "epoch": 1.085903508583921, "grad_norm": 1.765625, "learning_rate": 1.4191598866137844e-05, "loss": 0.902, "step": 6403 }, { "epoch": 1.0860749726729109, "grad_norm": 1.7265625, "learning_rate": 1.418995917314803e-05, "loss": 0.9654, "step": 6404 }, { "epoch": 1.0862464367619007, "grad_norm": 1.75, "learning_rate": 1.4188319343507865e-05, "loss": 0.8984, "step": 6405 }, { "epoch": 1.0864179008508905, "grad_norm": 1.6640625, "learning_rate": 1.4186679377270841e-05, "loss": 0.9152, "step": 6406 }, { "epoch": 1.0865893649398803, "grad_norm": 1.59375, "learning_rate": 1.4185039274490436e-05, "loss": 0.8611, "step": 6407 }, { "epoch": 1.0867608290288702, "grad_norm": 1.671875, "learning_rate": 1.4183399035220143e-05, "loss": 0.8521, "step": 6408 }, { "epoch": 1.0869322931178602, "grad_norm": 1.765625, "learning_rate": 1.418175865951345e-05, "loss": 0.8818, "step": 6409 }, { "epoch": 1.08710375720685, "grad_norm": 1.6015625, "learning_rate": 1.4180118147423861e-05, "loss": 0.8461, "step": 6410 }, { "epoch": 1.0872752212958399, "grad_norm": 1.7109375, "learning_rate": 1.4178477499004879e-05, "loss": 0.7814, "step": 6411 }, { "epoch": 1.0874466853848297, "grad_norm": 1.703125, "learning_rate": 1.417683671431001e-05, "loss": 0.9004, "step": 6412 }, { "epoch": 1.0876181494738195, "grad_norm": 1.6484375, "learning_rate": 1.4175195793392769e-05, "loss": 0.851, "step": 6413 }, { "epoch": 1.0877896135628093, "grad_norm": 1.65625, "learning_rate": 1.417355473630667e-05, "loss": 0.825, "step": 6414 }, { "epoch": 1.0879610776517994, "grad_norm": 1.7109375, "learning_rate": 1.4171913543105233e-05, "loss": 0.932, "step": 6415 }, { "epoch": 1.0881325417407892, "grad_norm": 1.7734375, "learning_rate": 1.4170272213841988e-05, "loss": 0.8789, "step": 6416 }, { "epoch": 1.088304005829779, "grad_norm": 1.625, "learning_rate": 1.4168630748570462e-05, "loss": 0.8809, "step": 6417 }, { "epoch": 1.0884754699187689, "grad_norm": 1.6875, "learning_rate": 1.4166989147344188e-05, "loss": 0.8888, "step": 6418 }, { "epoch": 1.0886469340077587, "grad_norm": 1.6015625, "learning_rate": 1.416534741021671e-05, "loss": 0.8732, "step": 6419 }, { "epoch": 1.0888183980967485, "grad_norm": 1.5703125, "learning_rate": 1.4163705537241565e-05, "loss": 0.7691, "step": 6420 }, { "epoch": 1.0889898621857386, "grad_norm": 1.6328125, "learning_rate": 1.4162063528472302e-05, "loss": 0.864, "step": 6421 }, { "epoch": 1.0891613262747284, "grad_norm": 1.6328125, "learning_rate": 1.4160421383962478e-05, "loss": 0.793, "step": 6422 }, { "epoch": 1.0893327903637182, "grad_norm": 1.6015625, "learning_rate": 1.4158779103765642e-05, "loss": 0.8254, "step": 6423 }, { "epoch": 1.089504254452708, "grad_norm": 1.7578125, "learning_rate": 1.415713668793536e-05, "loss": 0.8618, "step": 6424 }, { "epoch": 1.0896757185416979, "grad_norm": 1.640625, "learning_rate": 1.4155494136525198e-05, "loss": 0.8551, "step": 6425 }, { "epoch": 1.0898471826306877, "grad_norm": 1.7109375, "learning_rate": 1.4153851449588725e-05, "loss": 0.9366, "step": 6426 }, { "epoch": 1.0900186467196777, "grad_norm": 1.640625, "learning_rate": 1.4152208627179513e-05, "loss": 0.8696, "step": 6427 }, { "epoch": 1.0901901108086676, "grad_norm": 1.6015625, "learning_rate": 1.4150565669351141e-05, "loss": 0.889, "step": 6428 }, { "epoch": 1.0903615748976574, "grad_norm": 1.7265625, "learning_rate": 1.4148922576157194e-05, "loss": 0.9755, "step": 6429 }, { "epoch": 1.0905330389866472, "grad_norm": 1.703125, "learning_rate": 1.4147279347651256e-05, "loss": 0.9041, "step": 6430 }, { "epoch": 1.090704503075637, "grad_norm": 1.546875, "learning_rate": 1.4145635983886927e-05, "loss": 0.7879, "step": 6431 }, { "epoch": 1.0908759671646269, "grad_norm": 1.65625, "learning_rate": 1.4143992484917792e-05, "loss": 0.7683, "step": 6432 }, { "epoch": 1.091047431253617, "grad_norm": 1.609375, "learning_rate": 1.4142348850797458e-05, "loss": 0.7695, "step": 6433 }, { "epoch": 1.0912188953426067, "grad_norm": 1.6171875, "learning_rate": 1.414070508157953e-05, "loss": 0.8802, "step": 6434 }, { "epoch": 1.0913903594315966, "grad_norm": 1.671875, "learning_rate": 1.4139061177317616e-05, "loss": 0.9043, "step": 6435 }, { "epoch": 1.0915618235205864, "grad_norm": 1.6796875, "learning_rate": 1.4137417138065333e-05, "loss": 0.8847, "step": 6436 }, { "epoch": 1.0917332876095762, "grad_norm": 1.640625, "learning_rate": 1.4135772963876297e-05, "loss": 0.8592, "step": 6437 }, { "epoch": 1.091904751698566, "grad_norm": 1.625, "learning_rate": 1.4134128654804131e-05, "loss": 0.7798, "step": 6438 }, { "epoch": 1.092076215787556, "grad_norm": 1.65625, "learning_rate": 1.413248421090246e-05, "loss": 0.8814, "step": 6439 }, { "epoch": 1.092247679876546, "grad_norm": 1.5625, "learning_rate": 1.4130839632224918e-05, "loss": 0.8163, "step": 6440 }, { "epoch": 1.0924191439655357, "grad_norm": 1.65625, "learning_rate": 1.412919491882514e-05, "loss": 0.9243, "step": 6441 }, { "epoch": 1.0925906080545256, "grad_norm": 1.6171875, "learning_rate": 1.4127550070756768e-05, "loss": 0.8714, "step": 6442 }, { "epoch": 1.0927620721435154, "grad_norm": 1.640625, "learning_rate": 1.4125905088073442e-05, "loss": 0.9572, "step": 6443 }, { "epoch": 1.0929335362325052, "grad_norm": 1.703125, "learning_rate": 1.4124259970828817e-05, "loss": 0.8775, "step": 6444 }, { "epoch": 1.0931050003214953, "grad_norm": 1.703125, "learning_rate": 1.4122614719076544e-05, "loss": 0.9437, "step": 6445 }, { "epoch": 1.093276464410485, "grad_norm": 1.6875, "learning_rate": 1.412096933287028e-05, "loss": 0.907, "step": 6446 }, { "epoch": 1.093447928499475, "grad_norm": 1.5703125, "learning_rate": 1.4119323812263688e-05, "loss": 0.807, "step": 6447 }, { "epoch": 1.0936193925884647, "grad_norm": 1.5625, "learning_rate": 1.4117678157310436e-05, "loss": 0.8288, "step": 6448 }, { "epoch": 1.0937908566774546, "grad_norm": 1.6875, "learning_rate": 1.4116032368064192e-05, "loss": 0.8559, "step": 6449 }, { "epoch": 1.0939623207664444, "grad_norm": 1.6484375, "learning_rate": 1.4114386444578632e-05, "loss": 0.8325, "step": 6450 }, { "epoch": 1.0941337848554344, "grad_norm": 1.6875, "learning_rate": 1.4112740386907438e-05, "loss": 0.8723, "step": 6451 }, { "epoch": 1.0943052489444243, "grad_norm": 1.6640625, "learning_rate": 1.4111094195104291e-05, "loss": 0.8334, "step": 6452 }, { "epoch": 1.094476713033414, "grad_norm": 1.7421875, "learning_rate": 1.410944786922288e-05, "loss": 1.0709, "step": 6453 }, { "epoch": 1.094648177122404, "grad_norm": 1.84375, "learning_rate": 1.4107801409316907e-05, "loss": 0.907, "step": 6454 }, { "epoch": 1.0948196412113937, "grad_norm": 1.65625, "learning_rate": 1.4106154815440056e-05, "loss": 0.8274, "step": 6455 }, { "epoch": 1.0949911053003836, "grad_norm": 1.59375, "learning_rate": 1.4104508087646033e-05, "loss": 0.7924, "step": 6456 }, { "epoch": 1.0951625693893736, "grad_norm": 1.578125, "learning_rate": 1.410286122598855e-05, "loss": 0.8505, "step": 6457 }, { "epoch": 1.0953340334783634, "grad_norm": 1.6953125, "learning_rate": 1.4101214230521307e-05, "loss": 0.9114, "step": 6458 }, { "epoch": 1.0955054975673533, "grad_norm": 1.734375, "learning_rate": 1.4099567101298026e-05, "loss": 0.8981, "step": 6459 }, { "epoch": 1.095676961656343, "grad_norm": 1.7109375, "learning_rate": 1.4097919838372425e-05, "loss": 0.8765, "step": 6460 }, { "epoch": 1.095848425745333, "grad_norm": 1.6953125, "learning_rate": 1.4096272441798227e-05, "loss": 0.9022, "step": 6461 }, { "epoch": 1.0960198898343227, "grad_norm": 2.046875, "learning_rate": 1.4094624911629158e-05, "loss": 0.8844, "step": 6462 }, { "epoch": 1.0961913539233126, "grad_norm": 1.546875, "learning_rate": 1.4092977247918953e-05, "loss": 0.7989, "step": 6463 }, { "epoch": 1.0963628180123026, "grad_norm": 1.625, "learning_rate": 1.4091329450721347e-05, "loss": 0.8971, "step": 6464 }, { "epoch": 1.0965342821012924, "grad_norm": 1.6875, "learning_rate": 1.4089681520090084e-05, "loss": 0.8169, "step": 6465 }, { "epoch": 1.0967057461902823, "grad_norm": 1.6640625, "learning_rate": 1.4088033456078902e-05, "loss": 0.8382, "step": 6466 }, { "epoch": 1.096877210279272, "grad_norm": 1.6640625, "learning_rate": 1.4086385258741557e-05, "loss": 0.8602, "step": 6467 }, { "epoch": 1.097048674368262, "grad_norm": 1.6875, "learning_rate": 1.40847369281318e-05, "loss": 0.8289, "step": 6468 }, { "epoch": 1.097220138457252, "grad_norm": 1.640625, "learning_rate": 1.408308846430339e-05, "loss": 0.868, "step": 6469 }, { "epoch": 1.0973916025462418, "grad_norm": 1.609375, "learning_rate": 1.408143986731009e-05, "loss": 0.8289, "step": 6470 }, { "epoch": 1.0975630666352316, "grad_norm": 1.6796875, "learning_rate": 1.4079791137205665e-05, "loss": 0.8554, "step": 6471 }, { "epoch": 1.0977345307242214, "grad_norm": 1.703125, "learning_rate": 1.407814227404389e-05, "loss": 0.9221, "step": 6472 }, { "epoch": 1.0979059948132113, "grad_norm": 1.7265625, "learning_rate": 1.4076493277878537e-05, "loss": 0.8511, "step": 6473 }, { "epoch": 1.098077458902201, "grad_norm": 1.609375, "learning_rate": 1.4074844148763388e-05, "loss": 0.8883, "step": 6474 }, { "epoch": 1.098248922991191, "grad_norm": 1.578125, "learning_rate": 1.4073194886752228e-05, "loss": 0.8379, "step": 6475 }, { "epoch": 1.098420387080181, "grad_norm": 1.59375, "learning_rate": 1.4071545491898843e-05, "loss": 0.8347, "step": 6476 }, { "epoch": 1.0985918511691708, "grad_norm": 1.59375, "learning_rate": 1.4069895964257032e-05, "loss": 0.9084, "step": 6477 }, { "epoch": 1.0987633152581606, "grad_norm": 1.6953125, "learning_rate": 1.4068246303880584e-05, "loss": 0.9504, "step": 6478 }, { "epoch": 1.0989347793471504, "grad_norm": 1.6015625, "learning_rate": 1.4066596510823303e-05, "loss": 0.8512, "step": 6479 }, { "epoch": 1.0991062434361403, "grad_norm": 1.5859375, "learning_rate": 1.4064946585138997e-05, "loss": 0.8665, "step": 6480 }, { "epoch": 1.0992777075251303, "grad_norm": 1.71875, "learning_rate": 1.4063296526881477e-05, "loss": 0.8951, "step": 6481 }, { "epoch": 1.0994491716141201, "grad_norm": 1.71875, "learning_rate": 1.4061646336104556e-05, "loss": 0.8214, "step": 6482 }, { "epoch": 1.09962063570311, "grad_norm": 1.6484375, "learning_rate": 1.4059996012862055e-05, "loss": 0.9158, "step": 6483 }, { "epoch": 1.0997920997920998, "grad_norm": 1.640625, "learning_rate": 1.4058345557207794e-05, "loss": 0.8622, "step": 6484 }, { "epoch": 1.0999635638810896, "grad_norm": 1.6640625, "learning_rate": 1.40566949691956e-05, "loss": 0.8542, "step": 6485 }, { "epoch": 1.1001350279700794, "grad_norm": 1.671875, "learning_rate": 1.405504424887931e-05, "loss": 0.9208, "step": 6486 }, { "epoch": 1.1003064920590693, "grad_norm": 1.7421875, "learning_rate": 1.4053393396312756e-05, "loss": 0.8273, "step": 6487 }, { "epoch": 1.1004779561480593, "grad_norm": 1.640625, "learning_rate": 1.405174241154978e-05, "loss": 0.8275, "step": 6488 }, { "epoch": 1.1006494202370491, "grad_norm": 1.6640625, "learning_rate": 1.4050091294644226e-05, "loss": 0.8867, "step": 6489 }, { "epoch": 1.100820884326039, "grad_norm": 1.671875, "learning_rate": 1.4048440045649943e-05, "loss": 0.8546, "step": 6490 }, { "epoch": 1.1009923484150288, "grad_norm": 1.7890625, "learning_rate": 1.4046788664620785e-05, "loss": 0.8363, "step": 6491 }, { "epoch": 1.1011638125040186, "grad_norm": 1.703125, "learning_rate": 1.4045137151610614e-05, "loss": 0.9141, "step": 6492 }, { "epoch": 1.1013352765930084, "grad_norm": 1.78125, "learning_rate": 1.4043485506673282e-05, "loss": 0.9679, "step": 6493 }, { "epoch": 1.1015067406819985, "grad_norm": 1.640625, "learning_rate": 1.4041833729862666e-05, "loss": 0.8664, "step": 6494 }, { "epoch": 1.1016782047709883, "grad_norm": 1.6953125, "learning_rate": 1.404018182123263e-05, "loss": 0.8076, "step": 6495 }, { "epoch": 1.1018496688599781, "grad_norm": 1.65625, "learning_rate": 1.4038529780837049e-05, "loss": 0.9172, "step": 6496 }, { "epoch": 1.102021132948968, "grad_norm": 1.6328125, "learning_rate": 1.4036877608729806e-05, "loss": 0.8456, "step": 6497 }, { "epoch": 1.1021925970379578, "grad_norm": 1.71875, "learning_rate": 1.4035225304964781e-05, "loss": 0.8858, "step": 6498 }, { "epoch": 1.1023640611269476, "grad_norm": 1.6640625, "learning_rate": 1.4033572869595866e-05, "loss": 0.8169, "step": 6499 }, { "epoch": 1.1025355252159377, "grad_norm": 1.5859375, "learning_rate": 1.4031920302676951e-05, "loss": 0.7992, "step": 6500 }, { "epoch": 1.1027069893049275, "grad_norm": 1.7265625, "learning_rate": 1.403026760426193e-05, "loss": 0.9074, "step": 6501 }, { "epoch": 1.1028784533939173, "grad_norm": 1.65625, "learning_rate": 1.4028614774404707e-05, "loss": 0.8155, "step": 6502 }, { "epoch": 1.1030499174829071, "grad_norm": 1.5703125, "learning_rate": 1.4026961813159186e-05, "loss": 0.8348, "step": 6503 }, { "epoch": 1.103221381571897, "grad_norm": 1.6953125, "learning_rate": 1.4025308720579276e-05, "loss": 0.8808, "step": 6504 }, { "epoch": 1.1033928456608868, "grad_norm": 1.640625, "learning_rate": 1.4023655496718892e-05, "loss": 0.9387, "step": 6505 }, { "epoch": 1.1035643097498768, "grad_norm": 1.734375, "learning_rate": 1.402200214163195e-05, "loss": 0.9147, "step": 6506 }, { "epoch": 1.1037357738388667, "grad_norm": 1.6796875, "learning_rate": 1.402034865537237e-05, "loss": 0.8228, "step": 6507 }, { "epoch": 1.1039072379278565, "grad_norm": 1.7109375, "learning_rate": 1.4018695037994081e-05, "loss": 0.8541, "step": 6508 }, { "epoch": 1.1040787020168463, "grad_norm": 1.6171875, "learning_rate": 1.4017041289551017e-05, "loss": 0.8786, "step": 6509 }, { "epoch": 1.1042501661058362, "grad_norm": 1.609375, "learning_rate": 1.4015387410097108e-05, "loss": 0.734, "step": 6510 }, { "epoch": 1.104421630194826, "grad_norm": 1.625, "learning_rate": 1.4013733399686296e-05, "loss": 0.8647, "step": 6511 }, { "epoch": 1.104593094283816, "grad_norm": 1.6015625, "learning_rate": 1.4012079258372523e-05, "loss": 0.871, "step": 6512 }, { "epoch": 1.1047645583728058, "grad_norm": 1.6875, "learning_rate": 1.4010424986209738e-05, "loss": 0.9104, "step": 6513 }, { "epoch": 1.1049360224617957, "grad_norm": 1.71875, "learning_rate": 1.4008770583251892e-05, "loss": 0.8532, "step": 6514 }, { "epoch": 1.1051074865507855, "grad_norm": 1.6171875, "learning_rate": 1.4007116049552942e-05, "loss": 0.8285, "step": 6515 }, { "epoch": 1.1052789506397753, "grad_norm": 1.71875, "learning_rate": 1.4005461385166847e-05, "loss": 0.8479, "step": 6516 }, { "epoch": 1.1054504147287652, "grad_norm": 1.609375, "learning_rate": 1.4003806590147573e-05, "loss": 0.8497, "step": 6517 }, { "epoch": 1.1056218788177552, "grad_norm": 1.7109375, "learning_rate": 1.4002151664549088e-05, "loss": 0.8999, "step": 6518 }, { "epoch": 1.105793342906745, "grad_norm": 1.59375, "learning_rate": 1.4000496608425369e-05, "loss": 0.9195, "step": 6519 }, { "epoch": 1.1059648069957349, "grad_norm": 1.5859375, "learning_rate": 1.3998841421830391e-05, "loss": 0.7609, "step": 6520 }, { "epoch": 1.1061362710847247, "grad_norm": 1.6484375, "learning_rate": 1.3997186104818134e-05, "loss": 0.8273, "step": 6521 }, { "epoch": 1.1063077351737145, "grad_norm": 1.6015625, "learning_rate": 1.3995530657442588e-05, "loss": 0.8517, "step": 6522 }, { "epoch": 1.1064791992627043, "grad_norm": 1.6796875, "learning_rate": 1.3993875079757744e-05, "loss": 0.8764, "step": 6523 }, { "epoch": 1.1066506633516944, "grad_norm": 1.6640625, "learning_rate": 1.399221937181759e-05, "loss": 0.8312, "step": 6524 }, { "epoch": 1.1068221274406842, "grad_norm": 1.5859375, "learning_rate": 1.3990563533676129e-05, "loss": 0.8393, "step": 6525 }, { "epoch": 1.106993591529674, "grad_norm": 1.625, "learning_rate": 1.3988907565387364e-05, "loss": 0.9126, "step": 6526 }, { "epoch": 1.1071650556186639, "grad_norm": 1.7578125, "learning_rate": 1.3987251467005303e-05, "loss": 0.8461, "step": 6527 }, { "epoch": 1.1073365197076537, "grad_norm": 1.640625, "learning_rate": 1.3985595238583958e-05, "loss": 0.889, "step": 6528 }, { "epoch": 1.1075079837966435, "grad_norm": 1.75, "learning_rate": 1.3983938880177345e-05, "loss": 0.8923, "step": 6529 }, { "epoch": 1.1076794478856336, "grad_norm": 1.6796875, "learning_rate": 1.3982282391839483e-05, "loss": 0.8516, "step": 6530 }, { "epoch": 1.1078509119746234, "grad_norm": 1.7578125, "learning_rate": 1.3980625773624394e-05, "loss": 0.9837, "step": 6531 }, { "epoch": 1.1080223760636132, "grad_norm": 1.7734375, "learning_rate": 1.3978969025586109e-05, "loss": 0.8922, "step": 6532 }, { "epoch": 1.108193840152603, "grad_norm": 1.7421875, "learning_rate": 1.397731214777866e-05, "loss": 0.8567, "step": 6533 }, { "epoch": 1.1083653042415929, "grad_norm": 1.546875, "learning_rate": 1.3975655140256089e-05, "loss": 0.7654, "step": 6534 }, { "epoch": 1.1085367683305827, "grad_norm": 1.640625, "learning_rate": 1.3973998003072429e-05, "loss": 0.8481, "step": 6535 }, { "epoch": 1.1087082324195727, "grad_norm": 1.5625, "learning_rate": 1.397234073628173e-05, "loss": 0.8526, "step": 6536 }, { "epoch": 1.1088796965085626, "grad_norm": 1.6640625, "learning_rate": 1.3970683339938042e-05, "loss": 0.886, "step": 6537 }, { "epoch": 1.1090511605975524, "grad_norm": 1.6640625, "learning_rate": 1.3969025814095419e-05, "loss": 0.9093, "step": 6538 }, { "epoch": 1.1092226246865422, "grad_norm": 1.640625, "learning_rate": 1.3967368158807915e-05, "loss": 0.8276, "step": 6539 }, { "epoch": 1.109394088775532, "grad_norm": 1.6484375, "learning_rate": 1.3965710374129599e-05, "loss": 0.8418, "step": 6540 }, { "epoch": 1.1095655528645219, "grad_norm": 1.625, "learning_rate": 1.3964052460114532e-05, "loss": 0.837, "step": 6541 }, { "epoch": 1.109737016953512, "grad_norm": 1.609375, "learning_rate": 1.396239441681679e-05, "loss": 0.8559, "step": 6542 }, { "epoch": 1.1099084810425017, "grad_norm": 1.703125, "learning_rate": 1.396073624429044e-05, "loss": 0.96, "step": 6543 }, { "epoch": 1.1100799451314916, "grad_norm": 1.6796875, "learning_rate": 1.395907794258957e-05, "loss": 0.894, "step": 6544 }, { "epoch": 1.1102514092204814, "grad_norm": 1.6015625, "learning_rate": 1.3957419511768257e-05, "loss": 0.8482, "step": 6545 }, { "epoch": 1.1104228733094712, "grad_norm": 1.640625, "learning_rate": 1.3955760951880595e-05, "loss": 0.8397, "step": 6546 }, { "epoch": 1.110594337398461, "grad_norm": 1.75, "learning_rate": 1.3954102262980672e-05, "loss": 0.9021, "step": 6547 }, { "epoch": 1.1107658014874509, "grad_norm": 1.703125, "learning_rate": 1.3952443445122583e-05, "loss": 0.8697, "step": 6548 }, { "epoch": 1.110937265576441, "grad_norm": 1.6328125, "learning_rate": 1.395078449836043e-05, "loss": 0.8267, "step": 6549 }, { "epoch": 1.1111087296654307, "grad_norm": 1.6953125, "learning_rate": 1.3949125422748315e-05, "loss": 0.8296, "step": 6550 }, { "epoch": 1.1112801937544206, "grad_norm": 1.671875, "learning_rate": 1.394746621834035e-05, "loss": 0.9149, "step": 6551 }, { "epoch": 1.1114516578434104, "grad_norm": 1.703125, "learning_rate": 1.3945806885190651e-05, "loss": 0.7707, "step": 6552 }, { "epoch": 1.1116231219324002, "grad_norm": 1.6640625, "learning_rate": 1.394414742335333e-05, "loss": 0.8369, "step": 6553 }, { "epoch": 1.1117945860213903, "grad_norm": 1.578125, "learning_rate": 1.3942487832882503e-05, "loss": 0.8169, "step": 6554 }, { "epoch": 1.11196605011038, "grad_norm": 1.65625, "learning_rate": 1.3940828113832306e-05, "loss": 0.8029, "step": 6555 }, { "epoch": 1.11213751419937, "grad_norm": 1.7578125, "learning_rate": 1.3939168266256865e-05, "loss": 0.8772, "step": 6556 }, { "epoch": 1.1123089782883597, "grad_norm": 1.6640625, "learning_rate": 1.3937508290210313e-05, "loss": 0.7688, "step": 6557 }, { "epoch": 1.1124804423773496, "grad_norm": 1.6875, "learning_rate": 1.393584818574679e-05, "loss": 0.8588, "step": 6558 }, { "epoch": 1.1126519064663394, "grad_norm": 1.65625, "learning_rate": 1.3934187952920433e-05, "loss": 0.8125, "step": 6559 }, { "epoch": 1.1128233705553292, "grad_norm": 1.6484375, "learning_rate": 1.3932527591785394e-05, "loss": 0.9402, "step": 6560 }, { "epoch": 1.1129948346443193, "grad_norm": 1.7109375, "learning_rate": 1.3930867102395821e-05, "loss": 0.891, "step": 6561 }, { "epoch": 1.113166298733309, "grad_norm": 1.65625, "learning_rate": 1.3929206484805869e-05, "loss": 0.8461, "step": 6562 }, { "epoch": 1.113337762822299, "grad_norm": 1.6171875, "learning_rate": 1.3927545739069699e-05, "loss": 0.7938, "step": 6563 }, { "epoch": 1.1135092269112887, "grad_norm": 22.125, "learning_rate": 1.3925884865241472e-05, "loss": 1.3668, "step": 6564 }, { "epoch": 1.1136806910002786, "grad_norm": 1.6953125, "learning_rate": 1.3924223863375356e-05, "loss": 0.8056, "step": 6565 }, { "epoch": 1.1138521550892686, "grad_norm": 1.6171875, "learning_rate": 1.3922562733525522e-05, "loss": 0.8781, "step": 6566 }, { "epoch": 1.1140236191782584, "grad_norm": 1.671875, "learning_rate": 1.3920901475746147e-05, "loss": 0.7755, "step": 6567 }, { "epoch": 1.1141950832672483, "grad_norm": 1.7421875, "learning_rate": 1.391924009009141e-05, "loss": 0.8503, "step": 6568 }, { "epoch": 1.114366547356238, "grad_norm": 1.671875, "learning_rate": 1.3917578576615495e-05, "loss": 0.8424, "step": 6569 }, { "epoch": 1.114538011445228, "grad_norm": 1.6640625, "learning_rate": 1.3915916935372594e-05, "loss": 0.8289, "step": 6570 }, { "epoch": 1.1147094755342177, "grad_norm": 1.703125, "learning_rate": 1.391425516641689e-05, "loss": 0.8466, "step": 6571 }, { "epoch": 1.1148809396232076, "grad_norm": 1.734375, "learning_rate": 1.3912593269802588e-05, "loss": 0.9011, "step": 6572 }, { "epoch": 1.1150524037121976, "grad_norm": 1.6640625, "learning_rate": 1.3910931245583883e-05, "loss": 0.8765, "step": 6573 }, { "epoch": 1.1152238678011874, "grad_norm": 1.7109375, "learning_rate": 1.3909269093814985e-05, "loss": 0.8353, "step": 6574 }, { "epoch": 1.1153953318901773, "grad_norm": 1.6953125, "learning_rate": 1.3907606814550105e-05, "loss": 0.9173, "step": 6575 }, { "epoch": 1.115566795979167, "grad_norm": 1.546875, "learning_rate": 1.3905944407843447e-05, "loss": 0.8287, "step": 6576 }, { "epoch": 1.115738260068157, "grad_norm": 1.65625, "learning_rate": 1.3904281873749237e-05, "loss": 0.857, "step": 6577 }, { "epoch": 1.115909724157147, "grad_norm": 1.75, "learning_rate": 1.3902619212321692e-05, "loss": 0.8939, "step": 6578 }, { "epoch": 1.1160811882461368, "grad_norm": 1.7734375, "learning_rate": 1.3900956423615035e-05, "loss": 0.8707, "step": 6579 }, { "epoch": 1.1162526523351266, "grad_norm": 1.6796875, "learning_rate": 1.3899293507683502e-05, "loss": 0.7755, "step": 6580 }, { "epoch": 1.1164241164241164, "grad_norm": 1.640625, "learning_rate": 1.3897630464581325e-05, "loss": 0.8604, "step": 6581 }, { "epoch": 1.1165955805131063, "grad_norm": 1.6484375, "learning_rate": 1.389596729436274e-05, "loss": 0.8777, "step": 6582 }, { "epoch": 1.116767044602096, "grad_norm": 1.625, "learning_rate": 1.3894303997081991e-05, "loss": 0.8515, "step": 6583 }, { "epoch": 1.116938508691086, "grad_norm": 1.6171875, "learning_rate": 1.3892640572793323e-05, "loss": 0.9125, "step": 6584 }, { "epoch": 1.117109972780076, "grad_norm": 1.703125, "learning_rate": 1.3890977021550988e-05, "loss": 0.8635, "step": 6585 }, { "epoch": 1.1172814368690658, "grad_norm": 1.6953125, "learning_rate": 1.3889313343409244e-05, "loss": 0.8413, "step": 6586 }, { "epoch": 1.1174529009580556, "grad_norm": 1.6171875, "learning_rate": 1.3887649538422344e-05, "loss": 0.8536, "step": 6587 }, { "epoch": 1.1176243650470454, "grad_norm": 1.78125, "learning_rate": 1.3885985606644552e-05, "loss": 0.872, "step": 6588 }, { "epoch": 1.1177958291360353, "grad_norm": 1.7109375, "learning_rate": 1.3884321548130136e-05, "loss": 0.8876, "step": 6589 }, { "epoch": 1.117967293225025, "grad_norm": 1.625, "learning_rate": 1.3882657362933368e-05, "loss": 0.9244, "step": 6590 }, { "epoch": 1.1181387573140151, "grad_norm": 1.625, "learning_rate": 1.3880993051108522e-05, "loss": 0.868, "step": 6591 }, { "epoch": 1.118310221403005, "grad_norm": 1.7109375, "learning_rate": 1.3879328612709879e-05, "loss": 0.9156, "step": 6592 }, { "epoch": 1.1184816854919948, "grad_norm": 1.765625, "learning_rate": 1.387766404779172e-05, "loss": 0.8918, "step": 6593 }, { "epoch": 1.1186531495809846, "grad_norm": 1.671875, "learning_rate": 1.3875999356408336e-05, "loss": 0.9196, "step": 6594 }, { "epoch": 1.1188246136699744, "grad_norm": 1.625, "learning_rate": 1.3874334538614016e-05, "loss": 0.8413, "step": 6595 }, { "epoch": 1.1189960777589643, "grad_norm": 1.7734375, "learning_rate": 1.3872669594463059e-05, "loss": 0.8718, "step": 6596 }, { "epoch": 1.1191675418479543, "grad_norm": 1.6796875, "learning_rate": 1.3871004524009764e-05, "loss": 0.8636, "step": 6597 }, { "epoch": 1.1193390059369441, "grad_norm": 1.75, "learning_rate": 1.3869339327308433e-05, "loss": 0.9268, "step": 6598 }, { "epoch": 1.119510470025934, "grad_norm": 1.6328125, "learning_rate": 1.3867674004413379e-05, "loss": 0.8645, "step": 6599 }, { "epoch": 1.1196819341149238, "grad_norm": 1.671875, "learning_rate": 1.3866008555378906e-05, "loss": 0.8711, "step": 6600 }, { "epoch": 1.1198533982039136, "grad_norm": 1.6015625, "learning_rate": 1.3864342980259338e-05, "loss": 0.7778, "step": 6601 }, { "epoch": 1.1200248622929034, "grad_norm": 1.6953125, "learning_rate": 1.3862677279108993e-05, "loss": 0.9177, "step": 6602 }, { "epoch": 1.1201963263818935, "grad_norm": 1.6484375, "learning_rate": 1.3861011451982198e-05, "loss": 0.8764, "step": 6603 }, { "epoch": 1.1203677904708833, "grad_norm": 1.6484375, "learning_rate": 1.3859345498933283e-05, "loss": 0.7912, "step": 6604 }, { "epoch": 1.1205392545598731, "grad_norm": 1.59375, "learning_rate": 1.3857679420016575e-05, "loss": 0.8659, "step": 6605 }, { "epoch": 1.120710718648863, "grad_norm": 1.6484375, "learning_rate": 1.3856013215286415e-05, "loss": 0.832, "step": 6606 }, { "epoch": 1.1208821827378528, "grad_norm": 1.6875, "learning_rate": 1.3854346884797143e-05, "loss": 0.8807, "step": 6607 }, { "epoch": 1.1210536468268426, "grad_norm": 1.6953125, "learning_rate": 1.3852680428603105e-05, "loss": 0.8882, "step": 6608 }, { "epoch": 1.1212251109158327, "grad_norm": 1.7109375, "learning_rate": 1.3851013846758651e-05, "loss": 0.9261, "step": 6609 }, { "epoch": 1.1213965750048225, "grad_norm": 1.609375, "learning_rate": 1.3849347139318134e-05, "loss": 0.8339, "step": 6610 }, { "epoch": 1.1215680390938123, "grad_norm": 1.6015625, "learning_rate": 1.3847680306335911e-05, "loss": 0.8992, "step": 6611 }, { "epoch": 1.1217395031828021, "grad_norm": 1.6171875, "learning_rate": 1.3846013347866345e-05, "loss": 0.8608, "step": 6612 }, { "epoch": 1.121910967271792, "grad_norm": 1.609375, "learning_rate": 1.38443462639638e-05, "loss": 0.922, "step": 6613 }, { "epoch": 1.1220824313607818, "grad_norm": 1.65625, "learning_rate": 1.3842679054682646e-05, "loss": 0.7879, "step": 6614 }, { "epoch": 1.1222538954497718, "grad_norm": 1.640625, "learning_rate": 1.3841011720077262e-05, "loss": 0.8357, "step": 6615 }, { "epoch": 1.1224253595387617, "grad_norm": 1.6640625, "learning_rate": 1.383934426020202e-05, "loss": 0.814, "step": 6616 }, { "epoch": 1.1225968236277515, "grad_norm": 1.625, "learning_rate": 1.3837676675111303e-05, "loss": 0.8158, "step": 6617 }, { "epoch": 1.1227682877167413, "grad_norm": 1.65625, "learning_rate": 1.3836008964859497e-05, "loss": 0.8426, "step": 6618 }, { "epoch": 1.1229397518057311, "grad_norm": 1.75, "learning_rate": 1.3834341129500995e-05, "loss": 0.9509, "step": 6619 }, { "epoch": 1.123111215894721, "grad_norm": 1.6484375, "learning_rate": 1.3832673169090187e-05, "loss": 0.8784, "step": 6620 }, { "epoch": 1.123282679983711, "grad_norm": 1.625, "learning_rate": 1.3831005083681479e-05, "loss": 0.8034, "step": 6621 }, { "epoch": 1.1234541440727008, "grad_norm": 1.6484375, "learning_rate": 1.3829336873329266e-05, "loss": 0.9271, "step": 6622 }, { "epoch": 1.1236256081616907, "grad_norm": 1.578125, "learning_rate": 1.382766853808796e-05, "loss": 0.8658, "step": 6623 }, { "epoch": 1.1237970722506805, "grad_norm": 1.6640625, "learning_rate": 1.3826000078011968e-05, "loss": 0.8444, "step": 6624 }, { "epoch": 1.1239685363396703, "grad_norm": 1.7265625, "learning_rate": 1.3824331493155706e-05, "loss": 0.9042, "step": 6625 }, { "epoch": 1.1241400004286601, "grad_norm": 1.6640625, "learning_rate": 1.3822662783573595e-05, "loss": 0.7856, "step": 6626 }, { "epoch": 1.1243114645176502, "grad_norm": 1.6171875, "learning_rate": 1.3820993949320054e-05, "loss": 0.8483, "step": 6627 }, { "epoch": 1.12448292860664, "grad_norm": 1.59375, "learning_rate": 1.381932499044951e-05, "loss": 0.8497, "step": 6628 }, { "epoch": 1.1246543926956298, "grad_norm": 1.6328125, "learning_rate": 1.3817655907016399e-05, "loss": 0.896, "step": 6629 }, { "epoch": 1.1248258567846197, "grad_norm": 1.7265625, "learning_rate": 1.381598669907515e-05, "loss": 0.8452, "step": 6630 }, { "epoch": 1.1249973208736095, "grad_norm": 1.6953125, "learning_rate": 1.3814317366680206e-05, "loss": 0.8716, "step": 6631 }, { "epoch": 1.1251687849625993, "grad_norm": 1.7265625, "learning_rate": 1.381264790988601e-05, "loss": 0.882, "step": 6632 }, { "epoch": 1.1253402490515891, "grad_norm": 1.6640625, "learning_rate": 1.381097832874701e-05, "loss": 0.8736, "step": 6633 }, { "epoch": 1.1255117131405792, "grad_norm": 1.703125, "learning_rate": 1.3809308623317655e-05, "loss": 0.8683, "step": 6634 }, { "epoch": 1.125683177229569, "grad_norm": 1.734375, "learning_rate": 1.3807638793652401e-05, "loss": 0.908, "step": 6635 }, { "epoch": 1.1258546413185588, "grad_norm": 1.6484375, "learning_rate": 1.3805968839805709e-05, "loss": 0.8239, "step": 6636 }, { "epoch": 1.1260261054075487, "grad_norm": 1.703125, "learning_rate": 1.3804298761832038e-05, "loss": 0.9048, "step": 6637 }, { "epoch": 1.1261975694965385, "grad_norm": 1.703125, "learning_rate": 1.380262855978586e-05, "loss": 0.8182, "step": 6638 }, { "epoch": 1.1263690335855285, "grad_norm": 1.6484375, "learning_rate": 1.3800958233721644e-05, "loss": 0.9245, "step": 6639 }, { "epoch": 1.1265404976745184, "grad_norm": 1.65625, "learning_rate": 1.379928778369387e-05, "loss": 0.8484, "step": 6640 }, { "epoch": 1.1267119617635082, "grad_norm": 1.6171875, "learning_rate": 1.3797617209757013e-05, "loss": 0.8341, "step": 6641 }, { "epoch": 1.126883425852498, "grad_norm": 1.65625, "learning_rate": 1.3795946511965557e-05, "loss": 0.8392, "step": 6642 }, { "epoch": 1.1270548899414878, "grad_norm": 1.6171875, "learning_rate": 1.3794275690373993e-05, "loss": 0.9658, "step": 6643 }, { "epoch": 1.1272263540304777, "grad_norm": 1.609375, "learning_rate": 1.379260474503681e-05, "loss": 0.8144, "step": 6644 }, { "epoch": 1.1273978181194675, "grad_norm": 1.7421875, "learning_rate": 1.3790933676008505e-05, "loss": 0.8777, "step": 6645 }, { "epoch": 1.1275692822084575, "grad_norm": 1.6953125, "learning_rate": 1.3789262483343578e-05, "loss": 0.8284, "step": 6646 }, { "epoch": 1.1277407462974474, "grad_norm": 2.03125, "learning_rate": 1.3787591167096528e-05, "loss": 0.846, "step": 6647 }, { "epoch": 1.1279122103864372, "grad_norm": 1.609375, "learning_rate": 1.3785919727321872e-05, "loss": 0.8, "step": 6648 }, { "epoch": 1.128083674475427, "grad_norm": 1.609375, "learning_rate": 1.3784248164074116e-05, "loss": 0.9209, "step": 6649 }, { "epoch": 1.1282551385644168, "grad_norm": 1.625, "learning_rate": 1.3782576477407778e-05, "loss": 0.8437, "step": 6650 }, { "epoch": 1.1284266026534069, "grad_norm": 1.7109375, "learning_rate": 1.3780904667377377e-05, "loss": 0.9282, "step": 6651 }, { "epoch": 1.1285980667423967, "grad_norm": 1.6328125, "learning_rate": 1.3779232734037437e-05, "loss": 0.8401, "step": 6652 }, { "epoch": 1.1287695308313865, "grad_norm": 1.7890625, "learning_rate": 1.3777560677442485e-05, "loss": 0.986, "step": 6653 }, { "epoch": 1.1289409949203764, "grad_norm": 1.7265625, "learning_rate": 1.3775888497647056e-05, "loss": 0.8463, "step": 6654 }, { "epoch": 1.1291124590093662, "grad_norm": 1.71875, "learning_rate": 1.3774216194705683e-05, "loss": 0.8735, "step": 6655 }, { "epoch": 1.129283923098356, "grad_norm": 1.671875, "learning_rate": 1.377254376867291e-05, "loss": 0.8773, "step": 6656 }, { "epoch": 1.1294553871873458, "grad_norm": 1.640625, "learning_rate": 1.3770871219603276e-05, "loss": 0.9427, "step": 6657 }, { "epoch": 1.1296268512763359, "grad_norm": 1.640625, "learning_rate": 1.3769198547551333e-05, "loss": 0.8364, "step": 6658 }, { "epoch": 1.1297983153653257, "grad_norm": 1.65625, "learning_rate": 1.3767525752571631e-05, "loss": 0.835, "step": 6659 }, { "epoch": 1.1299697794543155, "grad_norm": 1.640625, "learning_rate": 1.3765852834718727e-05, "loss": 0.9108, "step": 6660 }, { "epoch": 1.1301412435433054, "grad_norm": 1.609375, "learning_rate": 1.3764179794047183e-05, "loss": 0.8486, "step": 6661 }, { "epoch": 1.1303127076322952, "grad_norm": 1.59375, "learning_rate": 1.376250663061156e-05, "loss": 0.8227, "step": 6662 }, { "epoch": 1.1304841717212852, "grad_norm": 1.5859375, "learning_rate": 1.3760833344466428e-05, "loss": 0.8379, "step": 6663 }, { "epoch": 1.130655635810275, "grad_norm": 1.7578125, "learning_rate": 1.3759159935666358e-05, "loss": 0.8692, "step": 6664 }, { "epoch": 1.1308270998992649, "grad_norm": 1.671875, "learning_rate": 1.3757486404265927e-05, "loss": 0.8368, "step": 6665 }, { "epoch": 1.1309985639882547, "grad_norm": 1.625, "learning_rate": 1.3755812750319716e-05, "loss": 0.793, "step": 6666 }, { "epoch": 1.1311700280772445, "grad_norm": 1.65625, "learning_rate": 1.3754138973882304e-05, "loss": 0.8666, "step": 6667 }, { "epoch": 1.1313414921662344, "grad_norm": 1.75, "learning_rate": 1.3752465075008288e-05, "loss": 0.909, "step": 6668 }, { "epoch": 1.1315129562552242, "grad_norm": 1.703125, "learning_rate": 1.3750791053752254e-05, "loss": 0.9476, "step": 6669 }, { "epoch": 1.1316844203442142, "grad_norm": 1.6171875, "learning_rate": 1.3749116910168798e-05, "loss": 0.8361, "step": 6670 }, { "epoch": 1.131855884433204, "grad_norm": 1.6875, "learning_rate": 1.3747442644312523e-05, "loss": 0.9592, "step": 6671 }, { "epoch": 1.1320273485221939, "grad_norm": 1.671875, "learning_rate": 1.3745768256238033e-05, "loss": 0.8749, "step": 6672 }, { "epoch": 1.1321988126111837, "grad_norm": 1.6015625, "learning_rate": 1.3744093745999935e-05, "loss": 0.7972, "step": 6673 }, { "epoch": 1.1323702767001735, "grad_norm": 1.6796875, "learning_rate": 1.374241911365284e-05, "loss": 0.8684, "step": 6674 }, { "epoch": 1.1325417407891636, "grad_norm": 1.6640625, "learning_rate": 1.3740744359251363e-05, "loss": 0.7984, "step": 6675 }, { "epoch": 1.1327132048781534, "grad_norm": 1.7421875, "learning_rate": 1.3739069482850128e-05, "loss": 0.8367, "step": 6676 }, { "epoch": 1.1328846689671432, "grad_norm": 1.703125, "learning_rate": 1.3737394484503756e-05, "loss": 0.8666, "step": 6677 }, { "epoch": 1.133056133056133, "grad_norm": 1.65625, "learning_rate": 1.3735719364266878e-05, "loss": 0.9206, "step": 6678 }, { "epoch": 1.133227597145123, "grad_norm": 1.671875, "learning_rate": 1.3734044122194126e-05, "loss": 0.8831, "step": 6679 }, { "epoch": 1.1333990612341127, "grad_norm": 1.6171875, "learning_rate": 1.373236875834013e-05, "loss": 0.8538, "step": 6680 }, { "epoch": 1.1335705253231025, "grad_norm": 1.734375, "learning_rate": 1.3730693272759537e-05, "loss": 0.95, "step": 6681 }, { "epoch": 1.1337419894120926, "grad_norm": 1.7265625, "learning_rate": 1.3729017665506985e-05, "loss": 0.8747, "step": 6682 }, { "epoch": 1.1339134535010824, "grad_norm": 1.7421875, "learning_rate": 1.3727341936637126e-05, "loss": 0.8905, "step": 6683 }, { "epoch": 1.1340849175900722, "grad_norm": 1.6796875, "learning_rate": 1.372566608620461e-05, "loss": 0.8614, "step": 6684 }, { "epoch": 1.134256381679062, "grad_norm": 1.5546875, "learning_rate": 1.3723990114264094e-05, "loss": 0.8072, "step": 6685 }, { "epoch": 1.134427845768052, "grad_norm": 1.6484375, "learning_rate": 1.3722314020870237e-05, "loss": 0.798, "step": 6686 }, { "epoch": 1.134599309857042, "grad_norm": 1.7578125, "learning_rate": 1.3720637806077703e-05, "loss": 0.9451, "step": 6687 }, { "epoch": 1.1347707739460318, "grad_norm": 1.71875, "learning_rate": 1.3718961469941159e-05, "loss": 0.8844, "step": 6688 }, { "epoch": 1.1349422380350216, "grad_norm": 1.671875, "learning_rate": 1.3717285012515278e-05, "loss": 0.7849, "step": 6689 }, { "epoch": 1.1351137021240114, "grad_norm": 1.65625, "learning_rate": 1.3715608433854738e-05, "loss": 0.9024, "step": 6690 }, { "epoch": 1.1352851662130012, "grad_norm": 1.6875, "learning_rate": 1.371393173401421e-05, "loss": 0.8926, "step": 6691 }, { "epoch": 1.135456630301991, "grad_norm": 1.7734375, "learning_rate": 1.3712254913048384e-05, "loss": 0.8341, "step": 6692 }, { "epoch": 1.135628094390981, "grad_norm": 1.7109375, "learning_rate": 1.3710577971011948e-05, "loss": 0.934, "step": 6693 }, { "epoch": 1.135799558479971, "grad_norm": 1.671875, "learning_rate": 1.3708900907959585e-05, "loss": 0.847, "step": 6694 }, { "epoch": 1.1359710225689608, "grad_norm": 1.5703125, "learning_rate": 1.3707223723946003e-05, "loss": 0.827, "step": 6695 }, { "epoch": 1.1361424866579506, "grad_norm": 1.65625, "learning_rate": 1.3705546419025897e-05, "loss": 0.8121, "step": 6696 }, { "epoch": 1.1363139507469404, "grad_norm": 1.578125, "learning_rate": 1.3703868993253967e-05, "loss": 0.7692, "step": 6697 }, { "epoch": 1.1364854148359302, "grad_norm": 1.6328125, "learning_rate": 1.370219144668492e-05, "loss": 0.8743, "step": 6698 }, { "epoch": 1.13665687892492, "grad_norm": 1.6484375, "learning_rate": 1.3700513779373467e-05, "loss": 0.9635, "step": 6699 }, { "epoch": 1.1368283430139101, "grad_norm": 1.7265625, "learning_rate": 1.3698835991374328e-05, "loss": 0.9609, "step": 6700 }, { "epoch": 1.1369998071029, "grad_norm": 1.6640625, "learning_rate": 1.3697158082742219e-05, "loss": 0.8923, "step": 6701 }, { "epoch": 1.1371712711918898, "grad_norm": 1.65625, "learning_rate": 1.3695480053531864e-05, "loss": 0.809, "step": 6702 }, { "epoch": 1.1373427352808796, "grad_norm": 1.625, "learning_rate": 1.3693801903797984e-05, "loss": 0.8371, "step": 6703 }, { "epoch": 1.1375141993698694, "grad_norm": 1.6875, "learning_rate": 1.3692123633595315e-05, "loss": 0.9109, "step": 6704 }, { "epoch": 1.1376856634588592, "grad_norm": 1.7109375, "learning_rate": 1.3690445242978594e-05, "loss": 0.9534, "step": 6705 }, { "epoch": 1.1378571275478493, "grad_norm": 1.640625, "learning_rate": 1.3688766732002555e-05, "loss": 0.8689, "step": 6706 }, { "epoch": 1.1380285916368391, "grad_norm": 1.7109375, "learning_rate": 1.3687088100721942e-05, "loss": 0.8972, "step": 6707 }, { "epoch": 1.138200055725829, "grad_norm": 1.71875, "learning_rate": 1.3685409349191505e-05, "loss": 0.8928, "step": 6708 }, { "epoch": 1.1383715198148188, "grad_norm": 1.5859375, "learning_rate": 1.3683730477465989e-05, "loss": 0.882, "step": 6709 }, { "epoch": 1.1385429839038086, "grad_norm": 1.75, "learning_rate": 1.3682051485600148e-05, "loss": 0.9091, "step": 6710 }, { "epoch": 1.1387144479927984, "grad_norm": 1.6015625, "learning_rate": 1.3680372373648744e-05, "loss": 0.795, "step": 6711 }, { "epoch": 1.1388859120817885, "grad_norm": 1.6953125, "learning_rate": 1.3678693141666539e-05, "loss": 0.9273, "step": 6712 }, { "epoch": 1.1390573761707783, "grad_norm": 1.6640625, "learning_rate": 1.3677013789708297e-05, "loss": 0.9238, "step": 6713 }, { "epoch": 1.1392288402597681, "grad_norm": 1.6328125, "learning_rate": 1.367533431782879e-05, "loss": 0.8532, "step": 6714 }, { "epoch": 1.139400304348758, "grad_norm": 1.65625, "learning_rate": 1.367365472608279e-05, "loss": 0.8438, "step": 6715 }, { "epoch": 1.1395717684377478, "grad_norm": 1.6875, "learning_rate": 1.3671975014525076e-05, "loss": 0.8949, "step": 6716 }, { "epoch": 1.1397432325267376, "grad_norm": 1.7578125, "learning_rate": 1.3670295183210431e-05, "loss": 0.8876, "step": 6717 }, { "epoch": 1.1399146966157276, "grad_norm": 1.6015625, "learning_rate": 1.3668615232193636e-05, "loss": 0.864, "step": 6718 }, { "epoch": 1.1400861607047175, "grad_norm": 1.6328125, "learning_rate": 1.3666935161529487e-05, "loss": 0.8316, "step": 6719 }, { "epoch": 1.1402576247937073, "grad_norm": 1.7578125, "learning_rate": 1.3665254971272772e-05, "loss": 0.9238, "step": 6720 }, { "epoch": 1.1404290888826971, "grad_norm": 1.7890625, "learning_rate": 1.3663574661478291e-05, "loss": 0.9234, "step": 6721 }, { "epoch": 1.140600552971687, "grad_norm": 1.796875, "learning_rate": 1.366189423220084e-05, "loss": 0.9048, "step": 6722 }, { "epoch": 1.1407720170606768, "grad_norm": 1.7421875, "learning_rate": 1.3660213683495233e-05, "loss": 0.9594, "step": 6723 }, { "epoch": 1.1409434811496668, "grad_norm": 1.6328125, "learning_rate": 1.3658533015416276e-05, "loss": 0.8342, "step": 6724 }, { "epoch": 1.1411149452386566, "grad_norm": 1.6796875, "learning_rate": 1.3656852228018779e-05, "loss": 0.8728, "step": 6725 }, { "epoch": 1.1412864093276465, "grad_norm": 1.640625, "learning_rate": 1.3655171321357561e-05, "loss": 0.8813, "step": 6726 }, { "epoch": 1.1414578734166363, "grad_norm": 1.7109375, "learning_rate": 1.3653490295487442e-05, "loss": 0.8986, "step": 6727 }, { "epoch": 1.1416293375056261, "grad_norm": 1.75, "learning_rate": 1.3651809150463246e-05, "loss": 0.9289, "step": 6728 }, { "epoch": 1.141800801594616, "grad_norm": 1.6640625, "learning_rate": 1.3650127886339801e-05, "loss": 0.8557, "step": 6729 }, { "epoch": 1.1419722656836058, "grad_norm": 1.640625, "learning_rate": 1.3648446503171942e-05, "loss": 0.9425, "step": 6730 }, { "epoch": 1.1421437297725958, "grad_norm": 1.609375, "learning_rate": 1.3646765001014504e-05, "loss": 0.8632, "step": 6731 }, { "epoch": 1.1423151938615856, "grad_norm": 1.625, "learning_rate": 1.3645083379922327e-05, "loss": 0.7903, "step": 6732 }, { "epoch": 1.1424866579505755, "grad_norm": 1.71875, "learning_rate": 1.3643401639950253e-05, "loss": 0.9099, "step": 6733 }, { "epoch": 1.1426581220395653, "grad_norm": 1.6953125, "learning_rate": 1.3641719781153132e-05, "loss": 0.8892, "step": 6734 }, { "epoch": 1.1428295861285551, "grad_norm": 1.671875, "learning_rate": 1.3640037803585818e-05, "loss": 0.843, "step": 6735 }, { "epoch": 1.1430010502175452, "grad_norm": 1.7265625, "learning_rate": 1.3638355707303163e-05, "loss": 0.8157, "step": 6736 }, { "epoch": 1.143172514306535, "grad_norm": 1.5703125, "learning_rate": 1.3636673492360029e-05, "loss": 0.8234, "step": 6737 }, { "epoch": 1.1433439783955248, "grad_norm": 1.6796875, "learning_rate": 1.3634991158811276e-05, "loss": 0.8297, "step": 6738 }, { "epoch": 1.1435154424845146, "grad_norm": 1.7578125, "learning_rate": 1.3633308706711772e-05, "loss": 0.9657, "step": 6739 }, { "epoch": 1.1436869065735045, "grad_norm": 1.5390625, "learning_rate": 1.3631626136116392e-05, "loss": 0.7461, "step": 6740 }, { "epoch": 1.1438583706624943, "grad_norm": 1.6484375, "learning_rate": 1.3629943447080003e-05, "loss": 0.8086, "step": 6741 }, { "epoch": 1.1440298347514841, "grad_norm": 1.7265625, "learning_rate": 1.3628260639657496e-05, "loss": 0.8388, "step": 6742 }, { "epoch": 1.1442012988404742, "grad_norm": 1.6953125, "learning_rate": 1.3626577713903744e-05, "loss": 0.9211, "step": 6743 }, { "epoch": 1.144372762929464, "grad_norm": 1.640625, "learning_rate": 1.3624894669873636e-05, "loss": 0.8763, "step": 6744 }, { "epoch": 1.1445442270184538, "grad_norm": 1.84375, "learning_rate": 1.3623211507622063e-05, "loss": 0.9707, "step": 6745 }, { "epoch": 1.1447156911074436, "grad_norm": 1.5859375, "learning_rate": 1.3621528227203918e-05, "loss": 0.841, "step": 6746 }, { "epoch": 1.1448871551964335, "grad_norm": 1.609375, "learning_rate": 1.3619844828674101e-05, "loss": 0.8323, "step": 6747 }, { "epoch": 1.1450586192854235, "grad_norm": 1.7109375, "learning_rate": 1.3618161312087515e-05, "loss": 0.838, "step": 6748 }, { "epoch": 1.1452300833744133, "grad_norm": 1.6953125, "learning_rate": 1.361647767749906e-05, "loss": 0.8694, "step": 6749 }, { "epoch": 1.1454015474634032, "grad_norm": 1.6953125, "learning_rate": 1.3614793924963649e-05, "loss": 0.8692, "step": 6750 }, { "epoch": 1.145573011552393, "grad_norm": 1.6953125, "learning_rate": 1.36131100545362e-05, "loss": 0.8843, "step": 6751 }, { "epoch": 1.1457444756413828, "grad_norm": 1.765625, "learning_rate": 1.3611426066271625e-05, "loss": 0.9358, "step": 6752 }, { "epoch": 1.1459159397303726, "grad_norm": 1.625, "learning_rate": 1.3609741960224847e-05, "loss": 0.7755, "step": 6753 }, { "epoch": 1.1460874038193625, "grad_norm": 1.6953125, "learning_rate": 1.3608057736450792e-05, "loss": 0.9001, "step": 6754 }, { "epoch": 1.1462588679083525, "grad_norm": 1.671875, "learning_rate": 1.3606373395004384e-05, "loss": 0.9319, "step": 6755 }, { "epoch": 1.1464303319973423, "grad_norm": 1.6171875, "learning_rate": 1.3604688935940562e-05, "loss": 0.8448, "step": 6756 }, { "epoch": 1.1466017960863322, "grad_norm": 1.6484375, "learning_rate": 1.3603004359314259e-05, "loss": 0.8968, "step": 6757 }, { "epoch": 1.146773260175322, "grad_norm": 1.703125, "learning_rate": 1.3601319665180415e-05, "loss": 0.8989, "step": 6758 }, { "epoch": 1.1469447242643118, "grad_norm": 1.640625, "learning_rate": 1.3599634853593977e-05, "loss": 0.896, "step": 6759 }, { "epoch": 1.1471161883533019, "grad_norm": 1.7734375, "learning_rate": 1.359794992460989e-05, "loss": 0.9144, "step": 6760 }, { "epoch": 1.1472876524422917, "grad_norm": 1.8203125, "learning_rate": 1.3596264878283107e-05, "loss": 0.8946, "step": 6761 }, { "epoch": 1.1474591165312815, "grad_norm": 1.640625, "learning_rate": 1.3594579714668585e-05, "loss": 0.8703, "step": 6762 }, { "epoch": 1.1476305806202713, "grad_norm": 1.7265625, "learning_rate": 1.3592894433821284e-05, "loss": 0.9466, "step": 6763 }, { "epoch": 1.1478020447092612, "grad_norm": 1.75, "learning_rate": 1.3591209035796164e-05, "loss": 0.8969, "step": 6764 }, { "epoch": 1.147973508798251, "grad_norm": 1.640625, "learning_rate": 1.3589523520648195e-05, "loss": 0.9025, "step": 6765 }, { "epoch": 1.1481449728872408, "grad_norm": 1.578125, "learning_rate": 1.3587837888432348e-05, "loss": 0.8256, "step": 6766 }, { "epoch": 1.1483164369762309, "grad_norm": 1.6015625, "learning_rate": 1.3586152139203596e-05, "loss": 0.8648, "step": 6767 }, { "epoch": 1.1484879010652207, "grad_norm": 1.703125, "learning_rate": 1.3584466273016919e-05, "loss": 0.8814, "step": 6768 }, { "epoch": 1.1486593651542105, "grad_norm": 1.6796875, "learning_rate": 1.3582780289927296e-05, "loss": 0.9134, "step": 6769 }, { "epoch": 1.1488308292432003, "grad_norm": 1.703125, "learning_rate": 1.3581094189989718e-05, "loss": 0.8339, "step": 6770 }, { "epoch": 1.1490022933321902, "grad_norm": 1.71875, "learning_rate": 1.3579407973259176e-05, "loss": 0.8392, "step": 6771 }, { "epoch": 1.1491737574211802, "grad_norm": 1.6171875, "learning_rate": 1.3577721639790658e-05, "loss": 0.8614, "step": 6772 }, { "epoch": 1.14934522151017, "grad_norm": 1.625, "learning_rate": 1.3576035189639165e-05, "loss": 0.7583, "step": 6773 }, { "epoch": 1.1495166855991599, "grad_norm": 1.6171875, "learning_rate": 1.35743486228597e-05, "loss": 0.8958, "step": 6774 }, { "epoch": 1.1496881496881497, "grad_norm": 1.6640625, "learning_rate": 1.3572661939507266e-05, "loss": 0.8706, "step": 6775 }, { "epoch": 1.1498596137771395, "grad_norm": 1.65625, "learning_rate": 1.3570975139636872e-05, "loss": 0.8905, "step": 6776 }, { "epoch": 1.1500310778661293, "grad_norm": 1.6953125, "learning_rate": 1.3569288223303531e-05, "loss": 0.876, "step": 6777 }, { "epoch": 1.1502025419551192, "grad_norm": 1.6953125, "learning_rate": 1.3567601190562264e-05, "loss": 0.818, "step": 6778 }, { "epoch": 1.1503740060441092, "grad_norm": 1.7109375, "learning_rate": 1.3565914041468085e-05, "loss": 0.9179, "step": 6779 }, { "epoch": 1.150545470133099, "grad_norm": 1.71875, "learning_rate": 1.3564226776076022e-05, "loss": 0.8875, "step": 6780 }, { "epoch": 1.1507169342220889, "grad_norm": 1.578125, "learning_rate": 1.3562539394441101e-05, "loss": 0.875, "step": 6781 }, { "epoch": 1.1508883983110787, "grad_norm": 1.6328125, "learning_rate": 1.3560851896618354e-05, "loss": 0.8554, "step": 6782 }, { "epoch": 1.1510598624000685, "grad_norm": 1.78125, "learning_rate": 1.3559164282662821e-05, "loss": 0.8489, "step": 6783 }, { "epoch": 1.1512313264890586, "grad_norm": 1.609375, "learning_rate": 1.3557476552629538e-05, "loss": 0.8179, "step": 6784 }, { "epoch": 1.1514027905780484, "grad_norm": 1.640625, "learning_rate": 1.3555788706573547e-05, "loss": 0.9496, "step": 6785 }, { "epoch": 1.1515742546670382, "grad_norm": 1.625, "learning_rate": 1.3554100744549899e-05, "loss": 0.8999, "step": 6786 }, { "epoch": 1.151745718756028, "grad_norm": 1.6640625, "learning_rate": 1.3552412666613637e-05, "loss": 0.816, "step": 6787 }, { "epoch": 1.1519171828450179, "grad_norm": 1.6796875, "learning_rate": 1.3550724472819825e-05, "loss": 0.8914, "step": 6788 }, { "epoch": 1.1520886469340077, "grad_norm": 1.6796875, "learning_rate": 1.3549036163223515e-05, "loss": 0.8921, "step": 6789 }, { "epoch": 1.1522601110229975, "grad_norm": 1.6171875, "learning_rate": 1.3547347737879772e-05, "loss": 0.8957, "step": 6790 }, { "epoch": 1.1524315751119876, "grad_norm": 1.578125, "learning_rate": 1.3545659196843661e-05, "loss": 0.8737, "step": 6791 }, { "epoch": 1.1526030392009774, "grad_norm": 1.671875, "learning_rate": 1.3543970540170253e-05, "loss": 0.8306, "step": 6792 }, { "epoch": 1.1527745032899672, "grad_norm": 1.765625, "learning_rate": 1.3542281767914617e-05, "loss": 0.9172, "step": 6793 }, { "epoch": 1.152945967378957, "grad_norm": 1.703125, "learning_rate": 1.3540592880131839e-05, "loss": 0.8589, "step": 6794 }, { "epoch": 1.1531174314679469, "grad_norm": 1.625, "learning_rate": 1.3538903876876993e-05, "loss": 0.8532, "step": 6795 }, { "epoch": 1.1532888955569367, "grad_norm": 1.78125, "learning_rate": 1.353721475820516e-05, "loss": 1.0322, "step": 6796 }, { "epoch": 1.1534603596459267, "grad_norm": 1.6875, "learning_rate": 1.3535525524171438e-05, "loss": 0.8166, "step": 6797 }, { "epoch": 1.1536318237349166, "grad_norm": 1.6328125, "learning_rate": 1.3533836174830915e-05, "loss": 0.8826, "step": 6798 }, { "epoch": 1.1538032878239064, "grad_norm": 1.765625, "learning_rate": 1.3532146710238684e-05, "loss": 0.8866, "step": 6799 }, { "epoch": 1.1539747519128962, "grad_norm": 1.609375, "learning_rate": 1.3530457130449855e-05, "loss": 0.9088, "step": 6800 }, { "epoch": 1.154146216001886, "grad_norm": 1.6640625, "learning_rate": 1.3528767435519521e-05, "loss": 0.8938, "step": 6801 }, { "epoch": 1.1543176800908759, "grad_norm": 1.6328125, "learning_rate": 1.3527077625502791e-05, "loss": 0.9035, "step": 6802 }, { "epoch": 1.154489144179866, "grad_norm": 1.59375, "learning_rate": 1.3525387700454779e-05, "loss": 0.8444, "step": 6803 }, { "epoch": 1.1546606082688557, "grad_norm": 1.625, "learning_rate": 1.3523697660430599e-05, "loss": 0.9083, "step": 6804 }, { "epoch": 1.1548320723578456, "grad_norm": 1.734375, "learning_rate": 1.3522007505485368e-05, "loss": 0.8714, "step": 6805 }, { "epoch": 1.1550035364468354, "grad_norm": 1.640625, "learning_rate": 1.3520317235674212e-05, "loss": 0.8453, "step": 6806 }, { "epoch": 1.1551750005358252, "grad_norm": 1.7421875, "learning_rate": 1.3518626851052251e-05, "loss": 0.8262, "step": 6807 }, { "epoch": 1.155346464624815, "grad_norm": 1.75, "learning_rate": 1.3516936351674623e-05, "loss": 0.8558, "step": 6808 }, { "epoch": 1.155517928713805, "grad_norm": 1.6875, "learning_rate": 1.3515245737596453e-05, "loss": 0.8869, "step": 6809 }, { "epoch": 1.155689392802795, "grad_norm": 1.6640625, "learning_rate": 1.3513555008872884e-05, "loss": 0.8404, "step": 6810 }, { "epoch": 1.1558608568917847, "grad_norm": 1.7421875, "learning_rate": 1.3511864165559056e-05, "loss": 0.8585, "step": 6811 }, { "epoch": 1.1560323209807746, "grad_norm": 1.6875, "learning_rate": 1.3510173207710113e-05, "loss": 0.8694, "step": 6812 }, { "epoch": 1.1562037850697644, "grad_norm": 1.8046875, "learning_rate": 1.3508482135381205e-05, "loss": 0.9258, "step": 6813 }, { "epoch": 1.1563752491587542, "grad_norm": 1.75, "learning_rate": 1.350679094862748e-05, "loss": 0.8493, "step": 6814 }, { "epoch": 1.156546713247744, "grad_norm": 1.6640625, "learning_rate": 1.3505099647504097e-05, "loss": 0.8316, "step": 6815 }, { "epoch": 1.156718177336734, "grad_norm": 1.609375, "learning_rate": 1.3503408232066215e-05, "loss": 0.8276, "step": 6816 }, { "epoch": 1.156889641425724, "grad_norm": 1.671875, "learning_rate": 1.3501716702369e-05, "loss": 0.8804, "step": 6817 }, { "epoch": 1.1570611055147138, "grad_norm": 1.6875, "learning_rate": 1.3500025058467618e-05, "loss": 0.8201, "step": 6818 }, { "epoch": 1.1572325696037036, "grad_norm": 1.6953125, "learning_rate": 1.3498333300417238e-05, "loss": 0.8295, "step": 6819 }, { "epoch": 1.1574040336926934, "grad_norm": 1.6484375, "learning_rate": 1.3496641428273032e-05, "loss": 0.907, "step": 6820 }, { "epoch": 1.1575754977816834, "grad_norm": 1.640625, "learning_rate": 1.3494949442090186e-05, "loss": 0.8573, "step": 6821 }, { "epoch": 1.1577469618706733, "grad_norm": 1.5546875, "learning_rate": 1.3493257341923876e-05, "loss": 0.8617, "step": 6822 }, { "epoch": 1.157918425959663, "grad_norm": 1.78125, "learning_rate": 1.3491565127829293e-05, "loss": 0.9322, "step": 6823 }, { "epoch": 1.158089890048653, "grad_norm": 1.6875, "learning_rate": 1.3489872799861616e-05, "loss": 0.8675, "step": 6824 }, { "epoch": 1.1582613541376428, "grad_norm": 1.7421875, "learning_rate": 1.348818035807605e-05, "loss": 0.8458, "step": 6825 }, { "epoch": 1.1584328182266326, "grad_norm": 1.6484375, "learning_rate": 1.3486487802527788e-05, "loss": 0.8289, "step": 6826 }, { "epoch": 1.1586042823156224, "grad_norm": 1.7109375, "learning_rate": 1.3484795133272028e-05, "loss": 0.9288, "step": 6827 }, { "epoch": 1.1587757464046125, "grad_norm": 1.7890625, "learning_rate": 1.348310235036398e-05, "loss": 0.917, "step": 6828 }, { "epoch": 1.1589472104936023, "grad_norm": 1.6796875, "learning_rate": 1.3481409453858846e-05, "loss": 0.8495, "step": 6829 }, { "epoch": 1.159118674582592, "grad_norm": 1.640625, "learning_rate": 1.3479716443811838e-05, "loss": 0.8831, "step": 6830 }, { "epoch": 1.159290138671582, "grad_norm": 1.609375, "learning_rate": 1.3478023320278175e-05, "loss": 0.8112, "step": 6831 }, { "epoch": 1.1594616027605718, "grad_norm": 1.7421875, "learning_rate": 1.3476330083313074e-05, "loss": 0.9331, "step": 6832 }, { "epoch": 1.1596330668495618, "grad_norm": 1.6796875, "learning_rate": 1.3474636732971758e-05, "loss": 0.8153, "step": 6833 }, { "epoch": 1.1598045309385516, "grad_norm": 1.75, "learning_rate": 1.3472943269309455e-05, "loss": 0.8215, "step": 6834 }, { "epoch": 1.1599759950275415, "grad_norm": 1.625, "learning_rate": 1.3471249692381394e-05, "loss": 0.8797, "step": 6835 }, { "epoch": 1.1601474591165313, "grad_norm": 1.8125, "learning_rate": 1.3469556002242808e-05, "loss": 0.846, "step": 6836 }, { "epoch": 1.160318923205521, "grad_norm": 1.71875, "learning_rate": 1.3467862198948935e-05, "loss": 0.9078, "step": 6837 }, { "epoch": 1.160490387294511, "grad_norm": 1.6015625, "learning_rate": 1.3466168282555018e-05, "loss": 0.772, "step": 6838 }, { "epoch": 1.1606618513835008, "grad_norm": 1.703125, "learning_rate": 1.3464474253116303e-05, "loss": 0.8907, "step": 6839 }, { "epoch": 1.1608333154724908, "grad_norm": 1.671875, "learning_rate": 1.3462780110688036e-05, "loss": 0.7626, "step": 6840 }, { "epoch": 1.1610047795614806, "grad_norm": 1.734375, "learning_rate": 1.3461085855325467e-05, "loss": 0.8413, "step": 6841 }, { "epoch": 1.1611762436504705, "grad_norm": 1.7890625, "learning_rate": 1.3459391487083858e-05, "loss": 0.8411, "step": 6842 }, { "epoch": 1.1613477077394603, "grad_norm": 1.6171875, "learning_rate": 1.3457697006018462e-05, "loss": 0.8852, "step": 6843 }, { "epoch": 1.16151917182845, "grad_norm": 1.625, "learning_rate": 1.3456002412184548e-05, "loss": 0.8167, "step": 6844 }, { "epoch": 1.1616906359174402, "grad_norm": 1.6484375, "learning_rate": 1.3454307705637382e-05, "loss": 0.8421, "step": 6845 }, { "epoch": 1.16186210000643, "grad_norm": 1.65625, "learning_rate": 1.3452612886432234e-05, "loss": 0.8503, "step": 6846 }, { "epoch": 1.1620335640954198, "grad_norm": 1.625, "learning_rate": 1.3450917954624378e-05, "loss": 0.854, "step": 6847 }, { "epoch": 1.1622050281844096, "grad_norm": 1.7578125, "learning_rate": 1.3449222910269093e-05, "loss": 0.8682, "step": 6848 }, { "epoch": 1.1623764922733995, "grad_norm": 1.6328125, "learning_rate": 1.3447527753421661e-05, "loss": 0.976, "step": 6849 }, { "epoch": 1.1625479563623893, "grad_norm": 1.640625, "learning_rate": 1.3445832484137365e-05, "loss": 0.8742, "step": 6850 }, { "epoch": 1.162719420451379, "grad_norm": 1.6875, "learning_rate": 1.3444137102471495e-05, "loss": 0.8685, "step": 6851 }, { "epoch": 1.1628908845403692, "grad_norm": 1.625, "learning_rate": 1.3442441608479349e-05, "loss": 0.7899, "step": 6852 }, { "epoch": 1.163062348629359, "grad_norm": 1.734375, "learning_rate": 1.3440746002216213e-05, "loss": 0.8784, "step": 6853 }, { "epoch": 1.1632338127183488, "grad_norm": 1.6875, "learning_rate": 1.3439050283737399e-05, "loss": 0.8723, "step": 6854 }, { "epoch": 1.1634052768073386, "grad_norm": 1.6484375, "learning_rate": 1.3437354453098202e-05, "loss": 0.8222, "step": 6855 }, { "epoch": 1.1635767408963285, "grad_norm": 1.6875, "learning_rate": 1.3435658510353933e-05, "loss": 0.8552, "step": 6856 }, { "epoch": 1.1637482049853185, "grad_norm": 1.640625, "learning_rate": 1.3433962455559901e-05, "loss": 0.8358, "step": 6857 }, { "epoch": 1.1639196690743083, "grad_norm": 1.7265625, "learning_rate": 1.3432266288771427e-05, "loss": 0.8489, "step": 6858 }, { "epoch": 1.1640911331632982, "grad_norm": 1.578125, "learning_rate": 1.3430570010043821e-05, "loss": 0.7495, "step": 6859 }, { "epoch": 1.164262597252288, "grad_norm": 1.6875, "learning_rate": 1.342887361943241e-05, "loss": 0.8586, "step": 6860 }, { "epoch": 1.1644340613412778, "grad_norm": 1.6953125, "learning_rate": 1.3427177116992515e-05, "loss": 0.8377, "step": 6861 }, { "epoch": 1.1646055254302676, "grad_norm": 1.6875, "learning_rate": 1.3425480502779471e-05, "loss": 0.7789, "step": 6862 }, { "epoch": 1.1647769895192575, "grad_norm": 1.7109375, "learning_rate": 1.3423783776848609e-05, "loss": 0.8138, "step": 6863 }, { "epoch": 1.1649484536082475, "grad_norm": 1.75, "learning_rate": 1.3422086939255265e-05, "loss": 0.9288, "step": 6864 }, { "epoch": 1.1651199176972373, "grad_norm": 1.640625, "learning_rate": 1.342038999005478e-05, "loss": 0.8544, "step": 6865 }, { "epoch": 1.1652913817862272, "grad_norm": 1.828125, "learning_rate": 1.3418692929302497e-05, "loss": 0.9111, "step": 6866 }, { "epoch": 1.165462845875217, "grad_norm": 1.7109375, "learning_rate": 1.3416995757053764e-05, "loss": 0.9236, "step": 6867 }, { "epoch": 1.1656343099642068, "grad_norm": 1.5859375, "learning_rate": 1.3415298473363932e-05, "loss": 0.8341, "step": 6868 }, { "epoch": 1.1658057740531969, "grad_norm": 1.7109375, "learning_rate": 1.3413601078288356e-05, "loss": 0.8754, "step": 6869 }, { "epoch": 1.1659772381421867, "grad_norm": 1.8125, "learning_rate": 1.3411903571882395e-05, "loss": 0.9529, "step": 6870 }, { "epoch": 1.1661487022311765, "grad_norm": 1.6328125, "learning_rate": 1.3410205954201407e-05, "loss": 0.8552, "step": 6871 }, { "epoch": 1.1663201663201663, "grad_norm": 1.6796875, "learning_rate": 1.3408508225300765e-05, "loss": 0.8721, "step": 6872 }, { "epoch": 1.1664916304091562, "grad_norm": 2.015625, "learning_rate": 1.3406810385235833e-05, "loss": 0.8574, "step": 6873 }, { "epoch": 1.166663094498146, "grad_norm": 1.78125, "learning_rate": 1.3405112434061986e-05, "loss": 0.893, "step": 6874 }, { "epoch": 1.1668345585871358, "grad_norm": 1.7421875, "learning_rate": 1.3403414371834602e-05, "loss": 0.8774, "step": 6875 }, { "epoch": 1.1670060226761259, "grad_norm": 1.6796875, "learning_rate": 1.3401716198609056e-05, "loss": 0.9101, "step": 6876 }, { "epoch": 1.1671774867651157, "grad_norm": 1.703125, "learning_rate": 1.3400017914440738e-05, "loss": 0.883, "step": 6877 }, { "epoch": 1.1673489508541055, "grad_norm": 1.65625, "learning_rate": 1.339831951938503e-05, "loss": 0.8582, "step": 6878 }, { "epoch": 1.1675204149430953, "grad_norm": 1.6484375, "learning_rate": 1.3396621013497327e-05, "loss": 0.8527, "step": 6879 }, { "epoch": 1.1676918790320852, "grad_norm": 1.7578125, "learning_rate": 1.3394922396833021e-05, "loss": 0.8982, "step": 6880 }, { "epoch": 1.1678633431210752, "grad_norm": 1.703125, "learning_rate": 1.3393223669447513e-05, "loss": 0.9034, "step": 6881 }, { "epoch": 1.168034807210065, "grad_norm": 1.671875, "learning_rate": 1.3391524831396202e-05, "loss": 0.945, "step": 6882 }, { "epoch": 1.1682062712990549, "grad_norm": 1.6953125, "learning_rate": 1.3389825882734495e-05, "loss": 0.8772, "step": 6883 }, { "epoch": 1.1683777353880447, "grad_norm": 1.6171875, "learning_rate": 1.3388126823517802e-05, "loss": 0.8102, "step": 6884 }, { "epoch": 1.1685491994770345, "grad_norm": 1.6171875, "learning_rate": 1.3386427653801535e-05, "loss": 0.8419, "step": 6885 }, { "epoch": 1.1687206635660243, "grad_norm": 1.703125, "learning_rate": 1.3384728373641111e-05, "loss": 0.9198, "step": 6886 }, { "epoch": 1.1688921276550142, "grad_norm": 1.671875, "learning_rate": 1.3383028983091948e-05, "loss": 0.8829, "step": 6887 }, { "epoch": 1.1690635917440042, "grad_norm": 1.625, "learning_rate": 1.3381329482209471e-05, "loss": 0.8461, "step": 6888 }, { "epoch": 1.169235055832994, "grad_norm": 1.625, "learning_rate": 1.3379629871049105e-05, "loss": 0.8655, "step": 6889 }, { "epoch": 1.1694065199219839, "grad_norm": 1.8203125, "learning_rate": 1.337793014966628e-05, "loss": 0.8972, "step": 6890 }, { "epoch": 1.1695779840109737, "grad_norm": 1.6953125, "learning_rate": 1.3376230318116437e-05, "loss": 0.8264, "step": 6891 }, { "epoch": 1.1697494480999635, "grad_norm": 1.65625, "learning_rate": 1.337453037645501e-05, "loss": 0.9174, "step": 6892 }, { "epoch": 1.1699209121889533, "grad_norm": 1.609375, "learning_rate": 1.3372830324737438e-05, "loss": 0.7695, "step": 6893 }, { "epoch": 1.1700923762779434, "grad_norm": 1.7421875, "learning_rate": 1.3371130163019168e-05, "loss": 0.8772, "step": 6894 }, { "epoch": 1.1702638403669332, "grad_norm": 1.7109375, "learning_rate": 1.3369429891355653e-05, "loss": 0.9167, "step": 6895 }, { "epoch": 1.170435304455923, "grad_norm": 1.640625, "learning_rate": 1.3367729509802336e-05, "loss": 0.8204, "step": 6896 }, { "epoch": 1.1706067685449129, "grad_norm": 1.671875, "learning_rate": 1.3366029018414679e-05, "loss": 0.7964, "step": 6897 }, { "epoch": 1.1707782326339027, "grad_norm": 1.640625, "learning_rate": 1.3364328417248142e-05, "loss": 0.8479, "step": 6898 }, { "epoch": 1.1709496967228925, "grad_norm": 1.59375, "learning_rate": 1.3362627706358187e-05, "loss": 0.8342, "step": 6899 }, { "epoch": 1.1711211608118826, "grad_norm": 1.7265625, "learning_rate": 1.3360926885800279e-05, "loss": 0.866, "step": 6900 }, { "epoch": 1.1712926249008724, "grad_norm": 1.6953125, "learning_rate": 1.3359225955629889e-05, "loss": 0.9075, "step": 6901 }, { "epoch": 1.1714640889898622, "grad_norm": 1.71875, "learning_rate": 1.3357524915902488e-05, "loss": 0.9485, "step": 6902 }, { "epoch": 1.171635553078852, "grad_norm": 1.6640625, "learning_rate": 1.335582376667356e-05, "loss": 0.8911, "step": 6903 }, { "epoch": 1.1718070171678419, "grad_norm": 1.625, "learning_rate": 1.3354122507998584e-05, "loss": 0.8715, "step": 6904 }, { "epoch": 1.1719784812568317, "grad_norm": 1.6484375, "learning_rate": 1.3352421139933038e-05, "loss": 0.8291, "step": 6905 }, { "epoch": 1.1721499453458217, "grad_norm": 1.625, "learning_rate": 1.3350719662532416e-05, "loss": 0.8443, "step": 6906 }, { "epoch": 1.1723214094348116, "grad_norm": 1.6171875, "learning_rate": 1.334901807585221e-05, "loss": 0.8684, "step": 6907 }, { "epoch": 1.1724928735238014, "grad_norm": 1.6796875, "learning_rate": 1.3347316379947912e-05, "loss": 0.8409, "step": 6908 }, { "epoch": 1.1726643376127912, "grad_norm": 1.703125, "learning_rate": 1.3345614574875022e-05, "loss": 0.9154, "step": 6909 }, { "epoch": 1.172835801701781, "grad_norm": 1.671875, "learning_rate": 1.334391266068904e-05, "loss": 0.8695, "step": 6910 }, { "epoch": 1.1730072657907709, "grad_norm": 1.7109375, "learning_rate": 1.3342210637445478e-05, "loss": 0.8835, "step": 6911 }, { "epoch": 1.1731787298797607, "grad_norm": 1.6875, "learning_rate": 1.3340508505199839e-05, "loss": 0.8492, "step": 6912 }, { "epoch": 1.1733501939687507, "grad_norm": 1.6484375, "learning_rate": 1.333880626400764e-05, "loss": 0.793, "step": 6913 }, { "epoch": 1.1735216580577406, "grad_norm": 1.59375, "learning_rate": 1.3337103913924394e-05, "loss": 0.8493, "step": 6914 }, { "epoch": 1.1736931221467304, "grad_norm": 1.734375, "learning_rate": 1.3335401455005625e-05, "loss": 0.8716, "step": 6915 }, { "epoch": 1.1738645862357202, "grad_norm": 1.6484375, "learning_rate": 1.3333698887306854e-05, "loss": 0.8844, "step": 6916 }, { "epoch": 1.17403605032471, "grad_norm": 1.7734375, "learning_rate": 1.3331996210883609e-05, "loss": 0.9383, "step": 6917 }, { "epoch": 1.1742075144137, "grad_norm": 1.6328125, "learning_rate": 1.3330293425791419e-05, "loss": 0.7867, "step": 6918 }, { "epoch": 1.17437897850269, "grad_norm": 1.6171875, "learning_rate": 1.3328590532085822e-05, "loss": 0.8331, "step": 6919 }, { "epoch": 1.1745504425916797, "grad_norm": 1.640625, "learning_rate": 1.332688752982235e-05, "loss": 0.8586, "step": 6920 }, { "epoch": 1.1747219066806696, "grad_norm": 1.6171875, "learning_rate": 1.3325184419056552e-05, "loss": 0.8463, "step": 6921 }, { "epoch": 1.1748933707696594, "grad_norm": 1.6953125, "learning_rate": 1.3323481199843966e-05, "loss": 0.8227, "step": 6922 }, { "epoch": 1.1750648348586492, "grad_norm": 1.578125, "learning_rate": 1.3321777872240142e-05, "loss": 0.8337, "step": 6923 }, { "epoch": 1.175236298947639, "grad_norm": 1.6328125, "learning_rate": 1.3320074436300635e-05, "loss": 0.9401, "step": 6924 }, { "epoch": 1.175407763036629, "grad_norm": 1.609375, "learning_rate": 1.3318370892080998e-05, "loss": 0.8901, "step": 6925 }, { "epoch": 1.175579227125619, "grad_norm": 1.5625, "learning_rate": 1.3316667239636792e-05, "loss": 0.7857, "step": 6926 }, { "epoch": 1.1757506912146087, "grad_norm": 1.625, "learning_rate": 1.3314963479023575e-05, "loss": 0.8501, "step": 6927 }, { "epoch": 1.1759221553035986, "grad_norm": 1.6640625, "learning_rate": 1.3313259610296916e-05, "loss": 0.9171, "step": 6928 }, { "epoch": 1.1760936193925884, "grad_norm": 1.6796875, "learning_rate": 1.3311555633512386e-05, "loss": 0.8805, "step": 6929 }, { "epoch": 1.1762650834815784, "grad_norm": 1.703125, "learning_rate": 1.3309851548725553e-05, "loss": 0.8211, "step": 6930 }, { "epoch": 1.1764365475705683, "grad_norm": 1.6171875, "learning_rate": 1.3308147355992002e-05, "loss": 0.8142, "step": 6931 }, { "epoch": 1.176608011659558, "grad_norm": 1.7421875, "learning_rate": 1.3306443055367306e-05, "loss": 0.9, "step": 6932 }, { "epoch": 1.176779475748548, "grad_norm": 1.6796875, "learning_rate": 1.3304738646907057e-05, "loss": 0.8249, "step": 6933 }, { "epoch": 1.1769509398375377, "grad_norm": 1.7421875, "learning_rate": 1.330303413066683e-05, "loss": 0.8355, "step": 6934 }, { "epoch": 1.1771224039265276, "grad_norm": 1.78125, "learning_rate": 1.3301329506702221e-05, "loss": 0.8734, "step": 6935 }, { "epoch": 1.1772938680155174, "grad_norm": 1.6640625, "learning_rate": 1.3299624775068826e-05, "loss": 0.9342, "step": 6936 }, { "epoch": 1.1774653321045074, "grad_norm": 1.6484375, "learning_rate": 1.3297919935822243e-05, "loss": 0.8546, "step": 6937 }, { "epoch": 1.1776367961934973, "grad_norm": 1.671875, "learning_rate": 1.3296214989018075e-05, "loss": 0.8482, "step": 6938 }, { "epoch": 1.177808260282487, "grad_norm": 1.703125, "learning_rate": 1.3294509934711919e-05, "loss": 0.8412, "step": 6939 }, { "epoch": 1.177979724371477, "grad_norm": 1.671875, "learning_rate": 1.3292804772959391e-05, "loss": 0.8311, "step": 6940 }, { "epoch": 1.1781511884604667, "grad_norm": 1.6875, "learning_rate": 1.3291099503816098e-05, "loss": 0.8382, "step": 6941 }, { "epoch": 1.1783226525494568, "grad_norm": 1.6171875, "learning_rate": 1.3289394127337658e-05, "loss": 0.8111, "step": 6942 }, { "epoch": 1.1784941166384466, "grad_norm": 1.640625, "learning_rate": 1.3287688643579688e-05, "loss": 0.8764, "step": 6943 }, { "epoch": 1.1786655807274364, "grad_norm": 1.6328125, "learning_rate": 1.3285983052597813e-05, "loss": 0.8151, "step": 6944 }, { "epoch": 1.1788370448164263, "grad_norm": 1.6171875, "learning_rate": 1.3284277354447655e-05, "loss": 0.8074, "step": 6945 }, { "epoch": 1.179008508905416, "grad_norm": 1.671875, "learning_rate": 1.3282571549184844e-05, "loss": 0.8439, "step": 6946 }, { "epoch": 1.179179972994406, "grad_norm": 1.7265625, "learning_rate": 1.3280865636865014e-05, "loss": 0.9015, "step": 6947 }, { "epoch": 1.1793514370833957, "grad_norm": 1.6015625, "learning_rate": 1.3279159617543801e-05, "loss": 0.8248, "step": 6948 }, { "epoch": 1.1795229011723858, "grad_norm": 1.640625, "learning_rate": 1.3277453491276849e-05, "loss": 0.9074, "step": 6949 }, { "epoch": 1.1796943652613756, "grad_norm": 1.6328125, "learning_rate": 1.3275747258119793e-05, "loss": 0.81, "step": 6950 }, { "epoch": 1.1798658293503654, "grad_norm": 1.59375, "learning_rate": 1.3274040918128286e-05, "loss": 0.8182, "step": 6951 }, { "epoch": 1.1800372934393553, "grad_norm": 1.625, "learning_rate": 1.3272334471357975e-05, "loss": 0.8618, "step": 6952 }, { "epoch": 1.180208757528345, "grad_norm": 1.7265625, "learning_rate": 1.327062791786451e-05, "loss": 0.9241, "step": 6953 }, { "epoch": 1.1803802216173351, "grad_norm": 1.671875, "learning_rate": 1.3268921257703557e-05, "loss": 0.7983, "step": 6954 }, { "epoch": 1.180551685706325, "grad_norm": 1.671875, "learning_rate": 1.3267214490930771e-05, "loss": 0.8148, "step": 6955 }, { "epoch": 1.1807231497953148, "grad_norm": 1.65625, "learning_rate": 1.3265507617601818e-05, "loss": 0.7872, "step": 6956 }, { "epoch": 1.1808946138843046, "grad_norm": 1.71875, "learning_rate": 1.3263800637772363e-05, "loss": 0.8317, "step": 6957 }, { "epoch": 1.1810660779732944, "grad_norm": 1.640625, "learning_rate": 1.3262093551498084e-05, "loss": 0.7933, "step": 6958 }, { "epoch": 1.1812375420622843, "grad_norm": 1.71875, "learning_rate": 1.3260386358834645e-05, "loss": 0.8589, "step": 6959 }, { "epoch": 1.181409006151274, "grad_norm": 1.703125, "learning_rate": 1.3258679059837731e-05, "loss": 0.8669, "step": 6960 }, { "epoch": 1.1815804702402641, "grad_norm": 1.796875, "learning_rate": 1.3256971654563022e-05, "loss": 0.8813, "step": 6961 }, { "epoch": 1.181751934329254, "grad_norm": 1.7109375, "learning_rate": 1.3255264143066202e-05, "loss": 0.8569, "step": 6962 }, { "epoch": 1.1819233984182438, "grad_norm": 1.65625, "learning_rate": 1.3253556525402963e-05, "loss": 0.8348, "step": 6963 }, { "epoch": 1.1820948625072336, "grad_norm": 1.65625, "learning_rate": 1.325184880162899e-05, "loss": 0.8558, "step": 6964 }, { "epoch": 1.1822663265962234, "grad_norm": 1.6171875, "learning_rate": 1.3250140971799984e-05, "loss": 0.8327, "step": 6965 }, { "epoch": 1.1824377906852135, "grad_norm": 1.703125, "learning_rate": 1.324843303597164e-05, "loss": 0.9319, "step": 6966 }, { "epoch": 1.1826092547742033, "grad_norm": 1.6328125, "learning_rate": 1.3246724994199664e-05, "loss": 0.8491, "step": 6967 }, { "epoch": 1.1827807188631931, "grad_norm": 1.734375, "learning_rate": 1.324501684653976e-05, "loss": 0.8954, "step": 6968 }, { "epoch": 1.182952182952183, "grad_norm": 1.6875, "learning_rate": 1.3243308593047637e-05, "loss": 0.8383, "step": 6969 }, { "epoch": 1.1831236470411728, "grad_norm": 1.734375, "learning_rate": 1.3241600233779008e-05, "loss": 0.8032, "step": 6970 }, { "epoch": 1.1832951111301626, "grad_norm": 1.6328125, "learning_rate": 1.3239891768789587e-05, "loss": 0.8594, "step": 6971 }, { "epoch": 1.1834665752191524, "grad_norm": 1.7265625, "learning_rate": 1.3238183198135098e-05, "loss": 0.8261, "step": 6972 }, { "epoch": 1.1836380393081425, "grad_norm": 1.6875, "learning_rate": 1.323647452187126e-05, "loss": 0.9486, "step": 6973 }, { "epoch": 1.1838095033971323, "grad_norm": 1.6328125, "learning_rate": 1.3234765740053799e-05, "loss": 0.8751, "step": 6974 }, { "epoch": 1.1839809674861221, "grad_norm": 1.796875, "learning_rate": 1.3233056852738446e-05, "loss": 0.8476, "step": 6975 }, { "epoch": 1.184152431575112, "grad_norm": 1.6796875, "learning_rate": 1.3231347859980937e-05, "loss": 0.88, "step": 6976 }, { "epoch": 1.1843238956641018, "grad_norm": 1.7265625, "learning_rate": 1.3229638761837003e-05, "loss": 0.8816, "step": 6977 }, { "epoch": 1.1844953597530918, "grad_norm": 1.6484375, "learning_rate": 1.322792955836239e-05, "loss": 0.8436, "step": 6978 }, { "epoch": 1.1846668238420817, "grad_norm": 1.7265625, "learning_rate": 1.3226220249612837e-05, "loss": 0.8455, "step": 6979 }, { "epoch": 1.1848382879310715, "grad_norm": 1.7109375, "learning_rate": 1.3224510835644095e-05, "loss": 0.833, "step": 6980 }, { "epoch": 1.1850097520200613, "grad_norm": 1.6484375, "learning_rate": 1.322280131651191e-05, "loss": 0.885, "step": 6981 }, { "epoch": 1.1851812161090511, "grad_norm": 1.6640625, "learning_rate": 1.3221091692272042e-05, "loss": 0.8174, "step": 6982 }, { "epoch": 1.185352680198041, "grad_norm": 1.7109375, "learning_rate": 1.321938196298024e-05, "loss": 0.9002, "step": 6983 }, { "epoch": 1.1855241442870308, "grad_norm": 1.71875, "learning_rate": 1.321767212869227e-05, "loss": 0.8802, "step": 6984 }, { "epoch": 1.1856956083760208, "grad_norm": 1.5859375, "learning_rate": 1.3215962189463896e-05, "loss": 0.8047, "step": 6985 }, { "epoch": 1.1858670724650107, "grad_norm": 1.5859375, "learning_rate": 1.3214252145350883e-05, "loss": 0.8862, "step": 6986 }, { "epoch": 1.1860385365540005, "grad_norm": 1.625, "learning_rate": 1.3212541996409005e-05, "loss": 0.8768, "step": 6987 }, { "epoch": 1.1862100006429903, "grad_norm": 1.6015625, "learning_rate": 1.3210831742694037e-05, "loss": 0.8169, "step": 6988 }, { "epoch": 1.1863814647319801, "grad_norm": 1.6875, "learning_rate": 1.3209121384261754e-05, "loss": 0.8902, "step": 6989 }, { "epoch": 1.18655292882097, "grad_norm": 1.796875, "learning_rate": 1.3207410921167938e-05, "loss": 0.8861, "step": 6990 }, { "epoch": 1.18672439290996, "grad_norm": 1.7109375, "learning_rate": 1.3205700353468373e-05, "loss": 0.8325, "step": 6991 }, { "epoch": 1.1868958569989498, "grad_norm": 1.6015625, "learning_rate": 1.3203989681218847e-05, "loss": 0.8863, "step": 6992 }, { "epoch": 1.1870673210879397, "grad_norm": 1.703125, "learning_rate": 1.3202278904475154e-05, "loss": 0.8174, "step": 6993 }, { "epoch": 1.1872387851769295, "grad_norm": 1.703125, "learning_rate": 1.3200568023293085e-05, "loss": 0.8864, "step": 6994 }, { "epoch": 1.1874102492659193, "grad_norm": 1.65625, "learning_rate": 1.3198857037728446e-05, "loss": 0.8373, "step": 6995 }, { "epoch": 1.1875817133549091, "grad_norm": 1.703125, "learning_rate": 1.3197145947837031e-05, "loss": 0.9123, "step": 6996 }, { "epoch": 1.1877531774438992, "grad_norm": 1.6171875, "learning_rate": 1.3195434753674645e-05, "loss": 0.9354, "step": 6997 }, { "epoch": 1.187924641532889, "grad_norm": 1.6171875, "learning_rate": 1.31937234552971e-05, "loss": 0.7199, "step": 6998 }, { "epoch": 1.1880961056218788, "grad_norm": 1.578125, "learning_rate": 1.3192012052760208e-05, "loss": 0.7695, "step": 6999 }, { "epoch": 1.1882675697108687, "grad_norm": 1.6640625, "learning_rate": 1.3190300546119781e-05, "loss": 0.8186, "step": 7000 }, { "epoch": 1.1882675697108687, "eval_loss": 0.8492981195449829, "eval_runtime": 836.9828, "eval_samples_per_second": 2.986, "eval_steps_per_second": 2.986, "step": 7000 }, { "epoch": 1.1884390337998585, "grad_norm": 1.7421875, "learning_rate": 1.3188588935431642e-05, "loss": 0.9399, "step": 7001 }, { "epoch": 1.1886104978888483, "grad_norm": 1.703125, "learning_rate": 1.3186877220751605e-05, "loss": 0.8776, "step": 7002 }, { "epoch": 1.1887819619778384, "grad_norm": 11.1875, "learning_rate": 1.3185165402135507e-05, "loss": 0.8575, "step": 7003 }, { "epoch": 1.1889534260668282, "grad_norm": 1.6875, "learning_rate": 1.3183453479639167e-05, "loss": 0.8264, "step": 7004 }, { "epoch": 1.189124890155818, "grad_norm": 1.6171875, "learning_rate": 1.3181741453318427e-05, "loss": 0.8503, "step": 7005 }, { "epoch": 1.1892963542448078, "grad_norm": 1.6875, "learning_rate": 1.3180029323229111e-05, "loss": 0.8506, "step": 7006 }, { "epoch": 1.1894678183337977, "grad_norm": 1.78125, "learning_rate": 1.3178317089427066e-05, "loss": 0.9127, "step": 7007 }, { "epoch": 1.1896392824227875, "grad_norm": 1.640625, "learning_rate": 1.3176604751968133e-05, "loss": 0.8574, "step": 7008 }, { "epoch": 1.1898107465117773, "grad_norm": 1.6015625, "learning_rate": 1.3174892310908158e-05, "loss": 0.8767, "step": 7009 }, { "epoch": 1.1899822106007674, "grad_norm": 1.7578125, "learning_rate": 1.3173179766302988e-05, "loss": 0.8028, "step": 7010 }, { "epoch": 1.1901536746897572, "grad_norm": 1.671875, "learning_rate": 1.3171467118208476e-05, "loss": 0.8199, "step": 7011 }, { "epoch": 1.190325138778747, "grad_norm": 1.6484375, "learning_rate": 1.3169754366680476e-05, "loss": 0.842, "step": 7012 }, { "epoch": 1.1904966028677368, "grad_norm": 1.7109375, "learning_rate": 1.3168041511774856e-05, "loss": 0.9031, "step": 7013 }, { "epoch": 1.1906680669567267, "grad_norm": 1.6796875, "learning_rate": 1.3166328553547469e-05, "loss": 0.7974, "step": 7014 }, { "epoch": 1.1908395310457167, "grad_norm": 1.6875, "learning_rate": 1.3164615492054184e-05, "loss": 0.8576, "step": 7015 }, { "epoch": 1.1910109951347065, "grad_norm": 1.71875, "learning_rate": 1.3162902327350873e-05, "loss": 0.9097, "step": 7016 }, { "epoch": 1.1911824592236964, "grad_norm": 1.6328125, "learning_rate": 1.3161189059493407e-05, "loss": 0.8865, "step": 7017 }, { "epoch": 1.1913539233126862, "grad_norm": 1.65625, "learning_rate": 1.315947568853766e-05, "loss": 0.8532, "step": 7018 }, { "epoch": 1.191525387401676, "grad_norm": 1.7578125, "learning_rate": 1.3157762214539516e-05, "loss": 0.9153, "step": 7019 }, { "epoch": 1.1916968514906658, "grad_norm": 1.6796875, "learning_rate": 1.315604863755485e-05, "loss": 0.7673, "step": 7020 }, { "epoch": 1.1918683155796557, "grad_norm": 1.640625, "learning_rate": 1.3154334957639557e-05, "loss": 0.8389, "step": 7021 }, { "epoch": 1.1920397796686457, "grad_norm": 1.625, "learning_rate": 1.3152621174849522e-05, "loss": 0.7504, "step": 7022 }, { "epoch": 1.1922112437576355, "grad_norm": 1.71875, "learning_rate": 1.3150907289240639e-05, "loss": 0.8595, "step": 7023 }, { "epoch": 1.1923827078466254, "grad_norm": 1.5625, "learning_rate": 1.3149193300868803e-05, "loss": 0.7952, "step": 7024 }, { "epoch": 1.1925541719356152, "grad_norm": 1.6953125, "learning_rate": 1.314747920978992e-05, "loss": 0.8829, "step": 7025 }, { "epoch": 1.192725636024605, "grad_norm": 1.703125, "learning_rate": 1.3145765016059882e-05, "loss": 0.9387, "step": 7026 }, { "epoch": 1.192897100113595, "grad_norm": 1.953125, "learning_rate": 1.3144050719734602e-05, "loss": 0.8143, "step": 7027 }, { "epoch": 1.193068564202585, "grad_norm": 1.71875, "learning_rate": 1.314233632086999e-05, "loss": 0.8377, "step": 7028 }, { "epoch": 1.1932400282915747, "grad_norm": 1.7578125, "learning_rate": 1.3140621819521957e-05, "loss": 0.8073, "step": 7029 }, { "epoch": 1.1934114923805645, "grad_norm": 1.7109375, "learning_rate": 1.313890721574642e-05, "loss": 0.8738, "step": 7030 }, { "epoch": 1.1935829564695544, "grad_norm": 1.8203125, "learning_rate": 1.3137192509599297e-05, "loss": 0.8952, "step": 7031 }, { "epoch": 1.1937544205585442, "grad_norm": 1.703125, "learning_rate": 1.3135477701136515e-05, "loss": 0.8685, "step": 7032 }, { "epoch": 1.193925884647534, "grad_norm": 1.59375, "learning_rate": 1.3133762790413998e-05, "loss": 0.8827, "step": 7033 }, { "epoch": 1.194097348736524, "grad_norm": 1.7109375, "learning_rate": 1.3132047777487676e-05, "loss": 0.8834, "step": 7034 }, { "epoch": 1.194268812825514, "grad_norm": 1.609375, "learning_rate": 1.3130332662413478e-05, "loss": 0.8182, "step": 7035 }, { "epoch": 1.1944402769145037, "grad_norm": 1.7265625, "learning_rate": 1.312861744524735e-05, "loss": 0.9155, "step": 7036 }, { "epoch": 1.1946117410034935, "grad_norm": 1.765625, "learning_rate": 1.3126902126045223e-05, "loss": 0.9201, "step": 7037 }, { "epoch": 1.1947832050924834, "grad_norm": 1.734375, "learning_rate": 1.3125186704863042e-05, "loss": 0.8421, "step": 7038 }, { "epoch": 1.1949546691814734, "grad_norm": 1.6484375, "learning_rate": 1.3123471181756753e-05, "loss": 0.8264, "step": 7039 }, { "epoch": 1.1951261332704632, "grad_norm": 1.703125, "learning_rate": 1.3121755556782307e-05, "loss": 0.8418, "step": 7040 }, { "epoch": 1.195297597359453, "grad_norm": 1.6328125, "learning_rate": 1.3120039829995659e-05, "loss": 0.7933, "step": 7041 }, { "epoch": 1.195469061448443, "grad_norm": 1.71875, "learning_rate": 1.3118324001452765e-05, "loss": 0.8414, "step": 7042 }, { "epoch": 1.1956405255374327, "grad_norm": 1.65625, "learning_rate": 1.311660807120958e-05, "loss": 0.8003, "step": 7043 }, { "epoch": 1.1958119896264225, "grad_norm": 1.6640625, "learning_rate": 1.3114892039322072e-05, "loss": 0.8051, "step": 7044 }, { "epoch": 1.1959834537154124, "grad_norm": 1.6328125, "learning_rate": 1.31131759058462e-05, "loss": 0.8998, "step": 7045 }, { "epoch": 1.1961549178044024, "grad_norm": 4.8125, "learning_rate": 1.3111459670837942e-05, "loss": 0.9592, "step": 7046 }, { "epoch": 1.1963263818933922, "grad_norm": 1.671875, "learning_rate": 1.310974333435327e-05, "loss": 0.8949, "step": 7047 }, { "epoch": 1.196497845982382, "grad_norm": 1.75, "learning_rate": 1.3108026896448153e-05, "loss": 0.8741, "step": 7048 }, { "epoch": 1.196669310071372, "grad_norm": 1.8359375, "learning_rate": 1.3106310357178575e-05, "loss": 0.8458, "step": 7049 }, { "epoch": 1.1968407741603617, "grad_norm": 1.609375, "learning_rate": 1.3104593716600521e-05, "loss": 0.8015, "step": 7050 }, { "epoch": 1.1970122382493518, "grad_norm": 1.640625, "learning_rate": 1.3102876974769975e-05, "loss": 0.8168, "step": 7051 }, { "epoch": 1.1971837023383416, "grad_norm": 1.7265625, "learning_rate": 1.3101160131742927e-05, "loss": 0.8824, "step": 7052 }, { "epoch": 1.1973551664273314, "grad_norm": 1.65625, "learning_rate": 1.309944318757537e-05, "loss": 0.7913, "step": 7053 }, { "epoch": 1.1975266305163212, "grad_norm": 1.6171875, "learning_rate": 1.30977261423233e-05, "loss": 0.792, "step": 7054 }, { "epoch": 1.197698094605311, "grad_norm": 1.7734375, "learning_rate": 1.3096008996042712e-05, "loss": 0.805, "step": 7055 }, { "epoch": 1.197869558694301, "grad_norm": 1.5625, "learning_rate": 1.3094291748789614e-05, "loss": 0.7919, "step": 7056 }, { "epoch": 1.1980410227832907, "grad_norm": 1.6640625, "learning_rate": 1.309257440062001e-05, "loss": 0.8871, "step": 7057 }, { "epoch": 1.1982124868722808, "grad_norm": 1.6171875, "learning_rate": 1.309085695158991e-05, "loss": 0.8488, "step": 7058 }, { "epoch": 1.1983839509612706, "grad_norm": 1.6484375, "learning_rate": 1.3089139401755325e-05, "loss": 0.8912, "step": 7059 }, { "epoch": 1.1985554150502604, "grad_norm": 1.8046875, "learning_rate": 1.3087421751172269e-05, "loss": 0.8928, "step": 7060 }, { "epoch": 1.1987268791392502, "grad_norm": 1.6328125, "learning_rate": 1.3085703999896765e-05, "loss": 0.8323, "step": 7061 }, { "epoch": 1.19889834322824, "grad_norm": 1.625, "learning_rate": 1.3083986147984835e-05, "loss": 0.8322, "step": 7062 }, { "epoch": 1.1990698073172301, "grad_norm": 1.6171875, "learning_rate": 1.3082268195492505e-05, "loss": 0.8949, "step": 7063 }, { "epoch": 1.19924127140622, "grad_norm": 1.71875, "learning_rate": 1.3080550142475798e-05, "loss": 0.9076, "step": 7064 }, { "epoch": 1.1994127354952098, "grad_norm": 1.7890625, "learning_rate": 1.3078831988990757e-05, "loss": 0.9873, "step": 7065 }, { "epoch": 1.1995841995841996, "grad_norm": 1.7421875, "learning_rate": 1.3077113735093407e-05, "loss": 0.8335, "step": 7066 }, { "epoch": 1.1997556636731894, "grad_norm": 1.7578125, "learning_rate": 1.3075395380839787e-05, "loss": 0.9648, "step": 7067 }, { "epoch": 1.1999271277621792, "grad_norm": 1.75, "learning_rate": 1.3073676926285947e-05, "loss": 0.8638, "step": 7068 }, { "epoch": 1.200098591851169, "grad_norm": 1.609375, "learning_rate": 1.3071958371487927e-05, "loss": 0.7763, "step": 7069 }, { "epoch": 1.2002700559401591, "grad_norm": 1.59375, "learning_rate": 1.3070239716501778e-05, "loss": 0.7944, "step": 7070 }, { "epoch": 1.200441520029149, "grad_norm": 1.671875, "learning_rate": 1.3068520961383552e-05, "loss": 0.894, "step": 7071 }, { "epoch": 1.2006129841181388, "grad_norm": 1.6796875, "learning_rate": 1.30668021061893e-05, "loss": 0.9565, "step": 7072 }, { "epoch": 1.2007844482071286, "grad_norm": 1.703125, "learning_rate": 1.3065083150975081e-05, "loss": 0.8892, "step": 7073 }, { "epoch": 1.2009559122961184, "grad_norm": 1.6953125, "learning_rate": 1.3063364095796962e-05, "loss": 0.7983, "step": 7074 }, { "epoch": 1.2011273763851082, "grad_norm": 1.59375, "learning_rate": 1.3061644940711002e-05, "loss": 0.8658, "step": 7075 }, { "epoch": 1.2012988404740983, "grad_norm": 1.703125, "learning_rate": 1.3059925685773273e-05, "loss": 0.8818, "step": 7076 }, { "epoch": 1.2014703045630881, "grad_norm": 1.8203125, "learning_rate": 1.3058206331039842e-05, "loss": 0.875, "step": 7077 }, { "epoch": 1.201641768652078, "grad_norm": 1.53125, "learning_rate": 1.305648687656679e-05, "loss": 0.792, "step": 7078 }, { "epoch": 1.2018132327410678, "grad_norm": 1.6796875, "learning_rate": 1.3054767322410188e-05, "loss": 0.7946, "step": 7079 }, { "epoch": 1.2019846968300576, "grad_norm": 1.6484375, "learning_rate": 1.3053047668626122e-05, "loss": 0.812, "step": 7080 }, { "epoch": 1.2021561609190474, "grad_norm": 1.6875, "learning_rate": 1.3051327915270676e-05, "loss": 0.8986, "step": 7081 }, { "epoch": 1.2023276250080375, "grad_norm": 1.703125, "learning_rate": 1.3049608062399934e-05, "loss": 0.8617, "step": 7082 }, { "epoch": 1.2024990890970273, "grad_norm": 1.6875, "learning_rate": 1.3047888110069993e-05, "loss": 0.8427, "step": 7083 }, { "epoch": 1.2026705531860171, "grad_norm": 1.703125, "learning_rate": 1.3046168058336941e-05, "loss": 0.8435, "step": 7084 }, { "epoch": 1.202842017275007, "grad_norm": 1.609375, "learning_rate": 1.3044447907256877e-05, "loss": 0.8159, "step": 7085 }, { "epoch": 1.2030134813639968, "grad_norm": 1.65625, "learning_rate": 1.30427276568859e-05, "loss": 0.8177, "step": 7086 }, { "epoch": 1.2031849454529866, "grad_norm": 1.640625, "learning_rate": 1.304100730728012e-05, "loss": 0.8673, "step": 7087 }, { "epoch": 1.2033564095419766, "grad_norm": 1.6640625, "learning_rate": 1.3039286858495642e-05, "loss": 0.7668, "step": 7088 }, { "epoch": 1.2035278736309665, "grad_norm": 1.734375, "learning_rate": 1.3037566310588571e-05, "loss": 0.9049, "step": 7089 }, { "epoch": 1.2036993377199563, "grad_norm": 1.5546875, "learning_rate": 1.3035845663615025e-05, "loss": 0.8266, "step": 7090 }, { "epoch": 1.2038708018089461, "grad_norm": 1.6484375, "learning_rate": 1.3034124917631124e-05, "loss": 0.7289, "step": 7091 }, { "epoch": 1.204042265897936, "grad_norm": 1.703125, "learning_rate": 1.3032404072692979e-05, "loss": 0.8224, "step": 7092 }, { "epoch": 1.2042137299869258, "grad_norm": 1.59375, "learning_rate": 1.3030683128856719e-05, "loss": 0.879, "step": 7093 }, { "epoch": 1.2043851940759158, "grad_norm": 1.640625, "learning_rate": 1.3028962086178472e-05, "loss": 0.7933, "step": 7094 }, { "epoch": 1.2045566581649056, "grad_norm": 1.65625, "learning_rate": 1.3027240944714361e-05, "loss": 0.8349, "step": 7095 }, { "epoch": 1.2047281222538955, "grad_norm": 1.7265625, "learning_rate": 1.3025519704520528e-05, "loss": 0.8045, "step": 7096 }, { "epoch": 1.2048995863428853, "grad_norm": 1.7109375, "learning_rate": 1.3023798365653102e-05, "loss": 0.8647, "step": 7097 }, { "epoch": 1.2050710504318751, "grad_norm": 1.7734375, "learning_rate": 1.3022076928168224e-05, "loss": 0.848, "step": 7098 }, { "epoch": 1.205242514520865, "grad_norm": 1.7421875, "learning_rate": 1.302035539212204e-05, "loss": 0.9455, "step": 7099 }, { "epoch": 1.205413978609855, "grad_norm": 1.703125, "learning_rate": 1.301863375757069e-05, "loss": 0.8987, "step": 7100 }, { "epoch": 1.2055854426988448, "grad_norm": 1.640625, "learning_rate": 1.3016912024570329e-05, "loss": 0.8748, "step": 7101 }, { "epoch": 1.2057569067878346, "grad_norm": 1.6015625, "learning_rate": 1.3015190193177105e-05, "loss": 0.8823, "step": 7102 }, { "epoch": 1.2059283708768245, "grad_norm": 1.625, "learning_rate": 1.3013468263447175e-05, "loss": 0.8718, "step": 7103 }, { "epoch": 1.2060998349658143, "grad_norm": 1.7109375, "learning_rate": 1.3011746235436698e-05, "loss": 0.9014, "step": 7104 }, { "epoch": 1.2062712990548041, "grad_norm": 1.65625, "learning_rate": 1.3010024109201834e-05, "loss": 0.8978, "step": 7105 }, { "epoch": 1.206442763143794, "grad_norm": 1.6171875, "learning_rate": 1.3008301884798746e-05, "loss": 0.8466, "step": 7106 }, { "epoch": 1.206614227232784, "grad_norm": 1.671875, "learning_rate": 1.3006579562283607e-05, "loss": 0.9149, "step": 7107 }, { "epoch": 1.2067856913217738, "grad_norm": 1.703125, "learning_rate": 1.300485714171259e-05, "loss": 0.8793, "step": 7108 }, { "epoch": 1.2069571554107636, "grad_norm": 1.5625, "learning_rate": 1.3003134623141864e-05, "loss": 0.8049, "step": 7109 }, { "epoch": 1.2071286194997535, "grad_norm": 1.625, "learning_rate": 1.300141200662761e-05, "loss": 0.8108, "step": 7110 }, { "epoch": 1.2073000835887433, "grad_norm": 1.5703125, "learning_rate": 1.299968929222601e-05, "loss": 0.7865, "step": 7111 }, { "epoch": 1.2074715476777333, "grad_norm": 1.6640625, "learning_rate": 1.2997966479993243e-05, "loss": 0.8582, "step": 7112 }, { "epoch": 1.2076430117667232, "grad_norm": 1.65625, "learning_rate": 1.2996243569985501e-05, "loss": 0.8529, "step": 7113 }, { "epoch": 1.207814475855713, "grad_norm": 1.71875, "learning_rate": 1.299452056225897e-05, "loss": 0.8457, "step": 7114 }, { "epoch": 1.2079859399447028, "grad_norm": 1.671875, "learning_rate": 1.299279745686985e-05, "loss": 0.8492, "step": 7115 }, { "epoch": 1.2081574040336927, "grad_norm": 1.578125, "learning_rate": 1.2991074253874337e-05, "loss": 0.7936, "step": 7116 }, { "epoch": 1.2083288681226825, "grad_norm": 1.625, "learning_rate": 1.2989350953328628e-05, "loss": 0.861, "step": 7117 }, { "epoch": 1.2085003322116723, "grad_norm": 1.6328125, "learning_rate": 1.2987627555288928e-05, "loss": 0.7935, "step": 7118 }, { "epoch": 1.2086717963006623, "grad_norm": 1.6953125, "learning_rate": 1.2985904059811442e-05, "loss": 0.8859, "step": 7119 }, { "epoch": 1.2088432603896522, "grad_norm": 1.6015625, "learning_rate": 1.2984180466952381e-05, "loss": 0.8689, "step": 7120 }, { "epoch": 1.209014724478642, "grad_norm": 1.625, "learning_rate": 1.2982456776767957e-05, "loss": 0.8682, "step": 7121 }, { "epoch": 1.2091861885676318, "grad_norm": 1.6796875, "learning_rate": 1.298073298931439e-05, "loss": 0.803, "step": 7122 }, { "epoch": 1.2093576526566217, "grad_norm": 1.71875, "learning_rate": 1.297900910464789e-05, "loss": 0.7887, "step": 7123 }, { "epoch": 1.2095291167456117, "grad_norm": 1.8046875, "learning_rate": 1.2977285122824688e-05, "loss": 0.8895, "step": 7124 }, { "epoch": 1.2097005808346015, "grad_norm": 1.6796875, "learning_rate": 1.2975561043901008e-05, "loss": 0.8874, "step": 7125 }, { "epoch": 1.2098720449235913, "grad_norm": 1.7890625, "learning_rate": 1.2973836867933076e-05, "loss": 0.885, "step": 7126 }, { "epoch": 1.2100435090125812, "grad_norm": 1.6328125, "learning_rate": 1.2972112594977127e-05, "loss": 0.8098, "step": 7127 }, { "epoch": 1.210214973101571, "grad_norm": 1.7578125, "learning_rate": 1.2970388225089393e-05, "loss": 0.8832, "step": 7128 }, { "epoch": 1.2103864371905608, "grad_norm": 1.8359375, "learning_rate": 1.2968663758326115e-05, "loss": 0.9134, "step": 7129 }, { "epoch": 1.2105579012795507, "grad_norm": 1.6328125, "learning_rate": 1.2966939194743531e-05, "loss": 0.8345, "step": 7130 }, { "epoch": 1.2107293653685407, "grad_norm": 1.78125, "learning_rate": 1.2965214534397888e-05, "loss": 0.8522, "step": 7131 }, { "epoch": 1.2109008294575305, "grad_norm": 1.6484375, "learning_rate": 1.2963489777345433e-05, "loss": 0.8498, "step": 7132 }, { "epoch": 1.2110722935465204, "grad_norm": 1.7109375, "learning_rate": 1.2961764923642415e-05, "loss": 0.8472, "step": 7133 }, { "epoch": 1.2112437576355102, "grad_norm": 1.703125, "learning_rate": 1.2960039973345094e-05, "loss": 0.9188, "step": 7134 }, { "epoch": 1.2114152217245, "grad_norm": 1.625, "learning_rate": 1.295831492650972e-05, "loss": 0.8582, "step": 7135 }, { "epoch": 1.21158668581349, "grad_norm": 1.78125, "learning_rate": 1.2956589783192557e-05, "loss": 0.902, "step": 7136 }, { "epoch": 1.2117581499024799, "grad_norm": 1.671875, "learning_rate": 1.2954864543449866e-05, "loss": 0.8307, "step": 7137 }, { "epoch": 1.2119296139914697, "grad_norm": 1.703125, "learning_rate": 1.2953139207337917e-05, "loss": 0.9602, "step": 7138 }, { "epoch": 1.2121010780804595, "grad_norm": 1.6328125, "learning_rate": 1.2951413774912977e-05, "loss": 0.8885, "step": 7139 }, { "epoch": 1.2122725421694494, "grad_norm": 1.7578125, "learning_rate": 1.2949688246231324e-05, "loss": 0.9143, "step": 7140 }, { "epoch": 1.2124440062584392, "grad_norm": 1.6796875, "learning_rate": 1.2947962621349224e-05, "loss": 0.955, "step": 7141 }, { "epoch": 1.212615470347429, "grad_norm": 1.7265625, "learning_rate": 1.2946236900322965e-05, "loss": 0.8418, "step": 7142 }, { "epoch": 1.212786934436419, "grad_norm": 1.71875, "learning_rate": 1.2944511083208827e-05, "loss": 0.8636, "step": 7143 }, { "epoch": 1.2129583985254089, "grad_norm": 1.578125, "learning_rate": 1.294278517006309e-05, "loss": 0.8267, "step": 7144 }, { "epoch": 1.2131298626143987, "grad_norm": 1.6328125, "learning_rate": 1.294105916094205e-05, "loss": 0.8761, "step": 7145 }, { "epoch": 1.2133013267033885, "grad_norm": 1.640625, "learning_rate": 1.2939333055902e-05, "loss": 0.7752, "step": 7146 }, { "epoch": 1.2134727907923784, "grad_norm": 1.671875, "learning_rate": 1.2937606854999225e-05, "loss": 0.8522, "step": 7147 }, { "epoch": 1.2136442548813684, "grad_norm": 1.65625, "learning_rate": 1.293588055829003e-05, "loss": 0.8549, "step": 7148 }, { "epoch": 1.2138157189703582, "grad_norm": 1.6328125, "learning_rate": 1.2934154165830714e-05, "loss": 0.8497, "step": 7149 }, { "epoch": 1.213987183059348, "grad_norm": 1.65625, "learning_rate": 1.2932427677677582e-05, "loss": 0.8336, "step": 7150 }, { "epoch": 1.2141586471483379, "grad_norm": 1.6953125, "learning_rate": 1.2930701093886942e-05, "loss": 0.8674, "step": 7151 }, { "epoch": 1.2143301112373277, "grad_norm": 1.6875, "learning_rate": 1.29289744145151e-05, "loss": 0.9201, "step": 7152 }, { "epoch": 1.2145015753263175, "grad_norm": 1.6953125, "learning_rate": 1.2927247639618376e-05, "loss": 0.8189, "step": 7153 }, { "epoch": 1.2146730394153074, "grad_norm": 1.6953125, "learning_rate": 1.2925520769253085e-05, "loss": 0.9168, "step": 7154 }, { "epoch": 1.2148445035042974, "grad_norm": 1.6640625, "learning_rate": 1.2923793803475542e-05, "loss": 0.8362, "step": 7155 }, { "epoch": 1.2150159675932872, "grad_norm": 1.7421875, "learning_rate": 1.2922066742342074e-05, "loss": 0.8896, "step": 7156 }, { "epoch": 1.215187431682277, "grad_norm": 1.6640625, "learning_rate": 1.2920339585909006e-05, "loss": 0.8406, "step": 7157 }, { "epoch": 1.2153588957712669, "grad_norm": 1.6171875, "learning_rate": 1.291861233423267e-05, "loss": 0.8135, "step": 7158 }, { "epoch": 1.2155303598602567, "grad_norm": 1.6640625, "learning_rate": 1.2916884987369391e-05, "loss": 0.8631, "step": 7159 }, { "epoch": 1.2157018239492468, "grad_norm": 1.6875, "learning_rate": 1.291515754537551e-05, "loss": 0.8561, "step": 7160 }, { "epoch": 1.2158732880382366, "grad_norm": 1.703125, "learning_rate": 1.2913430008307361e-05, "loss": 0.8446, "step": 7161 }, { "epoch": 1.2160447521272264, "grad_norm": 1.5859375, "learning_rate": 1.2911702376221294e-05, "loss": 0.8365, "step": 7162 }, { "epoch": 1.2162162162162162, "grad_norm": 1.6875, "learning_rate": 1.2909974649173646e-05, "loss": 0.8221, "step": 7163 }, { "epoch": 1.216387680305206, "grad_norm": 1.6640625, "learning_rate": 1.2908246827220766e-05, "loss": 0.8318, "step": 7164 }, { "epoch": 1.2165591443941959, "grad_norm": 1.71875, "learning_rate": 1.2906518910419006e-05, "loss": 0.8276, "step": 7165 }, { "epoch": 1.2167306084831857, "grad_norm": 1.671875, "learning_rate": 1.2904790898824719e-05, "loss": 0.9029, "step": 7166 }, { "epoch": 1.2169020725721758, "grad_norm": 1.640625, "learning_rate": 1.290306279249426e-05, "loss": 0.7511, "step": 7167 }, { "epoch": 1.2170735366611656, "grad_norm": 1.625, "learning_rate": 1.2901334591483996e-05, "loss": 0.8239, "step": 7168 }, { "epoch": 1.2172450007501554, "grad_norm": 1.71875, "learning_rate": 1.2899606295850284e-05, "loss": 0.8241, "step": 7169 }, { "epoch": 1.2174164648391452, "grad_norm": 1.625, "learning_rate": 1.2897877905649492e-05, "loss": 0.76, "step": 7170 }, { "epoch": 1.217587928928135, "grad_norm": 1.6640625, "learning_rate": 1.2896149420937988e-05, "loss": 0.876, "step": 7171 }, { "epoch": 1.2177593930171249, "grad_norm": 1.7109375, "learning_rate": 1.2894420841772144e-05, "loss": 0.911, "step": 7172 }, { "epoch": 1.217930857106115, "grad_norm": 1.78125, "learning_rate": 1.289269216820834e-05, "loss": 0.9441, "step": 7173 }, { "epoch": 1.2181023211951048, "grad_norm": 1.734375, "learning_rate": 1.2890963400302949e-05, "loss": 0.8377, "step": 7174 }, { "epoch": 1.2182737852840946, "grad_norm": 1.7578125, "learning_rate": 1.2889234538112359e-05, "loss": 0.8068, "step": 7175 }, { "epoch": 1.2184452493730844, "grad_norm": 1.7265625, "learning_rate": 1.288750558169295e-05, "loss": 0.8127, "step": 7176 }, { "epoch": 1.2186167134620742, "grad_norm": 1.6640625, "learning_rate": 1.2885776531101109e-05, "loss": 0.8431, "step": 7177 }, { "epoch": 1.218788177551064, "grad_norm": 1.6328125, "learning_rate": 1.2884047386393228e-05, "loss": 0.8766, "step": 7178 }, { "epoch": 1.218959641640054, "grad_norm": 1.765625, "learning_rate": 1.2882318147625701e-05, "loss": 1.0066, "step": 7179 }, { "epoch": 1.219131105729044, "grad_norm": 1.7734375, "learning_rate": 1.2880588814854923e-05, "loss": 0.8912, "step": 7180 }, { "epoch": 1.2193025698180338, "grad_norm": 1.6953125, "learning_rate": 1.2878859388137304e-05, "loss": 0.9064, "step": 7181 }, { "epoch": 1.2194740339070236, "grad_norm": 1.6953125, "learning_rate": 1.2877129867529234e-05, "loss": 0.9499, "step": 7182 }, { "epoch": 1.2196454979960134, "grad_norm": 1.8125, "learning_rate": 1.2875400253087126e-05, "loss": 0.8005, "step": 7183 }, { "epoch": 1.2198169620850032, "grad_norm": 1.59375, "learning_rate": 1.2873670544867389e-05, "loss": 0.8108, "step": 7184 }, { "epoch": 1.2199884261739933, "grad_norm": 1.6328125, "learning_rate": 1.2871940742926432e-05, "loss": 0.8299, "step": 7185 }, { "epoch": 1.220159890262983, "grad_norm": 1.6875, "learning_rate": 1.2870210847320676e-05, "loss": 0.8547, "step": 7186 }, { "epoch": 1.220331354351973, "grad_norm": 1.7421875, "learning_rate": 1.2868480858106535e-05, "loss": 0.9192, "step": 7187 }, { "epoch": 1.2205028184409628, "grad_norm": 1.640625, "learning_rate": 1.286675077534043e-05, "loss": 0.8247, "step": 7188 }, { "epoch": 1.2206742825299526, "grad_norm": 1.7734375, "learning_rate": 1.2865020599078786e-05, "loss": 0.9666, "step": 7189 }, { "epoch": 1.2208457466189424, "grad_norm": 1.6875, "learning_rate": 1.2863290329378033e-05, "loss": 0.8251, "step": 7190 }, { "epoch": 1.2210172107079325, "grad_norm": 1.703125, "learning_rate": 1.2861559966294602e-05, "loss": 0.8475, "step": 7191 }, { "epoch": 1.2211886747969223, "grad_norm": 1.6640625, "learning_rate": 1.2859829509884924e-05, "loss": 0.8498, "step": 7192 }, { "epoch": 1.221360138885912, "grad_norm": 1.71875, "learning_rate": 1.2858098960205437e-05, "loss": 0.9615, "step": 7193 }, { "epoch": 1.221531602974902, "grad_norm": 1.75, "learning_rate": 1.285636831731258e-05, "loss": 0.8848, "step": 7194 }, { "epoch": 1.2217030670638918, "grad_norm": 1.5859375, "learning_rate": 1.2854637581262794e-05, "loss": 0.8595, "step": 7195 }, { "epoch": 1.2218745311528816, "grad_norm": 1.59375, "learning_rate": 1.2852906752112528e-05, "loss": 0.8858, "step": 7196 }, { "epoch": 1.2220459952418716, "grad_norm": 1.734375, "learning_rate": 1.285117582991823e-05, "loss": 0.9023, "step": 7197 }, { "epoch": 1.2222174593308615, "grad_norm": 1.640625, "learning_rate": 1.2849444814736351e-05, "loss": 0.8564, "step": 7198 }, { "epoch": 1.2223889234198513, "grad_norm": 1.625, "learning_rate": 1.2847713706623348e-05, "loss": 0.9097, "step": 7199 }, { "epoch": 1.222560387508841, "grad_norm": 1.640625, "learning_rate": 1.2845982505635677e-05, "loss": 0.8507, "step": 7200 }, { "epoch": 1.222731851597831, "grad_norm": 1.6953125, "learning_rate": 1.2844251211829799e-05, "loss": 0.7985, "step": 7201 }, { "epoch": 1.2229033156868208, "grad_norm": 1.71875, "learning_rate": 1.284251982526218e-05, "loss": 0.863, "step": 7202 }, { "epoch": 1.2230747797758106, "grad_norm": 1.6640625, "learning_rate": 1.284078834598928e-05, "loss": 0.8775, "step": 7203 }, { "epoch": 1.2232462438648006, "grad_norm": 1.6953125, "learning_rate": 1.2839056774067583e-05, "loss": 0.8685, "step": 7204 }, { "epoch": 1.2234177079537905, "grad_norm": 1.71875, "learning_rate": 1.2837325109553549e-05, "loss": 0.8441, "step": 7205 }, { "epoch": 1.2235891720427803, "grad_norm": 1.796875, "learning_rate": 1.283559335250366e-05, "loss": 0.8704, "step": 7206 }, { "epoch": 1.22376063613177, "grad_norm": 1.6328125, "learning_rate": 1.2833861502974392e-05, "loss": 0.901, "step": 7207 }, { "epoch": 1.22393210022076, "grad_norm": 1.6484375, "learning_rate": 1.2832129561022228e-05, "loss": 0.7814, "step": 7208 }, { "epoch": 1.22410356430975, "grad_norm": 1.578125, "learning_rate": 1.2830397526703659e-05, "loss": 0.789, "step": 7209 }, { "epoch": 1.2242750283987398, "grad_norm": 1.6953125, "learning_rate": 1.2828665400075166e-05, "loss": 0.9164, "step": 7210 }, { "epoch": 1.2244464924877296, "grad_norm": 1.7734375, "learning_rate": 1.2826933181193244e-05, "loss": 0.9192, "step": 7211 }, { "epoch": 1.2246179565767195, "grad_norm": 1.6640625, "learning_rate": 1.2825200870114382e-05, "loss": 0.8172, "step": 7212 }, { "epoch": 1.2247894206657093, "grad_norm": 1.625, "learning_rate": 1.2823468466895083e-05, "loss": 0.8955, "step": 7213 }, { "epoch": 1.224960884754699, "grad_norm": 1.625, "learning_rate": 1.2821735971591846e-05, "loss": 0.8579, "step": 7214 }, { "epoch": 1.225132348843689, "grad_norm": 1.5859375, "learning_rate": 1.2820003384261175e-05, "loss": 0.8119, "step": 7215 }, { "epoch": 1.225303812932679, "grad_norm": 1.6953125, "learning_rate": 1.281827070495957e-05, "loss": 0.9551, "step": 7216 }, { "epoch": 1.2254752770216688, "grad_norm": 1.8203125, "learning_rate": 1.2816537933743547e-05, "loss": 0.9452, "step": 7217 }, { "epoch": 1.2256467411106586, "grad_norm": 1.7421875, "learning_rate": 1.2814805070669616e-05, "loss": 0.8825, "step": 7218 }, { "epoch": 1.2258182051996485, "grad_norm": 1.6328125, "learning_rate": 1.2813072115794292e-05, "loss": 0.84, "step": 7219 }, { "epoch": 1.2259896692886383, "grad_norm": 1.765625, "learning_rate": 1.2811339069174091e-05, "loss": 0.9508, "step": 7220 }, { "epoch": 1.2261611333776283, "grad_norm": 1.640625, "learning_rate": 1.280960593086554e-05, "loss": 0.8388, "step": 7221 }, { "epoch": 1.2263325974666182, "grad_norm": 1.8125, "learning_rate": 1.280787270092516e-05, "loss": 0.8698, "step": 7222 }, { "epoch": 1.226504061555608, "grad_norm": 1.6953125, "learning_rate": 1.2806139379409475e-05, "loss": 0.8033, "step": 7223 }, { "epoch": 1.2266755256445978, "grad_norm": 1.8203125, "learning_rate": 1.2804405966375018e-05, "loss": 0.9809, "step": 7224 }, { "epoch": 1.2268469897335876, "grad_norm": 1.7265625, "learning_rate": 1.2802672461878323e-05, "loss": 0.857, "step": 7225 }, { "epoch": 1.2270184538225775, "grad_norm": 1.546875, "learning_rate": 1.2800938865975923e-05, "loss": 0.7538, "step": 7226 }, { "epoch": 1.2271899179115673, "grad_norm": 1.609375, "learning_rate": 1.2799205178724362e-05, "loss": 0.8331, "step": 7227 }, { "epoch": 1.2273613820005573, "grad_norm": 1.7578125, "learning_rate": 1.2797471400180177e-05, "loss": 0.9054, "step": 7228 }, { "epoch": 1.2275328460895472, "grad_norm": 1.703125, "learning_rate": 1.2795737530399919e-05, "loss": 0.8271, "step": 7229 }, { "epoch": 1.227704310178537, "grad_norm": 1.6328125, "learning_rate": 1.2794003569440128e-05, "loss": 0.836, "step": 7230 }, { "epoch": 1.2278757742675268, "grad_norm": 1.703125, "learning_rate": 1.2792269517357361e-05, "loss": 0.9147, "step": 7231 }, { "epoch": 1.2280472383565166, "grad_norm": 1.7109375, "learning_rate": 1.279053537420817e-05, "loss": 0.8463, "step": 7232 }, { "epoch": 1.2282187024455067, "grad_norm": 1.640625, "learning_rate": 1.2788801140049117e-05, "loss": 0.7947, "step": 7233 }, { "epoch": 1.2283901665344965, "grad_norm": 1.6640625, "learning_rate": 1.2787066814936753e-05, "loss": 0.8187, "step": 7234 }, { "epoch": 1.2285616306234863, "grad_norm": 1.703125, "learning_rate": 1.2785332398927641e-05, "loss": 0.8147, "step": 7235 }, { "epoch": 1.2287330947124762, "grad_norm": 1.703125, "learning_rate": 1.2783597892078357e-05, "loss": 0.8814, "step": 7236 }, { "epoch": 1.228904558801466, "grad_norm": 1.609375, "learning_rate": 1.278186329444546e-05, "loss": 0.8836, "step": 7237 }, { "epoch": 1.2290760228904558, "grad_norm": 1.640625, "learning_rate": 1.278012860608553e-05, "loss": 0.8651, "step": 7238 }, { "epoch": 1.2292474869794456, "grad_norm": 1.6640625, "learning_rate": 1.2778393827055133e-05, "loss": 0.8794, "step": 7239 }, { "epoch": 1.2294189510684357, "grad_norm": 1.65625, "learning_rate": 1.2776658957410852e-05, "loss": 0.7917, "step": 7240 }, { "epoch": 1.2295904151574255, "grad_norm": 1.609375, "learning_rate": 1.2774923997209268e-05, "loss": 0.8094, "step": 7241 }, { "epoch": 1.2297618792464153, "grad_norm": 1.609375, "learning_rate": 1.277318894650696e-05, "loss": 0.867, "step": 7242 }, { "epoch": 1.2299333433354052, "grad_norm": 1.703125, "learning_rate": 1.2771453805360521e-05, "loss": 0.8815, "step": 7243 }, { "epoch": 1.230104807424395, "grad_norm": 1.7421875, "learning_rate": 1.2769718573826536e-05, "loss": 0.8418, "step": 7244 }, { "epoch": 1.230276271513385, "grad_norm": 1.6953125, "learning_rate": 1.2767983251961596e-05, "loss": 0.9079, "step": 7245 }, { "epoch": 1.2304477356023749, "grad_norm": 1.703125, "learning_rate": 1.2766247839822302e-05, "loss": 0.8975, "step": 7246 }, { "epoch": 1.2306191996913647, "grad_norm": 1.640625, "learning_rate": 1.2764512337465247e-05, "loss": 0.9299, "step": 7247 }, { "epoch": 1.2307906637803545, "grad_norm": 1.703125, "learning_rate": 1.2762776744947034e-05, "loss": 0.8715, "step": 7248 }, { "epoch": 1.2309621278693443, "grad_norm": 1.6171875, "learning_rate": 1.2761041062324268e-05, "loss": 0.8434, "step": 7249 }, { "epoch": 1.2311335919583342, "grad_norm": 1.6953125, "learning_rate": 1.275930528965356e-05, "loss": 0.9107, "step": 7250 }, { "epoch": 1.231305056047324, "grad_norm": 1.7265625, "learning_rate": 1.2757569426991514e-05, "loss": 0.9146, "step": 7251 }, { "epoch": 1.231476520136314, "grad_norm": 1.6328125, "learning_rate": 1.2755833474394744e-05, "loss": 0.8134, "step": 7252 }, { "epoch": 1.2316479842253039, "grad_norm": 1.5390625, "learning_rate": 1.2754097431919864e-05, "loss": 0.8832, "step": 7253 }, { "epoch": 1.2318194483142937, "grad_norm": 1.703125, "learning_rate": 1.2752361299623499e-05, "loss": 0.9229, "step": 7254 }, { "epoch": 1.2319909124032835, "grad_norm": 1.7109375, "learning_rate": 1.2750625077562266e-05, "loss": 0.9005, "step": 7255 }, { "epoch": 1.2321623764922733, "grad_norm": 1.6640625, "learning_rate": 1.2748888765792792e-05, "loss": 0.902, "step": 7256 }, { "epoch": 1.2323338405812634, "grad_norm": 1.6171875, "learning_rate": 1.2747152364371706e-05, "loss": 0.8608, "step": 7257 }, { "epoch": 1.2325053046702532, "grad_norm": 1.7421875, "learning_rate": 1.2745415873355636e-05, "loss": 0.7951, "step": 7258 }, { "epoch": 1.232676768759243, "grad_norm": 1.6796875, "learning_rate": 1.2743679292801215e-05, "loss": 0.8875, "step": 7259 }, { "epoch": 1.2328482328482329, "grad_norm": 1.6640625, "learning_rate": 1.274194262276508e-05, "loss": 0.8669, "step": 7260 }, { "epoch": 1.2330196969372227, "grad_norm": 1.703125, "learning_rate": 1.2740205863303873e-05, "loss": 0.942, "step": 7261 }, { "epoch": 1.2331911610262125, "grad_norm": 1.625, "learning_rate": 1.2738469014474233e-05, "loss": 0.8201, "step": 7262 }, { "epoch": 1.2333626251152023, "grad_norm": 1.6484375, "learning_rate": 1.2736732076332802e-05, "loss": 0.8711, "step": 7263 }, { "epoch": 1.2335340892041924, "grad_norm": 1.71875, "learning_rate": 1.2734995048936236e-05, "loss": 1.0015, "step": 7264 }, { "epoch": 1.2337055532931822, "grad_norm": 1.5546875, "learning_rate": 1.2733257932341182e-05, "loss": 0.7611, "step": 7265 }, { "epoch": 1.233877017382172, "grad_norm": 1.78125, "learning_rate": 1.2731520726604298e-05, "loss": 0.8811, "step": 7266 }, { "epoch": 1.2340484814711619, "grad_norm": 1.6953125, "learning_rate": 1.2729783431782234e-05, "loss": 0.9193, "step": 7267 }, { "epoch": 1.2342199455601517, "grad_norm": 1.6875, "learning_rate": 1.2728046047931655e-05, "loss": 0.8882, "step": 7268 }, { "epoch": 1.2343914096491415, "grad_norm": 1.65625, "learning_rate": 1.2726308575109219e-05, "loss": 0.8562, "step": 7269 }, { "epoch": 1.2345628737381316, "grad_norm": 1.59375, "learning_rate": 1.2724571013371593e-05, "loss": 0.865, "step": 7270 }, { "epoch": 1.2347343378271214, "grad_norm": 1.6875, "learning_rate": 1.2722833362775448e-05, "loss": 0.8359, "step": 7271 }, { "epoch": 1.2349058019161112, "grad_norm": 1.6796875, "learning_rate": 1.2721095623377456e-05, "loss": 0.9229, "step": 7272 }, { "epoch": 1.235077266005101, "grad_norm": 1.8515625, "learning_rate": 1.2719357795234287e-05, "loss": 0.9621, "step": 7273 }, { "epoch": 1.2352487300940909, "grad_norm": 1.6171875, "learning_rate": 1.2717619878402618e-05, "loss": 0.834, "step": 7274 }, { "epoch": 1.2354201941830807, "grad_norm": 1.546875, "learning_rate": 1.2715881872939133e-05, "loss": 0.7711, "step": 7275 }, { "epoch": 1.2355916582720707, "grad_norm": 1.6484375, "learning_rate": 1.2714143778900514e-05, "loss": 0.8419, "step": 7276 }, { "epoch": 1.2357631223610606, "grad_norm": 1.6328125, "learning_rate": 1.2712405596343444e-05, "loss": 0.8519, "step": 7277 }, { "epoch": 1.2359345864500504, "grad_norm": 1.7109375, "learning_rate": 1.2710667325324618e-05, "loss": 0.9297, "step": 7278 }, { "epoch": 1.2361060505390402, "grad_norm": 1.7109375, "learning_rate": 1.2708928965900719e-05, "loss": 0.9045, "step": 7279 }, { "epoch": 1.23627751462803, "grad_norm": 1.7265625, "learning_rate": 1.2707190518128446e-05, "loss": 0.9259, "step": 7280 }, { "epoch": 1.2364489787170199, "grad_norm": 1.6171875, "learning_rate": 1.2705451982064497e-05, "loss": 0.8243, "step": 7281 }, { "epoch": 1.23662044280601, "grad_norm": 1.6796875, "learning_rate": 1.2703713357765569e-05, "loss": 0.8922, "step": 7282 }, { "epoch": 1.2367919068949997, "grad_norm": 1.828125, "learning_rate": 1.2701974645288366e-05, "loss": 0.8736, "step": 7283 }, { "epoch": 1.2369633709839896, "grad_norm": 1.7265625, "learning_rate": 1.2700235844689602e-05, "loss": 0.9215, "step": 7284 }, { "epoch": 1.2371348350729794, "grad_norm": 1.6953125, "learning_rate": 1.2698496956025972e-05, "loss": 0.9459, "step": 7285 }, { "epoch": 1.2373062991619692, "grad_norm": 1.7265625, "learning_rate": 1.2696757979354199e-05, "loss": 0.9388, "step": 7286 }, { "epoch": 1.237477763250959, "grad_norm": 1.640625, "learning_rate": 1.269501891473099e-05, "loss": 0.8158, "step": 7287 }, { "epoch": 1.2376492273399489, "grad_norm": 1.6796875, "learning_rate": 1.2693279762213067e-05, "loss": 0.8949, "step": 7288 }, { "epoch": 1.237820691428939, "grad_norm": 1.6484375, "learning_rate": 1.269154052185715e-05, "loss": 0.8572, "step": 7289 }, { "epoch": 1.2379921555179287, "grad_norm": 1.7109375, "learning_rate": 1.268980119371996e-05, "loss": 0.7945, "step": 7290 }, { "epoch": 1.2381636196069186, "grad_norm": 1.765625, "learning_rate": 1.2688061777858223e-05, "loss": 0.9051, "step": 7291 }, { "epoch": 1.2383350836959084, "grad_norm": 1.84375, "learning_rate": 1.268632227432867e-05, "loss": 0.9626, "step": 7292 }, { "epoch": 1.2385065477848982, "grad_norm": 1.765625, "learning_rate": 1.2684582683188033e-05, "loss": 0.8529, "step": 7293 }, { "epoch": 1.2386780118738883, "grad_norm": 1.6875, "learning_rate": 1.2682843004493047e-05, "loss": 0.8847, "step": 7294 }, { "epoch": 1.238849475962878, "grad_norm": 1.7265625, "learning_rate": 1.2681103238300446e-05, "loss": 0.8428, "step": 7295 }, { "epoch": 1.239020940051868, "grad_norm": 1.6953125, "learning_rate": 1.2679363384666972e-05, "loss": 0.8807, "step": 7296 }, { "epoch": 1.2391924041408577, "grad_norm": 1.7109375, "learning_rate": 1.267762344364937e-05, "loss": 0.8548, "step": 7297 }, { "epoch": 1.2393638682298476, "grad_norm": 1.625, "learning_rate": 1.2675883415304383e-05, "loss": 0.8498, "step": 7298 }, { "epoch": 1.2395353323188374, "grad_norm": 1.75, "learning_rate": 1.2674143299688761e-05, "loss": 0.8959, "step": 7299 }, { "epoch": 1.2397067964078272, "grad_norm": 1.6484375, "learning_rate": 1.2672403096859257e-05, "loss": 0.8864, "step": 7300 }, { "epoch": 1.2398782604968173, "grad_norm": 1.7421875, "learning_rate": 1.2670662806872625e-05, "loss": 0.8839, "step": 7301 }, { "epoch": 1.240049724585807, "grad_norm": 1.6328125, "learning_rate": 1.2668922429785619e-05, "loss": 0.8581, "step": 7302 }, { "epoch": 1.240221188674797, "grad_norm": 1.6484375, "learning_rate": 1.2667181965655006e-05, "loss": 0.8189, "step": 7303 }, { "epoch": 1.2403926527637867, "grad_norm": 1.6640625, "learning_rate": 1.2665441414537544e-05, "loss": 0.8233, "step": 7304 }, { "epoch": 1.2405641168527766, "grad_norm": 1.6328125, "learning_rate": 1.266370077649e-05, "loss": 0.8569, "step": 7305 }, { "epoch": 1.2407355809417666, "grad_norm": 1.6796875, "learning_rate": 1.2661960051569144e-05, "loss": 0.8426, "step": 7306 }, { "epoch": 1.2409070450307564, "grad_norm": 1.59375, "learning_rate": 1.2660219239831748e-05, "loss": 0.842, "step": 7307 }, { "epoch": 1.2410785091197463, "grad_norm": 1.6953125, "learning_rate": 1.2658478341334583e-05, "loss": 0.943, "step": 7308 }, { "epoch": 1.241249973208736, "grad_norm": 1.6328125, "learning_rate": 1.265673735613443e-05, "loss": 0.8756, "step": 7309 }, { "epoch": 1.241421437297726, "grad_norm": 1.5546875, "learning_rate": 1.2654996284288063e-05, "loss": 0.7903, "step": 7310 }, { "epoch": 1.2415929013867157, "grad_norm": 1.6796875, "learning_rate": 1.2653255125852272e-05, "loss": 0.8261, "step": 7311 }, { "epoch": 1.2417643654757056, "grad_norm": 1.6484375, "learning_rate": 1.2651513880883841e-05, "loss": 0.8329, "step": 7312 }, { "epoch": 1.2419358295646956, "grad_norm": 1.7578125, "learning_rate": 1.264977254943956e-05, "loss": 0.9497, "step": 7313 }, { "epoch": 1.2421072936536854, "grad_norm": 1.71875, "learning_rate": 1.2648031131576217e-05, "loss": 0.9085, "step": 7314 }, { "epoch": 1.2422787577426753, "grad_norm": 1.7421875, "learning_rate": 1.2646289627350605e-05, "loss": 0.8737, "step": 7315 }, { "epoch": 1.242450221831665, "grad_norm": 1.6953125, "learning_rate": 1.2644548036819525e-05, "loss": 0.8706, "step": 7316 }, { "epoch": 1.242621685920655, "grad_norm": 1.65625, "learning_rate": 1.2642806360039774e-05, "loss": 0.7986, "step": 7317 }, { "epoch": 1.242793150009645, "grad_norm": 1.6484375, "learning_rate": 1.2641064597068156e-05, "loss": 0.9261, "step": 7318 }, { "epoch": 1.2429646140986348, "grad_norm": 1.734375, "learning_rate": 1.2639322747961476e-05, "loss": 0.8666, "step": 7319 }, { "epoch": 1.2431360781876246, "grad_norm": 1.640625, "learning_rate": 1.2637580812776542e-05, "loss": 0.8477, "step": 7320 }, { "epoch": 1.2433075422766144, "grad_norm": 1.6953125, "learning_rate": 1.2635838791570166e-05, "loss": 0.8551, "step": 7321 }, { "epoch": 1.2434790063656043, "grad_norm": 1.71875, "learning_rate": 1.263409668439916e-05, "loss": 0.954, "step": 7322 }, { "epoch": 1.243650470454594, "grad_norm": 1.7421875, "learning_rate": 1.2632354491320342e-05, "loss": 0.8462, "step": 7323 }, { "epoch": 1.243821934543584, "grad_norm": 1.734375, "learning_rate": 1.2630612212390534e-05, "loss": 0.8168, "step": 7324 }, { "epoch": 1.243993398632574, "grad_norm": 1.578125, "learning_rate": 1.2628869847666556e-05, "loss": 0.7898, "step": 7325 }, { "epoch": 1.2441648627215638, "grad_norm": 1.6328125, "learning_rate": 1.2627127397205232e-05, "loss": 0.803, "step": 7326 }, { "epoch": 1.2443363268105536, "grad_norm": 1.7578125, "learning_rate": 1.262538486106339e-05, "loss": 0.8628, "step": 7327 }, { "epoch": 1.2445077908995434, "grad_norm": 1.703125, "learning_rate": 1.2623642239297862e-05, "loss": 0.8727, "step": 7328 }, { "epoch": 1.2446792549885333, "grad_norm": 1.7265625, "learning_rate": 1.2621899531965477e-05, "loss": 0.9508, "step": 7329 }, { "epoch": 1.2448507190775233, "grad_norm": 1.6796875, "learning_rate": 1.2620156739123079e-05, "loss": 0.8475, "step": 7330 }, { "epoch": 1.2450221831665131, "grad_norm": 1.6796875, "learning_rate": 1.2618413860827506e-05, "loss": 0.8502, "step": 7331 }, { "epoch": 1.245193647255503, "grad_norm": 1.609375, "learning_rate": 1.2616670897135592e-05, "loss": 0.7271, "step": 7332 }, { "epoch": 1.2453651113444928, "grad_norm": 1.6875, "learning_rate": 1.2614927848104189e-05, "loss": 0.851, "step": 7333 }, { "epoch": 1.2455365754334826, "grad_norm": 1.59375, "learning_rate": 1.261318471379014e-05, "loss": 0.7724, "step": 7334 }, { "epoch": 1.2457080395224724, "grad_norm": 1.71875, "learning_rate": 1.2611441494250299e-05, "loss": 0.8478, "step": 7335 }, { "epoch": 1.2458795036114623, "grad_norm": 1.6640625, "learning_rate": 1.260969818954152e-05, "loss": 0.8933, "step": 7336 }, { "epoch": 1.2460509677004523, "grad_norm": 1.609375, "learning_rate": 1.2607954799720647e-05, "loss": 0.8304, "step": 7337 }, { "epoch": 1.2462224317894421, "grad_norm": 1.6796875, "learning_rate": 1.2606211324844555e-05, "loss": 0.8571, "step": 7338 }, { "epoch": 1.246393895878432, "grad_norm": 1.6875, "learning_rate": 1.2604467764970094e-05, "loss": 0.8683, "step": 7339 }, { "epoch": 1.2465653599674218, "grad_norm": 1.671875, "learning_rate": 1.2602724120154134e-05, "loss": 0.8936, "step": 7340 }, { "epoch": 1.2467368240564116, "grad_norm": 1.7109375, "learning_rate": 1.2600980390453537e-05, "loss": 0.9597, "step": 7341 }, { "epoch": 1.2469082881454017, "grad_norm": 1.71875, "learning_rate": 1.2599236575925178e-05, "loss": 0.8553, "step": 7342 }, { "epoch": 1.2470797522343915, "grad_norm": 1.6328125, "learning_rate": 1.2597492676625925e-05, "loss": 0.806, "step": 7343 }, { "epoch": 1.2472512163233813, "grad_norm": 1.8125, "learning_rate": 1.2595748692612654e-05, "loss": 0.81, "step": 7344 }, { "epoch": 1.2474226804123711, "grad_norm": 1.6953125, "learning_rate": 1.2594004623942244e-05, "loss": 0.8586, "step": 7345 }, { "epoch": 1.247594144501361, "grad_norm": 1.6640625, "learning_rate": 1.2592260470671576e-05, "loss": 0.8506, "step": 7346 }, { "epoch": 1.2477656085903508, "grad_norm": 1.703125, "learning_rate": 1.259051623285753e-05, "loss": 0.9173, "step": 7347 }, { "epoch": 1.2479370726793406, "grad_norm": 1.7265625, "learning_rate": 1.2588771910556996e-05, "loss": 0.8443, "step": 7348 }, { "epoch": 1.2481085367683307, "grad_norm": 1.7109375, "learning_rate": 1.2587027503826863e-05, "loss": 0.8387, "step": 7349 }, { "epoch": 1.2482800008573205, "grad_norm": 1.59375, "learning_rate": 1.2585283012724019e-05, "loss": 0.852, "step": 7350 }, { "epoch": 1.2484514649463103, "grad_norm": 1.5625, "learning_rate": 1.2583538437305363e-05, "loss": 0.8271, "step": 7351 }, { "epoch": 1.2486229290353001, "grad_norm": 1.609375, "learning_rate": 1.2581793777627787e-05, "loss": 0.9076, "step": 7352 }, { "epoch": 1.24879439312429, "grad_norm": 1.609375, "learning_rate": 1.2580049033748195e-05, "loss": 0.9031, "step": 7353 }, { "epoch": 1.24896585721328, "grad_norm": 1.703125, "learning_rate": 1.257830420572349e-05, "loss": 0.9212, "step": 7354 }, { "epoch": 1.2491373213022698, "grad_norm": 1.671875, "learning_rate": 1.2576559293610575e-05, "loss": 0.8859, "step": 7355 }, { "epoch": 1.2493087853912597, "grad_norm": 1.7109375, "learning_rate": 1.257481429746636e-05, "loss": 0.815, "step": 7356 }, { "epoch": 1.2494802494802495, "grad_norm": 1.734375, "learning_rate": 1.2573069217347751e-05, "loss": 0.9199, "step": 7357 }, { "epoch": 1.2496517135692393, "grad_norm": 1.6875, "learning_rate": 1.257132405331167e-05, "loss": 0.7968, "step": 7358 }, { "epoch": 1.2498231776582291, "grad_norm": 1.6875, "learning_rate": 1.2569578805415026e-05, "loss": 0.8123, "step": 7359 }, { "epoch": 1.249994641747219, "grad_norm": 1.5625, "learning_rate": 1.2567833473714743e-05, "loss": 0.8134, "step": 7360 }, { "epoch": 1.2501661058362088, "grad_norm": 1.796875, "learning_rate": 1.2566088058267737e-05, "loss": 0.8872, "step": 7361 }, { "epoch": 1.2503375699251988, "grad_norm": 1.671875, "learning_rate": 1.256434255913094e-05, "loss": 0.8235, "step": 7362 }, { "epoch": 1.2505090340141887, "grad_norm": 1.59375, "learning_rate": 1.2562596976361276e-05, "loss": 0.8185, "step": 7363 }, { "epoch": 1.2506804981031785, "grad_norm": 1.609375, "learning_rate": 1.2560851310015674e-05, "loss": 0.861, "step": 7364 }, { "epoch": 1.2508519621921683, "grad_norm": 1.6953125, "learning_rate": 1.2559105560151065e-05, "loss": 0.8713, "step": 7365 }, { "epoch": 1.2510234262811584, "grad_norm": 1.6640625, "learning_rate": 1.2557359726824392e-05, "loss": 0.8448, "step": 7366 }, { "epoch": 1.2511948903701482, "grad_norm": 1.6875, "learning_rate": 1.2555613810092585e-05, "loss": 0.8949, "step": 7367 }, { "epoch": 1.251366354459138, "grad_norm": 1.6328125, "learning_rate": 1.2553867810012588e-05, "loss": 0.8536, "step": 7368 }, { "epoch": 1.2515378185481278, "grad_norm": 1.703125, "learning_rate": 1.2552121726641344e-05, "loss": 0.8656, "step": 7369 }, { "epoch": 1.2517092826371177, "grad_norm": 1.7734375, "learning_rate": 1.2550375560035804e-05, "loss": 0.8059, "step": 7370 }, { "epoch": 1.2518807467261075, "grad_norm": 1.6015625, "learning_rate": 1.2548629310252912e-05, "loss": 0.8736, "step": 7371 }, { "epoch": 1.2520522108150973, "grad_norm": 1.7421875, "learning_rate": 1.254688297734962e-05, "loss": 0.9381, "step": 7372 }, { "epoch": 1.2522236749040871, "grad_norm": 1.65625, "learning_rate": 1.254513656138288e-05, "loss": 0.8456, "step": 7373 }, { "epoch": 1.2523951389930772, "grad_norm": 1.609375, "learning_rate": 1.2543390062409655e-05, "loss": 0.8379, "step": 7374 }, { "epoch": 1.252566603082067, "grad_norm": 1.6796875, "learning_rate": 1.2541643480486905e-05, "loss": 0.868, "step": 7375 }, { "epoch": 1.2527380671710568, "grad_norm": 1.71875, "learning_rate": 1.2539896815671585e-05, "loss": 0.8134, "step": 7376 }, { "epoch": 1.2529095312600467, "grad_norm": 1.7578125, "learning_rate": 1.2538150068020672e-05, "loss": 0.8924, "step": 7377 }, { "epoch": 1.2530809953490367, "grad_norm": 1.8046875, "learning_rate": 1.2536403237591125e-05, "loss": 0.9193, "step": 7378 }, { "epoch": 1.2532524594380265, "grad_norm": 1.8046875, "learning_rate": 1.2534656324439912e-05, "loss": 0.8861, "step": 7379 }, { "epoch": 1.2534239235270164, "grad_norm": 1.6328125, "learning_rate": 1.253290932862402e-05, "loss": 0.8459, "step": 7380 }, { "epoch": 1.2535953876160062, "grad_norm": 1.7109375, "learning_rate": 1.2531162250200411e-05, "loss": 0.9036, "step": 7381 }, { "epoch": 1.253766851704996, "grad_norm": 1.6875, "learning_rate": 1.2529415089226073e-05, "loss": 0.9044, "step": 7382 }, { "epoch": 1.2539383157939858, "grad_norm": 1.7421875, "learning_rate": 1.2527667845757983e-05, "loss": 0.8205, "step": 7383 }, { "epoch": 1.2541097798829757, "grad_norm": 1.7265625, "learning_rate": 1.2525920519853123e-05, "loss": 0.8355, "step": 7384 }, { "epoch": 1.2542812439719655, "grad_norm": 1.625, "learning_rate": 1.2524173111568485e-05, "loss": 0.8957, "step": 7385 }, { "epoch": 1.2544527080609555, "grad_norm": 1.6875, "learning_rate": 1.2522425620961059e-05, "loss": 0.8491, "step": 7386 }, { "epoch": 1.2546241721499454, "grad_norm": 1.6640625, "learning_rate": 1.252067804808783e-05, "loss": 0.7969, "step": 7387 }, { "epoch": 1.2547956362389352, "grad_norm": 1.7265625, "learning_rate": 1.2518930393005807e-05, "loss": 0.8935, "step": 7388 }, { "epoch": 1.254967100327925, "grad_norm": 1.65625, "learning_rate": 1.2517182655771971e-05, "loss": 0.814, "step": 7389 }, { "epoch": 1.255138564416915, "grad_norm": 1.6484375, "learning_rate": 1.2515434836443331e-05, "loss": 0.8395, "step": 7390 }, { "epoch": 1.255310028505905, "grad_norm": 1.640625, "learning_rate": 1.251368693507689e-05, "loss": 0.8807, "step": 7391 }, { "epoch": 1.2554814925948947, "grad_norm": 1.7109375, "learning_rate": 1.251193895172965e-05, "loss": 0.9083, "step": 7392 }, { "epoch": 1.2556529566838845, "grad_norm": 1.6796875, "learning_rate": 1.2510190886458625e-05, "loss": 0.9081, "step": 7393 }, { "epoch": 1.2558244207728744, "grad_norm": 1.6015625, "learning_rate": 1.250844273932082e-05, "loss": 0.8703, "step": 7394 }, { "epoch": 1.2559958848618642, "grad_norm": 1.5625, "learning_rate": 1.2506694510373252e-05, "loss": 0.8511, "step": 7395 }, { "epoch": 1.256167348950854, "grad_norm": 1.671875, "learning_rate": 1.2504946199672935e-05, "loss": 0.8408, "step": 7396 }, { "epoch": 1.2563388130398438, "grad_norm": 1.75, "learning_rate": 1.2503197807276894e-05, "loss": 0.9484, "step": 7397 }, { "epoch": 1.256510277128834, "grad_norm": 1.6171875, "learning_rate": 1.2501449333242144e-05, "loss": 0.8141, "step": 7398 }, { "epoch": 1.2566817412178237, "grad_norm": 1.7109375, "learning_rate": 1.2499700777625709e-05, "loss": 0.9525, "step": 7399 }, { "epoch": 1.2568532053068135, "grad_norm": 1.671875, "learning_rate": 1.2497952140484624e-05, "loss": 0.9133, "step": 7400 }, { "epoch": 1.2570246693958034, "grad_norm": 1.71875, "learning_rate": 1.2496203421875912e-05, "loss": 0.8465, "step": 7401 }, { "epoch": 1.2571961334847932, "grad_norm": 1.71875, "learning_rate": 1.2494454621856604e-05, "loss": 0.8892, "step": 7402 }, { "epoch": 1.2573675975737832, "grad_norm": 1.6875, "learning_rate": 1.249270574048374e-05, "loss": 0.8771, "step": 7403 }, { "epoch": 1.257539061662773, "grad_norm": 1.625, "learning_rate": 1.249095677781435e-05, "loss": 0.9029, "step": 7404 }, { "epoch": 1.257710525751763, "grad_norm": 1.6640625, "learning_rate": 1.2489207733905485e-05, "loss": 0.8047, "step": 7405 }, { "epoch": 1.2578819898407527, "grad_norm": 1.703125, "learning_rate": 1.248745860881418e-05, "loss": 0.8067, "step": 7406 }, { "epoch": 1.2580534539297425, "grad_norm": 1.5859375, "learning_rate": 1.2485709402597483e-05, "loss": 0.8371, "step": 7407 }, { "epoch": 1.2582249180187324, "grad_norm": 1.78125, "learning_rate": 1.2483960115312443e-05, "loss": 0.9358, "step": 7408 }, { "epoch": 1.2583963821077222, "grad_norm": 1.6015625, "learning_rate": 1.248221074701611e-05, "loss": 0.8565, "step": 7409 }, { "epoch": 1.2585678461967122, "grad_norm": 1.8359375, "learning_rate": 1.2480461297765536e-05, "loss": 0.8881, "step": 7410 }, { "epoch": 1.258739310285702, "grad_norm": 1.671875, "learning_rate": 1.2478711767617782e-05, "loss": 0.8481, "step": 7411 }, { "epoch": 1.258910774374692, "grad_norm": 1.796875, "learning_rate": 1.2476962156629897e-05, "loss": 0.8353, "step": 7412 }, { "epoch": 1.2590822384636817, "grad_norm": 1.7421875, "learning_rate": 1.2475212464858952e-05, "loss": 0.851, "step": 7413 }, { "epoch": 1.2592537025526715, "grad_norm": 1.7734375, "learning_rate": 1.2473462692362008e-05, "loss": 0.8653, "step": 7414 }, { "epoch": 1.2594251666416616, "grad_norm": 1.6328125, "learning_rate": 1.2471712839196133e-05, "loss": 0.7982, "step": 7415 }, { "epoch": 1.2595966307306514, "grad_norm": 1.6328125, "learning_rate": 1.2469962905418393e-05, "loss": 0.8258, "step": 7416 }, { "epoch": 1.2597680948196412, "grad_norm": 1.640625, "learning_rate": 1.2468212891085862e-05, "loss": 0.8672, "step": 7417 }, { "epoch": 1.259939558908631, "grad_norm": 1.671875, "learning_rate": 1.2466462796255617e-05, "loss": 0.7899, "step": 7418 }, { "epoch": 1.260111022997621, "grad_norm": 1.6484375, "learning_rate": 1.2464712620984727e-05, "loss": 0.829, "step": 7419 }, { "epoch": 1.2602824870866107, "grad_norm": 1.703125, "learning_rate": 1.246296236533028e-05, "loss": 0.8621, "step": 7420 }, { "epoch": 1.2604539511756006, "grad_norm": 1.671875, "learning_rate": 1.2461212029349354e-05, "loss": 0.8306, "step": 7421 }, { "epoch": 1.2606254152645906, "grad_norm": 1.7890625, "learning_rate": 1.2459461613099037e-05, "loss": 0.8217, "step": 7422 }, { "epoch": 1.2607968793535804, "grad_norm": 1.6484375, "learning_rate": 1.2457711116636416e-05, "loss": 0.8073, "step": 7423 }, { "epoch": 1.2609683434425702, "grad_norm": 1.671875, "learning_rate": 1.2455960540018579e-05, "loss": 0.8963, "step": 7424 }, { "epoch": 1.26113980753156, "grad_norm": 1.6640625, "learning_rate": 1.2454209883302622e-05, "loss": 0.8579, "step": 7425 }, { "epoch": 1.26131127162055, "grad_norm": 1.7265625, "learning_rate": 1.2452459146545637e-05, "loss": 0.8723, "step": 7426 }, { "epoch": 1.26148273570954, "grad_norm": 1.7734375, "learning_rate": 1.2450708329804724e-05, "loss": 0.833, "step": 7427 }, { "epoch": 1.2616541997985298, "grad_norm": 1.6328125, "learning_rate": 1.2448957433136985e-05, "loss": 0.8809, "step": 7428 }, { "epoch": 1.2618256638875196, "grad_norm": 1.6796875, "learning_rate": 1.2447206456599525e-05, "loss": 0.8621, "step": 7429 }, { "epoch": 1.2619971279765094, "grad_norm": 1.6796875, "learning_rate": 1.2445455400249444e-05, "loss": 0.8685, "step": 7430 }, { "epoch": 1.2621685920654993, "grad_norm": 1.6875, "learning_rate": 1.2443704264143852e-05, "loss": 0.8792, "step": 7431 }, { "epoch": 1.262340056154489, "grad_norm": 1.75, "learning_rate": 1.2441953048339866e-05, "loss": 0.8524, "step": 7432 }, { "epoch": 1.262511520243479, "grad_norm": 1.703125, "learning_rate": 1.2440201752894592e-05, "loss": 0.832, "step": 7433 }, { "epoch": 1.262682984332469, "grad_norm": 1.703125, "learning_rate": 1.2438450377865153e-05, "loss": 0.8459, "step": 7434 }, { "epoch": 1.2628544484214588, "grad_norm": 1.5546875, "learning_rate": 1.2436698923308662e-05, "loss": 0.7745, "step": 7435 }, { "epoch": 1.2630259125104486, "grad_norm": 1.703125, "learning_rate": 1.2434947389282247e-05, "loss": 0.8945, "step": 7436 }, { "epoch": 1.2631973765994384, "grad_norm": 1.6875, "learning_rate": 1.2433195775843026e-05, "loss": 0.8573, "step": 7437 }, { "epoch": 1.2633688406884283, "grad_norm": 1.6953125, "learning_rate": 1.2431444083048128e-05, "loss": 0.8573, "step": 7438 }, { "epoch": 1.2635403047774183, "grad_norm": 1.765625, "learning_rate": 1.2429692310954682e-05, "loss": 0.8684, "step": 7439 }, { "epoch": 1.2637117688664081, "grad_norm": 1.6484375, "learning_rate": 1.2427940459619821e-05, "loss": 0.8501, "step": 7440 }, { "epoch": 1.263883232955398, "grad_norm": 1.6953125, "learning_rate": 1.2426188529100677e-05, "loss": 0.9016, "step": 7441 }, { "epoch": 1.2640546970443878, "grad_norm": 1.7109375, "learning_rate": 1.2424436519454393e-05, "loss": 0.8824, "step": 7442 }, { "epoch": 1.2642261611333776, "grad_norm": 1.734375, "learning_rate": 1.24226844307381e-05, "loss": 0.9148, "step": 7443 }, { "epoch": 1.2643976252223674, "grad_norm": 1.71875, "learning_rate": 1.2420932263008947e-05, "loss": 0.8463, "step": 7444 }, { "epoch": 1.2645690893113573, "grad_norm": 1.5859375, "learning_rate": 1.2419180016324076e-05, "loss": 0.8036, "step": 7445 }, { "epoch": 1.264740553400347, "grad_norm": 1.703125, "learning_rate": 1.241742769074064e-05, "loss": 0.92, "step": 7446 }, { "epoch": 1.2649120174893371, "grad_norm": 1.609375, "learning_rate": 1.2415675286315778e-05, "loss": 0.862, "step": 7447 }, { "epoch": 1.265083481578327, "grad_norm": 1.5703125, "learning_rate": 1.2413922803106649e-05, "loss": 0.7859, "step": 7448 }, { "epoch": 1.2652549456673168, "grad_norm": 1.6015625, "learning_rate": 1.2412170241170406e-05, "loss": 0.8329, "step": 7449 }, { "epoch": 1.2654264097563066, "grad_norm": 1.6171875, "learning_rate": 1.2410417600564212e-05, "loss": 0.8083, "step": 7450 }, { "epoch": 1.2655978738452967, "grad_norm": 1.734375, "learning_rate": 1.2408664881345219e-05, "loss": 0.9278, "step": 7451 }, { "epoch": 1.2657693379342865, "grad_norm": 1.75, "learning_rate": 1.2406912083570599e-05, "loss": 0.8668, "step": 7452 }, { "epoch": 1.2659408020232763, "grad_norm": 1.71875, "learning_rate": 1.2405159207297508e-05, "loss": 0.9055, "step": 7453 }, { "epoch": 1.2661122661122661, "grad_norm": 1.6640625, "learning_rate": 1.2403406252583121e-05, "loss": 0.874, "step": 7454 }, { "epoch": 1.266283730201256, "grad_norm": 1.6796875, "learning_rate": 1.2401653219484604e-05, "loss": 0.9812, "step": 7455 }, { "epoch": 1.2664551942902458, "grad_norm": 1.65625, "learning_rate": 1.2399900108059134e-05, "loss": 0.86, "step": 7456 }, { "epoch": 1.2666266583792356, "grad_norm": 1.5625, "learning_rate": 1.2398146918363882e-05, "loss": 0.7916, "step": 7457 }, { "epoch": 1.2667981224682254, "grad_norm": 1.734375, "learning_rate": 1.239639365045603e-05, "loss": 0.9295, "step": 7458 }, { "epoch": 1.2669695865572155, "grad_norm": 1.6875, "learning_rate": 1.2394640304392753e-05, "loss": 0.8695, "step": 7459 }, { "epoch": 1.2671410506462053, "grad_norm": 1.6875, "learning_rate": 1.2392886880231243e-05, "loss": 0.8425, "step": 7460 }, { "epoch": 1.2673125147351951, "grad_norm": 1.5703125, "learning_rate": 1.239113337802868e-05, "loss": 0.7969, "step": 7461 }, { "epoch": 1.267483978824185, "grad_norm": 1.6796875, "learning_rate": 1.2389379797842252e-05, "loss": 0.8745, "step": 7462 }, { "epoch": 1.267655442913175, "grad_norm": 1.7109375, "learning_rate": 1.2387626139729155e-05, "loss": 0.8732, "step": 7463 }, { "epoch": 1.2678269070021648, "grad_norm": 1.7109375, "learning_rate": 1.2385872403746575e-05, "loss": 0.8747, "step": 7464 }, { "epoch": 1.2679983710911547, "grad_norm": 1.6796875, "learning_rate": 1.238411858995171e-05, "loss": 0.8893, "step": 7465 }, { "epoch": 1.2681698351801445, "grad_norm": 1.625, "learning_rate": 1.2382364698401764e-05, "loss": 0.7905, "step": 7466 }, { "epoch": 1.2683412992691343, "grad_norm": 1.7734375, "learning_rate": 1.238061072915393e-05, "loss": 0.8982, "step": 7467 }, { "epoch": 1.2685127633581241, "grad_norm": 1.671875, "learning_rate": 1.2378856682265419e-05, "loss": 0.7652, "step": 7468 }, { "epoch": 1.268684227447114, "grad_norm": 1.7421875, "learning_rate": 1.2377102557793433e-05, "loss": 0.8618, "step": 7469 }, { "epoch": 1.2688556915361038, "grad_norm": 1.671875, "learning_rate": 1.2375348355795181e-05, "loss": 0.8758, "step": 7470 }, { "epoch": 1.2690271556250938, "grad_norm": 1.71875, "learning_rate": 1.2373594076327873e-05, "loss": 0.8996, "step": 7471 }, { "epoch": 1.2691986197140837, "grad_norm": 1.6328125, "learning_rate": 1.2371839719448725e-05, "loss": 0.8519, "step": 7472 }, { "epoch": 1.2693700838030735, "grad_norm": 1.6640625, "learning_rate": 1.2370085285214953e-05, "loss": 0.8879, "step": 7473 }, { "epoch": 1.2695415478920633, "grad_norm": 1.6953125, "learning_rate": 1.2368330773683774e-05, "loss": 0.8703, "step": 7474 }, { "epoch": 1.2697130119810534, "grad_norm": 1.6484375, "learning_rate": 1.236657618491241e-05, "loss": 0.8536, "step": 7475 }, { "epoch": 1.2698844760700432, "grad_norm": 1.75, "learning_rate": 1.2364821518958088e-05, "loss": 0.8663, "step": 7476 }, { "epoch": 1.270055940159033, "grad_norm": 1.546875, "learning_rate": 1.2363066775878028e-05, "loss": 0.8806, "step": 7477 }, { "epoch": 1.2702274042480228, "grad_norm": 1.625, "learning_rate": 1.236131195572946e-05, "loss": 0.7836, "step": 7478 }, { "epoch": 1.2703988683370127, "grad_norm": 1.6328125, "learning_rate": 1.235955705856962e-05, "loss": 0.7843, "step": 7479 }, { "epoch": 1.2705703324260025, "grad_norm": 1.5703125, "learning_rate": 1.2357802084455738e-05, "loss": 0.8683, "step": 7480 }, { "epoch": 1.2707417965149923, "grad_norm": 1.65625, "learning_rate": 1.2356047033445055e-05, "loss": 0.9125, "step": 7481 }, { "epoch": 1.2709132606039821, "grad_norm": 1.6875, "learning_rate": 1.2354291905594801e-05, "loss": 0.8758, "step": 7482 }, { "epoch": 1.2710847246929722, "grad_norm": 1.6171875, "learning_rate": 1.2352536700962228e-05, "loss": 0.8316, "step": 7483 }, { "epoch": 1.271256188781962, "grad_norm": 1.609375, "learning_rate": 1.2350781419604569e-05, "loss": 0.8318, "step": 7484 }, { "epoch": 1.2714276528709518, "grad_norm": 1.75, "learning_rate": 1.234902606157908e-05, "loss": 0.9431, "step": 7485 }, { "epoch": 1.2715991169599417, "grad_norm": 1.7890625, "learning_rate": 1.2347270626943002e-05, "loss": 0.9024, "step": 7486 }, { "epoch": 1.2717705810489317, "grad_norm": 1.7890625, "learning_rate": 1.234551511575359e-05, "loss": 0.945, "step": 7487 }, { "epoch": 1.2719420451379215, "grad_norm": 1.6328125, "learning_rate": 1.23437595280681e-05, "loss": 0.8238, "step": 7488 }, { "epoch": 1.2721135092269114, "grad_norm": 1.71875, "learning_rate": 1.2342003863943785e-05, "loss": 0.8964, "step": 7489 }, { "epoch": 1.2722849733159012, "grad_norm": 1.640625, "learning_rate": 1.2340248123437904e-05, "loss": 0.8467, "step": 7490 }, { "epoch": 1.272456437404891, "grad_norm": 1.7265625, "learning_rate": 1.2338492306607721e-05, "loss": 0.8657, "step": 7491 }, { "epoch": 1.2726279014938808, "grad_norm": 1.75, "learning_rate": 1.2336736413510497e-05, "loss": 0.8956, "step": 7492 }, { "epoch": 1.2727993655828707, "grad_norm": 1.609375, "learning_rate": 1.23349804442035e-05, "loss": 0.8019, "step": 7493 }, { "epoch": 1.2729708296718605, "grad_norm": 1.6796875, "learning_rate": 1.2333224398743997e-05, "loss": 0.8398, "step": 7494 }, { "epoch": 1.2731422937608505, "grad_norm": 1.65625, "learning_rate": 1.233146827718926e-05, "loss": 0.8467, "step": 7495 }, { "epoch": 1.2733137578498404, "grad_norm": 1.6953125, "learning_rate": 1.2329712079596563e-05, "loss": 0.9317, "step": 7496 }, { "epoch": 1.2734852219388302, "grad_norm": 1.71875, "learning_rate": 1.2327955806023181e-05, "loss": 0.8621, "step": 7497 }, { "epoch": 1.27365668602782, "grad_norm": 1.625, "learning_rate": 1.2326199456526397e-05, "loss": 0.8108, "step": 7498 }, { "epoch": 1.2738281501168098, "grad_norm": 1.6875, "learning_rate": 1.2324443031163487e-05, "loss": 0.8021, "step": 7499 }, { "epoch": 1.2739996142057999, "grad_norm": 1.734375, "learning_rate": 1.2322686529991739e-05, "loss": 0.8839, "step": 7500 }, { "epoch": 1.2741710782947897, "grad_norm": 1.7421875, "learning_rate": 1.2320929953068435e-05, "loss": 0.883, "step": 7501 }, { "epoch": 1.2743425423837795, "grad_norm": 1.625, "learning_rate": 1.2319173300450864e-05, "loss": 0.8737, "step": 7502 }, { "epoch": 1.2745140064727694, "grad_norm": 1.53125, "learning_rate": 1.231741657219632e-05, "loss": 0.797, "step": 7503 }, { "epoch": 1.2746854705617592, "grad_norm": 1.6015625, "learning_rate": 1.2315659768362097e-05, "loss": 0.812, "step": 7504 }, { "epoch": 1.274856934650749, "grad_norm": 1.65625, "learning_rate": 1.2313902889005486e-05, "loss": 0.9194, "step": 7505 }, { "epoch": 1.2750283987397388, "grad_norm": 1.6640625, "learning_rate": 1.2312145934183788e-05, "loss": 0.9029, "step": 7506 }, { "epoch": 1.2751998628287289, "grad_norm": 1.6640625, "learning_rate": 1.2310388903954304e-05, "loss": 0.8317, "step": 7507 }, { "epoch": 1.2753713269177187, "grad_norm": 1.578125, "learning_rate": 1.2308631798374339e-05, "loss": 0.8674, "step": 7508 }, { "epoch": 1.2755427910067085, "grad_norm": 1.625, "learning_rate": 1.23068746175012e-05, "loss": 0.7913, "step": 7509 }, { "epoch": 1.2757142550956984, "grad_norm": 1.671875, "learning_rate": 1.2305117361392192e-05, "loss": 0.8466, "step": 7510 }, { "epoch": 1.2758857191846882, "grad_norm": 1.625, "learning_rate": 1.2303360030104625e-05, "loss": 0.8082, "step": 7511 }, { "epoch": 1.2760571832736782, "grad_norm": 1.8125, "learning_rate": 1.2301602623695814e-05, "loss": 0.9373, "step": 7512 }, { "epoch": 1.276228647362668, "grad_norm": 1.6484375, "learning_rate": 1.2299845142223075e-05, "loss": 0.8069, "step": 7513 }, { "epoch": 1.2764001114516579, "grad_norm": 1.65625, "learning_rate": 1.2298087585743725e-05, "loss": 0.8812, "step": 7514 }, { "epoch": 1.2765715755406477, "grad_norm": 1.71875, "learning_rate": 1.2296329954315085e-05, "loss": 0.8012, "step": 7515 }, { "epoch": 1.2767430396296375, "grad_norm": 1.6875, "learning_rate": 1.2294572247994478e-05, "loss": 0.8418, "step": 7516 }, { "epoch": 1.2769145037186274, "grad_norm": 1.7265625, "learning_rate": 1.229281446683923e-05, "loss": 0.9418, "step": 7517 }, { "epoch": 1.2770859678076172, "grad_norm": 1.71875, "learning_rate": 1.2291056610906666e-05, "loss": 0.9058, "step": 7518 }, { "epoch": 1.2772574318966072, "grad_norm": 1.625, "learning_rate": 1.228929868025412e-05, "loss": 0.8718, "step": 7519 }, { "epoch": 1.277428895985597, "grad_norm": 1.7421875, "learning_rate": 1.2287540674938925e-05, "loss": 0.8379, "step": 7520 }, { "epoch": 1.2776003600745869, "grad_norm": 1.578125, "learning_rate": 1.2285782595018417e-05, "loss": 0.8615, "step": 7521 }, { "epoch": 1.2777718241635767, "grad_norm": 1.7109375, "learning_rate": 1.2284024440549924e-05, "loss": 0.9499, "step": 7522 }, { "epoch": 1.2779432882525665, "grad_norm": 1.59375, "learning_rate": 1.2282266211590798e-05, "loss": 0.8202, "step": 7523 }, { "epoch": 1.2781147523415566, "grad_norm": 1.703125, "learning_rate": 1.2280507908198375e-05, "loss": 0.9373, "step": 7524 }, { "epoch": 1.2782862164305464, "grad_norm": 1.6953125, "learning_rate": 1.227874953043e-05, "loss": 0.8337, "step": 7525 }, { "epoch": 1.2784576805195362, "grad_norm": 1.65625, "learning_rate": 1.2276991078343024e-05, "loss": 0.8789, "step": 7526 }, { "epoch": 1.278629144608526, "grad_norm": 1.625, "learning_rate": 1.2275232551994795e-05, "loss": 0.8343, "step": 7527 }, { "epoch": 1.2788006086975159, "grad_norm": 1.6640625, "learning_rate": 1.2273473951442663e-05, "loss": 0.8961, "step": 7528 }, { "epoch": 1.2789720727865057, "grad_norm": 1.7265625, "learning_rate": 1.2271715276743984e-05, "loss": 0.8509, "step": 7529 }, { "epoch": 1.2791435368754955, "grad_norm": 1.625, "learning_rate": 1.2269956527956118e-05, "loss": 0.8277, "step": 7530 }, { "epoch": 1.2793150009644856, "grad_norm": 1.7421875, "learning_rate": 1.226819770513642e-05, "loss": 0.8797, "step": 7531 }, { "epoch": 1.2794864650534754, "grad_norm": 1.75, "learning_rate": 1.2266438808342257e-05, "loss": 0.8979, "step": 7532 }, { "epoch": 1.2796579291424652, "grad_norm": 1.8359375, "learning_rate": 1.2264679837630984e-05, "loss": 0.8799, "step": 7533 }, { "epoch": 1.279829393231455, "grad_norm": 1.640625, "learning_rate": 1.2262920793059976e-05, "loss": 0.8473, "step": 7534 }, { "epoch": 1.2800008573204449, "grad_norm": 1.6640625, "learning_rate": 1.22611616746866e-05, "loss": 0.7805, "step": 7535 }, { "epoch": 1.280172321409435, "grad_norm": 1.6328125, "learning_rate": 1.2259402482568226e-05, "loss": 0.8182, "step": 7536 }, { "epoch": 1.2803437854984248, "grad_norm": 1.7421875, "learning_rate": 1.225764321676223e-05, "loss": 0.8051, "step": 7537 }, { "epoch": 1.2805152495874146, "grad_norm": 1.765625, "learning_rate": 1.2255883877325989e-05, "loss": 0.7994, "step": 7538 }, { "epoch": 1.2806867136764044, "grad_norm": 1.8125, "learning_rate": 1.2254124464316876e-05, "loss": 0.9064, "step": 7539 }, { "epoch": 1.2808581777653942, "grad_norm": 1.7578125, "learning_rate": 1.225236497779228e-05, "loss": 0.9003, "step": 7540 }, { "epoch": 1.281029641854384, "grad_norm": 1.765625, "learning_rate": 1.2250605417809579e-05, "loss": 0.9243, "step": 7541 }, { "epoch": 1.2812011059433739, "grad_norm": 1.65625, "learning_rate": 1.2248845784426157e-05, "loss": 0.7572, "step": 7542 }, { "epoch": 1.2813725700323637, "grad_norm": 1.640625, "learning_rate": 1.2247086077699408e-05, "loss": 0.9036, "step": 7543 }, { "epoch": 1.2815440341213538, "grad_norm": 1.703125, "learning_rate": 1.2245326297686721e-05, "loss": 0.8296, "step": 7544 }, { "epoch": 1.2817154982103436, "grad_norm": 1.71875, "learning_rate": 1.2243566444445487e-05, "loss": 0.8512, "step": 7545 }, { "epoch": 1.2818869622993334, "grad_norm": 1.671875, "learning_rate": 1.2241806518033104e-05, "loss": 0.8552, "step": 7546 }, { "epoch": 1.2820584263883232, "grad_norm": 1.734375, "learning_rate": 1.2240046518506967e-05, "loss": 0.8765, "step": 7547 }, { "epoch": 1.2822298904773133, "grad_norm": 1.515625, "learning_rate": 1.2238286445924479e-05, "loss": 0.808, "step": 7548 }, { "epoch": 1.282401354566303, "grad_norm": 1.71875, "learning_rate": 1.2236526300343041e-05, "loss": 0.8427, "step": 7549 }, { "epoch": 1.282572818655293, "grad_norm": 1.6640625, "learning_rate": 1.2234766081820063e-05, "loss": 0.8815, "step": 7550 }, { "epoch": 1.2827442827442828, "grad_norm": 1.6328125, "learning_rate": 1.2233005790412942e-05, "loss": 0.846, "step": 7551 }, { "epoch": 1.2829157468332726, "grad_norm": 1.703125, "learning_rate": 1.2231245426179095e-05, "loss": 0.8415, "step": 7552 }, { "epoch": 1.2830872109222624, "grad_norm": 1.6484375, "learning_rate": 1.222948498917593e-05, "loss": 0.8797, "step": 7553 }, { "epoch": 1.2832586750112522, "grad_norm": 1.7578125, "learning_rate": 1.2227724479460866e-05, "loss": 0.8813, "step": 7554 }, { "epoch": 1.283430139100242, "grad_norm": 1.6484375, "learning_rate": 1.2225963897091317e-05, "loss": 0.8223, "step": 7555 }, { "epoch": 1.283601603189232, "grad_norm": 1.703125, "learning_rate": 1.2224203242124707e-05, "loss": 0.8077, "step": 7556 }, { "epoch": 1.283773067278222, "grad_norm": 1.703125, "learning_rate": 1.2222442514618452e-05, "loss": 0.9232, "step": 7557 }, { "epoch": 1.2839445313672118, "grad_norm": 1.6796875, "learning_rate": 1.2220681714629976e-05, "loss": 0.8709, "step": 7558 }, { "epoch": 1.2841159954562016, "grad_norm": 1.71875, "learning_rate": 1.2218920842216708e-05, "loss": 0.877, "step": 7559 }, { "epoch": 1.2842874595451916, "grad_norm": 1.609375, "learning_rate": 1.2217159897436075e-05, "loss": 0.9244, "step": 7560 }, { "epoch": 1.2844589236341815, "grad_norm": 1.6796875, "learning_rate": 1.2215398880345507e-05, "loss": 0.9621, "step": 7561 }, { "epoch": 1.2846303877231713, "grad_norm": 1.765625, "learning_rate": 1.2213637791002443e-05, "loss": 0.9206, "step": 7562 }, { "epoch": 1.284801851812161, "grad_norm": 1.640625, "learning_rate": 1.2211876629464314e-05, "loss": 0.86, "step": 7563 }, { "epoch": 1.284973315901151, "grad_norm": 1.6484375, "learning_rate": 1.2210115395788558e-05, "loss": 0.8369, "step": 7564 }, { "epoch": 1.2851447799901408, "grad_norm": 1.625, "learning_rate": 1.2208354090032615e-05, "loss": 0.8893, "step": 7565 }, { "epoch": 1.2853162440791306, "grad_norm": 1.75, "learning_rate": 1.2206592712253931e-05, "loss": 0.8371, "step": 7566 }, { "epoch": 1.2854877081681204, "grad_norm": 1.71875, "learning_rate": 1.220483126250995e-05, "loss": 0.8877, "step": 7567 }, { "epoch": 1.2856591722571105, "grad_norm": 1.7109375, "learning_rate": 1.220306974085812e-05, "loss": 0.8343, "step": 7568 }, { "epoch": 1.2858306363461003, "grad_norm": 1.7421875, "learning_rate": 1.2201308147355886e-05, "loss": 0.927, "step": 7569 }, { "epoch": 1.2860021004350901, "grad_norm": 1.734375, "learning_rate": 1.2199546482060707e-05, "loss": 0.8311, "step": 7570 }, { "epoch": 1.28617356452408, "grad_norm": 1.6328125, "learning_rate": 1.2197784745030033e-05, "loss": 0.8742, "step": 7571 }, { "epoch": 1.28634502861307, "grad_norm": 1.703125, "learning_rate": 1.219602293632132e-05, "loss": 0.8421, "step": 7572 }, { "epoch": 1.2865164927020598, "grad_norm": 1.75, "learning_rate": 1.2194261055992033e-05, "loss": 0.9014, "step": 7573 }, { "epoch": 1.2866879567910496, "grad_norm": 1.6953125, "learning_rate": 1.219249910409963e-05, "loss": 0.8843, "step": 7574 }, { "epoch": 1.2868594208800395, "grad_norm": 1.625, "learning_rate": 1.2190737080701574e-05, "loss": 0.8677, "step": 7575 }, { "epoch": 1.2870308849690293, "grad_norm": 1.6875, "learning_rate": 1.2188974985855334e-05, "loss": 0.8641, "step": 7576 }, { "epoch": 1.2872023490580191, "grad_norm": 1.7421875, "learning_rate": 1.2187212819618376e-05, "loss": 0.8851, "step": 7577 }, { "epoch": 1.287373813147009, "grad_norm": 1.8046875, "learning_rate": 1.2185450582048168e-05, "loss": 0.826, "step": 7578 }, { "epoch": 1.2875452772359988, "grad_norm": 1.7265625, "learning_rate": 1.2183688273202192e-05, "loss": 0.9094, "step": 7579 }, { "epoch": 1.2877167413249888, "grad_norm": 1.6796875, "learning_rate": 1.2181925893137914e-05, "loss": 0.853, "step": 7580 }, { "epoch": 1.2878882054139786, "grad_norm": 1.75, "learning_rate": 1.2180163441912817e-05, "loss": 0.9173, "step": 7581 }, { "epoch": 1.2880596695029685, "grad_norm": 1.75, "learning_rate": 1.2178400919584384e-05, "loss": 0.8648, "step": 7582 }, { "epoch": 1.2882311335919583, "grad_norm": 1.6875, "learning_rate": 1.2176638326210091e-05, "loss": 0.8913, "step": 7583 }, { "epoch": 1.2884025976809481, "grad_norm": 1.734375, "learning_rate": 1.217487566184743e-05, "loss": 0.8438, "step": 7584 }, { "epoch": 1.2885740617699382, "grad_norm": 1.71875, "learning_rate": 1.217311292655388e-05, "loss": 0.8476, "step": 7585 }, { "epoch": 1.288745525858928, "grad_norm": 1.75, "learning_rate": 1.2171350120386933e-05, "loss": 0.8477, "step": 7586 }, { "epoch": 1.2889169899479178, "grad_norm": 1.765625, "learning_rate": 1.2169587243404085e-05, "loss": 0.8612, "step": 7587 }, { "epoch": 1.2890884540369076, "grad_norm": 1.640625, "learning_rate": 1.2167824295662825e-05, "loss": 0.8484, "step": 7588 }, { "epoch": 1.2892599181258975, "grad_norm": 1.7578125, "learning_rate": 1.2166061277220653e-05, "loss": 0.9121, "step": 7589 }, { "epoch": 1.2894313822148873, "grad_norm": 1.671875, "learning_rate": 1.2164298188135065e-05, "loss": 0.8612, "step": 7590 }, { "epoch": 1.2896028463038771, "grad_norm": 1.765625, "learning_rate": 1.2162535028463563e-05, "loss": 0.8444, "step": 7591 }, { "epoch": 1.2897743103928672, "grad_norm": 1.6328125, "learning_rate": 1.2160771798263651e-05, "loss": 0.7786, "step": 7592 }, { "epoch": 1.289945774481857, "grad_norm": 1.71875, "learning_rate": 1.2159008497592834e-05, "loss": 0.8248, "step": 7593 }, { "epoch": 1.2901172385708468, "grad_norm": 1.6640625, "learning_rate": 1.2157245126508619e-05, "loss": 0.8333, "step": 7594 }, { "epoch": 1.2902887026598366, "grad_norm": 1.7109375, "learning_rate": 1.2155481685068517e-05, "loss": 0.8895, "step": 7595 }, { "epoch": 1.2904601667488265, "grad_norm": 1.7109375, "learning_rate": 1.2153718173330045e-05, "loss": 0.902, "step": 7596 }, { "epoch": 1.2906316308378165, "grad_norm": 1.6796875, "learning_rate": 1.2151954591350708e-05, "loss": 0.8519, "step": 7597 }, { "epoch": 1.2908030949268063, "grad_norm": 1.703125, "learning_rate": 1.215019093918803e-05, "loss": 0.8967, "step": 7598 }, { "epoch": 1.2909745590157962, "grad_norm": 1.6796875, "learning_rate": 1.2148427216899528e-05, "loss": 0.8826, "step": 7599 }, { "epoch": 1.291146023104786, "grad_norm": 1.5625, "learning_rate": 1.2146663424542722e-05, "loss": 0.8378, "step": 7600 }, { "epoch": 1.2913174871937758, "grad_norm": 1.609375, "learning_rate": 1.214489956217514e-05, "loss": 0.8425, "step": 7601 }, { "epoch": 1.2914889512827656, "grad_norm": 1.6171875, "learning_rate": 1.2143135629854307e-05, "loss": 0.8659, "step": 7602 }, { "epoch": 1.2916604153717555, "grad_norm": 1.6640625, "learning_rate": 1.2141371627637752e-05, "loss": 0.8331, "step": 7603 }, { "epoch": 1.2918318794607455, "grad_norm": 1.703125, "learning_rate": 1.2139607555583004e-05, "loss": 0.8546, "step": 7604 }, { "epoch": 1.2920033435497353, "grad_norm": 1.71875, "learning_rate": 1.2137843413747594e-05, "loss": 0.9113, "step": 7605 }, { "epoch": 1.2921748076387252, "grad_norm": 1.625, "learning_rate": 1.2136079202189061e-05, "loss": 0.8261, "step": 7606 }, { "epoch": 1.292346271727715, "grad_norm": 1.6953125, "learning_rate": 1.2134314920964947e-05, "loss": 0.901, "step": 7607 }, { "epoch": 1.2925177358167048, "grad_norm": 1.8125, "learning_rate": 1.2132550570132779e-05, "loss": 0.8526, "step": 7608 }, { "epoch": 1.2926891999056949, "grad_norm": 1.6953125, "learning_rate": 1.2130786149750108e-05, "loss": 0.8381, "step": 7609 }, { "epoch": 1.2928606639946847, "grad_norm": 1.7109375, "learning_rate": 1.2129021659874478e-05, "loss": 0.8988, "step": 7610 }, { "epoch": 1.2930321280836745, "grad_norm": 1.703125, "learning_rate": 1.2127257100563435e-05, "loss": 0.866, "step": 7611 }, { "epoch": 1.2932035921726643, "grad_norm": 1.6953125, "learning_rate": 1.2125492471874526e-05, "loss": 0.9133, "step": 7612 }, { "epoch": 1.2933750562616542, "grad_norm": 1.8125, "learning_rate": 1.2123727773865305e-05, "loss": 0.8806, "step": 7613 }, { "epoch": 1.293546520350644, "grad_norm": 1.734375, "learning_rate": 1.2121963006593325e-05, "loss": 0.9313, "step": 7614 }, { "epoch": 1.2937179844396338, "grad_norm": 1.6953125, "learning_rate": 1.212019817011614e-05, "loss": 0.8593, "step": 7615 }, { "epoch": 1.2938894485286239, "grad_norm": 1.6796875, "learning_rate": 1.2118433264491307e-05, "loss": 0.8664, "step": 7616 }, { "epoch": 1.2940609126176137, "grad_norm": 1.7578125, "learning_rate": 1.211666828977639e-05, "loss": 0.8287, "step": 7617 }, { "epoch": 1.2942323767066035, "grad_norm": 1.7265625, "learning_rate": 1.2114903246028949e-05, "loss": 0.9472, "step": 7618 }, { "epoch": 1.2944038407955933, "grad_norm": 1.6953125, "learning_rate": 1.2113138133306545e-05, "loss": 0.807, "step": 7619 }, { "epoch": 1.2945753048845832, "grad_norm": 1.7890625, "learning_rate": 1.2111372951666755e-05, "loss": 0.7895, "step": 7620 }, { "epoch": 1.2947467689735732, "grad_norm": 1.6953125, "learning_rate": 1.2109607701167139e-05, "loss": 0.822, "step": 7621 }, { "epoch": 1.294918233062563, "grad_norm": 1.75, "learning_rate": 1.2107842381865273e-05, "loss": 0.9415, "step": 7622 }, { "epoch": 1.2950896971515529, "grad_norm": 1.671875, "learning_rate": 1.210607699381873e-05, "loss": 0.8165, "step": 7623 }, { "epoch": 1.2952611612405427, "grad_norm": 1.671875, "learning_rate": 1.2104311537085085e-05, "loss": 0.793, "step": 7624 }, { "epoch": 1.2954326253295325, "grad_norm": 1.6796875, "learning_rate": 1.2102546011721918e-05, "loss": 0.8712, "step": 7625 }, { "epoch": 1.2956040894185223, "grad_norm": 1.6171875, "learning_rate": 1.2100780417786805e-05, "loss": 0.7509, "step": 7626 }, { "epoch": 1.2957755535075122, "grad_norm": 1.7421875, "learning_rate": 1.2099014755337333e-05, "loss": 0.8279, "step": 7627 }, { "epoch": 1.2959470175965022, "grad_norm": 1.7109375, "learning_rate": 1.2097249024431086e-05, "loss": 0.8896, "step": 7628 }, { "epoch": 1.296118481685492, "grad_norm": 1.734375, "learning_rate": 1.209548322512565e-05, "loss": 0.8632, "step": 7629 }, { "epoch": 1.2962899457744819, "grad_norm": 1.671875, "learning_rate": 1.2093717357478618e-05, "loss": 0.9059, "step": 7630 }, { "epoch": 1.2964614098634717, "grad_norm": 1.75, "learning_rate": 1.2091951421547579e-05, "loss": 0.8138, "step": 7631 }, { "epoch": 1.2966328739524615, "grad_norm": 1.65625, "learning_rate": 1.2090185417390124e-05, "loss": 0.8017, "step": 7632 }, { "epoch": 1.2968043380414516, "grad_norm": 1.7109375, "learning_rate": 1.2088419345063851e-05, "loss": 0.8845, "step": 7633 }, { "epoch": 1.2969758021304414, "grad_norm": 1.640625, "learning_rate": 1.208665320462636e-05, "loss": 0.8646, "step": 7634 }, { "epoch": 1.2971472662194312, "grad_norm": 1.6875, "learning_rate": 1.208488699613525e-05, "loss": 0.8522, "step": 7635 }, { "epoch": 1.297318730308421, "grad_norm": 1.71875, "learning_rate": 1.2083120719648124e-05, "loss": 0.8765, "step": 7636 }, { "epoch": 1.2974901943974109, "grad_norm": 1.71875, "learning_rate": 1.2081354375222589e-05, "loss": 0.9103, "step": 7637 }, { "epoch": 1.2976616584864007, "grad_norm": 1.6953125, "learning_rate": 1.2079587962916248e-05, "loss": 0.8577, "step": 7638 }, { "epoch": 1.2978331225753905, "grad_norm": 1.75, "learning_rate": 1.2077821482786714e-05, "loss": 0.9572, "step": 7639 }, { "epoch": 1.2980045866643803, "grad_norm": 1.546875, "learning_rate": 1.2076054934891594e-05, "loss": 0.8155, "step": 7640 }, { "epoch": 1.2981760507533704, "grad_norm": 1.7265625, "learning_rate": 1.2074288319288508e-05, "loss": 0.8119, "step": 7641 }, { "epoch": 1.2983475148423602, "grad_norm": 1.6875, "learning_rate": 1.207252163603507e-05, "loss": 0.8288, "step": 7642 }, { "epoch": 1.29851897893135, "grad_norm": 1.7265625, "learning_rate": 1.2070754885188895e-05, "loss": 0.8586, "step": 7643 }, { "epoch": 1.2986904430203399, "grad_norm": 1.8125, "learning_rate": 1.2068988066807606e-05, "loss": 0.9835, "step": 7644 }, { "epoch": 1.29886190710933, "grad_norm": 1.734375, "learning_rate": 1.2067221180948826e-05, "loss": 0.8761, "step": 7645 }, { "epoch": 1.2990333711983197, "grad_norm": 1.7421875, "learning_rate": 1.2065454227670173e-05, "loss": 0.8515, "step": 7646 }, { "epoch": 1.2992048352873096, "grad_norm": 1.7265625, "learning_rate": 1.2063687207029285e-05, "loss": 0.9168, "step": 7647 }, { "epoch": 1.2993762993762994, "grad_norm": 1.6015625, "learning_rate": 1.2061920119083788e-05, "loss": 0.7479, "step": 7648 }, { "epoch": 1.2995477634652892, "grad_norm": 1.6875, "learning_rate": 1.2060152963891307e-05, "loss": 0.9024, "step": 7649 }, { "epoch": 1.299719227554279, "grad_norm": 1.734375, "learning_rate": 1.2058385741509481e-05, "loss": 0.7964, "step": 7650 }, { "epoch": 1.2998906916432689, "grad_norm": 1.5703125, "learning_rate": 1.2056618451995947e-05, "loss": 0.8361, "step": 7651 }, { "epoch": 1.3000621557322587, "grad_norm": 1.7109375, "learning_rate": 1.2054851095408339e-05, "loss": 0.8796, "step": 7652 }, { "epoch": 1.3002336198212487, "grad_norm": 1.640625, "learning_rate": 1.20530836718043e-05, "loss": 0.8677, "step": 7653 }, { "epoch": 1.3004050839102386, "grad_norm": 1.6953125, "learning_rate": 1.2051316181241472e-05, "loss": 0.878, "step": 7654 }, { "epoch": 1.3005765479992284, "grad_norm": 1.7734375, "learning_rate": 1.2049548623777494e-05, "loss": 0.9416, "step": 7655 }, { "epoch": 1.3007480120882182, "grad_norm": 1.65625, "learning_rate": 1.2047780999470023e-05, "loss": 0.8483, "step": 7656 }, { "epoch": 1.3009194761772083, "grad_norm": 1.71875, "learning_rate": 1.2046013308376698e-05, "loss": 0.9298, "step": 7657 }, { "epoch": 1.301090940266198, "grad_norm": 1.625, "learning_rate": 1.2044245550555179e-05, "loss": 0.8033, "step": 7658 }, { "epoch": 1.301262404355188, "grad_norm": 1.6328125, "learning_rate": 1.2042477726063113e-05, "loss": 0.8801, "step": 7659 }, { "epoch": 1.3014338684441777, "grad_norm": 1.625, "learning_rate": 1.2040709834958155e-05, "loss": 0.8041, "step": 7660 }, { "epoch": 1.3016053325331676, "grad_norm": 1.78125, "learning_rate": 1.2038941877297968e-05, "loss": 0.8988, "step": 7661 }, { "epoch": 1.3017767966221574, "grad_norm": 1.75, "learning_rate": 1.2037173853140206e-05, "loss": 0.89, "step": 7662 }, { "epoch": 1.3019482607111472, "grad_norm": 1.59375, "learning_rate": 1.2035405762542534e-05, "loss": 0.8521, "step": 7663 }, { "epoch": 1.302119724800137, "grad_norm": 1.65625, "learning_rate": 1.2033637605562616e-05, "loss": 0.896, "step": 7664 }, { "epoch": 1.302291188889127, "grad_norm": 1.8515625, "learning_rate": 1.2031869382258116e-05, "loss": 0.8838, "step": 7665 }, { "epoch": 1.302462652978117, "grad_norm": 1.8359375, "learning_rate": 1.2030101092686705e-05, "loss": 0.9619, "step": 7666 }, { "epoch": 1.3026341170671067, "grad_norm": 1.6875, "learning_rate": 1.2028332736906052e-05, "loss": 0.8727, "step": 7667 }, { "epoch": 1.3028055811560966, "grad_norm": 1.59375, "learning_rate": 1.2026564314973832e-05, "loss": 0.8851, "step": 7668 }, { "epoch": 1.3029770452450866, "grad_norm": 1.640625, "learning_rate": 1.2024795826947718e-05, "loss": 0.9059, "step": 7669 }, { "epoch": 1.3031485093340764, "grad_norm": 1.7265625, "learning_rate": 1.2023027272885388e-05, "loss": 0.8688, "step": 7670 }, { "epoch": 1.3033199734230663, "grad_norm": 1.8046875, "learning_rate": 1.2021258652844521e-05, "loss": 0.9054, "step": 7671 }, { "epoch": 1.303491437512056, "grad_norm": 1.75, "learning_rate": 1.2019489966882796e-05, "loss": 0.8857, "step": 7672 }, { "epoch": 1.303662901601046, "grad_norm": 1.6484375, "learning_rate": 1.20177212150579e-05, "loss": 0.8302, "step": 7673 }, { "epoch": 1.3038343656900357, "grad_norm": 1.640625, "learning_rate": 1.2015952397427513e-05, "loss": 0.7771, "step": 7674 }, { "epoch": 1.3040058297790256, "grad_norm": 1.71875, "learning_rate": 1.2014183514049331e-05, "loss": 0.8096, "step": 7675 }, { "epoch": 1.3041772938680154, "grad_norm": 1.6796875, "learning_rate": 1.2012414564981039e-05, "loss": 0.8404, "step": 7676 }, { "epoch": 1.3043487579570054, "grad_norm": 1.765625, "learning_rate": 1.2010645550280332e-05, "loss": 0.9386, "step": 7677 }, { "epoch": 1.3045202220459953, "grad_norm": 1.625, "learning_rate": 1.20088764700049e-05, "loss": 0.8149, "step": 7678 }, { "epoch": 1.304691686134985, "grad_norm": 1.65625, "learning_rate": 1.2007107324212441e-05, "loss": 0.8008, "step": 7679 }, { "epoch": 1.304863150223975, "grad_norm": 1.609375, "learning_rate": 1.2005338112960657e-05, "loss": 0.8362, "step": 7680 }, { "epoch": 1.3050346143129647, "grad_norm": 1.6796875, "learning_rate": 1.2003568836307244e-05, "loss": 0.838, "step": 7681 }, { "epoch": 1.3052060784019548, "grad_norm": 1.6640625, "learning_rate": 1.2001799494309905e-05, "loss": 0.7765, "step": 7682 }, { "epoch": 1.3053775424909446, "grad_norm": 1.625, "learning_rate": 1.200003008702635e-05, "loss": 0.8431, "step": 7683 }, { "epoch": 1.3055490065799344, "grad_norm": 1.7734375, "learning_rate": 1.1998260614514279e-05, "loss": 0.8863, "step": 7684 }, { "epoch": 1.3057204706689243, "grad_norm": 1.78125, "learning_rate": 1.1996491076831404e-05, "loss": 0.8998, "step": 7685 }, { "epoch": 1.305891934757914, "grad_norm": 1.6953125, "learning_rate": 1.199472147403544e-05, "loss": 0.8527, "step": 7686 }, { "epoch": 1.306063398846904, "grad_norm": 1.8125, "learning_rate": 1.1992951806184095e-05, "loss": 0.8908, "step": 7687 }, { "epoch": 1.3062348629358937, "grad_norm": 1.578125, "learning_rate": 1.199118207333509e-05, "loss": 0.8322, "step": 7688 }, { "epoch": 1.3064063270248838, "grad_norm": 1.6328125, "learning_rate": 1.1989412275546137e-05, "loss": 0.7763, "step": 7689 }, { "epoch": 1.3065777911138736, "grad_norm": 1.6015625, "learning_rate": 1.198764241287496e-05, "loss": 0.8284, "step": 7690 }, { "epoch": 1.3067492552028634, "grad_norm": 1.7109375, "learning_rate": 1.1985872485379278e-05, "loss": 0.8426, "step": 7691 }, { "epoch": 1.3069207192918533, "grad_norm": 1.6328125, "learning_rate": 1.1984102493116813e-05, "loss": 0.8513, "step": 7692 }, { "epoch": 1.307092183380843, "grad_norm": 1.6484375, "learning_rate": 1.1982332436145296e-05, "loss": 0.7398, "step": 7693 }, { "epoch": 1.3072636474698331, "grad_norm": 1.78125, "learning_rate": 1.1980562314522457e-05, "loss": 0.8287, "step": 7694 }, { "epoch": 1.307435111558823, "grad_norm": 1.703125, "learning_rate": 1.1978792128306021e-05, "loss": 0.82, "step": 7695 }, { "epoch": 1.3076065756478128, "grad_norm": 1.7421875, "learning_rate": 1.1977021877553721e-05, "loss": 0.8936, "step": 7696 }, { "epoch": 1.3077780397368026, "grad_norm": 1.7109375, "learning_rate": 1.1975251562323293e-05, "loss": 0.9517, "step": 7697 }, { "epoch": 1.3079495038257924, "grad_norm": 1.671875, "learning_rate": 1.1973481182672475e-05, "loss": 0.8354, "step": 7698 }, { "epoch": 1.3081209679147823, "grad_norm": 1.6328125, "learning_rate": 1.1971710738659002e-05, "loss": 0.7502, "step": 7699 }, { "epoch": 1.308292432003772, "grad_norm": 1.640625, "learning_rate": 1.196994023034062e-05, "loss": 0.8179, "step": 7700 }, { "epoch": 1.308292432003772, "eval_loss": 0.8456034660339355, "eval_runtime": 836.8754, "eval_samples_per_second": 2.986, "eval_steps_per_second": 2.986, "step": 7700 }, { "epoch": 1.3084638960927621, "grad_norm": 1.65625, "learning_rate": 1.1968169657775069e-05, "loss": 0.8484, "step": 7701 }, { "epoch": 1.308635360181752, "grad_norm": 1.7265625, "learning_rate": 1.196639902102009e-05, "loss": 0.8856, "step": 7702 }, { "epoch": 1.3088068242707418, "grad_norm": 1.7890625, "learning_rate": 1.1964628320133436e-05, "loss": 0.8913, "step": 7703 }, { "epoch": 1.3089782883597316, "grad_norm": 1.671875, "learning_rate": 1.1962857555172854e-05, "loss": 0.8323, "step": 7704 }, { "epoch": 1.3091497524487214, "grad_norm": 1.6640625, "learning_rate": 1.1961086726196097e-05, "loss": 0.7931, "step": 7705 }, { "epoch": 1.3093212165377115, "grad_norm": 1.703125, "learning_rate": 1.1959315833260918e-05, "loss": 0.9209, "step": 7706 }, { "epoch": 1.3094926806267013, "grad_norm": 1.6640625, "learning_rate": 1.1957544876425069e-05, "loss": 0.8677, "step": 7707 }, { "epoch": 1.3096641447156911, "grad_norm": 1.6015625, "learning_rate": 1.195577385574631e-05, "loss": 0.7754, "step": 7708 }, { "epoch": 1.309835608804681, "grad_norm": 1.78125, "learning_rate": 1.1954002771282403e-05, "loss": 0.887, "step": 7709 }, { "epoch": 1.3100070728936708, "grad_norm": 1.7421875, "learning_rate": 1.1952231623091106e-05, "loss": 0.8784, "step": 7710 }, { "epoch": 1.3101785369826606, "grad_norm": 1.75, "learning_rate": 1.1950460411230185e-05, "loss": 0.8745, "step": 7711 }, { "epoch": 1.3103500010716504, "grad_norm": 1.6875, "learning_rate": 1.1948689135757406e-05, "loss": 0.8325, "step": 7712 }, { "epoch": 1.3105214651606405, "grad_norm": 1.6796875, "learning_rate": 1.1946917796730534e-05, "loss": 0.8219, "step": 7713 }, { "epoch": 1.3106929292496303, "grad_norm": 1.6953125, "learning_rate": 1.1945146394207342e-05, "loss": 0.8945, "step": 7714 }, { "epoch": 1.3108643933386201, "grad_norm": 1.6484375, "learning_rate": 1.1943374928245603e-05, "loss": 0.8552, "step": 7715 }, { "epoch": 1.31103585742761, "grad_norm": 1.609375, "learning_rate": 1.1941603398903088e-05, "loss": 0.8237, "step": 7716 }, { "epoch": 1.3112073215165998, "grad_norm": 1.71875, "learning_rate": 1.193983180623758e-05, "loss": 0.8898, "step": 7717 }, { "epoch": 1.3113787856055898, "grad_norm": 1.734375, "learning_rate": 1.1938060150306844e-05, "loss": 0.8595, "step": 7718 }, { "epoch": 1.3115502496945797, "grad_norm": 1.7109375, "learning_rate": 1.1936288431168673e-05, "loss": 0.8116, "step": 7719 }, { "epoch": 1.3117217137835695, "grad_norm": 1.6796875, "learning_rate": 1.1934516648880842e-05, "loss": 0.8697, "step": 7720 }, { "epoch": 1.3118931778725593, "grad_norm": 1.703125, "learning_rate": 1.193274480350114e-05, "loss": 0.8571, "step": 7721 }, { "epoch": 1.3120646419615491, "grad_norm": 1.7265625, "learning_rate": 1.1930972895087349e-05, "loss": 0.8991, "step": 7722 }, { "epoch": 1.312236106050539, "grad_norm": 1.578125, "learning_rate": 1.1929200923697267e-05, "loss": 0.798, "step": 7723 }, { "epoch": 1.3124075701395288, "grad_norm": 1.6953125, "learning_rate": 1.1927428889388674e-05, "loss": 0.8255, "step": 7724 }, { "epoch": 1.3125790342285186, "grad_norm": 1.71875, "learning_rate": 1.1925656792219366e-05, "loss": 0.9045, "step": 7725 }, { "epoch": 1.3127504983175087, "grad_norm": 1.6796875, "learning_rate": 1.1923884632247143e-05, "loss": 0.9382, "step": 7726 }, { "epoch": 1.3129219624064985, "grad_norm": 1.65625, "learning_rate": 1.1922112409529793e-05, "loss": 0.86, "step": 7727 }, { "epoch": 1.3130934264954883, "grad_norm": 1.6875, "learning_rate": 1.192034012412512e-05, "loss": 0.8747, "step": 7728 }, { "epoch": 1.3132648905844782, "grad_norm": 1.6328125, "learning_rate": 1.1918567776090925e-05, "loss": 0.871, "step": 7729 }, { "epoch": 1.3134363546734682, "grad_norm": 1.6484375, "learning_rate": 1.191679536548501e-05, "loss": 0.887, "step": 7730 }, { "epoch": 1.313607818762458, "grad_norm": 1.5546875, "learning_rate": 1.1915022892365182e-05, "loss": 0.7853, "step": 7731 }, { "epoch": 1.3137792828514478, "grad_norm": 1.7109375, "learning_rate": 1.1913250356789244e-05, "loss": 0.8789, "step": 7732 }, { "epoch": 1.3139507469404377, "grad_norm": 1.6953125, "learning_rate": 1.1911477758815006e-05, "loss": 0.9076, "step": 7733 }, { "epoch": 1.3141222110294275, "grad_norm": 1.6953125, "learning_rate": 1.1909705098500287e-05, "loss": 0.8747, "step": 7734 }, { "epoch": 1.3142936751184173, "grad_norm": 1.7109375, "learning_rate": 1.1907932375902886e-05, "loss": 0.9129, "step": 7735 }, { "epoch": 1.3144651392074072, "grad_norm": 1.6015625, "learning_rate": 1.1906159591080628e-05, "loss": 0.8726, "step": 7736 }, { "epoch": 1.314636603296397, "grad_norm": 1.6328125, "learning_rate": 1.190438674409133e-05, "loss": 0.753, "step": 7737 }, { "epoch": 1.314808067385387, "grad_norm": 1.6328125, "learning_rate": 1.1902613834992807e-05, "loss": 0.8014, "step": 7738 }, { "epoch": 1.3149795314743769, "grad_norm": 1.71875, "learning_rate": 1.1900840863842883e-05, "loss": 0.8932, "step": 7739 }, { "epoch": 1.3151509955633667, "grad_norm": 1.7421875, "learning_rate": 1.1899067830699378e-05, "loss": 0.8219, "step": 7740 }, { "epoch": 1.3153224596523565, "grad_norm": 1.6796875, "learning_rate": 1.1897294735620123e-05, "loss": 0.9269, "step": 7741 }, { "epoch": 1.3154939237413465, "grad_norm": 1.8046875, "learning_rate": 1.1895521578662939e-05, "loss": 0.9126, "step": 7742 }, { "epoch": 1.3156653878303364, "grad_norm": 1.796875, "learning_rate": 1.189374835988566e-05, "loss": 0.8141, "step": 7743 }, { "epoch": 1.3158368519193262, "grad_norm": 1.671875, "learning_rate": 1.1891975079346117e-05, "loss": 0.8801, "step": 7744 }, { "epoch": 1.316008316008316, "grad_norm": 1.8125, "learning_rate": 1.1890201737102141e-05, "loss": 0.9298, "step": 7745 }, { "epoch": 1.3161797800973059, "grad_norm": 1.8203125, "learning_rate": 1.188842833321157e-05, "loss": 0.9065, "step": 7746 }, { "epoch": 1.3163512441862957, "grad_norm": 1.7109375, "learning_rate": 1.1886654867732239e-05, "loss": 0.8665, "step": 7747 }, { "epoch": 1.3165227082752855, "grad_norm": 1.6875, "learning_rate": 1.1884881340721985e-05, "loss": 0.8517, "step": 7748 }, { "epoch": 1.3166941723642753, "grad_norm": 1.765625, "learning_rate": 1.1883107752238657e-05, "loss": 0.8489, "step": 7749 }, { "epoch": 1.3168656364532654, "grad_norm": 1.7421875, "learning_rate": 1.188133410234009e-05, "loss": 0.8792, "step": 7750 }, { "epoch": 1.3170371005422552, "grad_norm": 1.703125, "learning_rate": 1.1879560391084137e-05, "loss": 0.8721, "step": 7751 }, { "epoch": 1.317208564631245, "grad_norm": 1.6875, "learning_rate": 1.1877786618528642e-05, "loss": 0.874, "step": 7752 }, { "epoch": 1.3173800287202349, "grad_norm": 1.734375, "learning_rate": 1.1876012784731454e-05, "loss": 0.8498, "step": 7753 }, { "epoch": 1.317551492809225, "grad_norm": 1.6328125, "learning_rate": 1.1874238889750423e-05, "loss": 0.7978, "step": 7754 }, { "epoch": 1.3177229568982147, "grad_norm": 1.6875, "learning_rate": 1.1872464933643406e-05, "loss": 0.834, "step": 7755 }, { "epoch": 1.3178944209872046, "grad_norm": 1.6953125, "learning_rate": 1.1870690916468255e-05, "loss": 0.8322, "step": 7756 }, { "epoch": 1.3180658850761944, "grad_norm": 1.7265625, "learning_rate": 1.1868916838282829e-05, "loss": 0.9117, "step": 7757 }, { "epoch": 1.3182373491651842, "grad_norm": 1.6328125, "learning_rate": 1.1867142699144986e-05, "loss": 0.7738, "step": 7758 }, { "epoch": 1.318408813254174, "grad_norm": 1.6875, "learning_rate": 1.1865368499112591e-05, "loss": 0.8178, "step": 7759 }, { "epoch": 1.3185802773431639, "grad_norm": 1.6796875, "learning_rate": 1.1863594238243503e-05, "loss": 0.8666, "step": 7760 }, { "epoch": 1.3187517414321537, "grad_norm": 1.640625, "learning_rate": 1.1861819916595592e-05, "loss": 0.8319, "step": 7761 }, { "epoch": 1.3189232055211437, "grad_norm": 1.703125, "learning_rate": 1.1860045534226719e-05, "loss": 0.8273, "step": 7762 }, { "epoch": 1.3190946696101336, "grad_norm": 1.671875, "learning_rate": 1.1858271091194762e-05, "loss": 0.8214, "step": 7763 }, { "epoch": 1.3192661336991234, "grad_norm": 1.6796875, "learning_rate": 1.1856496587557582e-05, "loss": 0.8312, "step": 7764 }, { "epoch": 1.3194375977881132, "grad_norm": 1.65625, "learning_rate": 1.185472202337306e-05, "loss": 0.8638, "step": 7765 }, { "epoch": 1.3196090618771033, "grad_norm": 1.6484375, "learning_rate": 1.1852947398699065e-05, "loss": 0.8527, "step": 7766 }, { "epoch": 1.319780525966093, "grad_norm": 1.671875, "learning_rate": 1.1851172713593482e-05, "loss": 0.8765, "step": 7767 }, { "epoch": 1.319951990055083, "grad_norm": 6.34375, "learning_rate": 1.1849397968114183e-05, "loss": 0.9896, "step": 7768 }, { "epoch": 1.3201234541440727, "grad_norm": 1.6484375, "learning_rate": 1.1847623162319056e-05, "loss": 0.7787, "step": 7769 }, { "epoch": 1.3202949182330626, "grad_norm": 1.7890625, "learning_rate": 1.1845848296265976e-05, "loss": 0.8828, "step": 7770 }, { "epoch": 1.3204663823220524, "grad_norm": 1.65625, "learning_rate": 1.1844073370012835e-05, "loss": 0.8136, "step": 7771 }, { "epoch": 1.3206378464110422, "grad_norm": 1.7578125, "learning_rate": 1.1842298383617515e-05, "loss": 0.8852, "step": 7772 }, { "epoch": 1.320809310500032, "grad_norm": 1.7734375, "learning_rate": 1.1840523337137908e-05, "loss": 0.8697, "step": 7773 }, { "epoch": 1.320980774589022, "grad_norm": 1.7109375, "learning_rate": 1.1838748230631905e-05, "loss": 0.8943, "step": 7774 }, { "epoch": 1.321152238678012, "grad_norm": 1.6796875, "learning_rate": 1.1836973064157399e-05, "loss": 0.8774, "step": 7775 }, { "epoch": 1.3213237027670017, "grad_norm": 1.8125, "learning_rate": 1.1835197837772282e-05, "loss": 0.841, "step": 7776 }, { "epoch": 1.3214951668559916, "grad_norm": 1.6953125, "learning_rate": 1.1833422551534453e-05, "loss": 0.9008, "step": 7777 }, { "epoch": 1.3216666309449814, "grad_norm": 1.53125, "learning_rate": 1.183164720550181e-05, "loss": 0.8287, "step": 7778 }, { "epoch": 1.3218380950339714, "grad_norm": 1.7109375, "learning_rate": 1.1829871799732255e-05, "loss": 0.8227, "step": 7779 }, { "epoch": 1.3220095591229613, "grad_norm": 1.65625, "learning_rate": 1.1828096334283692e-05, "loss": 0.9078, "step": 7780 }, { "epoch": 1.322181023211951, "grad_norm": 1.625, "learning_rate": 1.1826320809214023e-05, "loss": 0.8535, "step": 7781 }, { "epoch": 1.322352487300941, "grad_norm": 1.71875, "learning_rate": 1.1824545224581155e-05, "loss": 0.8968, "step": 7782 }, { "epoch": 1.3225239513899307, "grad_norm": 1.703125, "learning_rate": 1.1822769580442995e-05, "loss": 0.8102, "step": 7783 }, { "epoch": 1.3226954154789206, "grad_norm": 1.7265625, "learning_rate": 1.1820993876857453e-05, "loss": 0.8553, "step": 7784 }, { "epoch": 1.3228668795679104, "grad_norm": 1.6640625, "learning_rate": 1.1819218113882448e-05, "loss": 0.8806, "step": 7785 }, { "epoch": 1.3230383436569004, "grad_norm": 1.609375, "learning_rate": 1.1817442291575887e-05, "loss": 0.7205, "step": 7786 }, { "epoch": 1.3232098077458903, "grad_norm": 1.625, "learning_rate": 1.181566640999569e-05, "loss": 0.9008, "step": 7787 }, { "epoch": 1.32338127183488, "grad_norm": 1.75, "learning_rate": 1.1813890469199774e-05, "loss": 0.8797, "step": 7788 }, { "epoch": 1.32355273592387, "grad_norm": 1.7109375, "learning_rate": 1.181211446924606e-05, "loss": 0.9086, "step": 7789 }, { "epoch": 1.3237242000128597, "grad_norm": 1.6015625, "learning_rate": 1.1810338410192468e-05, "loss": 0.8851, "step": 7790 }, { "epoch": 1.3238956641018498, "grad_norm": 1.703125, "learning_rate": 1.1808562292096923e-05, "loss": 0.8203, "step": 7791 }, { "epoch": 1.3240671281908396, "grad_norm": 1.7421875, "learning_rate": 1.1806786115017354e-05, "loss": 0.8045, "step": 7792 }, { "epoch": 1.3242385922798294, "grad_norm": 1.671875, "learning_rate": 1.1805009879011686e-05, "loss": 0.8182, "step": 7793 }, { "epoch": 1.3244100563688193, "grad_norm": 1.625, "learning_rate": 1.1803233584137847e-05, "loss": 0.9296, "step": 7794 }, { "epoch": 1.324581520457809, "grad_norm": 1.734375, "learning_rate": 1.1801457230453768e-05, "loss": 0.9271, "step": 7795 }, { "epoch": 1.324752984546799, "grad_norm": 1.765625, "learning_rate": 1.1799680818017387e-05, "loss": 0.8048, "step": 7796 }, { "epoch": 1.3249244486357887, "grad_norm": 1.703125, "learning_rate": 1.179790434688664e-05, "loss": 0.8658, "step": 7797 }, { "epoch": 1.3250959127247788, "grad_norm": 1.578125, "learning_rate": 1.179612781711946e-05, "loss": 0.8503, "step": 7798 }, { "epoch": 1.3252673768137686, "grad_norm": 1.6484375, "learning_rate": 1.179435122877379e-05, "loss": 0.8722, "step": 7799 }, { "epoch": 1.3254388409027584, "grad_norm": 1.5625, "learning_rate": 1.1792574581907567e-05, "loss": 0.7956, "step": 7800 }, { "epoch": 1.3256103049917483, "grad_norm": 1.6875, "learning_rate": 1.179079787657874e-05, "loss": 0.8823, "step": 7801 }, { "epoch": 1.325781769080738, "grad_norm": 1.75, "learning_rate": 1.1789021112845247e-05, "loss": 0.9627, "step": 7802 }, { "epoch": 1.3259532331697281, "grad_norm": 1.59375, "learning_rate": 1.178724429076504e-05, "loss": 0.8166, "step": 7803 }, { "epoch": 1.326124697258718, "grad_norm": 1.734375, "learning_rate": 1.1785467410396066e-05, "loss": 0.7876, "step": 7804 }, { "epoch": 1.3262961613477078, "grad_norm": 1.7109375, "learning_rate": 1.1783690471796276e-05, "loss": 0.8619, "step": 7805 }, { "epoch": 1.3264676254366976, "grad_norm": 1.6328125, "learning_rate": 1.1781913475023626e-05, "loss": 0.8014, "step": 7806 }, { "epoch": 1.3266390895256874, "grad_norm": 1.6015625, "learning_rate": 1.1780136420136063e-05, "loss": 0.8473, "step": 7807 }, { "epoch": 1.3268105536146773, "grad_norm": 1.6484375, "learning_rate": 1.1778359307191552e-05, "loss": 0.8229, "step": 7808 }, { "epoch": 1.326982017703667, "grad_norm": 1.8203125, "learning_rate": 1.1776582136248047e-05, "loss": 0.8526, "step": 7809 }, { "epoch": 1.3271534817926571, "grad_norm": 1.640625, "learning_rate": 1.1774804907363508e-05, "loss": 0.8695, "step": 7810 }, { "epoch": 1.327324945881647, "grad_norm": 1.7109375, "learning_rate": 1.1773027620595897e-05, "loss": 0.8008, "step": 7811 }, { "epoch": 1.3274964099706368, "grad_norm": 1.7421875, "learning_rate": 1.1771250276003179e-05, "loss": 0.9204, "step": 7812 }, { "epoch": 1.3276678740596266, "grad_norm": 1.7109375, "learning_rate": 1.1769472873643317e-05, "loss": 0.8668, "step": 7813 }, { "epoch": 1.3278393381486164, "grad_norm": 1.6953125, "learning_rate": 1.1767695413574284e-05, "loss": 0.8277, "step": 7814 }, { "epoch": 1.3280108022376065, "grad_norm": 1.65625, "learning_rate": 1.1765917895854046e-05, "loss": 0.7791, "step": 7815 }, { "epoch": 1.3281822663265963, "grad_norm": 1.7109375, "learning_rate": 1.1764140320540578e-05, "loss": 0.8521, "step": 7816 }, { "epoch": 1.3283537304155861, "grad_norm": 1.734375, "learning_rate": 1.1762362687691849e-05, "loss": 0.9123, "step": 7817 }, { "epoch": 1.328525194504576, "grad_norm": 1.546875, "learning_rate": 1.1760584997365835e-05, "loss": 0.8249, "step": 7818 }, { "epoch": 1.3286966585935658, "grad_norm": 1.6796875, "learning_rate": 1.1758807249620517e-05, "loss": 0.7786, "step": 7819 }, { "epoch": 1.3288681226825556, "grad_norm": 1.640625, "learning_rate": 1.175702944451387e-05, "loss": 0.8714, "step": 7820 }, { "epoch": 1.3290395867715454, "grad_norm": 1.640625, "learning_rate": 1.1755251582103879e-05, "loss": 0.8321, "step": 7821 }, { "epoch": 1.3292110508605353, "grad_norm": 1.6484375, "learning_rate": 1.1753473662448523e-05, "loss": 0.8579, "step": 7822 }, { "epoch": 1.3293825149495253, "grad_norm": 1.6640625, "learning_rate": 1.1751695685605784e-05, "loss": 0.8547, "step": 7823 }, { "epoch": 1.3295539790385151, "grad_norm": 1.7265625, "learning_rate": 1.1749917651633655e-05, "loss": 0.8092, "step": 7824 }, { "epoch": 1.329725443127505, "grad_norm": 1.6953125, "learning_rate": 1.1748139560590123e-05, "loss": 0.8526, "step": 7825 }, { "epoch": 1.3298969072164948, "grad_norm": 1.71875, "learning_rate": 1.1746361412533175e-05, "loss": 0.841, "step": 7826 }, { "epoch": 1.3300683713054848, "grad_norm": 1.734375, "learning_rate": 1.174458320752081e-05, "loss": 0.8476, "step": 7827 }, { "epoch": 1.3302398353944747, "grad_norm": 1.6484375, "learning_rate": 1.1742804945611014e-05, "loss": 0.932, "step": 7828 }, { "epoch": 1.3304112994834645, "grad_norm": 1.5703125, "learning_rate": 1.1741026626861784e-05, "loss": 0.786, "step": 7829 }, { "epoch": 1.3305827635724543, "grad_norm": 1.6875, "learning_rate": 1.1739248251331124e-05, "loss": 0.9036, "step": 7830 }, { "epoch": 1.3307542276614441, "grad_norm": 1.7421875, "learning_rate": 1.1737469819077026e-05, "loss": 0.7656, "step": 7831 }, { "epoch": 1.330925691750434, "grad_norm": 1.7265625, "learning_rate": 1.1735691330157492e-05, "loss": 0.9576, "step": 7832 }, { "epoch": 1.3310971558394238, "grad_norm": 1.5703125, "learning_rate": 1.1733912784630532e-05, "loss": 0.7358, "step": 7833 }, { "epoch": 1.3312686199284136, "grad_norm": 1.703125, "learning_rate": 1.1732134182554144e-05, "loss": 0.8518, "step": 7834 }, { "epoch": 1.3314400840174037, "grad_norm": 1.734375, "learning_rate": 1.173035552398634e-05, "loss": 0.8398, "step": 7835 }, { "epoch": 1.3316115481063935, "grad_norm": 1.6484375, "learning_rate": 1.1728576808985126e-05, "loss": 0.8388, "step": 7836 }, { "epoch": 1.3317830121953833, "grad_norm": 1.7265625, "learning_rate": 1.1726798037608514e-05, "loss": 0.9044, "step": 7837 }, { "epoch": 1.3319544762843731, "grad_norm": 1.7421875, "learning_rate": 1.1725019209914514e-05, "loss": 0.9309, "step": 7838 }, { "epoch": 1.3321259403733632, "grad_norm": 1.65625, "learning_rate": 1.1723240325961142e-05, "loss": 0.8207, "step": 7839 }, { "epoch": 1.332297404462353, "grad_norm": 1.671875, "learning_rate": 1.1721461385806414e-05, "loss": 0.846, "step": 7840 }, { "epoch": 1.3324688685513428, "grad_norm": 1.6875, "learning_rate": 1.1719682389508346e-05, "loss": 0.8934, "step": 7841 }, { "epoch": 1.3326403326403327, "grad_norm": 1.7578125, "learning_rate": 1.171790333712496e-05, "loss": 0.8431, "step": 7842 }, { "epoch": 1.3328117967293225, "grad_norm": 1.7421875, "learning_rate": 1.1716124228714278e-05, "loss": 0.8962, "step": 7843 }, { "epoch": 1.3329832608183123, "grad_norm": 1.71875, "learning_rate": 1.1714345064334325e-05, "loss": 0.923, "step": 7844 }, { "epoch": 1.3331547249073021, "grad_norm": 1.734375, "learning_rate": 1.171256584404312e-05, "loss": 0.8543, "step": 7845 }, { "epoch": 1.333326188996292, "grad_norm": 1.6328125, "learning_rate": 1.1710786567898696e-05, "loss": 0.8548, "step": 7846 }, { "epoch": 1.333497653085282, "grad_norm": 1.796875, "learning_rate": 1.1709007235959078e-05, "loss": 0.8992, "step": 7847 }, { "epoch": 1.3336691171742718, "grad_norm": 1.6796875, "learning_rate": 1.1707227848282301e-05, "loss": 0.8639, "step": 7848 }, { "epoch": 1.3338405812632617, "grad_norm": 1.75, "learning_rate": 1.1705448404926392e-05, "loss": 0.8901, "step": 7849 }, { "epoch": 1.3340120453522515, "grad_norm": 1.625, "learning_rate": 1.1703668905949393e-05, "loss": 0.8759, "step": 7850 }, { "epoch": 1.3341835094412415, "grad_norm": 1.65625, "learning_rate": 1.170188935140933e-05, "loss": 0.8358, "step": 7851 }, { "epoch": 1.3343549735302314, "grad_norm": 1.7578125, "learning_rate": 1.170010974136425e-05, "loss": 0.8509, "step": 7852 }, { "epoch": 1.3345264376192212, "grad_norm": 1.7265625, "learning_rate": 1.1698330075872188e-05, "loss": 0.8816, "step": 7853 }, { "epoch": 1.334697901708211, "grad_norm": 1.625, "learning_rate": 1.1696550354991187e-05, "loss": 0.8541, "step": 7854 }, { "epoch": 1.3348693657972008, "grad_norm": 1.7578125, "learning_rate": 1.1694770578779293e-05, "loss": 0.887, "step": 7855 }, { "epoch": 1.3350408298861907, "grad_norm": 1.703125, "learning_rate": 1.1692990747294546e-05, "loss": 0.8772, "step": 7856 }, { "epoch": 1.3352122939751805, "grad_norm": 1.578125, "learning_rate": 1.1691210860594997e-05, "loss": 0.8723, "step": 7857 }, { "epoch": 1.3353837580641703, "grad_norm": 1.6875, "learning_rate": 1.1689430918738691e-05, "loss": 0.901, "step": 7858 }, { "epoch": 1.3355552221531604, "grad_norm": 1.640625, "learning_rate": 1.1687650921783683e-05, "loss": 0.8865, "step": 7859 }, { "epoch": 1.3357266862421502, "grad_norm": 1.6796875, "learning_rate": 1.168587086978802e-05, "loss": 0.8233, "step": 7860 }, { "epoch": 1.33589815033114, "grad_norm": 1.703125, "learning_rate": 1.1684090762809762e-05, "loss": 0.9395, "step": 7861 }, { "epoch": 1.3360696144201298, "grad_norm": 1.640625, "learning_rate": 1.1682310600906962e-05, "loss": 0.8309, "step": 7862 }, { "epoch": 1.3362410785091199, "grad_norm": 1.65625, "learning_rate": 1.1680530384137681e-05, "loss": 0.9013, "step": 7863 }, { "epoch": 1.3364125425981097, "grad_norm": 1.6640625, "learning_rate": 1.1678750112559971e-05, "loss": 0.8431, "step": 7864 }, { "epoch": 1.3365840066870995, "grad_norm": 1.78125, "learning_rate": 1.1676969786231903e-05, "loss": 0.9223, "step": 7865 }, { "epoch": 1.3367554707760894, "grad_norm": 1.65625, "learning_rate": 1.167518940521153e-05, "loss": 0.7831, "step": 7866 }, { "epoch": 1.3369269348650792, "grad_norm": 1.84375, "learning_rate": 1.167340896955693e-05, "loss": 0.8798, "step": 7867 }, { "epoch": 1.337098398954069, "grad_norm": 1.7109375, "learning_rate": 1.1671628479326157e-05, "loss": 0.8683, "step": 7868 }, { "epoch": 1.3372698630430588, "grad_norm": 1.6875, "learning_rate": 1.1669847934577285e-05, "loss": 0.8294, "step": 7869 }, { "epoch": 1.3374413271320487, "grad_norm": 1.765625, "learning_rate": 1.1668067335368383e-05, "loss": 0.8769, "step": 7870 }, { "epoch": 1.3376127912210387, "grad_norm": 1.6484375, "learning_rate": 1.1666286681757524e-05, "loss": 0.8865, "step": 7871 }, { "epoch": 1.3377842553100285, "grad_norm": 1.6796875, "learning_rate": 1.1664505973802782e-05, "loss": 0.8907, "step": 7872 }, { "epoch": 1.3379557193990184, "grad_norm": 1.65625, "learning_rate": 1.1662725211562233e-05, "loss": 0.8434, "step": 7873 }, { "epoch": 1.3381271834880082, "grad_norm": 1.75, "learning_rate": 1.1660944395093954e-05, "loss": 0.8884, "step": 7874 }, { "epoch": 1.338298647576998, "grad_norm": 1.6328125, "learning_rate": 1.1659163524456023e-05, "loss": 0.8571, "step": 7875 }, { "epoch": 1.338470111665988, "grad_norm": 1.71875, "learning_rate": 1.1657382599706522e-05, "loss": 0.8605, "step": 7876 }, { "epoch": 1.3386415757549779, "grad_norm": 1.671875, "learning_rate": 1.1655601620903531e-05, "loss": 0.9084, "step": 7877 }, { "epoch": 1.3388130398439677, "grad_norm": 1.6953125, "learning_rate": 1.165382058810514e-05, "loss": 0.8482, "step": 7878 }, { "epoch": 1.3389845039329575, "grad_norm": 1.7421875, "learning_rate": 1.165203950136943e-05, "loss": 0.8411, "step": 7879 }, { "epoch": 1.3391559680219474, "grad_norm": 1.6953125, "learning_rate": 1.1650258360754491e-05, "loss": 0.8961, "step": 7880 }, { "epoch": 1.3393274321109372, "grad_norm": 1.75, "learning_rate": 1.1648477166318413e-05, "loss": 0.8305, "step": 7881 }, { "epoch": 1.339498896199927, "grad_norm": 1.6640625, "learning_rate": 1.1646695918119289e-05, "loss": 0.8248, "step": 7882 }, { "epoch": 1.339670360288917, "grad_norm": 1.765625, "learning_rate": 1.1644914616215206e-05, "loss": 0.9079, "step": 7883 }, { "epoch": 1.3398418243779069, "grad_norm": 1.7109375, "learning_rate": 1.1643133260664268e-05, "loss": 0.9124, "step": 7884 }, { "epoch": 1.3400132884668967, "grad_norm": 1.7265625, "learning_rate": 1.1641351851524564e-05, "loss": 0.8732, "step": 7885 }, { "epoch": 1.3401847525558865, "grad_norm": 1.796875, "learning_rate": 1.1639570388854196e-05, "loss": 0.8122, "step": 7886 }, { "epoch": 1.3403562166448764, "grad_norm": 1.7734375, "learning_rate": 1.1637788872711262e-05, "loss": 0.8699, "step": 7887 }, { "epoch": 1.3405276807338664, "grad_norm": 1.75, "learning_rate": 1.1636007303153867e-05, "loss": 0.9392, "step": 7888 }, { "epoch": 1.3406991448228562, "grad_norm": 1.7265625, "learning_rate": 1.163422568024011e-05, "loss": 0.9003, "step": 7889 }, { "epoch": 1.340870608911846, "grad_norm": 1.6875, "learning_rate": 1.1632444004028105e-05, "loss": 0.88, "step": 7890 }, { "epoch": 1.3410420730008359, "grad_norm": 1.75, "learning_rate": 1.1630662274575952e-05, "loss": 0.8422, "step": 7891 }, { "epoch": 1.3412135370898257, "grad_norm": 1.7421875, "learning_rate": 1.162888049194176e-05, "loss": 0.9335, "step": 7892 }, { "epoch": 1.3413850011788155, "grad_norm": 1.703125, "learning_rate": 1.1627098656183645e-05, "loss": 0.8748, "step": 7893 }, { "epoch": 1.3415564652678054, "grad_norm": 1.578125, "learning_rate": 1.1625316767359711e-05, "loss": 0.7939, "step": 7894 }, { "epoch": 1.3417279293567954, "grad_norm": 1.7578125, "learning_rate": 1.162353482552808e-05, "loss": 0.8575, "step": 7895 }, { "epoch": 1.3418993934457852, "grad_norm": 1.703125, "learning_rate": 1.1621752830746863e-05, "loss": 0.8959, "step": 7896 }, { "epoch": 1.342070857534775, "grad_norm": 1.6328125, "learning_rate": 1.1619970783074179e-05, "loss": 0.8454, "step": 7897 }, { "epoch": 1.342242321623765, "grad_norm": 1.7109375, "learning_rate": 1.1618188682568148e-05, "loss": 0.9552, "step": 7898 }, { "epoch": 1.3424137857127547, "grad_norm": 1.71875, "learning_rate": 1.1616406529286891e-05, "loss": 0.858, "step": 7899 }, { "epoch": 1.3425852498017448, "grad_norm": 1.625, "learning_rate": 1.161462432328853e-05, "loss": 0.9018, "step": 7900 }, { "epoch": 1.3427567138907346, "grad_norm": 1.7109375, "learning_rate": 1.161284206463119e-05, "loss": 0.8469, "step": 7901 }, { "epoch": 1.3429281779797244, "grad_norm": 1.6015625, "learning_rate": 1.1611059753373e-05, "loss": 0.8626, "step": 7902 }, { "epoch": 1.3430996420687142, "grad_norm": 1.6953125, "learning_rate": 1.1609277389572082e-05, "loss": 0.8517, "step": 7903 }, { "epoch": 1.343271106157704, "grad_norm": 1.828125, "learning_rate": 1.1607494973286566e-05, "loss": 0.888, "step": 7904 }, { "epoch": 1.343442570246694, "grad_norm": 1.7578125, "learning_rate": 1.1605712504574588e-05, "loss": 0.993, "step": 7905 }, { "epoch": 1.3436140343356837, "grad_norm": 1.8203125, "learning_rate": 1.160392998349428e-05, "loss": 0.9164, "step": 7906 }, { "epoch": 1.3437854984246738, "grad_norm": 1.6640625, "learning_rate": 1.1602147410103772e-05, "loss": 0.7978, "step": 7907 }, { "epoch": 1.3439569625136636, "grad_norm": 1.6171875, "learning_rate": 1.1600364784461207e-05, "loss": 0.8315, "step": 7908 }, { "epoch": 1.3441284266026534, "grad_norm": 1.6328125, "learning_rate": 1.1598582106624716e-05, "loss": 0.8325, "step": 7909 }, { "epoch": 1.3442998906916432, "grad_norm": 1.6796875, "learning_rate": 1.1596799376652446e-05, "loss": 0.8375, "step": 7910 }, { "epoch": 1.344471354780633, "grad_norm": 1.8125, "learning_rate": 1.1595016594602535e-05, "loss": 0.8738, "step": 7911 }, { "epoch": 1.3446428188696231, "grad_norm": 1.703125, "learning_rate": 1.1593233760533127e-05, "loss": 0.8572, "step": 7912 }, { "epoch": 1.344814282958613, "grad_norm": 1.671875, "learning_rate": 1.1591450874502365e-05, "loss": 0.8272, "step": 7913 }, { "epoch": 1.3449857470476028, "grad_norm": 1.6796875, "learning_rate": 1.1589667936568399e-05, "loss": 0.8233, "step": 7914 }, { "epoch": 1.3451572111365926, "grad_norm": 1.7421875, "learning_rate": 1.1587884946789374e-05, "loss": 0.8449, "step": 7915 }, { "epoch": 1.3453286752255824, "grad_norm": 1.6171875, "learning_rate": 1.1586101905223442e-05, "loss": 0.7674, "step": 7916 }, { "epoch": 1.3455001393145722, "grad_norm": 1.7109375, "learning_rate": 1.1584318811928752e-05, "loss": 0.8283, "step": 7917 }, { "epoch": 1.345671603403562, "grad_norm": 1.7421875, "learning_rate": 1.1582535666963462e-05, "loss": 0.8261, "step": 7918 }, { "epoch": 1.345843067492552, "grad_norm": 1.625, "learning_rate": 1.1580752470385725e-05, "loss": 0.9092, "step": 7919 }, { "epoch": 1.346014531581542, "grad_norm": 1.7734375, "learning_rate": 1.1578969222253696e-05, "loss": 0.8821, "step": 7920 }, { "epoch": 1.3461859956705318, "grad_norm": 1.6953125, "learning_rate": 1.1577185922625536e-05, "loss": 0.8901, "step": 7921 }, { "epoch": 1.3463574597595216, "grad_norm": 1.6640625, "learning_rate": 1.1575402571559403e-05, "loss": 0.8937, "step": 7922 }, { "epoch": 1.3465289238485114, "grad_norm": 1.8046875, "learning_rate": 1.157361916911346e-05, "loss": 0.8723, "step": 7923 }, { "epoch": 1.3467003879375015, "grad_norm": 1.6796875, "learning_rate": 1.1571835715345869e-05, "loss": 0.885, "step": 7924 }, { "epoch": 1.3468718520264913, "grad_norm": 1.6640625, "learning_rate": 1.1570052210314798e-05, "loss": 0.8569, "step": 7925 }, { "epoch": 1.3470433161154811, "grad_norm": 1.6953125, "learning_rate": 1.1568268654078411e-05, "loss": 0.9641, "step": 7926 }, { "epoch": 1.347214780204471, "grad_norm": 1.65625, "learning_rate": 1.1566485046694877e-05, "loss": 0.8032, "step": 7927 }, { "epoch": 1.3473862442934608, "grad_norm": 1.5859375, "learning_rate": 1.156470138822237e-05, "loss": 0.8226, "step": 7928 }, { "epoch": 1.3475577083824506, "grad_norm": 1.640625, "learning_rate": 1.1562917678719056e-05, "loss": 0.8646, "step": 7929 }, { "epoch": 1.3477291724714404, "grad_norm": 1.7109375, "learning_rate": 1.1561133918243113e-05, "loss": 0.8501, "step": 7930 }, { "epoch": 1.3479006365604302, "grad_norm": 1.6640625, "learning_rate": 1.1559350106852713e-05, "loss": 0.735, "step": 7931 }, { "epoch": 1.3480721006494203, "grad_norm": 1.640625, "learning_rate": 1.1557566244606035e-05, "loss": 0.9369, "step": 7932 }, { "epoch": 1.3482435647384101, "grad_norm": 1.765625, "learning_rate": 1.1555782331561253e-05, "loss": 0.9249, "step": 7933 }, { "epoch": 1.3484150288274, "grad_norm": 1.671875, "learning_rate": 1.1553998367776555e-05, "loss": 0.9296, "step": 7934 }, { "epoch": 1.3485864929163898, "grad_norm": 1.65625, "learning_rate": 1.1552214353310117e-05, "loss": 0.8557, "step": 7935 }, { "epoch": 1.3487579570053798, "grad_norm": 1.6015625, "learning_rate": 1.1550430288220122e-05, "loss": 0.8573, "step": 7936 }, { "epoch": 1.3489294210943696, "grad_norm": 1.734375, "learning_rate": 1.154864617256476e-05, "loss": 0.832, "step": 7937 }, { "epoch": 1.3491008851833595, "grad_norm": 1.703125, "learning_rate": 1.1546862006402211e-05, "loss": 0.8838, "step": 7938 }, { "epoch": 1.3492723492723493, "grad_norm": 1.65625, "learning_rate": 1.1545077789790668e-05, "loss": 0.8245, "step": 7939 }, { "epoch": 1.3494438133613391, "grad_norm": 1.7421875, "learning_rate": 1.1543293522788321e-05, "loss": 0.8864, "step": 7940 }, { "epoch": 1.349615277450329, "grad_norm": 1.703125, "learning_rate": 1.1541509205453363e-05, "loss": 0.9193, "step": 7941 }, { "epoch": 1.3497867415393188, "grad_norm": 1.75, "learning_rate": 1.1539724837843984e-05, "loss": 0.8673, "step": 7942 }, { "epoch": 1.3499582056283086, "grad_norm": 1.6875, "learning_rate": 1.1537940420018378e-05, "loss": 0.8974, "step": 7943 }, { "epoch": 1.3501296697172986, "grad_norm": 1.6640625, "learning_rate": 1.1536155952034743e-05, "loss": 0.8876, "step": 7944 }, { "epoch": 1.3503011338062885, "grad_norm": 1.671875, "learning_rate": 1.1534371433951279e-05, "loss": 0.8667, "step": 7945 }, { "epoch": 1.3504725978952783, "grad_norm": 1.7578125, "learning_rate": 1.1532586865826185e-05, "loss": 0.9604, "step": 7946 }, { "epoch": 1.3506440619842681, "grad_norm": 1.546875, "learning_rate": 1.153080224771766e-05, "loss": 0.7985, "step": 7947 }, { "epoch": 1.3508155260732582, "grad_norm": 1.671875, "learning_rate": 1.1529017579683915e-05, "loss": 0.8312, "step": 7948 }, { "epoch": 1.350986990162248, "grad_norm": 1.6875, "learning_rate": 1.1527232861783145e-05, "loss": 0.9297, "step": 7949 }, { "epoch": 1.3511584542512378, "grad_norm": 1.7265625, "learning_rate": 1.1525448094073559e-05, "loss": 0.8972, "step": 7950 }, { "epoch": 1.3513299183402276, "grad_norm": 1.7109375, "learning_rate": 1.1523663276613368e-05, "loss": 0.8746, "step": 7951 }, { "epoch": 1.3515013824292175, "grad_norm": 1.65625, "learning_rate": 1.152187840946078e-05, "loss": 0.8506, "step": 7952 }, { "epoch": 1.3516728465182073, "grad_norm": 1.671875, "learning_rate": 1.1520093492674004e-05, "loss": 0.9189, "step": 7953 }, { "epoch": 1.3518443106071971, "grad_norm": 1.71875, "learning_rate": 1.1518308526311257e-05, "loss": 0.829, "step": 7954 }, { "epoch": 1.352015774696187, "grad_norm": 1.6875, "learning_rate": 1.1516523510430753e-05, "loss": 0.8295, "step": 7955 }, { "epoch": 1.352187238785177, "grad_norm": 1.6796875, "learning_rate": 1.1514738445090705e-05, "loss": 0.8435, "step": 7956 }, { "epoch": 1.3523587028741668, "grad_norm": 1.6015625, "learning_rate": 1.1512953330349332e-05, "loss": 0.8204, "step": 7957 }, { "epoch": 1.3525301669631566, "grad_norm": 1.6953125, "learning_rate": 1.1511168166264854e-05, "loss": 0.8189, "step": 7958 }, { "epoch": 1.3527016310521465, "grad_norm": 1.6640625, "learning_rate": 1.1509382952895494e-05, "loss": 0.794, "step": 7959 }, { "epoch": 1.3528730951411365, "grad_norm": 1.6328125, "learning_rate": 1.150759769029947e-05, "loss": 0.7831, "step": 7960 }, { "epoch": 1.3530445592301263, "grad_norm": 1.734375, "learning_rate": 1.1505812378535008e-05, "loss": 0.8546, "step": 7961 }, { "epoch": 1.3532160233191162, "grad_norm": 1.671875, "learning_rate": 1.1504027017660333e-05, "loss": 0.9, "step": 7962 }, { "epoch": 1.353387487408106, "grad_norm": 1.5703125, "learning_rate": 1.1502241607733675e-05, "loss": 0.8458, "step": 7963 }, { "epoch": 1.3535589514970958, "grad_norm": 1.75, "learning_rate": 1.1500456148813258e-05, "loss": 0.8764, "step": 7964 }, { "epoch": 1.3537304155860856, "grad_norm": 1.71875, "learning_rate": 1.1498670640957319e-05, "loss": 0.8472, "step": 7965 }, { "epoch": 1.3539018796750755, "grad_norm": 1.6640625, "learning_rate": 1.1496885084224088e-05, "loss": 0.7855, "step": 7966 }, { "epoch": 1.3540733437640653, "grad_norm": 1.765625, "learning_rate": 1.1495099478671797e-05, "loss": 0.962, "step": 7967 }, { "epoch": 1.3542448078530553, "grad_norm": 1.625, "learning_rate": 1.1493313824358678e-05, "loss": 0.8217, "step": 7968 }, { "epoch": 1.3544162719420452, "grad_norm": 1.6328125, "learning_rate": 1.1491528121342977e-05, "loss": 0.846, "step": 7969 }, { "epoch": 1.354587736031035, "grad_norm": 1.6640625, "learning_rate": 1.1489742369682923e-05, "loss": 0.8412, "step": 7970 }, { "epoch": 1.3547592001200248, "grad_norm": 1.671875, "learning_rate": 1.1487956569436766e-05, "loss": 0.9254, "step": 7971 }, { "epoch": 1.3549306642090146, "grad_norm": 1.7734375, "learning_rate": 1.1486170720662736e-05, "loss": 0.8846, "step": 7972 }, { "epoch": 1.3551021282980047, "grad_norm": 1.9296875, "learning_rate": 1.1484384823419086e-05, "loss": 0.8607, "step": 7973 }, { "epoch": 1.3552735923869945, "grad_norm": 1.78125, "learning_rate": 1.1482598877764055e-05, "loss": 0.8201, "step": 7974 }, { "epoch": 1.3554450564759843, "grad_norm": 1.6640625, "learning_rate": 1.1480812883755894e-05, "loss": 0.8109, "step": 7975 }, { "epoch": 1.3556165205649742, "grad_norm": 1.640625, "learning_rate": 1.1479026841452848e-05, "loss": 0.9103, "step": 7976 }, { "epoch": 1.355787984653964, "grad_norm": 1.7109375, "learning_rate": 1.1477240750913168e-05, "loss": 0.8906, "step": 7977 }, { "epoch": 1.3559594487429538, "grad_norm": 1.734375, "learning_rate": 1.1475454612195102e-05, "loss": 0.8033, "step": 7978 }, { "epoch": 1.3561309128319436, "grad_norm": 1.6953125, "learning_rate": 1.1473668425356908e-05, "loss": 0.8839, "step": 7979 }, { "epoch": 1.3563023769209337, "grad_norm": 1.5625, "learning_rate": 1.1471882190456836e-05, "loss": 0.7843, "step": 7980 }, { "epoch": 1.3564738410099235, "grad_norm": 1.7109375, "learning_rate": 1.1470095907553143e-05, "loss": 0.8963, "step": 7981 }, { "epoch": 1.3566453050989133, "grad_norm": 1.734375, "learning_rate": 1.1468309576704084e-05, "loss": 0.9226, "step": 7982 }, { "epoch": 1.3568167691879032, "grad_norm": 1.6484375, "learning_rate": 1.1466523197967923e-05, "loss": 0.8045, "step": 7983 }, { "epoch": 1.356988233276893, "grad_norm": 1.6875, "learning_rate": 1.1464736771402918e-05, "loss": 0.8812, "step": 7984 }, { "epoch": 1.357159697365883, "grad_norm": 1.6875, "learning_rate": 1.1462950297067333e-05, "loss": 0.8684, "step": 7985 }, { "epoch": 1.3573311614548729, "grad_norm": 1.7890625, "learning_rate": 1.1461163775019426e-05, "loss": 0.9137, "step": 7986 }, { "epoch": 1.3575026255438627, "grad_norm": 1.65625, "learning_rate": 1.1459377205317467e-05, "loss": 0.8363, "step": 7987 }, { "epoch": 1.3576740896328525, "grad_norm": 1.703125, "learning_rate": 1.1457590588019726e-05, "loss": 0.8861, "step": 7988 }, { "epoch": 1.3578455537218423, "grad_norm": 1.6015625, "learning_rate": 1.1455803923184465e-05, "loss": 0.8179, "step": 7989 }, { "epoch": 1.3580170178108322, "grad_norm": 1.671875, "learning_rate": 1.1454017210869954e-05, "loss": 0.8174, "step": 7990 }, { "epoch": 1.358188481899822, "grad_norm": 1.671875, "learning_rate": 1.1452230451134465e-05, "loss": 0.8978, "step": 7991 }, { "epoch": 1.358359945988812, "grad_norm": 1.640625, "learning_rate": 1.1450443644036276e-05, "loss": 0.8766, "step": 7992 }, { "epoch": 1.3585314100778019, "grad_norm": 1.734375, "learning_rate": 1.1448656789633657e-05, "loss": 0.9489, "step": 7993 }, { "epoch": 1.3587028741667917, "grad_norm": 1.71875, "learning_rate": 1.1446869887984887e-05, "loss": 0.9773, "step": 7994 }, { "epoch": 1.3588743382557815, "grad_norm": 1.734375, "learning_rate": 1.144508293914824e-05, "loss": 0.9206, "step": 7995 }, { "epoch": 1.3590458023447713, "grad_norm": 1.859375, "learning_rate": 1.1443295943181996e-05, "loss": 0.8688, "step": 7996 }, { "epoch": 1.3592172664337614, "grad_norm": 1.65625, "learning_rate": 1.1441508900144436e-05, "loss": 0.8146, "step": 7997 }, { "epoch": 1.3593887305227512, "grad_norm": 1.6953125, "learning_rate": 1.1439721810093843e-05, "loss": 0.878, "step": 7998 }, { "epoch": 1.359560194611741, "grad_norm": 1.7578125, "learning_rate": 1.14379346730885e-05, "loss": 0.8769, "step": 7999 }, { "epoch": 1.3597316587007309, "grad_norm": 1.7421875, "learning_rate": 1.1436147489186693e-05, "loss": 0.8759, "step": 8000 }, { "epoch": 1.3599031227897207, "grad_norm": 1.640625, "learning_rate": 1.1434360258446705e-05, "loss": 0.8362, "step": 8001 }, { "epoch": 1.3600745868787105, "grad_norm": 1.7265625, "learning_rate": 1.1432572980926833e-05, "loss": 0.8737, "step": 8002 }, { "epoch": 1.3602460509677003, "grad_norm": 1.6171875, "learning_rate": 1.1430785656685358e-05, "loss": 0.8076, "step": 8003 }, { "epoch": 1.3604175150566904, "grad_norm": 1.609375, "learning_rate": 1.1428998285780576e-05, "loss": 0.8479, "step": 8004 }, { "epoch": 1.3605889791456802, "grad_norm": 1.6796875, "learning_rate": 1.142721086827078e-05, "loss": 0.8055, "step": 8005 }, { "epoch": 1.36076044323467, "grad_norm": 1.7421875, "learning_rate": 1.1425423404214263e-05, "loss": 0.847, "step": 8006 }, { "epoch": 1.3609319073236599, "grad_norm": 1.765625, "learning_rate": 1.142363589366932e-05, "loss": 0.8638, "step": 8007 }, { "epoch": 1.3611033714126497, "grad_norm": 1.6640625, "learning_rate": 1.142184833669425e-05, "loss": 0.859, "step": 8008 }, { "epoch": 1.3612748355016397, "grad_norm": 1.703125, "learning_rate": 1.1420060733347353e-05, "loss": 0.7812, "step": 8009 }, { "epoch": 1.3614462995906296, "grad_norm": 1.8359375, "learning_rate": 1.1418273083686926e-05, "loss": 0.8876, "step": 8010 }, { "epoch": 1.3616177636796194, "grad_norm": 1.71875, "learning_rate": 1.1416485387771274e-05, "loss": 0.8667, "step": 8011 }, { "epoch": 1.3617892277686092, "grad_norm": 1.6484375, "learning_rate": 1.1414697645658699e-05, "loss": 0.8258, "step": 8012 }, { "epoch": 1.361960691857599, "grad_norm": 1.7578125, "learning_rate": 1.1412909857407505e-05, "loss": 0.8949, "step": 8013 }, { "epoch": 1.3621321559465889, "grad_norm": 1.6640625, "learning_rate": 1.1411122023076002e-05, "loss": 0.8998, "step": 8014 }, { "epoch": 1.3623036200355787, "grad_norm": 1.59375, "learning_rate": 1.1409334142722494e-05, "loss": 0.8753, "step": 8015 }, { "epoch": 1.3624750841245685, "grad_norm": 1.671875, "learning_rate": 1.1407546216405296e-05, "loss": 0.8763, "step": 8016 }, { "epoch": 1.3626465482135586, "grad_norm": 1.8671875, "learning_rate": 1.1405758244182716e-05, "loss": 0.9637, "step": 8017 }, { "epoch": 1.3628180123025484, "grad_norm": 1.7734375, "learning_rate": 1.1403970226113064e-05, "loss": 0.9156, "step": 8018 }, { "epoch": 1.3629894763915382, "grad_norm": 1.8125, "learning_rate": 1.1402182162254653e-05, "loss": 0.8782, "step": 8019 }, { "epoch": 1.363160940480528, "grad_norm": 1.609375, "learning_rate": 1.1400394052665804e-05, "loss": 0.7742, "step": 8020 }, { "epoch": 1.363332404569518, "grad_norm": 1.765625, "learning_rate": 1.1398605897404833e-05, "loss": 0.9132, "step": 8021 }, { "epoch": 1.363503868658508, "grad_norm": 1.703125, "learning_rate": 1.1396817696530055e-05, "loss": 0.8553, "step": 8022 }, { "epoch": 1.3636753327474977, "grad_norm": 1.640625, "learning_rate": 1.1395029450099794e-05, "loss": 0.8345, "step": 8023 }, { "epoch": 1.3638467968364876, "grad_norm": 1.6953125, "learning_rate": 1.139324115817237e-05, "loss": 0.8422, "step": 8024 }, { "epoch": 1.3640182609254774, "grad_norm": 1.7421875, "learning_rate": 1.1391452820806103e-05, "loss": 0.9355, "step": 8025 }, { "epoch": 1.3641897250144672, "grad_norm": 1.7734375, "learning_rate": 1.138966443805932e-05, "loss": 0.9624, "step": 8026 }, { "epoch": 1.364361189103457, "grad_norm": 1.7109375, "learning_rate": 1.1387876009990348e-05, "loss": 0.8817, "step": 8027 }, { "epoch": 1.3645326531924469, "grad_norm": 1.7109375, "learning_rate": 1.1386087536657513e-05, "loss": 0.7908, "step": 8028 }, { "epoch": 1.364704117281437, "grad_norm": 1.703125, "learning_rate": 1.1384299018119142e-05, "loss": 0.862, "step": 8029 }, { "epoch": 1.3648755813704267, "grad_norm": 1.703125, "learning_rate": 1.1382510454433568e-05, "loss": 0.8278, "step": 8030 }, { "epoch": 1.3650470454594166, "grad_norm": 1.734375, "learning_rate": 1.1380721845659123e-05, "loss": 0.8649, "step": 8031 }, { "epoch": 1.3652185095484064, "grad_norm": 1.65625, "learning_rate": 1.1378933191854137e-05, "loss": 0.8856, "step": 8032 }, { "epoch": 1.3653899736373964, "grad_norm": 1.6796875, "learning_rate": 1.137714449307695e-05, "loss": 0.9367, "step": 8033 }, { "epoch": 1.3655614377263863, "grad_norm": 1.5703125, "learning_rate": 1.1375355749385896e-05, "loss": 0.7769, "step": 8034 }, { "epoch": 1.365732901815376, "grad_norm": 1.578125, "learning_rate": 1.137356696083931e-05, "loss": 0.8368, "step": 8035 }, { "epoch": 1.365904365904366, "grad_norm": 1.6875, "learning_rate": 1.1371778127495535e-05, "loss": 0.8367, "step": 8036 }, { "epoch": 1.3660758299933558, "grad_norm": 1.6640625, "learning_rate": 1.1369989249412905e-05, "loss": 0.8095, "step": 8037 }, { "epoch": 1.3662472940823456, "grad_norm": 1.625, "learning_rate": 1.1368200326649768e-05, "loss": 0.8846, "step": 8038 }, { "epoch": 1.3664187581713354, "grad_norm": 1.7109375, "learning_rate": 1.1366411359264466e-05, "loss": 0.9167, "step": 8039 }, { "epoch": 1.3665902222603252, "grad_norm": 1.6484375, "learning_rate": 1.1364622347315348e-05, "loss": 0.853, "step": 8040 }, { "epoch": 1.3667616863493153, "grad_norm": 1.625, "learning_rate": 1.1362833290860756e-05, "loss": 0.8976, "step": 8041 }, { "epoch": 1.366933150438305, "grad_norm": 1.6796875, "learning_rate": 1.1361044189959035e-05, "loss": 0.8774, "step": 8042 }, { "epoch": 1.367104614527295, "grad_norm": 1.7265625, "learning_rate": 1.135925504466854e-05, "loss": 0.8337, "step": 8043 }, { "epoch": 1.3672760786162848, "grad_norm": 1.7109375, "learning_rate": 1.1357465855047617e-05, "loss": 0.9059, "step": 8044 }, { "epoch": 1.3674475427052748, "grad_norm": 1.8203125, "learning_rate": 1.1355676621154624e-05, "loss": 0.8321, "step": 8045 }, { "epoch": 1.3676190067942646, "grad_norm": 1.7421875, "learning_rate": 1.135388734304791e-05, "loss": 0.8401, "step": 8046 }, { "epoch": 1.3677904708832545, "grad_norm": 1.6484375, "learning_rate": 1.1352098020785832e-05, "loss": 0.8627, "step": 8047 }, { "epoch": 1.3679619349722443, "grad_norm": 1.7265625, "learning_rate": 1.1350308654426744e-05, "loss": 0.8864, "step": 8048 }, { "epoch": 1.368133399061234, "grad_norm": 1.796875, "learning_rate": 1.1348519244029008e-05, "loss": 0.8816, "step": 8049 }, { "epoch": 1.368304863150224, "grad_norm": 1.734375, "learning_rate": 1.1346729789650982e-05, "loss": 0.8284, "step": 8050 }, { "epoch": 1.3684763272392138, "grad_norm": 1.6875, "learning_rate": 1.1344940291351022e-05, "loss": 0.8998, "step": 8051 }, { "epoch": 1.3686477913282036, "grad_norm": 1.59375, "learning_rate": 1.1343150749187501e-05, "loss": 0.8313, "step": 8052 }, { "epoch": 1.3688192554171936, "grad_norm": 1.6953125, "learning_rate": 1.1341361163218771e-05, "loss": 0.9093, "step": 8053 }, { "epoch": 1.3689907195061835, "grad_norm": 1.734375, "learning_rate": 1.1339571533503207e-05, "loss": 0.9283, "step": 8054 }, { "epoch": 1.3691621835951733, "grad_norm": 1.7265625, "learning_rate": 1.1337781860099165e-05, "loss": 0.957, "step": 8055 }, { "epoch": 1.369333647684163, "grad_norm": 1.6640625, "learning_rate": 1.1335992143065023e-05, "loss": 0.8692, "step": 8056 }, { "epoch": 1.369505111773153, "grad_norm": 1.7109375, "learning_rate": 1.1334202382459145e-05, "loss": 0.8551, "step": 8057 }, { "epoch": 1.369676575862143, "grad_norm": 1.703125, "learning_rate": 1.1332412578339905e-05, "loss": 0.8644, "step": 8058 }, { "epoch": 1.3698480399511328, "grad_norm": 1.640625, "learning_rate": 1.133062273076567e-05, "loss": 0.848, "step": 8059 }, { "epoch": 1.3700195040401226, "grad_norm": 1.75, "learning_rate": 1.1328832839794819e-05, "loss": 0.7959, "step": 8060 }, { "epoch": 1.3701909681291125, "grad_norm": 1.6796875, "learning_rate": 1.1327042905485725e-05, "loss": 0.8229, "step": 8061 }, { "epoch": 1.3703624322181023, "grad_norm": 1.7265625, "learning_rate": 1.1325252927896765e-05, "loss": 0.8541, "step": 8062 }, { "epoch": 1.370533896307092, "grad_norm": 1.734375, "learning_rate": 1.132346290708632e-05, "loss": 0.8699, "step": 8063 }, { "epoch": 1.370705360396082, "grad_norm": 1.734375, "learning_rate": 1.132167284311276e-05, "loss": 0.8418, "step": 8064 }, { "epoch": 1.370876824485072, "grad_norm": 1.765625, "learning_rate": 1.1319882736034476e-05, "loss": 0.8203, "step": 8065 }, { "epoch": 1.3710482885740618, "grad_norm": 1.5703125, "learning_rate": 1.1318092585909841e-05, "loss": 0.8273, "step": 8066 }, { "epoch": 1.3712197526630516, "grad_norm": 1.640625, "learning_rate": 1.1316302392797244e-05, "loss": 0.8852, "step": 8067 }, { "epoch": 1.3713912167520415, "grad_norm": 1.6953125, "learning_rate": 1.1314512156755073e-05, "loss": 0.8474, "step": 8068 }, { "epoch": 1.3715626808410313, "grad_norm": 1.6640625, "learning_rate": 1.131272187784171e-05, "loss": 0.7935, "step": 8069 }, { "epoch": 1.3717341449300213, "grad_norm": 1.765625, "learning_rate": 1.1310931556115543e-05, "loss": 0.9003, "step": 8070 }, { "epoch": 1.3719056090190112, "grad_norm": 1.609375, "learning_rate": 1.130914119163496e-05, "loss": 0.8306, "step": 8071 }, { "epoch": 1.372077073108001, "grad_norm": 1.703125, "learning_rate": 1.1307350784458355e-05, "loss": 0.9435, "step": 8072 }, { "epoch": 1.3722485371969908, "grad_norm": 1.7421875, "learning_rate": 1.1305560334644116e-05, "loss": 0.9552, "step": 8073 }, { "epoch": 1.3724200012859806, "grad_norm": 1.7109375, "learning_rate": 1.1303769842250638e-05, "loss": 0.8818, "step": 8074 }, { "epoch": 1.3725914653749705, "grad_norm": 1.65625, "learning_rate": 1.1301979307336317e-05, "loss": 0.8798, "step": 8075 }, { "epoch": 1.3727629294639603, "grad_norm": 1.71875, "learning_rate": 1.1300188729959548e-05, "loss": 0.8477, "step": 8076 }, { "epoch": 1.3729343935529503, "grad_norm": 1.640625, "learning_rate": 1.1298398110178728e-05, "loss": 0.8337, "step": 8077 }, { "epoch": 1.3731058576419402, "grad_norm": 1.6875, "learning_rate": 1.1296607448052255e-05, "loss": 0.865, "step": 8078 }, { "epoch": 1.37327732173093, "grad_norm": 1.640625, "learning_rate": 1.1294816743638533e-05, "loss": 0.8704, "step": 8079 }, { "epoch": 1.3734487858199198, "grad_norm": 1.65625, "learning_rate": 1.1293025996995962e-05, "loss": 0.8725, "step": 8080 }, { "epoch": 1.3736202499089096, "grad_norm": 1.796875, "learning_rate": 1.1291235208182942e-05, "loss": 0.898, "step": 8081 }, { "epoch": 1.3737917139978997, "grad_norm": 1.640625, "learning_rate": 1.128944437725788e-05, "loss": 0.814, "step": 8082 }, { "epoch": 1.3739631780868895, "grad_norm": 1.6015625, "learning_rate": 1.128765350427918e-05, "loss": 0.9043, "step": 8083 }, { "epoch": 1.3741346421758793, "grad_norm": 1.6171875, "learning_rate": 1.1285862589305252e-05, "loss": 0.8093, "step": 8084 }, { "epoch": 1.3743061062648692, "grad_norm": 1.6796875, "learning_rate": 1.12840716323945e-05, "loss": 0.9416, "step": 8085 }, { "epoch": 1.374477570353859, "grad_norm": 1.6484375, "learning_rate": 1.128228063360534e-05, "loss": 0.8444, "step": 8086 }, { "epoch": 1.3746490344428488, "grad_norm": 1.7109375, "learning_rate": 1.1280489592996177e-05, "loss": 0.817, "step": 8087 }, { "epoch": 1.3748204985318386, "grad_norm": 1.6953125, "learning_rate": 1.127869851062543e-05, "loss": 0.8625, "step": 8088 }, { "epoch": 1.3749919626208287, "grad_norm": 1.7734375, "learning_rate": 1.1276907386551508e-05, "loss": 0.8986, "step": 8089 }, { "epoch": 1.3751634267098185, "grad_norm": 1.6875, "learning_rate": 1.1275116220832826e-05, "loss": 0.8341, "step": 8090 }, { "epoch": 1.3753348907988083, "grad_norm": 1.6171875, "learning_rate": 1.1273325013527805e-05, "loss": 0.8414, "step": 8091 }, { "epoch": 1.3755063548877982, "grad_norm": 1.6796875, "learning_rate": 1.1271533764694862e-05, "loss": 0.8472, "step": 8092 }, { "epoch": 1.375677818976788, "grad_norm": 1.6875, "learning_rate": 1.126974247439241e-05, "loss": 0.7282, "step": 8093 }, { "epoch": 1.375849283065778, "grad_norm": 1.6484375, "learning_rate": 1.1267951142678877e-05, "loss": 0.8236, "step": 8094 }, { "epoch": 1.3760207471547679, "grad_norm": 1.671875, "learning_rate": 1.126615976961268e-05, "loss": 0.8156, "step": 8095 }, { "epoch": 1.3761922112437577, "grad_norm": 1.734375, "learning_rate": 1.1264368355252248e-05, "loss": 0.8662, "step": 8096 }, { "epoch": 1.3763636753327475, "grad_norm": 1.71875, "learning_rate": 1.1262576899656001e-05, "loss": 0.8275, "step": 8097 }, { "epoch": 1.3765351394217373, "grad_norm": 1.6640625, "learning_rate": 1.1260785402882371e-05, "loss": 0.9154, "step": 8098 }, { "epoch": 1.3767066035107272, "grad_norm": 1.703125, "learning_rate": 1.1258993864989777e-05, "loss": 0.8768, "step": 8099 }, { "epoch": 1.376878067599717, "grad_norm": 1.8671875, "learning_rate": 1.1257202286036653e-05, "loss": 0.9332, "step": 8100 }, { "epoch": 1.377049531688707, "grad_norm": 1.6328125, "learning_rate": 1.125541066608143e-05, "loss": 0.8837, "step": 8101 }, { "epoch": 1.3772209957776969, "grad_norm": 1.6953125, "learning_rate": 1.1253619005182534e-05, "loss": 0.8344, "step": 8102 }, { "epoch": 1.3773924598666867, "grad_norm": 1.7734375, "learning_rate": 1.1251827303398406e-05, "loss": 0.9364, "step": 8103 }, { "epoch": 1.3775639239556765, "grad_norm": 1.703125, "learning_rate": 1.1250035560787473e-05, "loss": 0.9229, "step": 8104 }, { "epoch": 1.3777353880446663, "grad_norm": 1.6484375, "learning_rate": 1.1248243777408172e-05, "loss": 0.8675, "step": 8105 }, { "epoch": 1.3779068521336564, "grad_norm": 1.6328125, "learning_rate": 1.1246451953318944e-05, "loss": 0.8326, "step": 8106 }, { "epoch": 1.3780783162226462, "grad_norm": 1.6953125, "learning_rate": 1.1244660088578223e-05, "loss": 0.9618, "step": 8107 }, { "epoch": 1.378249780311636, "grad_norm": 1.7265625, "learning_rate": 1.124286818324445e-05, "loss": 0.8605, "step": 8108 }, { "epoch": 1.3784212444006259, "grad_norm": 1.6953125, "learning_rate": 1.1241076237376065e-05, "loss": 0.8746, "step": 8109 }, { "epoch": 1.3785927084896157, "grad_norm": 1.78125, "learning_rate": 1.1239284251031511e-05, "loss": 0.904, "step": 8110 }, { "epoch": 1.3787641725786055, "grad_norm": 1.6953125, "learning_rate": 1.1237492224269229e-05, "loss": 0.892, "step": 8111 }, { "epoch": 1.3789356366675953, "grad_norm": 1.71875, "learning_rate": 1.1235700157147666e-05, "loss": 0.8849, "step": 8112 }, { "epoch": 1.3791071007565852, "grad_norm": 1.78125, "learning_rate": 1.1233908049725267e-05, "loss": 0.8826, "step": 8113 }, { "epoch": 1.3792785648455752, "grad_norm": 1.6484375, "learning_rate": 1.1232115902060481e-05, "loss": 0.835, "step": 8114 }, { "epoch": 1.379450028934565, "grad_norm": 1.6484375, "learning_rate": 1.1230323714211757e-05, "loss": 0.837, "step": 8115 }, { "epoch": 1.3796214930235549, "grad_norm": 1.546875, "learning_rate": 1.1228531486237545e-05, "loss": 0.8524, "step": 8116 }, { "epoch": 1.3797929571125447, "grad_norm": 1.65625, "learning_rate": 1.1226739218196289e-05, "loss": 0.7318, "step": 8117 }, { "epoch": 1.3799644212015347, "grad_norm": 1.703125, "learning_rate": 1.1224946910146452e-05, "loss": 0.8689, "step": 8118 }, { "epoch": 1.3801358852905246, "grad_norm": 1.640625, "learning_rate": 1.1223154562146484e-05, "loss": 0.841, "step": 8119 }, { "epoch": 1.3803073493795144, "grad_norm": 1.7109375, "learning_rate": 1.1221362174254837e-05, "loss": 0.8204, "step": 8120 }, { "epoch": 1.3804788134685042, "grad_norm": 1.7890625, "learning_rate": 1.1219569746529973e-05, "loss": 0.9147, "step": 8121 }, { "epoch": 1.380650277557494, "grad_norm": 1.5859375, "learning_rate": 1.1217777279030346e-05, "loss": 0.785, "step": 8122 }, { "epoch": 1.3808217416464839, "grad_norm": 1.734375, "learning_rate": 1.1215984771814416e-05, "loss": 0.8118, "step": 8123 }, { "epoch": 1.3809932057354737, "grad_norm": 1.640625, "learning_rate": 1.1214192224940643e-05, "loss": 0.8752, "step": 8124 }, { "epoch": 1.3811646698244635, "grad_norm": 1.640625, "learning_rate": 1.1212399638467491e-05, "loss": 0.7907, "step": 8125 }, { "epoch": 1.3813361339134536, "grad_norm": 1.671875, "learning_rate": 1.121060701245342e-05, "loss": 0.8421, "step": 8126 }, { "epoch": 1.3815075980024434, "grad_norm": 1.7109375, "learning_rate": 1.1208814346956898e-05, "loss": 0.849, "step": 8127 }, { "epoch": 1.3816790620914332, "grad_norm": 1.6484375, "learning_rate": 1.1207021642036387e-05, "loss": 0.8065, "step": 8128 }, { "epoch": 1.381850526180423, "grad_norm": 1.671875, "learning_rate": 1.1205228897750353e-05, "loss": 0.8164, "step": 8129 }, { "epoch": 1.382021990269413, "grad_norm": 1.6640625, "learning_rate": 1.1203436114157267e-05, "loss": 0.8265, "step": 8130 }, { "epoch": 1.382193454358403, "grad_norm": 1.6875, "learning_rate": 1.1201643291315599e-05, "loss": 0.8398, "step": 8131 }, { "epoch": 1.3823649184473927, "grad_norm": 1.8359375, "learning_rate": 1.1199850429283817e-05, "loss": 0.833, "step": 8132 }, { "epoch": 1.3825363825363826, "grad_norm": 1.84375, "learning_rate": 1.1198057528120394e-05, "loss": 0.946, "step": 8133 }, { "epoch": 1.3827078466253724, "grad_norm": 1.671875, "learning_rate": 1.1196264587883803e-05, "loss": 0.8525, "step": 8134 }, { "epoch": 1.3828793107143622, "grad_norm": 1.8046875, "learning_rate": 1.119447160863252e-05, "loss": 0.9194, "step": 8135 }, { "epoch": 1.383050774803352, "grad_norm": 1.640625, "learning_rate": 1.1192678590425021e-05, "loss": 0.8202, "step": 8136 }, { "epoch": 1.3832222388923419, "grad_norm": 1.71875, "learning_rate": 1.119088553331978e-05, "loss": 0.9056, "step": 8137 }, { "epoch": 1.383393702981332, "grad_norm": 1.7265625, "learning_rate": 1.1189092437375282e-05, "loss": 0.8426, "step": 8138 }, { "epoch": 1.3835651670703217, "grad_norm": 1.6875, "learning_rate": 1.1187299302649996e-05, "loss": 0.9339, "step": 8139 }, { "epoch": 1.3837366311593116, "grad_norm": 1.6953125, "learning_rate": 1.118550612920241e-05, "loss": 0.7956, "step": 8140 }, { "epoch": 1.3839080952483014, "grad_norm": 1.6953125, "learning_rate": 1.1183712917091006e-05, "loss": 0.9118, "step": 8141 }, { "epoch": 1.3840795593372914, "grad_norm": 1.703125, "learning_rate": 1.1181919666374266e-05, "loss": 0.8234, "step": 8142 }, { "epoch": 1.3842510234262813, "grad_norm": 1.7109375, "learning_rate": 1.1180126377110674e-05, "loss": 0.8845, "step": 8143 }, { "epoch": 1.384422487515271, "grad_norm": 1.703125, "learning_rate": 1.117833304935872e-05, "loss": 0.8255, "step": 8144 }, { "epoch": 1.384593951604261, "grad_norm": 1.625, "learning_rate": 1.1176539683176887e-05, "loss": 0.8834, "step": 8145 }, { "epoch": 1.3847654156932507, "grad_norm": 1.671875, "learning_rate": 1.1174746278623662e-05, "loss": 0.7859, "step": 8146 }, { "epoch": 1.3849368797822406, "grad_norm": 1.65625, "learning_rate": 1.117295283575754e-05, "loss": 0.9149, "step": 8147 }, { "epoch": 1.3851083438712304, "grad_norm": 1.6640625, "learning_rate": 1.1171159354637008e-05, "loss": 0.8333, "step": 8148 }, { "epoch": 1.3852798079602202, "grad_norm": 1.734375, "learning_rate": 1.116936583532056e-05, "loss": 0.8805, "step": 8149 }, { "epoch": 1.3854512720492103, "grad_norm": 1.7421875, "learning_rate": 1.1167572277866688e-05, "loss": 0.8682, "step": 8150 }, { "epoch": 1.3856227361382, "grad_norm": 1.625, "learning_rate": 1.1165778682333888e-05, "loss": 0.8281, "step": 8151 }, { "epoch": 1.38579420022719, "grad_norm": 1.59375, "learning_rate": 1.1163985048780652e-05, "loss": 0.8823, "step": 8152 }, { "epoch": 1.3859656643161797, "grad_norm": 1.7421875, "learning_rate": 1.1162191377265483e-05, "loss": 0.8931, "step": 8153 }, { "epoch": 1.3861371284051696, "grad_norm": 1.671875, "learning_rate": 1.1160397667846875e-05, "loss": 0.8454, "step": 8154 }, { "epoch": 1.3863085924941596, "grad_norm": 1.6484375, "learning_rate": 1.1158603920583336e-05, "loss": 0.8052, "step": 8155 }, { "epoch": 1.3864800565831494, "grad_norm": 1.7265625, "learning_rate": 1.1156810135533354e-05, "loss": 0.8256, "step": 8156 }, { "epoch": 1.3866515206721393, "grad_norm": 1.7265625, "learning_rate": 1.115501631275544e-05, "loss": 0.8202, "step": 8157 }, { "epoch": 1.386822984761129, "grad_norm": 1.5390625, "learning_rate": 1.1153222452308093e-05, "loss": 0.7897, "step": 8158 }, { "epoch": 1.386994448850119, "grad_norm": 1.7890625, "learning_rate": 1.1151428554249818e-05, "loss": 0.931, "step": 8159 }, { "epoch": 1.3871659129391087, "grad_norm": 1.609375, "learning_rate": 1.1149634618639123e-05, "loss": 0.7929, "step": 8160 }, { "epoch": 1.3873373770280986, "grad_norm": 1.8125, "learning_rate": 1.1147840645534515e-05, "loss": 0.9386, "step": 8161 }, { "epoch": 1.3875088411170886, "grad_norm": 1.6484375, "learning_rate": 1.11460466349945e-05, "loss": 0.8324, "step": 8162 }, { "epoch": 1.3876803052060784, "grad_norm": 1.640625, "learning_rate": 1.114425258707759e-05, "loss": 0.8598, "step": 8163 }, { "epoch": 1.3878517692950683, "grad_norm": 2.015625, "learning_rate": 1.1142458501842292e-05, "loss": 0.901, "step": 8164 }, { "epoch": 1.388023233384058, "grad_norm": 1.84375, "learning_rate": 1.1140664379347124e-05, "loss": 0.8722, "step": 8165 }, { "epoch": 1.388194697473048, "grad_norm": 1.6953125, "learning_rate": 1.1138870219650592e-05, "loss": 0.845, "step": 8166 }, { "epoch": 1.388366161562038, "grad_norm": 2.015625, "learning_rate": 1.1137076022811217e-05, "loss": 0.8876, "step": 8167 }, { "epoch": 1.3885376256510278, "grad_norm": 1.7109375, "learning_rate": 1.1135281788887506e-05, "loss": 0.8479, "step": 8168 }, { "epoch": 1.3887090897400176, "grad_norm": 1.6171875, "learning_rate": 1.1133487517937985e-05, "loss": 0.8327, "step": 8169 }, { "epoch": 1.3888805538290074, "grad_norm": 1.765625, "learning_rate": 1.1131693210021163e-05, "loss": 0.8874, "step": 8170 }, { "epoch": 1.3890520179179973, "grad_norm": 1.703125, "learning_rate": 1.1129898865195568e-05, "loss": 0.8704, "step": 8171 }, { "epoch": 1.389223482006987, "grad_norm": 1.59375, "learning_rate": 1.1128104483519715e-05, "loss": 0.8239, "step": 8172 }, { "epoch": 1.389394946095977, "grad_norm": 1.6953125, "learning_rate": 1.1126310065052128e-05, "loss": 0.8521, "step": 8173 }, { "epoch": 1.389566410184967, "grad_norm": 1.6484375, "learning_rate": 1.1124515609851328e-05, "loss": 0.8451, "step": 8174 }, { "epoch": 1.3897378742739568, "grad_norm": 1.75, "learning_rate": 1.1122721117975838e-05, "loss": 0.8913, "step": 8175 }, { "epoch": 1.3899093383629466, "grad_norm": 1.640625, "learning_rate": 1.1120926589484187e-05, "loss": 0.8485, "step": 8176 }, { "epoch": 1.3900808024519364, "grad_norm": 1.6484375, "learning_rate": 1.1119132024434896e-05, "loss": 0.8612, "step": 8177 }, { "epoch": 1.3902522665409263, "grad_norm": 1.609375, "learning_rate": 1.1117337422886497e-05, "loss": 0.8926, "step": 8178 }, { "epoch": 1.3904237306299163, "grad_norm": 1.65625, "learning_rate": 1.1115542784897518e-05, "loss": 0.8277, "step": 8179 }, { "epoch": 1.3905951947189061, "grad_norm": 1.7734375, "learning_rate": 1.1113748110526486e-05, "loss": 0.9561, "step": 8180 }, { "epoch": 1.390766658807896, "grad_norm": 1.6484375, "learning_rate": 1.1111953399831934e-05, "loss": 0.7891, "step": 8181 }, { "epoch": 1.3909381228968858, "grad_norm": 1.65625, "learning_rate": 1.1110158652872395e-05, "loss": 0.849, "step": 8182 }, { "epoch": 1.3911095869858756, "grad_norm": 1.640625, "learning_rate": 1.1108363869706402e-05, "loss": 0.7991, "step": 8183 }, { "epoch": 1.3912810510748654, "grad_norm": 1.671875, "learning_rate": 1.1106569050392492e-05, "loss": 0.8487, "step": 8184 }, { "epoch": 1.3914525151638553, "grad_norm": 1.640625, "learning_rate": 1.1104774194989197e-05, "loss": 0.8098, "step": 8185 }, { "epoch": 1.3916239792528453, "grad_norm": 1.75, "learning_rate": 1.1102979303555052e-05, "loss": 0.8735, "step": 8186 }, { "epoch": 1.3917954433418351, "grad_norm": 1.6328125, "learning_rate": 1.11011843761486e-05, "loss": 0.7936, "step": 8187 }, { "epoch": 1.391966907430825, "grad_norm": 1.6796875, "learning_rate": 1.1099389412828379e-05, "loss": 0.9163, "step": 8188 }, { "epoch": 1.3921383715198148, "grad_norm": 1.7578125, "learning_rate": 1.109759441365293e-05, "loss": 0.9175, "step": 8189 }, { "epoch": 1.3923098356088046, "grad_norm": 1.640625, "learning_rate": 1.1095799378680796e-05, "loss": 0.8568, "step": 8190 }, { "epoch": 1.3924812996977947, "grad_norm": 1.703125, "learning_rate": 1.1094004307970516e-05, "loss": 0.8124, "step": 8191 }, { "epoch": 1.3926527637867845, "grad_norm": 1.703125, "learning_rate": 1.1092209201580634e-05, "loss": 0.9517, "step": 8192 }, { "epoch": 1.3928242278757743, "grad_norm": 1.7421875, "learning_rate": 1.10904140595697e-05, "loss": 0.83, "step": 8193 }, { "epoch": 1.3929956919647641, "grad_norm": 1.6640625, "learning_rate": 1.1088618881996257e-05, "loss": 0.8657, "step": 8194 }, { "epoch": 1.393167156053754, "grad_norm": 1.6484375, "learning_rate": 1.108682366891885e-05, "loss": 0.7925, "step": 8195 }, { "epoch": 1.3933386201427438, "grad_norm": 1.578125, "learning_rate": 1.1085028420396033e-05, "loss": 0.6944, "step": 8196 }, { "epoch": 1.3935100842317336, "grad_norm": 1.578125, "learning_rate": 1.1083233136486354e-05, "loss": 0.8304, "step": 8197 }, { "epoch": 1.3936815483207234, "grad_norm": 1.7421875, "learning_rate": 1.1081437817248362e-05, "loss": 0.8687, "step": 8198 }, { "epoch": 1.3938530124097135, "grad_norm": 1.5859375, "learning_rate": 1.1079642462740612e-05, "loss": 0.7692, "step": 8199 }, { "epoch": 1.3940244764987033, "grad_norm": 1.671875, "learning_rate": 1.1077847073021653e-05, "loss": 0.8898, "step": 8200 }, { "epoch": 1.3941959405876931, "grad_norm": 1.703125, "learning_rate": 1.1076051648150043e-05, "loss": 0.8604, "step": 8201 }, { "epoch": 1.394367404676683, "grad_norm": 1.6328125, "learning_rate": 1.107425618818434e-05, "loss": 0.793, "step": 8202 }, { "epoch": 1.394538868765673, "grad_norm": 1.7578125, "learning_rate": 1.1072460693183095e-05, "loss": 0.9108, "step": 8203 }, { "epoch": 1.3947103328546628, "grad_norm": 1.7890625, "learning_rate": 1.1070665163204868e-05, "loss": 0.8878, "step": 8204 }, { "epoch": 1.3948817969436527, "grad_norm": 1.6484375, "learning_rate": 1.1068869598308218e-05, "loss": 0.8829, "step": 8205 }, { "epoch": 1.3950532610326425, "grad_norm": 1.7421875, "learning_rate": 1.1067073998551706e-05, "loss": 0.9368, "step": 8206 }, { "epoch": 1.3952247251216323, "grad_norm": 1.625, "learning_rate": 1.1065278363993892e-05, "loss": 0.8212, "step": 8207 }, { "epoch": 1.3953961892106221, "grad_norm": 1.734375, "learning_rate": 1.1063482694693339e-05, "loss": 0.892, "step": 8208 }, { "epoch": 1.395567653299612, "grad_norm": 1.7265625, "learning_rate": 1.1061686990708612e-05, "loss": 0.8342, "step": 8209 }, { "epoch": 1.3957391173886018, "grad_norm": 1.6875, "learning_rate": 1.1059891252098273e-05, "loss": 0.8443, "step": 8210 }, { "epoch": 1.3959105814775918, "grad_norm": 1.6796875, "learning_rate": 1.105809547892089e-05, "loss": 0.8837, "step": 8211 }, { "epoch": 1.3960820455665817, "grad_norm": 1.5859375, "learning_rate": 1.1056299671235028e-05, "loss": 0.8203, "step": 8212 }, { "epoch": 1.3962535096555715, "grad_norm": 1.765625, "learning_rate": 1.105450382909926e-05, "loss": 0.8863, "step": 8213 }, { "epoch": 1.3964249737445613, "grad_norm": 1.7421875, "learning_rate": 1.1052707952572149e-05, "loss": 0.8789, "step": 8214 }, { "epoch": 1.3965964378335514, "grad_norm": 1.6875, "learning_rate": 1.1050912041712264e-05, "loss": 0.8143, "step": 8215 }, { "epoch": 1.3967679019225412, "grad_norm": 1.734375, "learning_rate": 1.1049116096578181e-05, "loss": 0.8136, "step": 8216 }, { "epoch": 1.396939366011531, "grad_norm": 1.8203125, "learning_rate": 1.1047320117228472e-05, "loss": 0.9049, "step": 8217 }, { "epoch": 1.3971108301005208, "grad_norm": 1.6171875, "learning_rate": 1.1045524103721712e-05, "loss": 0.806, "step": 8218 }, { "epoch": 1.3972822941895107, "grad_norm": 1.734375, "learning_rate": 1.1043728056116474e-05, "loss": 0.8712, "step": 8219 }, { "epoch": 1.3974537582785005, "grad_norm": 1.5625, "learning_rate": 1.1041931974471331e-05, "loss": 0.8647, "step": 8220 }, { "epoch": 1.3976252223674903, "grad_norm": 1.765625, "learning_rate": 1.1040135858844864e-05, "loss": 0.9376, "step": 8221 }, { "epoch": 1.3977966864564801, "grad_norm": 1.6484375, "learning_rate": 1.1038339709295652e-05, "loss": 0.8598, "step": 8222 }, { "epoch": 1.3979681505454702, "grad_norm": 1.71875, "learning_rate": 1.103654352588227e-05, "loss": 0.8858, "step": 8223 }, { "epoch": 1.39813961463446, "grad_norm": 1.75, "learning_rate": 1.10347473086633e-05, "loss": 0.9499, "step": 8224 }, { "epoch": 1.3983110787234498, "grad_norm": 1.765625, "learning_rate": 1.1032951057697325e-05, "loss": 0.8693, "step": 8225 }, { "epoch": 1.3984825428124397, "grad_norm": 1.703125, "learning_rate": 1.1031154773042925e-05, "loss": 0.8588, "step": 8226 }, { "epoch": 1.3986540069014297, "grad_norm": 1.65625, "learning_rate": 1.1029358454758687e-05, "loss": 0.8084, "step": 8227 }, { "epoch": 1.3988254709904195, "grad_norm": 1.6640625, "learning_rate": 1.102756210290319e-05, "loss": 0.819, "step": 8228 }, { "epoch": 1.3989969350794094, "grad_norm": 1.671875, "learning_rate": 1.1025765717535026e-05, "loss": 0.8563, "step": 8229 }, { "epoch": 1.3991683991683992, "grad_norm": 1.7109375, "learning_rate": 1.1023969298712783e-05, "loss": 0.8326, "step": 8230 }, { "epoch": 1.399339863257389, "grad_norm": 1.7890625, "learning_rate": 1.102217284649504e-05, "loss": 0.9333, "step": 8231 }, { "epoch": 1.3995113273463788, "grad_norm": 1.734375, "learning_rate": 1.1020376360940393e-05, "loss": 0.8824, "step": 8232 }, { "epoch": 1.3996827914353687, "grad_norm": 1.6953125, "learning_rate": 1.101857984210743e-05, "loss": 0.8249, "step": 8233 }, { "epoch": 1.3998542555243585, "grad_norm": 1.8046875, "learning_rate": 1.101678329005474e-05, "loss": 0.8813, "step": 8234 }, { "epoch": 1.4000257196133485, "grad_norm": 1.7734375, "learning_rate": 1.101498670484092e-05, "loss": 0.8005, "step": 8235 }, { "epoch": 1.4001971837023384, "grad_norm": 1.7109375, "learning_rate": 1.1013190086524564e-05, "loss": 0.8701, "step": 8236 }, { "epoch": 1.4003686477913282, "grad_norm": 1.6953125, "learning_rate": 1.1011393435164261e-05, "loss": 0.894, "step": 8237 }, { "epoch": 1.400540111880318, "grad_norm": 1.6875, "learning_rate": 1.1009596750818611e-05, "loss": 0.843, "step": 8238 }, { "epoch": 1.400711575969308, "grad_norm": 1.59375, "learning_rate": 1.100780003354621e-05, "loss": 0.844, "step": 8239 }, { "epoch": 1.400883040058298, "grad_norm": 1.78125, "learning_rate": 1.1006003283405653e-05, "loss": 0.9017, "step": 8240 }, { "epoch": 1.4010545041472877, "grad_norm": 1.703125, "learning_rate": 1.1004206500455541e-05, "loss": 0.8942, "step": 8241 }, { "epoch": 1.4012259682362775, "grad_norm": 1.59375, "learning_rate": 1.1002409684754473e-05, "loss": 0.886, "step": 8242 }, { "epoch": 1.4013974323252674, "grad_norm": 1.7578125, "learning_rate": 1.1000612836361053e-05, "loss": 0.9032, "step": 8243 }, { "epoch": 1.4015688964142572, "grad_norm": 1.875, "learning_rate": 1.0998815955333878e-05, "loss": 0.8702, "step": 8244 }, { "epoch": 1.401740360503247, "grad_norm": 1.6796875, "learning_rate": 1.0997019041731553e-05, "loss": 0.9045, "step": 8245 }, { "epoch": 1.4019118245922368, "grad_norm": 1.6875, "learning_rate": 1.0995222095612686e-05, "loss": 0.942, "step": 8246 }, { "epoch": 1.402083288681227, "grad_norm": 1.734375, "learning_rate": 1.0993425117035876e-05, "loss": 0.8191, "step": 8247 }, { "epoch": 1.4022547527702167, "grad_norm": 1.7421875, "learning_rate": 1.0991628106059734e-05, "loss": 0.8259, "step": 8248 }, { "epoch": 1.4024262168592065, "grad_norm": 1.6640625, "learning_rate": 1.0989831062742867e-05, "loss": 0.7985, "step": 8249 }, { "epoch": 1.4025976809481964, "grad_norm": 1.6484375, "learning_rate": 1.0988033987143876e-05, "loss": 0.8193, "step": 8250 }, { "epoch": 1.4027691450371862, "grad_norm": 1.7265625, "learning_rate": 1.0986236879321379e-05, "loss": 0.9145, "step": 8251 }, { "epoch": 1.4029406091261762, "grad_norm": 1.6796875, "learning_rate": 1.0984439739333983e-05, "loss": 0.8936, "step": 8252 }, { "epoch": 1.403112073215166, "grad_norm": 1.6328125, "learning_rate": 1.09826425672403e-05, "loss": 0.8913, "step": 8253 }, { "epoch": 1.403283537304156, "grad_norm": 1.6875, "learning_rate": 1.0980845363098945e-05, "loss": 0.8355, "step": 8254 }, { "epoch": 1.4034550013931457, "grad_norm": 1.671875, "learning_rate": 1.0979048126968525e-05, "loss": 0.8765, "step": 8255 }, { "epoch": 1.4036264654821355, "grad_norm": 1.703125, "learning_rate": 1.0977250858907662e-05, "loss": 0.8471, "step": 8256 }, { "epoch": 1.4037979295711254, "grad_norm": 1.6953125, "learning_rate": 1.0975453558974968e-05, "loss": 0.8599, "step": 8257 }, { "epoch": 1.4039693936601152, "grad_norm": 1.6875, "learning_rate": 1.0973656227229059e-05, "loss": 0.8168, "step": 8258 }, { "epoch": 1.4041408577491052, "grad_norm": 1.71875, "learning_rate": 1.0971858863728557e-05, "loss": 0.8604, "step": 8259 }, { "epoch": 1.404312321838095, "grad_norm": 1.6484375, "learning_rate": 1.0970061468532075e-05, "loss": 0.836, "step": 8260 }, { "epoch": 1.404483785927085, "grad_norm": 1.6875, "learning_rate": 1.0968264041698237e-05, "loss": 0.8421, "step": 8261 }, { "epoch": 1.4046552500160747, "grad_norm": 1.703125, "learning_rate": 1.096646658328566e-05, "loss": 0.8652, "step": 8262 }, { "epoch": 1.4048267141050645, "grad_norm": 1.7734375, "learning_rate": 1.0964669093352969e-05, "loss": 0.9318, "step": 8263 }, { "epoch": 1.4049981781940546, "grad_norm": 1.6484375, "learning_rate": 1.0962871571958788e-05, "loss": 0.8129, "step": 8264 }, { "epoch": 1.4051696422830444, "grad_norm": 1.671875, "learning_rate": 1.096107401916174e-05, "loss": 0.9011, "step": 8265 }, { "epoch": 1.4053411063720342, "grad_norm": 1.78125, "learning_rate": 1.0959276435020448e-05, "loss": 0.9159, "step": 8266 }, { "epoch": 1.405512570461024, "grad_norm": 1.6640625, "learning_rate": 1.0957478819593538e-05, "loss": 0.8199, "step": 8267 }, { "epoch": 1.405684034550014, "grad_norm": 1.640625, "learning_rate": 1.0955681172939639e-05, "loss": 0.8743, "step": 8268 }, { "epoch": 1.4058554986390037, "grad_norm": 1.6796875, "learning_rate": 1.0953883495117379e-05, "loss": 0.7695, "step": 8269 }, { "epoch": 1.4060269627279935, "grad_norm": 1.640625, "learning_rate": 1.0952085786185385e-05, "loss": 0.8522, "step": 8270 }, { "epoch": 1.4061984268169836, "grad_norm": 1.7421875, "learning_rate": 1.095028804620229e-05, "loss": 0.8782, "step": 8271 }, { "epoch": 1.4063698909059734, "grad_norm": 1.75, "learning_rate": 1.094849027522672e-05, "loss": 1.0023, "step": 8272 }, { "epoch": 1.4065413549949632, "grad_norm": 1.703125, "learning_rate": 1.0946692473317313e-05, "loss": 0.8444, "step": 8273 }, { "epoch": 1.406712819083953, "grad_norm": 1.7578125, "learning_rate": 1.0944894640532697e-05, "loss": 0.825, "step": 8274 }, { "epoch": 1.406884283172943, "grad_norm": 1.8125, "learning_rate": 1.0943096776931512e-05, "loss": 0.931, "step": 8275 }, { "epoch": 1.407055747261933, "grad_norm": 1.7421875, "learning_rate": 1.0941298882572387e-05, "loss": 0.8311, "step": 8276 }, { "epoch": 1.4072272113509228, "grad_norm": 1.7578125, "learning_rate": 1.0939500957513964e-05, "loss": 0.9291, "step": 8277 }, { "epoch": 1.4073986754399126, "grad_norm": 1.6796875, "learning_rate": 1.0937703001814877e-05, "loss": 0.8732, "step": 8278 }, { "epoch": 1.4075701395289024, "grad_norm": 1.6796875, "learning_rate": 1.0935905015533761e-05, "loss": 0.7694, "step": 8279 }, { "epoch": 1.4077416036178922, "grad_norm": 1.6875, "learning_rate": 1.093410699872926e-05, "loss": 0.8506, "step": 8280 }, { "epoch": 1.407913067706882, "grad_norm": 1.6171875, "learning_rate": 1.093230895146001e-05, "loss": 0.839, "step": 8281 }, { "epoch": 1.408084531795872, "grad_norm": 1.6953125, "learning_rate": 1.0930510873784657e-05, "loss": 0.8417, "step": 8282 }, { "epoch": 1.408255995884862, "grad_norm": 1.7734375, "learning_rate": 1.0928712765761837e-05, "loss": 0.8868, "step": 8283 }, { "epoch": 1.4084274599738518, "grad_norm": 1.734375, "learning_rate": 1.09269146274502e-05, "loss": 0.8386, "step": 8284 }, { "epoch": 1.4085989240628416, "grad_norm": 1.6171875, "learning_rate": 1.0925116458908385e-05, "loss": 0.7894, "step": 8285 }, { "epoch": 1.4087703881518314, "grad_norm": 1.75, "learning_rate": 1.092331826019504e-05, "loss": 0.8861, "step": 8286 }, { "epoch": 1.4089418522408212, "grad_norm": 1.765625, "learning_rate": 1.0921520031368808e-05, "loss": 0.8584, "step": 8287 }, { "epoch": 1.4091133163298113, "grad_norm": 1.7578125, "learning_rate": 1.0919721772488341e-05, "loss": 0.869, "step": 8288 }, { "epoch": 1.4092847804188011, "grad_norm": 1.5859375, "learning_rate": 1.0917923483612278e-05, "loss": 0.8477, "step": 8289 }, { "epoch": 1.409456244507791, "grad_norm": 1.6484375, "learning_rate": 1.0916125164799276e-05, "loss": 0.8251, "step": 8290 }, { "epoch": 1.4096277085967808, "grad_norm": 1.8046875, "learning_rate": 1.0914326816107984e-05, "loss": 0.8932, "step": 8291 }, { "epoch": 1.4097991726857706, "grad_norm": 1.6796875, "learning_rate": 1.091252843759705e-05, "loss": 0.8542, "step": 8292 }, { "epoch": 1.4099706367747604, "grad_norm": 1.71875, "learning_rate": 1.0910730029325127e-05, "loss": 0.9365, "step": 8293 }, { "epoch": 1.4101421008637502, "grad_norm": 1.5546875, "learning_rate": 1.0908931591350871e-05, "loss": 0.873, "step": 8294 }, { "epoch": 1.41031356495274, "grad_norm": 1.6953125, "learning_rate": 1.0907133123732931e-05, "loss": 0.849, "step": 8295 }, { "epoch": 1.4104850290417301, "grad_norm": 1.6484375, "learning_rate": 1.0905334626529964e-05, "loss": 0.8907, "step": 8296 }, { "epoch": 1.41065649313072, "grad_norm": 1.7109375, "learning_rate": 1.0903536099800627e-05, "loss": 0.8811, "step": 8297 }, { "epoch": 1.4108279572197098, "grad_norm": 1.6953125, "learning_rate": 1.0901737543603574e-05, "loss": 0.8517, "step": 8298 }, { "epoch": 1.4109994213086996, "grad_norm": 1.7265625, "learning_rate": 1.0899938957997465e-05, "loss": 0.8915, "step": 8299 }, { "epoch": 1.4111708853976896, "grad_norm": 1.7421875, "learning_rate": 1.0898140343040957e-05, "loss": 0.8506, "step": 8300 }, { "epoch": 1.4113423494866795, "grad_norm": 1.6875, "learning_rate": 1.089634169879271e-05, "loss": 0.8708, "step": 8301 }, { "epoch": 1.4115138135756693, "grad_norm": 1.7734375, "learning_rate": 1.0894543025311384e-05, "loss": 0.9046, "step": 8302 }, { "epoch": 1.4116852776646591, "grad_norm": 1.8046875, "learning_rate": 1.0892744322655645e-05, "loss": 0.8828, "step": 8303 }, { "epoch": 1.411856741753649, "grad_norm": 1.625, "learning_rate": 1.0890945590884147e-05, "loss": 0.824, "step": 8304 }, { "epoch": 1.4120282058426388, "grad_norm": 1.609375, "learning_rate": 1.0889146830055564e-05, "loss": 0.8713, "step": 8305 }, { "epoch": 1.4121996699316286, "grad_norm": 1.703125, "learning_rate": 1.0887348040228552e-05, "loss": 0.8932, "step": 8306 }, { "epoch": 1.4123711340206184, "grad_norm": 1.71875, "learning_rate": 1.0885549221461777e-05, "loss": 0.8144, "step": 8307 }, { "epoch": 1.4125425981096085, "grad_norm": 1.6953125, "learning_rate": 1.0883750373813909e-05, "loss": 0.8233, "step": 8308 }, { "epoch": 1.4127140621985983, "grad_norm": 1.703125, "learning_rate": 1.088195149734361e-05, "loss": 0.945, "step": 8309 }, { "epoch": 1.4128855262875881, "grad_norm": 1.6640625, "learning_rate": 1.0880152592109554e-05, "loss": 0.8226, "step": 8310 }, { "epoch": 1.413056990376578, "grad_norm": 1.6953125, "learning_rate": 1.0878353658170411e-05, "loss": 1.0318, "step": 8311 }, { "epoch": 1.413228454465568, "grad_norm": 1.640625, "learning_rate": 1.0876554695584845e-05, "loss": 0.8666, "step": 8312 }, { "epoch": 1.4133999185545578, "grad_norm": 1.6171875, "learning_rate": 1.087475570441153e-05, "loss": 0.8516, "step": 8313 }, { "epoch": 1.4135713826435476, "grad_norm": 1.671875, "learning_rate": 1.0872956684709138e-05, "loss": 0.8655, "step": 8314 }, { "epoch": 1.4137428467325375, "grad_norm": 1.7421875, "learning_rate": 1.0871157636536338e-05, "loss": 0.8286, "step": 8315 }, { "epoch": 1.4139143108215273, "grad_norm": 1.6875, "learning_rate": 1.086935855995181e-05, "loss": 0.904, "step": 8316 }, { "epoch": 1.4140857749105171, "grad_norm": 1.75, "learning_rate": 1.0867559455014224e-05, "loss": 0.9264, "step": 8317 }, { "epoch": 1.414257238999507, "grad_norm": 1.671875, "learning_rate": 1.086576032178226e-05, "loss": 0.8874, "step": 8318 }, { "epoch": 1.4144287030884968, "grad_norm": 1.6640625, "learning_rate": 1.0863961160314588e-05, "loss": 0.8748, "step": 8319 }, { "epoch": 1.4146001671774868, "grad_norm": 1.7109375, "learning_rate": 1.086216197066989e-05, "loss": 0.8369, "step": 8320 }, { "epoch": 1.4147716312664766, "grad_norm": 1.6875, "learning_rate": 1.0860362752906846e-05, "loss": 0.8304, "step": 8321 }, { "epoch": 1.4149430953554665, "grad_norm": 1.7578125, "learning_rate": 1.085856350708413e-05, "loss": 0.8489, "step": 8322 }, { "epoch": 1.4151145594444563, "grad_norm": 1.625, "learning_rate": 1.0856764233260428e-05, "loss": 0.7751, "step": 8323 }, { "epoch": 1.4152860235334463, "grad_norm": 1.6640625, "learning_rate": 1.0854964931494418e-05, "loss": 0.8466, "step": 8324 }, { "epoch": 1.4154574876224362, "grad_norm": 1.6484375, "learning_rate": 1.0853165601844779e-05, "loss": 0.8576, "step": 8325 }, { "epoch": 1.415628951711426, "grad_norm": 1.7734375, "learning_rate": 1.0851366244370199e-05, "loss": 0.8965, "step": 8326 }, { "epoch": 1.4158004158004158, "grad_norm": 1.765625, "learning_rate": 1.084956685912936e-05, "loss": 0.9559, "step": 8327 }, { "epoch": 1.4159718798894056, "grad_norm": 1.59375, "learning_rate": 1.0847767446180945e-05, "loss": 0.8144, "step": 8328 }, { "epoch": 1.4161433439783955, "grad_norm": 1.765625, "learning_rate": 1.0845968005583643e-05, "loss": 0.8764, "step": 8329 }, { "epoch": 1.4163148080673853, "grad_norm": 1.6484375, "learning_rate": 1.0844168537396138e-05, "loss": 0.8322, "step": 8330 }, { "epoch": 1.4164862721563751, "grad_norm": 1.6796875, "learning_rate": 1.0842369041677116e-05, "loss": 0.8158, "step": 8331 }, { "epoch": 1.4166577362453652, "grad_norm": 1.796875, "learning_rate": 1.0840569518485272e-05, "loss": 0.9187, "step": 8332 }, { "epoch": 1.416829200334355, "grad_norm": 1.65625, "learning_rate": 1.0838769967879286e-05, "loss": 0.7694, "step": 8333 }, { "epoch": 1.4170006644233448, "grad_norm": 1.734375, "learning_rate": 1.0836970389917857e-05, "loss": 0.8279, "step": 8334 }, { "epoch": 1.4171721285123347, "grad_norm": 1.6171875, "learning_rate": 1.083517078465967e-05, "loss": 0.814, "step": 8335 }, { "epoch": 1.4173435926013247, "grad_norm": 1.65625, "learning_rate": 1.0833371152163417e-05, "loss": 0.7892, "step": 8336 }, { "epoch": 1.4175150566903145, "grad_norm": 1.75, "learning_rate": 1.0831571492487794e-05, "loss": 0.8923, "step": 8337 }, { "epoch": 1.4176865207793043, "grad_norm": 1.84375, "learning_rate": 1.0829771805691493e-05, "loss": 0.9074, "step": 8338 }, { "epoch": 1.4178579848682942, "grad_norm": 1.6796875, "learning_rate": 1.0827972091833207e-05, "loss": 0.8024, "step": 8339 }, { "epoch": 1.418029448957284, "grad_norm": 1.6953125, "learning_rate": 1.082617235097164e-05, "loss": 0.8634, "step": 8340 }, { "epoch": 1.4182009130462738, "grad_norm": 1.6953125, "learning_rate": 1.0824372583165477e-05, "loss": 0.8486, "step": 8341 }, { "epoch": 1.4183723771352637, "grad_norm": 1.6015625, "learning_rate": 1.0822572788473418e-05, "loss": 0.8686, "step": 8342 }, { "epoch": 1.4185438412242535, "grad_norm": 1.6015625, "learning_rate": 1.0820772966954164e-05, "loss": 0.8334, "step": 8343 }, { "epoch": 1.4187153053132435, "grad_norm": 1.7265625, "learning_rate": 1.0818973118666414e-05, "loss": 0.9122, "step": 8344 }, { "epoch": 1.4188867694022333, "grad_norm": 1.796875, "learning_rate": 1.0817173243668866e-05, "loss": 0.888, "step": 8345 }, { "epoch": 1.4190582334912232, "grad_norm": 1.6640625, "learning_rate": 1.0815373342020221e-05, "loss": 0.8482, "step": 8346 }, { "epoch": 1.419229697580213, "grad_norm": 1.640625, "learning_rate": 1.0813573413779181e-05, "loss": 0.8716, "step": 8347 }, { "epoch": 1.4194011616692028, "grad_norm": 1.625, "learning_rate": 1.0811773459004449e-05, "loss": 0.8647, "step": 8348 }, { "epoch": 1.4195726257581929, "grad_norm": 1.5625, "learning_rate": 1.0809973477754727e-05, "loss": 0.7912, "step": 8349 }, { "epoch": 1.4197440898471827, "grad_norm": 1.8203125, "learning_rate": 1.0808173470088719e-05, "loss": 0.9811, "step": 8350 }, { "epoch": 1.4199155539361725, "grad_norm": 1.6640625, "learning_rate": 1.0806373436065134e-05, "loss": 0.8185, "step": 8351 }, { "epoch": 1.4200870180251624, "grad_norm": 1.6953125, "learning_rate": 1.0804573375742673e-05, "loss": 0.9047, "step": 8352 }, { "epoch": 1.4202584821141522, "grad_norm": 1.65625, "learning_rate": 1.0802773289180044e-05, "loss": 0.7983, "step": 8353 }, { "epoch": 1.420429946203142, "grad_norm": 1.703125, "learning_rate": 1.0800973176435953e-05, "loss": 0.8938, "step": 8354 }, { "epoch": 1.4206014102921318, "grad_norm": 1.75, "learning_rate": 1.0799173037569116e-05, "loss": 0.8996, "step": 8355 }, { "epoch": 1.4207728743811219, "grad_norm": 1.7109375, "learning_rate": 1.079737287263823e-05, "loss": 0.86, "step": 8356 }, { "epoch": 1.4209443384701117, "grad_norm": 1.78125, "learning_rate": 1.0795572681702018e-05, "loss": 0.8385, "step": 8357 }, { "epoch": 1.4211158025591015, "grad_norm": 1.7421875, "learning_rate": 1.079377246481918e-05, "loss": 0.8656, "step": 8358 }, { "epoch": 1.4212872666480914, "grad_norm": 1.7734375, "learning_rate": 1.0791972222048436e-05, "loss": 0.8572, "step": 8359 }, { "epoch": 1.4214587307370812, "grad_norm": 1.7734375, "learning_rate": 1.0790171953448496e-05, "loss": 0.8884, "step": 8360 }, { "epoch": 1.4216301948260712, "grad_norm": 1.625, "learning_rate": 1.0788371659078072e-05, "loss": 0.8412, "step": 8361 }, { "epoch": 1.421801658915061, "grad_norm": 1.703125, "learning_rate": 1.0786571338995879e-05, "loss": 0.8863, "step": 8362 }, { "epoch": 1.4219731230040509, "grad_norm": 1.7109375, "learning_rate": 1.0784770993260634e-05, "loss": 0.8877, "step": 8363 }, { "epoch": 1.4221445870930407, "grad_norm": 1.6875, "learning_rate": 1.0782970621931048e-05, "loss": 0.8147, "step": 8364 }, { "epoch": 1.4223160511820305, "grad_norm": 1.859375, "learning_rate": 1.0781170225065845e-05, "loss": 0.8803, "step": 8365 }, { "epoch": 1.4224875152710204, "grad_norm": 1.7890625, "learning_rate": 1.0779369802723738e-05, "loss": 0.8713, "step": 8366 }, { "epoch": 1.4226589793600102, "grad_norm": 1.65625, "learning_rate": 1.0777569354963448e-05, "loss": 0.8642, "step": 8367 }, { "epoch": 1.4228304434490002, "grad_norm": 1.7109375, "learning_rate": 1.0775768881843693e-05, "loss": 0.8819, "step": 8368 }, { "epoch": 1.42300190753799, "grad_norm": 1.703125, "learning_rate": 1.0773968383423197e-05, "loss": 0.8153, "step": 8369 }, { "epoch": 1.4231733716269799, "grad_norm": 1.734375, "learning_rate": 1.0772167859760674e-05, "loss": 0.9, "step": 8370 }, { "epoch": 1.4233448357159697, "grad_norm": 1.7265625, "learning_rate": 1.0770367310914849e-05, "loss": 0.8695, "step": 8371 }, { "epoch": 1.4235162998049595, "grad_norm": 1.7734375, "learning_rate": 1.0768566736944445e-05, "loss": 0.9497, "step": 8372 }, { "epoch": 1.4236877638939496, "grad_norm": 1.71875, "learning_rate": 1.0766766137908187e-05, "loss": 0.8198, "step": 8373 }, { "epoch": 1.4238592279829394, "grad_norm": 1.7265625, "learning_rate": 1.0764965513864796e-05, "loss": 0.9214, "step": 8374 }, { "epoch": 1.4240306920719292, "grad_norm": 1.6875, "learning_rate": 1.0763164864873e-05, "loss": 0.8708, "step": 8375 }, { "epoch": 1.424202156160919, "grad_norm": 1.6796875, "learning_rate": 1.0761364190991523e-05, "loss": 0.8654, "step": 8376 }, { "epoch": 1.4243736202499089, "grad_norm": 1.6171875, "learning_rate": 1.0759563492279093e-05, "loss": 0.8355, "step": 8377 }, { "epoch": 1.4245450843388987, "grad_norm": 1.671875, "learning_rate": 1.0757762768794438e-05, "loss": 0.8843, "step": 8378 }, { "epoch": 1.4247165484278885, "grad_norm": 1.6953125, "learning_rate": 1.0755962020596285e-05, "loss": 0.7986, "step": 8379 }, { "epoch": 1.4248880125168786, "grad_norm": 1.6171875, "learning_rate": 1.0754161247743367e-05, "loss": 0.8253, "step": 8380 }, { "epoch": 1.4250594766058684, "grad_norm": 1.6953125, "learning_rate": 1.0752360450294408e-05, "loss": 0.8199, "step": 8381 }, { "epoch": 1.4252309406948582, "grad_norm": 1.6328125, "learning_rate": 1.0750559628308142e-05, "loss": 0.8519, "step": 8382 }, { "epoch": 1.425402404783848, "grad_norm": 1.65625, "learning_rate": 1.0748758781843296e-05, "loss": 0.8915, "step": 8383 }, { "epoch": 1.4255738688728379, "grad_norm": 1.703125, "learning_rate": 1.0746957910958612e-05, "loss": 0.8812, "step": 8384 }, { "epoch": 1.425745332961828, "grad_norm": 1.7890625, "learning_rate": 1.0745157015712814e-05, "loss": 0.8603, "step": 8385 }, { "epoch": 1.4259167970508178, "grad_norm": 1.78125, "learning_rate": 1.0743356096164646e-05, "loss": 0.8808, "step": 8386 }, { "epoch": 1.4260882611398076, "grad_norm": 1.8046875, "learning_rate": 1.074155515237283e-05, "loss": 0.9764, "step": 8387 }, { "epoch": 1.4262597252287974, "grad_norm": 1.640625, "learning_rate": 1.0739754184396112e-05, "loss": 0.8728, "step": 8388 }, { "epoch": 1.4264311893177872, "grad_norm": 1.8671875, "learning_rate": 1.0737953192293222e-05, "loss": 0.8995, "step": 8389 }, { "epoch": 1.426602653406777, "grad_norm": 1.671875, "learning_rate": 1.0736152176122901e-05, "loss": 0.7569, "step": 8390 }, { "epoch": 1.4267741174957669, "grad_norm": 1.671875, "learning_rate": 1.0734351135943883e-05, "loss": 0.9227, "step": 8391 }, { "epoch": 1.4269455815847567, "grad_norm": 1.65625, "learning_rate": 1.0732550071814912e-05, "loss": 0.8322, "step": 8392 }, { "epoch": 1.4271170456737468, "grad_norm": 1.6796875, "learning_rate": 1.0730748983794723e-05, "loss": 0.884, "step": 8393 }, { "epoch": 1.4272885097627366, "grad_norm": 1.6796875, "learning_rate": 1.0728947871942057e-05, "loss": 0.8518, "step": 8394 }, { "epoch": 1.4274599738517264, "grad_norm": 1.6640625, "learning_rate": 1.0727146736315656e-05, "loss": 0.791, "step": 8395 }, { "epoch": 1.4276314379407162, "grad_norm": 1.6328125, "learning_rate": 1.0725345576974265e-05, "loss": 0.7869, "step": 8396 }, { "epoch": 1.4278029020297063, "grad_norm": 1.5859375, "learning_rate": 1.0723544393976622e-05, "loss": 0.812, "step": 8397 }, { "epoch": 1.427974366118696, "grad_norm": 1.703125, "learning_rate": 1.0721743187381473e-05, "loss": 0.888, "step": 8398 }, { "epoch": 1.428145830207686, "grad_norm": 1.75, "learning_rate": 1.0719941957247557e-05, "loss": 0.9114, "step": 8399 }, { "epoch": 1.4283172942966758, "grad_norm": 1.6484375, "learning_rate": 1.0718140703633626e-05, "loss": 0.8384, "step": 8400 }, { "epoch": 1.4283172942966758, "eval_loss": 0.8431193828582764, "eval_runtime": 836.8951, "eval_samples_per_second": 2.986, "eval_steps_per_second": 2.986, "step": 8400 }, { "epoch": 1.4284887583856656, "grad_norm": 1.6484375, "learning_rate": 1.0716339426598421e-05, "loss": 0.8651, "step": 8401 }, { "epoch": 1.4286602224746554, "grad_norm": 1.765625, "learning_rate": 1.0714538126200688e-05, "loss": 0.8133, "step": 8402 }, { "epoch": 1.4288316865636452, "grad_norm": 1.75, "learning_rate": 1.0712736802499179e-05, "loss": 0.8836, "step": 8403 }, { "epoch": 1.429003150652635, "grad_norm": 1.6328125, "learning_rate": 1.0710935455552637e-05, "loss": 0.8841, "step": 8404 }, { "epoch": 1.429174614741625, "grad_norm": 1.75, "learning_rate": 1.0709134085419814e-05, "loss": 0.8592, "step": 8405 }, { "epoch": 1.429346078830615, "grad_norm": 1.78125, "learning_rate": 1.0707332692159459e-05, "loss": 0.8613, "step": 8406 }, { "epoch": 1.4295175429196048, "grad_norm": 1.75, "learning_rate": 1.0705531275830319e-05, "loss": 0.8449, "step": 8407 }, { "epoch": 1.4296890070085946, "grad_norm": 1.6796875, "learning_rate": 1.0703729836491148e-05, "loss": 0.8013, "step": 8408 }, { "epoch": 1.4298604710975846, "grad_norm": 1.65625, "learning_rate": 1.0701928374200699e-05, "loss": 0.8001, "step": 8409 }, { "epoch": 1.4300319351865745, "grad_norm": 1.7109375, "learning_rate": 1.070012688901772e-05, "loss": 0.8349, "step": 8410 }, { "epoch": 1.4302033992755643, "grad_norm": 1.6953125, "learning_rate": 1.0698325381000965e-05, "loss": 0.803, "step": 8411 }, { "epoch": 1.430374863364554, "grad_norm": 1.7265625, "learning_rate": 1.0696523850209193e-05, "loss": 0.8824, "step": 8412 }, { "epoch": 1.430546327453544, "grad_norm": 1.6796875, "learning_rate": 1.0694722296701155e-05, "loss": 0.8719, "step": 8413 }, { "epoch": 1.4307177915425338, "grad_norm": 1.71875, "learning_rate": 1.0692920720535604e-05, "loss": 0.8876, "step": 8414 }, { "epoch": 1.4308892556315236, "grad_norm": 1.6171875, "learning_rate": 1.0691119121771305e-05, "loss": 0.8705, "step": 8415 }, { "epoch": 1.4310607197205134, "grad_norm": 1.7109375, "learning_rate": 1.0689317500467006e-05, "loss": 0.792, "step": 8416 }, { "epoch": 1.4312321838095035, "grad_norm": 1.671875, "learning_rate": 1.0687515856681466e-05, "loss": 0.8669, "step": 8417 }, { "epoch": 1.4314036478984933, "grad_norm": 1.59375, "learning_rate": 1.0685714190473444e-05, "loss": 0.8029, "step": 8418 }, { "epoch": 1.431575111987483, "grad_norm": 1.625, "learning_rate": 1.0683912501901703e-05, "loss": 0.8535, "step": 8419 }, { "epoch": 1.431746576076473, "grad_norm": 1.703125, "learning_rate": 1.0682110791024997e-05, "loss": 0.9202, "step": 8420 }, { "epoch": 1.431918040165463, "grad_norm": 1.6171875, "learning_rate": 1.0680309057902091e-05, "loss": 0.92, "step": 8421 }, { "epoch": 1.4320895042544528, "grad_norm": 1.625, "learning_rate": 1.0678507302591748e-05, "loss": 0.8744, "step": 8422 }, { "epoch": 1.4322609683434426, "grad_norm": 1.59375, "learning_rate": 1.0676705525152722e-05, "loss": 0.7187, "step": 8423 }, { "epoch": 1.4324324324324325, "grad_norm": 1.5859375, "learning_rate": 1.0674903725643783e-05, "loss": 0.8218, "step": 8424 }, { "epoch": 1.4326038965214223, "grad_norm": 1.7265625, "learning_rate": 1.067310190412369e-05, "loss": 0.8625, "step": 8425 }, { "epoch": 1.432775360610412, "grad_norm": 1.6484375, "learning_rate": 1.0671300060651215e-05, "loss": 0.8802, "step": 8426 }, { "epoch": 1.432946824699402, "grad_norm": 1.640625, "learning_rate": 1.0669498195285113e-05, "loss": 0.9051, "step": 8427 }, { "epoch": 1.4331182887883918, "grad_norm": 1.609375, "learning_rate": 1.0667696308084152e-05, "loss": 0.795, "step": 8428 }, { "epoch": 1.4332897528773818, "grad_norm": 1.734375, "learning_rate": 1.0665894399107103e-05, "loss": 0.8164, "step": 8429 }, { "epoch": 1.4334612169663716, "grad_norm": 1.7265625, "learning_rate": 1.0664092468412726e-05, "loss": 0.8222, "step": 8430 }, { "epoch": 1.4336326810553615, "grad_norm": 1.6953125, "learning_rate": 1.0662290516059797e-05, "loss": 0.8728, "step": 8431 }, { "epoch": 1.4338041451443513, "grad_norm": 1.8046875, "learning_rate": 1.0660488542107081e-05, "loss": 0.8853, "step": 8432 }, { "epoch": 1.4339756092333413, "grad_norm": 1.65625, "learning_rate": 1.0658686546613344e-05, "loss": 0.9017, "step": 8433 }, { "epoch": 1.4341470733223312, "grad_norm": 1.734375, "learning_rate": 1.065688452963736e-05, "loss": 0.8426, "step": 8434 }, { "epoch": 1.434318537411321, "grad_norm": 1.7734375, "learning_rate": 1.0655082491237896e-05, "loss": 0.8915, "step": 8435 }, { "epoch": 1.4344900015003108, "grad_norm": 1.65625, "learning_rate": 1.0653280431473725e-05, "loss": 0.8554, "step": 8436 }, { "epoch": 1.4346614655893006, "grad_norm": 1.75, "learning_rate": 1.0651478350403621e-05, "loss": 0.9187, "step": 8437 }, { "epoch": 1.4348329296782905, "grad_norm": 1.703125, "learning_rate": 1.0649676248086353e-05, "loss": 0.8988, "step": 8438 }, { "epoch": 1.4350043937672803, "grad_norm": 1.65625, "learning_rate": 1.0647874124580697e-05, "loss": 0.9059, "step": 8439 }, { "epoch": 1.43517585785627, "grad_norm": 1.65625, "learning_rate": 1.0646071979945427e-05, "loss": 0.883, "step": 8440 }, { "epoch": 1.4353473219452602, "grad_norm": 1.8984375, "learning_rate": 1.0644269814239314e-05, "loss": 0.9435, "step": 8441 }, { "epoch": 1.43551878603425, "grad_norm": 1.71875, "learning_rate": 1.0642467627521138e-05, "loss": 0.9301, "step": 8442 }, { "epoch": 1.4356902501232398, "grad_norm": 1.6796875, "learning_rate": 1.0640665419849674e-05, "loss": 0.8366, "step": 8443 }, { "epoch": 1.4358617142122296, "grad_norm": 1.6484375, "learning_rate": 1.06388631912837e-05, "loss": 0.8886, "step": 8444 }, { "epoch": 1.4360331783012195, "grad_norm": 1.75, "learning_rate": 1.063706094188199e-05, "loss": 0.8579, "step": 8445 }, { "epoch": 1.4362046423902095, "grad_norm": 1.6796875, "learning_rate": 1.0635258671703324e-05, "loss": 0.874, "step": 8446 }, { "epoch": 1.4363761064791993, "grad_norm": 1.671875, "learning_rate": 1.0633456380806478e-05, "loss": 0.8619, "step": 8447 }, { "epoch": 1.4365475705681892, "grad_norm": 1.7109375, "learning_rate": 1.0631654069250235e-05, "loss": 0.8869, "step": 8448 }, { "epoch": 1.436719034657179, "grad_norm": 1.671875, "learning_rate": 1.0629851737093375e-05, "loss": 0.815, "step": 8449 }, { "epoch": 1.4368904987461688, "grad_norm": 1.6328125, "learning_rate": 1.062804938439468e-05, "loss": 0.842, "step": 8450 }, { "epoch": 1.4370619628351586, "grad_norm": 1.5625, "learning_rate": 1.0626247011212924e-05, "loss": 0.8771, "step": 8451 }, { "epoch": 1.4372334269241485, "grad_norm": 1.6640625, "learning_rate": 1.0624444617606899e-05, "loss": 0.865, "step": 8452 }, { "epoch": 1.4374048910131385, "grad_norm": 1.640625, "learning_rate": 1.0622642203635383e-05, "loss": 0.8332, "step": 8453 }, { "epoch": 1.4375763551021283, "grad_norm": 1.78125, "learning_rate": 1.0620839769357158e-05, "loss": 0.904, "step": 8454 }, { "epoch": 1.4377478191911182, "grad_norm": 1.6953125, "learning_rate": 1.0619037314831015e-05, "loss": 0.8609, "step": 8455 }, { "epoch": 1.437919283280108, "grad_norm": 1.7265625, "learning_rate": 1.0617234840115731e-05, "loss": 0.9157, "step": 8456 }, { "epoch": 1.4380907473690978, "grad_norm": 1.7421875, "learning_rate": 1.0615432345270095e-05, "loss": 0.7958, "step": 8457 }, { "epoch": 1.4382622114580879, "grad_norm": 1.7265625, "learning_rate": 1.0613629830352892e-05, "loss": 0.9063, "step": 8458 }, { "epoch": 1.4384336755470777, "grad_norm": 1.671875, "learning_rate": 1.061182729542291e-05, "loss": 0.8571, "step": 8459 }, { "epoch": 1.4386051396360675, "grad_norm": 1.59375, "learning_rate": 1.0610024740538933e-05, "loss": 0.843, "step": 8460 }, { "epoch": 1.4387766037250573, "grad_norm": 1.6328125, "learning_rate": 1.0608222165759758e-05, "loss": 0.7849, "step": 8461 }, { "epoch": 1.4389480678140472, "grad_norm": 1.6328125, "learning_rate": 1.0606419571144166e-05, "loss": 0.7812, "step": 8462 }, { "epoch": 1.439119531903037, "grad_norm": 1.6875, "learning_rate": 1.0604616956750947e-05, "loss": 0.7796, "step": 8463 }, { "epoch": 1.4392909959920268, "grad_norm": 1.7265625, "learning_rate": 1.0602814322638893e-05, "loss": 0.8566, "step": 8464 }, { "epoch": 1.4394624600810169, "grad_norm": 1.7890625, "learning_rate": 1.0601011668866793e-05, "loss": 0.8924, "step": 8465 }, { "epoch": 1.4396339241700067, "grad_norm": 1.6015625, "learning_rate": 1.0599208995493437e-05, "loss": 0.7558, "step": 8466 }, { "epoch": 1.4398053882589965, "grad_norm": 1.6875, "learning_rate": 1.0597406302577622e-05, "loss": 0.858, "step": 8467 }, { "epoch": 1.4399768523479863, "grad_norm": 1.625, "learning_rate": 1.0595603590178138e-05, "loss": 0.8138, "step": 8468 }, { "epoch": 1.4401483164369762, "grad_norm": 1.65625, "learning_rate": 1.0593800858353778e-05, "loss": 0.8355, "step": 8469 }, { "epoch": 1.4403197805259662, "grad_norm": 1.6328125, "learning_rate": 1.0591998107163335e-05, "loss": 0.7906, "step": 8470 }, { "epoch": 1.440491244614956, "grad_norm": 1.640625, "learning_rate": 1.0590195336665605e-05, "loss": 0.8069, "step": 8471 }, { "epoch": 1.4406627087039459, "grad_norm": 1.6953125, "learning_rate": 1.0588392546919384e-05, "loss": 0.8495, "step": 8472 }, { "epoch": 1.4408341727929357, "grad_norm": 1.6484375, "learning_rate": 1.0586589737983465e-05, "loss": 0.8981, "step": 8473 }, { "epoch": 1.4410056368819255, "grad_norm": 1.6640625, "learning_rate": 1.0584786909916647e-05, "loss": 0.8765, "step": 8474 }, { "epoch": 1.4411771009709153, "grad_norm": 1.7265625, "learning_rate": 1.0582984062777726e-05, "loss": 0.7787, "step": 8475 }, { "epoch": 1.4413485650599052, "grad_norm": 1.7421875, "learning_rate": 1.0581181196625496e-05, "loss": 0.8043, "step": 8476 }, { "epoch": 1.4415200291488952, "grad_norm": 1.6875, "learning_rate": 1.0579378311518759e-05, "loss": 0.8697, "step": 8477 }, { "epoch": 1.441691493237885, "grad_norm": 1.65625, "learning_rate": 1.0577575407516318e-05, "loss": 0.9282, "step": 8478 }, { "epoch": 1.4418629573268749, "grad_norm": 1.796875, "learning_rate": 1.0575772484676965e-05, "loss": 0.9028, "step": 8479 }, { "epoch": 1.4420344214158647, "grad_norm": 1.8359375, "learning_rate": 1.0573969543059503e-05, "loss": 0.8979, "step": 8480 }, { "epoch": 1.4422058855048545, "grad_norm": 1.7109375, "learning_rate": 1.0572166582722734e-05, "loss": 0.7764, "step": 8481 }, { "epoch": 1.4423773495938446, "grad_norm": 1.6796875, "learning_rate": 1.0570363603725456e-05, "loss": 0.8835, "step": 8482 }, { "epoch": 1.4425488136828344, "grad_norm": 1.6953125, "learning_rate": 1.0568560606126475e-05, "loss": 0.7674, "step": 8483 }, { "epoch": 1.4427202777718242, "grad_norm": 1.7890625, "learning_rate": 1.0566757589984593e-05, "loss": 0.9045, "step": 8484 }, { "epoch": 1.442891741860814, "grad_norm": 1.671875, "learning_rate": 1.0564954555358606e-05, "loss": 0.8809, "step": 8485 }, { "epoch": 1.4430632059498039, "grad_norm": 1.7109375, "learning_rate": 1.056315150230733e-05, "loss": 0.8396, "step": 8486 }, { "epoch": 1.4432346700387937, "grad_norm": 1.7578125, "learning_rate": 1.056134843088956e-05, "loss": 0.89, "step": 8487 }, { "epoch": 1.4434061341277835, "grad_norm": 1.796875, "learning_rate": 1.0559545341164103e-05, "loss": 0.8926, "step": 8488 }, { "epoch": 1.4435775982167733, "grad_norm": 1.75, "learning_rate": 1.0557742233189767e-05, "loss": 0.8657, "step": 8489 }, { "epoch": 1.4437490623057634, "grad_norm": 1.75, "learning_rate": 1.0555939107025357e-05, "loss": 0.8467, "step": 8490 }, { "epoch": 1.4439205263947532, "grad_norm": 1.65625, "learning_rate": 1.055413596272968e-05, "loss": 0.8783, "step": 8491 }, { "epoch": 1.444091990483743, "grad_norm": 1.7734375, "learning_rate": 1.055233280036154e-05, "loss": 0.8765, "step": 8492 }, { "epoch": 1.4442634545727329, "grad_norm": 1.7734375, "learning_rate": 1.0550529619979748e-05, "loss": 0.9313, "step": 8493 }, { "epoch": 1.444434918661723, "grad_norm": 1.71875, "learning_rate": 1.0548726421643112e-05, "loss": 0.8334, "step": 8494 }, { "epoch": 1.4446063827507127, "grad_norm": 1.765625, "learning_rate": 1.054692320541044e-05, "loss": 0.8327, "step": 8495 }, { "epoch": 1.4447778468397026, "grad_norm": 1.7265625, "learning_rate": 1.0545119971340545e-05, "loss": 0.943, "step": 8496 }, { "epoch": 1.4449493109286924, "grad_norm": 1.734375, "learning_rate": 1.0543316719492233e-05, "loss": 0.8807, "step": 8497 }, { "epoch": 1.4451207750176822, "grad_norm": 1.703125, "learning_rate": 1.054151344992432e-05, "loss": 0.8186, "step": 8498 }, { "epoch": 1.445292239106672, "grad_norm": 1.6484375, "learning_rate": 1.0539710162695611e-05, "loss": 0.943, "step": 8499 }, { "epoch": 1.4454637031956619, "grad_norm": 1.796875, "learning_rate": 1.053790685786492e-05, "loss": 0.8957, "step": 8500 }, { "epoch": 1.4456351672846517, "grad_norm": 1.6484375, "learning_rate": 1.0536103535491067e-05, "loss": 0.8395, "step": 8501 }, { "epoch": 1.4458066313736417, "grad_norm": 1.7421875, "learning_rate": 1.0534300195632856e-05, "loss": 0.802, "step": 8502 }, { "epoch": 1.4459780954626316, "grad_norm": 1.7421875, "learning_rate": 1.0532496838349101e-05, "loss": 0.8584, "step": 8503 }, { "epoch": 1.4461495595516214, "grad_norm": 1.6484375, "learning_rate": 1.0530693463698622e-05, "loss": 0.8241, "step": 8504 }, { "epoch": 1.4463210236406112, "grad_norm": 1.6015625, "learning_rate": 1.0528890071740227e-05, "loss": 0.8376, "step": 8505 }, { "epoch": 1.4464924877296013, "grad_norm": 1.7578125, "learning_rate": 1.0527086662532737e-05, "loss": 0.9222, "step": 8506 }, { "epoch": 1.446663951818591, "grad_norm": 1.8046875, "learning_rate": 1.0525283236134968e-05, "loss": 0.9086, "step": 8507 }, { "epoch": 1.446835415907581, "grad_norm": 1.734375, "learning_rate": 1.0523479792605733e-05, "loss": 0.8853, "step": 8508 }, { "epoch": 1.4470068799965707, "grad_norm": 1.6796875, "learning_rate": 1.0521676332003851e-05, "loss": 0.8754, "step": 8509 }, { "epoch": 1.4471783440855606, "grad_norm": 1.828125, "learning_rate": 1.051987285438814e-05, "loss": 0.8787, "step": 8510 }, { "epoch": 1.4473498081745504, "grad_norm": 1.6640625, "learning_rate": 1.0518069359817418e-05, "loss": 0.8779, "step": 8511 }, { "epoch": 1.4475212722635402, "grad_norm": 1.7109375, "learning_rate": 1.0516265848350502e-05, "loss": 0.9028, "step": 8512 }, { "epoch": 1.44769273635253, "grad_norm": 1.796875, "learning_rate": 1.0514462320046212e-05, "loss": 0.8451, "step": 8513 }, { "epoch": 1.44786420044152, "grad_norm": 1.625, "learning_rate": 1.051265877496337e-05, "loss": 0.8443, "step": 8514 }, { "epoch": 1.44803566453051, "grad_norm": 1.59375, "learning_rate": 1.0510855213160793e-05, "loss": 0.8919, "step": 8515 }, { "epoch": 1.4482071286194997, "grad_norm": 1.7421875, "learning_rate": 1.0509051634697305e-05, "loss": 0.8251, "step": 8516 }, { "epoch": 1.4483785927084896, "grad_norm": 1.7890625, "learning_rate": 1.0507248039631726e-05, "loss": 0.8602, "step": 8517 }, { "epoch": 1.4485500567974796, "grad_norm": 1.71875, "learning_rate": 1.050544442802288e-05, "loss": 0.8148, "step": 8518 }, { "epoch": 1.4487215208864694, "grad_norm": 1.796875, "learning_rate": 1.0503640799929586e-05, "loss": 0.8857, "step": 8519 }, { "epoch": 1.4488929849754593, "grad_norm": 1.671875, "learning_rate": 1.050183715541067e-05, "loss": 0.8083, "step": 8520 }, { "epoch": 1.449064449064449, "grad_norm": 1.6875, "learning_rate": 1.0500033494524955e-05, "loss": 0.9437, "step": 8521 }, { "epoch": 1.449235913153439, "grad_norm": 1.6953125, "learning_rate": 1.0498229817331262e-05, "loss": 0.82, "step": 8522 }, { "epoch": 1.4494073772424287, "grad_norm": 1.71875, "learning_rate": 1.049642612388842e-05, "loss": 0.9085, "step": 8523 }, { "epoch": 1.4495788413314186, "grad_norm": 1.6015625, "learning_rate": 1.049462241425525e-05, "loss": 0.7999, "step": 8524 }, { "epoch": 1.4497503054204084, "grad_norm": 1.6328125, "learning_rate": 1.0492818688490587e-05, "loss": 0.9326, "step": 8525 }, { "epoch": 1.4499217695093984, "grad_norm": 1.640625, "learning_rate": 1.0491014946653248e-05, "loss": 0.8545, "step": 8526 }, { "epoch": 1.4500932335983883, "grad_norm": 1.625, "learning_rate": 1.0489211188802061e-05, "loss": 0.9042, "step": 8527 }, { "epoch": 1.450264697687378, "grad_norm": 1.7578125, "learning_rate": 1.0487407414995856e-05, "loss": 0.8962, "step": 8528 }, { "epoch": 1.450436161776368, "grad_norm": 1.6640625, "learning_rate": 1.048560362529346e-05, "loss": 0.833, "step": 8529 }, { "epoch": 1.4506076258653577, "grad_norm": 1.71875, "learning_rate": 1.0483799819753704e-05, "loss": 0.8709, "step": 8530 }, { "epoch": 1.4507790899543478, "grad_norm": 1.6953125, "learning_rate": 1.048199599843541e-05, "loss": 0.8699, "step": 8531 }, { "epoch": 1.4509505540433376, "grad_norm": 1.703125, "learning_rate": 1.0480192161397408e-05, "loss": 0.855, "step": 8532 }, { "epoch": 1.4511220181323274, "grad_norm": 1.6640625, "learning_rate": 1.0478388308698537e-05, "loss": 0.9085, "step": 8533 }, { "epoch": 1.4512934822213173, "grad_norm": 1.765625, "learning_rate": 1.0476584440397622e-05, "loss": 0.81, "step": 8534 }, { "epoch": 1.451464946310307, "grad_norm": 1.6796875, "learning_rate": 1.0474780556553491e-05, "loss": 0.8483, "step": 8535 }, { "epoch": 1.451636410399297, "grad_norm": 1.734375, "learning_rate": 1.0472976657224982e-05, "loss": 0.9003, "step": 8536 }, { "epoch": 1.4518078744882867, "grad_norm": 1.7890625, "learning_rate": 1.047117274247092e-05, "loss": 0.848, "step": 8537 }, { "epoch": 1.4519793385772768, "grad_norm": 1.765625, "learning_rate": 1.046936881235014e-05, "loss": 0.8042, "step": 8538 }, { "epoch": 1.4521508026662666, "grad_norm": 1.609375, "learning_rate": 1.0467564866921478e-05, "loss": 0.8757, "step": 8539 }, { "epoch": 1.4523222667552564, "grad_norm": 1.6875, "learning_rate": 1.0465760906243762e-05, "loss": 0.8383, "step": 8540 }, { "epoch": 1.4524937308442463, "grad_norm": 1.71875, "learning_rate": 1.0463956930375832e-05, "loss": 0.7948, "step": 8541 }, { "epoch": 1.452665194933236, "grad_norm": 1.6875, "learning_rate": 1.0462152939376518e-05, "loss": 0.9375, "step": 8542 }, { "epoch": 1.4528366590222261, "grad_norm": 1.625, "learning_rate": 1.0460348933304656e-05, "loss": 0.8774, "step": 8543 }, { "epoch": 1.453008123111216, "grad_norm": 1.7890625, "learning_rate": 1.0458544912219082e-05, "loss": 0.9465, "step": 8544 }, { "epoch": 1.4531795872002058, "grad_norm": 1.7734375, "learning_rate": 1.0456740876178633e-05, "loss": 0.8724, "step": 8545 }, { "epoch": 1.4533510512891956, "grad_norm": 1.78125, "learning_rate": 1.0454936825242143e-05, "loss": 0.9094, "step": 8546 }, { "epoch": 1.4535225153781854, "grad_norm": 1.5859375, "learning_rate": 1.0453132759468449e-05, "loss": 0.9206, "step": 8547 }, { "epoch": 1.4536939794671753, "grad_norm": 1.8046875, "learning_rate": 1.0451328678916393e-05, "loss": 0.9343, "step": 8548 }, { "epoch": 1.453865443556165, "grad_norm": 1.6171875, "learning_rate": 1.0449524583644806e-05, "loss": 0.8568, "step": 8549 }, { "epoch": 1.4540369076451551, "grad_norm": 1.7265625, "learning_rate": 1.044772047371253e-05, "loss": 0.8451, "step": 8550 }, { "epoch": 1.454208371734145, "grad_norm": 1.765625, "learning_rate": 1.0445916349178404e-05, "loss": 0.8701, "step": 8551 }, { "epoch": 1.4543798358231348, "grad_norm": 1.578125, "learning_rate": 1.0444112210101264e-05, "loss": 0.8086, "step": 8552 }, { "epoch": 1.4545512999121246, "grad_norm": 1.6796875, "learning_rate": 1.0442308056539956e-05, "loss": 0.8005, "step": 8553 }, { "epoch": 1.4547227640011144, "grad_norm": 1.640625, "learning_rate": 1.0440503888553316e-05, "loss": 0.8714, "step": 8554 }, { "epoch": 1.4548942280901045, "grad_norm": 1.609375, "learning_rate": 1.0438699706200184e-05, "loss": 0.8887, "step": 8555 }, { "epoch": 1.4550656921790943, "grad_norm": 1.796875, "learning_rate": 1.0436895509539405e-05, "loss": 0.9123, "step": 8556 }, { "epoch": 1.4552371562680841, "grad_norm": 1.6484375, "learning_rate": 1.0435091298629815e-05, "loss": 0.8783, "step": 8557 }, { "epoch": 1.455408620357074, "grad_norm": 1.71875, "learning_rate": 1.0433287073530263e-05, "loss": 0.897, "step": 8558 }, { "epoch": 1.4555800844460638, "grad_norm": 1.78125, "learning_rate": 1.0431482834299586e-05, "loss": 0.8824, "step": 8559 }, { "epoch": 1.4557515485350536, "grad_norm": 1.640625, "learning_rate": 1.0429678580996627e-05, "loss": 0.8112, "step": 8560 }, { "epoch": 1.4559230126240434, "grad_norm": 1.75, "learning_rate": 1.0427874313680234e-05, "loss": 0.861, "step": 8561 }, { "epoch": 1.4560944767130335, "grad_norm": 1.6953125, "learning_rate": 1.0426070032409247e-05, "loss": 0.863, "step": 8562 }, { "epoch": 1.4562659408020233, "grad_norm": 1.703125, "learning_rate": 1.0424265737242514e-05, "loss": 0.8264, "step": 8563 }, { "epoch": 1.4564374048910131, "grad_norm": 1.7734375, "learning_rate": 1.0422461428238874e-05, "loss": 0.8788, "step": 8564 }, { "epoch": 1.456608868980003, "grad_norm": 1.71875, "learning_rate": 1.0420657105457182e-05, "loss": 0.8383, "step": 8565 }, { "epoch": 1.4567803330689928, "grad_norm": 1.7109375, "learning_rate": 1.0418852768956274e-05, "loss": 0.9114, "step": 8566 }, { "epoch": 1.4569517971579828, "grad_norm": 1.703125, "learning_rate": 1.0417048418795e-05, "loss": 0.8532, "step": 8567 }, { "epoch": 1.4571232612469727, "grad_norm": 1.734375, "learning_rate": 1.0415244055032205e-05, "loss": 0.8919, "step": 8568 }, { "epoch": 1.4572947253359625, "grad_norm": 1.625, "learning_rate": 1.041343967772674e-05, "loss": 0.9256, "step": 8569 }, { "epoch": 1.4574661894249523, "grad_norm": 1.671875, "learning_rate": 1.0411635286937448e-05, "loss": 0.8848, "step": 8570 }, { "epoch": 1.4576376535139421, "grad_norm": 1.625, "learning_rate": 1.0409830882723182e-05, "loss": 0.8562, "step": 8571 }, { "epoch": 1.457809117602932, "grad_norm": 1.7578125, "learning_rate": 1.0408026465142787e-05, "loss": 0.938, "step": 8572 }, { "epoch": 1.4579805816919218, "grad_norm": 1.671875, "learning_rate": 1.040622203425511e-05, "loss": 0.8254, "step": 8573 }, { "epoch": 1.4581520457809118, "grad_norm": 1.6953125, "learning_rate": 1.0404417590119004e-05, "loss": 0.8174, "step": 8574 }, { "epoch": 1.4583235098699017, "grad_norm": 1.7265625, "learning_rate": 1.0402613132793317e-05, "loss": 0.9119, "step": 8575 }, { "epoch": 1.4584949739588915, "grad_norm": 1.6875, "learning_rate": 1.0400808662336906e-05, "loss": 0.8193, "step": 8576 }, { "epoch": 1.4586664380478813, "grad_norm": 1.7265625, "learning_rate": 1.0399004178808609e-05, "loss": 0.8587, "step": 8577 }, { "epoch": 1.4588379021368711, "grad_norm": 1.6015625, "learning_rate": 1.0397199682267283e-05, "loss": 0.8488, "step": 8578 }, { "epoch": 1.4590093662258612, "grad_norm": 1.7421875, "learning_rate": 1.0395395172771779e-05, "loss": 0.8938, "step": 8579 }, { "epoch": 1.459180830314851, "grad_norm": 1.6796875, "learning_rate": 1.0393590650380951e-05, "loss": 0.8643, "step": 8580 }, { "epoch": 1.4593522944038408, "grad_norm": 1.5859375, "learning_rate": 1.039178611515365e-05, "loss": 0.7745, "step": 8581 }, { "epoch": 1.4595237584928307, "grad_norm": 1.7265625, "learning_rate": 1.038998156714873e-05, "loss": 0.8998, "step": 8582 }, { "epoch": 1.4596952225818205, "grad_norm": 1.734375, "learning_rate": 1.0388177006425042e-05, "loss": 0.9082, "step": 8583 }, { "epoch": 1.4598666866708103, "grad_norm": 1.6484375, "learning_rate": 1.0386372433041438e-05, "loss": 0.8217, "step": 8584 }, { "epoch": 1.4600381507598001, "grad_norm": 1.6953125, "learning_rate": 1.0384567847056776e-05, "loss": 0.8611, "step": 8585 }, { "epoch": 1.46020961484879, "grad_norm": 1.6640625, "learning_rate": 1.0382763248529908e-05, "loss": 0.7299, "step": 8586 }, { "epoch": 1.46038107893778, "grad_norm": 1.7734375, "learning_rate": 1.0380958637519688e-05, "loss": 0.9224, "step": 8587 }, { "epoch": 1.4605525430267698, "grad_norm": 1.796875, "learning_rate": 1.0379154014084973e-05, "loss": 0.8753, "step": 8588 }, { "epoch": 1.4607240071157597, "grad_norm": 1.7109375, "learning_rate": 1.0377349378284618e-05, "loss": 0.9347, "step": 8589 }, { "epoch": 1.4608954712047495, "grad_norm": 1.609375, "learning_rate": 1.0375544730177477e-05, "loss": 0.8349, "step": 8590 }, { "epoch": 1.4610669352937395, "grad_norm": 1.75, "learning_rate": 1.0373740069822411e-05, "loss": 0.8822, "step": 8591 }, { "epoch": 1.4612383993827294, "grad_norm": 1.765625, "learning_rate": 1.0371935397278272e-05, "loss": 0.896, "step": 8592 }, { "epoch": 1.4614098634717192, "grad_norm": 1.7265625, "learning_rate": 1.0370130712603919e-05, "loss": 0.8061, "step": 8593 }, { "epoch": 1.461581327560709, "grad_norm": 1.71875, "learning_rate": 1.0368326015858212e-05, "loss": 0.8535, "step": 8594 }, { "epoch": 1.4617527916496988, "grad_norm": 1.5703125, "learning_rate": 1.0366521307100007e-05, "loss": 0.773, "step": 8595 }, { "epoch": 1.4619242557386887, "grad_norm": 1.6640625, "learning_rate": 1.036471658638816e-05, "loss": 0.8229, "step": 8596 }, { "epoch": 1.4620957198276785, "grad_norm": 1.6953125, "learning_rate": 1.036291185378153e-05, "loss": 0.9088, "step": 8597 }, { "epoch": 1.4622671839166683, "grad_norm": 1.7890625, "learning_rate": 1.036110710933898e-05, "loss": 0.8838, "step": 8598 }, { "epoch": 1.4624386480056584, "grad_norm": 1.6953125, "learning_rate": 1.0359302353119365e-05, "loss": 0.8271, "step": 8599 }, { "epoch": 1.4626101120946482, "grad_norm": 1.7109375, "learning_rate": 1.035749758518155e-05, "loss": 0.8883, "step": 8600 }, { "epoch": 1.462781576183638, "grad_norm": 1.6953125, "learning_rate": 1.0355692805584394e-05, "loss": 0.8199, "step": 8601 }, { "epoch": 1.4629530402726278, "grad_norm": 1.6328125, "learning_rate": 1.0353888014386753e-05, "loss": 0.8284, "step": 8602 }, { "epoch": 1.463124504361618, "grad_norm": 1.65625, "learning_rate": 1.0352083211647493e-05, "loss": 0.8684, "step": 8603 }, { "epoch": 1.4632959684506077, "grad_norm": 1.640625, "learning_rate": 1.0350278397425473e-05, "loss": 0.7954, "step": 8604 }, { "epoch": 1.4634674325395975, "grad_norm": 1.6015625, "learning_rate": 1.0348473571779556e-05, "loss": 0.8125, "step": 8605 }, { "epoch": 1.4636388966285874, "grad_norm": 1.6953125, "learning_rate": 1.0346668734768603e-05, "loss": 0.9514, "step": 8606 }, { "epoch": 1.4638103607175772, "grad_norm": 1.6484375, "learning_rate": 1.0344863886451476e-05, "loss": 0.8339, "step": 8607 }, { "epoch": 1.463981824806567, "grad_norm": 1.734375, "learning_rate": 1.0343059026887039e-05, "loss": 0.7786, "step": 8608 }, { "epoch": 1.4641532888955568, "grad_norm": 1.6640625, "learning_rate": 1.0341254156134157e-05, "loss": 0.8554, "step": 8609 }, { "epoch": 1.4643247529845467, "grad_norm": 1.5546875, "learning_rate": 1.033944927425169e-05, "loss": 0.8097, "step": 8610 }, { "epoch": 1.4644962170735367, "grad_norm": 1.5625, "learning_rate": 1.0337644381298508e-05, "loss": 0.8734, "step": 8611 }, { "epoch": 1.4646676811625265, "grad_norm": 1.65625, "learning_rate": 1.033583947733347e-05, "loss": 0.8432, "step": 8612 }, { "epoch": 1.4648391452515164, "grad_norm": 1.6640625, "learning_rate": 1.0334034562415438e-05, "loss": 0.8445, "step": 8613 }, { "epoch": 1.4650106093405062, "grad_norm": 1.6953125, "learning_rate": 1.0332229636603283e-05, "loss": 0.8689, "step": 8614 }, { "epoch": 1.4651820734294962, "grad_norm": 1.671875, "learning_rate": 1.033042469995587e-05, "loss": 0.8642, "step": 8615 }, { "epoch": 1.465353537518486, "grad_norm": 1.75, "learning_rate": 1.0328619752532063e-05, "loss": 0.989, "step": 8616 }, { "epoch": 1.465525001607476, "grad_norm": 1.640625, "learning_rate": 1.0326814794390728e-05, "loss": 0.9084, "step": 8617 }, { "epoch": 1.4656964656964657, "grad_norm": 1.6953125, "learning_rate": 1.032500982559073e-05, "loss": 0.8981, "step": 8618 }, { "epoch": 1.4658679297854555, "grad_norm": 1.6328125, "learning_rate": 1.032320484619094e-05, "loss": 0.8218, "step": 8619 }, { "epoch": 1.4660393938744454, "grad_norm": 1.671875, "learning_rate": 1.032139985625022e-05, "loss": 0.8495, "step": 8620 }, { "epoch": 1.4662108579634352, "grad_norm": 1.6171875, "learning_rate": 1.0319594855827444e-05, "loss": 0.8132, "step": 8621 }, { "epoch": 1.466382322052425, "grad_norm": 1.7421875, "learning_rate": 1.0317789844981474e-05, "loss": 0.8734, "step": 8622 }, { "epoch": 1.466553786141415, "grad_norm": 1.703125, "learning_rate": 1.0315984823771183e-05, "loss": 0.7964, "step": 8623 }, { "epoch": 1.466725250230405, "grad_norm": 1.671875, "learning_rate": 1.0314179792255436e-05, "loss": 0.9034, "step": 8624 }, { "epoch": 1.4668967143193947, "grad_norm": 1.71875, "learning_rate": 1.0312374750493101e-05, "loss": 0.8649, "step": 8625 }, { "epoch": 1.4670681784083845, "grad_norm": 1.625, "learning_rate": 1.031056969854305e-05, "loss": 0.8618, "step": 8626 }, { "epoch": 1.4672396424973744, "grad_norm": 1.6015625, "learning_rate": 1.030876463646415e-05, "loss": 0.8017, "step": 8627 }, { "epoch": 1.4674111065863644, "grad_norm": 1.75, "learning_rate": 1.0306959564315278e-05, "loss": 0.8205, "step": 8628 }, { "epoch": 1.4675825706753542, "grad_norm": 1.6640625, "learning_rate": 1.0305154482155296e-05, "loss": 0.8564, "step": 8629 }, { "epoch": 1.467754034764344, "grad_norm": 1.75, "learning_rate": 1.0303349390043076e-05, "loss": 0.8954, "step": 8630 }, { "epoch": 1.467925498853334, "grad_norm": 1.609375, "learning_rate": 1.030154428803749e-05, "loss": 0.8172, "step": 8631 }, { "epoch": 1.4680969629423237, "grad_norm": 1.640625, "learning_rate": 1.0299739176197409e-05, "loss": 0.8819, "step": 8632 }, { "epoch": 1.4682684270313135, "grad_norm": 1.6171875, "learning_rate": 1.0297934054581707e-05, "loss": 0.7962, "step": 8633 }, { "epoch": 1.4684398911203034, "grad_norm": 1.6328125, "learning_rate": 1.0296128923249251e-05, "loss": 0.757, "step": 8634 }, { "epoch": 1.4686113552092934, "grad_norm": 1.703125, "learning_rate": 1.0294323782258917e-05, "loss": 0.8211, "step": 8635 }, { "epoch": 1.4687828192982832, "grad_norm": 1.7890625, "learning_rate": 1.0292518631669575e-05, "loss": 0.8835, "step": 8636 }, { "epoch": 1.468954283387273, "grad_norm": 1.6953125, "learning_rate": 1.02907134715401e-05, "loss": 0.8118, "step": 8637 }, { "epoch": 1.469125747476263, "grad_norm": 1.6796875, "learning_rate": 1.0288908301929364e-05, "loss": 0.8418, "step": 8638 }, { "epoch": 1.4692972115652527, "grad_norm": 1.7265625, "learning_rate": 1.0287103122896237e-05, "loss": 0.8533, "step": 8639 }, { "epoch": 1.4694686756542428, "grad_norm": 1.7109375, "learning_rate": 1.0285297934499604e-05, "loss": 0.8903, "step": 8640 }, { "epoch": 1.4696401397432326, "grad_norm": 1.7890625, "learning_rate": 1.0283492736798327e-05, "loss": 0.8225, "step": 8641 }, { "epoch": 1.4698116038322224, "grad_norm": 1.65625, "learning_rate": 1.028168752985128e-05, "loss": 0.9088, "step": 8642 }, { "epoch": 1.4699830679212122, "grad_norm": 1.6015625, "learning_rate": 1.0279882313717346e-05, "loss": 0.7967, "step": 8643 }, { "epoch": 1.470154532010202, "grad_norm": 1.6875, "learning_rate": 1.0278077088455396e-05, "loss": 0.8248, "step": 8644 }, { "epoch": 1.470325996099192, "grad_norm": 1.6640625, "learning_rate": 1.0276271854124303e-05, "loss": 0.8308, "step": 8645 }, { "epoch": 1.4704974601881817, "grad_norm": 1.7265625, "learning_rate": 1.0274466610782948e-05, "loss": 0.8704, "step": 8646 }, { "epoch": 1.4706689242771718, "grad_norm": 1.7109375, "learning_rate": 1.0272661358490199e-05, "loss": 0.934, "step": 8647 }, { "epoch": 1.4708403883661616, "grad_norm": 1.875, "learning_rate": 1.0270856097304939e-05, "loss": 0.9382, "step": 8648 }, { "epoch": 1.4710118524551514, "grad_norm": 1.78125, "learning_rate": 1.026905082728604e-05, "loss": 0.9287, "step": 8649 }, { "epoch": 1.4711833165441413, "grad_norm": 1.671875, "learning_rate": 1.0267245548492382e-05, "loss": 0.8068, "step": 8650 }, { "epoch": 1.471354780633131, "grad_norm": 1.6796875, "learning_rate": 1.0265440260982841e-05, "loss": 0.8128, "step": 8651 }, { "epoch": 1.4715262447221211, "grad_norm": 1.6875, "learning_rate": 1.0263634964816293e-05, "loss": 0.9054, "step": 8652 }, { "epoch": 1.471697708811111, "grad_norm": 1.6875, "learning_rate": 1.0261829660051615e-05, "loss": 0.8416, "step": 8653 }, { "epoch": 1.4718691729001008, "grad_norm": 1.8125, "learning_rate": 1.0260024346747683e-05, "loss": 0.9562, "step": 8654 }, { "epoch": 1.4720406369890906, "grad_norm": 1.6640625, "learning_rate": 1.0258219024963382e-05, "loss": 0.8248, "step": 8655 }, { "epoch": 1.4722121010780804, "grad_norm": 1.6875, "learning_rate": 1.0256413694757585e-05, "loss": 0.9124, "step": 8656 }, { "epoch": 1.4723835651670703, "grad_norm": 1.5859375, "learning_rate": 1.0254608356189176e-05, "loss": 0.8353, "step": 8657 }, { "epoch": 1.47255502925606, "grad_norm": 1.75, "learning_rate": 1.0252803009317025e-05, "loss": 0.873, "step": 8658 }, { "epoch": 1.4727264933450501, "grad_norm": 1.5625, "learning_rate": 1.0250997654200014e-05, "loss": 0.75, "step": 8659 }, { "epoch": 1.47289795743404, "grad_norm": 1.671875, "learning_rate": 1.0249192290897028e-05, "loss": 0.9558, "step": 8660 }, { "epoch": 1.4730694215230298, "grad_norm": 1.6796875, "learning_rate": 1.0247386919466942e-05, "loss": 0.8325, "step": 8661 }, { "epoch": 1.4732408856120196, "grad_norm": 1.7578125, "learning_rate": 1.0245581539968637e-05, "loss": 0.9022, "step": 8662 }, { "epoch": 1.4734123497010094, "grad_norm": 1.640625, "learning_rate": 1.0243776152460993e-05, "loss": 0.7822, "step": 8663 }, { "epoch": 1.4735838137899995, "grad_norm": 1.625, "learning_rate": 1.024197075700289e-05, "loss": 0.8517, "step": 8664 }, { "epoch": 1.4737552778789893, "grad_norm": 1.6796875, "learning_rate": 1.0240165353653211e-05, "loss": 0.859, "step": 8665 }, { "epoch": 1.4739267419679791, "grad_norm": 1.6953125, "learning_rate": 1.0238359942470834e-05, "loss": 0.8532, "step": 8666 }, { "epoch": 1.474098206056969, "grad_norm": 1.625, "learning_rate": 1.0236554523514644e-05, "loss": 0.7895, "step": 8667 }, { "epoch": 1.4742696701459588, "grad_norm": 1.6875, "learning_rate": 1.023474909684352e-05, "loss": 0.8081, "step": 8668 }, { "epoch": 1.4744411342349486, "grad_norm": 1.71875, "learning_rate": 1.0232943662516343e-05, "loss": 0.8918, "step": 8669 }, { "epoch": 1.4746125983239384, "grad_norm": 1.703125, "learning_rate": 1.0231138220591996e-05, "loss": 0.9468, "step": 8670 }, { "epoch": 1.4747840624129283, "grad_norm": 1.7734375, "learning_rate": 1.022933277112936e-05, "loss": 0.8936, "step": 8671 }, { "epoch": 1.4749555265019183, "grad_norm": 1.8359375, "learning_rate": 1.022752731418732e-05, "loss": 1.0092, "step": 8672 }, { "epoch": 1.4751269905909081, "grad_norm": 1.6953125, "learning_rate": 1.0225721849824754e-05, "loss": 0.8682, "step": 8673 }, { "epoch": 1.475298454679898, "grad_norm": 1.6875, "learning_rate": 1.0223916378100555e-05, "loss": 0.8251, "step": 8674 }, { "epoch": 1.4754699187688878, "grad_norm": 1.734375, "learning_rate": 1.0222110899073597e-05, "loss": 0.8766, "step": 8675 }, { "epoch": 1.4756413828578778, "grad_norm": 1.6953125, "learning_rate": 1.0220305412802765e-05, "loss": 0.8304, "step": 8676 }, { "epoch": 1.4758128469468677, "grad_norm": 1.671875, "learning_rate": 1.0218499919346945e-05, "loss": 0.8465, "step": 8677 }, { "epoch": 1.4759843110358575, "grad_norm": 1.6640625, "learning_rate": 1.0216694418765019e-05, "loss": 0.8278, "step": 8678 }, { "epoch": 1.4761557751248473, "grad_norm": 1.8203125, "learning_rate": 1.0214888911115872e-05, "loss": 0.9232, "step": 8679 }, { "epoch": 1.4763272392138371, "grad_norm": 1.671875, "learning_rate": 1.021308339645839e-05, "loss": 0.864, "step": 8680 }, { "epoch": 1.476498703302827, "grad_norm": 1.6328125, "learning_rate": 1.0211277874851453e-05, "loss": 0.7689, "step": 8681 }, { "epoch": 1.4766701673918168, "grad_norm": 1.71875, "learning_rate": 1.020947234635395e-05, "loss": 0.9219, "step": 8682 }, { "epoch": 1.4768416314808066, "grad_norm": 1.6328125, "learning_rate": 1.0207666811024766e-05, "loss": 0.8304, "step": 8683 }, { "epoch": 1.4770130955697967, "grad_norm": 1.765625, "learning_rate": 1.0205861268922786e-05, "loss": 0.931, "step": 8684 }, { "epoch": 1.4771845596587865, "grad_norm": 1.671875, "learning_rate": 1.0204055720106891e-05, "loss": 0.8371, "step": 8685 }, { "epoch": 1.4773560237477763, "grad_norm": 1.6328125, "learning_rate": 1.0202250164635977e-05, "loss": 0.8339, "step": 8686 }, { "epoch": 1.4775274878367661, "grad_norm": 1.71875, "learning_rate": 1.020044460256892e-05, "loss": 0.8499, "step": 8687 }, { "epoch": 1.4776989519257562, "grad_norm": 1.703125, "learning_rate": 1.0198639033964608e-05, "loss": 0.8418, "step": 8688 }, { "epoch": 1.477870416014746, "grad_norm": 1.640625, "learning_rate": 1.0196833458881931e-05, "loss": 0.8294, "step": 8689 }, { "epoch": 1.4780418801037358, "grad_norm": 1.65625, "learning_rate": 1.0195027877379773e-05, "loss": 0.7643, "step": 8690 }, { "epoch": 1.4782133441927257, "grad_norm": 1.7421875, "learning_rate": 1.0193222289517021e-05, "loss": 0.8708, "step": 8691 }, { "epoch": 1.4783848082817155, "grad_norm": 1.7109375, "learning_rate": 1.0191416695352564e-05, "loss": 0.8079, "step": 8692 }, { "epoch": 1.4785562723707053, "grad_norm": 1.671875, "learning_rate": 1.0189611094945287e-05, "loss": 0.8953, "step": 8693 }, { "epoch": 1.4787277364596951, "grad_norm": 1.6953125, "learning_rate": 1.0187805488354079e-05, "loss": 0.8435, "step": 8694 }, { "epoch": 1.478899200548685, "grad_norm": 1.609375, "learning_rate": 1.0185999875637827e-05, "loss": 0.7899, "step": 8695 }, { "epoch": 1.479070664637675, "grad_norm": 1.671875, "learning_rate": 1.0184194256855418e-05, "loss": 0.8195, "step": 8696 }, { "epoch": 1.4792421287266648, "grad_norm": 1.625, "learning_rate": 1.0182388632065738e-05, "loss": 0.8243, "step": 8697 }, { "epoch": 1.4794135928156547, "grad_norm": 1.78125, "learning_rate": 1.0180583001327684e-05, "loss": 0.9301, "step": 8698 }, { "epoch": 1.4795850569046445, "grad_norm": 1.71875, "learning_rate": 1.0178777364700134e-05, "loss": 0.8632, "step": 8699 }, { "epoch": 1.4797565209936345, "grad_norm": 1.7265625, "learning_rate": 1.0176971722241982e-05, "loss": 0.8346, "step": 8700 }, { "epoch": 1.4799279850826244, "grad_norm": 1.6640625, "learning_rate": 1.0175166074012113e-05, "loss": 0.8498, "step": 8701 }, { "epoch": 1.4800994491716142, "grad_norm": 1.7734375, "learning_rate": 1.0173360420069421e-05, "loss": 0.8662, "step": 8702 }, { "epoch": 1.480270913260604, "grad_norm": 1.703125, "learning_rate": 1.0171554760472796e-05, "loss": 0.811, "step": 8703 }, { "epoch": 1.4804423773495938, "grad_norm": 1.7265625, "learning_rate": 1.0169749095281118e-05, "loss": 0.9006, "step": 8704 }, { "epoch": 1.4806138414385837, "grad_norm": 1.8046875, "learning_rate": 1.0167943424553288e-05, "loss": 0.9017, "step": 8705 }, { "epoch": 1.4807853055275735, "grad_norm": 1.6171875, "learning_rate": 1.0166137748348188e-05, "loss": 0.8064, "step": 8706 }, { "epoch": 1.4809567696165633, "grad_norm": 1.6640625, "learning_rate": 1.0164332066724712e-05, "loss": 0.8161, "step": 8707 }, { "epoch": 1.4811282337055534, "grad_norm": 1.6796875, "learning_rate": 1.0162526379741746e-05, "loss": 0.8809, "step": 8708 }, { "epoch": 1.4812996977945432, "grad_norm": 1.6484375, "learning_rate": 1.0160720687458183e-05, "loss": 0.7899, "step": 8709 }, { "epoch": 1.481471161883533, "grad_norm": 1.625, "learning_rate": 1.0158914989932915e-05, "loss": 0.8231, "step": 8710 }, { "epoch": 1.4816426259725228, "grad_norm": 1.6171875, "learning_rate": 1.015710928722483e-05, "loss": 0.8379, "step": 8711 }, { "epoch": 1.4818140900615129, "grad_norm": 1.671875, "learning_rate": 1.0155303579392819e-05, "loss": 0.8668, "step": 8712 }, { "epoch": 1.4819855541505027, "grad_norm": 1.734375, "learning_rate": 1.0153497866495774e-05, "loss": 0.897, "step": 8713 }, { "epoch": 1.4821570182394925, "grad_norm": 1.7109375, "learning_rate": 1.0151692148592584e-05, "loss": 0.8135, "step": 8714 }, { "epoch": 1.4823284823284824, "grad_norm": 1.7109375, "learning_rate": 1.0149886425742145e-05, "loss": 0.827, "step": 8715 }, { "epoch": 1.4824999464174722, "grad_norm": 1.734375, "learning_rate": 1.0148080698003347e-05, "loss": 0.8225, "step": 8716 }, { "epoch": 1.482671410506462, "grad_norm": 1.71875, "learning_rate": 1.0146274965435074e-05, "loss": 0.9086, "step": 8717 }, { "epoch": 1.4828428745954518, "grad_norm": 1.78125, "learning_rate": 1.0144469228096228e-05, "loss": 0.8494, "step": 8718 }, { "epoch": 1.4830143386844417, "grad_norm": 1.6484375, "learning_rate": 1.0142663486045692e-05, "loss": 0.79, "step": 8719 }, { "epoch": 1.4831858027734317, "grad_norm": 1.625, "learning_rate": 1.0140857739342365e-05, "loss": 0.8612, "step": 8720 }, { "epoch": 1.4833572668624215, "grad_norm": 1.6328125, "learning_rate": 1.0139051988045136e-05, "loss": 0.8386, "step": 8721 }, { "epoch": 1.4835287309514114, "grad_norm": 1.6796875, "learning_rate": 1.0137246232212901e-05, "loss": 0.8252, "step": 8722 }, { "epoch": 1.4837001950404012, "grad_norm": 1.6640625, "learning_rate": 1.0135440471904546e-05, "loss": 0.7856, "step": 8723 }, { "epoch": 1.483871659129391, "grad_norm": 1.7578125, "learning_rate": 1.0133634707178968e-05, "loss": 0.917, "step": 8724 }, { "epoch": 1.484043123218381, "grad_norm": 1.7421875, "learning_rate": 1.0131828938095059e-05, "loss": 0.8445, "step": 8725 }, { "epoch": 1.4842145873073709, "grad_norm": 1.6171875, "learning_rate": 1.0130023164711715e-05, "loss": 0.8146, "step": 8726 }, { "epoch": 1.4843860513963607, "grad_norm": 1.6640625, "learning_rate": 1.0128217387087825e-05, "loss": 0.8103, "step": 8727 }, { "epoch": 1.4845575154853505, "grad_norm": 1.609375, "learning_rate": 1.0126411605282279e-05, "loss": 0.8867, "step": 8728 }, { "epoch": 1.4847289795743404, "grad_norm": 1.75, "learning_rate": 1.0124605819353974e-05, "loss": 0.96, "step": 8729 }, { "epoch": 1.4849004436633302, "grad_norm": 1.703125, "learning_rate": 1.0122800029361808e-05, "loss": 0.9119, "step": 8730 }, { "epoch": 1.48507190775232, "grad_norm": 1.703125, "learning_rate": 1.012099423536467e-05, "loss": 0.8755, "step": 8731 }, { "epoch": 1.48524337184131, "grad_norm": 1.703125, "learning_rate": 1.0119188437421456e-05, "loss": 0.8813, "step": 8732 }, { "epoch": 1.4854148359302999, "grad_norm": 1.7265625, "learning_rate": 1.0117382635591055e-05, "loss": 0.8764, "step": 8733 }, { "epoch": 1.4855863000192897, "grad_norm": 1.7734375, "learning_rate": 1.0115576829932365e-05, "loss": 0.884, "step": 8734 }, { "epoch": 1.4857577641082795, "grad_norm": 1.6640625, "learning_rate": 1.0113771020504281e-05, "loss": 0.8937, "step": 8735 }, { "epoch": 1.4859292281972694, "grad_norm": 1.7734375, "learning_rate": 1.0111965207365695e-05, "loss": 0.8606, "step": 8736 }, { "epoch": 1.4861006922862594, "grad_norm": 1.6171875, "learning_rate": 1.0110159390575501e-05, "loss": 0.8762, "step": 8737 }, { "epoch": 1.4862721563752492, "grad_norm": 1.703125, "learning_rate": 1.0108353570192595e-05, "loss": 0.8707, "step": 8738 }, { "epoch": 1.486443620464239, "grad_norm": 1.7578125, "learning_rate": 1.010654774627587e-05, "loss": 0.8746, "step": 8739 }, { "epoch": 1.4866150845532289, "grad_norm": 1.640625, "learning_rate": 1.0104741918884222e-05, "loss": 0.8249, "step": 8740 }, { "epoch": 1.4867865486422187, "grad_norm": 1.671875, "learning_rate": 1.0102936088076547e-05, "loss": 0.8829, "step": 8741 }, { "epoch": 1.4869580127312085, "grad_norm": 1.6796875, "learning_rate": 1.0101130253911737e-05, "loss": 0.8185, "step": 8742 }, { "epoch": 1.4871294768201984, "grad_norm": 1.6328125, "learning_rate": 1.009932441644869e-05, "loss": 0.9014, "step": 8743 }, { "epoch": 1.4873009409091884, "grad_norm": 1.71875, "learning_rate": 1.0097518575746301e-05, "loss": 0.8529, "step": 8744 }, { "epoch": 1.4874724049981782, "grad_norm": 1.6484375, "learning_rate": 1.0095712731863463e-05, "loss": 0.8076, "step": 8745 }, { "epoch": 1.487643869087168, "grad_norm": 1.75, "learning_rate": 1.009390688485907e-05, "loss": 0.8656, "step": 8746 }, { "epoch": 1.4878153331761579, "grad_norm": 1.671875, "learning_rate": 1.009210103479202e-05, "loss": 0.7715, "step": 8747 }, { "epoch": 1.4879867972651477, "grad_norm": 1.7890625, "learning_rate": 1.0090295181721209e-05, "loss": 0.9446, "step": 8748 }, { "epoch": 1.4881582613541378, "grad_norm": 1.640625, "learning_rate": 1.0088489325705535e-05, "loss": 0.8537, "step": 8749 }, { "epoch": 1.4883297254431276, "grad_norm": 1.6875, "learning_rate": 1.0086683466803889e-05, "loss": 0.9564, "step": 8750 }, { "epoch": 1.4885011895321174, "grad_norm": 1.734375, "learning_rate": 1.0084877605075168e-05, "loss": 0.9122, "step": 8751 }, { "epoch": 1.4886726536211072, "grad_norm": 1.7421875, "learning_rate": 1.0083071740578269e-05, "loss": 0.8204, "step": 8752 }, { "epoch": 1.488844117710097, "grad_norm": 1.7421875, "learning_rate": 1.0081265873372088e-05, "loss": 0.7796, "step": 8753 }, { "epoch": 1.4890155817990869, "grad_norm": 1.7109375, "learning_rate": 1.0079460003515522e-05, "loss": 0.8749, "step": 8754 }, { "epoch": 1.4891870458880767, "grad_norm": 1.65625, "learning_rate": 1.0077654131067467e-05, "loss": 0.8704, "step": 8755 }, { "epoch": 1.4893585099770668, "grad_norm": 1.796875, "learning_rate": 1.0075848256086814e-05, "loss": 0.9248, "step": 8756 }, { "epoch": 1.4895299740660566, "grad_norm": 1.65625, "learning_rate": 1.0074042378632464e-05, "loss": 0.8705, "step": 8757 }, { "epoch": 1.4897014381550464, "grad_norm": 1.8046875, "learning_rate": 1.0072236498763316e-05, "loss": 0.8117, "step": 8758 }, { "epoch": 1.4898729022440362, "grad_norm": 1.6953125, "learning_rate": 1.0070430616538262e-05, "loss": 0.8788, "step": 8759 }, { "epoch": 1.490044366333026, "grad_norm": 1.7421875, "learning_rate": 1.0068624732016201e-05, "loss": 0.8653, "step": 8760 }, { "epoch": 1.490215830422016, "grad_norm": 1.5859375, "learning_rate": 1.0066818845256032e-05, "loss": 0.7809, "step": 8761 }, { "epoch": 1.490387294511006, "grad_norm": 1.6484375, "learning_rate": 1.0065012956316647e-05, "loss": 0.8333, "step": 8762 }, { "epoch": 1.4905587585999958, "grad_norm": 1.6796875, "learning_rate": 1.0063207065256944e-05, "loss": 0.8556, "step": 8763 }, { "epoch": 1.4907302226889856, "grad_norm": 1.71875, "learning_rate": 1.006140117213582e-05, "loss": 0.9508, "step": 8764 }, { "epoch": 1.4909016867779754, "grad_norm": 1.6953125, "learning_rate": 1.0059595277012173e-05, "loss": 0.8847, "step": 8765 }, { "epoch": 1.4910731508669652, "grad_norm": 1.8359375, "learning_rate": 1.00577893799449e-05, "loss": 0.9396, "step": 8766 }, { "epoch": 1.491244614955955, "grad_norm": 1.6953125, "learning_rate": 1.0055983480992897e-05, "loss": 0.9099, "step": 8767 }, { "epoch": 1.4914160790449449, "grad_norm": 1.71875, "learning_rate": 1.0054177580215063e-05, "loss": 0.8431, "step": 8768 }, { "epoch": 1.491587543133935, "grad_norm": 1.7421875, "learning_rate": 1.0052371677670291e-05, "loss": 0.8217, "step": 8769 }, { "epoch": 1.4917590072229248, "grad_norm": 1.640625, "learning_rate": 1.0050565773417484e-05, "loss": 0.8995, "step": 8770 }, { "epoch": 1.4919304713119146, "grad_norm": 1.71875, "learning_rate": 1.0048759867515538e-05, "loss": 0.9212, "step": 8771 }, { "epoch": 1.4921019354009044, "grad_norm": 1.671875, "learning_rate": 1.0046953960023346e-05, "loss": 0.8676, "step": 8772 }, { "epoch": 1.4922733994898945, "grad_norm": 1.6796875, "learning_rate": 1.0045148050999814e-05, "loss": 0.8223, "step": 8773 }, { "epoch": 1.4924448635788843, "grad_norm": 1.59375, "learning_rate": 1.0043342140503829e-05, "loss": 0.8115, "step": 8774 }, { "epoch": 1.492616327667874, "grad_norm": 1.6328125, "learning_rate": 1.0041536228594293e-05, "loss": 0.8233, "step": 8775 }, { "epoch": 1.492787791756864, "grad_norm": 1.625, "learning_rate": 1.0039730315330105e-05, "loss": 0.8075, "step": 8776 }, { "epoch": 1.4929592558458538, "grad_norm": 1.6640625, "learning_rate": 1.0037924400770163e-05, "loss": 0.9386, "step": 8777 }, { "epoch": 1.4931307199348436, "grad_norm": 1.6953125, "learning_rate": 1.0036118484973365e-05, "loss": 0.8208, "step": 8778 }, { "epoch": 1.4933021840238334, "grad_norm": 1.625, "learning_rate": 1.0034312567998605e-05, "loss": 0.8615, "step": 8779 }, { "epoch": 1.4934736481128232, "grad_norm": 1.7421875, "learning_rate": 1.0032506649904783e-05, "loss": 0.9546, "step": 8780 }, { "epoch": 1.4936451122018133, "grad_norm": 1.65625, "learning_rate": 1.0030700730750798e-05, "loss": 0.8694, "step": 8781 }, { "epoch": 1.493816576290803, "grad_norm": 1.5703125, "learning_rate": 1.0028894810595547e-05, "loss": 0.753, "step": 8782 }, { "epoch": 1.493988040379793, "grad_norm": 1.6171875, "learning_rate": 1.0027088889497924e-05, "loss": 0.849, "step": 8783 }, { "epoch": 1.4941595044687828, "grad_norm": 1.7421875, "learning_rate": 1.0025282967516833e-05, "loss": 0.8654, "step": 8784 }, { "epoch": 1.4943309685577728, "grad_norm": 1.6484375, "learning_rate": 1.0023477044711168e-05, "loss": 0.8175, "step": 8785 }, { "epoch": 1.4945024326467626, "grad_norm": 1.7265625, "learning_rate": 1.0021671121139828e-05, "loss": 0.8664, "step": 8786 }, { "epoch": 1.4946738967357525, "grad_norm": 1.6953125, "learning_rate": 1.0019865196861714e-05, "loss": 0.8677, "step": 8787 }, { "epoch": 1.4948453608247423, "grad_norm": 1.625, "learning_rate": 1.0018059271935719e-05, "loss": 0.8039, "step": 8788 }, { "epoch": 1.4950168249137321, "grad_norm": 1.796875, "learning_rate": 1.0016253346420742e-05, "loss": 0.8663, "step": 8789 }, { "epoch": 1.495188289002722, "grad_norm": 1.7109375, "learning_rate": 1.0014447420375686e-05, "loss": 0.8492, "step": 8790 }, { "epoch": 1.4953597530917118, "grad_norm": 1.6796875, "learning_rate": 1.0012641493859441e-05, "loss": 0.8019, "step": 8791 }, { "epoch": 1.4955312171807016, "grad_norm": 1.6640625, "learning_rate": 1.0010835566930912e-05, "loss": 0.8758, "step": 8792 }, { "epoch": 1.4957026812696916, "grad_norm": 1.6953125, "learning_rate": 1.0009029639648993e-05, "loss": 0.8206, "step": 8793 }, { "epoch": 1.4958741453586815, "grad_norm": 1.671875, "learning_rate": 1.000722371207258e-05, "loss": 0.895, "step": 8794 }, { "epoch": 1.4960456094476713, "grad_norm": 1.640625, "learning_rate": 1.0005417784260578e-05, "loss": 0.8039, "step": 8795 }, { "epoch": 1.4962170735366611, "grad_norm": 1.75, "learning_rate": 1.0003611856271885e-05, "loss": 0.8428, "step": 8796 }, { "epoch": 1.4963885376256512, "grad_norm": 1.6953125, "learning_rate": 1.0001805928165392e-05, "loss": 0.8586, "step": 8797 }, { "epoch": 1.496560001714641, "grad_norm": 1.671875, "learning_rate": 1e-05, "loss": 0.8142, "step": 8798 }, { "epoch": 1.4967314658036308, "grad_norm": 1.6640625, "learning_rate": 9.99819407183461e-06, "loss": 0.835, "step": 8799 }, { "epoch": 1.4969029298926206, "grad_norm": 1.7578125, "learning_rate": 9.996388143728118e-06, "loss": 0.8529, "step": 8800 }, { "epoch": 1.4970743939816105, "grad_norm": 1.6953125, "learning_rate": 9.994582215739422e-06, "loss": 0.8341, "step": 8801 }, { "epoch": 1.4972458580706003, "grad_norm": 1.7109375, "learning_rate": 9.99277628792742e-06, "loss": 0.8221, "step": 8802 }, { "epoch": 1.4974173221595901, "grad_norm": 1.640625, "learning_rate": 9.990970360351012e-06, "loss": 0.8617, "step": 8803 }, { "epoch": 1.49758878624858, "grad_norm": 1.7578125, "learning_rate": 9.989164433069093e-06, "loss": 0.8577, "step": 8804 }, { "epoch": 1.49776025033757, "grad_norm": 1.859375, "learning_rate": 9.987358506140564e-06, "loss": 0.8556, "step": 8805 }, { "epoch": 1.4979317144265598, "grad_norm": 1.8203125, "learning_rate": 9.98555257962432e-06, "loss": 0.8676, "step": 8806 }, { "epoch": 1.4981031785155496, "grad_norm": 1.7109375, "learning_rate": 9.983746653579262e-06, "loss": 0.9136, "step": 8807 }, { "epoch": 1.4982746426045395, "grad_norm": 1.6484375, "learning_rate": 9.981940728064286e-06, "loss": 0.9122, "step": 8808 }, { "epoch": 1.4984461066935295, "grad_norm": 1.6796875, "learning_rate": 9.980134803138291e-06, "loss": 0.8316, "step": 8809 }, { "epoch": 1.4986175707825193, "grad_norm": 1.71875, "learning_rate": 9.978328878860174e-06, "loss": 0.9408, "step": 8810 }, { "epoch": 1.4987890348715092, "grad_norm": 1.6484375, "learning_rate": 9.976522955288834e-06, "loss": 0.7907, "step": 8811 }, { "epoch": 1.498960498960499, "grad_norm": 1.6875, "learning_rate": 9.974717032483169e-06, "loss": 0.8882, "step": 8812 }, { "epoch": 1.4991319630494888, "grad_norm": 1.6640625, "learning_rate": 9.972911110502078e-06, "loss": 0.787, "step": 8813 }, { "epoch": 1.4993034271384786, "grad_norm": 1.71875, "learning_rate": 9.971105189404455e-06, "loss": 0.9062, "step": 8814 }, { "epoch": 1.4994748912274685, "grad_norm": 1.7265625, "learning_rate": 9.969299269249202e-06, "loss": 0.8384, "step": 8815 }, { "epoch": 1.4996463553164583, "grad_norm": 1.71875, "learning_rate": 9.96749335009522e-06, "loss": 0.8959, "step": 8816 }, { "epoch": 1.4998178194054483, "grad_norm": 1.6328125, "learning_rate": 9.9656874320014e-06, "loss": 0.8492, "step": 8817 }, { "epoch": 1.4999892834944382, "grad_norm": 1.6171875, "learning_rate": 9.96388151502664e-06, "loss": 0.7821, "step": 8818 }, { "epoch": 1.500160747583428, "grad_norm": 1.609375, "learning_rate": 9.96207559922984e-06, "loss": 0.8641, "step": 8819 }, { "epoch": 1.5003322116724178, "grad_norm": 1.7578125, "learning_rate": 9.960269684669898e-06, "loss": 0.8587, "step": 8820 }, { "epoch": 1.5005036757614079, "grad_norm": 1.609375, "learning_rate": 9.95846377140571e-06, "loss": 0.8383, "step": 8821 }, { "epoch": 1.5006751398503977, "grad_norm": 1.7109375, "learning_rate": 9.956657859496175e-06, "loss": 0.8629, "step": 8822 }, { "epoch": 1.5008466039393875, "grad_norm": 1.671875, "learning_rate": 9.954851949000189e-06, "loss": 0.8301, "step": 8823 }, { "epoch": 1.5010180680283773, "grad_norm": 1.640625, "learning_rate": 9.953046039976654e-06, "loss": 0.8623, "step": 8824 }, { "epoch": 1.5011895321173672, "grad_norm": 1.65625, "learning_rate": 9.951240132484464e-06, "loss": 0.7772, "step": 8825 }, { "epoch": 1.501360996206357, "grad_norm": 1.6484375, "learning_rate": 9.949434226582516e-06, "loss": 0.9161, "step": 8826 }, { "epoch": 1.5015324602953468, "grad_norm": 1.734375, "learning_rate": 9.947628322329714e-06, "loss": 0.889, "step": 8827 }, { "epoch": 1.5017039243843366, "grad_norm": 1.7421875, "learning_rate": 9.945822419784944e-06, "loss": 0.8393, "step": 8828 }, { "epoch": 1.5018753884733265, "grad_norm": 1.7109375, "learning_rate": 9.944016519007108e-06, "loss": 0.8285, "step": 8829 }, { "epoch": 1.5020468525623165, "grad_norm": 1.75, "learning_rate": 9.942210620055102e-06, "loss": 0.8412, "step": 8830 }, { "epoch": 1.5022183166513063, "grad_norm": 1.78125, "learning_rate": 9.94040472298783e-06, "loss": 0.8117, "step": 8831 }, { "epoch": 1.5023897807402962, "grad_norm": 1.75, "learning_rate": 9.938598827864183e-06, "loss": 0.8945, "step": 8832 }, { "epoch": 1.5025612448292862, "grad_norm": 1.7265625, "learning_rate": 9.936792934743059e-06, "loss": 0.8701, "step": 8833 }, { "epoch": 1.502732708918276, "grad_norm": 1.625, "learning_rate": 9.934987043683356e-06, "loss": 0.8044, "step": 8834 }, { "epoch": 1.5029041730072659, "grad_norm": 1.765625, "learning_rate": 9.933181154743971e-06, "loss": 0.8532, "step": 8835 }, { "epoch": 1.5030756370962557, "grad_norm": 1.625, "learning_rate": 9.931375267983799e-06, "loss": 0.8653, "step": 8836 }, { "epoch": 1.5032471011852455, "grad_norm": 1.71875, "learning_rate": 9.929569383461738e-06, "loss": 0.8524, "step": 8837 }, { "epoch": 1.5034185652742353, "grad_norm": 1.609375, "learning_rate": 9.927763501236685e-06, "loss": 0.8179, "step": 8838 }, { "epoch": 1.5035900293632252, "grad_norm": 1.703125, "learning_rate": 9.925957621367538e-06, "loss": 0.841, "step": 8839 }, { "epoch": 1.503761493452215, "grad_norm": 1.6796875, "learning_rate": 9.92415174391319e-06, "loss": 0.8189, "step": 8840 }, { "epoch": 1.5039329575412048, "grad_norm": 1.6953125, "learning_rate": 9.92234586893254e-06, "loss": 0.7867, "step": 8841 }, { "epoch": 1.5041044216301949, "grad_norm": 1.625, "learning_rate": 9.920539996484482e-06, "loss": 0.8511, "step": 8842 }, { "epoch": 1.5042758857191847, "grad_norm": 1.640625, "learning_rate": 9.918734126627914e-06, "loss": 0.8939, "step": 8843 }, { "epoch": 1.5044473498081745, "grad_norm": 1.7421875, "learning_rate": 9.916928259421733e-06, "loss": 0.8351, "step": 8844 }, { "epoch": 1.5046188138971646, "grad_norm": 1.7109375, "learning_rate": 9.915122394924834e-06, "loss": 0.8679, "step": 8845 }, { "epoch": 1.5047902779861544, "grad_norm": 1.7578125, "learning_rate": 9.913316533196113e-06, "loss": 0.8887, "step": 8846 }, { "epoch": 1.5049617420751442, "grad_norm": 1.6875, "learning_rate": 9.911510674294468e-06, "loss": 0.8625, "step": 8847 }, { "epoch": 1.505133206164134, "grad_norm": 1.65625, "learning_rate": 9.909704818278791e-06, "loss": 0.8188, "step": 8848 }, { "epoch": 1.5053046702531239, "grad_norm": 1.703125, "learning_rate": 9.907898965207982e-06, "loss": 0.8792, "step": 8849 }, { "epoch": 1.5054761343421137, "grad_norm": 1.7265625, "learning_rate": 9.906093115140935e-06, "loss": 0.8196, "step": 8850 }, { "epoch": 1.5056475984311035, "grad_norm": 1.6640625, "learning_rate": 9.904287268136544e-06, "loss": 0.8907, "step": 8851 }, { "epoch": 1.5058190625200933, "grad_norm": 1.640625, "learning_rate": 9.902481424253704e-06, "loss": 0.8197, "step": 8852 }, { "epoch": 1.5059905266090832, "grad_norm": 1.6796875, "learning_rate": 9.900675583551314e-06, "loss": 0.9056, "step": 8853 }, { "epoch": 1.5061619906980732, "grad_norm": 1.671875, "learning_rate": 9.898869746088266e-06, "loss": 0.8384, "step": 8854 }, { "epoch": 1.506333454787063, "grad_norm": 1.6953125, "learning_rate": 9.897063911923456e-06, "loss": 0.9468, "step": 8855 }, { "epoch": 1.5065049188760529, "grad_norm": 1.640625, "learning_rate": 9.89525808111578e-06, "loss": 0.828, "step": 8856 }, { "epoch": 1.506676382965043, "grad_norm": 1.6171875, "learning_rate": 9.893452253724133e-06, "loss": 0.7832, "step": 8857 }, { "epoch": 1.5068478470540327, "grad_norm": 1.640625, "learning_rate": 9.891646429807409e-06, "loss": 0.8636, "step": 8858 }, { "epoch": 1.5070193111430226, "grad_norm": 1.71875, "learning_rate": 9.889840609424502e-06, "loss": 0.8303, "step": 8859 }, { "epoch": 1.5071907752320124, "grad_norm": 1.703125, "learning_rate": 9.888034792634308e-06, "loss": 0.7927, "step": 8860 }, { "epoch": 1.5073622393210022, "grad_norm": 1.7578125, "learning_rate": 9.88622897949572e-06, "loss": 0.8017, "step": 8861 }, { "epoch": 1.507533703409992, "grad_norm": 1.796875, "learning_rate": 9.884423170067639e-06, "loss": 0.9169, "step": 8862 }, { "epoch": 1.5077051674989819, "grad_norm": 1.6796875, "learning_rate": 9.88261736440895e-06, "loss": 0.8707, "step": 8863 }, { "epoch": 1.5078766315879717, "grad_norm": 1.7265625, "learning_rate": 9.880811562578549e-06, "loss": 0.9191, "step": 8864 }, { "epoch": 1.5080480956769615, "grad_norm": 1.734375, "learning_rate": 9.879005764635334e-06, "loss": 0.8502, "step": 8865 }, { "epoch": 1.5082195597659516, "grad_norm": 1.6640625, "learning_rate": 9.877199970638195e-06, "loss": 0.8907, "step": 8866 }, { "epoch": 1.5083910238549414, "grad_norm": 1.6640625, "learning_rate": 9.875394180646028e-06, "loss": 0.8445, "step": 8867 }, { "epoch": 1.5085624879439312, "grad_norm": 1.7265625, "learning_rate": 9.873588394717726e-06, "loss": 0.87, "step": 8868 }, { "epoch": 1.5087339520329213, "grad_norm": 1.703125, "learning_rate": 9.87178261291218e-06, "loss": 0.8434, "step": 8869 }, { "epoch": 1.508905416121911, "grad_norm": 1.65625, "learning_rate": 9.869976835288288e-06, "loss": 0.7995, "step": 8870 }, { "epoch": 1.509076880210901, "grad_norm": 1.640625, "learning_rate": 9.868171061904941e-06, "loss": 0.9007, "step": 8871 }, { "epoch": 1.5092483442998907, "grad_norm": 1.828125, "learning_rate": 9.866365292821032e-06, "loss": 0.8768, "step": 8872 }, { "epoch": 1.5094198083888806, "grad_norm": 1.7265625, "learning_rate": 9.864559528095459e-06, "loss": 0.8668, "step": 8873 }, { "epoch": 1.5095912724778704, "grad_norm": 1.640625, "learning_rate": 9.862753767787106e-06, "loss": 0.9018, "step": 8874 }, { "epoch": 1.5097627365668602, "grad_norm": 1.6953125, "learning_rate": 9.860948011954868e-06, "loss": 0.8425, "step": 8875 }, { "epoch": 1.50993420065585, "grad_norm": 1.6875, "learning_rate": 9.859142260657639e-06, "loss": 0.7678, "step": 8876 }, { "epoch": 1.5101056647448399, "grad_norm": 1.6875, "learning_rate": 9.85733651395431e-06, "loss": 0.8245, "step": 8877 }, { "epoch": 1.51027712883383, "grad_norm": 1.6875, "learning_rate": 9.855530771903776e-06, "loss": 0.8217, "step": 8878 }, { "epoch": 1.5104485929228197, "grad_norm": 1.6953125, "learning_rate": 9.853725034564928e-06, "loss": 0.8625, "step": 8879 }, { "epoch": 1.5106200570118096, "grad_norm": 1.75, "learning_rate": 9.851919301996658e-06, "loss": 0.8898, "step": 8880 }, { "epoch": 1.5107915211007996, "grad_norm": 1.6875, "learning_rate": 9.850113574257857e-06, "loss": 0.7756, "step": 8881 }, { "epoch": 1.5109629851897894, "grad_norm": 1.7578125, "learning_rate": 9.848307851407414e-06, "loss": 0.8532, "step": 8882 }, { "epoch": 1.5111344492787793, "grad_norm": 1.6796875, "learning_rate": 9.846502133504227e-06, "loss": 0.8205, "step": 8883 }, { "epoch": 1.511305913367769, "grad_norm": 1.6796875, "learning_rate": 9.844696420607181e-06, "loss": 0.8913, "step": 8884 }, { "epoch": 1.511477377456759, "grad_norm": 1.65625, "learning_rate": 9.842890712775172e-06, "loss": 0.8602, "step": 8885 }, { "epoch": 1.5116488415457487, "grad_norm": 1.5859375, "learning_rate": 9.841085010067089e-06, "loss": 0.7896, "step": 8886 }, { "epoch": 1.5118203056347386, "grad_norm": 1.65625, "learning_rate": 9.83927931254182e-06, "loss": 0.8216, "step": 8887 }, { "epoch": 1.5119917697237284, "grad_norm": 1.734375, "learning_rate": 9.837473620258258e-06, "loss": 0.8148, "step": 8888 }, { "epoch": 1.5121632338127182, "grad_norm": 1.6484375, "learning_rate": 9.835667933275292e-06, "loss": 0.7759, "step": 8889 }, { "epoch": 1.512334697901708, "grad_norm": 1.828125, "learning_rate": 9.833862251651815e-06, "loss": 0.8444, "step": 8890 }, { "epoch": 1.512506161990698, "grad_norm": 1.625, "learning_rate": 9.832056575446715e-06, "loss": 0.8576, "step": 8891 }, { "epoch": 1.512677626079688, "grad_norm": 1.5703125, "learning_rate": 9.830250904718884e-06, "loss": 0.8579, "step": 8892 }, { "epoch": 1.5128490901686777, "grad_norm": 1.8046875, "learning_rate": 9.82844523952721e-06, "loss": 0.9643, "step": 8893 }, { "epoch": 1.5130205542576678, "grad_norm": 1.5703125, "learning_rate": 9.82663957993058e-06, "loss": 0.8405, "step": 8894 }, { "epoch": 1.5131920183466576, "grad_norm": 1.8515625, "learning_rate": 9.824833925987888e-06, "loss": 0.9236, "step": 8895 }, { "epoch": 1.5133634824356474, "grad_norm": 1.6328125, "learning_rate": 9.823028277758021e-06, "loss": 0.8099, "step": 8896 }, { "epoch": 1.5135349465246373, "grad_norm": 1.6953125, "learning_rate": 9.821222635299871e-06, "loss": 0.8192, "step": 8897 }, { "epoch": 1.513706410613627, "grad_norm": 1.65625, "learning_rate": 9.819416998672323e-06, "loss": 0.8839, "step": 8898 }, { "epoch": 1.513877874702617, "grad_norm": 1.6015625, "learning_rate": 9.817611367934264e-06, "loss": 0.7973, "step": 8899 }, { "epoch": 1.5140493387916067, "grad_norm": 1.703125, "learning_rate": 9.815805743144587e-06, "loss": 0.7841, "step": 8900 }, { "epoch": 1.5142208028805966, "grad_norm": 1.8203125, "learning_rate": 9.814000124362178e-06, "loss": 0.8855, "step": 8901 }, { "epoch": 1.5143922669695864, "grad_norm": 1.671875, "learning_rate": 9.812194511645924e-06, "loss": 0.7829, "step": 8902 }, { "epoch": 1.5145637310585764, "grad_norm": 1.796875, "learning_rate": 9.810388905054715e-06, "loss": 0.8709, "step": 8903 }, { "epoch": 1.5147351951475663, "grad_norm": 1.7109375, "learning_rate": 9.808583304647439e-06, "loss": 0.8896, "step": 8904 }, { "epoch": 1.514906659236556, "grad_norm": 1.671875, "learning_rate": 9.80677771048298e-06, "loss": 0.8687, "step": 8905 }, { "epoch": 1.5150781233255461, "grad_norm": 1.6484375, "learning_rate": 9.80497212262023e-06, "loss": 0.7913, "step": 8906 }, { "epoch": 1.515249587414536, "grad_norm": 1.765625, "learning_rate": 9.803166541118072e-06, "loss": 0.81, "step": 8907 }, { "epoch": 1.5154210515035258, "grad_norm": 1.6796875, "learning_rate": 9.801360966035396e-06, "loss": 0.807, "step": 8908 }, { "epoch": 1.5155925155925156, "grad_norm": 1.8125, "learning_rate": 9.799555397431086e-06, "loss": 0.8345, "step": 8909 }, { "epoch": 1.5157639796815054, "grad_norm": 1.71875, "learning_rate": 9.797749835364028e-06, "loss": 0.8034, "step": 8910 }, { "epoch": 1.5159354437704953, "grad_norm": 1.765625, "learning_rate": 9.79594427989311e-06, "loss": 0.7871, "step": 8911 }, { "epoch": 1.516106907859485, "grad_norm": 1.6953125, "learning_rate": 9.794138731077219e-06, "loss": 0.9255, "step": 8912 }, { "epoch": 1.516278371948475, "grad_norm": 1.734375, "learning_rate": 9.792333188975237e-06, "loss": 0.9397, "step": 8913 }, { "epoch": 1.5164498360374647, "grad_norm": 1.71875, "learning_rate": 9.790527653646053e-06, "loss": 0.8087, "step": 8914 }, { "epoch": 1.5166213001264548, "grad_norm": 1.703125, "learning_rate": 9.78872212514855e-06, "loss": 0.8213, "step": 8915 }, { "epoch": 1.5167927642154446, "grad_norm": 1.703125, "learning_rate": 9.786916603541614e-06, "loss": 0.8722, "step": 8916 }, { "epoch": 1.5169642283044344, "grad_norm": 1.7421875, "learning_rate": 9.78511108888413e-06, "loss": 0.885, "step": 8917 }, { "epoch": 1.5171356923934245, "grad_norm": 1.6640625, "learning_rate": 9.783305581234983e-06, "loss": 0.8474, "step": 8918 }, { "epoch": 1.5173071564824143, "grad_norm": 1.6796875, "learning_rate": 9.781500080653057e-06, "loss": 0.7978, "step": 8919 }, { "epoch": 1.5174786205714041, "grad_norm": 1.6171875, "learning_rate": 9.77969458719724e-06, "loss": 0.7995, "step": 8920 }, { "epoch": 1.517650084660394, "grad_norm": 1.6015625, "learning_rate": 9.777889100926408e-06, "loss": 0.8285, "step": 8921 }, { "epoch": 1.5178215487493838, "grad_norm": 1.59375, "learning_rate": 9.77608362189945e-06, "loss": 0.8133, "step": 8922 }, { "epoch": 1.5179930128383736, "grad_norm": 1.6484375, "learning_rate": 9.774278150175249e-06, "loss": 0.8127, "step": 8923 }, { "epoch": 1.5181644769273634, "grad_norm": 1.640625, "learning_rate": 9.772472685812682e-06, "loss": 0.8633, "step": 8924 }, { "epoch": 1.5183359410163533, "grad_norm": 1.6796875, "learning_rate": 9.770667228870643e-06, "loss": 0.9239, "step": 8925 }, { "epoch": 1.518507405105343, "grad_norm": 1.640625, "learning_rate": 9.768861779408006e-06, "loss": 0.942, "step": 8926 }, { "epoch": 1.5186788691943331, "grad_norm": 1.671875, "learning_rate": 9.767056337483659e-06, "loss": 0.7934, "step": 8927 }, { "epoch": 1.518850333283323, "grad_norm": 1.6953125, "learning_rate": 9.765250903156482e-06, "loss": 0.8728, "step": 8928 }, { "epoch": 1.5190217973723128, "grad_norm": 1.625, "learning_rate": 9.763445476485357e-06, "loss": 0.8055, "step": 8929 }, { "epoch": 1.5191932614613028, "grad_norm": 1.6875, "learning_rate": 9.761640057529164e-06, "loss": 0.8335, "step": 8930 }, { "epoch": 1.5193647255502927, "grad_norm": 1.6875, "learning_rate": 9.759834646346794e-06, "loss": 0.9022, "step": 8931 }, { "epoch": 1.5195361896392825, "grad_norm": 1.6875, "learning_rate": 9.758029242997112e-06, "loss": 0.8732, "step": 8932 }, { "epoch": 1.5197076537282723, "grad_norm": 1.7890625, "learning_rate": 9.75622384753901e-06, "loss": 0.8624, "step": 8933 }, { "epoch": 1.5198791178172621, "grad_norm": 1.734375, "learning_rate": 9.754418460031365e-06, "loss": 0.8726, "step": 8934 }, { "epoch": 1.520050581906252, "grad_norm": 1.71875, "learning_rate": 9.75261308053306e-06, "loss": 0.8423, "step": 8935 }, { "epoch": 1.5202220459952418, "grad_norm": 1.65625, "learning_rate": 9.750807709102974e-06, "loss": 0.8287, "step": 8936 }, { "epoch": 1.5203935100842316, "grad_norm": 1.703125, "learning_rate": 9.749002345799988e-06, "loss": 0.8095, "step": 8937 }, { "epoch": 1.5205649741732215, "grad_norm": 1.6796875, "learning_rate": 9.747196990682979e-06, "loss": 0.8468, "step": 8938 }, { "epoch": 1.5207364382622115, "grad_norm": 1.71875, "learning_rate": 9.74539164381083e-06, "loss": 0.88, "step": 8939 }, { "epoch": 1.5209079023512013, "grad_norm": 1.671875, "learning_rate": 9.743586305242417e-06, "loss": 0.8588, "step": 8940 }, { "epoch": 1.5210793664401911, "grad_norm": 1.78125, "learning_rate": 9.74178097503662e-06, "loss": 0.8929, "step": 8941 }, { "epoch": 1.5212508305291812, "grad_norm": 1.6640625, "learning_rate": 9.739975653252317e-06, "loss": 0.9824, "step": 8942 }, { "epoch": 1.521422294618171, "grad_norm": 1.65625, "learning_rate": 9.73817033994839e-06, "loss": 0.8122, "step": 8943 }, { "epoch": 1.5215937587071608, "grad_norm": 1.703125, "learning_rate": 9.736365035183712e-06, "loss": 0.824, "step": 8944 }, { "epoch": 1.5217652227961507, "grad_norm": 1.703125, "learning_rate": 9.734559739017162e-06, "loss": 0.8004, "step": 8945 }, { "epoch": 1.5219366868851405, "grad_norm": 1.671875, "learning_rate": 9.732754451507621e-06, "loss": 0.7771, "step": 8946 }, { "epoch": 1.5221081509741303, "grad_norm": 1.7421875, "learning_rate": 9.730949172713961e-06, "loss": 0.9023, "step": 8947 }, { "epoch": 1.5222796150631202, "grad_norm": 1.703125, "learning_rate": 9.729143902695064e-06, "loss": 0.8699, "step": 8948 }, { "epoch": 1.52245107915211, "grad_norm": 1.7265625, "learning_rate": 9.727338641509804e-06, "loss": 0.9692, "step": 8949 }, { "epoch": 1.5226225432410998, "grad_norm": 2.0, "learning_rate": 9.725533389217056e-06, "loss": 0.8999, "step": 8950 }, { "epoch": 1.5227940073300898, "grad_norm": 1.7109375, "learning_rate": 9.723728145875699e-06, "loss": 0.8602, "step": 8951 }, { "epoch": 1.5229654714190797, "grad_norm": 1.8671875, "learning_rate": 9.721922911544607e-06, "loss": 0.888, "step": 8952 }, { "epoch": 1.5231369355080695, "grad_norm": 1.7421875, "learning_rate": 9.720117686282657e-06, "loss": 0.8557, "step": 8953 }, { "epoch": 1.5233083995970595, "grad_norm": 1.6015625, "learning_rate": 9.718312470148723e-06, "loss": 0.8532, "step": 8954 }, { "epoch": 1.5234798636860494, "grad_norm": 1.7578125, "learning_rate": 9.71650726320168e-06, "loss": 0.9081, "step": 8955 }, { "epoch": 1.5236513277750392, "grad_norm": 1.7109375, "learning_rate": 9.714702065500401e-06, "loss": 0.8824, "step": 8956 }, { "epoch": 1.523822791864029, "grad_norm": 1.703125, "learning_rate": 9.712896877103764e-06, "loss": 0.878, "step": 8957 }, { "epoch": 1.5239942559530189, "grad_norm": 1.8125, "learning_rate": 9.71109169807064e-06, "loss": 0.9402, "step": 8958 }, { "epoch": 1.5241657200420087, "grad_norm": 1.6328125, "learning_rate": 9.709286528459904e-06, "loss": 0.7948, "step": 8959 }, { "epoch": 1.5243371841309985, "grad_norm": 1.59375, "learning_rate": 9.707481368330428e-06, "loss": 0.6961, "step": 8960 }, { "epoch": 1.5245086482199883, "grad_norm": 1.6796875, "learning_rate": 9.705676217741087e-06, "loss": 0.8052, "step": 8961 }, { "epoch": 1.5246801123089782, "grad_norm": 1.6796875, "learning_rate": 9.703871076750752e-06, "loss": 0.8027, "step": 8962 }, { "epoch": 1.5248515763979682, "grad_norm": 1.7421875, "learning_rate": 9.702065945418295e-06, "loss": 0.9244, "step": 8963 }, { "epoch": 1.525023040486958, "grad_norm": 1.7265625, "learning_rate": 9.700260823802592e-06, "loss": 0.8807, "step": 8964 }, { "epoch": 1.5251945045759479, "grad_norm": 1.671875, "learning_rate": 9.698455711962511e-06, "loss": 0.9256, "step": 8965 }, { "epoch": 1.525365968664938, "grad_norm": 1.796875, "learning_rate": 9.696650609956931e-06, "loss": 0.8056, "step": 8966 }, { "epoch": 1.5255374327539277, "grad_norm": 1.71875, "learning_rate": 9.69484551784471e-06, "loss": 0.807, "step": 8967 }, { "epoch": 1.5257088968429176, "grad_norm": 1.6875, "learning_rate": 9.693040435684727e-06, "loss": 0.9571, "step": 8968 }, { "epoch": 1.5258803609319074, "grad_norm": 1.671875, "learning_rate": 9.691235363535852e-06, "loss": 0.8819, "step": 8969 }, { "epoch": 1.5260518250208972, "grad_norm": 1.703125, "learning_rate": 9.689430301456954e-06, "loss": 0.828, "step": 8970 }, { "epoch": 1.526223289109887, "grad_norm": 1.5859375, "learning_rate": 9.6876252495069e-06, "loss": 0.8439, "step": 8971 }, { "epoch": 1.5263947531988769, "grad_norm": 1.71875, "learning_rate": 9.685820207744565e-06, "loss": 0.8378, "step": 8972 }, { "epoch": 1.5265662172878667, "grad_norm": 1.6640625, "learning_rate": 9.68401517622882e-06, "loss": 0.9048, "step": 8973 }, { "epoch": 1.5267376813768565, "grad_norm": 1.734375, "learning_rate": 9.682210155018526e-06, "loss": 0.8459, "step": 8974 }, { "epoch": 1.5269091454658466, "grad_norm": 1.71875, "learning_rate": 9.680405144172556e-06, "loss": 0.79, "step": 8975 }, { "epoch": 1.5270806095548364, "grad_norm": 1.6640625, "learning_rate": 9.67860014374978e-06, "loss": 0.8598, "step": 8976 }, { "epoch": 1.5272520736438262, "grad_norm": 1.6640625, "learning_rate": 9.676795153809065e-06, "loss": 0.783, "step": 8977 }, { "epoch": 1.527423537732816, "grad_norm": 1.71875, "learning_rate": 9.674990174409275e-06, "loss": 0.8452, "step": 8978 }, { "epoch": 1.527595001821806, "grad_norm": 1.8359375, "learning_rate": 9.673185205609277e-06, "loss": 0.8368, "step": 8979 }, { "epoch": 1.527766465910796, "grad_norm": 1.75, "learning_rate": 9.67138024746794e-06, "loss": 0.8541, "step": 8980 }, { "epoch": 1.5279379299997857, "grad_norm": 1.6875, "learning_rate": 9.669575300044133e-06, "loss": 0.9475, "step": 8981 }, { "epoch": 1.5281093940887756, "grad_norm": 1.7421875, "learning_rate": 9.667770363396718e-06, "loss": 0.8133, "step": 8982 }, { "epoch": 1.5282808581777654, "grad_norm": 1.78125, "learning_rate": 9.665965437584564e-06, "loss": 0.8793, "step": 8983 }, { "epoch": 1.5284523222667552, "grad_norm": 1.6796875, "learning_rate": 9.664160522666534e-06, "loss": 0.885, "step": 8984 }, { "epoch": 1.528623786355745, "grad_norm": 1.6953125, "learning_rate": 9.662355618701495e-06, "loss": 0.8527, "step": 8985 }, { "epoch": 1.5287952504447349, "grad_norm": 1.65625, "learning_rate": 9.66055072574831e-06, "loss": 0.8093, "step": 8986 }, { "epoch": 1.5289667145337247, "grad_norm": 1.671875, "learning_rate": 9.658745843865845e-06, "loss": 0.8133, "step": 8987 }, { "epoch": 1.5291381786227147, "grad_norm": 1.71875, "learning_rate": 9.656940973112961e-06, "loss": 0.8026, "step": 8988 }, { "epoch": 1.5293096427117046, "grad_norm": 1.6875, "learning_rate": 9.655136113548528e-06, "loss": 0.7791, "step": 8989 }, { "epoch": 1.5294811068006944, "grad_norm": 1.7734375, "learning_rate": 9.6533312652314e-06, "loss": 0.9006, "step": 8990 }, { "epoch": 1.5296525708896844, "grad_norm": 1.6796875, "learning_rate": 9.65152642822045e-06, "loss": 0.9335, "step": 8991 }, { "epoch": 1.5298240349786743, "grad_norm": 1.765625, "learning_rate": 9.649721602574532e-06, "loss": 0.8893, "step": 8992 }, { "epoch": 1.529995499067664, "grad_norm": 1.6796875, "learning_rate": 9.64791678835251e-06, "loss": 0.8333, "step": 8993 }, { "epoch": 1.530166963156654, "grad_norm": 1.6875, "learning_rate": 9.64611198561325e-06, "loss": 0.9223, "step": 8994 }, { "epoch": 1.5303384272456437, "grad_norm": 1.640625, "learning_rate": 9.64430719441561e-06, "loss": 0.809, "step": 8995 }, { "epoch": 1.5305098913346336, "grad_norm": 1.578125, "learning_rate": 9.642502414818452e-06, "loss": 0.805, "step": 8996 }, { "epoch": 1.5306813554236234, "grad_norm": 1.84375, "learning_rate": 9.640697646880636e-06, "loss": 0.824, "step": 8997 }, { "epoch": 1.5308528195126132, "grad_norm": 1.734375, "learning_rate": 9.638892890661022e-06, "loss": 0.8221, "step": 8998 }, { "epoch": 1.531024283601603, "grad_norm": 1.59375, "learning_rate": 9.637088146218471e-06, "loss": 0.7839, "step": 8999 }, { "epoch": 1.531195747690593, "grad_norm": 1.6484375, "learning_rate": 9.635283413611846e-06, "loss": 0.8316, "step": 9000 }, { "epoch": 1.531367211779583, "grad_norm": 1.734375, "learning_rate": 9.633478692899998e-06, "loss": 0.8849, "step": 9001 }, { "epoch": 1.5315386758685727, "grad_norm": 1.7421875, "learning_rate": 9.631673984141792e-06, "loss": 0.9323, "step": 9002 }, { "epoch": 1.5317101399575628, "grad_norm": 1.78125, "learning_rate": 9.629869287396083e-06, "loss": 0.8271, "step": 9003 }, { "epoch": 1.5318816040465526, "grad_norm": 1.7734375, "learning_rate": 9.62806460272173e-06, "loss": 0.9224, "step": 9004 }, { "epoch": 1.5320530681355424, "grad_norm": 1.609375, "learning_rate": 9.626259930177592e-06, "loss": 0.8317, "step": 9005 }, { "epoch": 1.5322245322245323, "grad_norm": 1.7421875, "learning_rate": 9.624455269822526e-06, "loss": 0.8575, "step": 9006 }, { "epoch": 1.532395996313522, "grad_norm": 1.7578125, "learning_rate": 9.622650621715385e-06, "loss": 0.8243, "step": 9007 }, { "epoch": 1.532567460402512, "grad_norm": 1.734375, "learning_rate": 9.62084598591503e-06, "loss": 0.8792, "step": 9008 }, { "epoch": 1.5327389244915017, "grad_norm": 1.71875, "learning_rate": 9.619041362480314e-06, "loss": 0.9154, "step": 9009 }, { "epoch": 1.5329103885804916, "grad_norm": 1.9140625, "learning_rate": 9.617236751470094e-06, "loss": 0.9161, "step": 9010 }, { "epoch": 1.5330818526694814, "grad_norm": 1.6328125, "learning_rate": 9.615432152943225e-06, "loss": 0.8368, "step": 9011 }, { "epoch": 1.5332533167584714, "grad_norm": 1.65625, "learning_rate": 9.613627566958567e-06, "loss": 0.828, "step": 9012 }, { "epoch": 1.5334247808474613, "grad_norm": 1.703125, "learning_rate": 9.611822993574963e-06, "loss": 0.8458, "step": 9013 }, { "epoch": 1.533596244936451, "grad_norm": 1.6171875, "learning_rate": 9.610018432851275e-06, "loss": 0.7959, "step": 9014 }, { "epoch": 1.5337677090254411, "grad_norm": 1.734375, "learning_rate": 9.608213884846353e-06, "loss": 0.9074, "step": 9015 }, { "epoch": 1.533939173114431, "grad_norm": 1.65625, "learning_rate": 9.60640934961905e-06, "loss": 0.7942, "step": 9016 }, { "epoch": 1.5341106372034208, "grad_norm": 1.71875, "learning_rate": 9.604604827228225e-06, "loss": 0.9097, "step": 9017 }, { "epoch": 1.5342821012924106, "grad_norm": 1.6640625, "learning_rate": 9.602800317732718e-06, "loss": 0.886, "step": 9018 }, { "epoch": 1.5344535653814004, "grad_norm": 1.625, "learning_rate": 9.600995821191395e-06, "loss": 0.7985, "step": 9019 }, { "epoch": 1.5346250294703903, "grad_norm": 1.7578125, "learning_rate": 9.599191337663098e-06, "loss": 0.82, "step": 9020 }, { "epoch": 1.53479649355938, "grad_norm": 1.7421875, "learning_rate": 9.597386867206683e-06, "loss": 0.9522, "step": 9021 }, { "epoch": 1.53496795764837, "grad_norm": 1.8515625, "learning_rate": 9.595582409880996e-06, "loss": 0.9203, "step": 9022 }, { "epoch": 1.5351394217373597, "grad_norm": 1.7421875, "learning_rate": 9.593777965744894e-06, "loss": 0.8255, "step": 9023 }, { "epoch": 1.5353108858263498, "grad_norm": 1.78125, "learning_rate": 9.59197353485722e-06, "loss": 0.8641, "step": 9024 }, { "epoch": 1.5354823499153396, "grad_norm": 1.765625, "learning_rate": 9.590169117276825e-06, "loss": 0.8695, "step": 9025 }, { "epoch": 1.5356538140043294, "grad_norm": 1.7421875, "learning_rate": 9.588364713062555e-06, "loss": 0.8724, "step": 9026 }, { "epoch": 1.5358252780933195, "grad_norm": 1.828125, "learning_rate": 9.586560322273264e-06, "loss": 0.8957, "step": 9027 }, { "epoch": 1.5359967421823093, "grad_norm": 1.640625, "learning_rate": 9.584755944967798e-06, "loss": 0.819, "step": 9028 }, { "epoch": 1.5361682062712991, "grad_norm": 1.703125, "learning_rate": 9.582951581205005e-06, "loss": 0.8256, "step": 9029 }, { "epoch": 1.536339670360289, "grad_norm": 1.8515625, "learning_rate": 9.581147231043731e-06, "loss": 0.8678, "step": 9030 }, { "epoch": 1.5365111344492788, "grad_norm": 1.6953125, "learning_rate": 9.579342894542822e-06, "loss": 0.9207, "step": 9031 }, { "epoch": 1.5366825985382686, "grad_norm": 1.7890625, "learning_rate": 9.577538571761126e-06, "loss": 0.7908, "step": 9032 }, { "epoch": 1.5368540626272584, "grad_norm": 1.640625, "learning_rate": 9.575734262757488e-06, "loss": 0.8578, "step": 9033 }, { "epoch": 1.5370255267162483, "grad_norm": 1.671875, "learning_rate": 9.573929967590755e-06, "loss": 0.8436, "step": 9034 }, { "epoch": 1.537196990805238, "grad_norm": 1.734375, "learning_rate": 9.57212568631977e-06, "loss": 0.8757, "step": 9035 }, { "epoch": 1.5373684548942281, "grad_norm": 1.671875, "learning_rate": 9.570321419003375e-06, "loss": 0.8262, "step": 9036 }, { "epoch": 1.537539918983218, "grad_norm": 1.7265625, "learning_rate": 9.568517165700419e-06, "loss": 0.8632, "step": 9037 }, { "epoch": 1.5377113830722078, "grad_norm": 1.65625, "learning_rate": 9.56671292646974e-06, "loss": 0.8484, "step": 9038 }, { "epoch": 1.5378828471611978, "grad_norm": 1.765625, "learning_rate": 9.564908701370186e-06, "loss": 0.9305, "step": 9039 }, { "epoch": 1.5380543112501877, "grad_norm": 1.640625, "learning_rate": 9.563104490460599e-06, "loss": 0.8929, "step": 9040 }, { "epoch": 1.5382257753391775, "grad_norm": 1.6484375, "learning_rate": 9.561300293799818e-06, "loss": 0.8637, "step": 9041 }, { "epoch": 1.5383972394281673, "grad_norm": 1.671875, "learning_rate": 9.559496111446686e-06, "loss": 0.7468, "step": 9042 }, { "epoch": 1.5385687035171571, "grad_norm": 1.7265625, "learning_rate": 9.557691943460046e-06, "loss": 0.8259, "step": 9043 }, { "epoch": 1.538740167606147, "grad_norm": 1.640625, "learning_rate": 9.555887789898737e-06, "loss": 0.8728, "step": 9044 }, { "epoch": 1.5389116316951368, "grad_norm": 1.8125, "learning_rate": 9.554083650821598e-06, "loss": 0.858, "step": 9045 }, { "epoch": 1.5390830957841266, "grad_norm": 1.7421875, "learning_rate": 9.552279526287472e-06, "loss": 0.8959, "step": 9046 }, { "epoch": 1.5392545598731164, "grad_norm": 1.6796875, "learning_rate": 9.550475416355199e-06, "loss": 0.8063, "step": 9047 }, { "epoch": 1.5394260239621065, "grad_norm": 1.65625, "learning_rate": 9.548671321083612e-06, "loss": 0.7583, "step": 9048 }, { "epoch": 1.5395974880510963, "grad_norm": 1.65625, "learning_rate": 9.546867240531553e-06, "loss": 0.91, "step": 9049 }, { "epoch": 1.5397689521400861, "grad_norm": 1.6953125, "learning_rate": 9.54506317475786e-06, "loss": 0.8993, "step": 9050 }, { "epoch": 1.5399404162290762, "grad_norm": 1.828125, "learning_rate": 9.54325912382137e-06, "loss": 0.8975, "step": 9051 }, { "epoch": 1.540111880318066, "grad_norm": 1.7109375, "learning_rate": 9.54145508778092e-06, "loss": 0.881, "step": 9052 }, { "epoch": 1.5402833444070558, "grad_norm": 1.609375, "learning_rate": 9.539651066695346e-06, "loss": 0.8008, "step": 9053 }, { "epoch": 1.5404548084960457, "grad_norm": 1.6796875, "learning_rate": 9.537847060623484e-06, "loss": 0.845, "step": 9054 }, { "epoch": 1.5406262725850355, "grad_norm": 1.7109375, "learning_rate": 9.53604306962417e-06, "loss": 0.8601, "step": 9055 }, { "epoch": 1.5407977366740253, "grad_norm": 1.7421875, "learning_rate": 9.53423909375624e-06, "loss": 0.8381, "step": 9056 }, { "epoch": 1.5409692007630151, "grad_norm": 1.75, "learning_rate": 9.532435133078523e-06, "loss": 0.9294, "step": 9057 }, { "epoch": 1.541140664852005, "grad_norm": 1.6796875, "learning_rate": 9.530631187649864e-06, "loss": 0.7982, "step": 9058 }, { "epoch": 1.5413121289409948, "grad_norm": 1.7265625, "learning_rate": 9.528827257529085e-06, "loss": 0.8474, "step": 9059 }, { "epoch": 1.5414835930299848, "grad_norm": 1.703125, "learning_rate": 9.527023342775023e-06, "loss": 0.8985, "step": 9060 }, { "epoch": 1.5416550571189747, "grad_norm": 1.7109375, "learning_rate": 9.525219443446512e-06, "loss": 0.909, "step": 9061 }, { "epoch": 1.5418265212079645, "grad_norm": 1.625, "learning_rate": 9.523415559602383e-06, "loss": 0.8313, "step": 9062 }, { "epoch": 1.5419979852969545, "grad_norm": 1.6328125, "learning_rate": 9.521611691301465e-06, "loss": 0.8706, "step": 9063 }, { "epoch": 1.5421694493859444, "grad_norm": 1.7109375, "learning_rate": 9.519807838602593e-06, "loss": 0.8734, "step": 9064 }, { "epoch": 1.5423409134749342, "grad_norm": 1.7265625, "learning_rate": 9.518004001564592e-06, "loss": 0.9018, "step": 9065 }, { "epoch": 1.542512377563924, "grad_norm": 1.7421875, "learning_rate": 9.5162001802463e-06, "loss": 0.8821, "step": 9066 }, { "epoch": 1.5426838416529138, "grad_norm": 1.65625, "learning_rate": 9.514396374706541e-06, "loss": 0.8449, "step": 9067 }, { "epoch": 1.5428553057419037, "grad_norm": 1.6875, "learning_rate": 9.512592585004144e-06, "loss": 0.9382, "step": 9068 }, { "epoch": 1.5430267698308935, "grad_norm": 1.609375, "learning_rate": 9.510788811197939e-06, "loss": 0.7869, "step": 9069 }, { "epoch": 1.5431982339198833, "grad_norm": 1.6328125, "learning_rate": 9.508985053346757e-06, "loss": 0.8338, "step": 9070 }, { "epoch": 1.5433696980088731, "grad_norm": 1.6953125, "learning_rate": 9.507181311509416e-06, "loss": 0.7595, "step": 9071 }, { "epoch": 1.5435411620978632, "grad_norm": 1.609375, "learning_rate": 9.505377585744751e-06, "loss": 0.9012, "step": 9072 }, { "epoch": 1.543712626186853, "grad_norm": 1.6484375, "learning_rate": 9.503573876111581e-06, "loss": 0.8346, "step": 9073 }, { "epoch": 1.5438840902758428, "grad_norm": 1.6796875, "learning_rate": 9.501770182668739e-06, "loss": 0.8816, "step": 9074 }, { "epoch": 1.5440555543648327, "grad_norm": 1.546875, "learning_rate": 9.499966505475049e-06, "loss": 0.7961, "step": 9075 }, { "epoch": 1.5442270184538227, "grad_norm": 1.78125, "learning_rate": 9.498162844589333e-06, "loss": 0.8428, "step": 9076 }, { "epoch": 1.5443984825428125, "grad_norm": 1.7421875, "learning_rate": 9.496359200070416e-06, "loss": 0.9426, "step": 9077 }, { "epoch": 1.5445699466318024, "grad_norm": 1.6953125, "learning_rate": 9.494555571977122e-06, "loss": 0.823, "step": 9078 }, { "epoch": 1.5447414107207922, "grad_norm": 1.703125, "learning_rate": 9.492751960368274e-06, "loss": 0.8447, "step": 9079 }, { "epoch": 1.544912874809782, "grad_norm": 1.7265625, "learning_rate": 9.490948365302695e-06, "loss": 0.9213, "step": 9080 }, { "epoch": 1.5450843388987718, "grad_norm": 1.6015625, "learning_rate": 9.489144786839209e-06, "loss": 0.7861, "step": 9081 }, { "epoch": 1.5452558029877617, "grad_norm": 1.7109375, "learning_rate": 9.487341225036634e-06, "loss": 0.8669, "step": 9082 }, { "epoch": 1.5454272670767515, "grad_norm": 1.6875, "learning_rate": 9.485537679953791e-06, "loss": 0.8379, "step": 9083 }, { "epoch": 1.5455987311657413, "grad_norm": 1.71875, "learning_rate": 9.483734151649503e-06, "loss": 0.9128, "step": 9084 }, { "epoch": 1.5457701952547314, "grad_norm": 1.7265625, "learning_rate": 9.481930640182586e-06, "loss": 0.8098, "step": 9085 }, { "epoch": 1.5459416593437212, "grad_norm": 1.6015625, "learning_rate": 9.480127145611863e-06, "loss": 0.8269, "step": 9086 }, { "epoch": 1.546113123432711, "grad_norm": 1.75, "learning_rate": 9.47832366799615e-06, "loss": 0.9293, "step": 9087 }, { "epoch": 1.546284587521701, "grad_norm": 1.734375, "learning_rate": 9.476520207394268e-06, "loss": 0.8146, "step": 9088 }, { "epoch": 1.5464560516106909, "grad_norm": 1.6953125, "learning_rate": 9.474716763865033e-06, "loss": 0.8419, "step": 9089 }, { "epoch": 1.5466275156996807, "grad_norm": 1.71875, "learning_rate": 9.472913337467263e-06, "loss": 0.7645, "step": 9090 }, { "epoch": 1.5467989797886705, "grad_norm": 1.7578125, "learning_rate": 9.471109928259774e-06, "loss": 0.9052, "step": 9091 }, { "epoch": 1.5469704438776604, "grad_norm": 1.7578125, "learning_rate": 9.469306536301381e-06, "loss": 0.9394, "step": 9092 }, { "epoch": 1.5471419079666502, "grad_norm": 1.6953125, "learning_rate": 9.467503161650902e-06, "loss": 0.8726, "step": 9093 }, { "epoch": 1.54731337205564, "grad_norm": 1.703125, "learning_rate": 9.46569980436715e-06, "loss": 0.833, "step": 9094 }, { "epoch": 1.5474848361446298, "grad_norm": 1.59375, "learning_rate": 9.463896464508938e-06, "loss": 0.7878, "step": 9095 }, { "epoch": 1.5476563002336197, "grad_norm": 1.625, "learning_rate": 9.462093142135081e-06, "loss": 0.7952, "step": 9096 }, { "epoch": 1.5478277643226097, "grad_norm": 1.65625, "learning_rate": 9.460289837304392e-06, "loss": 0.8398, "step": 9097 }, { "epoch": 1.5479992284115995, "grad_norm": 1.5859375, "learning_rate": 9.458486550075684e-06, "loss": 0.725, "step": 9098 }, { "epoch": 1.5481706925005894, "grad_norm": 1.59375, "learning_rate": 9.456683280507768e-06, "loss": 0.8511, "step": 9099 }, { "epoch": 1.5483421565895794, "grad_norm": 1.5859375, "learning_rate": 9.454880028659458e-06, "loss": 0.736, "step": 9100 }, { "epoch": 1.5483421565895794, "eval_loss": 0.8392665386199951, "eval_runtime": 835.9126, "eval_samples_per_second": 2.99, "eval_steps_per_second": 2.99, "step": 9100 }, { "epoch": 1.5485136206785692, "grad_norm": 1.71875, "learning_rate": 9.453076794589562e-06, "loss": 0.8413, "step": 9101 }, { "epoch": 1.548685084767559, "grad_norm": 1.734375, "learning_rate": 9.451273578356892e-06, "loss": 0.8524, "step": 9102 }, { "epoch": 1.5488565488565489, "grad_norm": 1.6171875, "learning_rate": 9.449470380020256e-06, "loss": 0.7827, "step": 9103 }, { "epoch": 1.5490280129455387, "grad_norm": 1.875, "learning_rate": 9.447667199638467e-06, "loss": 0.9264, "step": 9104 }, { "epoch": 1.5491994770345285, "grad_norm": 1.71875, "learning_rate": 9.445864037270328e-06, "loss": 0.971, "step": 9105 }, { "epoch": 1.5493709411235184, "grad_norm": 1.734375, "learning_rate": 9.444060892974648e-06, "loss": 0.864, "step": 9106 }, { "epoch": 1.5495424052125082, "grad_norm": 1.8203125, "learning_rate": 9.442257766810237e-06, "loss": 0.8644, "step": 9107 }, { "epoch": 1.549713869301498, "grad_norm": 1.59375, "learning_rate": 9.4404546588359e-06, "loss": 0.8537, "step": 9108 }, { "epoch": 1.549885333390488, "grad_norm": 1.7578125, "learning_rate": 9.438651569110444e-06, "loss": 0.8993, "step": 9109 }, { "epoch": 1.5500567974794779, "grad_norm": 1.6796875, "learning_rate": 9.436848497692674e-06, "loss": 0.8384, "step": 9110 }, { "epoch": 1.5502282615684677, "grad_norm": 1.640625, "learning_rate": 9.435045444641396e-06, "loss": 0.8226, "step": 9111 }, { "epoch": 1.5503997256574578, "grad_norm": 1.75, "learning_rate": 9.43324241001541e-06, "loss": 0.8532, "step": 9112 }, { "epoch": 1.5505711897464476, "grad_norm": 1.6875, "learning_rate": 9.431439393873527e-06, "loss": 0.8572, "step": 9113 }, { "epoch": 1.5507426538354374, "grad_norm": 1.65625, "learning_rate": 9.429636396274544e-06, "loss": 0.8009, "step": 9114 }, { "epoch": 1.5509141179244272, "grad_norm": 1.7578125, "learning_rate": 9.427833417277268e-06, "loss": 0.8088, "step": 9115 }, { "epoch": 1.551085582013417, "grad_norm": 1.65625, "learning_rate": 9.426030456940502e-06, "loss": 0.8109, "step": 9116 }, { "epoch": 1.551257046102407, "grad_norm": 1.7578125, "learning_rate": 9.42422751532304e-06, "loss": 0.8577, "step": 9117 }, { "epoch": 1.5514285101913967, "grad_norm": 1.7421875, "learning_rate": 9.422424592483687e-06, "loss": 0.9251, "step": 9118 }, { "epoch": 1.5515999742803865, "grad_norm": 1.6953125, "learning_rate": 9.420621688481244e-06, "loss": 0.8698, "step": 9119 }, { "epoch": 1.5517714383693764, "grad_norm": 1.65625, "learning_rate": 9.418818803374506e-06, "loss": 0.8414, "step": 9120 }, { "epoch": 1.5519429024583664, "grad_norm": 1.71875, "learning_rate": 9.417015937222279e-06, "loss": 0.8404, "step": 9121 }, { "epoch": 1.5521143665473562, "grad_norm": 1.6484375, "learning_rate": 9.415213090083355e-06, "loss": 0.8677, "step": 9122 }, { "epoch": 1.552285830636346, "grad_norm": 1.7421875, "learning_rate": 9.413410262016536e-06, "loss": 0.8818, "step": 9123 }, { "epoch": 1.5524572947253361, "grad_norm": 1.6640625, "learning_rate": 9.411607453080616e-06, "loss": 0.8228, "step": 9124 }, { "epoch": 1.552628758814326, "grad_norm": 1.6328125, "learning_rate": 9.409804663334395e-06, "loss": 0.8459, "step": 9125 }, { "epoch": 1.5528002229033158, "grad_norm": 1.703125, "learning_rate": 9.408001892836665e-06, "loss": 0.8643, "step": 9126 }, { "epoch": 1.5529716869923056, "grad_norm": 1.671875, "learning_rate": 9.406199141646227e-06, "loss": 0.8558, "step": 9127 }, { "epoch": 1.5531431510812954, "grad_norm": 1.6875, "learning_rate": 9.404396409821863e-06, "loss": 0.8688, "step": 9128 }, { "epoch": 1.5533146151702852, "grad_norm": 1.6875, "learning_rate": 9.40259369742238e-06, "loss": 0.8741, "step": 9129 }, { "epoch": 1.553486079259275, "grad_norm": 1.65625, "learning_rate": 9.400791004506564e-06, "loss": 0.826, "step": 9130 }, { "epoch": 1.553657543348265, "grad_norm": 1.7109375, "learning_rate": 9.39898833113321e-06, "loss": 0.8623, "step": 9131 }, { "epoch": 1.5538290074372547, "grad_norm": 1.7734375, "learning_rate": 9.397185677361112e-06, "loss": 0.925, "step": 9132 }, { "epoch": 1.5540004715262448, "grad_norm": 30.375, "learning_rate": 9.395383043249057e-06, "loss": 1.2323, "step": 9133 }, { "epoch": 1.5541719356152346, "grad_norm": 1.65625, "learning_rate": 9.393580428855837e-06, "loss": 0.8077, "step": 9134 }, { "epoch": 1.5543433997042244, "grad_norm": 1.7890625, "learning_rate": 9.391777834240245e-06, "loss": 0.9564, "step": 9135 }, { "epoch": 1.5545148637932145, "grad_norm": 1.734375, "learning_rate": 9.389975259461065e-06, "loss": 0.852, "step": 9136 }, { "epoch": 1.5546863278822043, "grad_norm": 1.7109375, "learning_rate": 9.388172704577092e-06, "loss": 0.9216, "step": 9137 }, { "epoch": 1.5548577919711941, "grad_norm": 1.75, "learning_rate": 9.38637016964711e-06, "loss": 0.895, "step": 9138 }, { "epoch": 1.555029256060184, "grad_norm": 1.6640625, "learning_rate": 9.38456765472991e-06, "loss": 0.8907, "step": 9139 }, { "epoch": 1.5552007201491738, "grad_norm": 1.6953125, "learning_rate": 9.382765159884272e-06, "loss": 0.8439, "step": 9140 }, { "epoch": 1.5553721842381636, "grad_norm": 1.671875, "learning_rate": 9.38096268516899e-06, "loss": 0.8292, "step": 9141 }, { "epoch": 1.5555436483271534, "grad_norm": 1.71875, "learning_rate": 9.379160230642844e-06, "loss": 0.8789, "step": 9142 }, { "epoch": 1.5557151124161432, "grad_norm": 1.7265625, "learning_rate": 9.37735779636462e-06, "loss": 0.9112, "step": 9143 }, { "epoch": 1.555886576505133, "grad_norm": 1.78125, "learning_rate": 9.375555382393103e-06, "loss": 0.991, "step": 9144 }, { "epoch": 1.5560580405941231, "grad_norm": 1.7421875, "learning_rate": 9.373752988787077e-06, "loss": 0.856, "step": 9145 }, { "epoch": 1.556229504683113, "grad_norm": 1.6015625, "learning_rate": 9.371950615605326e-06, "loss": 0.8673, "step": 9146 }, { "epoch": 1.5564009687721028, "grad_norm": 1.703125, "learning_rate": 9.370148262906628e-06, "loss": 0.8611, "step": 9147 }, { "epoch": 1.5565724328610928, "grad_norm": 1.65625, "learning_rate": 9.368345930749767e-06, "loss": 0.8286, "step": 9148 }, { "epoch": 1.5567438969500826, "grad_norm": 1.6484375, "learning_rate": 9.366543619193526e-06, "loss": 0.8725, "step": 9149 }, { "epoch": 1.5569153610390725, "grad_norm": 1.6875, "learning_rate": 9.364741328296683e-06, "loss": 0.8377, "step": 9150 }, { "epoch": 1.5570868251280623, "grad_norm": 1.7734375, "learning_rate": 9.362939058118015e-06, "loss": 0.9226, "step": 9151 }, { "epoch": 1.5572582892170521, "grad_norm": 1.671875, "learning_rate": 9.361136808716305e-06, "loss": 0.9356, "step": 9152 }, { "epoch": 1.557429753306042, "grad_norm": 1.6171875, "learning_rate": 9.359334580150328e-06, "loss": 0.9033, "step": 9153 }, { "epoch": 1.5576012173950318, "grad_norm": 1.6640625, "learning_rate": 9.357532372478863e-06, "loss": 0.8383, "step": 9154 }, { "epoch": 1.5577726814840216, "grad_norm": 1.6875, "learning_rate": 9.355730185760688e-06, "loss": 0.9208, "step": 9155 }, { "epoch": 1.5579441455730114, "grad_norm": 1.671875, "learning_rate": 9.353928020054576e-06, "loss": 0.8347, "step": 9156 }, { "epoch": 1.5581156096620015, "grad_norm": 1.71875, "learning_rate": 9.352125875419306e-06, "loss": 0.8323, "step": 9157 }, { "epoch": 1.5582870737509913, "grad_norm": 1.75, "learning_rate": 9.35032375191365e-06, "loss": 0.864, "step": 9158 }, { "epoch": 1.5584585378399811, "grad_norm": 1.7109375, "learning_rate": 9.34852164959638e-06, "loss": 0.9107, "step": 9159 }, { "epoch": 1.5586300019289712, "grad_norm": 1.8125, "learning_rate": 9.346719568526275e-06, "loss": 0.8692, "step": 9160 }, { "epoch": 1.558801466017961, "grad_norm": 1.703125, "learning_rate": 9.344917508762104e-06, "loss": 0.9507, "step": 9161 }, { "epoch": 1.5589729301069508, "grad_norm": 1.75, "learning_rate": 9.343115470362645e-06, "loss": 0.8296, "step": 9162 }, { "epoch": 1.5591443941959406, "grad_norm": 1.765625, "learning_rate": 9.34131345338666e-06, "loss": 0.8738, "step": 9163 }, { "epoch": 1.5593158582849305, "grad_norm": 1.6171875, "learning_rate": 9.339511457892924e-06, "loss": 0.8605, "step": 9164 }, { "epoch": 1.5594873223739203, "grad_norm": 1.640625, "learning_rate": 9.337709483940208e-06, "loss": 0.8543, "step": 9165 }, { "epoch": 1.5596587864629101, "grad_norm": 1.75, "learning_rate": 9.335907531587275e-06, "loss": 0.791, "step": 9166 }, { "epoch": 1.5598302505519, "grad_norm": 1.7265625, "learning_rate": 9.334105600892899e-06, "loss": 0.8977, "step": 9167 }, { "epoch": 1.5600017146408898, "grad_norm": 1.578125, "learning_rate": 9.33230369191585e-06, "loss": 0.8126, "step": 9168 }, { "epoch": 1.5601731787298796, "grad_norm": 1.625, "learning_rate": 9.33050180471489e-06, "loss": 0.8863, "step": 9169 }, { "epoch": 1.5603446428188696, "grad_norm": 1.6875, "learning_rate": 9.328699939348788e-06, "loss": 0.8737, "step": 9170 }, { "epoch": 1.5605161069078595, "grad_norm": 1.65625, "learning_rate": 9.326898095876308e-06, "loss": 0.8067, "step": 9171 }, { "epoch": 1.5606875709968493, "grad_norm": 1.6015625, "learning_rate": 9.325096274356219e-06, "loss": 0.8524, "step": 9172 }, { "epoch": 1.5608590350858393, "grad_norm": 1.7109375, "learning_rate": 9.323294474847281e-06, "loss": 0.8329, "step": 9173 }, { "epoch": 1.5610304991748292, "grad_norm": 1.625, "learning_rate": 9.321492697408259e-06, "loss": 0.7939, "step": 9174 }, { "epoch": 1.561201963263819, "grad_norm": 1.7890625, "learning_rate": 9.31969094209791e-06, "loss": 0.8204, "step": 9175 }, { "epoch": 1.5613734273528088, "grad_norm": 1.65625, "learning_rate": 9.317889208975004e-06, "loss": 0.8583, "step": 9176 }, { "epoch": 1.5615448914417986, "grad_norm": 1.59375, "learning_rate": 9.3160874980983e-06, "loss": 0.8618, "step": 9177 }, { "epoch": 1.5617163555307885, "grad_norm": 1.6171875, "learning_rate": 9.314285809526558e-06, "loss": 0.812, "step": 9178 }, { "epoch": 1.5618878196197783, "grad_norm": 1.5859375, "learning_rate": 9.312484143318538e-06, "loss": 0.8151, "step": 9179 }, { "epoch": 1.5620592837087681, "grad_norm": 2.03125, "learning_rate": 9.310682499532998e-06, "loss": 0.9445, "step": 9180 }, { "epoch": 1.562230747797758, "grad_norm": 1.8046875, "learning_rate": 9.308880878228698e-06, "loss": 0.9197, "step": 9181 }, { "epoch": 1.562402211886748, "grad_norm": 1.6796875, "learning_rate": 9.307079279464396e-06, "loss": 0.8719, "step": 9182 }, { "epoch": 1.5625736759757378, "grad_norm": 1.7734375, "learning_rate": 9.305277703298847e-06, "loss": 0.8668, "step": 9183 }, { "epoch": 1.5627451400647276, "grad_norm": 1.671875, "learning_rate": 9.303476149790808e-06, "loss": 0.797, "step": 9184 }, { "epoch": 1.5629166041537177, "grad_norm": 1.7109375, "learning_rate": 9.301674618999038e-06, "loss": 0.8912, "step": 9185 }, { "epoch": 1.5630880682427075, "grad_norm": 1.65625, "learning_rate": 9.299873110982285e-06, "loss": 0.8996, "step": 9186 }, { "epoch": 1.5632595323316973, "grad_norm": 1.7265625, "learning_rate": 9.298071625799306e-06, "loss": 0.8605, "step": 9187 }, { "epoch": 1.5634309964206872, "grad_norm": 1.6015625, "learning_rate": 9.296270163508856e-06, "loss": 0.8674, "step": 9188 }, { "epoch": 1.563602460509677, "grad_norm": 1.7265625, "learning_rate": 9.294468724169685e-06, "loss": 0.886, "step": 9189 }, { "epoch": 1.5637739245986668, "grad_norm": 1.703125, "learning_rate": 9.292667307840546e-06, "loss": 0.8608, "step": 9190 }, { "epoch": 1.5639453886876566, "grad_norm": 1.703125, "learning_rate": 9.29086591458019e-06, "loss": 0.8972, "step": 9191 }, { "epoch": 1.5641168527766465, "grad_norm": 1.65625, "learning_rate": 9.289064544447366e-06, "loss": 0.8055, "step": 9192 }, { "epoch": 1.5642883168656363, "grad_norm": 1.71875, "learning_rate": 9.287263197500825e-06, "loss": 0.9147, "step": 9193 }, { "epoch": 1.5644597809546263, "grad_norm": 1.6875, "learning_rate": 9.285461873799315e-06, "loss": 0.8236, "step": 9194 }, { "epoch": 1.5646312450436162, "grad_norm": 1.6328125, "learning_rate": 9.283660573401582e-06, "loss": 0.827, "step": 9195 }, { "epoch": 1.564802709132606, "grad_norm": 1.6484375, "learning_rate": 9.28185929636638e-06, "loss": 0.8257, "step": 9196 }, { "epoch": 1.564974173221596, "grad_norm": 1.7421875, "learning_rate": 9.280058042752446e-06, "loss": 0.8898, "step": 9197 }, { "epoch": 1.5651456373105859, "grad_norm": 1.6640625, "learning_rate": 9.278256812618534e-06, "loss": 0.8702, "step": 9198 }, { "epoch": 1.5653171013995757, "grad_norm": 1.6328125, "learning_rate": 9.276455606023382e-06, "loss": 0.8156, "step": 9199 }, { "epoch": 1.5654885654885655, "grad_norm": 1.71875, "learning_rate": 9.274654423025739e-06, "loss": 0.8452, "step": 9200 }, { "epoch": 1.5656600295775553, "grad_norm": 1.7578125, "learning_rate": 9.272853263684345e-06, "loss": 0.8568, "step": 9201 }, { "epoch": 1.5658314936665452, "grad_norm": 1.6953125, "learning_rate": 9.271052128057946e-06, "loss": 0.8569, "step": 9202 }, { "epoch": 1.566002957755535, "grad_norm": 1.9375, "learning_rate": 9.26925101620528e-06, "loss": 0.8666, "step": 9203 }, { "epoch": 1.5661744218445248, "grad_norm": 1.765625, "learning_rate": 9.267449928185091e-06, "loss": 0.8248, "step": 9204 }, { "epoch": 1.5663458859335146, "grad_norm": 1.765625, "learning_rate": 9.265648864056119e-06, "loss": 0.8597, "step": 9205 }, { "epoch": 1.5665173500225047, "grad_norm": 1.8671875, "learning_rate": 9.2638478238771e-06, "loss": 0.8764, "step": 9206 }, { "epoch": 1.5666888141114945, "grad_norm": 1.65625, "learning_rate": 9.26204680770678e-06, "loss": 0.8861, "step": 9207 }, { "epoch": 1.5668602782004843, "grad_norm": 1.65625, "learning_rate": 9.260245815603894e-06, "loss": 0.8443, "step": 9208 }, { "epoch": 1.5670317422894744, "grad_norm": 1.7109375, "learning_rate": 9.258444847627174e-06, "loss": 0.859, "step": 9209 }, { "epoch": 1.5672032063784642, "grad_norm": 1.640625, "learning_rate": 9.256643903835361e-06, "loss": 0.9118, "step": 9210 }, { "epoch": 1.567374670467454, "grad_norm": 1.6484375, "learning_rate": 9.254842984287187e-06, "loss": 0.7796, "step": 9211 }, { "epoch": 1.5675461345564439, "grad_norm": 1.5703125, "learning_rate": 9.253042089041392e-06, "loss": 0.7964, "step": 9212 }, { "epoch": 1.5677175986454337, "grad_norm": 1.6015625, "learning_rate": 9.251241218156706e-06, "loss": 0.8026, "step": 9213 }, { "epoch": 1.5678890627344235, "grad_norm": 1.65625, "learning_rate": 9.249440371691863e-06, "loss": 0.7103, "step": 9214 }, { "epoch": 1.5680605268234133, "grad_norm": 1.7265625, "learning_rate": 9.247639549705597e-06, "loss": 0.8532, "step": 9215 }, { "epoch": 1.5682319909124032, "grad_norm": 1.7421875, "learning_rate": 9.245838752256636e-06, "loss": 0.8715, "step": 9216 }, { "epoch": 1.568403455001393, "grad_norm": 1.734375, "learning_rate": 9.244037979403715e-06, "loss": 0.9013, "step": 9217 }, { "epoch": 1.568574919090383, "grad_norm": 1.8046875, "learning_rate": 9.242237231205563e-06, "loss": 0.8126, "step": 9218 }, { "epoch": 1.5687463831793729, "grad_norm": 1.640625, "learning_rate": 9.240436507720907e-06, "loss": 0.816, "step": 9219 }, { "epoch": 1.5689178472683627, "grad_norm": 1.671875, "learning_rate": 9.238635809008482e-06, "loss": 0.8114, "step": 9220 }, { "epoch": 1.5690893113573527, "grad_norm": 1.796875, "learning_rate": 9.236835135127005e-06, "loss": 0.8607, "step": 9221 }, { "epoch": 1.5692607754463426, "grad_norm": 1.703125, "learning_rate": 9.235034486135207e-06, "loss": 0.8988, "step": 9222 }, { "epoch": 1.5694322395353324, "grad_norm": 1.71875, "learning_rate": 9.233233862091818e-06, "loss": 0.8391, "step": 9223 }, { "epoch": 1.5696037036243222, "grad_norm": 1.65625, "learning_rate": 9.231433263055558e-06, "loss": 0.8328, "step": 9224 }, { "epoch": 1.569775167713312, "grad_norm": 1.6484375, "learning_rate": 9.229632689085155e-06, "loss": 0.8381, "step": 9225 }, { "epoch": 1.5699466318023019, "grad_norm": 1.65625, "learning_rate": 9.22783214023933e-06, "loss": 0.8908, "step": 9226 }, { "epoch": 1.5701180958912917, "grad_norm": 1.7109375, "learning_rate": 9.226031616576806e-06, "loss": 0.8383, "step": 9227 }, { "epoch": 1.5702895599802815, "grad_norm": 1.734375, "learning_rate": 9.224231118156307e-06, "loss": 0.9127, "step": 9228 }, { "epoch": 1.5704610240692713, "grad_norm": 1.7578125, "learning_rate": 9.222430645036552e-06, "loss": 0.8773, "step": 9229 }, { "epoch": 1.5706324881582614, "grad_norm": 1.640625, "learning_rate": 9.220630197276262e-06, "loss": 0.8129, "step": 9230 }, { "epoch": 1.5708039522472512, "grad_norm": 1.6875, "learning_rate": 9.218829774934157e-06, "loss": 0.8681, "step": 9231 }, { "epoch": 1.570975416336241, "grad_norm": 1.609375, "learning_rate": 9.217029378068954e-06, "loss": 0.8128, "step": 9232 }, { "epoch": 1.571146880425231, "grad_norm": 1.609375, "learning_rate": 9.21522900673937e-06, "loss": 0.7568, "step": 9233 }, { "epoch": 1.571318344514221, "grad_norm": 1.640625, "learning_rate": 9.213428661004124e-06, "loss": 0.7764, "step": 9234 }, { "epoch": 1.5714898086032107, "grad_norm": 1.6796875, "learning_rate": 9.211628340921932e-06, "loss": 0.8369, "step": 9235 }, { "epoch": 1.5716612726922006, "grad_norm": 1.734375, "learning_rate": 9.209828046551507e-06, "loss": 0.8511, "step": 9236 }, { "epoch": 1.5718327367811904, "grad_norm": 1.6953125, "learning_rate": 9.208027777951565e-06, "loss": 0.8305, "step": 9237 }, { "epoch": 1.5720042008701802, "grad_norm": 1.78125, "learning_rate": 9.206227535180821e-06, "loss": 0.8703, "step": 9238 }, { "epoch": 1.57217566495917, "grad_norm": 1.703125, "learning_rate": 9.204427318297987e-06, "loss": 0.8569, "step": 9239 }, { "epoch": 1.5723471290481599, "grad_norm": 1.7734375, "learning_rate": 9.202627127361772e-06, "loss": 0.8437, "step": 9240 }, { "epoch": 1.5725185931371497, "grad_norm": 1.6796875, "learning_rate": 9.20082696243089e-06, "loss": 0.8106, "step": 9241 }, { "epoch": 1.5726900572261397, "grad_norm": 1.6171875, "learning_rate": 9.199026823564048e-06, "loss": 0.7673, "step": 9242 }, { "epoch": 1.5728615213151296, "grad_norm": 1.6796875, "learning_rate": 9.197226710819961e-06, "loss": 0.8538, "step": 9243 }, { "epoch": 1.5730329854041194, "grad_norm": 1.734375, "learning_rate": 9.195426624257332e-06, "loss": 0.9001, "step": 9244 }, { "epoch": 1.5732044494931094, "grad_norm": 1.734375, "learning_rate": 9.19362656393487e-06, "loss": 0.8341, "step": 9245 }, { "epoch": 1.5733759135820993, "grad_norm": 1.640625, "learning_rate": 9.191826529911283e-06, "loss": 0.903, "step": 9246 }, { "epoch": 1.573547377671089, "grad_norm": 1.6640625, "learning_rate": 9.190026522245277e-06, "loss": 0.8623, "step": 9247 }, { "epoch": 1.573718841760079, "grad_norm": 1.6875, "learning_rate": 9.188226540995555e-06, "loss": 0.8689, "step": 9248 }, { "epoch": 1.5738903058490687, "grad_norm": 1.6640625, "learning_rate": 9.186426586220822e-06, "loss": 0.8787, "step": 9249 }, { "epoch": 1.5740617699380586, "grad_norm": 1.6953125, "learning_rate": 9.184626657979782e-06, "loss": 0.8006, "step": 9250 }, { "epoch": 1.5742332340270484, "grad_norm": 1.640625, "learning_rate": 9.182826756331137e-06, "loss": 0.8874, "step": 9251 }, { "epoch": 1.5744046981160382, "grad_norm": 1.703125, "learning_rate": 9.181026881333589e-06, "loss": 0.8036, "step": 9252 }, { "epoch": 1.574576162205028, "grad_norm": 1.6484375, "learning_rate": 9.179227033045836e-06, "loss": 0.8686, "step": 9253 }, { "epoch": 1.574747626294018, "grad_norm": 1.75, "learning_rate": 9.177427211526585e-06, "loss": 0.9301, "step": 9254 }, { "epoch": 1.574919090383008, "grad_norm": 1.7265625, "learning_rate": 9.17562741683453e-06, "loss": 0.8932, "step": 9255 }, { "epoch": 1.5750905544719978, "grad_norm": 1.640625, "learning_rate": 9.173827649028366e-06, "loss": 0.8502, "step": 9256 }, { "epoch": 1.5752620185609878, "grad_norm": 1.6796875, "learning_rate": 9.172027908166795e-06, "loss": 0.8476, "step": 9257 }, { "epoch": 1.5754334826499776, "grad_norm": 1.59375, "learning_rate": 9.17022819430851e-06, "loss": 0.8561, "step": 9258 }, { "epoch": 1.5756049467389674, "grad_norm": 1.6484375, "learning_rate": 9.16842850751221e-06, "loss": 0.7854, "step": 9259 }, { "epoch": 1.5757764108279573, "grad_norm": 1.7265625, "learning_rate": 9.166628847836586e-06, "loss": 0.7733, "step": 9260 }, { "epoch": 1.575947874916947, "grad_norm": 1.8046875, "learning_rate": 9.164829215340332e-06, "loss": 0.8804, "step": 9261 }, { "epoch": 1.576119339005937, "grad_norm": 1.6640625, "learning_rate": 9.163029610082146e-06, "loss": 0.7935, "step": 9262 }, { "epoch": 1.5762908030949268, "grad_norm": 1.7265625, "learning_rate": 9.161230032120714e-06, "loss": 0.8089, "step": 9263 }, { "epoch": 1.5764622671839166, "grad_norm": 1.59375, "learning_rate": 9.15943048151473e-06, "loss": 0.8143, "step": 9264 }, { "epoch": 1.5766337312729064, "grad_norm": 1.703125, "learning_rate": 9.157630958322884e-06, "loss": 0.8527, "step": 9265 }, { "epoch": 1.5768051953618962, "grad_norm": 1.71875, "learning_rate": 9.155831462603867e-06, "loss": 0.8017, "step": 9266 }, { "epoch": 1.5769766594508863, "grad_norm": 1.6015625, "learning_rate": 9.154031994416362e-06, "loss": 0.8363, "step": 9267 }, { "epoch": 1.577148123539876, "grad_norm": 1.7109375, "learning_rate": 9.15223255381906e-06, "loss": 0.8843, "step": 9268 }, { "epoch": 1.577319587628866, "grad_norm": 1.796875, "learning_rate": 9.150433140870642e-06, "loss": 0.8803, "step": 9269 }, { "epoch": 1.577491051717856, "grad_norm": 1.7421875, "learning_rate": 9.148633755629803e-06, "loss": 0.761, "step": 9270 }, { "epoch": 1.5776625158068458, "grad_norm": 1.6796875, "learning_rate": 9.146834398155223e-06, "loss": 0.7771, "step": 9271 }, { "epoch": 1.5778339798958356, "grad_norm": 1.5859375, "learning_rate": 9.145035068505586e-06, "loss": 0.7734, "step": 9272 }, { "epoch": 1.5780054439848255, "grad_norm": 1.671875, "learning_rate": 9.143235766739574e-06, "loss": 0.8499, "step": 9273 }, { "epoch": 1.5781769080738153, "grad_norm": 1.640625, "learning_rate": 9.141436492915869e-06, "loss": 0.8435, "step": 9274 }, { "epoch": 1.578348372162805, "grad_norm": 1.65625, "learning_rate": 9.139637247093156e-06, "loss": 0.876, "step": 9275 }, { "epoch": 1.578519836251795, "grad_norm": 1.7265625, "learning_rate": 9.13783802933011e-06, "loss": 0.8139, "step": 9276 }, { "epoch": 1.5786913003407848, "grad_norm": 1.734375, "learning_rate": 9.136038839685415e-06, "loss": 0.935, "step": 9277 }, { "epoch": 1.5788627644297746, "grad_norm": 1.6875, "learning_rate": 9.134239678217744e-06, "loss": 0.9228, "step": 9278 }, { "epoch": 1.5790342285187646, "grad_norm": 1.734375, "learning_rate": 9.13244054498578e-06, "loss": 0.8317, "step": 9279 }, { "epoch": 1.5792056926077545, "grad_norm": 1.7421875, "learning_rate": 9.130641440048194e-06, "loss": 0.8331, "step": 9280 }, { "epoch": 1.5793771566967443, "grad_norm": 1.7109375, "learning_rate": 9.128842363463663e-06, "loss": 0.792, "step": 9281 }, { "epoch": 1.5795486207857343, "grad_norm": 1.6953125, "learning_rate": 9.127043315290867e-06, "loss": 0.8437, "step": 9282 }, { "epoch": 1.5797200848747242, "grad_norm": 1.59375, "learning_rate": 9.125244295588473e-06, "loss": 0.8124, "step": 9283 }, { "epoch": 1.579891548963714, "grad_norm": 1.78125, "learning_rate": 9.123445304415157e-06, "loss": 0.8643, "step": 9284 }, { "epoch": 1.5800630130527038, "grad_norm": 1.78125, "learning_rate": 9.12164634182959e-06, "loss": 0.9127, "step": 9285 }, { "epoch": 1.5802344771416936, "grad_norm": 1.6640625, "learning_rate": 9.119847407890446e-06, "loss": 0.7783, "step": 9286 }, { "epoch": 1.5804059412306835, "grad_norm": 1.8046875, "learning_rate": 9.118048502656391e-06, "loss": 0.8905, "step": 9287 }, { "epoch": 1.5805774053196733, "grad_norm": 1.703125, "learning_rate": 9.116249626186094e-06, "loss": 0.8882, "step": 9288 }, { "epoch": 1.580748869408663, "grad_norm": 1.671875, "learning_rate": 9.114450778538228e-06, "loss": 0.881, "step": 9289 }, { "epoch": 1.580920333497653, "grad_norm": 1.6796875, "learning_rate": 9.112651959771455e-06, "loss": 0.863, "step": 9290 }, { "epoch": 1.581091797586643, "grad_norm": 1.65625, "learning_rate": 9.110853169944441e-06, "loss": 0.8287, "step": 9291 }, { "epoch": 1.5812632616756328, "grad_norm": 1.7109375, "learning_rate": 9.109054409115854e-06, "loss": 0.7872, "step": 9292 }, { "epoch": 1.5814347257646226, "grad_norm": 1.7421875, "learning_rate": 9.10725567734436e-06, "loss": 0.9243, "step": 9293 }, { "epoch": 1.5816061898536127, "grad_norm": 1.71875, "learning_rate": 9.105456974688618e-06, "loss": 0.905, "step": 9294 }, { "epoch": 1.5817776539426025, "grad_norm": 1.75, "learning_rate": 9.103658301207294e-06, "loss": 0.899, "step": 9295 }, { "epoch": 1.5819491180315923, "grad_norm": 1.8046875, "learning_rate": 9.101859656959047e-06, "loss": 0.8408, "step": 9296 }, { "epoch": 1.5821205821205822, "grad_norm": 1.7578125, "learning_rate": 9.100061042002539e-06, "loss": 0.7829, "step": 9297 }, { "epoch": 1.582292046209572, "grad_norm": 1.71875, "learning_rate": 9.09826245639643e-06, "loss": 0.9463, "step": 9298 }, { "epoch": 1.5824635102985618, "grad_norm": 1.65625, "learning_rate": 9.096463900199376e-06, "loss": 0.8112, "step": 9299 }, { "epoch": 1.5826349743875516, "grad_norm": 1.7265625, "learning_rate": 9.094665373470039e-06, "loss": 0.8755, "step": 9300 }, { "epoch": 1.5828064384765415, "grad_norm": 1.78125, "learning_rate": 9.092866876267074e-06, "loss": 0.9219, "step": 9301 }, { "epoch": 1.5829779025655313, "grad_norm": 1.6796875, "learning_rate": 9.091068408649134e-06, "loss": 0.8709, "step": 9302 }, { "epoch": 1.5831493666545213, "grad_norm": 1.6484375, "learning_rate": 9.089269970674875e-06, "loss": 0.8616, "step": 9303 }, { "epoch": 1.5833208307435112, "grad_norm": 1.6328125, "learning_rate": 9.087471562402953e-06, "loss": 0.8286, "step": 9304 }, { "epoch": 1.583492294832501, "grad_norm": 1.6875, "learning_rate": 9.08567318389202e-06, "loss": 0.8534, "step": 9305 }, { "epoch": 1.583663758921491, "grad_norm": 1.765625, "learning_rate": 9.083874835200727e-06, "loss": 0.9082, "step": 9306 }, { "epoch": 1.5838352230104809, "grad_norm": 1.7265625, "learning_rate": 9.082076516387726e-06, "loss": 0.8264, "step": 9307 }, { "epoch": 1.5840066870994707, "grad_norm": 1.8515625, "learning_rate": 9.080278227511664e-06, "loss": 0.9196, "step": 9308 }, { "epoch": 1.5841781511884605, "grad_norm": 1.6328125, "learning_rate": 9.078479968631193e-06, "loss": 0.9158, "step": 9309 }, { "epoch": 1.5843496152774503, "grad_norm": 1.65625, "learning_rate": 9.076681739804961e-06, "loss": 0.8469, "step": 9310 }, { "epoch": 1.5845210793664402, "grad_norm": 1.703125, "learning_rate": 9.074883541091616e-06, "loss": 0.8779, "step": 9311 }, { "epoch": 1.58469254345543, "grad_norm": 1.546875, "learning_rate": 9.073085372549805e-06, "loss": 0.7402, "step": 9312 }, { "epoch": 1.5848640075444198, "grad_norm": 1.78125, "learning_rate": 9.071287234238166e-06, "loss": 0.8958, "step": 9313 }, { "epoch": 1.5850354716334096, "grad_norm": 1.625, "learning_rate": 9.06948912621535e-06, "loss": 0.8287, "step": 9314 }, { "epoch": 1.5852069357223997, "grad_norm": 11.0625, "learning_rate": 9.067691048539996e-06, "loss": 0.9041, "step": 9315 }, { "epoch": 1.5853783998113895, "grad_norm": 1.71875, "learning_rate": 9.065893001270744e-06, "loss": 0.8723, "step": 9316 }, { "epoch": 1.5855498639003793, "grad_norm": 1.609375, "learning_rate": 9.064094984466242e-06, "loss": 0.8329, "step": 9317 }, { "epoch": 1.5857213279893694, "grad_norm": 1.59375, "learning_rate": 9.062296998185127e-06, "loss": 0.8169, "step": 9318 }, { "epoch": 1.5858927920783592, "grad_norm": 1.734375, "learning_rate": 9.060499042486039e-06, "loss": 0.9146, "step": 9319 }, { "epoch": 1.586064256167349, "grad_norm": 1.6015625, "learning_rate": 9.058701117427613e-06, "loss": 0.8901, "step": 9320 }, { "epoch": 1.5862357202563389, "grad_norm": 1.78125, "learning_rate": 9.05690322306849e-06, "loss": 0.8549, "step": 9321 }, { "epoch": 1.5864071843453287, "grad_norm": 1.640625, "learning_rate": 9.055105359467303e-06, "loss": 0.8646, "step": 9322 }, { "epoch": 1.5865786484343185, "grad_norm": 1.6328125, "learning_rate": 9.053307526682694e-06, "loss": 0.8383, "step": 9323 }, { "epoch": 1.5867501125233083, "grad_norm": 1.65625, "learning_rate": 9.051509724773283e-06, "loss": 0.835, "step": 9324 }, { "epoch": 1.5869215766122982, "grad_norm": 1.65625, "learning_rate": 9.049711953797716e-06, "loss": 0.8508, "step": 9325 }, { "epoch": 1.587093040701288, "grad_norm": 1.640625, "learning_rate": 9.04791421381462e-06, "loss": 0.8332, "step": 9326 }, { "epoch": 1.587264504790278, "grad_norm": 1.671875, "learning_rate": 9.046116504882625e-06, "loss": 0.8394, "step": 9327 }, { "epoch": 1.5874359688792679, "grad_norm": 1.7265625, "learning_rate": 9.044318827060364e-06, "loss": 0.8588, "step": 9328 }, { "epoch": 1.5876074329682577, "grad_norm": 1.8125, "learning_rate": 9.042521180406465e-06, "loss": 0.9078, "step": 9329 }, { "epoch": 1.5877788970572477, "grad_norm": 1.640625, "learning_rate": 9.040723564979556e-06, "loss": 0.8752, "step": 9330 }, { "epoch": 1.5879503611462376, "grad_norm": 1.6015625, "learning_rate": 9.038925980838262e-06, "loss": 0.9109, "step": 9331 }, { "epoch": 1.5881218252352274, "grad_norm": 1.703125, "learning_rate": 9.037128428041214e-06, "loss": 0.8279, "step": 9332 }, { "epoch": 1.5882932893242172, "grad_norm": 1.6484375, "learning_rate": 9.035330906647031e-06, "loss": 0.8455, "step": 9333 }, { "epoch": 1.588464753413207, "grad_norm": 1.6875, "learning_rate": 9.033533416714341e-06, "loss": 0.798, "step": 9334 }, { "epoch": 1.5886362175021969, "grad_norm": 1.6875, "learning_rate": 9.031735958301768e-06, "loss": 0.9019, "step": 9335 }, { "epoch": 1.5888076815911867, "grad_norm": 1.6328125, "learning_rate": 9.02993853146793e-06, "loss": 0.8193, "step": 9336 }, { "epoch": 1.5889791456801765, "grad_norm": 1.6484375, "learning_rate": 9.028141136271448e-06, "loss": 0.8333, "step": 9337 }, { "epoch": 1.5891506097691663, "grad_norm": 1.75, "learning_rate": 9.026343772770944e-06, "loss": 0.8714, "step": 9338 }, { "epoch": 1.5893220738581564, "grad_norm": 1.7265625, "learning_rate": 9.024546441025035e-06, "loss": 0.8913, "step": 9339 }, { "epoch": 1.5894935379471462, "grad_norm": 1.6953125, "learning_rate": 9.022749141092341e-06, "loss": 0.8662, "step": 9340 }, { "epoch": 1.589665002036136, "grad_norm": 1.71875, "learning_rate": 9.020951873031477e-06, "loss": 0.9092, "step": 9341 }, { "epoch": 1.589836466125126, "grad_norm": 1.75, "learning_rate": 9.019154636901059e-06, "loss": 0.8917, "step": 9342 }, { "epoch": 1.590007930214116, "grad_norm": 1.640625, "learning_rate": 9.017357432759702e-06, "loss": 0.8658, "step": 9343 }, { "epoch": 1.5901793943031057, "grad_norm": 1.625, "learning_rate": 9.015560260666019e-06, "loss": 0.8088, "step": 9344 }, { "epoch": 1.5903508583920956, "grad_norm": 1.65625, "learning_rate": 9.013763120678624e-06, "loss": 0.9525, "step": 9345 }, { "epoch": 1.5905223224810854, "grad_norm": 1.625, "learning_rate": 9.011966012856129e-06, "loss": 0.853, "step": 9346 }, { "epoch": 1.5906937865700752, "grad_norm": 1.703125, "learning_rate": 9.01016893725714e-06, "loss": 0.8231, "step": 9347 }, { "epoch": 1.590865250659065, "grad_norm": 1.75, "learning_rate": 9.00837189394027e-06, "loss": 0.8723, "step": 9348 }, { "epoch": 1.5910367147480549, "grad_norm": 1.6796875, "learning_rate": 9.006574882964127e-06, "loss": 0.8768, "step": 9349 }, { "epoch": 1.5912081788370447, "grad_norm": 1.6875, "learning_rate": 9.004777904387318e-06, "loss": 0.8245, "step": 9350 }, { "epoch": 1.5913796429260347, "grad_norm": 1.703125, "learning_rate": 9.002980958268448e-06, "loss": 0.8931, "step": 9351 }, { "epoch": 1.5915511070150246, "grad_norm": 1.703125, "learning_rate": 9.001184044666125e-06, "loss": 0.8787, "step": 9352 }, { "epoch": 1.5917225711040144, "grad_norm": 1.625, "learning_rate": 8.99938716363895e-06, "loss": 0.8112, "step": 9353 }, { "epoch": 1.5918940351930044, "grad_norm": 1.734375, "learning_rate": 8.997590315245528e-06, "loss": 0.9623, "step": 9354 }, { "epoch": 1.5920654992819943, "grad_norm": 1.6484375, "learning_rate": 8.995793499544459e-06, "loss": 0.8516, "step": 9355 }, { "epoch": 1.592236963370984, "grad_norm": 1.7265625, "learning_rate": 8.993996716594347e-06, "loss": 0.893, "step": 9356 }, { "epoch": 1.592408427459974, "grad_norm": 1.671875, "learning_rate": 8.99219996645379e-06, "loss": 0.8418, "step": 9357 }, { "epoch": 1.5925798915489637, "grad_norm": 1.71875, "learning_rate": 8.990403249181394e-06, "loss": 0.8901, "step": 9358 }, { "epoch": 1.5927513556379536, "grad_norm": 1.59375, "learning_rate": 8.988606564835742e-06, "loss": 0.8342, "step": 9359 }, { "epoch": 1.5929228197269434, "grad_norm": 1.71875, "learning_rate": 8.986809913475441e-06, "loss": 0.8132, "step": 9360 }, { "epoch": 1.5930942838159332, "grad_norm": 1.7109375, "learning_rate": 8.985013295159083e-06, "loss": 0.8687, "step": 9361 }, { "epoch": 1.593265747904923, "grad_norm": 1.734375, "learning_rate": 8.983216709945264e-06, "loss": 0.8738, "step": 9362 }, { "epoch": 1.5934372119939129, "grad_norm": 1.765625, "learning_rate": 8.981420157892574e-06, "loss": 0.9278, "step": 9363 }, { "epoch": 1.593608676082903, "grad_norm": 1.6796875, "learning_rate": 8.97962363905961e-06, "loss": 0.8692, "step": 9364 }, { "epoch": 1.5937801401718927, "grad_norm": 1.75, "learning_rate": 8.977827153504963e-06, "loss": 0.8823, "step": 9365 }, { "epoch": 1.5939516042608826, "grad_norm": 1.6640625, "learning_rate": 8.97603070128722e-06, "loss": 0.8109, "step": 9366 }, { "epoch": 1.5941230683498726, "grad_norm": 1.8046875, "learning_rate": 8.974234282464974e-06, "loss": 0.8447, "step": 9367 }, { "epoch": 1.5942945324388624, "grad_norm": 1.609375, "learning_rate": 8.97243789709681e-06, "loss": 0.917, "step": 9368 }, { "epoch": 1.5944659965278523, "grad_norm": 1.625, "learning_rate": 8.970641545241314e-06, "loss": 0.8451, "step": 9369 }, { "epoch": 1.594637460616842, "grad_norm": 1.59375, "learning_rate": 8.968845226957079e-06, "loss": 0.7834, "step": 9370 }, { "epoch": 1.594808924705832, "grad_norm": 1.78125, "learning_rate": 8.967048942302677e-06, "loss": 0.8885, "step": 9371 }, { "epoch": 1.5949803887948217, "grad_norm": 1.6328125, "learning_rate": 8.965252691336701e-06, "loss": 0.8003, "step": 9372 }, { "epoch": 1.5951518528838116, "grad_norm": 1.65625, "learning_rate": 8.963456474117732e-06, "loss": 0.7934, "step": 9373 }, { "epoch": 1.5953233169728014, "grad_norm": 1.6484375, "learning_rate": 8.96166029070435e-06, "loss": 0.8281, "step": 9374 }, { "epoch": 1.5954947810617912, "grad_norm": 1.6796875, "learning_rate": 8.959864141155137e-06, "loss": 0.8975, "step": 9375 }, { "epoch": 1.5956662451507813, "grad_norm": 1.609375, "learning_rate": 8.95806802552867e-06, "loss": 0.8455, "step": 9376 }, { "epoch": 1.595837709239771, "grad_norm": 1.796875, "learning_rate": 8.95627194388353e-06, "loss": 0.8677, "step": 9377 }, { "epoch": 1.596009173328761, "grad_norm": 1.6796875, "learning_rate": 8.95447589627829e-06, "loss": 0.7717, "step": 9378 }, { "epoch": 1.596180637417751, "grad_norm": 1.765625, "learning_rate": 8.95267988277153e-06, "loss": 0.8799, "step": 9379 }, { "epoch": 1.5963521015067408, "grad_norm": 1.7109375, "learning_rate": 8.95088390342182e-06, "loss": 0.8059, "step": 9380 }, { "epoch": 1.5965235655957306, "grad_norm": 1.703125, "learning_rate": 8.949087958287741e-06, "loss": 0.855, "step": 9381 }, { "epoch": 1.5966950296847204, "grad_norm": 1.65625, "learning_rate": 8.947292047427858e-06, "loss": 0.8773, "step": 9382 }, { "epoch": 1.5968664937737103, "grad_norm": 1.7109375, "learning_rate": 8.945496170900745e-06, "loss": 0.8761, "step": 9383 }, { "epoch": 1.5970379578627, "grad_norm": 1.7265625, "learning_rate": 8.943700328764975e-06, "loss": 0.9317, "step": 9384 }, { "epoch": 1.59720942195169, "grad_norm": 1.7265625, "learning_rate": 8.941904521079113e-06, "loss": 0.8575, "step": 9385 }, { "epoch": 1.5973808860406797, "grad_norm": 1.6875, "learning_rate": 8.940108747901728e-06, "loss": 0.845, "step": 9386 }, { "epoch": 1.5975523501296696, "grad_norm": 1.6953125, "learning_rate": 8.93831300929139e-06, "loss": 0.8701, "step": 9387 }, { "epoch": 1.5977238142186596, "grad_norm": 1.6328125, "learning_rate": 8.936517305306663e-06, "loss": 0.8437, "step": 9388 }, { "epoch": 1.5978952783076494, "grad_norm": 1.671875, "learning_rate": 8.93472163600611e-06, "loss": 0.8959, "step": 9389 }, { "epoch": 1.5980667423966393, "grad_norm": 1.6328125, "learning_rate": 8.932926001448296e-06, "loss": 0.7727, "step": 9390 }, { "epoch": 1.5982382064856293, "grad_norm": 1.6796875, "learning_rate": 8.931130401691784e-06, "loss": 0.862, "step": 9391 }, { "epoch": 1.5984096705746191, "grad_norm": 1.671875, "learning_rate": 8.929334836795135e-06, "loss": 0.8813, "step": 9392 }, { "epoch": 1.598581134663609, "grad_norm": 1.6875, "learning_rate": 8.92753930681691e-06, "loss": 0.9302, "step": 9393 }, { "epoch": 1.5987525987525988, "grad_norm": 1.71875, "learning_rate": 8.925743811815665e-06, "loss": 0.8973, "step": 9394 }, { "epoch": 1.5989240628415886, "grad_norm": 1.6875, "learning_rate": 8.923948351849959e-06, "loss": 0.8164, "step": 9395 }, { "epoch": 1.5990955269305784, "grad_norm": 1.671875, "learning_rate": 8.922152926978349e-06, "loss": 0.8623, "step": 9396 }, { "epoch": 1.5992669910195683, "grad_norm": 1.7578125, "learning_rate": 8.920357537259393e-06, "loss": 0.7758, "step": 9397 }, { "epoch": 1.599438455108558, "grad_norm": 1.71875, "learning_rate": 8.918562182751642e-06, "loss": 0.887, "step": 9398 }, { "epoch": 1.599609919197548, "grad_norm": 1.640625, "learning_rate": 8.916766863513648e-06, "loss": 0.8277, "step": 9399 }, { "epoch": 1.599781383286538, "grad_norm": 1.765625, "learning_rate": 8.914971579603969e-06, "loss": 0.8924, "step": 9400 }, { "epoch": 1.5999528473755278, "grad_norm": 1.7109375, "learning_rate": 8.913176331081151e-06, "loss": 0.8944, "step": 9401 }, { "epoch": 1.6001243114645176, "grad_norm": 1.609375, "learning_rate": 8.911381118003743e-06, "loss": 0.7726, "step": 9402 }, { "epoch": 1.6002957755535077, "grad_norm": 1.6640625, "learning_rate": 8.909585940430299e-06, "loss": 0.8849, "step": 9403 }, { "epoch": 1.6004672396424975, "grad_norm": 1.65625, "learning_rate": 8.907790798419369e-06, "loss": 0.8906, "step": 9404 }, { "epoch": 1.6006387037314873, "grad_norm": 1.7265625, "learning_rate": 8.90599569202949e-06, "loss": 0.9278, "step": 9405 }, { "epoch": 1.6008101678204771, "grad_norm": 1.6484375, "learning_rate": 8.90420062131921e-06, "loss": 0.8265, "step": 9406 }, { "epoch": 1.600981631909467, "grad_norm": 1.7578125, "learning_rate": 8.902405586347072e-06, "loss": 0.9074, "step": 9407 }, { "epoch": 1.6011530959984568, "grad_norm": 1.734375, "learning_rate": 8.900610587171623e-06, "loss": 0.9064, "step": 9408 }, { "epoch": 1.6013245600874466, "grad_norm": 1.640625, "learning_rate": 8.898815623851402e-06, "loss": 0.8912, "step": 9409 }, { "epoch": 1.6014960241764364, "grad_norm": 2.0625, "learning_rate": 8.89702069644495e-06, "loss": 0.828, "step": 9410 }, { "epoch": 1.6016674882654263, "grad_norm": 1.625, "learning_rate": 8.895225805010807e-06, "loss": 0.7718, "step": 9411 }, { "epoch": 1.6018389523544163, "grad_norm": 1.84375, "learning_rate": 8.893430949607511e-06, "loss": 0.9473, "step": 9412 }, { "epoch": 1.6020104164434061, "grad_norm": 1.7265625, "learning_rate": 8.891636130293598e-06, "loss": 0.8774, "step": 9413 }, { "epoch": 1.602181880532396, "grad_norm": 1.875, "learning_rate": 8.889841347127607e-06, "loss": 0.9145, "step": 9414 }, { "epoch": 1.602353344621386, "grad_norm": 1.7890625, "learning_rate": 8.888046600168067e-06, "loss": 0.8802, "step": 9415 }, { "epoch": 1.6025248087103758, "grad_norm": 1.671875, "learning_rate": 8.886251889473519e-06, "loss": 0.8245, "step": 9416 }, { "epoch": 1.6026962727993657, "grad_norm": 1.6796875, "learning_rate": 8.884457215102489e-06, "loss": 0.8716, "step": 9417 }, { "epoch": 1.6028677368883555, "grad_norm": 1.7421875, "learning_rate": 8.882662577113505e-06, "loss": 0.8579, "step": 9418 }, { "epoch": 1.6030392009773453, "grad_norm": 1.765625, "learning_rate": 8.880867975565106e-06, "loss": 0.8044, "step": 9419 }, { "epoch": 1.6032106650663351, "grad_norm": 1.6484375, "learning_rate": 8.879073410515818e-06, "loss": 0.7676, "step": 9420 }, { "epoch": 1.603382129155325, "grad_norm": 1.6640625, "learning_rate": 8.877278882024163e-06, "loss": 0.8706, "step": 9421 }, { "epoch": 1.6035535932443148, "grad_norm": 1.7890625, "learning_rate": 8.875484390148674e-06, "loss": 0.9197, "step": 9422 }, { "epoch": 1.6037250573333046, "grad_norm": 1.6953125, "learning_rate": 8.873689934947873e-06, "loss": 0.9162, "step": 9423 }, { "epoch": 1.6038965214222947, "grad_norm": 1.6953125, "learning_rate": 8.871895516480286e-06, "loss": 0.9041, "step": 9424 }, { "epoch": 1.6040679855112845, "grad_norm": 1.6875, "learning_rate": 8.870101134804432e-06, "loss": 0.8439, "step": 9425 }, { "epoch": 1.6042394496002743, "grad_norm": 1.703125, "learning_rate": 8.868306789978837e-06, "loss": 0.8439, "step": 9426 }, { "epoch": 1.6044109136892644, "grad_norm": 1.703125, "learning_rate": 8.866512482062019e-06, "loss": 0.8968, "step": 9427 }, { "epoch": 1.6045823777782542, "grad_norm": 1.6171875, "learning_rate": 8.864718211112497e-06, "loss": 0.7964, "step": 9428 }, { "epoch": 1.604753841867244, "grad_norm": 1.7734375, "learning_rate": 8.862923977188788e-06, "loss": 0.8404, "step": 9429 }, { "epoch": 1.6049253059562338, "grad_norm": 1.640625, "learning_rate": 8.861129780349411e-06, "loss": 0.8308, "step": 9430 }, { "epoch": 1.6050967700452237, "grad_norm": 1.7109375, "learning_rate": 8.85933562065288e-06, "loss": 0.8702, "step": 9431 }, { "epoch": 1.6052682341342135, "grad_norm": 1.6640625, "learning_rate": 8.85754149815771e-06, "loss": 0.8977, "step": 9432 }, { "epoch": 1.6054396982232033, "grad_norm": 1.7109375, "learning_rate": 8.855747412922413e-06, "loss": 0.8239, "step": 9433 }, { "epoch": 1.6056111623121931, "grad_norm": 1.609375, "learning_rate": 8.853953365005503e-06, "loss": 0.7898, "step": 9434 }, { "epoch": 1.605782626401183, "grad_norm": 1.7890625, "learning_rate": 8.852159354465486e-06, "loss": 0.8764, "step": 9435 }, { "epoch": 1.605954090490173, "grad_norm": 1.6953125, "learning_rate": 8.850365381360879e-06, "loss": 0.8465, "step": 9436 }, { "epoch": 1.6061255545791628, "grad_norm": 1.7109375, "learning_rate": 8.848571445750183e-06, "loss": 0.8515, "step": 9437 }, { "epoch": 1.6062970186681527, "grad_norm": 1.6640625, "learning_rate": 8.84677754769191e-06, "loss": 0.7793, "step": 9438 }, { "epoch": 1.6064684827571427, "grad_norm": 1.7890625, "learning_rate": 8.844983687244565e-06, "loss": 0.8679, "step": 9439 }, { "epoch": 1.6066399468461325, "grad_norm": 1.71875, "learning_rate": 8.84318986446665e-06, "loss": 0.8877, "step": 9440 }, { "epoch": 1.6068114109351224, "grad_norm": 1.890625, "learning_rate": 8.84139607941667e-06, "loss": 0.9196, "step": 9441 }, { "epoch": 1.6069828750241122, "grad_norm": 1.640625, "learning_rate": 8.839602332153126e-06, "loss": 0.7696, "step": 9442 }, { "epoch": 1.607154339113102, "grad_norm": 1.6328125, "learning_rate": 8.837808622734519e-06, "loss": 0.8631, "step": 9443 }, { "epoch": 1.6073258032020918, "grad_norm": 1.6953125, "learning_rate": 8.83601495121935e-06, "loss": 0.786, "step": 9444 }, { "epoch": 1.6074972672910817, "grad_norm": 1.640625, "learning_rate": 8.834221317666117e-06, "loss": 0.8097, "step": 9445 }, { "epoch": 1.6076687313800715, "grad_norm": 1.7265625, "learning_rate": 8.832427722133315e-06, "loss": 0.8723, "step": 9446 }, { "epoch": 1.6078401954690613, "grad_norm": 1.796875, "learning_rate": 8.830634164679444e-06, "loss": 0.8215, "step": 9447 }, { "epoch": 1.6080116595580514, "grad_norm": 1.6640625, "learning_rate": 8.828840645362995e-06, "loss": 0.8247, "step": 9448 }, { "epoch": 1.6081831236470412, "grad_norm": 1.703125, "learning_rate": 8.82704716424246e-06, "loss": 0.8534, "step": 9449 }, { "epoch": 1.608354587736031, "grad_norm": 1.734375, "learning_rate": 8.825253721376341e-06, "loss": 0.8736, "step": 9450 }, { "epoch": 1.6085260518250208, "grad_norm": 1.7578125, "learning_rate": 8.823460316823118e-06, "loss": 0.9481, "step": 9451 }, { "epoch": 1.608697515914011, "grad_norm": 1.6328125, "learning_rate": 8.821666950641283e-06, "loss": 0.8318, "step": 9452 }, { "epoch": 1.6088689800030007, "grad_norm": 1.671875, "learning_rate": 8.819873622889328e-06, "loss": 0.9219, "step": 9453 }, { "epoch": 1.6090404440919905, "grad_norm": 1.625, "learning_rate": 8.818080333625737e-06, "loss": 0.8387, "step": 9454 }, { "epoch": 1.6092119081809804, "grad_norm": 1.5625, "learning_rate": 8.816287082908998e-06, "loss": 0.7747, "step": 9455 }, { "epoch": 1.6093833722699702, "grad_norm": 1.65625, "learning_rate": 8.814493870797594e-06, "loss": 0.8218, "step": 9456 }, { "epoch": 1.60955483635896, "grad_norm": 1.6171875, "learning_rate": 8.812700697350006e-06, "loss": 0.8, "step": 9457 }, { "epoch": 1.6097263004479498, "grad_norm": 1.78125, "learning_rate": 8.810907562624721e-06, "loss": 0.8254, "step": 9458 }, { "epoch": 1.6098977645369397, "grad_norm": 1.6875, "learning_rate": 8.809114466680219e-06, "loss": 0.8618, "step": 9459 }, { "epoch": 1.6100692286259295, "grad_norm": 1.6875, "learning_rate": 8.80732140957498e-06, "loss": 0.8895, "step": 9460 }, { "epoch": 1.6102406927149195, "grad_norm": 1.6796875, "learning_rate": 8.80552839136748e-06, "loss": 0.8411, "step": 9461 }, { "epoch": 1.6104121568039094, "grad_norm": 1.6953125, "learning_rate": 8.8037354121162e-06, "loss": 0.8813, "step": 9462 }, { "epoch": 1.6105836208928992, "grad_norm": 1.6484375, "learning_rate": 8.801942471879611e-06, "loss": 0.7896, "step": 9463 }, { "epoch": 1.6107550849818892, "grad_norm": 1.8203125, "learning_rate": 8.800149570716188e-06, "loss": 0.8829, "step": 9464 }, { "epoch": 1.610926549070879, "grad_norm": 1.640625, "learning_rate": 8.798356708684405e-06, "loss": 0.8807, "step": 9465 }, { "epoch": 1.611098013159869, "grad_norm": 1.7578125, "learning_rate": 8.796563885842737e-06, "loss": 0.831, "step": 9466 }, { "epoch": 1.6112694772488587, "grad_norm": 1.7734375, "learning_rate": 8.79477110224965e-06, "loss": 0.931, "step": 9467 }, { "epoch": 1.6114409413378485, "grad_norm": 1.734375, "learning_rate": 8.792978357963618e-06, "loss": 0.8006, "step": 9468 }, { "epoch": 1.6116124054268384, "grad_norm": 1.71875, "learning_rate": 8.791185653043106e-06, "loss": 0.8351, "step": 9469 }, { "epoch": 1.6117838695158282, "grad_norm": 1.7734375, "learning_rate": 8.789392987546581e-06, "loss": 0.8104, "step": 9470 }, { "epoch": 1.611955333604818, "grad_norm": 1.6796875, "learning_rate": 8.78760036153251e-06, "loss": 0.8881, "step": 9471 }, { "epoch": 1.6121267976938078, "grad_norm": 1.6484375, "learning_rate": 8.785807775059357e-06, "loss": 0.8613, "step": 9472 }, { "epoch": 1.612298261782798, "grad_norm": 1.6328125, "learning_rate": 8.784015228185587e-06, "loss": 0.8537, "step": 9473 }, { "epoch": 1.6124697258717877, "grad_norm": 1.7578125, "learning_rate": 8.782222720969658e-06, "loss": 0.8582, "step": 9474 }, { "epoch": 1.6126411899607775, "grad_norm": 1.65625, "learning_rate": 8.78043025347003e-06, "loss": 0.8746, "step": 9475 }, { "epoch": 1.6128126540497676, "grad_norm": 1.703125, "learning_rate": 8.778637825745165e-06, "loss": 0.8207, "step": 9476 }, { "epoch": 1.6129841181387574, "grad_norm": 1.6875, "learning_rate": 8.77684543785352e-06, "loss": 0.8755, "step": 9477 }, { "epoch": 1.6131555822277472, "grad_norm": 1.609375, "learning_rate": 8.77505308985355e-06, "loss": 0.7851, "step": 9478 }, { "epoch": 1.613327046316737, "grad_norm": 1.6171875, "learning_rate": 8.773260781803713e-06, "loss": 0.7995, "step": 9479 }, { "epoch": 1.613498510405727, "grad_norm": 1.6796875, "learning_rate": 8.77146851376246e-06, "loss": 0.8759, "step": 9480 }, { "epoch": 1.6136699744947167, "grad_norm": 1.6953125, "learning_rate": 8.769676285788245e-06, "loss": 0.8722, "step": 9481 }, { "epoch": 1.6138414385837065, "grad_norm": 1.6953125, "learning_rate": 8.76788409793952e-06, "loss": 0.8435, "step": 9482 }, { "epoch": 1.6140129026726964, "grad_norm": 1.6953125, "learning_rate": 8.766091950274735e-06, "loss": 0.8915, "step": 9483 }, { "epoch": 1.6141843667616862, "grad_norm": 1.65625, "learning_rate": 8.764299842852336e-06, "loss": 0.839, "step": 9484 }, { "epoch": 1.6143558308506762, "grad_norm": 1.65625, "learning_rate": 8.762507775730776e-06, "loss": 0.8317, "step": 9485 }, { "epoch": 1.614527294939666, "grad_norm": 1.7421875, "learning_rate": 8.760715748968494e-06, "loss": 0.8901, "step": 9486 }, { "epoch": 1.614698759028656, "grad_norm": 1.7734375, "learning_rate": 8.75892376262394e-06, "loss": 0.8669, "step": 9487 }, { "epoch": 1.614870223117646, "grad_norm": 1.625, "learning_rate": 8.757131816755554e-06, "loss": 0.7872, "step": 9488 }, { "epoch": 1.6150416872066358, "grad_norm": 1.6640625, "learning_rate": 8.75533991142178e-06, "loss": 0.7714, "step": 9489 }, { "epoch": 1.6152131512956256, "grad_norm": 1.7109375, "learning_rate": 8.75354804668106e-06, "loss": 0.8928, "step": 9490 }, { "epoch": 1.6153846153846154, "grad_norm": 1.609375, "learning_rate": 8.75175622259183e-06, "loss": 0.8651, "step": 9491 }, { "epoch": 1.6155560794736052, "grad_norm": 1.5625, "learning_rate": 8.74996443921253e-06, "loss": 0.7452, "step": 9492 }, { "epoch": 1.615727543562595, "grad_norm": 1.6171875, "learning_rate": 8.748172696601597e-06, "loss": 0.6888, "step": 9493 }, { "epoch": 1.615899007651585, "grad_norm": 1.671875, "learning_rate": 8.746380994817467e-06, "loss": 0.7881, "step": 9494 }, { "epoch": 1.6160704717405747, "grad_norm": 1.65625, "learning_rate": 8.744589333918571e-06, "loss": 0.8082, "step": 9495 }, { "epoch": 1.6162419358295645, "grad_norm": 1.75, "learning_rate": 8.742797713963352e-06, "loss": 0.8459, "step": 9496 }, { "epoch": 1.6164133999185546, "grad_norm": 1.7421875, "learning_rate": 8.741006135010228e-06, "loss": 0.8487, "step": 9497 }, { "epoch": 1.6165848640075444, "grad_norm": 1.859375, "learning_rate": 8.739214597117634e-06, "loss": 0.8437, "step": 9498 }, { "epoch": 1.6167563280965342, "grad_norm": 1.7578125, "learning_rate": 8.737423100344002e-06, "loss": 0.8587, "step": 9499 }, { "epoch": 1.6169277921855243, "grad_norm": 1.7421875, "learning_rate": 8.735631644747755e-06, "loss": 0.8818, "step": 9500 }, { "epoch": 1.6170992562745141, "grad_norm": 1.6484375, "learning_rate": 8.733840230387322e-06, "loss": 0.8194, "step": 9501 }, { "epoch": 1.617270720363504, "grad_norm": 1.6328125, "learning_rate": 8.732048857321126e-06, "loss": 0.8206, "step": 9502 }, { "epoch": 1.6174421844524938, "grad_norm": 1.6953125, "learning_rate": 8.730257525607594e-06, "loss": 0.8871, "step": 9503 }, { "epoch": 1.6176136485414836, "grad_norm": 1.6796875, "learning_rate": 8.728466235305143e-06, "loss": 0.8642, "step": 9504 }, { "epoch": 1.6177851126304734, "grad_norm": 1.7265625, "learning_rate": 8.726674986472195e-06, "loss": 0.8576, "step": 9505 }, { "epoch": 1.6179565767194632, "grad_norm": 1.671875, "learning_rate": 8.724883779167174e-06, "loss": 0.8776, "step": 9506 }, { "epoch": 1.618128040808453, "grad_norm": 1.640625, "learning_rate": 8.723092613448494e-06, "loss": 0.8235, "step": 9507 }, { "epoch": 1.618299504897443, "grad_norm": 1.703125, "learning_rate": 8.721301489374575e-06, "loss": 0.8939, "step": 9508 }, { "epoch": 1.618470968986433, "grad_norm": 1.703125, "learning_rate": 8.719510407003826e-06, "loss": 0.8792, "step": 9509 }, { "epoch": 1.6186424330754228, "grad_norm": 1.671875, "learning_rate": 8.717719366394665e-06, "loss": 0.7704, "step": 9510 }, { "epoch": 1.6188138971644126, "grad_norm": 1.7265625, "learning_rate": 8.715928367605503e-06, "loss": 0.8084, "step": 9511 }, { "epoch": 1.6189853612534026, "grad_norm": 1.6796875, "learning_rate": 8.714137410694753e-06, "loss": 0.848, "step": 9512 }, { "epoch": 1.6191568253423925, "grad_norm": 1.671875, "learning_rate": 8.712346495720822e-06, "loss": 0.8198, "step": 9513 }, { "epoch": 1.6193282894313823, "grad_norm": 1.6171875, "learning_rate": 8.710555622742124e-06, "loss": 0.7781, "step": 9514 }, { "epoch": 1.6194997535203721, "grad_norm": 1.75, "learning_rate": 8.708764791817062e-06, "loss": 0.9383, "step": 9515 }, { "epoch": 1.619671217609362, "grad_norm": 1.7109375, "learning_rate": 8.706974003004042e-06, "loss": 0.8586, "step": 9516 }, { "epoch": 1.6198426816983518, "grad_norm": 1.6484375, "learning_rate": 8.705183256361468e-06, "loss": 0.7714, "step": 9517 }, { "epoch": 1.6200141457873416, "grad_norm": 1.609375, "learning_rate": 8.703392551947744e-06, "loss": 0.9317, "step": 9518 }, { "epoch": 1.6201856098763314, "grad_norm": 1.6953125, "learning_rate": 8.701601889821278e-06, "loss": 0.8142, "step": 9519 }, { "epoch": 1.6203570739653212, "grad_norm": 1.65625, "learning_rate": 8.699811270040456e-06, "loss": 0.8342, "step": 9520 }, { "epoch": 1.6205285380543113, "grad_norm": 1.78125, "learning_rate": 8.698020692663686e-06, "loss": 0.9059, "step": 9521 }, { "epoch": 1.6207000021433011, "grad_norm": 1.7421875, "learning_rate": 8.696230157749365e-06, "loss": 0.8225, "step": 9522 }, { "epoch": 1.620871466232291, "grad_norm": 1.75, "learning_rate": 8.694439665355887e-06, "loss": 0.8743, "step": 9523 }, { "epoch": 1.621042930321281, "grad_norm": 4.96875, "learning_rate": 8.692649215541648e-06, "loss": 0.8313, "step": 9524 }, { "epoch": 1.6212143944102708, "grad_norm": 1.7578125, "learning_rate": 8.690858808365041e-06, "loss": 0.8802, "step": 9525 }, { "epoch": 1.6213858584992606, "grad_norm": 1.6953125, "learning_rate": 8.68906844388446e-06, "loss": 0.7753, "step": 9526 }, { "epoch": 1.6215573225882505, "grad_norm": 1.7734375, "learning_rate": 8.68727812215829e-06, "loss": 0.7983, "step": 9527 }, { "epoch": 1.6217287866772403, "grad_norm": 1.8125, "learning_rate": 8.685487843244927e-06, "loss": 0.964, "step": 9528 }, { "epoch": 1.6219002507662301, "grad_norm": 1.71875, "learning_rate": 8.683697607202754e-06, "loss": 0.7903, "step": 9529 }, { "epoch": 1.62207171485522, "grad_norm": 1.7109375, "learning_rate": 8.681907414090159e-06, "loss": 0.8497, "step": 9530 }, { "epoch": 1.6222431789442098, "grad_norm": 1.6015625, "learning_rate": 8.68011726396553e-06, "loss": 0.8279, "step": 9531 }, { "epoch": 1.6224146430331996, "grad_norm": 1.796875, "learning_rate": 8.678327156887243e-06, "loss": 0.8786, "step": 9532 }, { "epoch": 1.6225861071221896, "grad_norm": 1.78125, "learning_rate": 8.676537092913685e-06, "loss": 0.9326, "step": 9533 }, { "epoch": 1.6227575712111795, "grad_norm": 1.875, "learning_rate": 8.674747072103236e-06, "loss": 0.7747, "step": 9534 }, { "epoch": 1.6229290353001693, "grad_norm": 1.6953125, "learning_rate": 8.672957094514278e-06, "loss": 0.9462, "step": 9535 }, { "epoch": 1.6231004993891593, "grad_norm": 1.703125, "learning_rate": 8.671167160205183e-06, "loss": 0.7966, "step": 9536 }, { "epoch": 1.6232719634781492, "grad_norm": 1.6015625, "learning_rate": 8.66937726923433e-06, "loss": 0.8396, "step": 9537 }, { "epoch": 1.623443427567139, "grad_norm": 1.8125, "learning_rate": 8.667587421660099e-06, "loss": 0.8396, "step": 9538 }, { "epoch": 1.6236148916561288, "grad_norm": 1.6875, "learning_rate": 8.665797617540857e-06, "loss": 0.8863, "step": 9539 }, { "epoch": 1.6237863557451186, "grad_norm": 1.5625, "learning_rate": 8.664007856934979e-06, "loss": 0.7412, "step": 9540 }, { "epoch": 1.6239578198341085, "grad_norm": 1.71875, "learning_rate": 8.662218139900836e-06, "loss": 0.8568, "step": 9541 }, { "epoch": 1.6241292839230983, "grad_norm": 1.59375, "learning_rate": 8.660428466496795e-06, "loss": 0.8518, "step": 9542 }, { "epoch": 1.6243007480120881, "grad_norm": 1.7421875, "learning_rate": 8.658638836781232e-06, "loss": 0.9646, "step": 9543 }, { "epoch": 1.624472212101078, "grad_norm": 1.71875, "learning_rate": 8.656849250812504e-06, "loss": 0.8214, "step": 9544 }, { "epoch": 1.624643676190068, "grad_norm": 1.6953125, "learning_rate": 8.65505970864898e-06, "loss": 0.8817, "step": 9545 }, { "epoch": 1.6248151402790578, "grad_norm": 1.625, "learning_rate": 8.653270210349023e-06, "loss": 0.7976, "step": 9546 }, { "epoch": 1.6249866043680476, "grad_norm": 1.703125, "learning_rate": 8.651480755970995e-06, "loss": 0.892, "step": 9547 }, { "epoch": 1.6251580684570375, "grad_norm": 1.6875, "learning_rate": 8.649691345573259e-06, "loss": 0.8196, "step": 9548 }, { "epoch": 1.6253295325460275, "grad_norm": 1.765625, "learning_rate": 8.647901979214173e-06, "loss": 0.9425, "step": 9549 }, { "epoch": 1.6255009966350173, "grad_norm": 1.7265625, "learning_rate": 8.646112656952094e-06, "loss": 0.8447, "step": 9550 }, { "epoch": 1.6256724607240072, "grad_norm": 1.6328125, "learning_rate": 8.644323378845378e-06, "loss": 0.8281, "step": 9551 }, { "epoch": 1.625843924812997, "grad_norm": 1.7265625, "learning_rate": 8.642534144952383e-06, "loss": 0.9119, "step": 9552 }, { "epoch": 1.6260153889019868, "grad_norm": 1.7421875, "learning_rate": 8.640744955331462e-06, "loss": 0.8358, "step": 9553 }, { "epoch": 1.6261868529909767, "grad_norm": 1.609375, "learning_rate": 8.63895581004097e-06, "loss": 0.8309, "step": 9554 }, { "epoch": 1.6263583170799665, "grad_norm": 1.6640625, "learning_rate": 8.637166709139251e-06, "loss": 0.8626, "step": 9555 }, { "epoch": 1.6265297811689563, "grad_norm": 1.6484375, "learning_rate": 8.635377652684657e-06, "loss": 0.7618, "step": 9556 }, { "epoch": 1.6267012452579461, "grad_norm": 1.6796875, "learning_rate": 8.633588640735536e-06, "loss": 0.8348, "step": 9557 }, { "epoch": 1.6268727093469362, "grad_norm": 1.6796875, "learning_rate": 8.631799673350235e-06, "loss": 0.9185, "step": 9558 }, { "epoch": 1.627044173435926, "grad_norm": 1.7578125, "learning_rate": 8.630010750587099e-06, "loss": 0.9125, "step": 9559 }, { "epoch": 1.6272156375249158, "grad_norm": 1.6875, "learning_rate": 8.62822187250447e-06, "loss": 0.9099, "step": 9560 }, { "epoch": 1.6273871016139059, "grad_norm": 1.6171875, "learning_rate": 8.626433039160693e-06, "loss": 0.7726, "step": 9561 }, { "epoch": 1.6275585657028957, "grad_norm": 1.9296875, "learning_rate": 8.624644250614107e-06, "loss": 0.863, "step": 9562 }, { "epoch": 1.6277300297918855, "grad_norm": 1.703125, "learning_rate": 8.622855506923051e-06, "loss": 0.9398, "step": 9563 }, { "epoch": 1.6279014938808753, "grad_norm": 1.7734375, "learning_rate": 8.621066808145863e-06, "loss": 0.8906, "step": 9564 }, { "epoch": 1.6280729579698652, "grad_norm": 1.6640625, "learning_rate": 8.619278154340877e-06, "loss": 0.8521, "step": 9565 }, { "epoch": 1.628244422058855, "grad_norm": 1.6796875, "learning_rate": 8.617489545566437e-06, "loss": 0.9032, "step": 9566 }, { "epoch": 1.6284158861478448, "grad_norm": 1.671875, "learning_rate": 8.615700981880861e-06, "loss": 0.7931, "step": 9567 }, { "epoch": 1.6285873502368347, "grad_norm": 1.6875, "learning_rate": 8.61391246334249e-06, "loss": 0.8021, "step": 9568 }, { "epoch": 1.6287588143258245, "grad_norm": 1.734375, "learning_rate": 8.612123990009655e-06, "loss": 0.8412, "step": 9569 }, { "epoch": 1.6289302784148145, "grad_norm": 1.6953125, "learning_rate": 8.610335561940682e-06, "loss": 0.9334, "step": 9570 }, { "epoch": 1.6291017425038044, "grad_norm": 1.703125, "learning_rate": 8.6085471791939e-06, "loss": 0.8338, "step": 9571 }, { "epoch": 1.6292732065927942, "grad_norm": 1.734375, "learning_rate": 8.606758841827634e-06, "loss": 0.8811, "step": 9572 }, { "epoch": 1.6294446706817842, "grad_norm": 1.6796875, "learning_rate": 8.604970549900208e-06, "loss": 0.7772, "step": 9573 }, { "epoch": 1.629616134770774, "grad_norm": 1.53125, "learning_rate": 8.603182303469947e-06, "loss": 0.7959, "step": 9574 }, { "epoch": 1.6297875988597639, "grad_norm": 1.6328125, "learning_rate": 8.601394102595169e-06, "loss": 0.8504, "step": 9575 }, { "epoch": 1.6299590629487537, "grad_norm": 1.640625, "learning_rate": 8.599605947334196e-06, "loss": 0.8329, "step": 9576 }, { "epoch": 1.6301305270377435, "grad_norm": 1.578125, "learning_rate": 8.59781783774535e-06, "loss": 0.7754, "step": 9577 }, { "epoch": 1.6303019911267334, "grad_norm": 1.71875, "learning_rate": 8.59602977388694e-06, "loss": 0.8126, "step": 9578 }, { "epoch": 1.6304734552157232, "grad_norm": 1.6328125, "learning_rate": 8.594241755817289e-06, "loss": 0.7677, "step": 9579 }, { "epoch": 1.630644919304713, "grad_norm": 1.5703125, "learning_rate": 8.592453783594707e-06, "loss": 0.8123, "step": 9580 }, { "epoch": 1.6308163833937028, "grad_norm": 1.7421875, "learning_rate": 8.590665857277507e-06, "loss": 0.8539, "step": 9581 }, { "epoch": 1.6309878474826929, "grad_norm": 1.65625, "learning_rate": 8.588877976924e-06, "loss": 0.7789, "step": 9582 }, { "epoch": 1.6311593115716827, "grad_norm": 1.71875, "learning_rate": 8.587090142592497e-06, "loss": 0.808, "step": 9583 }, { "epoch": 1.6313307756606725, "grad_norm": 1.8125, "learning_rate": 8.585302354341305e-06, "loss": 0.8538, "step": 9584 }, { "epoch": 1.6315022397496626, "grad_norm": 1.6171875, "learning_rate": 8.58351461222873e-06, "loss": 0.7721, "step": 9585 }, { "epoch": 1.6316737038386524, "grad_norm": 1.7109375, "learning_rate": 8.581726916313077e-06, "loss": 0.882, "step": 9586 }, { "epoch": 1.6318451679276422, "grad_norm": 1.6640625, "learning_rate": 8.57993926665265e-06, "loss": 0.855, "step": 9587 }, { "epoch": 1.632016632016632, "grad_norm": 1.7265625, "learning_rate": 8.578151663305751e-06, "loss": 0.8971, "step": 9588 }, { "epoch": 1.6321880961056219, "grad_norm": 1.6796875, "learning_rate": 8.576364106330683e-06, "loss": 0.8524, "step": 9589 }, { "epoch": 1.6323595601946117, "grad_norm": 1.75, "learning_rate": 8.574576595785742e-06, "loss": 0.8495, "step": 9590 }, { "epoch": 1.6325310242836015, "grad_norm": 1.7109375, "learning_rate": 8.572789131729224e-06, "loss": 0.8608, "step": 9591 }, { "epoch": 1.6327024883725914, "grad_norm": 1.6875, "learning_rate": 8.571001714219425e-06, "loss": 0.8293, "step": 9592 }, { "epoch": 1.6328739524615812, "grad_norm": 1.6796875, "learning_rate": 8.569214343314645e-06, "loss": 0.8418, "step": 9593 }, { "epoch": 1.6330454165505712, "grad_norm": 1.609375, "learning_rate": 8.56742701907317e-06, "loss": 0.8288, "step": 9594 }, { "epoch": 1.633216880639561, "grad_norm": 1.7265625, "learning_rate": 8.565639741553296e-06, "loss": 0.8054, "step": 9595 }, { "epoch": 1.6333883447285509, "grad_norm": 1.6640625, "learning_rate": 8.56385251081331e-06, "loss": 0.8483, "step": 9596 }, { "epoch": 1.633559808817541, "grad_norm": 1.65625, "learning_rate": 8.562065326911502e-06, "loss": 0.856, "step": 9597 }, { "epoch": 1.6337312729065308, "grad_norm": 1.6328125, "learning_rate": 8.560278189906159e-06, "loss": 0.7193, "step": 9598 }, { "epoch": 1.6339027369955206, "grad_norm": 1.7109375, "learning_rate": 8.558491099855565e-06, "loss": 0.8671, "step": 9599 }, { "epoch": 1.6340742010845104, "grad_norm": 1.65625, "learning_rate": 8.556704056818011e-06, "loss": 0.8556, "step": 9600 }, { "epoch": 1.6342456651735002, "grad_norm": 1.7265625, "learning_rate": 8.554917060851767e-06, "loss": 0.8631, "step": 9601 }, { "epoch": 1.63441712926249, "grad_norm": 1.75, "learning_rate": 8.553130112015118e-06, "loss": 0.8055, "step": 9602 }, { "epoch": 1.6345885933514799, "grad_norm": 1.6875, "learning_rate": 8.551343210366346e-06, "loss": 0.8503, "step": 9603 }, { "epoch": 1.6347600574404697, "grad_norm": 1.734375, "learning_rate": 8.549556355963726e-06, "loss": 0.903, "step": 9604 }, { "epoch": 1.6349315215294595, "grad_norm": 1.6796875, "learning_rate": 8.547769548865537e-06, "loss": 0.8312, "step": 9605 }, { "epoch": 1.6351029856184496, "grad_norm": 1.7265625, "learning_rate": 8.545982789130048e-06, "loss": 0.8706, "step": 9606 }, { "epoch": 1.6352744497074394, "grad_norm": 1.71875, "learning_rate": 8.544196076815539e-06, "loss": 0.905, "step": 9607 }, { "epoch": 1.6354459137964292, "grad_norm": 1.5546875, "learning_rate": 8.542409411980276e-06, "loss": 0.7822, "step": 9608 }, { "epoch": 1.6356173778854193, "grad_norm": 1.6953125, "learning_rate": 8.540622794682531e-06, "loss": 0.9522, "step": 9609 }, { "epoch": 1.635788841974409, "grad_norm": 1.609375, "learning_rate": 8.538836224980574e-06, "loss": 0.8145, "step": 9610 }, { "epoch": 1.635960306063399, "grad_norm": 1.7734375, "learning_rate": 8.537049702932669e-06, "loss": 0.9594, "step": 9611 }, { "epoch": 1.6361317701523888, "grad_norm": 1.6640625, "learning_rate": 8.535263228597086e-06, "loss": 0.834, "step": 9612 }, { "epoch": 1.6363032342413786, "grad_norm": 1.8046875, "learning_rate": 8.53347680203208e-06, "loss": 0.9463, "step": 9613 }, { "epoch": 1.6364746983303684, "grad_norm": 1.6328125, "learning_rate": 8.531690423295917e-06, "loss": 0.8724, "step": 9614 }, { "epoch": 1.6366461624193582, "grad_norm": 1.7265625, "learning_rate": 8.529904092446862e-06, "loss": 0.8486, "step": 9615 }, { "epoch": 1.636817626508348, "grad_norm": 1.71875, "learning_rate": 8.528117809543168e-06, "loss": 0.8814, "step": 9616 }, { "epoch": 1.6369890905973379, "grad_norm": 1.5859375, "learning_rate": 8.526331574643096e-06, "loss": 0.9166, "step": 9617 }, { "epoch": 1.637160554686328, "grad_norm": 1.71875, "learning_rate": 8.5245453878049e-06, "loss": 0.8858, "step": 9618 }, { "epoch": 1.6373320187753178, "grad_norm": 1.5859375, "learning_rate": 8.522759249086835e-06, "loss": 0.7996, "step": 9619 }, { "epoch": 1.6375034828643076, "grad_norm": 1.625, "learning_rate": 8.520973158547154e-06, "loss": 0.8665, "step": 9620 }, { "epoch": 1.6376749469532976, "grad_norm": 1.8671875, "learning_rate": 8.519187116244107e-06, "loss": 0.91, "step": 9621 }, { "epoch": 1.6378464110422875, "grad_norm": 1.703125, "learning_rate": 8.517401122235945e-06, "loss": 0.9372, "step": 9622 }, { "epoch": 1.6380178751312773, "grad_norm": 1.7421875, "learning_rate": 8.515615176580917e-06, "loss": 0.9159, "step": 9623 }, { "epoch": 1.638189339220267, "grad_norm": 1.734375, "learning_rate": 8.513829279337267e-06, "loss": 0.8983, "step": 9624 }, { "epoch": 1.638360803309257, "grad_norm": 1.671875, "learning_rate": 8.512043430563239e-06, "loss": 0.8373, "step": 9625 }, { "epoch": 1.6385322673982468, "grad_norm": 1.65625, "learning_rate": 8.510257630317079e-06, "loss": 0.8467, "step": 9626 }, { "epoch": 1.6387037314872366, "grad_norm": 1.734375, "learning_rate": 8.508471878657028e-06, "loss": 0.9898, "step": 9627 }, { "epoch": 1.6388751955762264, "grad_norm": 1.625, "learning_rate": 8.506686175641324e-06, "loss": 0.7781, "step": 9628 }, { "epoch": 1.6390466596652162, "grad_norm": 1.6640625, "learning_rate": 8.504900521328207e-06, "loss": 0.7417, "step": 9629 }, { "epoch": 1.6392181237542063, "grad_norm": 1.6171875, "learning_rate": 8.503114915775915e-06, "loss": 0.7881, "step": 9630 }, { "epoch": 1.639389587843196, "grad_norm": 1.6953125, "learning_rate": 8.501329359042683e-06, "loss": 0.8748, "step": 9631 }, { "epoch": 1.639561051932186, "grad_norm": 1.765625, "learning_rate": 8.499543851186742e-06, "loss": 0.8101, "step": 9632 }, { "epoch": 1.639732516021176, "grad_norm": 1.828125, "learning_rate": 8.497758392266328e-06, "loss": 0.8246, "step": 9633 }, { "epoch": 1.6399039801101658, "grad_norm": 1.7109375, "learning_rate": 8.49597298233967e-06, "loss": 0.7272, "step": 9634 }, { "epoch": 1.6400754441991556, "grad_norm": 1.7109375, "learning_rate": 8.494187621464997e-06, "loss": 0.907, "step": 9635 }, { "epoch": 1.6402469082881455, "grad_norm": 1.78125, "learning_rate": 8.492402309700535e-06, "loss": 0.8957, "step": 9636 }, { "epoch": 1.6404183723771353, "grad_norm": 1.7578125, "learning_rate": 8.490617047104511e-06, "loss": 0.9228, "step": 9637 }, { "epoch": 1.640589836466125, "grad_norm": 1.7734375, "learning_rate": 8.488831833735149e-06, "loss": 0.9267, "step": 9638 }, { "epoch": 1.640761300555115, "grad_norm": 1.59375, "learning_rate": 8.48704666965067e-06, "loss": 0.8438, "step": 9639 }, { "epoch": 1.6409327646441048, "grad_norm": 1.6796875, "learning_rate": 8.485261554909298e-06, "loss": 0.8037, "step": 9640 }, { "epoch": 1.6411042287330946, "grad_norm": 1.75, "learning_rate": 8.48347648956925e-06, "loss": 0.8805, "step": 9641 }, { "epoch": 1.6412756928220844, "grad_norm": 1.7578125, "learning_rate": 8.481691473688745e-06, "loss": 0.9045, "step": 9642 }, { "epoch": 1.6414471569110745, "grad_norm": 1.7421875, "learning_rate": 8.479906507325997e-06, "loss": 0.8374, "step": 9643 }, { "epoch": 1.6416186210000643, "grad_norm": 1.6484375, "learning_rate": 8.478121590539221e-06, "loss": 0.7615, "step": 9644 }, { "epoch": 1.641790085089054, "grad_norm": 1.6875, "learning_rate": 8.476336723386632e-06, "loss": 0.8641, "step": 9645 }, { "epoch": 1.6419615491780442, "grad_norm": 1.7265625, "learning_rate": 8.474551905926446e-06, "loss": 0.89, "step": 9646 }, { "epoch": 1.642133013267034, "grad_norm": 1.78125, "learning_rate": 8.47276713821686e-06, "loss": 0.7849, "step": 9647 }, { "epoch": 1.6423044773560238, "grad_norm": 1.53125, "learning_rate": 8.47098242031609e-06, "loss": 0.8519, "step": 9648 }, { "epoch": 1.6424759414450136, "grad_norm": 1.6953125, "learning_rate": 8.469197752282343e-06, "loss": 0.8685, "step": 9649 }, { "epoch": 1.6426474055340035, "grad_norm": 1.6875, "learning_rate": 8.467413134173819e-06, "loss": 0.813, "step": 9650 }, { "epoch": 1.6428188696229933, "grad_norm": 1.6953125, "learning_rate": 8.465628566048724e-06, "loss": 0.8472, "step": 9651 }, { "epoch": 1.642990333711983, "grad_norm": 1.6796875, "learning_rate": 8.46384404796526e-06, "loss": 0.8151, "step": 9652 }, { "epoch": 1.643161797800973, "grad_norm": 1.7265625, "learning_rate": 8.462059579981624e-06, "loss": 0.8978, "step": 9653 }, { "epoch": 1.6433332618899628, "grad_norm": 1.7890625, "learning_rate": 8.460275162156019e-06, "loss": 0.8837, "step": 9654 }, { "epoch": 1.6435047259789528, "grad_norm": 1.7421875, "learning_rate": 8.458490794546638e-06, "loss": 0.852, "step": 9655 }, { "epoch": 1.6436761900679426, "grad_norm": 1.671875, "learning_rate": 8.456706477211677e-06, "loss": 0.819, "step": 9656 }, { "epoch": 1.6438476541569325, "grad_norm": 1.71875, "learning_rate": 8.454922210209332e-06, "loss": 0.9105, "step": 9657 }, { "epoch": 1.6440191182459225, "grad_norm": 1.6875, "learning_rate": 8.453137993597792e-06, "loss": 0.8514, "step": 9658 }, { "epoch": 1.6441905823349123, "grad_norm": 1.765625, "learning_rate": 8.451353827435247e-06, "loss": 0.8103, "step": 9659 }, { "epoch": 1.6443620464239022, "grad_norm": 1.6640625, "learning_rate": 8.449569711779883e-06, "loss": 0.8241, "step": 9660 }, { "epoch": 1.644533510512892, "grad_norm": 1.71875, "learning_rate": 8.447785646689887e-06, "loss": 0.8696, "step": 9661 }, { "epoch": 1.6447049746018818, "grad_norm": 1.6640625, "learning_rate": 8.446001632223448e-06, "loss": 0.7978, "step": 9662 }, { "epoch": 1.6448764386908716, "grad_norm": 1.7578125, "learning_rate": 8.444217668438748e-06, "loss": 0.7798, "step": 9663 }, { "epoch": 1.6450479027798615, "grad_norm": 1.796875, "learning_rate": 8.442433755393968e-06, "loss": 0.9269, "step": 9664 }, { "epoch": 1.6452193668688513, "grad_norm": 1.671875, "learning_rate": 8.440649893147289e-06, "loss": 0.842, "step": 9665 }, { "epoch": 1.645390830957841, "grad_norm": 1.625, "learning_rate": 8.438866081756889e-06, "loss": 0.8264, "step": 9666 }, { "epoch": 1.6455622950468312, "grad_norm": 1.6796875, "learning_rate": 8.437082321280945e-06, "loss": 0.7968, "step": 9667 }, { "epoch": 1.645733759135821, "grad_norm": 1.703125, "learning_rate": 8.435298611777632e-06, "loss": 0.8673, "step": 9668 }, { "epoch": 1.6459052232248108, "grad_norm": 1.7890625, "learning_rate": 8.433514953305124e-06, "loss": 0.9696, "step": 9669 }, { "epoch": 1.6460766873138009, "grad_norm": 1.7265625, "learning_rate": 8.431731345921592e-06, "loss": 0.8855, "step": 9670 }, { "epoch": 1.6462481514027907, "grad_norm": 1.640625, "learning_rate": 8.429947789685206e-06, "loss": 0.822, "step": 9671 }, { "epoch": 1.6464196154917805, "grad_norm": 1.7890625, "learning_rate": 8.428164284654133e-06, "loss": 0.8785, "step": 9672 }, { "epoch": 1.6465910795807703, "grad_norm": 1.6484375, "learning_rate": 8.426380830886544e-06, "loss": 0.8603, "step": 9673 }, { "epoch": 1.6467625436697602, "grad_norm": 1.6875, "learning_rate": 8.4245974284406e-06, "loss": 0.8427, "step": 9674 }, { "epoch": 1.64693400775875, "grad_norm": 1.8203125, "learning_rate": 8.422814077374468e-06, "loss": 0.8966, "step": 9675 }, { "epoch": 1.6471054718477398, "grad_norm": 1.6484375, "learning_rate": 8.421030777746306e-06, "loss": 0.8551, "step": 9676 }, { "epoch": 1.6472769359367296, "grad_norm": 1.7421875, "learning_rate": 8.419247529614278e-06, "loss": 0.8953, "step": 9677 }, { "epoch": 1.6474484000257195, "grad_norm": 1.703125, "learning_rate": 8.41746433303654e-06, "loss": 0.8488, "step": 9678 }, { "epoch": 1.6476198641147095, "grad_norm": 1.640625, "learning_rate": 8.41568118807125e-06, "loss": 0.8468, "step": 9679 }, { "epoch": 1.6477913282036993, "grad_norm": 1.640625, "learning_rate": 8.41389809477656e-06, "loss": 0.7708, "step": 9680 }, { "epoch": 1.6479627922926892, "grad_norm": 1.75, "learning_rate": 8.412115053210631e-06, "loss": 0.9317, "step": 9681 }, { "epoch": 1.6481342563816792, "grad_norm": 1.703125, "learning_rate": 8.410332063431606e-06, "loss": 0.8848, "step": 9682 }, { "epoch": 1.648305720470669, "grad_norm": 1.734375, "learning_rate": 8.408549125497638e-06, "loss": 0.8291, "step": 9683 }, { "epoch": 1.6484771845596589, "grad_norm": 1.703125, "learning_rate": 8.406766239466878e-06, "loss": 0.8033, "step": 9684 }, { "epoch": 1.6486486486486487, "grad_norm": 1.6015625, "learning_rate": 8.404983405397468e-06, "loss": 0.8135, "step": 9685 }, { "epoch": 1.6488201127376385, "grad_norm": 1.6875, "learning_rate": 8.403200623347556e-06, "loss": 0.8502, "step": 9686 }, { "epoch": 1.6489915768266283, "grad_norm": 1.6796875, "learning_rate": 8.401417893375286e-06, "loss": 0.7815, "step": 9687 }, { "epoch": 1.6491630409156182, "grad_norm": 1.6875, "learning_rate": 8.399635215538798e-06, "loss": 0.8002, "step": 9688 }, { "epoch": 1.649334505004608, "grad_norm": 1.734375, "learning_rate": 8.39785258989623e-06, "loss": 0.8572, "step": 9689 }, { "epoch": 1.6495059690935978, "grad_norm": 1.7578125, "learning_rate": 8.396070016505725e-06, "loss": 0.9321, "step": 9690 }, { "epoch": 1.6496774331825879, "grad_norm": 1.75, "learning_rate": 8.394287495425412e-06, "loss": 0.7589, "step": 9691 }, { "epoch": 1.6498488972715777, "grad_norm": 1.7265625, "learning_rate": 8.392505026713434e-06, "loss": 0.8176, "step": 9692 }, { "epoch": 1.6500203613605675, "grad_norm": 1.65625, "learning_rate": 8.390722610427923e-06, "loss": 0.7663, "step": 9693 }, { "epoch": 1.6501918254495576, "grad_norm": 1.71875, "learning_rate": 8.388940246627005e-06, "loss": 0.8577, "step": 9694 }, { "epoch": 1.6503632895385474, "grad_norm": 1.765625, "learning_rate": 8.387157935368811e-06, "loss": 0.8385, "step": 9695 }, { "epoch": 1.6505347536275372, "grad_norm": 1.84375, "learning_rate": 8.385375676711472e-06, "loss": 0.9336, "step": 9696 }, { "epoch": 1.650706217716527, "grad_norm": 1.6171875, "learning_rate": 8.38359347071311e-06, "loss": 0.7651, "step": 9697 }, { "epoch": 1.6508776818055169, "grad_norm": 1.828125, "learning_rate": 8.381811317431853e-06, "loss": 0.9214, "step": 9698 }, { "epoch": 1.6510491458945067, "grad_norm": 1.640625, "learning_rate": 8.380029216925824e-06, "loss": 0.8356, "step": 9699 }, { "epoch": 1.6512206099834965, "grad_norm": 1.6484375, "learning_rate": 8.378247169253138e-06, "loss": 0.8845, "step": 9700 }, { "epoch": 1.6513920740724863, "grad_norm": 1.7421875, "learning_rate": 8.376465174471922e-06, "loss": 0.8047, "step": 9701 }, { "epoch": 1.6515635381614762, "grad_norm": 1.6328125, "learning_rate": 8.374683232640289e-06, "loss": 0.8342, "step": 9702 }, { "epoch": 1.6517350022504662, "grad_norm": 1.75, "learning_rate": 8.372901343816357e-06, "loss": 0.9709, "step": 9703 }, { "epoch": 1.651906466339456, "grad_norm": 1.6171875, "learning_rate": 8.371119508058245e-06, "loss": 0.9, "step": 9704 }, { "epoch": 1.6520779304284459, "grad_norm": 1.6328125, "learning_rate": 8.369337725424054e-06, "loss": 0.8676, "step": 9705 }, { "epoch": 1.652249394517436, "grad_norm": 1.6796875, "learning_rate": 8.3675559959719e-06, "loss": 0.8422, "step": 9706 }, { "epoch": 1.6524208586064257, "grad_norm": 1.6796875, "learning_rate": 8.365774319759892e-06, "loss": 0.8387, "step": 9707 }, { "epoch": 1.6525923226954156, "grad_norm": 1.7109375, "learning_rate": 8.363992696846136e-06, "loss": 0.8843, "step": 9708 }, { "epoch": 1.6527637867844054, "grad_norm": 1.6015625, "learning_rate": 8.36221112728874e-06, "loss": 0.839, "step": 9709 }, { "epoch": 1.6529352508733952, "grad_norm": 1.703125, "learning_rate": 8.360429611145808e-06, "loss": 0.7779, "step": 9710 }, { "epoch": 1.653106714962385, "grad_norm": 1.7109375, "learning_rate": 8.35864814847544e-06, "loss": 0.8224, "step": 9711 }, { "epoch": 1.6532781790513749, "grad_norm": 1.78125, "learning_rate": 8.356866739335735e-06, "loss": 0.8112, "step": 9712 }, { "epoch": 1.6534496431403647, "grad_norm": 1.703125, "learning_rate": 8.355085383784794e-06, "loss": 0.8244, "step": 9713 }, { "epoch": 1.6536211072293545, "grad_norm": 1.6953125, "learning_rate": 8.353304081880713e-06, "loss": 0.8558, "step": 9714 }, { "epoch": 1.6537925713183446, "grad_norm": 1.6875, "learning_rate": 8.351522833681587e-06, "loss": 0.8791, "step": 9715 }, { "epoch": 1.6539640354073344, "grad_norm": 1.7890625, "learning_rate": 8.34974163924551e-06, "loss": 0.8825, "step": 9716 }, { "epoch": 1.6541354994963242, "grad_norm": 1.6953125, "learning_rate": 8.347960498630574e-06, "loss": 0.8269, "step": 9717 }, { "epoch": 1.6543069635853143, "grad_norm": 1.6953125, "learning_rate": 8.346179411894864e-06, "loss": 0.8492, "step": 9718 }, { "epoch": 1.654478427674304, "grad_norm": 1.734375, "learning_rate": 8.34439837909647e-06, "loss": 0.8248, "step": 9719 }, { "epoch": 1.654649891763294, "grad_norm": 1.7265625, "learning_rate": 8.342617400293482e-06, "loss": 0.8771, "step": 9720 }, { "epoch": 1.6548213558522837, "grad_norm": 1.6875, "learning_rate": 8.34083647554398e-06, "loss": 0.8594, "step": 9721 }, { "epoch": 1.6549928199412736, "grad_norm": 1.671875, "learning_rate": 8.33905560490605e-06, "loss": 0.8927, "step": 9722 }, { "epoch": 1.6551642840302634, "grad_norm": 1.640625, "learning_rate": 8.337274788437769e-06, "loss": 0.789, "step": 9723 }, { "epoch": 1.6553357481192532, "grad_norm": 1.7109375, "learning_rate": 8.33549402619722e-06, "loss": 0.7861, "step": 9724 }, { "epoch": 1.655507212208243, "grad_norm": 1.75, "learning_rate": 8.333713318242477e-06, "loss": 0.7888, "step": 9725 }, { "epoch": 1.6556786762972329, "grad_norm": 1.6796875, "learning_rate": 8.331932664631619e-06, "loss": 0.8536, "step": 9726 }, { "epoch": 1.655850140386223, "grad_norm": 1.6953125, "learning_rate": 8.33015206542272e-06, "loss": 0.7891, "step": 9727 }, { "epoch": 1.6560216044752127, "grad_norm": 1.734375, "learning_rate": 8.328371520673848e-06, "loss": 0.887, "step": 9728 }, { "epoch": 1.6561930685642026, "grad_norm": 1.7109375, "learning_rate": 8.326591030443075e-06, "loss": 0.8448, "step": 9729 }, { "epoch": 1.6563645326531926, "grad_norm": 1.96875, "learning_rate": 8.324810594788471e-06, "loss": 0.848, "step": 9730 }, { "epoch": 1.6565359967421824, "grad_norm": 1.7421875, "learning_rate": 8.3230302137681e-06, "loss": 0.9769, "step": 9731 }, { "epoch": 1.6567074608311723, "grad_norm": 1.6484375, "learning_rate": 8.32124988744003e-06, "loss": 0.826, "step": 9732 }, { "epoch": 1.656878924920162, "grad_norm": 1.84375, "learning_rate": 8.319469615862324e-06, "loss": 0.9164, "step": 9733 }, { "epoch": 1.657050389009152, "grad_norm": 1.703125, "learning_rate": 8.317689399093039e-06, "loss": 0.8394, "step": 9734 }, { "epoch": 1.6572218530981417, "grad_norm": 1.6796875, "learning_rate": 8.315909237190241e-06, "loss": 0.858, "step": 9735 }, { "epoch": 1.6573933171871316, "grad_norm": 1.703125, "learning_rate": 8.314129130211981e-06, "loss": 0.8047, "step": 9736 }, { "epoch": 1.6575647812761214, "grad_norm": 1.625, "learning_rate": 8.31234907821632e-06, "loss": 0.8094, "step": 9737 }, { "epoch": 1.6577362453651112, "grad_norm": 1.78125, "learning_rate": 8.31056908126131e-06, "loss": 0.9054, "step": 9738 }, { "epoch": 1.657907709454101, "grad_norm": 1.6953125, "learning_rate": 8.308789139405008e-06, "loss": 0.8816, "step": 9739 }, { "epoch": 1.658079173543091, "grad_norm": 1.7265625, "learning_rate": 8.30700925270546e-06, "loss": 0.744, "step": 9740 }, { "epoch": 1.658250637632081, "grad_norm": 1.7734375, "learning_rate": 8.305229421220712e-06, "loss": 0.9439, "step": 9741 }, { "epoch": 1.6584221017210707, "grad_norm": 1.7421875, "learning_rate": 8.303449645008815e-06, "loss": 0.8557, "step": 9742 }, { "epoch": 1.6585935658100608, "grad_norm": 1.7265625, "learning_rate": 8.301669924127815e-06, "loss": 0.8596, "step": 9743 }, { "epoch": 1.6587650298990506, "grad_norm": 1.7734375, "learning_rate": 8.299890258635753e-06, "loss": 0.9133, "step": 9744 }, { "epoch": 1.6589364939880404, "grad_norm": 1.703125, "learning_rate": 8.298110648590673e-06, "loss": 0.8114, "step": 9745 }, { "epoch": 1.6591079580770303, "grad_norm": 1.65625, "learning_rate": 8.296331094050609e-06, "loss": 0.8435, "step": 9746 }, { "epoch": 1.65927942216602, "grad_norm": 1.734375, "learning_rate": 8.294551595073608e-06, "loss": 0.8475, "step": 9747 }, { "epoch": 1.65945088625501, "grad_norm": 1.6640625, "learning_rate": 8.2927721517177e-06, "loss": 0.8274, "step": 9748 }, { "epoch": 1.6596223503439997, "grad_norm": 1.7265625, "learning_rate": 8.290992764040922e-06, "loss": 0.8764, "step": 9749 }, { "epoch": 1.6597938144329896, "grad_norm": 1.6875, "learning_rate": 8.28921343210131e-06, "loss": 0.8514, "step": 9750 }, { "epoch": 1.6599652785219794, "grad_norm": 1.703125, "learning_rate": 8.287434155956885e-06, "loss": 0.8684, "step": 9751 }, { "epoch": 1.6601367426109694, "grad_norm": 1.7421875, "learning_rate": 8.28565493566568e-06, "loss": 0.9562, "step": 9752 }, { "epoch": 1.6603082066999593, "grad_norm": 1.6796875, "learning_rate": 8.283875771285725e-06, "loss": 0.8543, "step": 9753 }, { "epoch": 1.660479670788949, "grad_norm": 1.7734375, "learning_rate": 8.282096662875042e-06, "loss": 0.9567, "step": 9754 }, { "epoch": 1.6606511348779391, "grad_norm": 1.640625, "learning_rate": 8.280317610491655e-06, "loss": 0.8085, "step": 9755 }, { "epoch": 1.660822598966929, "grad_norm": 1.8203125, "learning_rate": 8.27853861419359e-06, "loss": 0.9327, "step": 9756 }, { "epoch": 1.6609940630559188, "grad_norm": 1.7265625, "learning_rate": 8.276759674038861e-06, "loss": 0.8291, "step": 9757 }, { "epoch": 1.6611655271449086, "grad_norm": 1.6640625, "learning_rate": 8.274980790085489e-06, "loss": 0.8373, "step": 9758 }, { "epoch": 1.6613369912338984, "grad_norm": 1.71875, "learning_rate": 8.273201962391488e-06, "loss": 0.8789, "step": 9759 }, { "epoch": 1.6615084553228883, "grad_norm": 1.734375, "learning_rate": 8.271423191014874e-06, "loss": 0.868, "step": 9760 }, { "epoch": 1.661679919411878, "grad_norm": 1.7578125, "learning_rate": 8.269644476013661e-06, "loss": 0.8498, "step": 9761 }, { "epoch": 1.661851383500868, "grad_norm": 1.6484375, "learning_rate": 8.26786581744586e-06, "loss": 0.8065, "step": 9762 }, { "epoch": 1.6620228475898577, "grad_norm": 1.75, "learning_rate": 8.26608721536947e-06, "loss": 0.8592, "step": 9763 }, { "epoch": 1.6621943116788478, "grad_norm": 1.78125, "learning_rate": 8.26430866984251e-06, "loss": 0.8404, "step": 9764 }, { "epoch": 1.6623657757678376, "grad_norm": 1.6796875, "learning_rate": 8.262530180922978e-06, "loss": 0.824, "step": 9765 }, { "epoch": 1.6625372398568274, "grad_norm": 1.796875, "learning_rate": 8.260751748668881e-06, "loss": 0.8971, "step": 9766 }, { "epoch": 1.6627087039458175, "grad_norm": 1.7578125, "learning_rate": 8.258973373138218e-06, "loss": 0.924, "step": 9767 }, { "epoch": 1.6628801680348073, "grad_norm": 1.765625, "learning_rate": 8.25719505438899e-06, "loss": 0.8984, "step": 9768 }, { "epoch": 1.6630516321237971, "grad_norm": 1.765625, "learning_rate": 8.255416792479192e-06, "loss": 0.747, "step": 9769 }, { "epoch": 1.663223096212787, "grad_norm": 1.7421875, "learning_rate": 8.253638587466823e-06, "loss": 0.9168, "step": 9770 }, { "epoch": 1.6633945603017768, "grad_norm": 1.71875, "learning_rate": 8.251860439409877e-06, "loss": 0.8687, "step": 9771 }, { "epoch": 1.6635660243907666, "grad_norm": 1.84375, "learning_rate": 8.250082348366343e-06, "loss": 0.9425, "step": 9772 }, { "epoch": 1.6637374884797564, "grad_norm": 1.6953125, "learning_rate": 8.248304314394218e-06, "loss": 0.8669, "step": 9773 }, { "epoch": 1.6639089525687463, "grad_norm": 1.7109375, "learning_rate": 8.246526337551482e-06, "loss": 0.8023, "step": 9774 }, { "epoch": 1.664080416657736, "grad_norm": 1.7109375, "learning_rate": 8.244748417896126e-06, "loss": 0.9566, "step": 9775 }, { "epoch": 1.6642518807467261, "grad_norm": 1.703125, "learning_rate": 8.242970555486131e-06, "loss": 0.8965, "step": 9776 }, { "epoch": 1.664423344835716, "grad_norm": 1.671875, "learning_rate": 8.241192750379484e-06, "loss": 0.8908, "step": 9777 }, { "epoch": 1.6645948089247058, "grad_norm": 1.6953125, "learning_rate": 8.239415002634166e-06, "loss": 0.8447, "step": 9778 }, { "epoch": 1.6647662730136958, "grad_norm": 1.640625, "learning_rate": 8.237637312308154e-06, "loss": 0.8, "step": 9779 }, { "epoch": 1.6649377371026857, "grad_norm": 1.7109375, "learning_rate": 8.235859679459427e-06, "loss": 0.8898, "step": 9780 }, { "epoch": 1.6651092011916755, "grad_norm": 1.671875, "learning_rate": 8.234082104145956e-06, "loss": 0.9017, "step": 9781 }, { "epoch": 1.6652806652806653, "grad_norm": 1.78125, "learning_rate": 8.232304586425717e-06, "loss": 0.8863, "step": 9782 }, { "epoch": 1.6654521293696551, "grad_norm": 1.625, "learning_rate": 8.230527126356684e-06, "loss": 0.8047, "step": 9783 }, { "epoch": 1.665623593458645, "grad_norm": 1.7265625, "learning_rate": 8.228749723996825e-06, "loss": 0.9204, "step": 9784 }, { "epoch": 1.6657950575476348, "grad_norm": 1.7734375, "learning_rate": 8.226972379404108e-06, "loss": 0.9076, "step": 9785 }, { "epoch": 1.6659665216366246, "grad_norm": 1.6875, "learning_rate": 8.225195092636497e-06, "loss": 0.8587, "step": 9786 }, { "epoch": 1.6661379857256144, "grad_norm": 1.671875, "learning_rate": 8.223417863751956e-06, "loss": 0.8356, "step": 9787 }, { "epoch": 1.6663094498146045, "grad_norm": 1.6796875, "learning_rate": 8.221640692808451e-06, "loss": 0.8909, "step": 9788 }, { "epoch": 1.6664809139035943, "grad_norm": 1.625, "learning_rate": 8.219863579863939e-06, "loss": 0.7785, "step": 9789 }, { "epoch": 1.6666523779925841, "grad_norm": 1.6484375, "learning_rate": 8.218086524976377e-06, "loss": 0.8028, "step": 9790 }, { "epoch": 1.6668238420815742, "grad_norm": 1.6328125, "learning_rate": 8.216309528203726e-06, "loss": 0.8187, "step": 9791 }, { "epoch": 1.666995306170564, "grad_norm": 1.75, "learning_rate": 8.214532589603936e-06, "loss": 0.9681, "step": 9792 }, { "epoch": 1.6671667702595538, "grad_norm": 1.734375, "learning_rate": 8.212755709234962e-06, "loss": 0.8375, "step": 9793 }, { "epoch": 1.6673382343485437, "grad_norm": 1.78125, "learning_rate": 8.210978887154753e-06, "loss": 0.9124, "step": 9794 }, { "epoch": 1.6675096984375335, "grad_norm": 1.6796875, "learning_rate": 8.209202123421262e-06, "loss": 0.8024, "step": 9795 }, { "epoch": 1.6676811625265233, "grad_norm": 1.625, "learning_rate": 8.207425418092438e-06, "loss": 0.8746, "step": 9796 }, { "epoch": 1.6678526266155131, "grad_norm": 1.6328125, "learning_rate": 8.205648771226215e-06, "loss": 0.8642, "step": 9797 }, { "epoch": 1.668024090704503, "grad_norm": 1.8125, "learning_rate": 8.203872182880544e-06, "loss": 0.8817, "step": 9798 }, { "epoch": 1.6681955547934928, "grad_norm": 1.6796875, "learning_rate": 8.202095653113365e-06, "loss": 0.8011, "step": 9799 }, { "epoch": 1.6683670188824828, "grad_norm": 1.7109375, "learning_rate": 8.200319181982615e-06, "loss": 0.8085, "step": 9800 }, { "epoch": 1.6683670188824828, "eval_loss": 0.8372142910957336, "eval_runtime": 835.8053, "eval_samples_per_second": 2.99, "eval_steps_per_second": 2.99, "step": 9800 }, { "epoch": 1.6685384829714727, "grad_norm": 1.7109375, "learning_rate": 8.198542769546235e-06, "loss": 0.8591, "step": 9801 }, { "epoch": 1.6687099470604625, "grad_norm": 1.7578125, "learning_rate": 8.196766415862157e-06, "loss": 0.9123, "step": 9802 }, { "epoch": 1.6688814111494525, "grad_norm": 1.6953125, "learning_rate": 8.194990120988317e-06, "loss": 0.7732, "step": 9803 }, { "epoch": 1.6690528752384424, "grad_norm": 1.6796875, "learning_rate": 8.193213884982648e-06, "loss": 0.8743, "step": 9804 }, { "epoch": 1.6692243393274322, "grad_norm": 1.8203125, "learning_rate": 8.191437707903077e-06, "loss": 0.9104, "step": 9805 }, { "epoch": 1.669395803416422, "grad_norm": 1.7578125, "learning_rate": 8.189661589807532e-06, "loss": 0.9285, "step": 9806 }, { "epoch": 1.6695672675054118, "grad_norm": 1.703125, "learning_rate": 8.187885530753942e-06, "loss": 0.7954, "step": 9807 }, { "epoch": 1.6697387315944017, "grad_norm": 1.7109375, "learning_rate": 8.186109530800231e-06, "loss": 0.9223, "step": 9808 }, { "epoch": 1.6699101956833915, "grad_norm": 1.7734375, "learning_rate": 8.184333590004315e-06, "loss": 0.8979, "step": 9809 }, { "epoch": 1.6700816597723813, "grad_norm": 1.625, "learning_rate": 8.182557708424116e-06, "loss": 0.8499, "step": 9810 }, { "epoch": 1.6702531238613711, "grad_norm": 1.6171875, "learning_rate": 8.180781886117554e-06, "loss": 0.8487, "step": 9811 }, { "epoch": 1.6704245879503612, "grad_norm": 1.7421875, "learning_rate": 8.179006123142548e-06, "loss": 0.8923, "step": 9812 }, { "epoch": 1.670596052039351, "grad_norm": 1.6953125, "learning_rate": 8.177230419557009e-06, "loss": 0.8345, "step": 9813 }, { "epoch": 1.6707675161283408, "grad_norm": 1.765625, "learning_rate": 8.17545477541885e-06, "loss": 0.9097, "step": 9814 }, { "epoch": 1.670938980217331, "grad_norm": 1.7421875, "learning_rate": 8.17367919078598e-06, "loss": 0.8291, "step": 9815 }, { "epoch": 1.6711104443063207, "grad_norm": 1.5703125, "learning_rate": 8.17190366571631e-06, "loss": 0.7673, "step": 9816 }, { "epoch": 1.6712819083953105, "grad_norm": 1.6875, "learning_rate": 8.170128200267745e-06, "loss": 0.8326, "step": 9817 }, { "epoch": 1.6714533724843004, "grad_norm": 1.6875, "learning_rate": 8.16835279449819e-06, "loss": 0.7833, "step": 9818 }, { "epoch": 1.6716248365732902, "grad_norm": 1.828125, "learning_rate": 8.16657744846555e-06, "loss": 0.8009, "step": 9819 }, { "epoch": 1.67179630066228, "grad_norm": 1.7265625, "learning_rate": 8.164802162227722e-06, "loss": 0.8816, "step": 9820 }, { "epoch": 1.6719677647512698, "grad_norm": 1.6640625, "learning_rate": 8.163026935842604e-06, "loss": 0.8289, "step": 9821 }, { "epoch": 1.6721392288402597, "grad_norm": 1.7109375, "learning_rate": 8.161251769368098e-06, "loss": 0.9218, "step": 9822 }, { "epoch": 1.6723106929292495, "grad_norm": 1.703125, "learning_rate": 8.159476662862094e-06, "loss": 0.8668, "step": 9823 }, { "epoch": 1.6724821570182395, "grad_norm": 1.6796875, "learning_rate": 8.157701616382487e-06, "loss": 0.8952, "step": 9824 }, { "epoch": 1.6726536211072294, "grad_norm": 1.71875, "learning_rate": 8.155926629987169e-06, "loss": 0.9246, "step": 9825 }, { "epoch": 1.6728250851962192, "grad_norm": 1.7265625, "learning_rate": 8.154151703734026e-06, "loss": 0.8608, "step": 9826 }, { "epoch": 1.6729965492852092, "grad_norm": 1.7890625, "learning_rate": 8.152376837680947e-06, "loss": 0.8636, "step": 9827 }, { "epoch": 1.673168013374199, "grad_norm": 1.75, "learning_rate": 8.150602031885818e-06, "loss": 0.9174, "step": 9828 }, { "epoch": 1.673339477463189, "grad_norm": 1.65625, "learning_rate": 8.14882728640652e-06, "loss": 0.8828, "step": 9829 }, { "epoch": 1.6735109415521787, "grad_norm": 1.65625, "learning_rate": 8.147052601300936e-06, "loss": 0.8424, "step": 9830 }, { "epoch": 1.6736824056411685, "grad_norm": 1.7421875, "learning_rate": 8.145277976626945e-06, "loss": 0.8268, "step": 9831 }, { "epoch": 1.6738538697301584, "grad_norm": 1.671875, "learning_rate": 8.143503412442423e-06, "loss": 0.9055, "step": 9832 }, { "epoch": 1.6740253338191482, "grad_norm": 1.7109375, "learning_rate": 8.141728908805244e-06, "loss": 0.854, "step": 9833 }, { "epoch": 1.674196797908138, "grad_norm": 1.71875, "learning_rate": 8.139954465773283e-06, "loss": 0.8855, "step": 9834 }, { "epoch": 1.6743682619971278, "grad_norm": 1.734375, "learning_rate": 8.138180083404412e-06, "loss": 0.8924, "step": 9835 }, { "epoch": 1.6745397260861177, "grad_norm": 1.6640625, "learning_rate": 8.1364057617565e-06, "loss": 0.8561, "step": 9836 }, { "epoch": 1.6747111901751077, "grad_norm": 1.7265625, "learning_rate": 8.134631500887412e-06, "loss": 0.9067, "step": 9837 }, { "epoch": 1.6748826542640975, "grad_norm": 1.8359375, "learning_rate": 8.132857300855016e-06, "loss": 0.8568, "step": 9838 }, { "epoch": 1.6750541183530874, "grad_norm": 1.7578125, "learning_rate": 8.131083161717175e-06, "loss": 0.8184, "step": 9839 }, { "epoch": 1.6752255824420774, "grad_norm": 1.6328125, "learning_rate": 8.129309083531746e-06, "loss": 0.8091, "step": 9840 }, { "epoch": 1.6753970465310672, "grad_norm": 1.7109375, "learning_rate": 8.127535066356595e-06, "loss": 0.8772, "step": 9841 }, { "epoch": 1.675568510620057, "grad_norm": 1.5859375, "learning_rate": 8.12576111024958e-06, "loss": 0.7298, "step": 9842 }, { "epoch": 1.675739974709047, "grad_norm": 1.5625, "learning_rate": 8.123987215268551e-06, "loss": 0.806, "step": 9843 }, { "epoch": 1.6759114387980367, "grad_norm": 1.640625, "learning_rate": 8.122213381471363e-06, "loss": 0.8501, "step": 9844 }, { "epoch": 1.6760829028870265, "grad_norm": 1.7265625, "learning_rate": 8.120439608915866e-06, "loss": 0.8018, "step": 9845 }, { "epoch": 1.6762543669760164, "grad_norm": 1.78125, "learning_rate": 8.118665897659912e-06, "loss": 0.8389, "step": 9846 }, { "epoch": 1.6764258310650062, "grad_norm": 1.7578125, "learning_rate": 8.116892247761348e-06, "loss": 0.8466, "step": 9847 }, { "epoch": 1.676597295153996, "grad_norm": 1.703125, "learning_rate": 8.115118659278019e-06, "loss": 0.8985, "step": 9848 }, { "epoch": 1.676768759242986, "grad_norm": 1.7578125, "learning_rate": 8.113345132267765e-06, "loss": 0.8728, "step": 9849 }, { "epoch": 1.676940223331976, "grad_norm": 1.7421875, "learning_rate": 8.111571666788433e-06, "loss": 0.7813, "step": 9850 }, { "epoch": 1.6771116874209657, "grad_norm": 1.671875, "learning_rate": 8.10979826289786e-06, "loss": 0.779, "step": 9851 }, { "epoch": 1.6772831515099558, "grad_norm": 1.765625, "learning_rate": 8.108024920653885e-06, "loss": 0.8168, "step": 9852 }, { "epoch": 1.6774546155989456, "grad_norm": 1.6484375, "learning_rate": 8.106251640114342e-06, "loss": 0.8528, "step": 9853 }, { "epoch": 1.6776260796879354, "grad_norm": 1.71875, "learning_rate": 8.104478421337065e-06, "loss": 0.8275, "step": 9854 }, { "epoch": 1.6777975437769252, "grad_norm": 1.578125, "learning_rate": 8.102705264379884e-06, "loss": 0.7311, "step": 9855 }, { "epoch": 1.677969007865915, "grad_norm": 1.78125, "learning_rate": 8.100932169300627e-06, "loss": 0.9246, "step": 9856 }, { "epoch": 1.678140471954905, "grad_norm": 1.640625, "learning_rate": 8.099159136157122e-06, "loss": 0.8039, "step": 9857 }, { "epoch": 1.6783119360438947, "grad_norm": 1.6328125, "learning_rate": 8.097386165007197e-06, "loss": 0.8484, "step": 9858 }, { "epoch": 1.6784834001328846, "grad_norm": 1.7578125, "learning_rate": 8.095613255908674e-06, "loss": 0.878, "step": 9859 }, { "epoch": 1.6786548642218744, "grad_norm": 1.6953125, "learning_rate": 8.093840408919373e-06, "loss": 0.9227, "step": 9860 }, { "epoch": 1.6788263283108644, "grad_norm": 1.828125, "learning_rate": 8.092067624097116e-06, "loss": 0.9157, "step": 9861 }, { "epoch": 1.6789977923998542, "grad_norm": 1.6484375, "learning_rate": 8.090294901499718e-06, "loss": 0.8615, "step": 9862 }, { "epoch": 1.679169256488844, "grad_norm": 1.7109375, "learning_rate": 8.088522241184992e-06, "loss": 0.9062, "step": 9863 }, { "epoch": 1.6793407205778341, "grad_norm": 1.75, "learning_rate": 8.086749643210758e-06, "loss": 0.8219, "step": 9864 }, { "epoch": 1.679512184666824, "grad_norm": 1.7109375, "learning_rate": 8.08497710763482e-06, "loss": 0.8179, "step": 9865 }, { "epoch": 1.6796836487558138, "grad_norm": 1.671875, "learning_rate": 8.083204634514991e-06, "loss": 0.7425, "step": 9866 }, { "epoch": 1.6798551128448036, "grad_norm": 1.71875, "learning_rate": 8.081432223909076e-06, "loss": 0.8082, "step": 9867 }, { "epoch": 1.6800265769337934, "grad_norm": 1.6640625, "learning_rate": 8.079659875874883e-06, "loss": 0.8319, "step": 9868 }, { "epoch": 1.6801980410227833, "grad_norm": 1.8203125, "learning_rate": 8.07788759047021e-06, "loss": 0.8149, "step": 9869 }, { "epoch": 1.680369505111773, "grad_norm": 1.7890625, "learning_rate": 8.07611536775286e-06, "loss": 0.8184, "step": 9870 }, { "epoch": 1.680540969200763, "grad_norm": 1.7578125, "learning_rate": 8.074343207780635e-06, "loss": 0.8821, "step": 9871 }, { "epoch": 1.6807124332897527, "grad_norm": 1.640625, "learning_rate": 8.072571110611329e-06, "loss": 0.8301, "step": 9872 }, { "epoch": 1.6808838973787428, "grad_norm": 1.7421875, "learning_rate": 8.070799076302735e-06, "loss": 0.9121, "step": 9873 }, { "epoch": 1.6810553614677326, "grad_norm": 1.65625, "learning_rate": 8.06902710491265e-06, "loss": 0.8864, "step": 9874 }, { "epoch": 1.6812268255567224, "grad_norm": 1.671875, "learning_rate": 8.067255196498862e-06, "loss": 0.8234, "step": 9875 }, { "epoch": 1.6813982896457125, "grad_norm": 1.796875, "learning_rate": 8.06548335111916e-06, "loss": 0.9018, "step": 9876 }, { "epoch": 1.6815697537347023, "grad_norm": 1.7421875, "learning_rate": 8.063711568831332e-06, "loss": 0.9811, "step": 9877 }, { "epoch": 1.6817412178236921, "grad_norm": 1.640625, "learning_rate": 8.06193984969316e-06, "loss": 0.8669, "step": 9878 }, { "epoch": 1.681912681912682, "grad_norm": 1.7265625, "learning_rate": 8.060168193762428e-06, "loss": 0.89, "step": 9879 }, { "epoch": 1.6820841460016718, "grad_norm": 1.6875, "learning_rate": 8.058396601096914e-06, "loss": 0.8921, "step": 9880 }, { "epoch": 1.6822556100906616, "grad_norm": 1.6328125, "learning_rate": 8.0566250717544e-06, "loss": 0.8404, "step": 9881 }, { "epoch": 1.6824270741796514, "grad_norm": 1.7578125, "learning_rate": 8.05485360579266e-06, "loss": 0.9198, "step": 9882 }, { "epoch": 1.6825985382686413, "grad_norm": 1.6015625, "learning_rate": 8.053082203269467e-06, "loss": 0.8214, "step": 9883 }, { "epoch": 1.682770002357631, "grad_norm": 1.671875, "learning_rate": 8.051310864242598e-06, "loss": 0.853, "step": 9884 }, { "epoch": 1.6829414664466211, "grad_norm": 1.6875, "learning_rate": 8.049539588769816e-06, "loss": 0.8835, "step": 9885 }, { "epoch": 1.683112930535611, "grad_norm": 1.7265625, "learning_rate": 8.047768376908896e-06, "loss": 0.7677, "step": 9886 }, { "epoch": 1.6832843946246008, "grad_norm": 1.6640625, "learning_rate": 8.045997228717597e-06, "loss": 0.8603, "step": 9887 }, { "epoch": 1.6834558587135908, "grad_norm": 1.6953125, "learning_rate": 8.04422614425369e-06, "loss": 0.9081, "step": 9888 }, { "epoch": 1.6836273228025807, "grad_norm": 1.7578125, "learning_rate": 8.042455123574936e-06, "loss": 0.8705, "step": 9889 }, { "epoch": 1.6837987868915705, "grad_norm": 1.640625, "learning_rate": 8.040684166739088e-06, "loss": 0.8418, "step": 9890 }, { "epoch": 1.6839702509805603, "grad_norm": 1.7265625, "learning_rate": 8.038913273803906e-06, "loss": 0.8933, "step": 9891 }, { "epoch": 1.6841417150695501, "grad_norm": 1.6640625, "learning_rate": 8.03714244482715e-06, "loss": 0.9062, "step": 9892 }, { "epoch": 1.68431317915854, "grad_norm": 1.5546875, "learning_rate": 8.035371679866569e-06, "loss": 0.7517, "step": 9893 }, { "epoch": 1.6844846432475298, "grad_norm": 1.671875, "learning_rate": 8.033600978979913e-06, "loss": 0.826, "step": 9894 }, { "epoch": 1.6846561073365196, "grad_norm": 1.7265625, "learning_rate": 8.031830342224935e-06, "loss": 0.8717, "step": 9895 }, { "epoch": 1.6848275714255094, "grad_norm": 1.6875, "learning_rate": 8.030059769659382e-06, "loss": 0.8776, "step": 9896 }, { "epoch": 1.6849990355144995, "grad_norm": 1.6484375, "learning_rate": 8.028289261340998e-06, "loss": 0.8231, "step": 9897 }, { "epoch": 1.6851704996034893, "grad_norm": 1.640625, "learning_rate": 8.026518817327527e-06, "loss": 0.8734, "step": 9898 }, { "epoch": 1.6853419636924791, "grad_norm": 1.6875, "learning_rate": 8.024748437676707e-06, "loss": 0.8245, "step": 9899 }, { "epoch": 1.6855134277814692, "grad_norm": 1.71875, "learning_rate": 8.022978122446284e-06, "loss": 0.8654, "step": 9900 }, { "epoch": 1.685684891870459, "grad_norm": 1.703125, "learning_rate": 8.021207871693984e-06, "loss": 0.8636, "step": 9901 }, { "epoch": 1.6858563559594488, "grad_norm": 1.6015625, "learning_rate": 8.019437685477548e-06, "loss": 0.9033, "step": 9902 }, { "epoch": 1.6860278200484387, "grad_norm": 1.75, "learning_rate": 8.017667563854706e-06, "loss": 0.9517, "step": 9903 }, { "epoch": 1.6861992841374285, "grad_norm": 1.6484375, "learning_rate": 8.015897506883188e-06, "loss": 0.8501, "step": 9904 }, { "epoch": 1.6863707482264183, "grad_norm": 1.625, "learning_rate": 8.014127514620726e-06, "loss": 0.828, "step": 9905 }, { "epoch": 1.6865422123154081, "grad_norm": 1.765625, "learning_rate": 8.012357587125043e-06, "loss": 0.8238, "step": 9906 }, { "epoch": 1.686713676404398, "grad_norm": 1.734375, "learning_rate": 8.010587724453865e-06, "loss": 0.8278, "step": 9907 }, { "epoch": 1.6868851404933878, "grad_norm": 1.78125, "learning_rate": 8.008817926664912e-06, "loss": 0.8806, "step": 9908 }, { "epoch": 1.6870566045823778, "grad_norm": 1.6796875, "learning_rate": 8.007048193815905e-06, "loss": 0.8198, "step": 9909 }, { "epoch": 1.6872280686713677, "grad_norm": 1.578125, "learning_rate": 8.005278525964562e-06, "loss": 0.7713, "step": 9910 }, { "epoch": 1.6873995327603575, "grad_norm": 1.7109375, "learning_rate": 8.003508923168596e-06, "loss": 0.8293, "step": 9911 }, { "epoch": 1.6875709968493475, "grad_norm": 1.7109375, "learning_rate": 8.001739385485724e-06, "loss": 0.8553, "step": 9912 }, { "epoch": 1.6877424609383374, "grad_norm": 1.7109375, "learning_rate": 7.999969912973656e-06, "loss": 0.7869, "step": 9913 }, { "epoch": 1.6879139250273272, "grad_norm": 1.65625, "learning_rate": 7.998200505690097e-06, "loss": 0.7464, "step": 9914 }, { "epoch": 1.688085389116317, "grad_norm": 1.625, "learning_rate": 7.99643116369276e-06, "loss": 0.8032, "step": 9915 }, { "epoch": 1.6882568532053068, "grad_norm": 1.6796875, "learning_rate": 7.994661887039347e-06, "loss": 0.8418, "step": 9916 }, { "epoch": 1.6884283172942967, "grad_norm": 1.6875, "learning_rate": 7.99289267578756e-06, "loss": 0.7836, "step": 9917 }, { "epoch": 1.6885997813832865, "grad_norm": 1.7734375, "learning_rate": 7.991123529995102e-06, "loss": 0.872, "step": 9918 }, { "epoch": 1.6887712454722763, "grad_norm": 1.7265625, "learning_rate": 7.989354449719671e-06, "loss": 0.8624, "step": 9919 }, { "epoch": 1.6889427095612661, "grad_norm": 1.7734375, "learning_rate": 7.987585435018963e-06, "loss": 0.8893, "step": 9920 }, { "epoch": 1.6891141736502562, "grad_norm": 1.6953125, "learning_rate": 7.98581648595067e-06, "loss": 0.8371, "step": 9921 }, { "epoch": 1.689285637739246, "grad_norm": 1.7578125, "learning_rate": 7.984047602572486e-06, "loss": 0.9117, "step": 9922 }, { "epoch": 1.6894571018282358, "grad_norm": 1.734375, "learning_rate": 7.982278784942106e-06, "loss": 0.8182, "step": 9923 }, { "epoch": 1.6896285659172259, "grad_norm": 1.78125, "learning_rate": 7.980510033117208e-06, "loss": 0.857, "step": 9924 }, { "epoch": 1.6898000300062157, "grad_norm": 1.578125, "learning_rate": 7.978741347155484e-06, "loss": 0.8085, "step": 9925 }, { "epoch": 1.6899714940952055, "grad_norm": 1.6484375, "learning_rate": 7.976972727114615e-06, "loss": 0.8537, "step": 9926 }, { "epoch": 1.6901429581841954, "grad_norm": 1.78125, "learning_rate": 7.975204173052284e-06, "loss": 0.8811, "step": 9927 }, { "epoch": 1.6903144222731852, "grad_norm": 1.7109375, "learning_rate": 7.973435685026171e-06, "loss": 0.8205, "step": 9928 }, { "epoch": 1.690485886362175, "grad_norm": 1.671875, "learning_rate": 7.97166726309395e-06, "loss": 0.8305, "step": 9929 }, { "epoch": 1.6906573504511648, "grad_norm": 1.6953125, "learning_rate": 7.969898907313298e-06, "loss": 0.8199, "step": 9930 }, { "epoch": 1.6908288145401547, "grad_norm": 1.6953125, "learning_rate": 7.968130617741887e-06, "loss": 0.7937, "step": 9931 }, { "epoch": 1.6910002786291445, "grad_norm": 1.71875, "learning_rate": 7.966362394437389e-06, "loss": 0.7968, "step": 9932 }, { "epoch": 1.6911717427181343, "grad_norm": 1.6875, "learning_rate": 7.964594237457469e-06, "loss": 0.7697, "step": 9933 }, { "epoch": 1.6913432068071244, "grad_norm": 1.6328125, "learning_rate": 7.962826146859794e-06, "loss": 0.8717, "step": 9934 }, { "epoch": 1.6915146708961142, "grad_norm": 1.8671875, "learning_rate": 7.961058122702037e-06, "loss": 0.8174, "step": 9935 }, { "epoch": 1.691686134985104, "grad_norm": 1.7890625, "learning_rate": 7.959290165041848e-06, "loss": 0.8491, "step": 9936 }, { "epoch": 1.691857599074094, "grad_norm": 1.65625, "learning_rate": 7.957522273936892e-06, "loss": 0.8569, "step": 9937 }, { "epoch": 1.6920290631630839, "grad_norm": 1.65625, "learning_rate": 7.955754449444827e-06, "loss": 0.8288, "step": 9938 }, { "epoch": 1.6922005272520737, "grad_norm": 1.6640625, "learning_rate": 7.953986691623305e-06, "loss": 0.7834, "step": 9939 }, { "epoch": 1.6923719913410635, "grad_norm": 1.734375, "learning_rate": 7.952219000529982e-06, "loss": 0.8607, "step": 9940 }, { "epoch": 1.6925434554300534, "grad_norm": 1.703125, "learning_rate": 7.950451376222508e-06, "loss": 0.8376, "step": 9941 }, { "epoch": 1.6927149195190432, "grad_norm": 1.7109375, "learning_rate": 7.948683818758531e-06, "loss": 0.7824, "step": 9942 }, { "epoch": 1.692886383608033, "grad_norm": 1.6875, "learning_rate": 7.946916328195701e-06, "loss": 0.8443, "step": 9943 }, { "epoch": 1.6930578476970228, "grad_norm": 1.6328125, "learning_rate": 7.945148904591663e-06, "loss": 0.8699, "step": 9944 }, { "epoch": 1.6932293117860127, "grad_norm": 1.703125, "learning_rate": 7.943381548004054e-06, "loss": 0.8301, "step": 9945 }, { "epoch": 1.6934007758750027, "grad_norm": 1.796875, "learning_rate": 7.941614258490524e-06, "loss": 0.9083, "step": 9946 }, { "epoch": 1.6935722399639925, "grad_norm": 1.65625, "learning_rate": 7.939847036108698e-06, "loss": 0.7606, "step": 9947 }, { "epoch": 1.6937437040529824, "grad_norm": 1.6796875, "learning_rate": 7.938079880916219e-06, "loss": 0.8062, "step": 9948 }, { "epoch": 1.6939151681419724, "grad_norm": 1.640625, "learning_rate": 7.936312792970719e-06, "loss": 0.8749, "step": 9949 }, { "epoch": 1.6940866322309622, "grad_norm": 1.625, "learning_rate": 7.934545772329828e-06, "loss": 0.8071, "step": 9950 }, { "epoch": 1.694258096319952, "grad_norm": 1.703125, "learning_rate": 7.93277881905118e-06, "loss": 0.8501, "step": 9951 }, { "epoch": 1.6944295604089419, "grad_norm": 1.8671875, "learning_rate": 7.931011933192398e-06, "loss": 0.8819, "step": 9952 }, { "epoch": 1.6946010244979317, "grad_norm": 1.7109375, "learning_rate": 7.929245114811108e-06, "loss": 0.7806, "step": 9953 }, { "epoch": 1.6947724885869215, "grad_norm": 1.609375, "learning_rate": 7.927478363964933e-06, "loss": 0.8344, "step": 9954 }, { "epoch": 1.6949439526759114, "grad_norm": 1.8203125, "learning_rate": 7.925711680711493e-06, "loss": 0.9316, "step": 9955 }, { "epoch": 1.6951154167649012, "grad_norm": 1.671875, "learning_rate": 7.923945065108406e-06, "loss": 0.9082, "step": 9956 }, { "epoch": 1.695286880853891, "grad_norm": 1.7265625, "learning_rate": 7.922178517213288e-06, "loss": 0.8015, "step": 9957 }, { "epoch": 1.695458344942881, "grad_norm": 1.734375, "learning_rate": 7.920412037083757e-06, "loss": 0.9053, "step": 9958 }, { "epoch": 1.6956298090318709, "grad_norm": 1.6875, "learning_rate": 7.918645624777415e-06, "loss": 0.9377, "step": 9959 }, { "epoch": 1.6958012731208607, "grad_norm": 1.7734375, "learning_rate": 7.916879280351878e-06, "loss": 0.8941, "step": 9960 }, { "epoch": 1.6959727372098508, "grad_norm": 1.625, "learning_rate": 7.915113003864753e-06, "loss": 0.8981, "step": 9961 }, { "epoch": 1.6961442012988406, "grad_norm": 1.734375, "learning_rate": 7.913346795373643e-06, "loss": 0.8599, "step": 9962 }, { "epoch": 1.6963156653878304, "grad_norm": 1.6328125, "learning_rate": 7.911580654936152e-06, "loss": 0.7965, "step": 9963 }, { "epoch": 1.6964871294768202, "grad_norm": 1.6640625, "learning_rate": 7.90981458260988e-06, "loss": 0.8015, "step": 9964 }, { "epoch": 1.69665859356581, "grad_norm": 1.734375, "learning_rate": 7.908048578452426e-06, "loss": 0.9537, "step": 9965 }, { "epoch": 1.6968300576547999, "grad_norm": 1.6953125, "learning_rate": 7.906282642521384e-06, "loss": 0.9249, "step": 9966 }, { "epoch": 1.6970015217437897, "grad_norm": 1.6484375, "learning_rate": 7.90451677487435e-06, "loss": 0.8379, "step": 9967 }, { "epoch": 1.6971729858327795, "grad_norm": 1.8203125, "learning_rate": 7.902750975568914e-06, "loss": 0.9602, "step": 9968 }, { "epoch": 1.6973444499217694, "grad_norm": 1.6875, "learning_rate": 7.90098524466267e-06, "loss": 0.8189, "step": 9969 }, { "epoch": 1.6975159140107594, "grad_norm": 1.7421875, "learning_rate": 7.899219582213198e-06, "loss": 0.8329, "step": 9970 }, { "epoch": 1.6976873780997492, "grad_norm": 1.75, "learning_rate": 7.897453988278087e-06, "loss": 0.8815, "step": 9971 }, { "epoch": 1.697858842188739, "grad_norm": 1.625, "learning_rate": 7.895688462914919e-06, "loss": 0.7879, "step": 9972 }, { "epoch": 1.698030306277729, "grad_norm": 1.6328125, "learning_rate": 7.893923006181274e-06, "loss": 0.8663, "step": 9973 }, { "epoch": 1.698201770366719, "grad_norm": 1.6484375, "learning_rate": 7.892157618134729e-06, "loss": 0.8129, "step": 9974 }, { "epoch": 1.6983732344557088, "grad_norm": 1.6015625, "learning_rate": 7.890392298832863e-06, "loss": 0.8256, "step": 9975 }, { "epoch": 1.6985446985446986, "grad_norm": 1.6796875, "learning_rate": 7.888627048333248e-06, "loss": 0.7792, "step": 9976 }, { "epoch": 1.6987161626336884, "grad_norm": 1.7421875, "learning_rate": 7.886861866693457e-06, "loss": 0.8493, "step": 9977 }, { "epoch": 1.6988876267226782, "grad_norm": 1.6796875, "learning_rate": 7.885096753971056e-06, "loss": 0.87, "step": 9978 }, { "epoch": 1.699059090811668, "grad_norm": 1.75, "learning_rate": 7.883331710223614e-06, "loss": 0.9304, "step": 9979 }, { "epoch": 1.6992305549006579, "grad_norm": 1.9453125, "learning_rate": 7.881566735508696e-06, "loss": 0.8853, "step": 9980 }, { "epoch": 1.6994020189896477, "grad_norm": 1.7578125, "learning_rate": 7.879801829883867e-06, "loss": 0.8327, "step": 9981 }, { "epoch": 1.6995734830786378, "grad_norm": 1.640625, "learning_rate": 7.87803699340668e-06, "loss": 0.7743, "step": 9982 }, { "epoch": 1.6997449471676276, "grad_norm": 1.6875, "learning_rate": 7.876272226134698e-06, "loss": 0.8026, "step": 9983 }, { "epoch": 1.6999164112566174, "grad_norm": 1.6953125, "learning_rate": 7.874507528125476e-06, "loss": 0.8312, "step": 9984 }, { "epoch": 1.7000878753456075, "grad_norm": 1.65625, "learning_rate": 7.872742899436568e-06, "loss": 0.809, "step": 9985 }, { "epoch": 1.7002593394345973, "grad_norm": 1.7421875, "learning_rate": 7.870978340125524e-06, "loss": 0.8874, "step": 9986 }, { "epoch": 1.700430803523587, "grad_norm": 1.6171875, "learning_rate": 7.869213850249895e-06, "loss": 0.861, "step": 9987 }, { "epoch": 1.700602267612577, "grad_norm": 1.75, "learning_rate": 7.867449429867224e-06, "loss": 0.9106, "step": 9988 }, { "epoch": 1.7007737317015668, "grad_norm": 1.6328125, "learning_rate": 7.865685079035058e-06, "loss": 0.8218, "step": 9989 }, { "epoch": 1.7009451957905566, "grad_norm": 1.65625, "learning_rate": 7.863920797810938e-06, "loss": 0.8162, "step": 9990 }, { "epoch": 1.7011166598795464, "grad_norm": 1.6484375, "learning_rate": 7.862156586252405e-06, "loss": 0.8495, "step": 9991 }, { "epoch": 1.7012881239685362, "grad_norm": 1.6796875, "learning_rate": 7.860392444417001e-06, "loss": 0.7733, "step": 9992 }, { "epoch": 1.701459588057526, "grad_norm": 1.6484375, "learning_rate": 7.858628372362253e-06, "loss": 0.8968, "step": 9993 }, { "epoch": 1.701631052146516, "grad_norm": 1.71875, "learning_rate": 7.856864370145696e-06, "loss": 0.7907, "step": 9994 }, { "epoch": 1.701802516235506, "grad_norm": 1.75, "learning_rate": 7.855100437824863e-06, "loss": 0.8403, "step": 9995 }, { "epoch": 1.7019739803244958, "grad_norm": 1.6953125, "learning_rate": 7.853336575457281e-06, "loss": 0.8308, "step": 9996 }, { "epoch": 1.7021454444134858, "grad_norm": 1.78125, "learning_rate": 7.851572783100475e-06, "loss": 0.943, "step": 9997 }, { "epoch": 1.7023169085024756, "grad_norm": 1.75, "learning_rate": 7.849809060811973e-06, "loss": 0.8719, "step": 9998 }, { "epoch": 1.7024883725914655, "grad_norm": 1.71875, "learning_rate": 7.848045408649295e-06, "loss": 0.8588, "step": 9999 }, { "epoch": 1.7026598366804553, "grad_norm": 1.6328125, "learning_rate": 7.84628182666996e-06, "loss": 0.82, "step": 10000 }, { "epoch": 1.702831300769445, "grad_norm": 1.6875, "learning_rate": 7.844518314931483e-06, "loss": 0.901, "step": 10001 }, { "epoch": 1.703002764858435, "grad_norm": 1.8125, "learning_rate": 7.842754873491381e-06, "loss": 0.7771, "step": 10002 }, { "epoch": 1.7031742289474248, "grad_norm": 1.640625, "learning_rate": 7.840991502407168e-06, "loss": 0.8143, "step": 10003 }, { "epoch": 1.7033456930364146, "grad_norm": 1.765625, "learning_rate": 7.839228201736354e-06, "loss": 0.8319, "step": 10004 }, { "epoch": 1.7035171571254044, "grad_norm": 1.7421875, "learning_rate": 7.837464971536442e-06, "loss": 0.8533, "step": 10005 }, { "epoch": 1.7036886212143945, "grad_norm": 1.6640625, "learning_rate": 7.835701811864937e-06, "loss": 0.8322, "step": 10006 }, { "epoch": 1.7038600853033843, "grad_norm": 1.6875, "learning_rate": 7.83393872277935e-06, "loss": 0.9125, "step": 10007 }, { "epoch": 1.7040315493923741, "grad_norm": 1.6875, "learning_rate": 7.832175704337177e-06, "loss": 0.8254, "step": 10008 }, { "epoch": 1.7042030134813642, "grad_norm": 1.6484375, "learning_rate": 7.830412756595917e-06, "loss": 0.8034, "step": 10009 }, { "epoch": 1.704374477570354, "grad_norm": 1.59375, "learning_rate": 7.828649879613069e-06, "loss": 0.7845, "step": 10010 }, { "epoch": 1.7045459416593438, "grad_norm": 1.703125, "learning_rate": 7.826887073446122e-06, "loss": 0.8672, "step": 10011 }, { "epoch": 1.7047174057483336, "grad_norm": 1.7265625, "learning_rate": 7.825124338152574e-06, "loss": 0.8432, "step": 10012 }, { "epoch": 1.7048888698373235, "grad_norm": 1.609375, "learning_rate": 7.823361673789909e-06, "loss": 0.7993, "step": 10013 }, { "epoch": 1.7050603339263133, "grad_norm": 1.65625, "learning_rate": 7.821599080415618e-06, "loss": 0.8738, "step": 10014 }, { "epoch": 1.7052317980153031, "grad_norm": 1.71875, "learning_rate": 7.81983655808718e-06, "loss": 0.8296, "step": 10015 }, { "epoch": 1.705403262104293, "grad_norm": 1.640625, "learning_rate": 7.818074106862088e-06, "loss": 0.8741, "step": 10016 }, { "epoch": 1.7055747261932828, "grad_norm": 1.734375, "learning_rate": 7.816311726797813e-06, "loss": 0.8226, "step": 10017 }, { "epoch": 1.7057461902822728, "grad_norm": 1.734375, "learning_rate": 7.814549417951835e-06, "loss": 0.8786, "step": 10018 }, { "epoch": 1.7059176543712626, "grad_norm": 1.640625, "learning_rate": 7.81278718038163e-06, "loss": 0.8289, "step": 10019 }, { "epoch": 1.7060891184602525, "grad_norm": 1.609375, "learning_rate": 7.811025014144671e-06, "loss": 0.7693, "step": 10020 }, { "epoch": 1.7062605825492423, "grad_norm": 1.765625, "learning_rate": 7.809262919298428e-06, "loss": 0.8542, "step": 10021 }, { "epoch": 1.7064320466382323, "grad_norm": 1.7890625, "learning_rate": 7.807500895900373e-06, "loss": 0.9365, "step": 10022 }, { "epoch": 1.7066035107272222, "grad_norm": 1.671875, "learning_rate": 7.805738944007968e-06, "loss": 0.9101, "step": 10023 }, { "epoch": 1.706774974816212, "grad_norm": 1.6796875, "learning_rate": 7.803977063678682e-06, "loss": 0.8732, "step": 10024 }, { "epoch": 1.7069464389052018, "grad_norm": 1.7578125, "learning_rate": 7.80221525496997e-06, "loss": 0.8287, "step": 10025 }, { "epoch": 1.7071179029941916, "grad_norm": 1.7578125, "learning_rate": 7.800453517939298e-06, "loss": 0.8571, "step": 10026 }, { "epoch": 1.7072893670831815, "grad_norm": 1.7109375, "learning_rate": 7.798691852644118e-06, "loss": 0.8613, "step": 10027 }, { "epoch": 1.7074608311721713, "grad_norm": 1.6953125, "learning_rate": 7.796930259141885e-06, "loss": 0.8497, "step": 10028 }, { "epoch": 1.7076322952611611, "grad_norm": 1.625, "learning_rate": 7.795168737490054e-06, "loss": 0.7839, "step": 10029 }, { "epoch": 1.707803759350151, "grad_norm": 1.6484375, "learning_rate": 7.793407287746072e-06, "loss": 0.9061, "step": 10030 }, { "epoch": 1.707975223439141, "grad_norm": 1.6796875, "learning_rate": 7.791645909967387e-06, "loss": 0.8451, "step": 10031 }, { "epoch": 1.7081466875281308, "grad_norm": 1.7109375, "learning_rate": 7.789884604211447e-06, "loss": 0.8251, "step": 10032 }, { "epoch": 1.7083181516171206, "grad_norm": 1.6953125, "learning_rate": 7.78812337053569e-06, "loss": 0.883, "step": 10033 }, { "epoch": 1.7084896157061107, "grad_norm": 1.6015625, "learning_rate": 7.78636220899756e-06, "loss": 0.7842, "step": 10034 }, { "epoch": 1.7086610797951005, "grad_norm": 1.703125, "learning_rate": 7.784601119654494e-06, "loss": 0.8951, "step": 10035 }, { "epoch": 1.7088325438840903, "grad_norm": 1.703125, "learning_rate": 7.782840102563925e-06, "loss": 0.9254, "step": 10036 }, { "epoch": 1.7090040079730802, "grad_norm": 1.6953125, "learning_rate": 7.781079157783292e-06, "loss": 0.8714, "step": 10037 }, { "epoch": 1.70917547206207, "grad_norm": 1.6640625, "learning_rate": 7.779318285370024e-06, "loss": 0.7217, "step": 10038 }, { "epoch": 1.7093469361510598, "grad_norm": 1.734375, "learning_rate": 7.777557485381553e-06, "loss": 0.8525, "step": 10039 }, { "epoch": 1.7095184002400496, "grad_norm": 1.71875, "learning_rate": 7.775796757875298e-06, "loss": 0.8348, "step": 10040 }, { "epoch": 1.7096898643290395, "grad_norm": 1.6875, "learning_rate": 7.774036102908685e-06, "loss": 0.8365, "step": 10041 }, { "epoch": 1.7098613284180293, "grad_norm": 1.6953125, "learning_rate": 7.772275520539136e-06, "loss": 0.8373, "step": 10042 }, { "epoch": 1.7100327925070193, "grad_norm": 1.640625, "learning_rate": 7.770515010824074e-06, "loss": 0.8351, "step": 10043 }, { "epoch": 1.7102042565960092, "grad_norm": 1.6640625, "learning_rate": 7.768754573820908e-06, "loss": 0.8265, "step": 10044 }, { "epoch": 1.710375720684999, "grad_norm": 1.796875, "learning_rate": 7.766994209587062e-06, "loss": 0.9264, "step": 10045 }, { "epoch": 1.710547184773989, "grad_norm": 1.6875, "learning_rate": 7.765233918179942e-06, "loss": 0.8793, "step": 10046 }, { "epoch": 1.7107186488629789, "grad_norm": 1.7109375, "learning_rate": 7.763473699656959e-06, "loss": 0.9455, "step": 10047 }, { "epoch": 1.7108901129519687, "grad_norm": 1.75, "learning_rate": 7.761713554075521e-06, "loss": 0.8932, "step": 10048 }, { "epoch": 1.7110615770409585, "grad_norm": 1.7421875, "learning_rate": 7.759953481493033e-06, "loss": 0.8766, "step": 10049 }, { "epoch": 1.7112330411299483, "grad_norm": 1.6875, "learning_rate": 7.758193481966901e-06, "loss": 0.8285, "step": 10050 }, { "epoch": 1.7114045052189382, "grad_norm": 1.6953125, "learning_rate": 7.756433555554518e-06, "loss": 0.8557, "step": 10051 }, { "epoch": 1.711575969307928, "grad_norm": 1.625, "learning_rate": 7.754673702313284e-06, "loss": 0.8181, "step": 10052 }, { "epoch": 1.7117474333969178, "grad_norm": 1.703125, "learning_rate": 7.752913922300595e-06, "loss": 0.8187, "step": 10053 }, { "epoch": 1.7119188974859076, "grad_norm": 1.65625, "learning_rate": 7.751154215573845e-06, "loss": 0.8839, "step": 10054 }, { "epoch": 1.7120903615748977, "grad_norm": 1.671875, "learning_rate": 7.749394582190426e-06, "loss": 0.8244, "step": 10055 }, { "epoch": 1.7122618256638875, "grad_norm": 1.671875, "learning_rate": 7.747635022207724e-06, "loss": 0.8911, "step": 10056 }, { "epoch": 1.7124332897528773, "grad_norm": 1.7265625, "learning_rate": 7.745875535683126e-06, "loss": 0.8736, "step": 10057 }, { "epoch": 1.7126047538418674, "grad_norm": 1.75, "learning_rate": 7.744116122674015e-06, "loss": 0.8851, "step": 10058 }, { "epoch": 1.7127762179308572, "grad_norm": 1.6953125, "learning_rate": 7.742356783237772e-06, "loss": 0.8383, "step": 10059 }, { "epoch": 1.712947682019847, "grad_norm": 1.6640625, "learning_rate": 7.740597517431775e-06, "loss": 0.8672, "step": 10060 }, { "epoch": 1.7131191461088369, "grad_norm": 1.734375, "learning_rate": 7.738838325313402e-06, "loss": 0.8921, "step": 10061 }, { "epoch": 1.7132906101978267, "grad_norm": 1.7421875, "learning_rate": 7.737079206940027e-06, "loss": 0.8335, "step": 10062 }, { "epoch": 1.7134620742868165, "grad_norm": 1.671875, "learning_rate": 7.73532016236902e-06, "loss": 0.8776, "step": 10063 }, { "epoch": 1.7136335383758063, "grad_norm": 1.703125, "learning_rate": 7.733561191657748e-06, "loss": 0.8359, "step": 10064 }, { "epoch": 1.7138050024647962, "grad_norm": 1.78125, "learning_rate": 7.731802294863583e-06, "loss": 0.883, "step": 10065 }, { "epoch": 1.713976466553786, "grad_norm": 1.765625, "learning_rate": 7.730043472043884e-06, "loss": 0.8665, "step": 10066 }, { "epoch": 1.714147930642776, "grad_norm": 1.6953125, "learning_rate": 7.728284723256017e-06, "loss": 0.8383, "step": 10067 }, { "epoch": 1.7143193947317659, "grad_norm": 1.6953125, "learning_rate": 7.726526048557338e-06, "loss": 0.8075, "step": 10068 }, { "epoch": 1.7144908588207557, "grad_norm": 1.78125, "learning_rate": 7.724767448005208e-06, "loss": 0.8819, "step": 10069 }, { "epoch": 1.7146623229097457, "grad_norm": 1.7421875, "learning_rate": 7.723008921656977e-06, "loss": 0.7742, "step": 10070 }, { "epoch": 1.7148337869987356, "grad_norm": 1.640625, "learning_rate": 7.72125046957e-06, "loss": 0.8344, "step": 10071 }, { "epoch": 1.7150052510877254, "grad_norm": 1.6875, "learning_rate": 7.719492091801628e-06, "loss": 0.8342, "step": 10072 }, { "epoch": 1.7151767151767152, "grad_norm": 1.734375, "learning_rate": 7.717733788409207e-06, "loss": 0.8667, "step": 10073 }, { "epoch": 1.715348179265705, "grad_norm": 1.71875, "learning_rate": 7.71597555945008e-06, "loss": 0.8732, "step": 10074 }, { "epoch": 1.7155196433546949, "grad_norm": 1.671875, "learning_rate": 7.71421740498159e-06, "loss": 0.915, "step": 10075 }, { "epoch": 1.7156911074436847, "grad_norm": 1.75, "learning_rate": 7.712459325061078e-06, "loss": 0.8567, "step": 10076 }, { "epoch": 1.7158625715326745, "grad_norm": 1.8984375, "learning_rate": 7.710701319745881e-06, "loss": 0.8153, "step": 10077 }, { "epoch": 1.7160340356216643, "grad_norm": 1.6875, "learning_rate": 7.708943389093337e-06, "loss": 0.8616, "step": 10078 }, { "epoch": 1.7162054997106544, "grad_norm": 1.734375, "learning_rate": 7.707185533160774e-06, "loss": 0.8309, "step": 10079 }, { "epoch": 1.7163769637996442, "grad_norm": 1.671875, "learning_rate": 7.705427752005525e-06, "loss": 0.8271, "step": 10080 }, { "epoch": 1.716548427888634, "grad_norm": 1.703125, "learning_rate": 7.703670045684919e-06, "loss": 0.9102, "step": 10081 }, { "epoch": 1.716719891977624, "grad_norm": 1.734375, "learning_rate": 7.701912414256279e-06, "loss": 0.9635, "step": 10082 }, { "epoch": 1.716891356066614, "grad_norm": 1.6484375, "learning_rate": 7.700154857776925e-06, "loss": 0.8498, "step": 10083 }, { "epoch": 1.7170628201556037, "grad_norm": 1.671875, "learning_rate": 7.698397376304186e-06, "loss": 0.8869, "step": 10084 }, { "epoch": 1.7172342842445936, "grad_norm": 1.828125, "learning_rate": 7.69663996989538e-06, "loss": 0.8821, "step": 10085 }, { "epoch": 1.7174057483335834, "grad_norm": 1.765625, "learning_rate": 7.694882638607813e-06, "loss": 0.9002, "step": 10086 }, { "epoch": 1.7175772124225732, "grad_norm": 1.6953125, "learning_rate": 7.693125382498804e-06, "loss": 0.8881, "step": 10087 }, { "epoch": 1.717748676511563, "grad_norm": 1.8359375, "learning_rate": 7.691368201625662e-06, "loss": 0.8636, "step": 10088 }, { "epoch": 1.7179201406005529, "grad_norm": 1.703125, "learning_rate": 7.689611096045698e-06, "loss": 0.8352, "step": 10089 }, { "epoch": 1.7180916046895427, "grad_norm": 1.71875, "learning_rate": 7.687854065816216e-06, "loss": 0.9081, "step": 10090 }, { "epoch": 1.7182630687785327, "grad_norm": 1.65625, "learning_rate": 7.686097110994516e-06, "loss": 0.8152, "step": 10091 }, { "epoch": 1.7184345328675226, "grad_norm": 1.609375, "learning_rate": 7.684340231637905e-06, "loss": 0.8166, "step": 10092 }, { "epoch": 1.7186059969565124, "grad_norm": 1.59375, "learning_rate": 7.68258342780368e-06, "loss": 0.8214, "step": 10093 }, { "epoch": 1.7187774610455024, "grad_norm": 1.71875, "learning_rate": 7.680826699549136e-06, "loss": 0.9223, "step": 10094 }, { "epoch": 1.7189489251344923, "grad_norm": 1.6875, "learning_rate": 7.679070046931567e-06, "loss": 0.869, "step": 10095 }, { "epoch": 1.719120389223482, "grad_norm": 1.6328125, "learning_rate": 7.677313470008268e-06, "loss": 0.8313, "step": 10096 }, { "epoch": 1.719291853312472, "grad_norm": 1.6640625, "learning_rate": 7.675556968836517e-06, "loss": 0.8957, "step": 10097 }, { "epoch": 1.7194633174014617, "grad_norm": 1.7421875, "learning_rate": 7.673800543473608e-06, "loss": 0.7927, "step": 10098 }, { "epoch": 1.7196347814904516, "grad_norm": 1.625, "learning_rate": 7.672044193976822e-06, "loss": 0.8355, "step": 10099 }, { "epoch": 1.7198062455794414, "grad_norm": 1.703125, "learning_rate": 7.670287920403439e-06, "loss": 0.819, "step": 10100 }, { "epoch": 1.7199777096684312, "grad_norm": 1.8125, "learning_rate": 7.668531722810742e-06, "loss": 0.9703, "step": 10101 }, { "epoch": 1.720149173757421, "grad_norm": 1.875, "learning_rate": 7.666775601256006e-06, "loss": 0.882, "step": 10102 }, { "epoch": 1.720320637846411, "grad_norm": 1.671875, "learning_rate": 7.665019555796502e-06, "loss": 0.8179, "step": 10103 }, { "epoch": 1.720492101935401, "grad_norm": 1.78125, "learning_rate": 7.663263586489504e-06, "loss": 0.9177, "step": 10104 }, { "epoch": 1.7206635660243907, "grad_norm": 1.640625, "learning_rate": 7.66150769339228e-06, "loss": 0.7969, "step": 10105 }, { "epoch": 1.7208350301133808, "grad_norm": 1.71875, "learning_rate": 7.659751876562096e-06, "loss": 0.8631, "step": 10106 }, { "epoch": 1.7210064942023706, "grad_norm": 1.6171875, "learning_rate": 7.657996136056216e-06, "loss": 0.862, "step": 10107 }, { "epoch": 1.7211779582913604, "grad_norm": 1.8046875, "learning_rate": 7.656240471931904e-06, "loss": 0.9143, "step": 10108 }, { "epoch": 1.7213494223803503, "grad_norm": 1.640625, "learning_rate": 7.654484884246412e-06, "loss": 0.8633, "step": 10109 }, { "epoch": 1.72152088646934, "grad_norm": 1.6328125, "learning_rate": 7.652729373057001e-06, "loss": 0.8728, "step": 10110 }, { "epoch": 1.72169235055833, "grad_norm": 1.671875, "learning_rate": 7.650973938420924e-06, "loss": 0.7864, "step": 10111 }, { "epoch": 1.7218638146473197, "grad_norm": 1.7265625, "learning_rate": 7.649218580395433e-06, "loss": 0.8282, "step": 10112 }, { "epoch": 1.7220352787363096, "grad_norm": 1.7578125, "learning_rate": 7.647463299037777e-06, "loss": 0.8676, "step": 10113 }, { "epoch": 1.7222067428252994, "grad_norm": 1.6953125, "learning_rate": 7.6457080944052e-06, "loss": 0.8708, "step": 10114 }, { "epoch": 1.7223782069142892, "grad_norm": 1.703125, "learning_rate": 7.643952966554948e-06, "loss": 0.8541, "step": 10115 }, { "epoch": 1.7225496710032793, "grad_norm": 1.703125, "learning_rate": 7.642197915544263e-06, "loss": 0.7929, "step": 10116 }, { "epoch": 1.722721135092269, "grad_norm": 1.625, "learning_rate": 7.640442941430382e-06, "loss": 0.8458, "step": 10117 }, { "epoch": 1.722892599181259, "grad_norm": 1.71875, "learning_rate": 7.638688044270542e-06, "loss": 0.8399, "step": 10118 }, { "epoch": 1.723064063270249, "grad_norm": 1.6640625, "learning_rate": 7.636933224121977e-06, "loss": 0.8385, "step": 10119 }, { "epoch": 1.7232355273592388, "grad_norm": 1.671875, "learning_rate": 7.635178481041917e-06, "loss": 0.8743, "step": 10120 }, { "epoch": 1.7234069914482286, "grad_norm": 1.625, "learning_rate": 7.633423815087593e-06, "loss": 0.8821, "step": 10121 }, { "epoch": 1.7235784555372184, "grad_norm": 1.6640625, "learning_rate": 7.63166922631623e-06, "loss": 0.7966, "step": 10122 }, { "epoch": 1.7237499196262083, "grad_norm": 1.71875, "learning_rate": 7.62991471478505e-06, "loss": 0.8014, "step": 10123 }, { "epoch": 1.723921383715198, "grad_norm": 1.6171875, "learning_rate": 7.628160280551278e-06, "loss": 0.8135, "step": 10124 }, { "epoch": 1.724092847804188, "grad_norm": 1.65625, "learning_rate": 7.62640592367213e-06, "loss": 0.8701, "step": 10125 }, { "epoch": 1.7242643118931777, "grad_norm": 1.671875, "learning_rate": 7.624651644204823e-06, "loss": 0.8518, "step": 10126 }, { "epoch": 1.7244357759821676, "grad_norm": 1.6328125, "learning_rate": 7.62289744220657e-06, "loss": 0.8435, "step": 10127 }, { "epoch": 1.7246072400711576, "grad_norm": 1.6640625, "learning_rate": 7.621143317734584e-06, "loss": 0.8237, "step": 10128 }, { "epoch": 1.7247787041601474, "grad_norm": 1.734375, "learning_rate": 7.619389270846071e-06, "loss": 0.91, "step": 10129 }, { "epoch": 1.7249501682491373, "grad_norm": 1.6875, "learning_rate": 7.617635301598237e-06, "loss": 0.8793, "step": 10130 }, { "epoch": 1.7251216323381273, "grad_norm": 1.7734375, "learning_rate": 7.615881410048294e-06, "loss": 0.9091, "step": 10131 }, { "epoch": 1.7252930964271171, "grad_norm": 1.734375, "learning_rate": 7.614127596253431e-06, "loss": 0.8372, "step": 10132 }, { "epoch": 1.725464560516107, "grad_norm": 1.71875, "learning_rate": 7.612373860270852e-06, "loss": 0.8641, "step": 10133 }, { "epoch": 1.7256360246050968, "grad_norm": 1.65625, "learning_rate": 7.610620202157751e-06, "loss": 0.8591, "step": 10134 }, { "epoch": 1.7258074886940866, "grad_norm": 1.7109375, "learning_rate": 7.608866621971325e-06, "loss": 0.8311, "step": 10135 }, { "epoch": 1.7259789527830764, "grad_norm": 1.625, "learning_rate": 7.6071131197687606e-06, "loss": 0.8582, "step": 10136 }, { "epoch": 1.7261504168720663, "grad_norm": 1.5546875, "learning_rate": 7.605359695607248e-06, "loss": 0.8795, "step": 10137 }, { "epoch": 1.726321880961056, "grad_norm": 1.7421875, "learning_rate": 7.603606349543973e-06, "loss": 0.9352, "step": 10138 }, { "epoch": 1.726493345050046, "grad_norm": 1.7734375, "learning_rate": 7.601853081636119e-06, "loss": 0.9355, "step": 10139 }, { "epoch": 1.726664809139036, "grad_norm": 1.71875, "learning_rate": 7.600099891940869e-06, "loss": 0.8794, "step": 10140 }, { "epoch": 1.7268362732280258, "grad_norm": 1.625, "learning_rate": 7.598346780515396e-06, "loss": 0.8277, "step": 10141 }, { "epoch": 1.7270077373170156, "grad_norm": 1.71875, "learning_rate": 7.596593747416885e-06, "loss": 0.8309, "step": 10142 }, { "epoch": 1.7271792014060057, "grad_norm": 1.703125, "learning_rate": 7.5948407927024955e-06, "loss": 0.8031, "step": 10143 }, { "epoch": 1.7273506654949955, "grad_norm": 1.75, "learning_rate": 7.593087916429407e-06, "loss": 0.851, "step": 10144 }, { "epoch": 1.7275221295839853, "grad_norm": 1.6484375, "learning_rate": 7.591335118654784e-06, "loss": 0.8321, "step": 10145 }, { "epoch": 1.7276935936729751, "grad_norm": 1.765625, "learning_rate": 7.589582399435791e-06, "loss": 0.8972, "step": 10146 }, { "epoch": 1.727865057761965, "grad_norm": 1.7109375, "learning_rate": 7.587829758829594e-06, "loss": 0.805, "step": 10147 }, { "epoch": 1.7280365218509548, "grad_norm": 1.6328125, "learning_rate": 7.586077196893353e-06, "loss": 0.8274, "step": 10148 }, { "epoch": 1.7282079859399446, "grad_norm": 1.734375, "learning_rate": 7.5843247136842245e-06, "loss": 0.926, "step": 10149 }, { "epoch": 1.7283794500289344, "grad_norm": 1.6640625, "learning_rate": 7.582572309259364e-06, "loss": 0.8003, "step": 10150 }, { "epoch": 1.7285509141179243, "grad_norm": 1.6796875, "learning_rate": 7.5808199836759235e-06, "loss": 0.8239, "step": 10151 }, { "epoch": 1.7287223782069143, "grad_norm": 1.8203125, "learning_rate": 7.579067736991053e-06, "loss": 0.8298, "step": 10152 }, { "epoch": 1.7288938422959041, "grad_norm": 1.6796875, "learning_rate": 7.5773155692618995e-06, "loss": 0.8657, "step": 10153 }, { "epoch": 1.729065306384894, "grad_norm": 1.6015625, "learning_rate": 7.575563480545613e-06, "loss": 0.8066, "step": 10154 }, { "epoch": 1.729236770473884, "grad_norm": 1.7109375, "learning_rate": 7.573811470899325e-06, "loss": 0.8659, "step": 10155 }, { "epoch": 1.7294082345628738, "grad_norm": 1.703125, "learning_rate": 7.572059540380182e-06, "loss": 0.9177, "step": 10156 }, { "epoch": 1.7295796986518637, "grad_norm": 1.6953125, "learning_rate": 7.570307689045322e-06, "loss": 0.9513, "step": 10157 }, { "epoch": 1.7297511627408535, "grad_norm": 1.6015625, "learning_rate": 7.568555916951877e-06, "loss": 0.787, "step": 10158 }, { "epoch": 1.7299226268298433, "grad_norm": 1.703125, "learning_rate": 7.566804224156978e-06, "loss": 0.8231, "step": 10159 }, { "epoch": 1.7300940909188331, "grad_norm": 1.6484375, "learning_rate": 7.565052610717757e-06, "loss": 0.8836, "step": 10160 }, { "epoch": 1.730265555007823, "grad_norm": 1.6171875, "learning_rate": 7.563301076691339e-06, "loss": 0.8266, "step": 10161 }, { "epoch": 1.7304370190968128, "grad_norm": 1.75, "learning_rate": 7.5615496221348495e-06, "loss": 0.8571, "step": 10162 }, { "epoch": 1.7306084831858026, "grad_norm": 1.75, "learning_rate": 7.559798247105409e-06, "loss": 0.8509, "step": 10163 }, { "epoch": 1.7307799472747927, "grad_norm": 1.78125, "learning_rate": 7.558046951660136e-06, "loss": 0.8736, "step": 10164 }, { "epoch": 1.7309514113637825, "grad_norm": 1.765625, "learning_rate": 7.55629573585615e-06, "loss": 0.8781, "step": 10165 }, { "epoch": 1.7311228754527723, "grad_norm": 1.6640625, "learning_rate": 7.554544599750559e-06, "loss": 0.8972, "step": 10166 }, { "epoch": 1.7312943395417624, "grad_norm": 1.734375, "learning_rate": 7.55279354340048e-06, "loss": 0.8911, "step": 10167 }, { "epoch": 1.7314658036307522, "grad_norm": 1.734375, "learning_rate": 7.551042566863017e-06, "loss": 0.8434, "step": 10168 }, { "epoch": 1.731637267719742, "grad_norm": 1.703125, "learning_rate": 7.549291670195278e-06, "loss": 0.8142, "step": 10169 }, { "epoch": 1.7318087318087318, "grad_norm": 1.7265625, "learning_rate": 7.547540853454366e-06, "loss": 0.8171, "step": 10170 }, { "epoch": 1.7319801958977217, "grad_norm": 1.6484375, "learning_rate": 7.5457901166973825e-06, "loss": 0.8392, "step": 10171 }, { "epoch": 1.7321516599867115, "grad_norm": 1.75, "learning_rate": 7.544039459981425e-06, "loss": 0.8475, "step": 10172 }, { "epoch": 1.7323231240757013, "grad_norm": 1.7109375, "learning_rate": 7.542288883363587e-06, "loss": 0.8659, "step": 10173 }, { "epoch": 1.7324945881646912, "grad_norm": 1.7734375, "learning_rate": 7.540538386900966e-06, "loss": 0.9016, "step": 10174 }, { "epoch": 1.732666052253681, "grad_norm": 1.703125, "learning_rate": 7.538787970650648e-06, "loss": 0.9046, "step": 10175 }, { "epoch": 1.732837516342671, "grad_norm": 1.6328125, "learning_rate": 7.5370376346697235e-06, "loss": 0.8584, "step": 10176 }, { "epoch": 1.7330089804316609, "grad_norm": 1.71875, "learning_rate": 7.535287379015278e-06, "loss": 0.8298, "step": 10177 }, { "epoch": 1.7331804445206507, "grad_norm": 1.703125, "learning_rate": 7.53353720374439e-06, "loss": 0.845, "step": 10178 }, { "epoch": 1.7333519086096407, "grad_norm": 1.6796875, "learning_rate": 7.5317871089141415e-06, "loss": 0.7766, "step": 10179 }, { "epoch": 1.7335233726986305, "grad_norm": 1.640625, "learning_rate": 7.53003709458161e-06, "loss": 0.7899, "step": 10180 }, { "epoch": 1.7336948367876204, "grad_norm": 1.625, "learning_rate": 7.528287160803871e-06, "loss": 0.8287, "step": 10181 }, { "epoch": 1.7338663008766102, "grad_norm": 1.6796875, "learning_rate": 7.5265373076379934e-06, "loss": 0.8382, "step": 10182 }, { "epoch": 1.7340377649656, "grad_norm": 1.7109375, "learning_rate": 7.524787535141049e-06, "loss": 0.8667, "step": 10183 }, { "epoch": 1.7342092290545899, "grad_norm": 1.703125, "learning_rate": 7.523037843370104e-06, "loss": 0.7994, "step": 10184 }, { "epoch": 1.7343806931435797, "grad_norm": 1.671875, "learning_rate": 7.521288232382221e-06, "loss": 0.8789, "step": 10185 }, { "epoch": 1.7345521572325695, "grad_norm": 1.6953125, "learning_rate": 7.519538702234464e-06, "loss": 0.8457, "step": 10186 }, { "epoch": 1.7347236213215593, "grad_norm": 1.65625, "learning_rate": 7.517789252983891e-06, "loss": 0.8667, "step": 10187 }, { "epoch": 1.7348950854105494, "grad_norm": 1.71875, "learning_rate": 7.516039884687557e-06, "loss": 0.8285, "step": 10188 }, { "epoch": 1.7350665494995392, "grad_norm": 1.6328125, "learning_rate": 7.51429059740252e-06, "loss": 0.8443, "step": 10189 }, { "epoch": 1.735238013588529, "grad_norm": 1.78125, "learning_rate": 7.512541391185823e-06, "loss": 0.8528, "step": 10190 }, { "epoch": 1.735409477677519, "grad_norm": 1.703125, "learning_rate": 7.51079226609452e-06, "loss": 0.7553, "step": 10191 }, { "epoch": 1.735580941766509, "grad_norm": 1.75, "learning_rate": 7.5090432221856526e-06, "loss": 0.8326, "step": 10192 }, { "epoch": 1.7357524058554987, "grad_norm": 1.671875, "learning_rate": 7.507294259516265e-06, "loss": 0.8594, "step": 10193 }, { "epoch": 1.7359238699444886, "grad_norm": 1.65625, "learning_rate": 7.505545378143398e-06, "loss": 0.8385, "step": 10194 }, { "epoch": 1.7360953340334784, "grad_norm": 1.796875, "learning_rate": 7.503796578124092e-06, "loss": 0.8266, "step": 10195 }, { "epoch": 1.7362667981224682, "grad_norm": 1.71875, "learning_rate": 7.502047859515378e-06, "loss": 0.8895, "step": 10196 }, { "epoch": 1.736438262211458, "grad_norm": 1.6328125, "learning_rate": 7.50029922237429e-06, "loss": 0.7925, "step": 10197 }, { "epoch": 1.7366097263004479, "grad_norm": 1.78125, "learning_rate": 7.498550666757858e-06, "loss": 0.8161, "step": 10198 }, { "epoch": 1.7367811903894377, "grad_norm": 1.6875, "learning_rate": 7.496802192723107e-06, "loss": 0.8297, "step": 10199 }, { "epoch": 1.7369526544784277, "grad_norm": 1.703125, "learning_rate": 7.495053800327068e-06, "loss": 0.8645, "step": 10200 }, { "epoch": 1.7371241185674176, "grad_norm": 1.6484375, "learning_rate": 7.493305489626753e-06, "loss": 0.8653, "step": 10201 }, { "epoch": 1.7372955826564074, "grad_norm": 1.6875, "learning_rate": 7.491557260679183e-06, "loss": 0.7951, "step": 10202 }, { "epoch": 1.7374670467453974, "grad_norm": 1.6640625, "learning_rate": 7.489809113541379e-06, "loss": 0.7885, "step": 10203 }, { "epoch": 1.7376385108343873, "grad_norm": 1.75, "learning_rate": 7.488061048270352e-06, "loss": 0.9382, "step": 10204 }, { "epoch": 1.737809974923377, "grad_norm": 1.734375, "learning_rate": 7.486313064923114e-06, "loss": 0.8568, "step": 10205 }, { "epoch": 1.737981439012367, "grad_norm": 1.6796875, "learning_rate": 7.484565163556672e-06, "loss": 0.8941, "step": 10206 }, { "epoch": 1.7381529031013567, "grad_norm": 1.6328125, "learning_rate": 7.482817344228031e-06, "loss": 0.804, "step": 10207 }, { "epoch": 1.7383243671903466, "grad_norm": 1.671875, "learning_rate": 7.481069606994198e-06, "loss": 0.8119, "step": 10208 }, { "epoch": 1.7384958312793364, "grad_norm": 1.734375, "learning_rate": 7.479321951912168e-06, "loss": 0.839, "step": 10209 }, { "epoch": 1.7386672953683262, "grad_norm": 1.640625, "learning_rate": 7.477574379038943e-06, "loss": 0.8308, "step": 10210 }, { "epoch": 1.738838759457316, "grad_norm": 1.78125, "learning_rate": 7.475826888431515e-06, "loss": 0.8357, "step": 10211 }, { "epoch": 1.7390102235463059, "grad_norm": 1.6875, "learning_rate": 7.47407948014688e-06, "loss": 0.8502, "step": 10212 }, { "epoch": 1.739181687635296, "grad_norm": 1.6796875, "learning_rate": 7.472332154242023e-06, "loss": 0.8749, "step": 10213 }, { "epoch": 1.7393531517242857, "grad_norm": 1.7265625, "learning_rate": 7.470584910773931e-06, "loss": 0.8231, "step": 10214 }, { "epoch": 1.7395246158132756, "grad_norm": 1.734375, "learning_rate": 7.4688377497995915e-06, "loss": 0.8878, "step": 10215 }, { "epoch": 1.7396960799022656, "grad_norm": 1.703125, "learning_rate": 7.467090671375985e-06, "loss": 0.9022, "step": 10216 }, { "epoch": 1.7398675439912554, "grad_norm": 1.71875, "learning_rate": 7.465343675560088e-06, "loss": 0.8323, "step": 10217 }, { "epoch": 1.7400390080802453, "grad_norm": 1.7578125, "learning_rate": 7.4635967624088805e-06, "loss": 0.8797, "step": 10218 }, { "epoch": 1.740210472169235, "grad_norm": 1.6015625, "learning_rate": 7.461849931979332e-06, "loss": 0.8438, "step": 10219 }, { "epoch": 1.740381936258225, "grad_norm": 1.6796875, "learning_rate": 7.4601031843284155e-06, "loss": 0.903, "step": 10220 }, { "epoch": 1.7405534003472147, "grad_norm": 1.6796875, "learning_rate": 7.4583565195130995e-06, "loss": 0.7802, "step": 10221 }, { "epoch": 1.7407248644362046, "grad_norm": 1.734375, "learning_rate": 7.456609937590347e-06, "loss": 0.8862, "step": 10222 }, { "epoch": 1.7408963285251944, "grad_norm": 1.7734375, "learning_rate": 7.454863438617124e-06, "loss": 0.849, "step": 10223 }, { "epoch": 1.7410677926141842, "grad_norm": 1.796875, "learning_rate": 7.453117022650387e-06, "loss": 0.8742, "step": 10224 }, { "epoch": 1.7412392567031743, "grad_norm": 1.6484375, "learning_rate": 7.451370689747095e-06, "loss": 0.8438, "step": 10225 }, { "epoch": 1.741410720792164, "grad_norm": 1.6640625, "learning_rate": 7.4496244399642005e-06, "loss": 0.9366, "step": 10226 }, { "epoch": 1.741582184881154, "grad_norm": 1.6875, "learning_rate": 7.447878273358657e-06, "loss": 0.8167, "step": 10227 }, { "epoch": 1.741753648970144, "grad_norm": 1.78125, "learning_rate": 7.4461321899874155e-06, "loss": 0.8649, "step": 10228 }, { "epoch": 1.7419251130591338, "grad_norm": 1.6484375, "learning_rate": 7.444386189907418e-06, "loss": 0.8296, "step": 10229 }, { "epoch": 1.7420965771481236, "grad_norm": 1.65625, "learning_rate": 7.442640273175612e-06, "loss": 0.8709, "step": 10230 }, { "epoch": 1.7422680412371134, "grad_norm": 1.6484375, "learning_rate": 7.440894439848935e-06, "loss": 0.8295, "step": 10231 }, { "epoch": 1.7424395053261033, "grad_norm": 1.7109375, "learning_rate": 7.439148689984327e-06, "loss": 0.8687, "step": 10232 }, { "epoch": 1.742610969415093, "grad_norm": 1.6640625, "learning_rate": 7.437403023638725e-06, "loss": 0.8493, "step": 10233 }, { "epoch": 1.742782433504083, "grad_norm": 1.7421875, "learning_rate": 7.43565744086906e-06, "loss": 0.8864, "step": 10234 }, { "epoch": 1.7429538975930727, "grad_norm": 1.78125, "learning_rate": 7.433911941732266e-06, "loss": 0.8556, "step": 10235 }, { "epoch": 1.7431253616820626, "grad_norm": 1.609375, "learning_rate": 7.4321665262852626e-06, "loss": 0.7473, "step": 10236 }, { "epoch": 1.7432968257710526, "grad_norm": 1.5859375, "learning_rate": 7.430421194584978e-06, "loss": 0.9047, "step": 10237 }, { "epoch": 1.7434682898600424, "grad_norm": 1.65625, "learning_rate": 7.428675946688335e-06, "loss": 0.8799, "step": 10238 }, { "epoch": 1.7436397539490323, "grad_norm": 1.7890625, "learning_rate": 7.426930782652253e-06, "loss": 0.8822, "step": 10239 }, { "epoch": 1.7438112180380223, "grad_norm": 1.625, "learning_rate": 7.425185702533644e-06, "loss": 0.8088, "step": 10240 }, { "epoch": 1.7439826821270121, "grad_norm": 1.71875, "learning_rate": 7.423440706389427e-06, "loss": 0.8863, "step": 10241 }, { "epoch": 1.744154146216002, "grad_norm": 1.6640625, "learning_rate": 7.421695794276511e-06, "loss": 0.788, "step": 10242 }, { "epoch": 1.7443256103049918, "grad_norm": 1.671875, "learning_rate": 7.4199509662518054e-06, "loss": 0.859, "step": 10243 }, { "epoch": 1.7444970743939816, "grad_norm": 1.7578125, "learning_rate": 7.418206222372213e-06, "loss": 0.8653, "step": 10244 }, { "epoch": 1.7446685384829714, "grad_norm": 1.7578125, "learning_rate": 7.416461562694639e-06, "loss": 0.8734, "step": 10245 }, { "epoch": 1.7448400025719613, "grad_norm": 1.6953125, "learning_rate": 7.414716987275985e-06, "loss": 0.8341, "step": 10246 }, { "epoch": 1.745011466660951, "grad_norm": 1.8203125, "learning_rate": 7.412972496173143e-06, "loss": 0.8791, "step": 10247 }, { "epoch": 1.745182930749941, "grad_norm": 1.8203125, "learning_rate": 7.4112280894430076e-06, "loss": 0.8111, "step": 10248 }, { "epoch": 1.745354394838931, "grad_norm": 1.6640625, "learning_rate": 7.409483767142473e-06, "loss": 0.8054, "step": 10249 }, { "epoch": 1.7455258589279208, "grad_norm": 1.8359375, "learning_rate": 7.4077395293284285e-06, "loss": 0.8333, "step": 10250 }, { "epoch": 1.7456973230169106, "grad_norm": 1.6640625, "learning_rate": 7.405995376057758e-06, "loss": 0.7959, "step": 10251 }, { "epoch": 1.7458687871059007, "grad_norm": 1.6328125, "learning_rate": 7.404251307387349e-06, "loss": 0.8575, "step": 10252 }, { "epoch": 1.7460402511948905, "grad_norm": 1.6015625, "learning_rate": 7.402507323374077e-06, "loss": 0.8788, "step": 10253 }, { "epoch": 1.7462117152838803, "grad_norm": 1.765625, "learning_rate": 7.400763424074824e-06, "loss": 0.8249, "step": 10254 }, { "epoch": 1.7463831793728701, "grad_norm": 1.6640625, "learning_rate": 7.399019609546464e-06, "loss": 0.9007, "step": 10255 }, { "epoch": 1.74655464346186, "grad_norm": 1.6640625, "learning_rate": 7.397275879845868e-06, "loss": 0.8715, "step": 10256 }, { "epoch": 1.7467261075508498, "grad_norm": 1.7109375, "learning_rate": 7.395532235029908e-06, "loss": 0.8172, "step": 10257 }, { "epoch": 1.7468975716398396, "grad_norm": 1.7265625, "learning_rate": 7.393788675155449e-06, "loss": 0.922, "step": 10258 }, { "epoch": 1.7470690357288294, "grad_norm": 1.609375, "learning_rate": 7.392045200279354e-06, "loss": 0.7991, "step": 10259 }, { "epoch": 1.7472404998178193, "grad_norm": 1.7265625, "learning_rate": 7.390301810458487e-06, "loss": 0.8249, "step": 10260 }, { "epoch": 1.7474119639068093, "grad_norm": 1.734375, "learning_rate": 7.388558505749703e-06, "loss": 0.8807, "step": 10261 }, { "epoch": 1.7475834279957991, "grad_norm": 1.625, "learning_rate": 7.386815286209862e-06, "loss": 0.8831, "step": 10262 }, { "epoch": 1.747754892084789, "grad_norm": 1.640625, "learning_rate": 7.385072151895814e-06, "loss": 0.854, "step": 10263 }, { "epoch": 1.747926356173779, "grad_norm": 1.5859375, "learning_rate": 7.383329102864411e-06, "loss": 0.8495, "step": 10264 }, { "epoch": 1.7480978202627688, "grad_norm": 1.6875, "learning_rate": 7.381586139172499e-06, "loss": 0.8507, "step": 10265 }, { "epoch": 1.7482692843517587, "grad_norm": 1.625, "learning_rate": 7.379843260876922e-06, "loss": 0.8643, "step": 10266 }, { "epoch": 1.7484407484407485, "grad_norm": 1.734375, "learning_rate": 7.3781004680345235e-06, "loss": 0.7792, "step": 10267 }, { "epoch": 1.7486122125297383, "grad_norm": 1.5546875, "learning_rate": 7.376357760702142e-06, "loss": 0.7385, "step": 10268 }, { "epoch": 1.7487836766187281, "grad_norm": 1.6796875, "learning_rate": 7.374615138936615e-06, "loss": 0.8638, "step": 10269 }, { "epoch": 1.748955140707718, "grad_norm": 1.7421875, "learning_rate": 7.372872602794772e-06, "loss": 0.8629, "step": 10270 }, { "epoch": 1.7491266047967078, "grad_norm": 1.6953125, "learning_rate": 7.371130152333448e-06, "loss": 0.8324, "step": 10271 }, { "epoch": 1.7492980688856976, "grad_norm": 1.65625, "learning_rate": 7.369387787609469e-06, "loss": 0.8626, "step": 10272 }, { "epoch": 1.7494695329746877, "grad_norm": 1.671875, "learning_rate": 7.367645508679659e-06, "loss": 0.8506, "step": 10273 }, { "epoch": 1.7496409970636775, "grad_norm": 1.703125, "learning_rate": 7.365903315600842e-06, "loss": 0.7514, "step": 10274 }, { "epoch": 1.7498124611526673, "grad_norm": 1.7109375, "learning_rate": 7.364161208429838e-06, "loss": 0.8505, "step": 10275 }, { "epoch": 1.7499839252416574, "grad_norm": 1.65625, "learning_rate": 7.362419187223462e-06, "loss": 0.8002, "step": 10276 }, { "epoch": 1.7501553893306472, "grad_norm": 1.6015625, "learning_rate": 7.360677252038529e-06, "loss": 0.7803, "step": 10277 }, { "epoch": 1.750326853419637, "grad_norm": 1.71875, "learning_rate": 7.358935402931848e-06, "loss": 0.8861, "step": 10278 }, { "epoch": 1.7504983175086268, "grad_norm": 1.7578125, "learning_rate": 7.357193639960227e-06, "loss": 0.8101, "step": 10279 }, { "epoch": 1.7506697815976167, "grad_norm": 1.7109375, "learning_rate": 7.355451963180477e-06, "loss": 0.829, "step": 10280 }, { "epoch": 1.7508412456866065, "grad_norm": 1.6875, "learning_rate": 7.353710372649399e-06, "loss": 0.8157, "step": 10281 }, { "epoch": 1.7510127097755963, "grad_norm": 1.6953125, "learning_rate": 7.351968868423789e-06, "loss": 0.9102, "step": 10282 }, { "epoch": 1.7511841738645861, "grad_norm": 1.6640625, "learning_rate": 7.350227450560443e-06, "loss": 0.8951, "step": 10283 }, { "epoch": 1.751355637953576, "grad_norm": 1.6171875, "learning_rate": 7.348486119116161e-06, "loss": 0.8913, "step": 10284 }, { "epoch": 1.751527102042566, "grad_norm": 1.671875, "learning_rate": 7.346744874147729e-06, "loss": 0.8598, "step": 10285 }, { "epoch": 1.7516985661315558, "grad_norm": 1.703125, "learning_rate": 7.345003715711938e-06, "loss": 0.8271, "step": 10286 }, { "epoch": 1.7518700302205457, "grad_norm": 1.6796875, "learning_rate": 7.3432626438655726e-06, "loss": 0.8592, "step": 10287 }, { "epoch": 1.7520414943095357, "grad_norm": 1.6796875, "learning_rate": 7.3415216586654184e-06, "loss": 0.8075, "step": 10288 }, { "epoch": 1.7522129583985255, "grad_norm": 1.6640625, "learning_rate": 7.339780760168254e-06, "loss": 0.8372, "step": 10289 }, { "epoch": 1.7523844224875154, "grad_norm": 1.65625, "learning_rate": 7.338039948430857e-06, "loss": 0.8529, "step": 10290 }, { "epoch": 1.7525558865765052, "grad_norm": 1.671875, "learning_rate": 7.336299223509999e-06, "loss": 0.7569, "step": 10291 }, { "epoch": 1.752727350665495, "grad_norm": 1.6796875, "learning_rate": 7.334558585462461e-06, "loss": 0.8094, "step": 10292 }, { "epoch": 1.7528988147544848, "grad_norm": 1.828125, "learning_rate": 7.332818034344999e-06, "loss": 0.897, "step": 10293 }, { "epoch": 1.7530702788434747, "grad_norm": 1.7578125, "learning_rate": 7.331077570214385e-06, "loss": 0.8805, "step": 10294 }, { "epoch": 1.7532417429324645, "grad_norm": 1.6796875, "learning_rate": 7.329337193127379e-06, "loss": 0.8222, "step": 10295 }, { "epoch": 1.7534132070214543, "grad_norm": 1.75, "learning_rate": 7.327596903140746e-06, "loss": 0.8482, "step": 10296 }, { "epoch": 1.7535846711104444, "grad_norm": 1.7265625, "learning_rate": 7.325856700311243e-06, "loss": 0.8909, "step": 10297 }, { "epoch": 1.7537561351994342, "grad_norm": 1.7109375, "learning_rate": 7.32411658469562e-06, "loss": 0.8621, "step": 10298 }, { "epoch": 1.753927599288424, "grad_norm": 1.703125, "learning_rate": 7.322376556350633e-06, "loss": 0.8418, "step": 10299 }, { "epoch": 1.754099063377414, "grad_norm": 1.671875, "learning_rate": 7.32063661533303e-06, "loss": 0.8549, "step": 10300 }, { "epoch": 1.7542705274664039, "grad_norm": 1.78125, "learning_rate": 7.318896761699557e-06, "loss": 0.9614, "step": 10301 }, { "epoch": 1.7544419915553937, "grad_norm": 1.6171875, "learning_rate": 7.317156995506955e-06, "loss": 0.8664, "step": 10302 }, { "epoch": 1.7546134556443835, "grad_norm": 1.6953125, "learning_rate": 7.315417316811967e-06, "loss": 0.8552, "step": 10303 }, { "epoch": 1.7547849197333734, "grad_norm": 1.6640625, "learning_rate": 7.313677725671331e-06, "loss": 0.8504, "step": 10304 }, { "epoch": 1.7549563838223632, "grad_norm": 1.6484375, "learning_rate": 7.311938222141779e-06, "loss": 0.7789, "step": 10305 }, { "epoch": 1.755127847911353, "grad_norm": 1.671875, "learning_rate": 7.3101988062800435e-06, "loss": 0.8515, "step": 10306 }, { "epoch": 1.7552993120003428, "grad_norm": 1.7265625, "learning_rate": 7.308459478142853e-06, "loss": 0.8759, "step": 10307 }, { "epoch": 1.7554707760893327, "grad_norm": 1.6640625, "learning_rate": 7.3067202377869354e-06, "loss": 0.8369, "step": 10308 }, { "epoch": 1.7556422401783225, "grad_norm": 1.6640625, "learning_rate": 7.304981085269012e-06, "loss": 0.8632, "step": 10309 }, { "epoch": 1.7558137042673125, "grad_norm": 1.765625, "learning_rate": 7.303242020645804e-06, "loss": 0.8468, "step": 10310 }, { "epoch": 1.7559851683563024, "grad_norm": 1.71875, "learning_rate": 7.30150304397403e-06, "loss": 0.7487, "step": 10311 }, { "epoch": 1.7561566324452922, "grad_norm": 1.71875, "learning_rate": 7.2997641553104025e-06, "loss": 0.8561, "step": 10312 }, { "epoch": 1.7563280965342822, "grad_norm": 1.6328125, "learning_rate": 7.298025354711633e-06, "loss": 0.8081, "step": 10313 }, { "epoch": 1.756499560623272, "grad_norm": 1.6953125, "learning_rate": 7.296286642234434e-06, "loss": 0.9342, "step": 10314 }, { "epoch": 1.7566710247122619, "grad_norm": 1.7265625, "learning_rate": 7.2945480179355075e-06, "loss": 0.8533, "step": 10315 }, { "epoch": 1.7568424888012517, "grad_norm": 1.71875, "learning_rate": 7.292809481871559e-06, "loss": 0.8413, "step": 10316 }, { "epoch": 1.7570139528902415, "grad_norm": 1.6875, "learning_rate": 7.291071034099285e-06, "loss": 0.8442, "step": 10317 }, { "epoch": 1.7571854169792314, "grad_norm": 1.8203125, "learning_rate": 7.289332674675386e-06, "loss": 0.919, "step": 10318 }, { "epoch": 1.7573568810682212, "grad_norm": 1.7109375, "learning_rate": 7.287594403656557e-06, "loss": 0.829, "step": 10319 }, { "epoch": 1.757528345157211, "grad_norm": 1.625, "learning_rate": 7.2858562210994886e-06, "loss": 0.8363, "step": 10320 }, { "epoch": 1.7576998092462008, "grad_norm": 1.6875, "learning_rate": 7.284118127060868e-06, "loss": 0.8166, "step": 10321 }, { "epoch": 1.7578712733351909, "grad_norm": 1.5859375, "learning_rate": 7.282380121597384e-06, "loss": 0.7882, "step": 10322 }, { "epoch": 1.7580427374241807, "grad_norm": 1.6953125, "learning_rate": 7.2806422047657165e-06, "loss": 0.8613, "step": 10323 }, { "epoch": 1.7582142015131705, "grad_norm": 1.609375, "learning_rate": 7.278904376622548e-06, "loss": 0.8299, "step": 10324 }, { "epoch": 1.7583856656021606, "grad_norm": 1.671875, "learning_rate": 7.277166637224553e-06, "loss": 0.7943, "step": 10325 }, { "epoch": 1.7585571296911504, "grad_norm": 1.8203125, "learning_rate": 7.275428986628406e-06, "loss": 0.9177, "step": 10326 }, { "epoch": 1.7587285937801402, "grad_norm": 1.734375, "learning_rate": 7.273691424890786e-06, "loss": 0.8536, "step": 10327 }, { "epoch": 1.75890005786913, "grad_norm": 1.8046875, "learning_rate": 7.271953952068351e-06, "loss": 0.8851, "step": 10328 }, { "epoch": 1.7590715219581199, "grad_norm": 1.7421875, "learning_rate": 7.270216568217769e-06, "loss": 0.8791, "step": 10329 }, { "epoch": 1.7592429860471097, "grad_norm": 1.734375, "learning_rate": 7.2684792733957055e-06, "loss": 0.8501, "step": 10330 }, { "epoch": 1.7594144501360995, "grad_norm": 1.625, "learning_rate": 7.2667420676588185e-06, "loss": 0.8799, "step": 10331 }, { "epoch": 1.7595859142250894, "grad_norm": 1.703125, "learning_rate": 7.265004951063765e-06, "loss": 0.8702, "step": 10332 }, { "epoch": 1.7597573783140792, "grad_norm": 1.765625, "learning_rate": 7.263267923667199e-06, "loss": 0.9555, "step": 10333 }, { "epoch": 1.7599288424030692, "grad_norm": 1.65625, "learning_rate": 7.261530985525771e-06, "loss": 0.9094, "step": 10334 }, { "epoch": 1.760100306492059, "grad_norm": 1.71875, "learning_rate": 7.25979413669613e-06, "loss": 0.8269, "step": 10335 }, { "epoch": 1.760271770581049, "grad_norm": 1.6796875, "learning_rate": 7.258057377234922e-06, "loss": 0.8336, "step": 10336 }, { "epoch": 1.760443234670039, "grad_norm": 1.828125, "learning_rate": 7.256320707198786e-06, "loss": 0.8184, "step": 10337 }, { "epoch": 1.7606146987590288, "grad_norm": 1.65625, "learning_rate": 7.254584126644366e-06, "loss": 0.8485, "step": 10338 }, { "epoch": 1.7607861628480186, "grad_norm": 1.7109375, "learning_rate": 7.252847635628298e-06, "loss": 0.849, "step": 10339 }, { "epoch": 1.7609576269370084, "grad_norm": 1.7421875, "learning_rate": 7.251111234207211e-06, "loss": 0.8736, "step": 10340 }, { "epoch": 1.7611290910259982, "grad_norm": 1.6015625, "learning_rate": 7.249374922437737e-06, "loss": 0.877, "step": 10341 }, { "epoch": 1.761300555114988, "grad_norm": 1.640625, "learning_rate": 7.247638700376503e-06, "loss": 0.8993, "step": 10342 }, { "epoch": 1.761472019203978, "grad_norm": 1.75, "learning_rate": 7.2459025680801365e-06, "loss": 0.8747, "step": 10343 }, { "epoch": 1.7616434832929677, "grad_norm": 1.765625, "learning_rate": 7.2441665256052606e-06, "loss": 0.8566, "step": 10344 }, { "epoch": 1.7618149473819575, "grad_norm": 1.703125, "learning_rate": 7.24243057300849e-06, "loss": 0.9118, "step": 10345 }, { "epoch": 1.7619864114709476, "grad_norm": 1.6796875, "learning_rate": 7.240694710346443e-06, "loss": 0.8452, "step": 10346 }, { "epoch": 1.7621578755599374, "grad_norm": 1.71875, "learning_rate": 7.238958937675731e-06, "loss": 0.8191, "step": 10347 }, { "epoch": 1.7623293396489272, "grad_norm": 1.6328125, "learning_rate": 7.237223255052967e-06, "loss": 0.8403, "step": 10348 }, { "epoch": 1.7625008037379173, "grad_norm": 1.6015625, "learning_rate": 7.235487662534755e-06, "loss": 0.7737, "step": 10349 }, { "epoch": 1.7626722678269071, "grad_norm": 1.6796875, "learning_rate": 7.233752160177705e-06, "loss": 0.8608, "step": 10350 }, { "epoch": 1.762843731915897, "grad_norm": 1.7265625, "learning_rate": 7.232016748038408e-06, "loss": 0.8762, "step": 10351 }, { "epoch": 1.7630151960048868, "grad_norm": 1.625, "learning_rate": 7.230281426173469e-06, "loss": 0.8423, "step": 10352 }, { "epoch": 1.7631866600938766, "grad_norm": 1.7109375, "learning_rate": 7.228546194639483e-06, "loss": 0.8653, "step": 10353 }, { "epoch": 1.7633581241828664, "grad_norm": 1.703125, "learning_rate": 7.226811053493041e-06, "loss": 0.8833, "step": 10354 }, { "epoch": 1.7635295882718562, "grad_norm": 1.765625, "learning_rate": 7.225076002790736e-06, "loss": 0.8177, "step": 10355 }, { "epoch": 1.763701052360846, "grad_norm": 1.640625, "learning_rate": 7.223341042589151e-06, "loss": 0.751, "step": 10356 }, { "epoch": 1.763872516449836, "grad_norm": 1.796875, "learning_rate": 7.221606172944869e-06, "loss": 0.8932, "step": 10357 }, { "epoch": 1.764043980538826, "grad_norm": 1.6796875, "learning_rate": 7.219871393914473e-06, "loss": 0.9326, "step": 10358 }, { "epoch": 1.7642154446278158, "grad_norm": 1.765625, "learning_rate": 7.218136705554541e-06, "loss": 0.9091, "step": 10359 }, { "epoch": 1.7643869087168056, "grad_norm": 1.7265625, "learning_rate": 7.216402107921645e-06, "loss": 0.7611, "step": 10360 }, { "epoch": 1.7645583728057956, "grad_norm": 1.6171875, "learning_rate": 7.214667601072358e-06, "loss": 0.8232, "step": 10361 }, { "epoch": 1.7647298368947855, "grad_norm": 1.71875, "learning_rate": 7.2129331850632535e-06, "loss": 0.8851, "step": 10362 }, { "epoch": 1.7649013009837753, "grad_norm": 1.7265625, "learning_rate": 7.21119885995089e-06, "loss": 0.8625, "step": 10363 }, { "epoch": 1.7650727650727651, "grad_norm": 1.6796875, "learning_rate": 7.209464625791831e-06, "loss": 0.8253, "step": 10364 }, { "epoch": 1.765244229161755, "grad_norm": 1.609375, "learning_rate": 7.207730482642641e-06, "loss": 0.8143, "step": 10365 }, { "epoch": 1.7654156932507448, "grad_norm": 1.6953125, "learning_rate": 7.205996430559874e-06, "loss": 0.8331, "step": 10366 }, { "epoch": 1.7655871573397346, "grad_norm": 1.6953125, "learning_rate": 7.2042624696000855e-06, "loss": 0.9238, "step": 10367 }, { "epoch": 1.7657586214287244, "grad_norm": 1.78125, "learning_rate": 7.202528599819825e-06, "loss": 0.8804, "step": 10368 }, { "epoch": 1.7659300855177142, "grad_norm": 1.734375, "learning_rate": 7.200794821275641e-06, "loss": 0.8217, "step": 10369 }, { "epoch": 1.7661015496067043, "grad_norm": 1.7265625, "learning_rate": 7.199061134024079e-06, "loss": 0.8658, "step": 10370 }, { "epoch": 1.7662730136956941, "grad_norm": 1.796875, "learning_rate": 7.197327538121681e-06, "loss": 0.9574, "step": 10371 }, { "epoch": 1.766444477784684, "grad_norm": 1.640625, "learning_rate": 7.195594033624985e-06, "loss": 0.8551, "step": 10372 }, { "epoch": 1.766615941873674, "grad_norm": 1.6953125, "learning_rate": 7.193860620590532e-06, "loss": 0.8563, "step": 10373 }, { "epoch": 1.7667874059626638, "grad_norm": 1.59375, "learning_rate": 7.192127299074847e-06, "loss": 0.7884, "step": 10374 }, { "epoch": 1.7669588700516536, "grad_norm": 1.7578125, "learning_rate": 7.190394069134464e-06, "loss": 0.9111, "step": 10375 }, { "epoch": 1.7671303341406435, "grad_norm": 1.8203125, "learning_rate": 7.188660930825911e-06, "loss": 0.8985, "step": 10376 }, { "epoch": 1.7673017982296333, "grad_norm": 1.6640625, "learning_rate": 7.186927884205712e-06, "loss": 0.8794, "step": 10377 }, { "epoch": 1.7674732623186231, "grad_norm": 1.75, "learning_rate": 7.185194929330388e-06, "loss": 0.8902, "step": 10378 }, { "epoch": 1.767644726407613, "grad_norm": 1.6171875, "learning_rate": 7.183462066256457e-06, "loss": 0.8045, "step": 10379 }, { "epoch": 1.7678161904966028, "grad_norm": 1.78125, "learning_rate": 7.1817292950404325e-06, "loss": 0.8288, "step": 10380 }, { "epoch": 1.7679876545855926, "grad_norm": 1.8515625, "learning_rate": 7.179996615738828e-06, "loss": 0.8861, "step": 10381 }, { "epoch": 1.7681591186745826, "grad_norm": 1.671875, "learning_rate": 7.178264028408154e-06, "loss": 0.8331, "step": 10382 }, { "epoch": 1.7683305827635725, "grad_norm": 1.625, "learning_rate": 7.176531533104916e-06, "loss": 0.7918, "step": 10383 }, { "epoch": 1.7685020468525623, "grad_norm": 1.6953125, "learning_rate": 7.174799129885617e-06, "loss": 0.8849, "step": 10384 }, { "epoch": 1.7686735109415523, "grad_norm": 1.7734375, "learning_rate": 7.173066818806762e-06, "loss": 0.9, "step": 10385 }, { "epoch": 1.7688449750305422, "grad_norm": 1.6640625, "learning_rate": 7.171334599924837e-06, "loss": 0.8639, "step": 10386 }, { "epoch": 1.769016439119532, "grad_norm": 1.71875, "learning_rate": 7.1696024732963445e-06, "loss": 0.8004, "step": 10387 }, { "epoch": 1.7691879032085218, "grad_norm": 1.6796875, "learning_rate": 7.1678704389777735e-06, "loss": 0.8962, "step": 10388 }, { "epoch": 1.7693593672975116, "grad_norm": 1.6484375, "learning_rate": 7.16613849702561e-06, "loss": 0.8478, "step": 10389 }, { "epoch": 1.7695308313865015, "grad_norm": 1.71875, "learning_rate": 7.164406647496342e-06, "loss": 0.8258, "step": 10390 }, { "epoch": 1.7697022954754913, "grad_norm": 1.671875, "learning_rate": 7.162674890446453e-06, "loss": 0.8445, "step": 10391 }, { "epoch": 1.7698737595644811, "grad_norm": 1.6484375, "learning_rate": 7.16094322593242e-06, "loss": 0.7893, "step": 10392 }, { "epoch": 1.770045223653471, "grad_norm": 1.6484375, "learning_rate": 7.1592116540107185e-06, "loss": 0.8214, "step": 10393 }, { "epoch": 1.770216687742461, "grad_norm": 1.7265625, "learning_rate": 7.157480174737823e-06, "loss": 0.8983, "step": 10394 }, { "epoch": 1.7703881518314508, "grad_norm": 1.609375, "learning_rate": 7.155748788170202e-06, "loss": 0.8466, "step": 10395 }, { "epoch": 1.7705596159204406, "grad_norm": 1.6640625, "learning_rate": 7.154017494364329e-06, "loss": 0.8173, "step": 10396 }, { "epoch": 1.7707310800094307, "grad_norm": 1.609375, "learning_rate": 7.1522862933766555e-06, "loss": 0.817, "step": 10397 }, { "epoch": 1.7709025440984205, "grad_norm": 1.6640625, "learning_rate": 7.150555185263653e-06, "loss": 0.8219, "step": 10398 }, { "epoch": 1.7710740081874103, "grad_norm": 1.6328125, "learning_rate": 7.148824170081774e-06, "loss": 0.8267, "step": 10399 }, { "epoch": 1.7712454722764002, "grad_norm": 1.734375, "learning_rate": 7.147093247887476e-06, "loss": 0.8318, "step": 10400 }, { "epoch": 1.77141693636539, "grad_norm": 1.6171875, "learning_rate": 7.145362418737209e-06, "loss": 0.8338, "step": 10401 }, { "epoch": 1.7715884004543798, "grad_norm": 1.7421875, "learning_rate": 7.143631682687424e-06, "loss": 0.9461, "step": 10402 }, { "epoch": 1.7717598645433696, "grad_norm": 1.6796875, "learning_rate": 7.141901039794566e-06, "loss": 0.8248, "step": 10403 }, { "epoch": 1.7719313286323595, "grad_norm": 1.7421875, "learning_rate": 7.140170490115078e-06, "loss": 0.8534, "step": 10404 }, { "epoch": 1.7721027927213493, "grad_norm": 1.78125, "learning_rate": 7.1384400337054e-06, "loss": 0.8852, "step": 10405 }, { "epoch": 1.7722742568103391, "grad_norm": 1.640625, "learning_rate": 7.1367096706219665e-06, "loss": 0.8271, "step": 10406 }, { "epoch": 1.7724457208993292, "grad_norm": 1.7421875, "learning_rate": 7.134979400921214e-06, "loss": 0.7973, "step": 10407 }, { "epoch": 1.772617184988319, "grad_norm": 1.8046875, "learning_rate": 7.133249224659574e-06, "loss": 0.7925, "step": 10408 }, { "epoch": 1.7727886490773088, "grad_norm": 1.8046875, "learning_rate": 7.131519141893469e-06, "loss": 0.8688, "step": 10409 }, { "epoch": 1.7729601131662989, "grad_norm": 1.6328125, "learning_rate": 7.1297891526793276e-06, "loss": 0.7748, "step": 10410 }, { "epoch": 1.7731315772552887, "grad_norm": 1.671875, "learning_rate": 7.128059257073569e-06, "loss": 0.8827, "step": 10411 }, { "epoch": 1.7733030413442785, "grad_norm": 1.6328125, "learning_rate": 7.1263294551326145e-06, "loss": 0.8463, "step": 10412 }, { "epoch": 1.7734745054332683, "grad_norm": 1.75, "learning_rate": 7.1245997469128754e-06, "loss": 0.8997, "step": 10413 }, { "epoch": 1.7736459695222582, "grad_norm": 1.6875, "learning_rate": 7.122870132470769e-06, "loss": 0.8661, "step": 10414 }, { "epoch": 1.773817433611248, "grad_norm": 1.59375, "learning_rate": 7.121140611862699e-06, "loss": 0.7815, "step": 10415 }, { "epoch": 1.7739888977002378, "grad_norm": 1.7109375, "learning_rate": 7.119411185145075e-06, "loss": 0.8995, "step": 10416 }, { "epoch": 1.7741603617892276, "grad_norm": 1.6484375, "learning_rate": 7.117681852374301e-06, "loss": 0.9213, "step": 10417 }, { "epoch": 1.7743318258782175, "grad_norm": 1.671875, "learning_rate": 7.1159526136067755e-06, "loss": 0.876, "step": 10418 }, { "epoch": 1.7745032899672075, "grad_norm": 1.7421875, "learning_rate": 7.114223468898897e-06, "loss": 0.8722, "step": 10419 }, { "epoch": 1.7746747540561973, "grad_norm": 1.7578125, "learning_rate": 7.112494418307056e-06, "loss": 0.8296, "step": 10420 }, { "epoch": 1.7748462181451872, "grad_norm": 1.6875, "learning_rate": 7.110765461887645e-06, "loss": 0.807, "step": 10421 }, { "epoch": 1.7750176822341772, "grad_norm": 1.6796875, "learning_rate": 7.1090365996970526e-06, "loss": 0.8302, "step": 10422 }, { "epoch": 1.775189146323167, "grad_norm": 1.7421875, "learning_rate": 7.107307831791663e-06, "loss": 0.8044, "step": 10423 }, { "epoch": 1.7753606104121569, "grad_norm": 1.6796875, "learning_rate": 7.105579158227858e-06, "loss": 0.8336, "step": 10424 }, { "epoch": 1.7755320745011467, "grad_norm": 1.640625, "learning_rate": 7.103850579062015e-06, "loss": 0.8537, "step": 10425 }, { "epoch": 1.7757035385901365, "grad_norm": 1.765625, "learning_rate": 7.1021220943505124e-06, "loss": 0.8888, "step": 10426 }, { "epoch": 1.7758750026791263, "grad_norm": 1.7578125, "learning_rate": 7.10039370414972e-06, "loss": 0.877, "step": 10427 }, { "epoch": 1.7760464667681162, "grad_norm": 1.6875, "learning_rate": 7.098665408516004e-06, "loss": 0.8352, "step": 10428 }, { "epoch": 1.776217930857106, "grad_norm": 1.703125, "learning_rate": 7.0969372075057385e-06, "loss": 0.8651, "step": 10429 }, { "epoch": 1.7763893949460958, "grad_norm": 1.6640625, "learning_rate": 7.095209101175282e-06, "loss": 0.8052, "step": 10430 }, { "epoch": 1.7765608590350859, "grad_norm": 1.75, "learning_rate": 7.093481089580999e-06, "loss": 0.8881, "step": 10431 }, { "epoch": 1.7767323231240757, "grad_norm": 1.640625, "learning_rate": 7.0917531727792385e-06, "loss": 0.9111, "step": 10432 }, { "epoch": 1.7769037872130655, "grad_norm": 1.640625, "learning_rate": 7.090025350826359e-06, "loss": 0.8268, "step": 10433 }, { "epoch": 1.7770752513020556, "grad_norm": 1.703125, "learning_rate": 7.08829762377871e-06, "loss": 0.7837, "step": 10434 }, { "epoch": 1.7772467153910454, "grad_norm": 1.6796875, "learning_rate": 7.086569991692641e-06, "loss": 0.8403, "step": 10435 }, { "epoch": 1.7774181794800352, "grad_norm": 1.703125, "learning_rate": 7.084842454624493e-06, "loss": 0.8457, "step": 10436 }, { "epoch": 1.777589643569025, "grad_norm": 1.65625, "learning_rate": 7.0831150126306125e-06, "loss": 0.8311, "step": 10437 }, { "epoch": 1.7777611076580149, "grad_norm": 1.71875, "learning_rate": 7.081387665767334e-06, "loss": 0.8601, "step": 10438 }, { "epoch": 1.7779325717470047, "grad_norm": 1.6796875, "learning_rate": 7.079660414090995e-06, "loss": 0.8963, "step": 10439 }, { "epoch": 1.7781040358359945, "grad_norm": 1.6953125, "learning_rate": 7.077933257657927e-06, "loss": 0.886, "step": 10440 }, { "epoch": 1.7782754999249843, "grad_norm": 1.65625, "learning_rate": 7.07620619652446e-06, "loss": 0.8143, "step": 10441 }, { "epoch": 1.7784469640139742, "grad_norm": 1.75, "learning_rate": 7.074479230746921e-06, "loss": 0.8514, "step": 10442 }, { "epoch": 1.7786184281029642, "grad_norm": 1.7265625, "learning_rate": 7.0727523603816276e-06, "loss": 0.844, "step": 10443 }, { "epoch": 1.778789892191954, "grad_norm": 1.671875, "learning_rate": 7.071025585484901e-06, "loss": 0.822, "step": 10444 }, { "epoch": 1.7789613562809439, "grad_norm": 1.6328125, "learning_rate": 7.069298906113061e-06, "loss": 0.8756, "step": 10445 }, { "epoch": 1.779132820369934, "grad_norm": 1.71875, "learning_rate": 7.06757232232242e-06, "loss": 0.8601, "step": 10446 }, { "epoch": 1.7793042844589237, "grad_norm": 1.59375, "learning_rate": 7.065845834169288e-06, "loss": 0.8132, "step": 10447 }, { "epoch": 1.7794757485479136, "grad_norm": 1.734375, "learning_rate": 7.064119441709972e-06, "loss": 0.885, "step": 10448 }, { "epoch": 1.7796472126369034, "grad_norm": 1.6875, "learning_rate": 7.062393145000776e-06, "loss": 0.8066, "step": 10449 }, { "epoch": 1.7798186767258932, "grad_norm": 1.6640625, "learning_rate": 7.060666944098004e-06, "loss": 0.8237, "step": 10450 }, { "epoch": 1.779990140814883, "grad_norm": 1.65625, "learning_rate": 7.05894083905795e-06, "loss": 0.8584, "step": 10451 }, { "epoch": 1.7801616049038729, "grad_norm": 1.6640625, "learning_rate": 7.057214829936909e-06, "loss": 0.8027, "step": 10452 }, { "epoch": 1.7803330689928627, "grad_norm": 1.7734375, "learning_rate": 7.055488916791176e-06, "loss": 0.8564, "step": 10453 }, { "epoch": 1.7805045330818525, "grad_norm": 1.71875, "learning_rate": 7.053763099677038e-06, "loss": 0.9454, "step": 10454 }, { "epoch": 1.7806759971708426, "grad_norm": 1.703125, "learning_rate": 7.052037378650778e-06, "loss": 0.869, "step": 10455 }, { "epoch": 1.7808474612598324, "grad_norm": 1.6640625, "learning_rate": 7.050311753768681e-06, "loss": 0.9125, "step": 10456 }, { "epoch": 1.7810189253488222, "grad_norm": 1.671875, "learning_rate": 7.048586225087024e-06, "loss": 0.8282, "step": 10457 }, { "epoch": 1.7811903894378123, "grad_norm": 1.640625, "learning_rate": 7.0468607926620845e-06, "loss": 0.7893, "step": 10458 }, { "epoch": 1.781361853526802, "grad_norm": 1.6875, "learning_rate": 7.0451354565501364e-06, "loss": 0.9097, "step": 10459 }, { "epoch": 1.781533317615792, "grad_norm": 1.65625, "learning_rate": 7.043410216807447e-06, "loss": 0.8195, "step": 10460 }, { "epoch": 1.7817047817047817, "grad_norm": 1.6171875, "learning_rate": 7.041685073490283e-06, "loss": 0.8134, "step": 10461 }, { "epoch": 1.7818762457937716, "grad_norm": 1.59375, "learning_rate": 7.039960026654911e-06, "loss": 0.7667, "step": 10462 }, { "epoch": 1.7820477098827614, "grad_norm": 1.703125, "learning_rate": 7.038235076357587e-06, "loss": 0.8867, "step": 10463 }, { "epoch": 1.7822191739717512, "grad_norm": 1.765625, "learning_rate": 7.036510222654571e-06, "loss": 0.8679, "step": 10464 }, { "epoch": 1.782390638060741, "grad_norm": 1.703125, "learning_rate": 7.034785465602118e-06, "loss": 0.9346, "step": 10465 }, { "epoch": 1.7825621021497309, "grad_norm": 1.625, "learning_rate": 7.0330608052564744e-06, "loss": 0.7896, "step": 10466 }, { "epoch": 1.782733566238721, "grad_norm": 1.65625, "learning_rate": 7.0313362416738905e-06, "loss": 0.8179, "step": 10467 }, { "epoch": 1.7829050303277107, "grad_norm": 1.640625, "learning_rate": 7.0296117749106105e-06, "loss": 0.8323, "step": 10468 }, { "epoch": 1.7830764944167006, "grad_norm": 1.7265625, "learning_rate": 7.0278874050228775e-06, "loss": 0.8471, "step": 10469 }, { "epoch": 1.7832479585056906, "grad_norm": 1.7890625, "learning_rate": 7.026163132066927e-06, "loss": 0.857, "step": 10470 }, { "epoch": 1.7834194225946804, "grad_norm": 1.625, "learning_rate": 7.024438956098996e-06, "loss": 0.7765, "step": 10471 }, { "epoch": 1.7835908866836703, "grad_norm": 1.7109375, "learning_rate": 7.022714877175314e-06, "loss": 0.8874, "step": 10472 }, { "epoch": 1.78376235077266, "grad_norm": 1.6953125, "learning_rate": 7.020990895352112e-06, "loss": 0.8134, "step": 10473 }, { "epoch": 1.78393381486165, "grad_norm": 1.5625, "learning_rate": 7.019267010685615e-06, "loss": 0.781, "step": 10474 }, { "epoch": 1.7841052789506398, "grad_norm": 1.734375, "learning_rate": 7.017543223232043e-06, "loss": 0.8688, "step": 10475 }, { "epoch": 1.7842767430396296, "grad_norm": 1.765625, "learning_rate": 7.015819533047619e-06, "loss": 0.886, "step": 10476 }, { "epoch": 1.7844482071286194, "grad_norm": 1.7421875, "learning_rate": 7.0140959401885635e-06, "loss": 0.8504, "step": 10477 }, { "epoch": 1.7846196712176092, "grad_norm": 1.703125, "learning_rate": 7.012372444711078e-06, "loss": 0.833, "step": 10478 }, { "epoch": 1.7847911353065993, "grad_norm": 1.7734375, "learning_rate": 7.010649046671376e-06, "loss": 0.9199, "step": 10479 }, { "epoch": 1.784962599395589, "grad_norm": 1.6875, "learning_rate": 7.008925746125667e-06, "loss": 0.8332, "step": 10480 }, { "epoch": 1.785134063484579, "grad_norm": 1.6171875, "learning_rate": 7.007202543130152e-06, "loss": 0.8157, "step": 10481 }, { "epoch": 1.785305527573569, "grad_norm": 1.671875, "learning_rate": 7.005479437741032e-06, "loss": 0.8156, "step": 10482 }, { "epoch": 1.7854769916625588, "grad_norm": 1.71875, "learning_rate": 7.003756430014502e-06, "loss": 0.8972, "step": 10483 }, { "epoch": 1.7856484557515486, "grad_norm": 1.6875, "learning_rate": 7.00203352000676e-06, "loss": 0.8747, "step": 10484 }, { "epoch": 1.7858199198405385, "grad_norm": 1.7265625, "learning_rate": 7.000310707773994e-06, "loss": 0.9243, "step": 10485 }, { "epoch": 1.7859913839295283, "grad_norm": 1.609375, "learning_rate": 6.998587993372392e-06, "loss": 0.7961, "step": 10486 }, { "epoch": 1.786162848018518, "grad_norm": 1.6484375, "learning_rate": 6.996865376858137e-06, "loss": 0.8212, "step": 10487 }, { "epoch": 1.786334312107508, "grad_norm": 1.765625, "learning_rate": 6.995142858287416e-06, "loss": 0.7968, "step": 10488 }, { "epoch": 1.7865057761964978, "grad_norm": 1.8125, "learning_rate": 6.993420437716396e-06, "loss": 0.8075, "step": 10489 }, { "epoch": 1.7866772402854876, "grad_norm": 1.6796875, "learning_rate": 6.991698115201257e-06, "loss": 0.8797, "step": 10490 }, { "epoch": 1.7868487043744776, "grad_norm": 1.6328125, "learning_rate": 6.9899758907981706e-06, "loss": 0.818, "step": 10491 }, { "epoch": 1.7870201684634675, "grad_norm": 1.6875, "learning_rate": 6.988253764563307e-06, "loss": 0.7987, "step": 10492 }, { "epoch": 1.7871916325524573, "grad_norm": 1.703125, "learning_rate": 6.986531736552829e-06, "loss": 0.8601, "step": 10493 }, { "epoch": 1.787363096641447, "grad_norm": 1.7265625, "learning_rate": 6.984809806822897e-06, "loss": 0.8766, "step": 10494 }, { "epoch": 1.7875345607304371, "grad_norm": 1.65625, "learning_rate": 6.983087975429673e-06, "loss": 0.914, "step": 10495 }, { "epoch": 1.787706024819427, "grad_norm": 1.8203125, "learning_rate": 6.98136624242931e-06, "loss": 0.9354, "step": 10496 }, { "epoch": 1.7878774889084168, "grad_norm": 1.734375, "learning_rate": 6.979644607877962e-06, "loss": 0.9315, "step": 10497 }, { "epoch": 1.7880489529974066, "grad_norm": 1.6953125, "learning_rate": 6.977923071831776e-06, "loss": 0.7592, "step": 10498 }, { "epoch": 1.7882204170863965, "grad_norm": 1.84375, "learning_rate": 6.9762016343469e-06, "loss": 0.9585, "step": 10499 }, { "epoch": 1.7883918811753863, "grad_norm": 1.734375, "learning_rate": 6.974480295479476e-06, "loss": 0.8544, "step": 10500 }, { "epoch": 1.7883918811753863, "eval_loss": 0.8336145877838135, "eval_runtime": 835.9359, "eval_samples_per_second": 2.989, "eval_steps_per_second": 2.989, "step": 10500 }, { "epoch": 1.788563345264376, "grad_norm": 1.765625, "learning_rate": 6.9727590552856405e-06, "loss": 0.8694, "step": 10501 }, { "epoch": 1.788734809353366, "grad_norm": 1.671875, "learning_rate": 6.971037913821533e-06, "loss": 0.7916, "step": 10502 }, { "epoch": 1.7889062734423558, "grad_norm": 1.6875, "learning_rate": 6.9693168711432835e-06, "loss": 0.8967, "step": 10503 }, { "epoch": 1.7890777375313458, "grad_norm": 1.671875, "learning_rate": 6.967595927307025e-06, "loss": 0.7497, "step": 10504 }, { "epoch": 1.7892492016203356, "grad_norm": 1.65625, "learning_rate": 6.96587508236888e-06, "loss": 0.8398, "step": 10505 }, { "epoch": 1.7894206657093255, "grad_norm": 1.8125, "learning_rate": 6.964154336384976e-06, "loss": 0.8664, "step": 10506 }, { "epoch": 1.7895921297983155, "grad_norm": 1.6875, "learning_rate": 6.962433689411431e-06, "loss": 0.8373, "step": 10507 }, { "epoch": 1.7897635938873053, "grad_norm": 1.6796875, "learning_rate": 6.96071314150436e-06, "loss": 0.8511, "step": 10508 }, { "epoch": 1.7899350579762952, "grad_norm": 1.7109375, "learning_rate": 6.9589926927198805e-06, "loss": 0.8263, "step": 10509 }, { "epoch": 1.790106522065285, "grad_norm": 1.7265625, "learning_rate": 6.9572723431141e-06, "loss": 0.934, "step": 10510 }, { "epoch": 1.7902779861542748, "grad_norm": 1.71875, "learning_rate": 6.955552092743125e-06, "loss": 0.8711, "step": 10511 }, { "epoch": 1.7904494502432646, "grad_norm": 1.65625, "learning_rate": 6.953831941663065e-06, "loss": 0.836, "step": 10512 }, { "epoch": 1.7906209143322545, "grad_norm": 1.578125, "learning_rate": 6.9521118899300135e-06, "loss": 0.8665, "step": 10513 }, { "epoch": 1.7907923784212443, "grad_norm": 1.671875, "learning_rate": 6.95039193760007e-06, "loss": 0.816, "step": 10514 }, { "epoch": 1.790963842510234, "grad_norm": 1.7890625, "learning_rate": 6.948672084729328e-06, "loss": 0.8766, "step": 10515 }, { "epoch": 1.7911353065992242, "grad_norm": 1.640625, "learning_rate": 6.946952331373881e-06, "loss": 0.802, "step": 10516 }, { "epoch": 1.791306770688214, "grad_norm": 1.6953125, "learning_rate": 6.945232677589815e-06, "loss": 0.8039, "step": 10517 }, { "epoch": 1.7914782347772038, "grad_norm": 1.734375, "learning_rate": 6.943513123433214e-06, "loss": 0.8979, "step": 10518 }, { "epoch": 1.7916496988661939, "grad_norm": 1.734375, "learning_rate": 6.94179366896016e-06, "loss": 0.8817, "step": 10519 }, { "epoch": 1.7918211629551837, "grad_norm": 1.6640625, "learning_rate": 6.94007431422673e-06, "loss": 0.8062, "step": 10520 }, { "epoch": 1.7919926270441735, "grad_norm": 1.703125, "learning_rate": 6.938355059289e-06, "loss": 0.8331, "step": 10521 }, { "epoch": 1.7921640911331633, "grad_norm": 1.6640625, "learning_rate": 6.936635904203039e-06, "loss": 0.812, "step": 10522 }, { "epoch": 1.7923355552221532, "grad_norm": 1.671875, "learning_rate": 6.934916849024922e-06, "loss": 0.9065, "step": 10523 }, { "epoch": 1.792507019311143, "grad_norm": 1.734375, "learning_rate": 6.933197893810706e-06, "loss": 0.8648, "step": 10524 }, { "epoch": 1.7926784834001328, "grad_norm": 1.7578125, "learning_rate": 6.931479038616453e-06, "loss": 0.8835, "step": 10525 }, { "epoch": 1.7928499474891226, "grad_norm": 1.6796875, "learning_rate": 6.929760283498225e-06, "loss": 0.893, "step": 10526 }, { "epoch": 1.7930214115781125, "grad_norm": 1.65625, "learning_rate": 6.928041628512074e-06, "loss": 0.8337, "step": 10527 }, { "epoch": 1.7931928756671025, "grad_norm": 1.7109375, "learning_rate": 6.926323073714055e-06, "loss": 0.8407, "step": 10528 }, { "epoch": 1.7933643397560923, "grad_norm": 1.7265625, "learning_rate": 6.924604619160214e-06, "loss": 0.8472, "step": 10529 }, { "epoch": 1.7935358038450822, "grad_norm": 1.5859375, "learning_rate": 6.922886264906597e-06, "loss": 0.8395, "step": 10530 }, { "epoch": 1.7937072679340722, "grad_norm": 1.6640625, "learning_rate": 6.921168011009247e-06, "loss": 0.8467, "step": 10531 }, { "epoch": 1.793878732023062, "grad_norm": 1.625, "learning_rate": 6.919449857524201e-06, "loss": 0.8281, "step": 10532 }, { "epoch": 1.7940501961120519, "grad_norm": 1.71875, "learning_rate": 6.917731804507497e-06, "loss": 0.85, "step": 10533 }, { "epoch": 1.7942216602010417, "grad_norm": 1.7421875, "learning_rate": 6.916013852015165e-06, "loss": 0.8706, "step": 10534 }, { "epoch": 1.7943931242900315, "grad_norm": 1.71875, "learning_rate": 6.914296000103238e-06, "loss": 0.8551, "step": 10535 }, { "epoch": 1.7945645883790213, "grad_norm": 1.7421875, "learning_rate": 6.9125782488277345e-06, "loss": 0.7838, "step": 10536 }, { "epoch": 1.7947360524680112, "grad_norm": 1.6015625, "learning_rate": 6.910860598244682e-06, "loss": 0.8028, "step": 10537 }, { "epoch": 1.794907516557001, "grad_norm": 1.6640625, "learning_rate": 6.909143048410094e-06, "loss": 0.8455, "step": 10538 }, { "epoch": 1.7950789806459908, "grad_norm": 1.6640625, "learning_rate": 6.907425599379993e-06, "loss": 0.8057, "step": 10539 }, { "epoch": 1.7952504447349809, "grad_norm": 1.7734375, "learning_rate": 6.905708251210388e-06, "loss": 0.788, "step": 10540 }, { "epoch": 1.7954219088239707, "grad_norm": 1.8515625, "learning_rate": 6.90399100395729e-06, "loss": 0.8614, "step": 10541 }, { "epoch": 1.7955933729129605, "grad_norm": 1.59375, "learning_rate": 6.902273857676703e-06, "loss": 0.8715, "step": 10542 }, { "epoch": 1.7957648370019506, "grad_norm": 1.71875, "learning_rate": 6.900556812424631e-06, "loss": 0.9178, "step": 10543 }, { "epoch": 1.7959363010909404, "grad_norm": 1.6796875, "learning_rate": 6.898839868257072e-06, "loss": 0.7955, "step": 10544 }, { "epoch": 1.7961077651799302, "grad_norm": 1.703125, "learning_rate": 6.897123025230023e-06, "loss": 0.8021, "step": 10545 }, { "epoch": 1.79627922926892, "grad_norm": 1.609375, "learning_rate": 6.89540628339948e-06, "loss": 0.7967, "step": 10546 }, { "epoch": 1.7964506933579099, "grad_norm": 1.7578125, "learning_rate": 6.893689642821426e-06, "loss": 0.9277, "step": 10547 }, { "epoch": 1.7966221574468997, "grad_norm": 1.703125, "learning_rate": 6.89197310355185e-06, "loss": 0.8216, "step": 10548 }, { "epoch": 1.7967936215358895, "grad_norm": 1.625, "learning_rate": 6.890256665646735e-06, "loss": 0.7643, "step": 10549 }, { "epoch": 1.7969650856248793, "grad_norm": 1.7890625, "learning_rate": 6.88854032916206e-06, "loss": 0.8166, "step": 10550 }, { "epoch": 1.7971365497138692, "grad_norm": 1.609375, "learning_rate": 6.886824094153801e-06, "loss": 0.8987, "step": 10551 }, { "epoch": 1.7973080138028592, "grad_norm": 1.71875, "learning_rate": 6.885107960677933e-06, "loss": 0.8656, "step": 10552 }, { "epoch": 1.797479477891849, "grad_norm": 1.828125, "learning_rate": 6.883391928790423e-06, "loss": 0.8586, "step": 10553 }, { "epoch": 1.7976509419808389, "grad_norm": 1.7421875, "learning_rate": 6.881675998547238e-06, "loss": 0.9042, "step": 10554 }, { "epoch": 1.797822406069829, "grad_norm": 1.65625, "learning_rate": 6.879960170004341e-06, "loss": 0.852, "step": 10555 }, { "epoch": 1.7979938701588187, "grad_norm": 1.75, "learning_rate": 6.878244443217693e-06, "loss": 0.8558, "step": 10556 }, { "epoch": 1.7981653342478086, "grad_norm": 1.7265625, "learning_rate": 6.876528818243249e-06, "loss": 0.9334, "step": 10557 }, { "epoch": 1.7983367983367984, "grad_norm": 1.71875, "learning_rate": 6.874813295136964e-06, "loss": 0.8312, "step": 10558 }, { "epoch": 1.7985082624257882, "grad_norm": 1.7109375, "learning_rate": 6.873097873954783e-06, "loss": 0.7822, "step": 10559 }, { "epoch": 1.798679726514778, "grad_norm": 1.59375, "learning_rate": 6.871382554752655e-06, "loss": 0.8809, "step": 10560 }, { "epoch": 1.7988511906037679, "grad_norm": 1.640625, "learning_rate": 6.869667337586524e-06, "loss": 0.8643, "step": 10561 }, { "epoch": 1.7990226546927577, "grad_norm": 1.75, "learning_rate": 6.8679522225123286e-06, "loss": 0.8073, "step": 10562 }, { "epoch": 1.7991941187817475, "grad_norm": 1.625, "learning_rate": 6.8662372095860055e-06, "loss": 0.8247, "step": 10563 }, { "epoch": 1.7993655828707376, "grad_norm": 1.6171875, "learning_rate": 6.864522298863488e-06, "loss": 0.8408, "step": 10564 }, { "epoch": 1.7995370469597274, "grad_norm": 1.75, "learning_rate": 6.862807490400705e-06, "loss": 0.8944, "step": 10565 }, { "epoch": 1.7997085110487172, "grad_norm": 1.7734375, "learning_rate": 6.8610927842535825e-06, "loss": 0.8626, "step": 10566 }, { "epoch": 1.7998799751377073, "grad_norm": 1.671875, "learning_rate": 6.859378180478046e-06, "loss": 0.8183, "step": 10567 }, { "epoch": 1.800051439226697, "grad_norm": 1.7421875, "learning_rate": 6.857663679130013e-06, "loss": 0.8266, "step": 10568 }, { "epoch": 1.800222903315687, "grad_norm": 1.7109375, "learning_rate": 6.855949280265402e-06, "loss": 0.8345, "step": 10569 }, { "epoch": 1.8003943674046767, "grad_norm": 1.7109375, "learning_rate": 6.854234983940123e-06, "loss": 0.8341, "step": 10570 }, { "epoch": 1.8005658314936666, "grad_norm": 1.7734375, "learning_rate": 6.8525207902100865e-06, "loss": 0.8453, "step": 10571 }, { "epoch": 1.8007372955826564, "grad_norm": 1.7421875, "learning_rate": 6.850806699131198e-06, "loss": 0.9124, "step": 10572 }, { "epoch": 1.8009087596716462, "grad_norm": 1.625, "learning_rate": 6.849092710759364e-06, "loss": 0.826, "step": 10573 }, { "epoch": 1.801080223760636, "grad_norm": 1.71875, "learning_rate": 6.847378825150481e-06, "loss": 0.9096, "step": 10574 }, { "epoch": 1.8012516878496259, "grad_norm": 1.671875, "learning_rate": 6.845665042360445e-06, "loss": 0.8523, "step": 10575 }, { "epoch": 1.801423151938616, "grad_norm": 1.6328125, "learning_rate": 6.843951362445153e-06, "loss": 0.916, "step": 10576 }, { "epoch": 1.8015946160276057, "grad_norm": 1.7109375, "learning_rate": 6.842237785460487e-06, "loss": 0.8603, "step": 10577 }, { "epoch": 1.8017660801165956, "grad_norm": 1.7265625, "learning_rate": 6.840524311462341e-06, "loss": 0.8717, "step": 10578 }, { "epoch": 1.8019375442055856, "grad_norm": 1.703125, "learning_rate": 6.838810940506595e-06, "loss": 0.82, "step": 10579 }, { "epoch": 1.8021090082945754, "grad_norm": 1.625, "learning_rate": 6.837097672649126e-06, "loss": 0.7833, "step": 10580 }, { "epoch": 1.8022804723835653, "grad_norm": 1.8359375, "learning_rate": 6.8353845079458195e-06, "loss": 0.8223, "step": 10581 }, { "epoch": 1.802451936472555, "grad_norm": 1.8203125, "learning_rate": 6.833671446452535e-06, "loss": 0.8815, "step": 10582 }, { "epoch": 1.802623400561545, "grad_norm": 1.65625, "learning_rate": 6.831958488225149e-06, "loss": 0.8606, "step": 10583 }, { "epoch": 1.8027948646505347, "grad_norm": 1.625, "learning_rate": 6.8302456333195255e-06, "loss": 0.8732, "step": 10584 }, { "epoch": 1.8029663287395246, "grad_norm": 1.6640625, "learning_rate": 6.828532881791528e-06, "loss": 0.8402, "step": 10585 }, { "epoch": 1.8031377928285144, "grad_norm": 1.6796875, "learning_rate": 6.826820233697015e-06, "loss": 0.8685, "step": 10586 }, { "epoch": 1.8033092569175042, "grad_norm": 1.65625, "learning_rate": 6.825107689091846e-06, "loss": 0.8481, "step": 10587 }, { "epoch": 1.803480721006494, "grad_norm": 1.65625, "learning_rate": 6.823395248031867e-06, "loss": 0.8237, "step": 10588 }, { "epoch": 1.803652185095484, "grad_norm": 1.6875, "learning_rate": 6.821682910572934e-06, "loss": 0.9003, "step": 10589 }, { "epoch": 1.803823649184474, "grad_norm": 1.6328125, "learning_rate": 6.81997067677089e-06, "loss": 0.8977, "step": 10590 }, { "epoch": 1.8039951132734637, "grad_norm": 1.609375, "learning_rate": 6.818258546681575e-06, "loss": 0.7993, "step": 10591 }, { "epoch": 1.8041665773624538, "grad_norm": 1.703125, "learning_rate": 6.816546520360836e-06, "loss": 0.9014, "step": 10592 }, { "epoch": 1.8043380414514436, "grad_norm": 1.71875, "learning_rate": 6.814834597864497e-06, "loss": 0.8515, "step": 10593 }, { "epoch": 1.8045095055404334, "grad_norm": 1.6953125, "learning_rate": 6.8131227792483956e-06, "loss": 0.9445, "step": 10594 }, { "epoch": 1.8046809696294233, "grad_norm": 1.71875, "learning_rate": 6.8114110645683625e-06, "loss": 0.8309, "step": 10595 }, { "epoch": 1.804852433718413, "grad_norm": 1.6015625, "learning_rate": 6.809699453880223e-06, "loss": 0.8086, "step": 10596 }, { "epoch": 1.805023897807403, "grad_norm": 1.6875, "learning_rate": 6.807987947239796e-06, "loss": 0.9167, "step": 10597 }, { "epoch": 1.8051953618963927, "grad_norm": 1.6484375, "learning_rate": 6.806276544702902e-06, "loss": 0.8983, "step": 10598 }, { "epoch": 1.8053668259853826, "grad_norm": 1.59375, "learning_rate": 6.804565246325358e-06, "loss": 0.8876, "step": 10599 }, { "epoch": 1.8055382900743724, "grad_norm": 1.625, "learning_rate": 6.802854052162973e-06, "loss": 0.846, "step": 10600 }, { "epoch": 1.8057097541633624, "grad_norm": 1.6640625, "learning_rate": 6.801142962271556e-06, "loss": 0.8577, "step": 10601 }, { "epoch": 1.8058812182523523, "grad_norm": 1.609375, "learning_rate": 6.799431976706914e-06, "loss": 0.8461, "step": 10602 }, { "epoch": 1.806052682341342, "grad_norm": 1.5859375, "learning_rate": 6.797721095524847e-06, "loss": 0.8367, "step": 10603 }, { "epoch": 1.8062241464303321, "grad_norm": 1.734375, "learning_rate": 6.796010318781157e-06, "loss": 0.8702, "step": 10604 }, { "epoch": 1.806395610519322, "grad_norm": 1.7421875, "learning_rate": 6.794299646531631e-06, "loss": 0.8586, "step": 10605 }, { "epoch": 1.8065670746083118, "grad_norm": 1.6015625, "learning_rate": 6.792589078832066e-06, "loss": 0.8048, "step": 10606 }, { "epoch": 1.8067385386973016, "grad_norm": 1.75, "learning_rate": 6.79087861573825e-06, "loss": 0.8556, "step": 10607 }, { "epoch": 1.8069100027862914, "grad_norm": 1.59375, "learning_rate": 6.789168257305967e-06, "loss": 0.8026, "step": 10608 }, { "epoch": 1.8070814668752813, "grad_norm": 1.5859375, "learning_rate": 6.787458003590996e-06, "loss": 0.7729, "step": 10609 }, { "epoch": 1.807252930964271, "grad_norm": 1.734375, "learning_rate": 6.78574785464912e-06, "loss": 0.8806, "step": 10610 }, { "epoch": 1.807424395053261, "grad_norm": 1.6328125, "learning_rate": 6.784037810536108e-06, "loss": 0.86, "step": 10611 }, { "epoch": 1.8075958591422507, "grad_norm": 1.8046875, "learning_rate": 6.782327871307733e-06, "loss": 0.9071, "step": 10612 }, { "epoch": 1.8077673232312408, "grad_norm": 1.6640625, "learning_rate": 6.780618037019764e-06, "loss": 0.8872, "step": 10613 }, { "epoch": 1.8079387873202306, "grad_norm": 1.6796875, "learning_rate": 6.778908307727962e-06, "loss": 0.8096, "step": 10614 }, { "epoch": 1.8081102514092204, "grad_norm": 1.71875, "learning_rate": 6.777198683488093e-06, "loss": 0.8722, "step": 10615 }, { "epoch": 1.8082817154982105, "grad_norm": 1.6328125, "learning_rate": 6.7754891643559095e-06, "loss": 0.8332, "step": 10616 }, { "epoch": 1.8084531795872003, "grad_norm": 1.640625, "learning_rate": 6.773779750387166e-06, "loss": 0.7589, "step": 10617 }, { "epoch": 1.8086246436761901, "grad_norm": 1.7421875, "learning_rate": 6.772070441637614e-06, "loss": 0.8632, "step": 10618 }, { "epoch": 1.80879610776518, "grad_norm": 1.75, "learning_rate": 6.7703612381629994e-06, "loss": 0.8868, "step": 10619 }, { "epoch": 1.8089675718541698, "grad_norm": 1.734375, "learning_rate": 6.768652140019067e-06, "loss": 0.8385, "step": 10620 }, { "epoch": 1.8091390359431596, "grad_norm": 1.625, "learning_rate": 6.7669431472615565e-06, "loss": 0.8399, "step": 10621 }, { "epoch": 1.8093105000321494, "grad_norm": 1.6796875, "learning_rate": 6.765234259946204e-06, "loss": 0.9131, "step": 10622 }, { "epoch": 1.8094819641211393, "grad_norm": 1.78125, "learning_rate": 6.763525478128744e-06, "loss": 0.8054, "step": 10623 }, { "epoch": 1.809653428210129, "grad_norm": 1.6640625, "learning_rate": 6.7618168018649024e-06, "loss": 0.8744, "step": 10624 }, { "epoch": 1.8098248922991191, "grad_norm": 1.6640625, "learning_rate": 6.760108231210411e-06, "loss": 0.836, "step": 10625 }, { "epoch": 1.809996356388109, "grad_norm": 1.703125, "learning_rate": 6.758399766220992e-06, "loss": 0.8886, "step": 10626 }, { "epoch": 1.8101678204770988, "grad_norm": 1.78125, "learning_rate": 6.756691406952367e-06, "loss": 0.7741, "step": 10627 }, { "epoch": 1.8103392845660888, "grad_norm": 1.703125, "learning_rate": 6.754983153460244e-06, "loss": 0.8985, "step": 10628 }, { "epoch": 1.8105107486550787, "grad_norm": 1.7421875, "learning_rate": 6.7532750058003395e-06, "loss": 0.7809, "step": 10629 }, { "epoch": 1.8106822127440685, "grad_norm": 1.7734375, "learning_rate": 6.751566964028363e-06, "loss": 0.9026, "step": 10630 }, { "epoch": 1.8108536768330583, "grad_norm": 1.703125, "learning_rate": 6.749859028200021e-06, "loss": 0.8978, "step": 10631 }, { "epoch": 1.8110251409220481, "grad_norm": 1.7421875, "learning_rate": 6.7481511983710125e-06, "loss": 0.8433, "step": 10632 }, { "epoch": 1.811196605011038, "grad_norm": 1.640625, "learning_rate": 6.746443474597041e-06, "loss": 0.8048, "step": 10633 }, { "epoch": 1.8113680691000278, "grad_norm": 1.7578125, "learning_rate": 6.744735856933799e-06, "loss": 0.8588, "step": 10634 }, { "epoch": 1.8115395331890176, "grad_norm": 1.65625, "learning_rate": 6.7430283454369795e-06, "loss": 0.8494, "step": 10635 }, { "epoch": 1.8117109972780074, "grad_norm": 1.6328125, "learning_rate": 6.741320940162271e-06, "loss": 0.8099, "step": 10636 }, { "epoch": 1.8118824613669975, "grad_norm": 1.6640625, "learning_rate": 6.7396136411653566e-06, "loss": 0.827, "step": 10637 }, { "epoch": 1.8120539254559873, "grad_norm": 1.671875, "learning_rate": 6.737906448501923e-06, "loss": 0.7802, "step": 10638 }, { "epoch": 1.8122253895449771, "grad_norm": 1.65625, "learning_rate": 6.73619936222764e-06, "loss": 0.8083, "step": 10639 }, { "epoch": 1.8123968536339672, "grad_norm": 1.75, "learning_rate": 6.734492382398184e-06, "loss": 0.8436, "step": 10640 }, { "epoch": 1.812568317722957, "grad_norm": 1.8125, "learning_rate": 6.7327855090692305e-06, "loss": 0.845, "step": 10641 }, { "epoch": 1.8127397818119468, "grad_norm": 1.6796875, "learning_rate": 6.731078742296444e-06, "loss": 0.8076, "step": 10642 }, { "epoch": 1.8129112459009367, "grad_norm": 1.671875, "learning_rate": 6.72937208213549e-06, "loss": 0.8896, "step": 10643 }, { "epoch": 1.8130827099899265, "grad_norm": 1.7109375, "learning_rate": 6.727665528642029e-06, "loss": 0.8489, "step": 10644 }, { "epoch": 1.8132541740789163, "grad_norm": 1.6484375, "learning_rate": 6.725959081871717e-06, "loss": 0.8434, "step": 10645 }, { "epoch": 1.8134256381679061, "grad_norm": 1.7109375, "learning_rate": 6.724252741880208e-06, "loss": 0.884, "step": 10646 }, { "epoch": 1.813597102256896, "grad_norm": 1.71875, "learning_rate": 6.722546508723154e-06, "loss": 0.8877, "step": 10647 }, { "epoch": 1.8137685663458858, "grad_norm": 1.75, "learning_rate": 6.720840382456198e-06, "loss": 0.8864, "step": 10648 }, { "epoch": 1.8139400304348758, "grad_norm": 1.703125, "learning_rate": 6.719134363134986e-06, "loss": 0.8983, "step": 10649 }, { "epoch": 1.8141114945238657, "grad_norm": 1.8125, "learning_rate": 6.717428450815159e-06, "loss": 0.8891, "step": 10650 }, { "epoch": 1.8142829586128555, "grad_norm": 1.6953125, "learning_rate": 6.715722645552349e-06, "loss": 0.8517, "step": 10651 }, { "epoch": 1.8144544227018455, "grad_norm": 1.546875, "learning_rate": 6.714016947402191e-06, "loss": 0.8189, "step": 10652 }, { "epoch": 1.8146258867908354, "grad_norm": 1.7109375, "learning_rate": 6.712311356420315e-06, "loss": 0.7795, "step": 10653 }, { "epoch": 1.8147973508798252, "grad_norm": 1.640625, "learning_rate": 6.710605872662346e-06, "loss": 0.8564, "step": 10654 }, { "epoch": 1.814968814968815, "grad_norm": 1.6328125, "learning_rate": 6.708900496183906e-06, "loss": 0.8641, "step": 10655 }, { "epoch": 1.8151402790578048, "grad_norm": 1.7578125, "learning_rate": 6.707195227040612e-06, "loss": 0.8355, "step": 10656 }, { "epoch": 1.8153117431467947, "grad_norm": 1.71875, "learning_rate": 6.705490065288083e-06, "loss": 0.7409, "step": 10657 }, { "epoch": 1.8154832072357845, "grad_norm": 1.75, "learning_rate": 6.7037850109819294e-06, "loss": 0.8776, "step": 10658 }, { "epoch": 1.8156546713247743, "grad_norm": 1.78125, "learning_rate": 6.702080064177759e-06, "loss": 0.7984, "step": 10659 }, { "epoch": 1.8158261354137641, "grad_norm": 1.7421875, "learning_rate": 6.700375224931175e-06, "loss": 0.8178, "step": 10660 }, { "epoch": 1.8159975995027542, "grad_norm": 1.71875, "learning_rate": 6.698670493297781e-06, "loss": 0.7873, "step": 10661 }, { "epoch": 1.816169063591744, "grad_norm": 1.703125, "learning_rate": 6.696965869333176e-06, "loss": 0.8951, "step": 10662 }, { "epoch": 1.8163405276807338, "grad_norm": 1.71875, "learning_rate": 6.69526135309295e-06, "loss": 0.8174, "step": 10663 }, { "epoch": 1.8165119917697239, "grad_norm": 1.6484375, "learning_rate": 6.693556944632696e-06, "loss": 0.7665, "step": 10664 }, { "epoch": 1.8166834558587137, "grad_norm": 1.7265625, "learning_rate": 6.691852644008e-06, "loss": 0.8757, "step": 10665 }, { "epoch": 1.8168549199477035, "grad_norm": 1.6796875, "learning_rate": 6.690148451274447e-06, "loss": 0.8875, "step": 10666 }, { "epoch": 1.8170263840366934, "grad_norm": 1.625, "learning_rate": 6.688444366487618e-06, "loss": 0.7827, "step": 10667 }, { "epoch": 1.8171978481256832, "grad_norm": 1.671875, "learning_rate": 6.686740389703087e-06, "loss": 0.8972, "step": 10668 }, { "epoch": 1.817369312214673, "grad_norm": 1.6875, "learning_rate": 6.685036520976429e-06, "loss": 0.8712, "step": 10669 }, { "epoch": 1.8175407763036628, "grad_norm": 1.6796875, "learning_rate": 6.683332760363212e-06, "loss": 0.8629, "step": 10670 }, { "epoch": 1.8177122403926527, "grad_norm": 1.75, "learning_rate": 6.681629107919002e-06, "loss": 0.8683, "step": 10671 }, { "epoch": 1.8178837044816425, "grad_norm": 1.7109375, "learning_rate": 6.679925563699365e-06, "loss": 0.8437, "step": 10672 }, { "epoch": 1.8180551685706325, "grad_norm": 1.7734375, "learning_rate": 6.678222127759862e-06, "loss": 0.8247, "step": 10673 }, { "epoch": 1.8182266326596224, "grad_norm": 1.7265625, "learning_rate": 6.676518800156039e-06, "loss": 0.9106, "step": 10674 }, { "epoch": 1.8183980967486122, "grad_norm": 1.578125, "learning_rate": 6.674815580943453e-06, "loss": 0.8461, "step": 10675 }, { "epoch": 1.8185695608376022, "grad_norm": 1.7421875, "learning_rate": 6.673112470177653e-06, "loss": 0.8636, "step": 10676 }, { "epoch": 1.818741024926592, "grad_norm": 1.8203125, "learning_rate": 6.671409467914183e-06, "loss": 0.8077, "step": 10677 }, { "epoch": 1.818912489015582, "grad_norm": 1.640625, "learning_rate": 6.669706574208585e-06, "loss": 0.8084, "step": 10678 }, { "epoch": 1.8190839531045717, "grad_norm": 1.640625, "learning_rate": 6.668003789116393e-06, "loss": 0.789, "step": 10679 }, { "epoch": 1.8192554171935615, "grad_norm": 1.59375, "learning_rate": 6.666301112693147e-06, "loss": 0.8396, "step": 10680 }, { "epoch": 1.8194268812825514, "grad_norm": 1.7265625, "learning_rate": 6.664598544994376e-06, "loss": 0.9069, "step": 10681 }, { "epoch": 1.8195983453715412, "grad_norm": 1.65625, "learning_rate": 6.662896086075607e-06, "loss": 0.8007, "step": 10682 }, { "epoch": 1.819769809460531, "grad_norm": 1.640625, "learning_rate": 6.6611937359923615e-06, "loss": 0.8241, "step": 10683 }, { "epoch": 1.8199412735495208, "grad_norm": 1.7109375, "learning_rate": 6.659491494800161e-06, "loss": 0.8853, "step": 10684 }, { "epoch": 1.8201127376385107, "grad_norm": 1.71875, "learning_rate": 6.657789362554527e-06, "loss": 0.8538, "step": 10685 }, { "epoch": 1.8202842017275007, "grad_norm": 1.703125, "learning_rate": 6.656087339310964e-06, "loss": 0.797, "step": 10686 }, { "epoch": 1.8204556658164905, "grad_norm": 1.71875, "learning_rate": 6.654385425124981e-06, "loss": 0.8743, "step": 10687 }, { "epoch": 1.8206271299054804, "grad_norm": 1.8203125, "learning_rate": 6.652683620052092e-06, "loss": 0.8449, "step": 10688 }, { "epoch": 1.8207985939944704, "grad_norm": 1.6640625, "learning_rate": 6.6509819241477924e-06, "loss": 0.7926, "step": 10689 }, { "epoch": 1.8209700580834602, "grad_norm": 1.765625, "learning_rate": 6.649280337467585e-06, "loss": 0.8931, "step": 10690 }, { "epoch": 1.82114152217245, "grad_norm": 1.6875, "learning_rate": 6.6475788600669635e-06, "loss": 0.8234, "step": 10691 }, { "epoch": 1.82131298626144, "grad_norm": 1.625, "learning_rate": 6.645877492001419e-06, "loss": 0.8517, "step": 10692 }, { "epoch": 1.8214844503504297, "grad_norm": 1.640625, "learning_rate": 6.64417623332644e-06, "loss": 0.8323, "step": 10693 }, { "epoch": 1.8216559144394195, "grad_norm": 1.75, "learning_rate": 6.6424750840975115e-06, "loss": 0.8254, "step": 10694 }, { "epoch": 1.8218273785284094, "grad_norm": 1.5546875, "learning_rate": 6.640774044370113e-06, "loss": 0.7968, "step": 10695 }, { "epoch": 1.8219988426173992, "grad_norm": 1.6796875, "learning_rate": 6.6390731141997255e-06, "loss": 0.7962, "step": 10696 }, { "epoch": 1.822170306706389, "grad_norm": 1.7109375, "learning_rate": 6.6373722936418176e-06, "loss": 0.8424, "step": 10697 }, { "epoch": 1.822341770795379, "grad_norm": 1.6875, "learning_rate": 6.6356715827518615e-06, "loss": 0.9017, "step": 10698 }, { "epoch": 1.822513234884369, "grad_norm": 1.703125, "learning_rate": 6.633970981585323e-06, "loss": 0.8439, "step": 10699 }, { "epoch": 1.8226846989733587, "grad_norm": 1.8203125, "learning_rate": 6.632270490197667e-06, "loss": 0.8899, "step": 10700 }, { "epoch": 1.8228561630623488, "grad_norm": 1.671875, "learning_rate": 6.630570108644352e-06, "loss": 0.8732, "step": 10701 }, { "epoch": 1.8230276271513386, "grad_norm": 1.578125, "learning_rate": 6.628869836980833e-06, "loss": 0.7281, "step": 10702 }, { "epoch": 1.8231990912403284, "grad_norm": 1.65625, "learning_rate": 6.627169675262563e-06, "loss": 0.8485, "step": 10703 }, { "epoch": 1.8233705553293182, "grad_norm": 1.6875, "learning_rate": 6.625469623544994e-06, "loss": 0.8677, "step": 10704 }, { "epoch": 1.823542019418308, "grad_norm": 1.828125, "learning_rate": 6.623769681883565e-06, "loss": 0.8255, "step": 10705 }, { "epoch": 1.823713483507298, "grad_norm": 1.796875, "learning_rate": 6.62206985033372e-06, "loss": 0.897, "step": 10706 }, { "epoch": 1.8238849475962877, "grad_norm": 1.6875, "learning_rate": 6.620370128950898e-06, "loss": 0.8786, "step": 10707 }, { "epoch": 1.8240564116852775, "grad_norm": 1.7265625, "learning_rate": 6.618670517790535e-06, "loss": 0.8853, "step": 10708 }, { "epoch": 1.8242278757742674, "grad_norm": 1.796875, "learning_rate": 6.6169710169080585e-06, "loss": 0.8861, "step": 10709 }, { "epoch": 1.8243993398632574, "grad_norm": 1.625, "learning_rate": 6.615271626358895e-06, "loss": 0.8048, "step": 10710 }, { "epoch": 1.8245708039522472, "grad_norm": 1.71875, "learning_rate": 6.613572346198469e-06, "loss": 0.8505, "step": 10711 }, { "epoch": 1.824742268041237, "grad_norm": 1.7109375, "learning_rate": 6.611873176482201e-06, "loss": 0.8112, "step": 10712 }, { "epoch": 1.8249137321302271, "grad_norm": 1.671875, "learning_rate": 6.610174117265507e-06, "loss": 0.8226, "step": 10713 }, { "epoch": 1.825085196219217, "grad_norm": 1.6015625, "learning_rate": 6.608475168603801e-06, "loss": 0.863, "step": 10714 }, { "epoch": 1.8252566603082068, "grad_norm": 1.765625, "learning_rate": 6.606776330552491e-06, "loss": 0.8547, "step": 10715 }, { "epoch": 1.8254281243971966, "grad_norm": 1.6328125, "learning_rate": 6.605077603166981e-06, "loss": 0.7697, "step": 10716 }, { "epoch": 1.8255995884861864, "grad_norm": 1.65625, "learning_rate": 6.603378986502677e-06, "loss": 0.8732, "step": 10717 }, { "epoch": 1.8257710525751762, "grad_norm": 1.59375, "learning_rate": 6.6016804806149715e-06, "loss": 0.8357, "step": 10718 }, { "epoch": 1.825942516664166, "grad_norm": 1.7265625, "learning_rate": 6.5999820855592675e-06, "loss": 0.8082, "step": 10719 }, { "epoch": 1.826113980753156, "grad_norm": 1.6875, "learning_rate": 6.598283801390948e-06, "loss": 0.9118, "step": 10720 }, { "epoch": 1.8262854448421457, "grad_norm": 1.6796875, "learning_rate": 6.596585628165404e-06, "loss": 0.8519, "step": 10721 }, { "epoch": 1.8264569089311358, "grad_norm": 1.5859375, "learning_rate": 6.594887565938018e-06, "loss": 0.7778, "step": 10722 }, { "epoch": 1.8266283730201256, "grad_norm": 1.6796875, "learning_rate": 6.593189614764171e-06, "loss": 0.893, "step": 10723 }, { "epoch": 1.8267998371091154, "grad_norm": 1.6640625, "learning_rate": 6.591491774699239e-06, "loss": 0.9023, "step": 10724 }, { "epoch": 1.8269713011981055, "grad_norm": 1.7421875, "learning_rate": 6.589794045798596e-06, "loss": 0.8617, "step": 10725 }, { "epoch": 1.8271427652870953, "grad_norm": 1.65625, "learning_rate": 6.588096428117608e-06, "loss": 0.8279, "step": 10726 }, { "epoch": 1.8273142293760851, "grad_norm": 1.671875, "learning_rate": 6.586398921711646e-06, "loss": 0.8542, "step": 10727 }, { "epoch": 1.827485693465075, "grad_norm": 1.734375, "learning_rate": 6.5847015266360706e-06, "loss": 0.8184, "step": 10728 }, { "epoch": 1.8276571575540648, "grad_norm": 1.6796875, "learning_rate": 6.5830042429462386e-06, "loss": 0.81, "step": 10729 }, { "epoch": 1.8278286216430546, "grad_norm": 1.7421875, "learning_rate": 6.581307070697505e-06, "loss": 0.8955, "step": 10730 }, { "epoch": 1.8280000857320444, "grad_norm": 1.7265625, "learning_rate": 6.579610009945225e-06, "loss": 0.9194, "step": 10731 }, { "epoch": 1.8281715498210342, "grad_norm": 1.59375, "learning_rate": 6.57791306074474e-06, "loss": 0.8302, "step": 10732 }, { "epoch": 1.828343013910024, "grad_norm": 1.625, "learning_rate": 6.576216223151395e-06, "loss": 0.8272, "step": 10733 }, { "epoch": 1.8285144779990141, "grad_norm": 1.796875, "learning_rate": 6.57451949722053e-06, "loss": 0.8894, "step": 10734 }, { "epoch": 1.828685942088004, "grad_norm": 1.734375, "learning_rate": 6.572822883007486e-06, "loss": 0.932, "step": 10735 }, { "epoch": 1.8288574061769938, "grad_norm": 1.640625, "learning_rate": 6.571126380567594e-06, "loss": 0.8385, "step": 10736 }, { "epoch": 1.8290288702659838, "grad_norm": 1.7265625, "learning_rate": 6.569429989956182e-06, "loss": 0.8257, "step": 10737 }, { "epoch": 1.8292003343549736, "grad_norm": 1.796875, "learning_rate": 6.567733711228577e-06, "loss": 0.9127, "step": 10738 }, { "epoch": 1.8293717984439635, "grad_norm": 1.7265625, "learning_rate": 6.566037544440098e-06, "loss": 0.8548, "step": 10739 }, { "epoch": 1.8295432625329533, "grad_norm": 1.8125, "learning_rate": 6.564341489646068e-06, "loss": 0.9063, "step": 10740 }, { "epoch": 1.8297147266219431, "grad_norm": 1.75, "learning_rate": 6.562645546901798e-06, "loss": 0.8873, "step": 10741 }, { "epoch": 1.829886190710933, "grad_norm": 1.6171875, "learning_rate": 6.5609497162626044e-06, "loss": 0.8054, "step": 10742 }, { "epoch": 1.8300576547999228, "grad_norm": 1.71875, "learning_rate": 6.5592539977837875e-06, "loss": 0.8719, "step": 10743 }, { "epoch": 1.8302291188889126, "grad_norm": 1.609375, "learning_rate": 6.557558391520655e-06, "loss": 0.793, "step": 10744 }, { "epoch": 1.8304005829779024, "grad_norm": 1.6953125, "learning_rate": 6.555862897528507e-06, "loss": 0.8555, "step": 10745 }, { "epoch": 1.8305720470668925, "grad_norm": 1.671875, "learning_rate": 6.554167515862637e-06, "loss": 0.8055, "step": 10746 }, { "epoch": 1.8307435111558823, "grad_norm": 1.7890625, "learning_rate": 6.552472246578343e-06, "loss": 0.8477, "step": 10747 }, { "epoch": 1.8309149752448721, "grad_norm": 1.734375, "learning_rate": 6.55077708973091e-06, "loss": 0.8425, "step": 10748 }, { "epoch": 1.8310864393338622, "grad_norm": 1.78125, "learning_rate": 6.549082045375624e-06, "loss": 0.8409, "step": 10749 }, { "epoch": 1.831257903422852, "grad_norm": 1.6484375, "learning_rate": 6.547387113567768e-06, "loss": 0.8289, "step": 10750 }, { "epoch": 1.8314293675118418, "grad_norm": 1.75, "learning_rate": 6.54569229436262e-06, "loss": 0.9509, "step": 10751 }, { "epoch": 1.8316008316008316, "grad_norm": 1.6796875, "learning_rate": 6.543997587815454e-06, "loss": 0.8315, "step": 10752 }, { "epoch": 1.8317722956898215, "grad_norm": 1.6875, "learning_rate": 6.5423029939815394e-06, "loss": 0.8989, "step": 10753 }, { "epoch": 1.8319437597788113, "grad_norm": 1.6796875, "learning_rate": 6.5406085129161475e-06, "loss": 0.8576, "step": 10754 }, { "epoch": 1.8321152238678011, "grad_norm": 1.7734375, "learning_rate": 6.538914144674537e-06, "loss": 0.8601, "step": 10755 }, { "epoch": 1.832286687956791, "grad_norm": 1.671875, "learning_rate": 6.53721988931197e-06, "loss": 0.8074, "step": 10756 }, { "epoch": 1.8324581520457808, "grad_norm": 1.7578125, "learning_rate": 6.535525746883702e-06, "loss": 0.8987, "step": 10757 }, { "epoch": 1.8326296161347708, "grad_norm": 1.6328125, "learning_rate": 6.533831717444984e-06, "loss": 0.7944, "step": 10758 }, { "epoch": 1.8328010802237606, "grad_norm": 1.7578125, "learning_rate": 6.5321378010510675e-06, "loss": 0.8961, "step": 10759 }, { "epoch": 1.8329725443127505, "grad_norm": 1.7421875, "learning_rate": 6.5304439977571945e-06, "loss": 0.8753, "step": 10760 }, { "epoch": 1.8331440084017405, "grad_norm": 1.609375, "learning_rate": 6.52875030761861e-06, "loss": 0.825, "step": 10761 }, { "epoch": 1.8333154724907303, "grad_norm": 1.5859375, "learning_rate": 6.527056730690547e-06, "loss": 0.8377, "step": 10762 }, { "epoch": 1.8334869365797202, "grad_norm": 1.59375, "learning_rate": 6.525363267028244e-06, "loss": 0.7996, "step": 10763 }, { "epoch": 1.83365840066871, "grad_norm": 1.671875, "learning_rate": 6.523669916686928e-06, "loss": 0.8381, "step": 10764 }, { "epoch": 1.8338298647576998, "grad_norm": 1.8125, "learning_rate": 6.521976679721829e-06, "loss": 0.8257, "step": 10765 }, { "epoch": 1.8340013288466896, "grad_norm": 1.6484375, "learning_rate": 6.520283556188166e-06, "loss": 0.8644, "step": 10766 }, { "epoch": 1.8341727929356795, "grad_norm": 1.7578125, "learning_rate": 6.518590546141159e-06, "loss": 0.8382, "step": 10767 }, { "epoch": 1.8343442570246693, "grad_norm": 1.7578125, "learning_rate": 6.516897649636025e-06, "loss": 0.8607, "step": 10768 }, { "epoch": 1.8345157211136591, "grad_norm": 1.765625, "learning_rate": 6.5152048667279735e-06, "loss": 0.8957, "step": 10769 }, { "epoch": 1.8346871852026492, "grad_norm": 1.8125, "learning_rate": 6.513512197472214e-06, "loss": 0.9375, "step": 10770 }, { "epoch": 1.834858649291639, "grad_norm": 1.703125, "learning_rate": 6.511819641923951e-06, "loss": 0.8961, "step": 10771 }, { "epoch": 1.8350301133806288, "grad_norm": 1.6171875, "learning_rate": 6.510127200138385e-06, "loss": 0.8588, "step": 10772 }, { "epoch": 1.8352015774696189, "grad_norm": 1.7890625, "learning_rate": 6.50843487217071e-06, "loss": 0.8759, "step": 10773 }, { "epoch": 1.8353730415586087, "grad_norm": 1.6796875, "learning_rate": 6.506742658076124e-06, "loss": 0.8354, "step": 10774 }, { "epoch": 1.8355445056475985, "grad_norm": 1.7265625, "learning_rate": 6.505050557909816e-06, "loss": 0.7801, "step": 10775 }, { "epoch": 1.8357159697365883, "grad_norm": 1.859375, "learning_rate": 6.503358571726968e-06, "loss": 0.8814, "step": 10776 }, { "epoch": 1.8358874338255782, "grad_norm": 1.6953125, "learning_rate": 6.501666699582769e-06, "loss": 0.8261, "step": 10777 }, { "epoch": 1.836058897914568, "grad_norm": 1.828125, "learning_rate": 6.499974941532387e-06, "loss": 0.8936, "step": 10778 }, { "epoch": 1.8362303620035578, "grad_norm": 1.75, "learning_rate": 6.498283297631004e-06, "loss": 0.8725, "step": 10779 }, { "epoch": 1.8364018260925477, "grad_norm": 1.7421875, "learning_rate": 6.496591767933789e-06, "loss": 0.7837, "step": 10780 }, { "epoch": 1.8365732901815375, "grad_norm": 1.71875, "learning_rate": 6.4949003524959055e-06, "loss": 0.8273, "step": 10781 }, { "epoch": 1.8367447542705273, "grad_norm": 1.8046875, "learning_rate": 6.493209051372522e-06, "loss": 0.8467, "step": 10782 }, { "epoch": 1.8369162183595173, "grad_norm": 1.71875, "learning_rate": 6.491517864618799e-06, "loss": 0.8545, "step": 10783 }, { "epoch": 1.8370876824485072, "grad_norm": 1.6484375, "learning_rate": 6.4898267922898884e-06, "loss": 0.8044, "step": 10784 }, { "epoch": 1.837259146537497, "grad_norm": 1.6953125, "learning_rate": 6.488135834440945e-06, "loss": 0.8334, "step": 10785 }, { "epoch": 1.837430610626487, "grad_norm": 1.6875, "learning_rate": 6.4864449911271165e-06, "loss": 0.884, "step": 10786 }, { "epoch": 1.8376020747154769, "grad_norm": 1.8046875, "learning_rate": 6.484754262403547e-06, "loss": 0.8961, "step": 10787 }, { "epoch": 1.8377735388044667, "grad_norm": 1.5859375, "learning_rate": 6.483063648325383e-06, "loss": 0.7778, "step": 10788 }, { "epoch": 1.8379450028934565, "grad_norm": 1.7421875, "learning_rate": 6.48137314894775e-06, "loss": 0.8881, "step": 10789 }, { "epoch": 1.8381164669824464, "grad_norm": 1.7109375, "learning_rate": 6.479682764325792e-06, "loss": 0.8727, "step": 10790 }, { "epoch": 1.8382879310714362, "grad_norm": 1.6953125, "learning_rate": 6.477992494514633e-06, "loss": 0.8266, "step": 10791 }, { "epoch": 1.838459395160426, "grad_norm": 1.703125, "learning_rate": 6.476302339569405e-06, "loss": 0.8603, "step": 10792 }, { "epoch": 1.8386308592494158, "grad_norm": 1.6796875, "learning_rate": 6.474612299545225e-06, "loss": 0.7457, "step": 10793 }, { "epoch": 1.8388023233384057, "grad_norm": 1.6796875, "learning_rate": 6.472922374497211e-06, "loss": 0.8563, "step": 10794 }, { "epoch": 1.8389737874273957, "grad_norm": 1.59375, "learning_rate": 6.471232564480483e-06, "loss": 0.7924, "step": 10795 }, { "epoch": 1.8391452515163855, "grad_norm": 1.6875, "learning_rate": 6.469542869550147e-06, "loss": 0.883, "step": 10796 }, { "epoch": 1.8393167156053754, "grad_norm": 1.6640625, "learning_rate": 6.467853289761315e-06, "loss": 0.9064, "step": 10797 }, { "epoch": 1.8394881796943654, "grad_norm": 1.7265625, "learning_rate": 6.466163825169086e-06, "loss": 0.9077, "step": 10798 }, { "epoch": 1.8396596437833552, "grad_norm": 1.6875, "learning_rate": 6.464474475828563e-06, "loss": 0.8597, "step": 10799 }, { "epoch": 1.839831107872345, "grad_norm": 1.71875, "learning_rate": 6.4627852417948425e-06, "loss": 0.8379, "step": 10800 }, { "epoch": 1.8400025719613349, "grad_norm": 1.7421875, "learning_rate": 6.461096123123013e-06, "loss": 0.8876, "step": 10801 }, { "epoch": 1.8401740360503247, "grad_norm": 1.7109375, "learning_rate": 6.459407119868165e-06, "loss": 0.8891, "step": 10802 }, { "epoch": 1.8403455001393145, "grad_norm": 1.6640625, "learning_rate": 6.4577182320853836e-06, "loss": 0.8582, "step": 10803 }, { "epoch": 1.8405169642283044, "grad_norm": 1.6796875, "learning_rate": 6.456029459829751e-06, "loss": 0.8022, "step": 10804 }, { "epoch": 1.8406884283172942, "grad_norm": 1.71875, "learning_rate": 6.454340803156341e-06, "loss": 0.8657, "step": 10805 }, { "epoch": 1.840859892406284, "grad_norm": 1.6171875, "learning_rate": 6.452652262120231e-06, "loss": 0.8819, "step": 10806 }, { "epoch": 1.841031356495274, "grad_norm": 1.6484375, "learning_rate": 6.450963836776487e-06, "loss": 0.816, "step": 10807 }, { "epoch": 1.8412028205842639, "grad_norm": 1.7578125, "learning_rate": 6.449275527180178e-06, "loss": 0.9231, "step": 10808 }, { "epoch": 1.8413742846732537, "grad_norm": 1.7578125, "learning_rate": 6.447587333386365e-06, "loss": 0.9352, "step": 10809 }, { "epoch": 1.8415457487622438, "grad_norm": 1.65625, "learning_rate": 6.445899255450106e-06, "loss": 0.8779, "step": 10810 }, { "epoch": 1.8417172128512336, "grad_norm": 1.671875, "learning_rate": 6.444211293426457e-06, "loss": 0.8487, "step": 10811 }, { "epoch": 1.8418886769402234, "grad_norm": 1.7421875, "learning_rate": 6.442523447370466e-06, "loss": 0.8554, "step": 10812 }, { "epoch": 1.8420601410292132, "grad_norm": 1.7109375, "learning_rate": 6.440835717337182e-06, "loss": 0.9101, "step": 10813 }, { "epoch": 1.842231605118203, "grad_norm": 1.796875, "learning_rate": 6.439148103381647e-06, "loss": 0.8077, "step": 10814 }, { "epoch": 1.8424030692071929, "grad_norm": 1.78125, "learning_rate": 6.437460605558902e-06, "loss": 0.8836, "step": 10815 }, { "epoch": 1.8425745332961827, "grad_norm": 1.640625, "learning_rate": 6.435773223923982e-06, "loss": 0.8436, "step": 10816 }, { "epoch": 1.8427459973851725, "grad_norm": 1.8125, "learning_rate": 6.434085958531918e-06, "loss": 0.8848, "step": 10817 }, { "epoch": 1.8429174614741624, "grad_norm": 1.6953125, "learning_rate": 6.432398809437739e-06, "loss": 0.8224, "step": 10818 }, { "epoch": 1.8430889255631524, "grad_norm": 1.6796875, "learning_rate": 6.43071177669647e-06, "loss": 0.8838, "step": 10819 }, { "epoch": 1.8432603896521422, "grad_norm": 1.6953125, "learning_rate": 6.429024860363128e-06, "loss": 0.8179, "step": 10820 }, { "epoch": 1.843431853741132, "grad_norm": 1.6796875, "learning_rate": 6.427338060492734e-06, "loss": 0.8074, "step": 10821 }, { "epoch": 1.843603317830122, "grad_norm": 1.71875, "learning_rate": 6.4256513771403e-06, "loss": 0.9034, "step": 10822 }, { "epoch": 1.843774781919112, "grad_norm": 1.7578125, "learning_rate": 6.4239648103608384e-06, "loss": 0.8656, "step": 10823 }, { "epoch": 1.8439462460081018, "grad_norm": 1.7265625, "learning_rate": 6.4222783602093465e-06, "loss": 0.8631, "step": 10824 }, { "epoch": 1.8441177100970916, "grad_norm": 1.734375, "learning_rate": 6.420592026740829e-06, "loss": 0.7848, "step": 10825 }, { "epoch": 1.8442891741860814, "grad_norm": 1.6484375, "learning_rate": 6.418905810010285e-06, "loss": 0.7932, "step": 10826 }, { "epoch": 1.8444606382750712, "grad_norm": 1.71875, "learning_rate": 6.4172197100727075e-06, "loss": 0.7858, "step": 10827 }, { "epoch": 1.844632102364061, "grad_norm": 1.671875, "learning_rate": 6.4155337269830855e-06, "loss": 0.8253, "step": 10828 }, { "epoch": 1.8448035664530509, "grad_norm": 1.7265625, "learning_rate": 6.413847860796407e-06, "loss": 0.8208, "step": 10829 }, { "epoch": 1.8449750305420407, "grad_norm": 1.6484375, "learning_rate": 6.412162111567655e-06, "loss": 0.8985, "step": 10830 }, { "epoch": 1.8451464946310308, "grad_norm": 1.6640625, "learning_rate": 6.410476479351806e-06, "loss": 0.8778, "step": 10831 }, { "epoch": 1.8453179587200206, "grad_norm": 1.6171875, "learning_rate": 6.408790964203837e-06, "loss": 0.8216, "step": 10832 }, { "epoch": 1.8454894228090104, "grad_norm": 1.7265625, "learning_rate": 6.407105566178717e-06, "loss": 0.7939, "step": 10833 }, { "epoch": 1.8456608868980005, "grad_norm": 1.71875, "learning_rate": 6.405420285331414e-06, "loss": 0.8711, "step": 10834 }, { "epoch": 1.8458323509869903, "grad_norm": 1.65625, "learning_rate": 6.4037351217168965e-06, "loss": 0.8093, "step": 10835 }, { "epoch": 1.84600381507598, "grad_norm": 1.7109375, "learning_rate": 6.402050075390112e-06, "loss": 0.8094, "step": 10836 }, { "epoch": 1.84617527916497, "grad_norm": 1.7109375, "learning_rate": 6.400365146406027e-06, "loss": 0.8507, "step": 10837 }, { "epoch": 1.8463467432539598, "grad_norm": 1.6015625, "learning_rate": 6.398680334819587e-06, "loss": 0.867, "step": 10838 }, { "epoch": 1.8465182073429496, "grad_norm": 1.6796875, "learning_rate": 6.396995640685744e-06, "loss": 0.8413, "step": 10839 }, { "epoch": 1.8466896714319394, "grad_norm": 1.6640625, "learning_rate": 6.395311064059442e-06, "loss": 0.8135, "step": 10840 }, { "epoch": 1.8468611355209292, "grad_norm": 1.65625, "learning_rate": 6.393626604995617e-06, "loss": 0.8094, "step": 10841 }, { "epoch": 1.847032599609919, "grad_norm": 1.796875, "learning_rate": 6.391942263549211e-06, "loss": 0.8335, "step": 10842 }, { "epoch": 1.847204063698909, "grad_norm": 1.7890625, "learning_rate": 6.390258039775155e-06, "loss": 0.8797, "step": 10843 }, { "epoch": 1.847375527787899, "grad_norm": 1.6796875, "learning_rate": 6.388573933728376e-06, "loss": 0.8325, "step": 10844 }, { "epoch": 1.8475469918768888, "grad_norm": 1.828125, "learning_rate": 6.386889945463801e-06, "loss": 1.0021, "step": 10845 }, { "epoch": 1.8477184559658788, "grad_norm": 1.6796875, "learning_rate": 6.385206075036352e-06, "loss": 0.853, "step": 10846 }, { "epoch": 1.8478899200548686, "grad_norm": 1.609375, "learning_rate": 6.383522322500943e-06, "loss": 0.7751, "step": 10847 }, { "epoch": 1.8480613841438585, "grad_norm": 1.6953125, "learning_rate": 6.381838687912489e-06, "loss": 0.913, "step": 10848 }, { "epoch": 1.8482328482328483, "grad_norm": 1.703125, "learning_rate": 6.3801551713259015e-06, "loss": 0.8545, "step": 10849 }, { "epoch": 1.848404312321838, "grad_norm": 1.7578125, "learning_rate": 6.3784717727960844e-06, "loss": 0.8677, "step": 10850 }, { "epoch": 1.848575776410828, "grad_norm": 1.7421875, "learning_rate": 6.376788492377941e-06, "loss": 0.7775, "step": 10851 }, { "epoch": 1.8487472404998178, "grad_norm": 1.625, "learning_rate": 6.375105330126368e-06, "loss": 0.7807, "step": 10852 }, { "epoch": 1.8489187045888076, "grad_norm": 1.71875, "learning_rate": 6.373422286096259e-06, "loss": 0.8227, "step": 10853 }, { "epoch": 1.8490901686777974, "grad_norm": 1.7109375, "learning_rate": 6.371739360342507e-06, "loss": 0.9536, "step": 10854 }, { "epoch": 1.8492616327667875, "grad_norm": 1.7265625, "learning_rate": 6.3700565529199965e-06, "loss": 0.7772, "step": 10855 }, { "epoch": 1.8494330968557773, "grad_norm": 1.640625, "learning_rate": 6.3683738638836125e-06, "loss": 0.8825, "step": 10856 }, { "epoch": 1.849604560944767, "grad_norm": 1.6171875, "learning_rate": 6.366691293288229e-06, "loss": 0.8269, "step": 10857 }, { "epoch": 1.8497760250337572, "grad_norm": 1.7578125, "learning_rate": 6.365008841188729e-06, "loss": 0.9076, "step": 10858 }, { "epoch": 1.849947489122747, "grad_norm": 1.6875, "learning_rate": 6.363326507639978e-06, "loss": 0.9056, "step": 10859 }, { "epoch": 1.8501189532117368, "grad_norm": 1.71875, "learning_rate": 6.36164429269684e-06, "loss": 0.9067, "step": 10860 }, { "epoch": 1.8502904173007266, "grad_norm": 1.6640625, "learning_rate": 6.359962196414185e-06, "loss": 0.7929, "step": 10861 }, { "epoch": 1.8504618813897165, "grad_norm": 1.5859375, "learning_rate": 6.358280218846871e-06, "loss": 0.766, "step": 10862 }, { "epoch": 1.8506333454787063, "grad_norm": 1.59375, "learning_rate": 6.356598360049751e-06, "loss": 0.8052, "step": 10863 }, { "epoch": 1.850804809567696, "grad_norm": 1.8671875, "learning_rate": 6.354916620077677e-06, "loss": 0.8696, "step": 10864 }, { "epoch": 1.850976273656686, "grad_norm": 1.75, "learning_rate": 6.353234998985499e-06, "loss": 0.8394, "step": 10865 }, { "epoch": 1.8511477377456758, "grad_norm": 1.7109375, "learning_rate": 6.3515534968280604e-06, "loss": 0.8423, "step": 10866 }, { "epoch": 1.8513192018346658, "grad_norm": 1.6796875, "learning_rate": 6.3498721136601994e-06, "loss": 0.8579, "step": 10867 }, { "epoch": 1.8514906659236556, "grad_norm": 1.703125, "learning_rate": 6.348190849536755e-06, "loss": 0.7728, "step": 10868 }, { "epoch": 1.8516621300126455, "grad_norm": 1.5859375, "learning_rate": 6.346509704512563e-06, "loss": 0.779, "step": 10869 }, { "epoch": 1.8518335941016355, "grad_norm": 1.65625, "learning_rate": 6.344828678642444e-06, "loss": 0.7862, "step": 10870 }, { "epoch": 1.8520050581906253, "grad_norm": 1.6640625, "learning_rate": 6.343147771981225e-06, "loss": 0.825, "step": 10871 }, { "epoch": 1.8521765222796152, "grad_norm": 1.625, "learning_rate": 6.341466984583728e-06, "loss": 0.8207, "step": 10872 }, { "epoch": 1.852347986368605, "grad_norm": 1.6953125, "learning_rate": 6.339786316504769e-06, "loss": 0.8482, "step": 10873 }, { "epoch": 1.8525194504575948, "grad_norm": 1.609375, "learning_rate": 6.338105767799161e-06, "loss": 0.854, "step": 10874 }, { "epoch": 1.8526909145465846, "grad_norm": 1.7265625, "learning_rate": 6.336425338521712e-06, "loss": 0.8701, "step": 10875 }, { "epoch": 1.8528623786355745, "grad_norm": 1.8046875, "learning_rate": 6.33474502872723e-06, "loss": 0.9802, "step": 10876 }, { "epoch": 1.8530338427245643, "grad_norm": 1.7109375, "learning_rate": 6.333064838470515e-06, "loss": 0.8136, "step": 10877 }, { "epoch": 1.853205306813554, "grad_norm": 1.6640625, "learning_rate": 6.331384767806365e-06, "loss": 0.808, "step": 10878 }, { "epoch": 1.853376770902544, "grad_norm": 1.7421875, "learning_rate": 6.3297048167895705e-06, "loss": 0.9327, "step": 10879 }, { "epoch": 1.853548234991534, "grad_norm": 1.6328125, "learning_rate": 6.328024985474924e-06, "loss": 0.8691, "step": 10880 }, { "epoch": 1.8537196990805238, "grad_norm": 1.6796875, "learning_rate": 6.326345273917214e-06, "loss": 0.7949, "step": 10881 }, { "epoch": 1.8538911631695136, "grad_norm": 1.65625, "learning_rate": 6.324665682171214e-06, "loss": 0.8325, "step": 10882 }, { "epoch": 1.8540626272585037, "grad_norm": 1.796875, "learning_rate": 6.322986210291705e-06, "loss": 0.8751, "step": 10883 }, { "epoch": 1.8542340913474935, "grad_norm": 1.7265625, "learning_rate": 6.321306858333463e-06, "loss": 0.8528, "step": 10884 }, { "epoch": 1.8544055554364833, "grad_norm": 1.640625, "learning_rate": 6.319627626351258e-06, "loss": 0.8821, "step": 10885 }, { "epoch": 1.8545770195254732, "grad_norm": 1.75, "learning_rate": 6.317948514399854e-06, "loss": 0.8082, "step": 10886 }, { "epoch": 1.854748483614463, "grad_norm": 1.7890625, "learning_rate": 6.3162695225340155e-06, "loss": 0.8664, "step": 10887 }, { "epoch": 1.8549199477034528, "grad_norm": 1.6875, "learning_rate": 6.314590650808498e-06, "loss": 0.8479, "step": 10888 }, { "epoch": 1.8550914117924426, "grad_norm": 1.765625, "learning_rate": 6.312911899278059e-06, "loss": 0.8752, "step": 10889 }, { "epoch": 1.8552628758814325, "grad_norm": 1.71875, "learning_rate": 6.311233267997446e-06, "loss": 0.8617, "step": 10890 }, { "epoch": 1.8554343399704223, "grad_norm": 1.65625, "learning_rate": 6.309554757021408e-06, "loss": 0.8035, "step": 10891 }, { "epoch": 1.8556058040594123, "grad_norm": 1.625, "learning_rate": 6.307876366404687e-06, "loss": 0.8573, "step": 10892 }, { "epoch": 1.8557772681484022, "grad_norm": 1.6875, "learning_rate": 6.30619809620202e-06, "loss": 0.923, "step": 10893 }, { "epoch": 1.855948732237392, "grad_norm": 1.7109375, "learning_rate": 6.304519946468142e-06, "loss": 0.7693, "step": 10894 }, { "epoch": 1.856120196326382, "grad_norm": 1.640625, "learning_rate": 6.302841917257785e-06, "loss": 0.824, "step": 10895 }, { "epoch": 1.8562916604153719, "grad_norm": 1.6171875, "learning_rate": 6.301164008625674e-06, "loss": 0.7486, "step": 10896 }, { "epoch": 1.8564631245043617, "grad_norm": 1.734375, "learning_rate": 6.299486220626534e-06, "loss": 0.9049, "step": 10897 }, { "epoch": 1.8566345885933515, "grad_norm": 1.765625, "learning_rate": 6.297808553315084e-06, "loss": 0.8944, "step": 10898 }, { "epoch": 1.8568060526823413, "grad_norm": 1.7109375, "learning_rate": 6.296131006746037e-06, "loss": 0.9116, "step": 10899 }, { "epoch": 1.8569775167713312, "grad_norm": 1.7734375, "learning_rate": 6.294453580974106e-06, "loss": 0.9053, "step": 10900 }, { "epoch": 1.857148980860321, "grad_norm": 1.7421875, "learning_rate": 6.2927762760539975e-06, "loss": 0.8544, "step": 10901 }, { "epoch": 1.8573204449493108, "grad_norm": 2.25, "learning_rate": 6.291099092040414e-06, "loss": 0.8509, "step": 10902 }, { "epoch": 1.8574919090383006, "grad_norm": 1.78125, "learning_rate": 6.289422028988057e-06, "loss": 0.815, "step": 10903 }, { "epoch": 1.8576633731272907, "grad_norm": 1.6484375, "learning_rate": 6.287745086951621e-06, "loss": 0.7843, "step": 10904 }, { "epoch": 1.8578348372162805, "grad_norm": 1.734375, "learning_rate": 6.286068265985795e-06, "loss": 0.8522, "step": 10905 }, { "epoch": 1.8580063013052703, "grad_norm": 1.765625, "learning_rate": 6.284391566145269e-06, "loss": 0.8605, "step": 10906 }, { "epoch": 1.8581777653942604, "grad_norm": 1.8046875, "learning_rate": 6.282714987484725e-06, "loss": 0.8064, "step": 10907 }, { "epoch": 1.8583492294832502, "grad_norm": 1.7109375, "learning_rate": 6.281038530058843e-06, "loss": 0.832, "step": 10908 }, { "epoch": 1.85852069357224, "grad_norm": 1.640625, "learning_rate": 6.2793621939223e-06, "loss": 0.7933, "step": 10909 }, { "epoch": 1.8586921576612299, "grad_norm": 1.671875, "learning_rate": 6.277685979129766e-06, "loss": 0.8531, "step": 10910 }, { "epoch": 1.8588636217502197, "grad_norm": 1.6953125, "learning_rate": 6.276009885735909e-06, "loss": 0.8221, "step": 10911 }, { "epoch": 1.8590350858392095, "grad_norm": 1.7265625, "learning_rate": 6.274333913795392e-06, "loss": 0.8708, "step": 10912 }, { "epoch": 1.8592065499281993, "grad_norm": 1.78125, "learning_rate": 6.2726580633628775e-06, "loss": 0.8046, "step": 10913 }, { "epoch": 1.8593780140171892, "grad_norm": 1.671875, "learning_rate": 6.270982334493017e-06, "loss": 0.8399, "step": 10914 }, { "epoch": 1.859549478106179, "grad_norm": 1.734375, "learning_rate": 6.26930672724047e-06, "loss": 0.9163, "step": 10915 }, { "epoch": 1.859720942195169, "grad_norm": 1.6171875, "learning_rate": 6.267631241659875e-06, "loss": 0.9097, "step": 10916 }, { "epoch": 1.8598924062841589, "grad_norm": 1.71875, "learning_rate": 6.26595587780588e-06, "loss": 0.8519, "step": 10917 }, { "epoch": 1.8600638703731487, "grad_norm": 1.75, "learning_rate": 6.2642806357331244e-06, "loss": 0.9399, "step": 10918 }, { "epoch": 1.8602353344621387, "grad_norm": 1.6875, "learning_rate": 6.262605515496245e-06, "loss": 0.8465, "step": 10919 }, { "epoch": 1.8604067985511286, "grad_norm": 1.6953125, "learning_rate": 6.260930517149875e-06, "loss": 0.7655, "step": 10920 }, { "epoch": 1.8605782626401184, "grad_norm": 1.6328125, "learning_rate": 6.2592556407486394e-06, "loss": 0.9003, "step": 10921 }, { "epoch": 1.8607497267291082, "grad_norm": 1.703125, "learning_rate": 6.257580886347162e-06, "loss": 0.8566, "step": 10922 }, { "epoch": 1.860921190818098, "grad_norm": 1.640625, "learning_rate": 6.255906254000067e-06, "loss": 0.839, "step": 10923 }, { "epoch": 1.8610926549070879, "grad_norm": 1.5859375, "learning_rate": 6.254231743761967e-06, "loss": 0.7622, "step": 10924 }, { "epoch": 1.8612641189960777, "grad_norm": 1.546875, "learning_rate": 6.252557355687476e-06, "loss": 0.7633, "step": 10925 }, { "epoch": 1.8614355830850675, "grad_norm": 1.6484375, "learning_rate": 6.250883089831202e-06, "loss": 0.809, "step": 10926 }, { "epoch": 1.8616070471740573, "grad_norm": 1.6953125, "learning_rate": 6.2492089462477515e-06, "loss": 0.862, "step": 10927 }, { "epoch": 1.8617785112630474, "grad_norm": 1.7734375, "learning_rate": 6.247534924991716e-06, "loss": 0.8738, "step": 10928 }, { "epoch": 1.8619499753520372, "grad_norm": 1.71875, "learning_rate": 6.245861026117699e-06, "loss": 0.8735, "step": 10929 }, { "epoch": 1.862121439441027, "grad_norm": 1.6875, "learning_rate": 6.244187249680287e-06, "loss": 0.8735, "step": 10930 }, { "epoch": 1.862292903530017, "grad_norm": 1.6953125, "learning_rate": 6.242513595734075e-06, "loss": 0.8638, "step": 10931 }, { "epoch": 1.862464367619007, "grad_norm": 1.6796875, "learning_rate": 6.240840064333644e-06, "loss": 0.8454, "step": 10932 }, { "epoch": 1.8626358317079967, "grad_norm": 1.7265625, "learning_rate": 6.239166655533575e-06, "loss": 0.8941, "step": 10933 }, { "epoch": 1.8628072957969866, "grad_norm": 1.6015625, "learning_rate": 6.237493369388441e-06, "loss": 0.7839, "step": 10934 }, { "epoch": 1.8629787598859764, "grad_norm": 1.796875, "learning_rate": 6.235820205952818e-06, "loss": 0.8964, "step": 10935 }, { "epoch": 1.8631502239749662, "grad_norm": 1.75, "learning_rate": 6.2341471652812734e-06, "loss": 0.8891, "step": 10936 }, { "epoch": 1.863321688063956, "grad_norm": 1.6640625, "learning_rate": 6.2324742474283695e-06, "loss": 0.8195, "step": 10937 }, { "epoch": 1.8634931521529459, "grad_norm": 1.6875, "learning_rate": 6.23080145244867e-06, "loss": 0.7823, "step": 10938 }, { "epoch": 1.8636646162419357, "grad_norm": 1.609375, "learning_rate": 6.229128780396727e-06, "loss": 0.8456, "step": 10939 }, { "epoch": 1.8638360803309257, "grad_norm": 1.6171875, "learning_rate": 6.227456231327094e-06, "loss": 0.8666, "step": 10940 }, { "epoch": 1.8640075444199156, "grad_norm": 1.8125, "learning_rate": 6.225783805294319e-06, "loss": 0.8293, "step": 10941 }, { "epoch": 1.8641790085089054, "grad_norm": 1.6953125, "learning_rate": 6.224111502352947e-06, "loss": 0.9055, "step": 10942 }, { "epoch": 1.8643504725978954, "grad_norm": 1.6875, "learning_rate": 6.222439322557516e-06, "loss": 0.7858, "step": 10943 }, { "epoch": 1.8645219366868853, "grad_norm": 1.734375, "learning_rate": 6.2207672659625665e-06, "loss": 0.9471, "step": 10944 }, { "epoch": 1.864693400775875, "grad_norm": 1.7265625, "learning_rate": 6.219095332622626e-06, "loss": 0.9263, "step": 10945 }, { "epoch": 1.864864864864865, "grad_norm": 1.6875, "learning_rate": 6.217423522592223e-06, "loss": 0.8449, "step": 10946 }, { "epoch": 1.8650363289538547, "grad_norm": 1.8359375, "learning_rate": 6.215751835925885e-06, "loss": 0.8745, "step": 10947 }, { "epoch": 1.8652077930428446, "grad_norm": 1.578125, "learning_rate": 6.2140802726781294e-06, "loss": 0.8291, "step": 10948 }, { "epoch": 1.8653792571318344, "grad_norm": 1.7109375, "learning_rate": 6.2124088329034715e-06, "loss": 0.7714, "step": 10949 }, { "epoch": 1.8655507212208242, "grad_norm": 1.828125, "learning_rate": 6.210737516656427e-06, "loss": 0.849, "step": 10950 }, { "epoch": 1.865722185309814, "grad_norm": 1.703125, "learning_rate": 6.2090663239915e-06, "loss": 0.8369, "step": 10951 }, { "epoch": 1.865893649398804, "grad_norm": 1.71875, "learning_rate": 6.207395254963193e-06, "loss": 0.8538, "step": 10952 }, { "epoch": 1.866065113487794, "grad_norm": 1.734375, "learning_rate": 6.205724309626011e-06, "loss": 0.8414, "step": 10953 }, { "epoch": 1.8662365775767837, "grad_norm": 1.78125, "learning_rate": 6.204053488034446e-06, "loss": 0.8347, "step": 10954 }, { "epoch": 1.8664080416657738, "grad_norm": 1.6015625, "learning_rate": 6.2023827902429915e-06, "loss": 0.7904, "step": 10955 }, { "epoch": 1.8665795057547636, "grad_norm": 1.65625, "learning_rate": 6.200712216306134e-06, "loss": 0.8911, "step": 10956 }, { "epoch": 1.8667509698437534, "grad_norm": 1.7265625, "learning_rate": 6.1990417662783574e-06, "loss": 0.8466, "step": 10957 }, { "epoch": 1.8669224339327433, "grad_norm": 1.6484375, "learning_rate": 6.197371440214144e-06, "loss": 0.8515, "step": 10958 }, { "epoch": 1.867093898021733, "grad_norm": 1.6875, "learning_rate": 6.195701238167966e-06, "loss": 0.8596, "step": 10959 }, { "epoch": 1.867265362110723, "grad_norm": 1.6796875, "learning_rate": 6.194031160194296e-06, "loss": 0.8283, "step": 10960 }, { "epoch": 1.8674368261997127, "grad_norm": 1.6484375, "learning_rate": 6.192361206347603e-06, "loss": 0.7804, "step": 10961 }, { "epoch": 1.8676082902887026, "grad_norm": 1.6796875, "learning_rate": 6.190691376682349e-06, "loss": 0.8363, "step": 10962 }, { "epoch": 1.8677797543776924, "grad_norm": 1.640625, "learning_rate": 6.189021671252993e-06, "loss": 0.8353, "step": 10963 }, { "epoch": 1.8679512184666824, "grad_norm": 1.6484375, "learning_rate": 6.187352090113992e-06, "loss": 0.8868, "step": 10964 }, { "epoch": 1.8681226825556723, "grad_norm": 1.765625, "learning_rate": 6.185682633319796e-06, "loss": 0.8438, "step": 10965 }, { "epoch": 1.868294146644662, "grad_norm": 1.6796875, "learning_rate": 6.184013300924852e-06, "loss": 0.8665, "step": 10966 }, { "epoch": 1.868465610733652, "grad_norm": 1.7578125, "learning_rate": 6.1823440929836055e-06, "loss": 0.8625, "step": 10967 }, { "epoch": 1.868637074822642, "grad_norm": 1.6875, "learning_rate": 6.180675009550492e-06, "loss": 0.8351, "step": 10968 }, { "epoch": 1.8688085389116318, "grad_norm": 1.7890625, "learning_rate": 6.179006050679947e-06, "loss": 0.8879, "step": 10969 }, { "epoch": 1.8689800030006216, "grad_norm": 1.6484375, "learning_rate": 6.177337216426407e-06, "loss": 0.8025, "step": 10970 }, { "epoch": 1.8691514670896114, "grad_norm": 1.7109375, "learning_rate": 6.175668506844294e-06, "loss": 0.8274, "step": 10971 }, { "epoch": 1.8693229311786013, "grad_norm": 1.71875, "learning_rate": 6.173999921988032e-06, "loss": 0.8356, "step": 10972 }, { "epoch": 1.869494395267591, "grad_norm": 1.6640625, "learning_rate": 6.172331461912044e-06, "loss": 0.7581, "step": 10973 }, { "epoch": 1.869665859356581, "grad_norm": 1.6875, "learning_rate": 6.170663126670737e-06, "loss": 0.8047, "step": 10974 }, { "epoch": 1.8698373234455707, "grad_norm": 1.703125, "learning_rate": 6.1689949163185245e-06, "loss": 0.809, "step": 10975 }, { "epoch": 1.8700087875345606, "grad_norm": 1.6875, "learning_rate": 6.167326830909815e-06, "loss": 0.785, "step": 10976 }, { "epoch": 1.8701802516235506, "grad_norm": 1.6015625, "learning_rate": 6.1656588704990085e-06, "loss": 0.7857, "step": 10977 }, { "epoch": 1.8703517157125404, "grad_norm": 1.6875, "learning_rate": 6.163991035140506e-06, "loss": 0.8535, "step": 10978 }, { "epoch": 1.8705231798015303, "grad_norm": 1.8046875, "learning_rate": 6.162323324888702e-06, "loss": 0.8995, "step": 10979 }, { "epoch": 1.8706946438905203, "grad_norm": 1.6875, "learning_rate": 6.160655739797985e-06, "loss": 0.7442, "step": 10980 }, { "epoch": 1.8708661079795101, "grad_norm": 1.6875, "learning_rate": 6.158988279922741e-06, "loss": 0.9137, "step": 10981 }, { "epoch": 1.8710375720685, "grad_norm": 1.703125, "learning_rate": 6.157320945317353e-06, "loss": 0.8714, "step": 10982 }, { "epoch": 1.8712090361574898, "grad_norm": 1.765625, "learning_rate": 6.1556537360362014e-06, "loss": 0.9099, "step": 10983 }, { "epoch": 1.8713805002464796, "grad_norm": 1.7109375, "learning_rate": 6.153986652133657e-06, "loss": 0.8458, "step": 10984 }, { "epoch": 1.8715519643354694, "grad_norm": 1.578125, "learning_rate": 6.152319693664091e-06, "loss": 0.8526, "step": 10985 }, { "epoch": 1.8717234284244593, "grad_norm": 1.6640625, "learning_rate": 6.150652860681869e-06, "loss": 0.8533, "step": 10986 }, { "epoch": 1.871894892513449, "grad_norm": 1.8125, "learning_rate": 6.148986153241352e-06, "loss": 0.9229, "step": 10987 }, { "epoch": 1.872066356602439, "grad_norm": 1.796875, "learning_rate": 6.147319571396897e-06, "loss": 0.8749, "step": 10988 }, { "epoch": 1.872237820691429, "grad_norm": 1.7890625, "learning_rate": 6.14565311520286e-06, "loss": 0.8774, "step": 10989 }, { "epoch": 1.8724092847804188, "grad_norm": 1.734375, "learning_rate": 6.143986784713588e-06, "loss": 0.948, "step": 10990 }, { "epoch": 1.8725807488694086, "grad_norm": 1.640625, "learning_rate": 6.142320579983427e-06, "loss": 0.8464, "step": 10991 }, { "epoch": 1.8727522129583987, "grad_norm": 1.6796875, "learning_rate": 6.14065450106672e-06, "loss": 0.8901, "step": 10992 }, { "epoch": 1.8729236770473885, "grad_norm": 1.7578125, "learning_rate": 6.138988548017802e-06, "loss": 0.805, "step": 10993 }, { "epoch": 1.8730951411363783, "grad_norm": 1.7578125, "learning_rate": 6.137322720891007e-06, "loss": 0.823, "step": 10994 }, { "epoch": 1.8732666052253681, "grad_norm": 1.671875, "learning_rate": 6.135657019740663e-06, "loss": 0.8371, "step": 10995 }, { "epoch": 1.873438069314358, "grad_norm": 1.7421875, "learning_rate": 6.133991444621097e-06, "loss": 0.9085, "step": 10996 }, { "epoch": 1.8736095334033478, "grad_norm": 1.6640625, "learning_rate": 6.132325995586628e-06, "loss": 0.8843, "step": 10997 }, { "epoch": 1.8737809974923376, "grad_norm": 1.796875, "learning_rate": 6.130660672691571e-06, "loss": 0.9687, "step": 10998 }, { "epoch": 1.8739524615813274, "grad_norm": 1.71875, "learning_rate": 6.128995475990241e-06, "loss": 0.8697, "step": 10999 }, { "epoch": 1.8741239256703173, "grad_norm": 1.71875, "learning_rate": 6.127330405536943e-06, "loss": 0.8857, "step": 11000 }, { "epoch": 1.8742953897593073, "grad_norm": 1.7421875, "learning_rate": 6.125665461385986e-06, "loss": 0.8626, "step": 11001 }, { "epoch": 1.8744668538482971, "grad_norm": 1.6640625, "learning_rate": 6.124000643591667e-06, "loss": 0.7529, "step": 11002 }, { "epoch": 1.874638317937287, "grad_norm": 1.703125, "learning_rate": 6.122335952208283e-06, "loss": 0.8251, "step": 11003 }, { "epoch": 1.874809782026277, "grad_norm": 1.703125, "learning_rate": 6.120671387290125e-06, "loss": 0.8308, "step": 11004 }, { "epoch": 1.8749812461152668, "grad_norm": 1.7265625, "learning_rate": 6.1190069488914806e-06, "loss": 0.8574, "step": 11005 }, { "epoch": 1.8751527102042567, "grad_norm": 1.4921875, "learning_rate": 6.117342637066635e-06, "loss": 0.7836, "step": 11006 }, { "epoch": 1.8753241742932465, "grad_norm": 1.71875, "learning_rate": 6.115678451869866e-06, "loss": 0.8602, "step": 11007 }, { "epoch": 1.8754956383822363, "grad_norm": 1.6328125, "learning_rate": 6.114014393355453e-06, "loss": 0.8605, "step": 11008 }, { "epoch": 1.8756671024712261, "grad_norm": 1.703125, "learning_rate": 6.112350461577661e-06, "loss": 0.8752, "step": 11009 }, { "epoch": 1.875838566560216, "grad_norm": 1.671875, "learning_rate": 6.110686656590761e-06, "loss": 0.754, "step": 11010 }, { "epoch": 1.8760100306492058, "grad_norm": 1.625, "learning_rate": 6.109022978449013e-06, "loss": 0.8233, "step": 11011 }, { "epoch": 1.8761814947381956, "grad_norm": 1.6796875, "learning_rate": 6.107359427206679e-06, "loss": 0.8455, "step": 11012 }, { "epoch": 1.8763529588271857, "grad_norm": 1.6953125, "learning_rate": 6.105696002918012e-06, "loss": 0.7755, "step": 11013 }, { "epoch": 1.8765244229161755, "grad_norm": 1.71875, "learning_rate": 6.104032705637264e-06, "loss": 0.8127, "step": 11014 }, { "epoch": 1.8766958870051653, "grad_norm": 1.6875, "learning_rate": 6.102369535418679e-06, "loss": 0.8543, "step": 11015 }, { "epoch": 1.8768673510941554, "grad_norm": 1.8046875, "learning_rate": 6.100706492316499e-06, "loss": 0.9207, "step": 11016 }, { "epoch": 1.8770388151831452, "grad_norm": 1.6953125, "learning_rate": 6.099043576384966e-06, "loss": 0.8285, "step": 11017 }, { "epoch": 1.877210279272135, "grad_norm": 1.71875, "learning_rate": 6.097380787678311e-06, "loss": 0.7908, "step": 11018 }, { "epoch": 1.8773817433611248, "grad_norm": 1.7734375, "learning_rate": 6.095718126250769e-06, "loss": 0.8402, "step": 11019 }, { "epoch": 1.8775532074501147, "grad_norm": 1.921875, "learning_rate": 6.094055592156557e-06, "loss": 0.8721, "step": 11020 }, { "epoch": 1.8777246715391045, "grad_norm": 1.71875, "learning_rate": 6.092393185449901e-06, "loss": 0.8221, "step": 11021 }, { "epoch": 1.8778961356280943, "grad_norm": 1.6484375, "learning_rate": 6.090730906185016e-06, "loss": 0.9318, "step": 11022 }, { "epoch": 1.8780675997170841, "grad_norm": 1.734375, "learning_rate": 6.089068754416118e-06, "loss": 0.8173, "step": 11023 }, { "epoch": 1.878239063806074, "grad_norm": 1.796875, "learning_rate": 6.087406730197414e-06, "loss": 0.8182, "step": 11024 }, { "epoch": 1.878410527895064, "grad_norm": 1.65625, "learning_rate": 6.085744833583111e-06, "loss": 0.9004, "step": 11025 }, { "epoch": 1.8785819919840538, "grad_norm": 1.7109375, "learning_rate": 6.08408306462741e-06, "loss": 0.897, "step": 11026 }, { "epoch": 1.8787534560730437, "grad_norm": 1.6796875, "learning_rate": 6.082421423384505e-06, "loss": 0.8675, "step": 11027 }, { "epoch": 1.8789249201620337, "grad_norm": 1.6171875, "learning_rate": 6.0807599099085915e-06, "loss": 0.7529, "step": 11028 }, { "epoch": 1.8790963842510235, "grad_norm": 1.671875, "learning_rate": 6.079098524253853e-06, "loss": 0.8412, "step": 11029 }, { "epoch": 1.8792678483400134, "grad_norm": 1.6015625, "learning_rate": 6.077437266474478e-06, "loss": 0.85, "step": 11030 }, { "epoch": 1.8794393124290032, "grad_norm": 1.796875, "learning_rate": 6.075776136624649e-06, "loss": 0.9143, "step": 11031 }, { "epoch": 1.879610776517993, "grad_norm": 1.6953125, "learning_rate": 6.074115134758532e-06, "loss": 0.8095, "step": 11032 }, { "epoch": 1.8797822406069828, "grad_norm": 1.59375, "learning_rate": 6.0724542609303035e-06, "loss": 0.7707, "step": 11033 }, { "epoch": 1.8799537046959727, "grad_norm": 1.703125, "learning_rate": 6.070793515194133e-06, "loss": 0.8904, "step": 11034 }, { "epoch": 1.8801251687849625, "grad_norm": 1.6640625, "learning_rate": 6.069132897604182e-06, "loss": 0.8905, "step": 11035 }, { "epoch": 1.8802966328739523, "grad_norm": 1.7421875, "learning_rate": 6.06747240821461e-06, "loss": 0.8963, "step": 11036 }, { "epoch": 1.8804680969629424, "grad_norm": 1.65625, "learning_rate": 6.065812047079569e-06, "loss": 0.9203, "step": 11037 }, { "epoch": 1.8806395610519322, "grad_norm": 1.7421875, "learning_rate": 6.064151814253214e-06, "loss": 0.9206, "step": 11038 }, { "epoch": 1.880811025140922, "grad_norm": 1.7578125, "learning_rate": 6.062491709789688e-06, "loss": 0.8701, "step": 11039 }, { "epoch": 1.880982489229912, "grad_norm": 1.6796875, "learning_rate": 6.060831733743136e-06, "loss": 0.8292, "step": 11040 }, { "epoch": 1.881153953318902, "grad_norm": 1.7421875, "learning_rate": 6.059171886167694e-06, "loss": 0.8637, "step": 11041 }, { "epoch": 1.8813254174078917, "grad_norm": 1.7734375, "learning_rate": 6.0575121671174985e-06, "loss": 0.9126, "step": 11042 }, { "epoch": 1.8814968814968815, "grad_norm": 1.6875, "learning_rate": 6.055852576646677e-06, "loss": 0.8704, "step": 11043 }, { "epoch": 1.8816683455858714, "grad_norm": 1.703125, "learning_rate": 6.0541931148093525e-06, "loss": 0.9013, "step": 11044 }, { "epoch": 1.8818398096748612, "grad_norm": 1.6796875, "learning_rate": 6.052533781659651e-06, "loss": 0.8459, "step": 11045 }, { "epoch": 1.882011273763851, "grad_norm": 1.8046875, "learning_rate": 6.050874577251686e-06, "loss": 0.9123, "step": 11046 }, { "epoch": 1.8821827378528408, "grad_norm": 1.65625, "learning_rate": 6.049215501639574e-06, "loss": 0.8889, "step": 11047 }, { "epoch": 1.8823542019418307, "grad_norm": 1.6328125, "learning_rate": 6.04755655487742e-06, "loss": 0.7668, "step": 11048 }, { "epoch": 1.8825256660308207, "grad_norm": 1.6640625, "learning_rate": 6.0458977370193316e-06, "loss": 0.809, "step": 11049 }, { "epoch": 1.8826971301198105, "grad_norm": 1.6953125, "learning_rate": 6.044239048119407e-06, "loss": 0.8047, "step": 11050 }, { "epoch": 1.8828685942088004, "grad_norm": 1.703125, "learning_rate": 6.042580488231744e-06, "loss": 0.7854, "step": 11051 }, { "epoch": 1.8830400582977904, "grad_norm": 1.7578125, "learning_rate": 6.040922057410432e-06, "loss": 0.9012, "step": 11052 }, { "epoch": 1.8832115223867802, "grad_norm": 1.625, "learning_rate": 6.039263755709561e-06, "loss": 0.8462, "step": 11053 }, { "epoch": 1.88338298647577, "grad_norm": 1.6953125, "learning_rate": 6.037605583183217e-06, "loss": 0.8311, "step": 11054 }, { "epoch": 1.88355445056476, "grad_norm": 1.734375, "learning_rate": 6.035947539885472e-06, "loss": 0.9026, "step": 11055 }, { "epoch": 1.8837259146537497, "grad_norm": 1.890625, "learning_rate": 6.034289625870405e-06, "loss": 0.8557, "step": 11056 }, { "epoch": 1.8838973787427395, "grad_norm": 1.7734375, "learning_rate": 6.032631841192088e-06, "loss": 0.8124, "step": 11057 }, { "epoch": 1.8840688428317294, "grad_norm": 1.59375, "learning_rate": 6.030974185904586e-06, "loss": 0.8698, "step": 11058 }, { "epoch": 1.8842403069207192, "grad_norm": 1.6953125, "learning_rate": 6.029316660061961e-06, "loss": 0.9407, "step": 11059 }, { "epoch": 1.884411771009709, "grad_norm": 1.609375, "learning_rate": 6.027659263718273e-06, "loss": 0.7825, "step": 11060 }, { "epoch": 1.8845832350986989, "grad_norm": 1.7109375, "learning_rate": 6.026001996927574e-06, "loss": 0.843, "step": 11061 }, { "epoch": 1.884754699187689, "grad_norm": 1.703125, "learning_rate": 6.0243448597439154e-06, "loss": 0.9317, "step": 11062 }, { "epoch": 1.8849261632766787, "grad_norm": 1.609375, "learning_rate": 6.0226878522213385e-06, "loss": 0.8107, "step": 11063 }, { "epoch": 1.8850976273656685, "grad_norm": 1.6875, "learning_rate": 6.02103097441389e-06, "loss": 0.7993, "step": 11064 }, { "epoch": 1.8852690914546586, "grad_norm": 1.71875, "learning_rate": 6.0193742263756115e-06, "loss": 0.8842, "step": 11065 }, { "epoch": 1.8854405555436484, "grad_norm": 1.75, "learning_rate": 6.017717608160522e-06, "loss": 0.8449, "step": 11066 }, { "epoch": 1.8856120196326382, "grad_norm": 1.78125, "learning_rate": 6.0160611198226595e-06, "loss": 0.8286, "step": 11067 }, { "epoch": 1.885783483721628, "grad_norm": 1.671875, "learning_rate": 6.014404761416044e-06, "loss": 0.8558, "step": 11068 }, { "epoch": 1.885954947810618, "grad_norm": 1.6484375, "learning_rate": 6.012748532994699e-06, "loss": 0.7994, "step": 11069 }, { "epoch": 1.8861264118996077, "grad_norm": 1.6796875, "learning_rate": 6.011092434612639e-06, "loss": 0.8989, "step": 11070 }, { "epoch": 1.8862978759885975, "grad_norm": 1.6796875, "learning_rate": 6.009436466323873e-06, "loss": 0.8535, "step": 11071 }, { "epoch": 1.8864693400775874, "grad_norm": 1.6328125, "learning_rate": 6.007780628182413e-06, "loss": 0.846, "step": 11072 }, { "epoch": 1.8866408041665772, "grad_norm": 1.703125, "learning_rate": 6.00612492024226e-06, "loss": 0.8123, "step": 11073 }, { "epoch": 1.8868122682555672, "grad_norm": 1.6484375, "learning_rate": 6.004469342557413e-06, "loss": 0.7837, "step": 11074 }, { "epoch": 1.886983732344557, "grad_norm": 1.6015625, "learning_rate": 6.002813895181865e-06, "loss": 0.83, "step": 11075 }, { "epoch": 1.887155196433547, "grad_norm": 1.6953125, "learning_rate": 6.00115857816961e-06, "loss": 0.8554, "step": 11076 }, { "epoch": 1.887326660522537, "grad_norm": 1.7421875, "learning_rate": 5.999503391574635e-06, "loss": 0.8573, "step": 11077 }, { "epoch": 1.8874981246115268, "grad_norm": 1.6953125, "learning_rate": 5.9978483354509155e-06, "loss": 0.7935, "step": 11078 }, { "epoch": 1.8876695887005166, "grad_norm": 1.765625, "learning_rate": 5.99619340985243e-06, "loss": 0.8641, "step": 11079 }, { "epoch": 1.8878410527895064, "grad_norm": 1.6171875, "learning_rate": 5.9945386148331565e-06, "loss": 0.8151, "step": 11080 }, { "epoch": 1.8880125168784962, "grad_norm": 1.625, "learning_rate": 5.992883950447062e-06, "loss": 0.8537, "step": 11081 }, { "epoch": 1.888183980967486, "grad_norm": 1.703125, "learning_rate": 5.99122941674811e-06, "loss": 0.8656, "step": 11082 }, { "epoch": 1.888355445056476, "grad_norm": 1.6875, "learning_rate": 5.989575013790264e-06, "loss": 0.8776, "step": 11083 }, { "epoch": 1.8885269091454657, "grad_norm": 1.8125, "learning_rate": 5.987920741627479e-06, "loss": 0.8657, "step": 11084 }, { "epoch": 1.8886983732344556, "grad_norm": 1.6484375, "learning_rate": 5.986266600313706e-06, "loss": 0.9305, "step": 11085 }, { "epoch": 1.8888698373234456, "grad_norm": 1.7890625, "learning_rate": 5.984612589902893e-06, "loss": 0.9352, "step": 11086 }, { "epoch": 1.8890413014124354, "grad_norm": 1.6328125, "learning_rate": 5.982958710448984e-06, "loss": 0.8567, "step": 11087 }, { "epoch": 1.8892127655014253, "grad_norm": 1.71875, "learning_rate": 5.9813049620059206e-06, "loss": 0.8659, "step": 11088 }, { "epoch": 1.8893842295904153, "grad_norm": 1.734375, "learning_rate": 5.979651344627633e-06, "loss": 0.8445, "step": 11089 }, { "epoch": 1.8895556936794051, "grad_norm": 1.84375, "learning_rate": 5.977997858368055e-06, "loss": 0.7571, "step": 11090 }, { "epoch": 1.889727157768395, "grad_norm": 1.6953125, "learning_rate": 5.976344503281113e-06, "loss": 0.7836, "step": 11091 }, { "epoch": 1.8898986218573848, "grad_norm": 1.5703125, "learning_rate": 5.974691279420727e-06, "loss": 0.8175, "step": 11092 }, { "epoch": 1.8900700859463746, "grad_norm": 1.671875, "learning_rate": 5.973038186840816e-06, "loss": 0.8845, "step": 11093 }, { "epoch": 1.8902415500353644, "grad_norm": 1.7734375, "learning_rate": 5.971385225595294e-06, "loss": 0.8002, "step": 11094 }, { "epoch": 1.8904130141243543, "grad_norm": 1.671875, "learning_rate": 5.969732395738071e-06, "loss": 0.8699, "step": 11095 }, { "epoch": 1.890584478213344, "grad_norm": 1.7109375, "learning_rate": 5.968079697323052e-06, "loss": 0.7574, "step": 11096 }, { "epoch": 1.890755942302334, "grad_norm": 1.65625, "learning_rate": 5.966427130404136e-06, "loss": 0.8268, "step": 11097 }, { "epoch": 1.890927406391324, "grad_norm": 1.7109375, "learning_rate": 5.964774695035219e-06, "loss": 0.8457, "step": 11098 }, { "epoch": 1.8910988704803138, "grad_norm": 1.96875, "learning_rate": 5.963122391270195e-06, "loss": 0.9298, "step": 11099 }, { "epoch": 1.8912703345693036, "grad_norm": 1.7265625, "learning_rate": 5.961470219162955e-06, "loss": 0.8533, "step": 11100 }, { "epoch": 1.8914417986582936, "grad_norm": 1.765625, "learning_rate": 5.959818178767376e-06, "loss": 0.9166, "step": 11101 }, { "epoch": 1.8916132627472835, "grad_norm": 1.6875, "learning_rate": 5.9581662701373386e-06, "loss": 0.8582, "step": 11102 }, { "epoch": 1.8917847268362733, "grad_norm": 1.78125, "learning_rate": 5.9565144933267205e-06, "loss": 0.7822, "step": 11103 }, { "epoch": 1.8919561909252631, "grad_norm": 1.7421875, "learning_rate": 5.9548628483893915e-06, "loss": 0.9107, "step": 11104 }, { "epoch": 1.892127655014253, "grad_norm": 1.796875, "learning_rate": 5.953211335379217e-06, "loss": 0.9657, "step": 11105 }, { "epoch": 1.8922991191032428, "grad_norm": 1.6796875, "learning_rate": 5.951559954350059e-06, "loss": 0.9031, "step": 11106 }, { "epoch": 1.8924705831922326, "grad_norm": 1.6171875, "learning_rate": 5.949908705355778e-06, "loss": 0.7995, "step": 11107 }, { "epoch": 1.8926420472812224, "grad_norm": 1.7578125, "learning_rate": 5.948257588450224e-06, "loss": 0.8743, "step": 11108 }, { "epoch": 1.8928135113702123, "grad_norm": 1.671875, "learning_rate": 5.946606603687246e-06, "loss": 0.8933, "step": 11109 }, { "epoch": 1.8929849754592023, "grad_norm": 1.7109375, "learning_rate": 5.944955751120691e-06, "loss": 0.8185, "step": 11110 }, { "epoch": 1.8931564395481921, "grad_norm": 1.6875, "learning_rate": 5.943305030804403e-06, "loss": 0.8492, "step": 11111 }, { "epoch": 1.893327903637182, "grad_norm": 1.7109375, "learning_rate": 5.94165444279221e-06, "loss": 0.8794, "step": 11112 }, { "epoch": 1.893499367726172, "grad_norm": 1.6796875, "learning_rate": 5.94000398713795e-06, "loss": 0.8339, "step": 11113 }, { "epoch": 1.8936708318151618, "grad_norm": 1.625, "learning_rate": 5.938353663895447e-06, "loss": 0.8516, "step": 11114 }, { "epoch": 1.8938422959041517, "grad_norm": 1.65625, "learning_rate": 5.936703473118526e-06, "loss": 0.8399, "step": 11115 }, { "epoch": 1.8940137599931415, "grad_norm": 1.6484375, "learning_rate": 5.935053414861005e-06, "loss": 0.7824, "step": 11116 }, { "epoch": 1.8941852240821313, "grad_norm": 1.6875, "learning_rate": 5.933403489176701e-06, "loss": 0.8342, "step": 11117 }, { "epoch": 1.8943566881711211, "grad_norm": 1.640625, "learning_rate": 5.931753696119419e-06, "loss": 0.8584, "step": 11118 }, { "epoch": 1.894528152260111, "grad_norm": 1.7109375, "learning_rate": 5.930104035742972e-06, "loss": 0.8126, "step": 11119 }, { "epoch": 1.8946996163491008, "grad_norm": 1.6796875, "learning_rate": 5.928454508101156e-06, "loss": 0.8786, "step": 11120 }, { "epoch": 1.8948710804380906, "grad_norm": 1.671875, "learning_rate": 5.926805113247772e-06, "loss": 0.8117, "step": 11121 }, { "epoch": 1.8950425445270807, "grad_norm": 1.71875, "learning_rate": 5.9251558512366115e-06, "loss": 0.8673, "step": 11122 }, { "epoch": 1.8952140086160705, "grad_norm": 1.6328125, "learning_rate": 5.923506722121467e-06, "loss": 0.898, "step": 11123 }, { "epoch": 1.8953854727050603, "grad_norm": 1.609375, "learning_rate": 5.9218577259561146e-06, "loss": 0.7834, "step": 11124 }, { "epoch": 1.8955569367940504, "grad_norm": 1.7265625, "learning_rate": 5.920208862794339e-06, "loss": 0.8138, "step": 11125 }, { "epoch": 1.8957284008830402, "grad_norm": 1.703125, "learning_rate": 5.9185601326899145e-06, "loss": 0.8742, "step": 11126 }, { "epoch": 1.89589986497203, "grad_norm": 1.7734375, "learning_rate": 5.9169115356966135e-06, "loss": 0.8483, "step": 11127 }, { "epoch": 1.8960713290610198, "grad_norm": 1.7265625, "learning_rate": 5.915263071868203e-06, "loss": 0.8284, "step": 11128 }, { "epoch": 1.8962427931500097, "grad_norm": 1.6640625, "learning_rate": 5.913614741258446e-06, "loss": 0.8855, "step": 11129 }, { "epoch": 1.8964142572389995, "grad_norm": 1.7734375, "learning_rate": 5.911966543921101e-06, "loss": 0.8719, "step": 11130 }, { "epoch": 1.8965857213279893, "grad_norm": 1.671875, "learning_rate": 5.91031847990992e-06, "loss": 0.8622, "step": 11131 }, { "epoch": 1.8967571854169791, "grad_norm": 1.6875, "learning_rate": 5.908670549278655e-06, "loss": 0.832, "step": 11132 }, { "epoch": 1.896928649505969, "grad_norm": 1.703125, "learning_rate": 5.907022752081047e-06, "loss": 0.8111, "step": 11133 }, { "epoch": 1.897100113594959, "grad_norm": 1.625, "learning_rate": 5.905375088370842e-06, "loss": 0.8047, "step": 11134 }, { "epoch": 1.8972715776839488, "grad_norm": 1.7421875, "learning_rate": 5.903727558201776e-06, "loss": 0.9245, "step": 11135 }, { "epoch": 1.8974430417729387, "grad_norm": 1.703125, "learning_rate": 5.902080161627577e-06, "loss": 0.9039, "step": 11136 }, { "epoch": 1.8976145058619287, "grad_norm": 1.6796875, "learning_rate": 5.900432898701977e-06, "loss": 0.7891, "step": 11137 }, { "epoch": 1.8977859699509185, "grad_norm": 1.6796875, "learning_rate": 5.898785769478695e-06, "loss": 0.8917, "step": 11138 }, { "epoch": 1.8979574340399084, "grad_norm": 1.7109375, "learning_rate": 5.897138774011455e-06, "loss": 0.8707, "step": 11139 }, { "epoch": 1.8981288981288982, "grad_norm": 1.7109375, "learning_rate": 5.8954919123539675e-06, "loss": 0.8902, "step": 11140 }, { "epoch": 1.898300362217888, "grad_norm": 1.6796875, "learning_rate": 5.893845184559948e-06, "loss": 0.8544, "step": 11141 }, { "epoch": 1.8984718263068778, "grad_norm": 1.6328125, "learning_rate": 5.892198590683096e-06, "loss": 0.811, "step": 11142 }, { "epoch": 1.8986432903958677, "grad_norm": 1.6328125, "learning_rate": 5.890552130777119e-06, "loss": 0.8141, "step": 11143 }, { "epoch": 1.8988147544848575, "grad_norm": 1.65625, "learning_rate": 5.88890580489571e-06, "loss": 0.6992, "step": 11144 }, { "epoch": 1.8989862185738473, "grad_norm": 1.6875, "learning_rate": 5.887259613092564e-06, "loss": 0.7467, "step": 11145 }, { "epoch": 1.8991576826628374, "grad_norm": 1.671875, "learning_rate": 5.885613555421372e-06, "loss": 0.8146, "step": 11146 }, { "epoch": 1.8993291467518272, "grad_norm": 1.734375, "learning_rate": 5.883967631935813e-06, "loss": 0.7893, "step": 11147 }, { "epoch": 1.899500610840817, "grad_norm": 1.859375, "learning_rate": 5.882321842689569e-06, "loss": 0.8589, "step": 11148 }, { "epoch": 1.899672074929807, "grad_norm": 1.6171875, "learning_rate": 5.880676187736316e-06, "loss": 0.7366, "step": 11149 }, { "epoch": 1.8998435390187969, "grad_norm": 1.6875, "learning_rate": 5.8790306671297234e-06, "loss": 0.8711, "step": 11150 }, { "epoch": 1.9000150031077867, "grad_norm": 1.6484375, "learning_rate": 5.87738528092346e-06, "loss": 0.8846, "step": 11151 }, { "epoch": 1.9001864671967765, "grad_norm": 1.65625, "learning_rate": 5.875740029171185e-06, "loss": 0.8274, "step": 11152 }, { "epoch": 1.9003579312857664, "grad_norm": 1.78125, "learning_rate": 5.87409491192656e-06, "loss": 0.7669, "step": 11153 }, { "epoch": 1.9005293953747562, "grad_norm": 1.6328125, "learning_rate": 5.872449929243236e-06, "loss": 0.8362, "step": 11154 }, { "epoch": 1.900700859463746, "grad_norm": 1.7265625, "learning_rate": 5.870805081174862e-06, "loss": 0.8204, "step": 11155 }, { "epoch": 1.9008723235527358, "grad_norm": 1.7109375, "learning_rate": 5.869160367775084e-06, "loss": 0.7579, "step": 11156 }, { "epoch": 1.9010437876417257, "grad_norm": 1.7578125, "learning_rate": 5.86751578909754e-06, "loss": 0.8474, "step": 11157 }, { "epoch": 1.9012152517307155, "grad_norm": 1.6875, "learning_rate": 5.865871345195875e-06, "loss": 0.8747, "step": 11158 }, { "epoch": 1.9013867158197055, "grad_norm": 1.640625, "learning_rate": 5.8642270361237066e-06, "loss": 0.8889, "step": 11159 }, { "epoch": 1.9015581799086954, "grad_norm": 1.71875, "learning_rate": 5.8625828619346695e-06, "loss": 0.8505, "step": 11160 }, { "epoch": 1.9017296439976852, "grad_norm": 1.7578125, "learning_rate": 5.860938822682385e-06, "loss": 0.9267, "step": 11161 }, { "epoch": 1.9019011080866752, "grad_norm": 1.78125, "learning_rate": 5.859294918420473e-06, "loss": 0.8676, "step": 11162 }, { "epoch": 1.902072572175665, "grad_norm": 1.6015625, "learning_rate": 5.857651149202545e-06, "loss": 0.8717, "step": 11163 }, { "epoch": 1.9022440362646549, "grad_norm": 1.6796875, "learning_rate": 5.856007515082212e-06, "loss": 0.8625, "step": 11164 }, { "epoch": 1.9024155003536447, "grad_norm": 1.640625, "learning_rate": 5.8543640161130765e-06, "loss": 0.7168, "step": 11165 }, { "epoch": 1.9025869644426345, "grad_norm": 1.671875, "learning_rate": 5.852720652348744e-06, "loss": 0.826, "step": 11166 }, { "epoch": 1.9027584285316244, "grad_norm": 1.6953125, "learning_rate": 5.851077423842807e-06, "loss": 0.81, "step": 11167 }, { "epoch": 1.9029298926206142, "grad_norm": 1.671875, "learning_rate": 5.8494343306488595e-06, "loss": 0.9127, "step": 11168 }, { "epoch": 1.903101356709604, "grad_norm": 1.7734375, "learning_rate": 5.847791372820493e-06, "loss": 0.9623, "step": 11169 }, { "epoch": 1.9032728207985938, "grad_norm": 1.6171875, "learning_rate": 5.846148550411279e-06, "loss": 0.8097, "step": 11170 }, { "epoch": 1.9034442848875839, "grad_norm": 1.8125, "learning_rate": 5.8445058634748055e-06, "loss": 0.8239, "step": 11171 }, { "epoch": 1.9036157489765737, "grad_norm": 1.578125, "learning_rate": 5.842863312064642e-06, "loss": 0.7607, "step": 11172 }, { "epoch": 1.9037872130655635, "grad_norm": 1.7734375, "learning_rate": 5.841220896234358e-06, "loss": 0.8831, "step": 11173 }, { "epoch": 1.9039586771545536, "grad_norm": 1.6875, "learning_rate": 5.839578616037525e-06, "loss": 0.8309, "step": 11174 }, { "epoch": 1.9041301412435434, "grad_norm": 1.734375, "learning_rate": 5.837936471527701e-06, "loss": 0.8537, "step": 11175 }, { "epoch": 1.9043016053325332, "grad_norm": 1.6484375, "learning_rate": 5.836294462758441e-06, "loss": 0.7846, "step": 11176 }, { "epoch": 1.904473069421523, "grad_norm": 1.71875, "learning_rate": 5.834652589783295e-06, "loss": 0.8714, "step": 11177 }, { "epoch": 1.9046445335105129, "grad_norm": 1.828125, "learning_rate": 5.833010852655815e-06, "loss": 0.9001, "step": 11178 }, { "epoch": 1.9048159975995027, "grad_norm": 1.65625, "learning_rate": 5.8313692514295416e-06, "loss": 0.8025, "step": 11179 }, { "epoch": 1.9049874616884925, "grad_norm": 1.6875, "learning_rate": 5.829727786158011e-06, "loss": 0.88, "step": 11180 }, { "epoch": 1.9051589257774824, "grad_norm": 1.6328125, "learning_rate": 5.828086456894769e-06, "loss": 0.8556, "step": 11181 }, { "epoch": 1.9053303898664722, "grad_norm": 1.734375, "learning_rate": 5.826445263693335e-06, "loss": 0.8384, "step": 11182 }, { "epoch": 1.9055018539554622, "grad_norm": 1.765625, "learning_rate": 5.824804206607235e-06, "loss": 0.8997, "step": 11183 }, { "epoch": 1.905673318044452, "grad_norm": 1.7578125, "learning_rate": 5.823163285689992e-06, "loss": 0.8814, "step": 11184 }, { "epoch": 1.9058447821334419, "grad_norm": 1.65625, "learning_rate": 5.821522500995125e-06, "loss": 0.8122, "step": 11185 }, { "epoch": 1.906016246222432, "grad_norm": 1.6171875, "learning_rate": 5.819881852576141e-06, "loss": 0.7983, "step": 11186 }, { "epoch": 1.9061877103114218, "grad_norm": 1.765625, "learning_rate": 5.818241340486554e-06, "loss": 0.8247, "step": 11187 }, { "epoch": 1.9063591744004116, "grad_norm": 1.5625, "learning_rate": 5.8166009647798616e-06, "loss": 0.7229, "step": 11188 }, { "epoch": 1.9065306384894014, "grad_norm": 1.671875, "learning_rate": 5.8149607255095665e-06, "loss": 0.7919, "step": 11189 }, { "epoch": 1.9067021025783912, "grad_norm": 1.6796875, "learning_rate": 5.813320622729159e-06, "loss": 0.8322, "step": 11190 }, { "epoch": 1.906873566667381, "grad_norm": 1.703125, "learning_rate": 5.811680656492134e-06, "loss": 0.8727, "step": 11191 }, { "epoch": 1.9070450307563709, "grad_norm": 1.6796875, "learning_rate": 5.810040826851978e-06, "loss": 0.793, "step": 11192 }, { "epoch": 1.9072164948453607, "grad_norm": 1.703125, "learning_rate": 5.808401133862165e-06, "loss": 0.7886, "step": 11193 }, { "epoch": 1.9073879589343505, "grad_norm": 1.7265625, "learning_rate": 5.8067615775761746e-06, "loss": 0.8129, "step": 11194 }, { "epoch": 1.9075594230233406, "grad_norm": 1.7890625, "learning_rate": 5.8051221580474786e-06, "loss": 0.9707, "step": 11195 }, { "epoch": 1.9077308871123304, "grad_norm": 1.6328125, "learning_rate": 5.803482875329543e-06, "loss": 0.7983, "step": 11196 }, { "epoch": 1.9079023512013202, "grad_norm": 1.6875, "learning_rate": 5.801843729475836e-06, "loss": 0.7935, "step": 11197 }, { "epoch": 1.9080738152903103, "grad_norm": 1.6171875, "learning_rate": 5.800204720539813e-06, "loss": 0.8917, "step": 11198 }, { "epoch": 1.9082452793793, "grad_norm": 1.6171875, "learning_rate": 5.798565848574931e-06, "loss": 0.799, "step": 11199 }, { "epoch": 1.90841674346829, "grad_norm": 1.7734375, "learning_rate": 5.796927113634637e-06, "loss": 0.8679, "step": 11200 }, { "epoch": 1.90841674346829, "eval_loss": 0.8319188952445984, "eval_runtime": 835.9538, "eval_samples_per_second": 2.989, "eval_steps_per_second": 2.989, "step": 11200 }, { "epoch": 1.9085882075572798, "grad_norm": 1.75, "learning_rate": 5.795288515772377e-06, "loss": 0.8503, "step": 11201 }, { "epoch": 1.9087596716462696, "grad_norm": 1.71875, "learning_rate": 5.7936500550415934e-06, "loss": 0.8268, "step": 11202 }, { "epoch": 1.9089311357352594, "grad_norm": 1.9453125, "learning_rate": 5.792011731495719e-06, "loss": 0.9486, "step": 11203 }, { "epoch": 1.9091025998242492, "grad_norm": 1.6171875, "learning_rate": 5.7903735451881935e-06, "loss": 0.823, "step": 11204 }, { "epoch": 1.909274063913239, "grad_norm": 1.6953125, "learning_rate": 5.788735496172435e-06, "loss": 0.8247, "step": 11205 }, { "epoch": 1.9094455280022289, "grad_norm": 1.953125, "learning_rate": 5.7870975845018685e-06, "loss": 0.8613, "step": 11206 }, { "epoch": 1.909616992091219, "grad_norm": 1.6640625, "learning_rate": 5.785459810229914e-06, "loss": 0.8429, "step": 11207 }, { "epoch": 1.9097884561802088, "grad_norm": 1.7734375, "learning_rate": 5.783822173409988e-06, "loss": 0.8999, "step": 11208 }, { "epoch": 1.9099599202691986, "grad_norm": 1.734375, "learning_rate": 5.782184674095495e-06, "loss": 0.8341, "step": 11209 }, { "epoch": 1.9101313843581886, "grad_norm": 1.671875, "learning_rate": 5.780547312339844e-06, "loss": 0.8422, "step": 11210 }, { "epoch": 1.9103028484471785, "grad_norm": 1.75, "learning_rate": 5.778910088196435e-06, "loss": 0.8455, "step": 11211 }, { "epoch": 1.9104743125361683, "grad_norm": 1.703125, "learning_rate": 5.77727300171866e-06, "loss": 0.9009, "step": 11212 }, { "epoch": 1.910645776625158, "grad_norm": 1.6953125, "learning_rate": 5.775636052959916e-06, "loss": 0.9527, "step": 11213 }, { "epoch": 1.910817240714148, "grad_norm": 1.5859375, "learning_rate": 5.773999241973587e-06, "loss": 0.8251, "step": 11214 }, { "epoch": 1.9109887048031378, "grad_norm": 1.6953125, "learning_rate": 5.7723625688130565e-06, "loss": 0.7843, "step": 11215 }, { "epoch": 1.9111601688921276, "grad_norm": 1.6015625, "learning_rate": 5.770726033531704e-06, "loss": 0.8589, "step": 11216 }, { "epoch": 1.9113316329811174, "grad_norm": 1.703125, "learning_rate": 5.769089636182901e-06, "loss": 0.8252, "step": 11217 }, { "epoch": 1.9115030970701072, "grad_norm": 1.7265625, "learning_rate": 5.767453376820016e-06, "loss": 0.8801, "step": 11218 }, { "epoch": 1.9116745611590973, "grad_norm": 1.6640625, "learning_rate": 5.765817255496414e-06, "loss": 0.8598, "step": 11219 }, { "epoch": 1.911846025248087, "grad_norm": 1.609375, "learning_rate": 5.764181272265456e-06, "loss": 0.8349, "step": 11220 }, { "epoch": 1.912017489337077, "grad_norm": 1.5859375, "learning_rate": 5.762545427180499e-06, "loss": 0.8272, "step": 11221 }, { "epoch": 1.912188953426067, "grad_norm": 1.671875, "learning_rate": 5.760909720294892e-06, "loss": 0.8555, "step": 11222 }, { "epoch": 1.9123604175150568, "grad_norm": 1.59375, "learning_rate": 5.759274151661981e-06, "loss": 0.8261, "step": 11223 }, { "epoch": 1.9125318816040466, "grad_norm": 1.7265625, "learning_rate": 5.757638721335111e-06, "loss": 0.901, "step": 11224 }, { "epoch": 1.9127033456930365, "grad_norm": 1.7109375, "learning_rate": 5.756003429367615e-06, "loss": 0.8994, "step": 11225 }, { "epoch": 1.9128748097820263, "grad_norm": 1.71875, "learning_rate": 5.7543682758128295e-06, "loss": 0.8293, "step": 11226 }, { "epoch": 1.9130462738710161, "grad_norm": 1.6640625, "learning_rate": 5.752733260724086e-06, "loss": 0.8326, "step": 11227 }, { "epoch": 1.913217737960006, "grad_norm": 1.7109375, "learning_rate": 5.751098384154701e-06, "loss": 0.8523, "step": 11228 }, { "epoch": 1.9133892020489958, "grad_norm": 1.6953125, "learning_rate": 5.749463646157998e-06, "loss": 0.8387, "step": 11229 }, { "epoch": 1.9135606661379856, "grad_norm": 1.7109375, "learning_rate": 5.74782904678729e-06, "loss": 0.7988, "step": 11230 }, { "epoch": 1.9137321302269756, "grad_norm": 1.75, "learning_rate": 5.74619458609589e-06, "loss": 0.7845, "step": 11231 }, { "epoch": 1.9139035943159655, "grad_norm": 1.640625, "learning_rate": 5.744560264137102e-06, "loss": 0.9016, "step": 11232 }, { "epoch": 1.9140750584049553, "grad_norm": 1.7265625, "learning_rate": 5.7429260809642295e-06, "loss": 0.8691, "step": 11233 }, { "epoch": 1.9142465224939453, "grad_norm": 1.75, "learning_rate": 5.741292036630568e-06, "loss": 0.8374, "step": 11234 }, { "epoch": 1.9144179865829352, "grad_norm": 1.6328125, "learning_rate": 5.739658131189405e-06, "loss": 0.8578, "step": 11235 }, { "epoch": 1.914589450671925, "grad_norm": 1.7265625, "learning_rate": 5.738024364694039e-06, "loss": 0.9524, "step": 11236 }, { "epoch": 1.9147609147609148, "grad_norm": 1.6796875, "learning_rate": 5.736390737197745e-06, "loss": 0.8665, "step": 11237 }, { "epoch": 1.9149323788499046, "grad_norm": 1.7421875, "learning_rate": 5.734757248753811e-06, "loss": 0.8449, "step": 11238 }, { "epoch": 1.9151038429388945, "grad_norm": 1.765625, "learning_rate": 5.7331238994155e-06, "loss": 0.9447, "step": 11239 }, { "epoch": 1.9152753070278843, "grad_norm": 1.5859375, "learning_rate": 5.7314906892360855e-06, "loss": 0.8613, "step": 11240 }, { "epoch": 1.9154467711168741, "grad_norm": 1.7109375, "learning_rate": 5.729857618268832e-06, "loss": 0.8167, "step": 11241 }, { "epoch": 1.915618235205864, "grad_norm": 1.7265625, "learning_rate": 5.728224686567004e-06, "loss": 0.9106, "step": 11242 }, { "epoch": 1.915789699294854, "grad_norm": 1.796875, "learning_rate": 5.726591894183855e-06, "loss": 0.7939, "step": 11243 }, { "epoch": 1.9159611633838438, "grad_norm": 1.65625, "learning_rate": 5.724959241172634e-06, "loss": 0.7932, "step": 11244 }, { "epoch": 1.9161326274728336, "grad_norm": 1.7421875, "learning_rate": 5.723326727586593e-06, "loss": 0.8271, "step": 11245 }, { "epoch": 1.9163040915618237, "grad_norm": 1.828125, "learning_rate": 5.721694353478971e-06, "loss": 0.9315, "step": 11246 }, { "epoch": 1.9164755556508135, "grad_norm": 1.6796875, "learning_rate": 5.720062118903006e-06, "loss": 0.8342, "step": 11247 }, { "epoch": 1.9166470197398033, "grad_norm": 1.6484375, "learning_rate": 5.718430023911932e-06, "loss": 0.8443, "step": 11248 }, { "epoch": 1.9168184838287932, "grad_norm": 1.734375, "learning_rate": 5.7167980685589785e-06, "loss": 0.8486, "step": 11249 }, { "epoch": 1.916989947917783, "grad_norm": 1.6171875, "learning_rate": 5.715166252897373e-06, "loss": 0.8103, "step": 11250 }, { "epoch": 1.9171614120067728, "grad_norm": 1.71875, "learning_rate": 5.713534576980328e-06, "loss": 0.8686, "step": 11251 }, { "epoch": 1.9173328760957626, "grad_norm": 1.796875, "learning_rate": 5.711903040861055e-06, "loss": 0.8277, "step": 11252 }, { "epoch": 1.9175043401847525, "grad_norm": 1.6953125, "learning_rate": 5.7102716445927785e-06, "loss": 0.8404, "step": 11253 }, { "epoch": 1.9176758042737423, "grad_norm": 1.8671875, "learning_rate": 5.708640388228697e-06, "loss": 0.9247, "step": 11254 }, { "epoch": 1.9178472683627321, "grad_norm": 1.671875, "learning_rate": 5.707009271822011e-06, "loss": 0.7726, "step": 11255 }, { "epoch": 1.9180187324517222, "grad_norm": 1.703125, "learning_rate": 5.705378295425919e-06, "loss": 0.8043, "step": 11256 }, { "epoch": 1.918190196540712, "grad_norm": 1.6953125, "learning_rate": 5.703747459093611e-06, "loss": 0.924, "step": 11257 }, { "epoch": 1.9183616606297018, "grad_norm": 1.7109375, "learning_rate": 5.702116762878278e-06, "loss": 0.8636, "step": 11258 }, { "epoch": 1.9185331247186919, "grad_norm": 1.640625, "learning_rate": 5.700486206833103e-06, "loss": 0.8355, "step": 11259 }, { "epoch": 1.9187045888076817, "grad_norm": 1.6171875, "learning_rate": 5.698855791011262e-06, "loss": 0.817, "step": 11260 }, { "epoch": 1.9188760528966715, "grad_norm": 1.5859375, "learning_rate": 5.697225515465934e-06, "loss": 0.8022, "step": 11261 }, { "epoch": 1.9190475169856613, "grad_norm": 1.6953125, "learning_rate": 5.69559538025028e-06, "loss": 0.9162, "step": 11262 }, { "epoch": 1.9192189810746512, "grad_norm": 1.6875, "learning_rate": 5.693965385417471e-06, "loss": 0.8811, "step": 11263 }, { "epoch": 1.919390445163641, "grad_norm": 1.59375, "learning_rate": 5.692335531020665e-06, "loss": 0.7753, "step": 11264 }, { "epoch": 1.9195619092526308, "grad_norm": 1.7578125, "learning_rate": 5.69070581711302e-06, "loss": 0.8442, "step": 11265 }, { "epoch": 1.9197333733416206, "grad_norm": 1.7109375, "learning_rate": 5.689076243747684e-06, "loss": 0.8683, "step": 11266 }, { "epoch": 1.9199048374306105, "grad_norm": 1.6953125, "learning_rate": 5.687446810977806e-06, "loss": 0.8505, "step": 11267 }, { "epoch": 1.9200763015196005, "grad_norm": 1.6796875, "learning_rate": 5.6858175188565266e-06, "loss": 0.8469, "step": 11268 }, { "epoch": 1.9202477656085903, "grad_norm": 1.6953125, "learning_rate": 5.684188367436984e-06, "loss": 0.8282, "step": 11269 }, { "epoch": 1.9204192296975802, "grad_norm": 1.796875, "learning_rate": 5.68255935677231e-06, "loss": 0.8958, "step": 11270 }, { "epoch": 1.9205906937865702, "grad_norm": 1.609375, "learning_rate": 5.680930486915633e-06, "loss": 0.7198, "step": 11271 }, { "epoch": 1.92076215787556, "grad_norm": 1.6796875, "learning_rate": 5.679301757920078e-06, "loss": 0.8596, "step": 11272 }, { "epoch": 1.9209336219645499, "grad_norm": 1.6953125, "learning_rate": 5.677673169838762e-06, "loss": 0.8341, "step": 11273 }, { "epoch": 1.9211050860535397, "grad_norm": 1.71875, "learning_rate": 5.676044722724801e-06, "loss": 0.843, "step": 11274 }, { "epoch": 1.9212765501425295, "grad_norm": 1.7109375, "learning_rate": 5.674416416631304e-06, "loss": 0.8879, "step": 11275 }, { "epoch": 1.9214480142315193, "grad_norm": 1.6328125, "learning_rate": 5.6727882516113765e-06, "loss": 0.823, "step": 11276 }, { "epoch": 1.9216194783205092, "grad_norm": 1.7734375, "learning_rate": 5.671160227718118e-06, "loss": 0.9125, "step": 11277 }, { "epoch": 1.921790942409499, "grad_norm": 1.7109375, "learning_rate": 5.669532345004627e-06, "loss": 0.875, "step": 11278 }, { "epoch": 1.9219624064984888, "grad_norm": 1.71875, "learning_rate": 5.667904603523992e-06, "loss": 0.8969, "step": 11279 }, { "epoch": 1.9221338705874789, "grad_norm": 1.6953125, "learning_rate": 5.666277003329301e-06, "loss": 0.9196, "step": 11280 }, { "epoch": 1.9223053346764687, "grad_norm": 1.6640625, "learning_rate": 5.664649544473636e-06, "loss": 0.9005, "step": 11281 }, { "epoch": 1.9224767987654585, "grad_norm": 1.71875, "learning_rate": 5.6630222270100755e-06, "loss": 0.8406, "step": 11282 }, { "epoch": 1.9226482628544486, "grad_norm": 1.6171875, "learning_rate": 5.661395050991691e-06, "loss": 0.8144, "step": 11283 }, { "epoch": 1.9228197269434384, "grad_norm": 1.625, "learning_rate": 5.659768016471556e-06, "loss": 0.7749, "step": 11284 }, { "epoch": 1.9229911910324282, "grad_norm": 1.671875, "learning_rate": 5.658141123502726e-06, "loss": 0.8384, "step": 11285 }, { "epoch": 1.923162655121418, "grad_norm": 1.6328125, "learning_rate": 5.656514372138265e-06, "loss": 0.8448, "step": 11286 }, { "epoch": 1.9233341192104079, "grad_norm": 1.6328125, "learning_rate": 5.654887762431224e-06, "loss": 0.8417, "step": 11287 }, { "epoch": 1.9235055832993977, "grad_norm": 1.78125, "learning_rate": 5.653261294434655e-06, "loss": 0.8934, "step": 11288 }, { "epoch": 1.9236770473883875, "grad_norm": 1.65625, "learning_rate": 5.651634968201606e-06, "loss": 0.9038, "step": 11289 }, { "epoch": 1.9238485114773773, "grad_norm": 1.6328125, "learning_rate": 5.65000878378511e-06, "loss": 0.8409, "step": 11290 }, { "epoch": 1.9240199755663672, "grad_norm": 1.65625, "learning_rate": 5.648382741238212e-06, "loss": 0.8006, "step": 11291 }, { "epoch": 1.9241914396553572, "grad_norm": 1.7109375, "learning_rate": 5.64675684061394e-06, "loss": 0.8432, "step": 11292 }, { "epoch": 1.924362903744347, "grad_norm": 1.7109375, "learning_rate": 5.645131081965319e-06, "loss": 0.8412, "step": 11293 }, { "epoch": 1.9245343678333369, "grad_norm": 1.734375, "learning_rate": 5.6435054653453735e-06, "loss": 0.8535, "step": 11294 }, { "epoch": 1.924705831922327, "grad_norm": 1.7265625, "learning_rate": 5.64187999080712e-06, "loss": 0.8394, "step": 11295 }, { "epoch": 1.9248772960113167, "grad_norm": 1.6796875, "learning_rate": 5.6402546584035744e-06, "loss": 0.8817, "step": 11296 }, { "epoch": 1.9250487601003066, "grad_norm": 1.6640625, "learning_rate": 5.6386294681877375e-06, "loss": 0.8096, "step": 11297 }, { "epoch": 1.9252202241892964, "grad_norm": 1.7421875, "learning_rate": 5.637004420212617e-06, "loss": 0.8154, "step": 11298 }, { "epoch": 1.9253916882782862, "grad_norm": 1.5859375, "learning_rate": 5.635379514531212e-06, "loss": 0.7501, "step": 11299 }, { "epoch": 1.925563152367276, "grad_norm": 1.6875, "learning_rate": 5.633754751196516e-06, "loss": 0.8901, "step": 11300 }, { "epoch": 1.9257346164562659, "grad_norm": 1.6640625, "learning_rate": 5.632130130261522e-06, "loss": 0.8765, "step": 11301 }, { "epoch": 1.9259060805452557, "grad_norm": 1.59375, "learning_rate": 5.63050565177921e-06, "loss": 0.8111, "step": 11302 }, { "epoch": 1.9260775446342455, "grad_norm": 1.6953125, "learning_rate": 5.628881315802563e-06, "loss": 0.8233, "step": 11303 }, { "epoch": 1.9262490087232356, "grad_norm": 1.71875, "learning_rate": 5.627257122384558e-06, "loss": 0.8096, "step": 11304 }, { "epoch": 1.9264204728122254, "grad_norm": 1.71875, "learning_rate": 5.625633071578163e-06, "loss": 0.8586, "step": 11305 }, { "epoch": 1.9265919369012152, "grad_norm": 1.734375, "learning_rate": 5.624009163436345e-06, "loss": 0.8614, "step": 11306 }, { "epoch": 1.9267634009902053, "grad_norm": 1.6796875, "learning_rate": 5.622385398012067e-06, "loss": 0.8562, "step": 11307 }, { "epoch": 1.926934865079195, "grad_norm": 1.6484375, "learning_rate": 5.620761775358287e-06, "loss": 0.7839, "step": 11308 }, { "epoch": 1.927106329168185, "grad_norm": 1.625, "learning_rate": 5.619138295527955e-06, "loss": 0.8728, "step": 11309 }, { "epoch": 1.9272777932571747, "grad_norm": 1.6796875, "learning_rate": 5.617514958574021e-06, "loss": 0.84, "step": 11310 }, { "epoch": 1.9274492573461646, "grad_norm": 1.7109375, "learning_rate": 5.615891764549426e-06, "loss": 0.9072, "step": 11311 }, { "epoch": 1.9276207214351544, "grad_norm": 1.75, "learning_rate": 5.61426871350711e-06, "loss": 0.8774, "step": 11312 }, { "epoch": 1.9277921855241442, "grad_norm": 1.703125, "learning_rate": 5.612645805500008e-06, "loss": 0.8319, "step": 11313 }, { "epoch": 1.927963649613134, "grad_norm": 1.7109375, "learning_rate": 5.611023040581045e-06, "loss": 0.8352, "step": 11314 }, { "epoch": 1.9281351137021239, "grad_norm": 1.7109375, "learning_rate": 5.60940041880315e-06, "loss": 0.7795, "step": 11315 }, { "epoch": 1.928306577791114, "grad_norm": 1.6328125, "learning_rate": 5.607777940219239e-06, "loss": 0.8453, "step": 11316 }, { "epoch": 1.9284780418801037, "grad_norm": 1.765625, "learning_rate": 5.606155604882231e-06, "loss": 0.9062, "step": 11317 }, { "epoch": 1.9286495059690936, "grad_norm": 1.5859375, "learning_rate": 5.604533412845032e-06, "loss": 0.8051, "step": 11318 }, { "epoch": 1.9288209700580836, "grad_norm": 1.71875, "learning_rate": 5.602911364160557e-06, "loss": 0.848, "step": 11319 }, { "epoch": 1.9289924341470734, "grad_norm": 1.71875, "learning_rate": 5.601289458881693e-06, "loss": 0.888, "step": 11320 }, { "epoch": 1.9291638982360633, "grad_norm": 1.71875, "learning_rate": 5.599667697061346e-06, "loss": 0.7835, "step": 11321 }, { "epoch": 1.929335362325053, "grad_norm": 1.875, "learning_rate": 5.598046078752406e-06, "loss": 0.8079, "step": 11322 }, { "epoch": 1.929506826414043, "grad_norm": 1.7421875, "learning_rate": 5.596424604007758e-06, "loss": 0.7706, "step": 11323 }, { "epoch": 1.9296782905030327, "grad_norm": 1.6015625, "learning_rate": 5.594803272880287e-06, "loss": 0.8445, "step": 11324 }, { "epoch": 1.9298497545920226, "grad_norm": 1.6640625, "learning_rate": 5.59318208542287e-06, "loss": 0.7663, "step": 11325 }, { "epoch": 1.9300212186810124, "grad_norm": 1.734375, "learning_rate": 5.591561041688378e-06, "loss": 0.8155, "step": 11326 }, { "epoch": 1.9301926827700022, "grad_norm": 1.625, "learning_rate": 5.589940141729683e-06, "loss": 0.7547, "step": 11327 }, { "epoch": 1.9303641468589923, "grad_norm": 1.7578125, "learning_rate": 5.588319385599645e-06, "loss": 0.8048, "step": 11328 }, { "epoch": 1.930535610947982, "grad_norm": 1.7890625, "learning_rate": 5.586698773351122e-06, "loss": 0.8576, "step": 11329 }, { "epoch": 1.930707075036972, "grad_norm": 1.7109375, "learning_rate": 5.585078305036975e-06, "loss": 0.9041, "step": 11330 }, { "epoch": 1.930878539125962, "grad_norm": 1.765625, "learning_rate": 5.583457980710053e-06, "loss": 0.8994, "step": 11331 }, { "epoch": 1.9310500032149518, "grad_norm": 1.6953125, "learning_rate": 5.581837800423193e-06, "loss": 0.8983, "step": 11332 }, { "epoch": 1.9312214673039416, "grad_norm": 1.71875, "learning_rate": 5.580217764229241e-06, "loss": 0.8648, "step": 11333 }, { "epoch": 1.9313929313929314, "grad_norm": 1.6640625, "learning_rate": 5.578597872181031e-06, "loss": 0.7956, "step": 11334 }, { "epoch": 1.9315643954819213, "grad_norm": 1.8515625, "learning_rate": 5.5769781243313915e-06, "loss": 0.9276, "step": 11335 }, { "epoch": 1.931735859570911, "grad_norm": 1.6640625, "learning_rate": 5.575358520733153e-06, "loss": 0.7656, "step": 11336 }, { "epoch": 1.931907323659901, "grad_norm": 1.6328125, "learning_rate": 5.573739061439134e-06, "loss": 0.826, "step": 11337 }, { "epoch": 1.9320787877488907, "grad_norm": 1.640625, "learning_rate": 5.572119746502152e-06, "loss": 0.8464, "step": 11338 }, { "epoch": 1.9322502518378806, "grad_norm": 1.6796875, "learning_rate": 5.57050057597502e-06, "loss": 0.8301, "step": 11339 }, { "epoch": 1.9324217159268706, "grad_norm": 1.640625, "learning_rate": 5.568881549910543e-06, "loss": 0.9023, "step": 11340 }, { "epoch": 1.9325931800158604, "grad_norm": 1.5390625, "learning_rate": 5.567262668361525e-06, "loss": 0.7431, "step": 11341 }, { "epoch": 1.9327646441048503, "grad_norm": 1.609375, "learning_rate": 5.5656439313807675e-06, "loss": 0.884, "step": 11342 }, { "epoch": 1.9329361081938403, "grad_norm": 1.765625, "learning_rate": 5.564025339021055e-06, "loss": 0.7854, "step": 11343 }, { "epoch": 1.9331075722828301, "grad_norm": 1.734375, "learning_rate": 5.562406891335181e-06, "loss": 0.8516, "step": 11344 }, { "epoch": 1.93327903637182, "grad_norm": 1.7734375, "learning_rate": 5.560788588375925e-06, "loss": 0.8559, "step": 11345 }, { "epoch": 1.9334505004608098, "grad_norm": 1.59375, "learning_rate": 5.559170430196074e-06, "loss": 0.7799, "step": 11346 }, { "epoch": 1.9336219645497996, "grad_norm": 1.6484375, "learning_rate": 5.557552416848398e-06, "loss": 0.7802, "step": 11347 }, { "epoch": 1.9337934286387894, "grad_norm": 1.6953125, "learning_rate": 5.555934548385665e-06, "loss": 0.8102, "step": 11348 }, { "epoch": 1.9339648927277793, "grad_norm": 1.7578125, "learning_rate": 5.554316824860642e-06, "loss": 0.8421, "step": 11349 }, { "epoch": 1.934136356816769, "grad_norm": 1.6953125, "learning_rate": 5.5526992463260884e-06, "loss": 0.8531, "step": 11350 }, { "epoch": 1.934307820905759, "grad_norm": 1.6171875, "learning_rate": 5.55108181283476e-06, "loss": 0.7784, "step": 11351 }, { "epoch": 1.9344792849947487, "grad_norm": 1.515625, "learning_rate": 5.549464524439406e-06, "loss": 0.8066, "step": 11352 }, { "epoch": 1.9346507490837388, "grad_norm": 1.6640625, "learning_rate": 5.547847381192772e-06, "loss": 0.8316, "step": 11353 }, { "epoch": 1.9348222131727286, "grad_norm": 1.6484375, "learning_rate": 5.546230383147606e-06, "loss": 0.8106, "step": 11354 }, { "epoch": 1.9349936772617184, "grad_norm": 1.6640625, "learning_rate": 5.544613530356633e-06, "loss": 0.8452, "step": 11355 }, { "epoch": 1.9351651413507085, "grad_norm": 1.75, "learning_rate": 5.54299682287259e-06, "loss": 0.9008, "step": 11356 }, { "epoch": 1.9353366054396983, "grad_norm": 1.6953125, "learning_rate": 5.541380260748206e-06, "loss": 0.8208, "step": 11357 }, { "epoch": 1.9355080695286881, "grad_norm": 1.6875, "learning_rate": 5.539763844036201e-06, "loss": 0.8263, "step": 11358 }, { "epoch": 1.935679533617678, "grad_norm": 1.71875, "learning_rate": 5.538147572789292e-06, "loss": 0.8089, "step": 11359 }, { "epoch": 1.9358509977066678, "grad_norm": 1.6796875, "learning_rate": 5.5365314470601915e-06, "loss": 0.8441, "step": 11360 }, { "epoch": 1.9360224617956576, "grad_norm": 1.765625, "learning_rate": 5.534915466901609e-06, "loss": 0.7927, "step": 11361 }, { "epoch": 1.9361939258846474, "grad_norm": 1.625, "learning_rate": 5.533299632366248e-06, "loss": 0.8619, "step": 11362 }, { "epoch": 1.9363653899736373, "grad_norm": 1.671875, "learning_rate": 5.531683943506806e-06, "loss": 0.7727, "step": 11363 }, { "epoch": 1.936536854062627, "grad_norm": 1.734375, "learning_rate": 5.530068400375976e-06, "loss": 0.7626, "step": 11364 }, { "epoch": 1.9367083181516171, "grad_norm": 1.6484375, "learning_rate": 5.528453003026448e-06, "loss": 0.8035, "step": 11365 }, { "epoch": 1.936879782240607, "grad_norm": 1.71875, "learning_rate": 5.526837751510905e-06, "loss": 0.8574, "step": 11366 }, { "epoch": 1.9370512463295968, "grad_norm": 1.7734375, "learning_rate": 5.525222645882029e-06, "loss": 0.9693, "step": 11367 }, { "epoch": 1.9372227104185868, "grad_norm": 1.703125, "learning_rate": 5.523607686192492e-06, "loss": 0.8168, "step": 11368 }, { "epoch": 1.9373941745075767, "grad_norm": 1.6953125, "learning_rate": 5.521992872494965e-06, "loss": 0.8063, "step": 11369 }, { "epoch": 1.9375656385965665, "grad_norm": 1.828125, "learning_rate": 5.5203782048421115e-06, "loss": 0.8824, "step": 11370 }, { "epoch": 1.9377371026855563, "grad_norm": 1.546875, "learning_rate": 5.518763683286596e-06, "loss": 0.7815, "step": 11371 }, { "epoch": 1.9379085667745461, "grad_norm": 1.6796875, "learning_rate": 5.5171493078810704e-06, "loss": 0.8088, "step": 11372 }, { "epoch": 1.938080030863536, "grad_norm": 1.5859375, "learning_rate": 5.515535078678187e-06, "loss": 0.8024, "step": 11373 }, { "epoch": 1.9382514949525258, "grad_norm": 1.6484375, "learning_rate": 5.51392099573059e-06, "loss": 0.7431, "step": 11374 }, { "epoch": 1.9384229590415156, "grad_norm": 1.8671875, "learning_rate": 5.512307059090925e-06, "loss": 0.7968, "step": 11375 }, { "epoch": 1.9385944231305055, "grad_norm": 1.671875, "learning_rate": 5.5106932688118245e-06, "loss": 0.7958, "step": 11376 }, { "epoch": 1.9387658872194955, "grad_norm": 2.640625, "learning_rate": 5.509079624945926e-06, "loss": 0.8416, "step": 11377 }, { "epoch": 1.9389373513084853, "grad_norm": 1.6328125, "learning_rate": 5.507466127545847e-06, "loss": 0.8436, "step": 11378 }, { "epoch": 1.9391088153974751, "grad_norm": 1.84375, "learning_rate": 5.505852776664215e-06, "loss": 0.8292, "step": 11379 }, { "epoch": 1.9392802794864652, "grad_norm": 1.6875, "learning_rate": 5.504239572353649e-06, "loss": 0.8329, "step": 11380 }, { "epoch": 1.939451743575455, "grad_norm": 1.640625, "learning_rate": 5.5026265146667605e-06, "loss": 0.7997, "step": 11381 }, { "epoch": 1.9396232076644448, "grad_norm": 1.6328125, "learning_rate": 5.501013603656155e-06, "loss": 0.7976, "step": 11382 }, { "epoch": 1.9397946717534347, "grad_norm": 1.6796875, "learning_rate": 5.499400839374438e-06, "loss": 0.8052, "step": 11383 }, { "epoch": 1.9399661358424245, "grad_norm": 1.7578125, "learning_rate": 5.497788221874203e-06, "loss": 0.9537, "step": 11384 }, { "epoch": 1.9401375999314143, "grad_norm": 1.6484375, "learning_rate": 5.496175751208052e-06, "loss": 0.8053, "step": 11385 }, { "epoch": 1.9403090640204042, "grad_norm": 1.6171875, "learning_rate": 5.494563427428569e-06, "loss": 0.8405, "step": 11386 }, { "epoch": 1.940480528109394, "grad_norm": 1.671875, "learning_rate": 5.492951250588341e-06, "loss": 0.8469, "step": 11387 }, { "epoch": 1.9406519921983838, "grad_norm": 1.625, "learning_rate": 5.491339220739947e-06, "loss": 0.797, "step": 11388 }, { "epoch": 1.9408234562873738, "grad_norm": 1.6953125, "learning_rate": 5.489727337935955e-06, "loss": 0.8585, "step": 11389 }, { "epoch": 1.9409949203763637, "grad_norm": 1.7890625, "learning_rate": 5.488115602228938e-06, "loss": 0.9122, "step": 11390 }, { "epoch": 1.9411663844653535, "grad_norm": 1.625, "learning_rate": 5.486504013671463e-06, "loss": 0.792, "step": 11391 }, { "epoch": 1.9413378485543435, "grad_norm": 1.7265625, "learning_rate": 5.484892572316088e-06, "loss": 0.8458, "step": 11392 }, { "epoch": 1.9415093126433334, "grad_norm": 1.8203125, "learning_rate": 5.483281278215366e-06, "loss": 0.9417, "step": 11393 }, { "epoch": 1.9416807767323232, "grad_norm": 1.6484375, "learning_rate": 5.481670131421853e-06, "loss": 0.8389, "step": 11394 }, { "epoch": 1.941852240821313, "grad_norm": 1.640625, "learning_rate": 5.4800591319880905e-06, "loss": 0.8018, "step": 11395 }, { "epoch": 1.9420237049103029, "grad_norm": 1.671875, "learning_rate": 5.47844827996662e-06, "loss": 0.861, "step": 11396 }, { "epoch": 1.9421951689992927, "grad_norm": 1.671875, "learning_rate": 5.476837575409978e-06, "loss": 0.8843, "step": 11397 }, { "epoch": 1.9423666330882825, "grad_norm": 1.7265625, "learning_rate": 5.475227018370695e-06, "loss": 0.8738, "step": 11398 }, { "epoch": 1.9425380971772723, "grad_norm": 1.59375, "learning_rate": 5.473616608901298e-06, "loss": 0.8311, "step": 11399 }, { "epoch": 1.9427095612662622, "grad_norm": 1.71875, "learning_rate": 5.4720063470543115e-06, "loss": 0.8529, "step": 11400 }, { "epoch": 1.9428810253552522, "grad_norm": 1.71875, "learning_rate": 5.470396232882242e-06, "loss": 0.8726, "step": 11401 }, { "epoch": 1.943052489444242, "grad_norm": 1.6953125, "learning_rate": 5.468786266437611e-06, "loss": 0.8968, "step": 11402 }, { "epoch": 1.9432239535332319, "grad_norm": 1.640625, "learning_rate": 5.467176447772924e-06, "loss": 0.864, "step": 11403 }, { "epoch": 1.943395417622222, "grad_norm": 1.625, "learning_rate": 5.4655667769406805e-06, "loss": 0.9435, "step": 11404 }, { "epoch": 1.9435668817112117, "grad_norm": 1.6015625, "learning_rate": 5.46395725399338e-06, "loss": 0.7874, "step": 11405 }, { "epoch": 1.9437383458002016, "grad_norm": 1.6640625, "learning_rate": 5.462347878983516e-06, "loss": 0.8414, "step": 11406 }, { "epoch": 1.9439098098891914, "grad_norm": 1.6328125, "learning_rate": 5.460738651963573e-06, "loss": 0.7489, "step": 11407 }, { "epoch": 1.9440812739781812, "grad_norm": 1.6171875, "learning_rate": 5.459129572986037e-06, "loss": 0.7888, "step": 11408 }, { "epoch": 1.944252738067171, "grad_norm": 1.765625, "learning_rate": 5.457520642103385e-06, "loss": 0.8051, "step": 11409 }, { "epoch": 1.9444242021561609, "grad_norm": 1.8046875, "learning_rate": 5.4559118593680905e-06, "loss": 0.9306, "step": 11410 }, { "epoch": 1.9445956662451507, "grad_norm": 1.5703125, "learning_rate": 5.454303224832624e-06, "loss": 0.7945, "step": 11411 }, { "epoch": 1.9447671303341405, "grad_norm": 1.8125, "learning_rate": 5.4526947385494445e-06, "loss": 0.8615, "step": 11412 }, { "epoch": 1.9449385944231306, "grad_norm": 1.75, "learning_rate": 5.451086400571013e-06, "loss": 0.8689, "step": 11413 }, { "epoch": 1.9451100585121204, "grad_norm": 1.6953125, "learning_rate": 5.449478210949784e-06, "loss": 0.8464, "step": 11414 }, { "epoch": 1.9452815226011102, "grad_norm": 1.734375, "learning_rate": 5.447870169738205e-06, "loss": 0.8437, "step": 11415 }, { "epoch": 1.9454529866901002, "grad_norm": 1.6484375, "learning_rate": 5.446262276988722e-06, "loss": 0.8332, "step": 11416 }, { "epoch": 1.94562445077909, "grad_norm": 1.6171875, "learning_rate": 5.444654532753775e-06, "loss": 0.8617, "step": 11417 }, { "epoch": 1.94579591486808, "grad_norm": 1.625, "learning_rate": 5.443046937085795e-06, "loss": 0.8854, "step": 11418 }, { "epoch": 1.9459673789570697, "grad_norm": 1.6328125, "learning_rate": 5.441439490037217e-06, "loss": 0.8435, "step": 11419 }, { "epoch": 1.9461388430460596, "grad_norm": 1.7734375, "learning_rate": 5.439832191660462e-06, "loss": 0.8938, "step": 11420 }, { "epoch": 1.9463103071350494, "grad_norm": 1.7890625, "learning_rate": 5.438225042007949e-06, "loss": 0.9738, "step": 11421 }, { "epoch": 1.9464817712240392, "grad_norm": 1.734375, "learning_rate": 5.436618041132099e-06, "loss": 0.8825, "step": 11422 }, { "epoch": 1.946653235313029, "grad_norm": 1.703125, "learning_rate": 5.435011189085318e-06, "loss": 0.9542, "step": 11423 }, { "epoch": 1.9468246994020189, "grad_norm": 1.6953125, "learning_rate": 5.433404485920011e-06, "loss": 0.8039, "step": 11424 }, { "epoch": 1.946996163491009, "grad_norm": 1.7421875, "learning_rate": 5.431797931688581e-06, "loss": 0.7727, "step": 11425 }, { "epoch": 1.9471676275799987, "grad_norm": 1.7265625, "learning_rate": 5.430191526443423e-06, "loss": 0.8357, "step": 11426 }, { "epoch": 1.9473390916689886, "grad_norm": 1.6796875, "learning_rate": 5.428585270236925e-06, "loss": 0.8357, "step": 11427 }, { "epoch": 1.9475105557579786, "grad_norm": 1.7890625, "learning_rate": 5.426979163121479e-06, "loss": 0.8168, "step": 11428 }, { "epoch": 1.9476820198469684, "grad_norm": 1.6953125, "learning_rate": 5.425373205149461e-06, "loss": 0.8457, "step": 11429 }, { "epoch": 1.9478534839359583, "grad_norm": 1.734375, "learning_rate": 5.42376739637325e-06, "loss": 0.8166, "step": 11430 }, { "epoch": 1.948024948024948, "grad_norm": 1.6640625, "learning_rate": 5.4221617368452165e-06, "loss": 0.8057, "step": 11431 }, { "epoch": 1.948196412113938, "grad_norm": 1.7421875, "learning_rate": 5.420556226617727e-06, "loss": 0.7438, "step": 11432 }, { "epoch": 1.9483678762029277, "grad_norm": 1.6015625, "learning_rate": 5.4189508657431444e-06, "loss": 0.7822, "step": 11433 }, { "epoch": 1.9485393402919176, "grad_norm": 1.75, "learning_rate": 5.417345654273827e-06, "loss": 0.8917, "step": 11434 }, { "epoch": 1.9487108043809074, "grad_norm": 1.671875, "learning_rate": 5.415740592262122e-06, "loss": 0.7634, "step": 11435 }, { "epoch": 1.9488822684698972, "grad_norm": 1.65625, "learning_rate": 5.41413567976038e-06, "loss": 0.8146, "step": 11436 }, { "epoch": 1.9490537325588873, "grad_norm": 1.7421875, "learning_rate": 5.4125309168209394e-06, "loss": 0.9364, "step": 11437 }, { "epoch": 1.949225196647877, "grad_norm": 1.625, "learning_rate": 5.410926303496142e-06, "loss": 0.8589, "step": 11438 }, { "epoch": 1.949396660736867, "grad_norm": 1.671875, "learning_rate": 5.4093218398383154e-06, "loss": 0.8342, "step": 11439 }, { "epoch": 1.9495681248258567, "grad_norm": 1.8046875, "learning_rate": 5.407717525899793e-06, "loss": 0.8523, "step": 11440 }, { "epoch": 1.9497395889148468, "grad_norm": 1.6171875, "learning_rate": 5.4061133617328956e-06, "loss": 0.8945, "step": 11441 }, { "epoch": 1.9499110530038366, "grad_norm": 1.6953125, "learning_rate": 5.40450934738994e-06, "loss": 0.7959, "step": 11442 }, { "epoch": 1.9500825170928264, "grad_norm": 1.65625, "learning_rate": 5.402905482923239e-06, "loss": 0.9032, "step": 11443 }, { "epoch": 1.9502539811818163, "grad_norm": 1.6875, "learning_rate": 5.4013017683851034e-06, "loss": 0.8463, "step": 11444 }, { "epoch": 1.950425445270806, "grad_norm": 1.7734375, "learning_rate": 5.399698203827834e-06, "loss": 0.8543, "step": 11445 }, { "epoch": 1.950596909359796, "grad_norm": 1.7265625, "learning_rate": 5.398094789303731e-06, "loss": 0.9256, "step": 11446 }, { "epoch": 1.9507683734487857, "grad_norm": 1.6640625, "learning_rate": 5.396491524865084e-06, "loss": 0.7521, "step": 11447 }, { "epoch": 1.9509398375377756, "grad_norm": 1.75, "learning_rate": 5.394888410564184e-06, "loss": 0.8547, "step": 11448 }, { "epoch": 1.9511113016267654, "grad_norm": 1.609375, "learning_rate": 5.393285446453314e-06, "loss": 0.801, "step": 11449 }, { "epoch": 1.9512827657157554, "grad_norm": 1.7109375, "learning_rate": 5.391682632584752e-06, "loss": 0.8614, "step": 11450 }, { "epoch": 1.9514542298047453, "grad_norm": 1.6875, "learning_rate": 5.390079969010773e-06, "loss": 0.8522, "step": 11451 }, { "epoch": 1.951625693893735, "grad_norm": 1.671875, "learning_rate": 5.388477455783645e-06, "loss": 0.8962, "step": 11452 }, { "epoch": 1.9517971579827251, "grad_norm": 1.75, "learning_rate": 5.386875092955633e-06, "loss": 0.8499, "step": 11453 }, { "epoch": 1.951968622071715, "grad_norm": 1.734375, "learning_rate": 5.385272880578996e-06, "loss": 0.9148, "step": 11454 }, { "epoch": 1.9521400861607048, "grad_norm": 1.8828125, "learning_rate": 5.383670818705987e-06, "loss": 0.792, "step": 11455 }, { "epoch": 1.9523115502496946, "grad_norm": 1.6875, "learning_rate": 5.382068907388856e-06, "loss": 0.8232, "step": 11456 }, { "epoch": 1.9524830143386844, "grad_norm": 1.6171875, "learning_rate": 5.3804671466798486e-06, "loss": 0.8675, "step": 11457 }, { "epoch": 1.9526544784276743, "grad_norm": 1.71875, "learning_rate": 5.378865536631203e-06, "loss": 0.8182, "step": 11458 }, { "epoch": 1.952825942516664, "grad_norm": 1.6796875, "learning_rate": 5.377264077295152e-06, "loss": 0.8782, "step": 11459 }, { "epoch": 1.952997406605654, "grad_norm": 1.734375, "learning_rate": 5.375662768723929e-06, "loss": 0.7995, "step": 11460 }, { "epoch": 1.9531688706946437, "grad_norm": 1.625, "learning_rate": 5.374061610969756e-06, "loss": 0.8436, "step": 11461 }, { "epoch": 1.9533403347836338, "grad_norm": 1.71875, "learning_rate": 5.372460604084854e-06, "loss": 0.8517, "step": 11462 }, { "epoch": 1.9535117988726236, "grad_norm": 1.6484375, "learning_rate": 5.370859748121437e-06, "loss": 0.8209, "step": 11463 }, { "epoch": 1.9536832629616134, "grad_norm": 1.8046875, "learning_rate": 5.369259043131716e-06, "loss": 0.8313, "step": 11464 }, { "epoch": 1.9538547270506035, "grad_norm": 1.6640625, "learning_rate": 5.367658489167894e-06, "loss": 0.8333, "step": 11465 }, { "epoch": 1.9540261911395933, "grad_norm": 1.6640625, "learning_rate": 5.366058086282173e-06, "loss": 0.8558, "step": 11466 }, { "epoch": 1.9541976552285831, "grad_norm": 1.8046875, "learning_rate": 5.364457834526749e-06, "loss": 0.8054, "step": 11467 }, { "epoch": 1.954369119317573, "grad_norm": 1.6875, "learning_rate": 5.362857733953809e-06, "loss": 0.8077, "step": 11468 }, { "epoch": 1.9545405834065628, "grad_norm": 1.6015625, "learning_rate": 5.361257784615546e-06, "loss": 0.8044, "step": 11469 }, { "epoch": 1.9547120474955526, "grad_norm": 1.7578125, "learning_rate": 5.3596579865641286e-06, "loss": 0.9041, "step": 11470 }, { "epoch": 1.9548835115845424, "grad_norm": 1.6484375, "learning_rate": 5.358058339851739e-06, "loss": 0.8412, "step": 11471 }, { "epoch": 1.9550549756735323, "grad_norm": 1.6640625, "learning_rate": 5.356458844530546e-06, "loss": 0.8495, "step": 11472 }, { "epoch": 1.955226439762522, "grad_norm": 1.6328125, "learning_rate": 5.354859500652717e-06, "loss": 0.8629, "step": 11473 }, { "epoch": 1.9553979038515121, "grad_norm": 1.7421875, "learning_rate": 5.3532603082704115e-06, "loss": 0.8612, "step": 11474 }, { "epoch": 1.955569367940502, "grad_norm": 1.8125, "learning_rate": 5.351661267435786e-06, "loss": 0.8293, "step": 11475 }, { "epoch": 1.9557408320294918, "grad_norm": 1.7421875, "learning_rate": 5.350062378200989e-06, "loss": 0.8635, "step": 11476 }, { "epoch": 1.9559122961184818, "grad_norm": 1.7109375, "learning_rate": 5.348463640618169e-06, "loss": 0.8071, "step": 11477 }, { "epoch": 1.9560837602074717, "grad_norm": 1.6640625, "learning_rate": 5.346865054739463e-06, "loss": 0.8663, "step": 11478 }, { "epoch": 1.9562552242964615, "grad_norm": 1.7890625, "learning_rate": 5.345266620617011e-06, "loss": 0.9402, "step": 11479 }, { "epoch": 1.9564266883854513, "grad_norm": 1.7890625, "learning_rate": 5.343668338302944e-06, "loss": 0.8633, "step": 11480 }, { "epoch": 1.9565981524744411, "grad_norm": 1.734375, "learning_rate": 5.34207020784939e-06, "loss": 0.8151, "step": 11481 }, { "epoch": 1.956769616563431, "grad_norm": 1.703125, "learning_rate": 5.340472229308465e-06, "loss": 0.8865, "step": 11482 }, { "epoch": 1.9569410806524208, "grad_norm": 1.671875, "learning_rate": 5.338874402732284e-06, "loss": 0.7956, "step": 11483 }, { "epoch": 1.9571125447414106, "grad_norm": 1.59375, "learning_rate": 5.337276728172962e-06, "loss": 0.7595, "step": 11484 }, { "epoch": 1.9572840088304004, "grad_norm": 1.671875, "learning_rate": 5.335679205682604e-06, "loss": 0.8831, "step": 11485 }, { "epoch": 1.9574554729193905, "grad_norm": 1.71875, "learning_rate": 5.3340818353133105e-06, "loss": 0.8479, "step": 11486 }, { "epoch": 1.9576269370083803, "grad_norm": 1.59375, "learning_rate": 5.33248461711718e-06, "loss": 0.8728, "step": 11487 }, { "epoch": 1.9577984010973701, "grad_norm": 1.703125, "learning_rate": 5.330887551146301e-06, "loss": 0.952, "step": 11488 }, { "epoch": 1.9579698651863602, "grad_norm": 1.6875, "learning_rate": 5.329290637452762e-06, "loss": 0.8699, "step": 11489 }, { "epoch": 1.95814132927535, "grad_norm": 1.6484375, "learning_rate": 5.327693876088643e-06, "loss": 0.8186, "step": 11490 }, { "epoch": 1.9583127933643398, "grad_norm": 1.8046875, "learning_rate": 5.3260972671060215e-06, "loss": 0.8201, "step": 11491 }, { "epoch": 1.9584842574533297, "grad_norm": 1.734375, "learning_rate": 5.324500810556971e-06, "loss": 0.8688, "step": 11492 }, { "epoch": 1.9586557215423195, "grad_norm": 1.65625, "learning_rate": 5.322904506493554e-06, "loss": 0.8751, "step": 11493 }, { "epoch": 1.9588271856313093, "grad_norm": 1.7890625, "learning_rate": 5.321308354967828e-06, "loss": 0.8815, "step": 11494 }, { "epoch": 1.9589986497202991, "grad_norm": 1.71875, "learning_rate": 5.319712356031859e-06, "loss": 0.8465, "step": 11495 }, { "epoch": 1.959170113809289, "grad_norm": 1.703125, "learning_rate": 5.318116509737694e-06, "loss": 0.807, "step": 11496 }, { "epoch": 1.9593415778982788, "grad_norm": 1.8046875, "learning_rate": 5.31652081613738e-06, "loss": 0.8358, "step": 11497 }, { "epoch": 1.9595130419872688, "grad_norm": 1.6875, "learning_rate": 5.314925275282959e-06, "loss": 0.8653, "step": 11498 }, { "epoch": 1.9596845060762587, "grad_norm": 1.6796875, "learning_rate": 5.313329887226467e-06, "loss": 0.8799, "step": 11499 }, { "epoch": 1.9598559701652485, "grad_norm": 1.796875, "learning_rate": 5.311734652019935e-06, "loss": 0.8724, "step": 11500 }, { "epoch": 1.9600274342542385, "grad_norm": 1.78125, "learning_rate": 5.310139569715391e-06, "loss": 0.8704, "step": 11501 }, { "epoch": 1.9601988983432284, "grad_norm": 1.6171875, "learning_rate": 5.308544640364856e-06, "loss": 0.7946, "step": 11502 }, { "epoch": 1.9603703624322182, "grad_norm": 1.6640625, "learning_rate": 5.306949864020348e-06, "loss": 0.7768, "step": 11503 }, { "epoch": 1.960541826521208, "grad_norm": 1.828125, "learning_rate": 5.305355240733881e-06, "loss": 0.7978, "step": 11504 }, { "epoch": 1.9607132906101978, "grad_norm": 1.609375, "learning_rate": 5.303760770557452e-06, "loss": 0.8094, "step": 11505 }, { "epoch": 1.9608847546991877, "grad_norm": 1.6484375, "learning_rate": 5.302166453543069e-06, "loss": 0.8126, "step": 11506 }, { "epoch": 1.9610562187881775, "grad_norm": 1.6484375, "learning_rate": 5.30057228974273e-06, "loss": 0.8152, "step": 11507 }, { "epoch": 1.9612276828771673, "grad_norm": 1.6640625, "learning_rate": 5.2989782792084236e-06, "loss": 0.8209, "step": 11508 }, { "epoch": 1.9613991469661571, "grad_norm": 1.6796875, "learning_rate": 5.297384421992138e-06, "loss": 0.8824, "step": 11509 }, { "epoch": 1.9615706110551472, "grad_norm": 1.6875, "learning_rate": 5.2957907181458554e-06, "loss": 0.8342, "step": 11510 }, { "epoch": 1.961742075144137, "grad_norm": 1.6640625, "learning_rate": 5.294197167721552e-06, "loss": 0.8246, "step": 11511 }, { "epoch": 1.9619135392331268, "grad_norm": 1.6875, "learning_rate": 5.292603770771199e-06, "loss": 0.8105, "step": 11512 }, { "epoch": 1.9620850033221169, "grad_norm": 1.546875, "learning_rate": 5.291010527346763e-06, "loss": 0.747, "step": 11513 }, { "epoch": 1.9622564674111067, "grad_norm": 1.734375, "learning_rate": 5.289417437500207e-06, "loss": 0.8496, "step": 11514 }, { "epoch": 1.9624279315000965, "grad_norm": 1.765625, "learning_rate": 5.287824501283487e-06, "loss": 0.8033, "step": 11515 }, { "epoch": 1.9625993955890864, "grad_norm": 1.6640625, "learning_rate": 5.286231718748553e-06, "loss": 0.7926, "step": 11516 }, { "epoch": 1.9627708596780762, "grad_norm": 1.6875, "learning_rate": 5.284639089947355e-06, "loss": 0.8827, "step": 11517 }, { "epoch": 1.962942323767066, "grad_norm": 1.6796875, "learning_rate": 5.28304661493183e-06, "loss": 0.7797, "step": 11518 }, { "epoch": 1.9631137878560558, "grad_norm": 1.6484375, "learning_rate": 5.281454293753921e-06, "loss": 0.827, "step": 11519 }, { "epoch": 1.9632852519450457, "grad_norm": 1.640625, "learning_rate": 5.279862126465552e-06, "loss": 0.8803, "step": 11520 }, { "epoch": 1.9634567160340355, "grad_norm": 1.6875, "learning_rate": 5.278270113118655e-06, "loss": 0.8489, "step": 11521 }, { "epoch": 1.9636281801230255, "grad_norm": 1.7109375, "learning_rate": 5.276678253765151e-06, "loss": 0.8641, "step": 11522 }, { "epoch": 1.9637996442120154, "grad_norm": 1.6796875, "learning_rate": 5.275086548456954e-06, "loss": 0.8714, "step": 11523 }, { "epoch": 1.9639711083010052, "grad_norm": 1.8046875, "learning_rate": 5.273494997245978e-06, "loss": 0.9374, "step": 11524 }, { "epoch": 1.9641425723899952, "grad_norm": 1.6875, "learning_rate": 5.2719036001841295e-06, "loss": 0.8625, "step": 11525 }, { "epoch": 1.964314036478985, "grad_norm": 1.75, "learning_rate": 5.270312357323307e-06, "loss": 0.8165, "step": 11526 }, { "epoch": 1.9644855005679749, "grad_norm": 1.6640625, "learning_rate": 5.268721268715413e-06, "loss": 0.8819, "step": 11527 }, { "epoch": 1.9646569646569647, "grad_norm": 1.7265625, "learning_rate": 5.2671303344123315e-06, "loss": 0.8243, "step": 11528 }, { "epoch": 1.9648284287459545, "grad_norm": 1.65625, "learning_rate": 5.265539554465953e-06, "loss": 0.8084, "step": 11529 }, { "epoch": 1.9649998928349444, "grad_norm": 1.7265625, "learning_rate": 5.263948928928157e-06, "loss": 0.8177, "step": 11530 }, { "epoch": 1.9651713569239342, "grad_norm": 1.7265625, "learning_rate": 5.26235845785082e-06, "loss": 0.8604, "step": 11531 }, { "epoch": 1.965342821012924, "grad_norm": 1.78125, "learning_rate": 5.260768141285814e-06, "loss": 0.877, "step": 11532 }, { "epoch": 1.9655142851019138, "grad_norm": 1.671875, "learning_rate": 5.259177979285003e-06, "loss": 0.8761, "step": 11533 }, { "epoch": 1.9656857491909037, "grad_norm": 1.671875, "learning_rate": 5.257587971900253e-06, "loss": 0.8233, "step": 11534 }, { "epoch": 1.9658572132798937, "grad_norm": 1.7734375, "learning_rate": 5.255998119183419e-06, "loss": 0.9104, "step": 11535 }, { "epoch": 1.9660286773688835, "grad_norm": 1.6796875, "learning_rate": 5.254408421186348e-06, "loss": 0.8313, "step": 11536 }, { "epoch": 1.9662001414578734, "grad_norm": 1.7734375, "learning_rate": 5.25281887796089e-06, "loss": 0.8515, "step": 11537 }, { "epoch": 1.9663716055468634, "grad_norm": 1.7265625, "learning_rate": 5.251229489558889e-06, "loss": 0.8842, "step": 11538 }, { "epoch": 1.9665430696358532, "grad_norm": 1.7421875, "learning_rate": 5.249640256032172e-06, "loss": 0.8579, "step": 11539 }, { "epoch": 1.966714533724843, "grad_norm": 1.8046875, "learning_rate": 5.248051177432575e-06, "loss": 0.8923, "step": 11540 }, { "epoch": 1.9668859978138329, "grad_norm": 1.7109375, "learning_rate": 5.246462253811923e-06, "loss": 0.8346, "step": 11541 }, { "epoch": 1.9670574619028227, "grad_norm": 1.640625, "learning_rate": 5.244873485222038e-06, "loss": 0.8458, "step": 11542 }, { "epoch": 1.9672289259918125, "grad_norm": 1.765625, "learning_rate": 5.243284871714733e-06, "loss": 0.8551, "step": 11543 }, { "epoch": 1.9674003900808024, "grad_norm": 1.734375, "learning_rate": 5.2416964133418216e-06, "loss": 0.8916, "step": 11544 }, { "epoch": 1.9675718541697922, "grad_norm": 1.7734375, "learning_rate": 5.240108110155108e-06, "loss": 0.8748, "step": 11545 }, { "epoch": 1.967743318258782, "grad_norm": 1.6953125, "learning_rate": 5.238519962206392e-06, "loss": 0.8406, "step": 11546 }, { "epoch": 1.967914782347772, "grad_norm": 1.75, "learning_rate": 5.236931969547472e-06, "loss": 0.8873, "step": 11547 }, { "epoch": 1.9680862464367619, "grad_norm": 1.7578125, "learning_rate": 5.2353441322301356e-06, "loss": 0.8211, "step": 11548 }, { "epoch": 1.9682577105257517, "grad_norm": 1.8671875, "learning_rate": 5.2337564503061685e-06, "loss": 0.8973, "step": 11549 }, { "epoch": 1.9684291746147418, "grad_norm": 1.7890625, "learning_rate": 5.232168923827352e-06, "loss": 0.8692, "step": 11550 }, { "epoch": 1.9686006387037316, "grad_norm": 1.734375, "learning_rate": 5.230581552845462e-06, "loss": 0.9202, "step": 11551 }, { "epoch": 1.9687721027927214, "grad_norm": 1.78125, "learning_rate": 5.228994337412266e-06, "loss": 0.8969, "step": 11552 }, { "epoch": 1.9689435668817112, "grad_norm": 1.609375, "learning_rate": 5.227407277579531e-06, "loss": 0.8916, "step": 11553 }, { "epoch": 1.969115030970701, "grad_norm": 2.265625, "learning_rate": 5.225820373399018e-06, "loss": 0.8081, "step": 11554 }, { "epoch": 1.969286495059691, "grad_norm": 1.671875, "learning_rate": 5.224233624922479e-06, "loss": 0.7951, "step": 11555 }, { "epoch": 1.9694579591486807, "grad_norm": 1.703125, "learning_rate": 5.222647032201664e-06, "loss": 0.8401, "step": 11556 }, { "epoch": 1.9696294232376705, "grad_norm": 1.7109375, "learning_rate": 5.221060595288321e-06, "loss": 0.8383, "step": 11557 }, { "epoch": 1.9698008873266604, "grad_norm": 1.8046875, "learning_rate": 5.219474314234187e-06, "loss": 0.8254, "step": 11558 }, { "epoch": 1.9699723514156504, "grad_norm": 1.6953125, "learning_rate": 5.217888189090999e-06, "loss": 0.8489, "step": 11559 }, { "epoch": 1.9701438155046402, "grad_norm": 1.6484375, "learning_rate": 5.216302219910484e-06, "loss": 0.8711, "step": 11560 }, { "epoch": 1.97031527959363, "grad_norm": 1.8125, "learning_rate": 5.2147164067443715e-06, "loss": 0.8555, "step": 11561 }, { "epoch": 1.9704867436826201, "grad_norm": 1.7109375, "learning_rate": 5.2131307496443724e-06, "loss": 0.8815, "step": 11562 }, { "epoch": 1.97065820777161, "grad_norm": 1.8515625, "learning_rate": 5.211545248662205e-06, "loss": 0.8769, "step": 11563 }, { "epoch": 1.9708296718605998, "grad_norm": 1.703125, "learning_rate": 5.20995990384958e-06, "loss": 0.7547, "step": 11564 }, { "epoch": 1.9710011359495896, "grad_norm": 1.7578125, "learning_rate": 5.2083747152581975e-06, "loss": 0.9023, "step": 11565 }, { "epoch": 1.9711726000385794, "grad_norm": 1.6953125, "learning_rate": 5.206789682939761e-06, "loss": 0.8359, "step": 11566 }, { "epoch": 1.9713440641275692, "grad_norm": 1.6484375, "learning_rate": 5.205204806945963e-06, "loss": 0.7655, "step": 11567 }, { "epoch": 1.971515528216559, "grad_norm": 1.6328125, "learning_rate": 5.203620087328491e-06, "loss": 0.7909, "step": 11568 }, { "epoch": 1.971686992305549, "grad_norm": 1.71875, "learning_rate": 5.202035524139029e-06, "loss": 0.8972, "step": 11569 }, { "epoch": 1.9718584563945387, "grad_norm": 1.6171875, "learning_rate": 5.200451117429257e-06, "loss": 0.8527, "step": 11570 }, { "epoch": 1.9720299204835288, "grad_norm": 1.703125, "learning_rate": 5.198866867250847e-06, "loss": 0.8325, "step": 11571 }, { "epoch": 1.9722013845725186, "grad_norm": 1.6484375, "learning_rate": 5.197282773655463e-06, "loss": 0.8403, "step": 11572 }, { "epoch": 1.9723728486615084, "grad_norm": 1.6796875, "learning_rate": 5.195698836694783e-06, "loss": 0.816, "step": 11573 }, { "epoch": 1.9725443127504985, "grad_norm": 1.7421875, "learning_rate": 5.19411505642045e-06, "loss": 0.8329, "step": 11574 }, { "epoch": 1.9727157768394883, "grad_norm": 1.703125, "learning_rate": 5.192531432884122e-06, "loss": 0.955, "step": 11575 }, { "epoch": 1.9728872409284781, "grad_norm": 1.703125, "learning_rate": 5.1909479661374475e-06, "loss": 0.8504, "step": 11576 }, { "epoch": 1.973058705017468, "grad_norm": 1.6796875, "learning_rate": 5.18936465623207e-06, "loss": 0.8312, "step": 11577 }, { "epoch": 1.9732301691064578, "grad_norm": 1.84375, "learning_rate": 5.1877815032196255e-06, "loss": 0.8168, "step": 11578 }, { "epoch": 1.9734016331954476, "grad_norm": 1.6875, "learning_rate": 5.186198507151747e-06, "loss": 0.8544, "step": 11579 }, { "epoch": 1.9735730972844374, "grad_norm": 1.7109375, "learning_rate": 5.184615668080063e-06, "loss": 0.8645, "step": 11580 }, { "epoch": 1.9737445613734272, "grad_norm": 1.6640625, "learning_rate": 5.183032986056195e-06, "loss": 0.7389, "step": 11581 }, { "epoch": 1.973916025462417, "grad_norm": 1.671875, "learning_rate": 5.181450461131759e-06, "loss": 0.7831, "step": 11582 }, { "epoch": 1.9740874895514071, "grad_norm": 1.671875, "learning_rate": 5.17986809335837e-06, "loss": 0.881, "step": 11583 }, { "epoch": 1.974258953640397, "grad_norm": 1.6640625, "learning_rate": 5.178285882787637e-06, "loss": 0.8336, "step": 11584 }, { "epoch": 1.9744304177293868, "grad_norm": 1.7578125, "learning_rate": 5.176703829471155e-06, "loss": 0.8578, "step": 11585 }, { "epoch": 1.9746018818183768, "grad_norm": 1.6015625, "learning_rate": 5.175121933460523e-06, "loss": 0.8177, "step": 11586 }, { "epoch": 1.9747733459073666, "grad_norm": 1.7265625, "learning_rate": 5.173540194807334e-06, "loss": 0.853, "step": 11587 }, { "epoch": 1.9749448099963565, "grad_norm": 1.703125, "learning_rate": 5.171958613563168e-06, "loss": 0.8128, "step": 11588 }, { "epoch": 1.9751162740853463, "grad_norm": 1.65625, "learning_rate": 5.170377189779618e-06, "loss": 0.8597, "step": 11589 }, { "epoch": 1.9752877381743361, "grad_norm": 1.75, "learning_rate": 5.1687959235082554e-06, "loss": 0.7888, "step": 11590 }, { "epoch": 1.975459202263326, "grad_norm": 1.734375, "learning_rate": 5.1672148148006494e-06, "loss": 0.8659, "step": 11591 }, { "epoch": 1.9756306663523158, "grad_norm": 1.6796875, "learning_rate": 5.165633863708368e-06, "loss": 0.8922, "step": 11592 }, { "epoch": 1.9758021304413056, "grad_norm": 1.6875, "learning_rate": 5.164053070282968e-06, "loss": 0.9071, "step": 11593 }, { "epoch": 1.9759735945302954, "grad_norm": 1.703125, "learning_rate": 5.162472434576011e-06, "loss": 0.9287, "step": 11594 }, { "epoch": 1.9761450586192855, "grad_norm": 1.765625, "learning_rate": 5.160891956639042e-06, "loss": 0.8846, "step": 11595 }, { "epoch": 1.9763165227082753, "grad_norm": 1.6640625, "learning_rate": 5.159311636523614e-06, "loss": 0.8229, "step": 11596 }, { "epoch": 1.9764879867972651, "grad_norm": 1.625, "learning_rate": 5.1577314742812575e-06, "loss": 0.8095, "step": 11597 }, { "epoch": 1.9766594508862552, "grad_norm": 1.7421875, "learning_rate": 5.1561514699635126e-06, "loss": 0.8743, "step": 11598 }, { "epoch": 1.976830914975245, "grad_norm": 1.65625, "learning_rate": 5.1545716236219075e-06, "loss": 0.7827, "step": 11599 }, { "epoch": 1.9770023790642348, "grad_norm": 1.7109375, "learning_rate": 5.152991935307969e-06, "loss": 0.9119, "step": 11600 }, { "epoch": 1.9771738431532246, "grad_norm": 1.6796875, "learning_rate": 5.151412405073215e-06, "loss": 0.8324, "step": 11601 }, { "epoch": 1.9773453072422145, "grad_norm": 1.6484375, "learning_rate": 5.1498330329691605e-06, "loss": 0.8356, "step": 11602 }, { "epoch": 1.9775167713312043, "grad_norm": 1.640625, "learning_rate": 5.148253819047315e-06, "loss": 0.833, "step": 11603 }, { "epoch": 1.9776882354201941, "grad_norm": 1.65625, "learning_rate": 5.146674763359183e-06, "loss": 0.8032, "step": 11604 }, { "epoch": 1.977859699509184, "grad_norm": 1.7890625, "learning_rate": 5.145095865956262e-06, "loss": 0.8332, "step": 11605 }, { "epoch": 1.9780311635981738, "grad_norm": 1.7421875, "learning_rate": 5.1435171268900474e-06, "loss": 0.8906, "step": 11606 }, { "epoch": 1.9782026276871638, "grad_norm": 1.640625, "learning_rate": 5.1419385462120264e-06, "loss": 0.9111, "step": 11607 }, { "epoch": 1.9783740917761536, "grad_norm": 1.65625, "learning_rate": 5.140360123973683e-06, "loss": 0.8105, "step": 11608 }, { "epoch": 1.9785455558651435, "grad_norm": 1.671875, "learning_rate": 5.138781860226496e-06, "loss": 0.8314, "step": 11609 }, { "epoch": 1.9787170199541335, "grad_norm": 1.6328125, "learning_rate": 5.137203755021938e-06, "loss": 0.7603, "step": 11610 }, { "epoch": 1.9788884840431233, "grad_norm": 1.671875, "learning_rate": 5.135625808411478e-06, "loss": 0.874, "step": 11611 }, { "epoch": 1.9790599481321132, "grad_norm": 1.7265625, "learning_rate": 5.134048020446578e-06, "loss": 0.8629, "step": 11612 }, { "epoch": 1.979231412221103, "grad_norm": 1.8125, "learning_rate": 5.1324703911786946e-06, "loss": 0.8188, "step": 11613 }, { "epoch": 1.9794028763100928, "grad_norm": 1.7109375, "learning_rate": 5.1308929206592805e-06, "loss": 0.7701, "step": 11614 }, { "epoch": 1.9795743403990826, "grad_norm": 1.703125, "learning_rate": 5.129315608939786e-06, "loss": 0.8955, "step": 11615 }, { "epoch": 1.9797458044880725, "grad_norm": 1.7421875, "learning_rate": 5.1277384560716495e-06, "loss": 0.8836, "step": 11616 }, { "epoch": 1.9799172685770623, "grad_norm": 1.703125, "learning_rate": 5.126161462106308e-06, "loss": 0.9262, "step": 11617 }, { "epoch": 1.9800887326660521, "grad_norm": 1.7578125, "learning_rate": 5.124584627095196e-06, "loss": 0.8944, "step": 11618 }, { "epoch": 1.9802601967550422, "grad_norm": 1.6484375, "learning_rate": 5.123007951089743e-06, "loss": 0.8255, "step": 11619 }, { "epoch": 1.980431660844032, "grad_norm": 1.6796875, "learning_rate": 5.12143143414136e-06, "loss": 0.8353, "step": 11620 }, { "epoch": 1.9806031249330218, "grad_norm": 1.7578125, "learning_rate": 5.119855076301468e-06, "loss": 0.8852, "step": 11621 }, { "epoch": 1.9807745890220119, "grad_norm": 1.671875, "learning_rate": 5.118278877621481e-06, "loss": 0.8293, "step": 11622 }, { "epoch": 1.9809460531110017, "grad_norm": 1.65625, "learning_rate": 5.116702838152803e-06, "loss": 0.8015, "step": 11623 }, { "epoch": 1.9811175171999915, "grad_norm": 1.7265625, "learning_rate": 5.115126957946832e-06, "loss": 0.8779, "step": 11624 }, { "epoch": 1.9812889812889813, "grad_norm": 1.8046875, "learning_rate": 5.113551237054968e-06, "loss": 0.869, "step": 11625 }, { "epoch": 1.9814604453779712, "grad_norm": 1.796875, "learning_rate": 5.111975675528598e-06, "loss": 0.8304, "step": 11626 }, { "epoch": 1.981631909466961, "grad_norm": 1.6171875, "learning_rate": 5.110400273419104e-06, "loss": 0.7922, "step": 11627 }, { "epoch": 1.9818033735559508, "grad_norm": 1.625, "learning_rate": 5.108825030777873e-06, "loss": 0.8331, "step": 11628 }, { "epoch": 1.9819748376449406, "grad_norm": 1.7421875, "learning_rate": 5.107249947656276e-06, "loss": 0.8969, "step": 11629 }, { "epoch": 1.9821463017339305, "grad_norm": 1.7265625, "learning_rate": 5.105675024105683e-06, "loss": 0.8615, "step": 11630 }, { "epoch": 1.9823177658229203, "grad_norm": 1.734375, "learning_rate": 5.104100260177462e-06, "loss": 0.8852, "step": 11631 }, { "epoch": 1.9824892299119103, "grad_norm": 1.7890625, "learning_rate": 5.102525655922963e-06, "loss": 0.8795, "step": 11632 }, { "epoch": 1.9826606940009002, "grad_norm": 1.6171875, "learning_rate": 5.100951211393545e-06, "loss": 0.8602, "step": 11633 }, { "epoch": 1.98283215808989, "grad_norm": 1.71875, "learning_rate": 5.099376926640558e-06, "loss": 0.9049, "step": 11634 }, { "epoch": 1.98300362217888, "grad_norm": 1.6484375, "learning_rate": 5.097802801715341e-06, "loss": 0.8136, "step": 11635 }, { "epoch": 1.9831750862678699, "grad_norm": 1.6484375, "learning_rate": 5.096228836669234e-06, "loss": 0.8509, "step": 11636 }, { "epoch": 1.9833465503568597, "grad_norm": 1.703125, "learning_rate": 5.094655031553572e-06, "loss": 0.8705, "step": 11637 }, { "epoch": 1.9835180144458495, "grad_norm": 1.5703125, "learning_rate": 5.093081386419682e-06, "loss": 0.731, "step": 11638 }, { "epoch": 1.9836894785348393, "grad_norm": 1.7109375, "learning_rate": 5.091507901318885e-06, "loss": 0.8829, "step": 11639 }, { "epoch": 1.9838609426238292, "grad_norm": 1.734375, "learning_rate": 5.0899345763024986e-06, "loss": 0.8311, "step": 11640 }, { "epoch": 1.984032406712819, "grad_norm": 1.71875, "learning_rate": 5.088361411421836e-06, "loss": 0.8326, "step": 11641 }, { "epoch": 1.9842038708018088, "grad_norm": 1.6328125, "learning_rate": 5.086788406728207e-06, "loss": 0.8074, "step": 11642 }, { "epoch": 1.9843753348907986, "grad_norm": 1.6640625, "learning_rate": 5.085215562272901e-06, "loss": 0.8817, "step": 11643 }, { "epoch": 1.9845467989797887, "grad_norm": 1.7265625, "learning_rate": 5.0836428781072266e-06, "loss": 0.8249, "step": 11644 }, { "epoch": 1.9847182630687785, "grad_norm": 1.734375, "learning_rate": 5.082070354282473e-06, "loss": 0.8485, "step": 11645 }, { "epoch": 1.9848897271577683, "grad_norm": 1.75, "learning_rate": 5.080497990849922e-06, "loss": 0.9151, "step": 11646 }, { "epoch": 1.9850611912467584, "grad_norm": 1.765625, "learning_rate": 5.078925787860858e-06, "loss": 0.7656, "step": 11647 }, { "epoch": 1.9852326553357482, "grad_norm": 1.6484375, "learning_rate": 5.077353745366555e-06, "loss": 0.8011, "step": 11648 }, { "epoch": 1.985404119424738, "grad_norm": 1.703125, "learning_rate": 5.075781863418283e-06, "loss": 0.8813, "step": 11649 }, { "epoch": 1.9855755835137279, "grad_norm": 1.6171875, "learning_rate": 5.074210142067309e-06, "loss": 0.8026, "step": 11650 }, { "epoch": 1.9857470476027177, "grad_norm": 1.7421875, "learning_rate": 5.072638581364888e-06, "loss": 0.8569, "step": 11651 }, { "epoch": 1.9859185116917075, "grad_norm": 1.7578125, "learning_rate": 5.071067181362279e-06, "loss": 0.926, "step": 11652 }, { "epoch": 1.9860899757806973, "grad_norm": 1.71875, "learning_rate": 5.069495942110731e-06, "loss": 0.8279, "step": 11653 }, { "epoch": 1.9862614398696872, "grad_norm": 1.6953125, "learning_rate": 5.06792486366149e-06, "loss": 0.8042, "step": 11654 }, { "epoch": 1.986432903958677, "grad_norm": 1.765625, "learning_rate": 5.066353946065787e-06, "loss": 0.8291, "step": 11655 }, { "epoch": 1.986604368047667, "grad_norm": 1.6640625, "learning_rate": 5.064783189374862e-06, "loss": 0.8628, "step": 11656 }, { "epoch": 1.9867758321366569, "grad_norm": 1.671875, "learning_rate": 5.063212593639939e-06, "loss": 0.8537, "step": 11657 }, { "epoch": 1.9869472962256467, "grad_norm": 1.6875, "learning_rate": 5.061642158912246e-06, "loss": 0.8203, "step": 11658 }, { "epoch": 1.9871187603146367, "grad_norm": 1.7578125, "learning_rate": 5.060071885242996e-06, "loss": 0.8631, "step": 11659 }, { "epoch": 1.9872902244036266, "grad_norm": 1.765625, "learning_rate": 5.0585017726834065e-06, "loss": 0.801, "step": 11660 }, { "epoch": 1.9874616884926164, "grad_norm": 1.6484375, "learning_rate": 5.056931821284681e-06, "loss": 0.87, "step": 11661 }, { "epoch": 1.9876331525816062, "grad_norm": 1.8046875, "learning_rate": 5.055362031098021e-06, "loss": 0.8322, "step": 11662 }, { "epoch": 1.987804616670596, "grad_norm": 1.671875, "learning_rate": 5.053792402174627e-06, "loss": 0.7921, "step": 11663 }, { "epoch": 1.9879760807595859, "grad_norm": 1.7421875, "learning_rate": 5.0522229345656885e-06, "loss": 0.8372, "step": 11664 } ], "logging_steps": 1, "max_steps": 17496, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5832, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.4421122547400573e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }