{ "best_metric": null, "best_model_checkpoint": null, "epoch": 350.0, "eval_steps": 350, "global_step": 1050, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.33, "learning_rate": 1.9047619047619051e-06, "loss": 1.9355, "step": 1 }, { "epoch": 0.67, "learning_rate": 3.8095238095238102e-06, "loss": 1.977, "step": 2 }, { "epoch": 1.0, "learning_rate": 5.7142857142857145e-06, "loss": 1.909, "step": 3 }, { "epoch": 1.33, "learning_rate": 7.6190476190476205e-06, "loss": 1.9514, "step": 4 }, { "epoch": 1.67, "learning_rate": 9.523809523809523e-06, "loss": 1.945, "step": 5 }, { "epoch": 2.0, "learning_rate": 1.1428571428571429e-05, "loss": 1.9343, "step": 6 }, { "epoch": 2.33, "learning_rate": 1.3333333333333333e-05, "loss": 1.923, "step": 7 }, { "epoch": 2.67, "learning_rate": 1.5238095238095241e-05, "loss": 1.9339, "step": 8 }, { "epoch": 3.0, "learning_rate": 1.7142857142857145e-05, "loss": 1.9657, "step": 9 }, { "epoch": 3.33, "learning_rate": 1.9047619047619046e-05, "loss": 1.9215, "step": 10 }, { "epoch": 3.67, "learning_rate": 2.0952380952380954e-05, "loss": 1.9188, "step": 11 }, { "epoch": 4.0, "learning_rate": 2.2857142857142858e-05, "loss": 1.9504, "step": 12 }, { "epoch": 4.33, "learning_rate": 2.4761904761904762e-05, "loss": 1.9009, "step": 13 }, { "epoch": 4.67, "learning_rate": 2.6666666666666667e-05, "loss": 1.9399, "step": 14 }, { "epoch": 5.0, "learning_rate": 2.857142857142857e-05, "loss": 1.902, "step": 15 }, { "epoch": 5.33, "learning_rate": 3.0476190476190482e-05, "loss": 1.8687, "step": 16 }, { "epoch": 5.67, "learning_rate": 3.2380952380952386e-05, "loss": 1.8792, "step": 17 }, { "epoch": 6.0, "learning_rate": 3.428571428571429e-05, "loss": 1.9199, "step": 18 }, { "epoch": 6.33, "learning_rate": 3.619047619047619e-05, "loss": 1.8893, "step": 19 }, { "epoch": 6.67, "learning_rate": 3.809523809523809e-05, "loss": 1.7973, "step": 20 }, { "epoch": 7.0, "learning_rate": 4e-05, "loss": 1.8354, "step": 21 }, { "epoch": 7.33, "learning_rate": 4.190476190476191e-05, "loss": 1.8192, "step": 22 }, { "epoch": 7.67, "learning_rate": 4.380952380952381e-05, "loss": 1.7685, "step": 23 }, { "epoch": 8.0, "learning_rate": 4.5714285714285716e-05, "loss": 1.7412, "step": 24 }, { "epoch": 8.33, "learning_rate": 4.761904761904762e-05, "loss": 1.7596, "step": 25 }, { "epoch": 8.67, "learning_rate": 4.9523809523809525e-05, "loss": 1.6698, "step": 26 }, { "epoch": 9.0, "learning_rate": 5.142857142857143e-05, "loss": 1.6695, "step": 27 }, { "epoch": 9.33, "learning_rate": 5.333333333333333e-05, "loss": 1.6478, "step": 28 }, { "epoch": 9.67, "learning_rate": 5.5238095238095244e-05, "loss": 1.6236, "step": 29 }, { "epoch": 10.0, "learning_rate": 5.714285714285714e-05, "loss": 1.6301, "step": 30 }, { "epoch": 10.33, "learning_rate": 5.904761904761905e-05, "loss": 1.5615, "step": 31 }, { "epoch": 10.67, "learning_rate": 6.0952380952380964e-05, "loss": 1.5969, "step": 32 }, { "epoch": 11.0, "learning_rate": 6.285714285714286e-05, "loss": 1.5722, "step": 33 }, { "epoch": 11.33, "learning_rate": 6.476190476190477e-05, "loss": 1.5809, "step": 34 }, { "epoch": 11.67, "learning_rate": 6.666666666666667e-05, "loss": 1.4969, "step": 35 }, { "epoch": 12.0, "learning_rate": 6.857142857142858e-05, "loss": 1.5034, "step": 36 }, { "epoch": 12.33, "learning_rate": 7.047619047619048e-05, "loss": 1.4814, "step": 37 }, { "epoch": 12.67, "learning_rate": 7.238095238095238e-05, "loss": 1.4822, "step": 38 }, { "epoch": 13.0, "learning_rate": 7.428571428571429e-05, "loss": 1.4758, "step": 39 }, { "epoch": 13.33, "learning_rate": 7.619047619047618e-05, "loss": 1.4583, "step": 40 }, { "epoch": 13.67, "learning_rate": 7.80952380952381e-05, "loss": 1.4306, "step": 41 }, { "epoch": 14.0, "learning_rate": 8e-05, "loss": 1.4247, "step": 42 }, { "epoch": 14.33, "learning_rate": 8.19047619047619e-05, "loss": 1.3784, "step": 43 }, { "epoch": 14.67, "learning_rate": 8.380952380952382e-05, "loss": 1.4326, "step": 44 }, { "epoch": 15.0, "learning_rate": 8.571428571428571e-05, "loss": 1.3538, "step": 45 }, { "epoch": 15.33, "learning_rate": 8.761904761904762e-05, "loss": 1.3156, "step": 46 }, { "epoch": 15.67, "learning_rate": 8.952380952380953e-05, "loss": 1.3364, "step": 47 }, { "epoch": 16.0, "learning_rate": 9.142857142857143e-05, "loss": 1.3473, "step": 48 }, { "epoch": 16.33, "learning_rate": 9.333333333333334e-05, "loss": 1.2919, "step": 49 }, { "epoch": 16.67, "learning_rate": 9.523809523809524e-05, "loss": 1.2749, "step": 50 }, { "epoch": 17.0, "learning_rate": 9.714285714285715e-05, "loss": 1.2587, "step": 51 }, { "epoch": 17.33, "learning_rate": 9.904761904761905e-05, "loss": 1.2509, "step": 52 }, { "epoch": 17.67, "learning_rate": 0.00010095238095238096, "loss": 1.2122, "step": 53 }, { "epoch": 18.0, "learning_rate": 0.00010285714285714286, "loss": 1.2101, "step": 54 }, { "epoch": 18.33, "learning_rate": 0.00010476190476190477, "loss": 1.1981, "step": 55 }, { "epoch": 18.67, "learning_rate": 0.00010666666666666667, "loss": 1.1938, "step": 56 }, { "epoch": 19.0, "learning_rate": 0.00010857142857142856, "loss": 1.1346, "step": 57 }, { "epoch": 19.33, "learning_rate": 0.00011047619047619049, "loss": 1.2012, "step": 58 }, { "epoch": 19.67, "learning_rate": 0.00011238095238095239, "loss": 1.1305, "step": 59 }, { "epoch": 20.0, "learning_rate": 0.00011428571428571428, "loss": 1.089, "step": 60 }, { "epoch": 20.33, "learning_rate": 0.00011619047619047621, "loss": 1.0899, "step": 61 }, { "epoch": 20.67, "learning_rate": 0.0001180952380952381, "loss": 1.0853, "step": 62 }, { "epoch": 21.0, "learning_rate": 0.00012, "loss": 1.1892, "step": 63 }, { "epoch": 21.33, "learning_rate": 0.00012190476190476193, "loss": 1.0786, "step": 64 }, { "epoch": 21.67, "learning_rate": 0.0001238095238095238, "loss": 1.0845, "step": 65 }, { "epoch": 22.0, "learning_rate": 0.00012571428571428572, "loss": 1.0965, "step": 66 }, { "epoch": 22.33, "learning_rate": 0.0001276190476190476, "loss": 1.0556, "step": 67 }, { "epoch": 22.67, "learning_rate": 0.00012952380952380954, "loss": 1.1, "step": 68 }, { "epoch": 23.0, "learning_rate": 0.00013142857142857143, "loss": 1.0133, "step": 69 }, { "epoch": 23.33, "learning_rate": 0.00013333333333333334, "loss": 1.0485, "step": 70 }, { "epoch": 23.67, "learning_rate": 0.00013523809523809525, "loss": 1.0284, "step": 71 }, { "epoch": 24.0, "learning_rate": 0.00013714285714285716, "loss": 1.0065, "step": 72 }, { "epoch": 24.33, "learning_rate": 0.00013904761904761905, "loss": 1.087, "step": 73 }, { "epoch": 24.67, "learning_rate": 0.00014095238095238096, "loss": 0.9647, "step": 74 }, { "epoch": 25.0, "learning_rate": 0.00014285714285714287, "loss": 0.9509, "step": 75 }, { "epoch": 25.33, "learning_rate": 0.00014476190476190475, "loss": 0.9621, "step": 76 }, { "epoch": 25.67, "learning_rate": 0.00014666666666666666, "loss": 0.9913, "step": 77 }, { "epoch": 26.0, "learning_rate": 0.00014857142857142857, "loss": 1.0245, "step": 78 }, { "epoch": 26.33, "learning_rate": 0.00015047619047619048, "loss": 0.9638, "step": 79 }, { "epoch": 26.67, "learning_rate": 0.00015238095238095237, "loss": 0.9652, "step": 80 }, { "epoch": 27.0, "learning_rate": 0.0001542857142857143, "loss": 0.9183, "step": 81 }, { "epoch": 27.33, "learning_rate": 0.0001561904761904762, "loss": 0.9729, "step": 82 }, { "epoch": 27.67, "learning_rate": 0.0001580952380952381, "loss": 0.9248, "step": 83 }, { "epoch": 28.0, "learning_rate": 0.00016, "loss": 0.8567, "step": 84 }, { "epoch": 28.33, "learning_rate": 0.00016190476190476192, "loss": 0.911, "step": 85 }, { "epoch": 28.67, "learning_rate": 0.0001638095238095238, "loss": 0.8622, "step": 86 }, { "epoch": 29.0, "learning_rate": 0.00016571428571428575, "loss": 0.9135, "step": 87 }, { "epoch": 29.33, "learning_rate": 0.00016761904761904763, "loss": 0.869, "step": 88 }, { "epoch": 29.67, "learning_rate": 0.00016952380952380954, "loss": 0.9034, "step": 89 }, { "epoch": 30.0, "learning_rate": 0.00017142857142857143, "loss": 0.8036, "step": 90 }, { "epoch": 30.33, "learning_rate": 0.00017333333333333334, "loss": 0.8248, "step": 91 }, { "epoch": 30.67, "learning_rate": 0.00017523809523809525, "loss": 0.8567, "step": 92 }, { "epoch": 31.0, "learning_rate": 0.00017714285714285713, "loss": 0.7961, "step": 93 }, { "epoch": 31.33, "learning_rate": 0.00017904761904761907, "loss": 0.7859, "step": 94 }, { "epoch": 31.67, "learning_rate": 0.00018095238095238095, "loss": 0.8019, "step": 95 }, { "epoch": 32.0, "learning_rate": 0.00018285714285714286, "loss": 0.8066, "step": 96 }, { "epoch": 32.33, "learning_rate": 0.00018476190476190478, "loss": 0.781, "step": 97 }, { "epoch": 32.67, "learning_rate": 0.0001866666666666667, "loss": 0.7525, "step": 98 }, { "epoch": 33.0, "learning_rate": 0.00018857142857142857, "loss": 0.7167, "step": 99 }, { "epoch": 33.33, "learning_rate": 0.00019047619047619048, "loss": 0.7536, "step": 100 }, { "epoch": 33.67, "learning_rate": 0.0001923809523809524, "loss": 0.6921, "step": 101 }, { "epoch": 34.0, "learning_rate": 0.0001942857142857143, "loss": 0.6864, "step": 102 }, { "epoch": 34.33, "learning_rate": 0.0001961904761904762, "loss": 0.7011, "step": 103 }, { "epoch": 34.67, "learning_rate": 0.0001980952380952381, "loss": 0.6718, "step": 104 }, { "epoch": 35.0, "learning_rate": 0.0002, "loss": 0.652, "step": 105 }, { "epoch": 35.33, "learning_rate": 0.00019999944740655014, "loss": 0.6498, "step": 106 }, { "epoch": 35.67, "learning_rate": 0.00019999778963230775, "loss": 0.6476, "step": 107 }, { "epoch": 36.0, "learning_rate": 0.00019999502669559432, "loss": 0.6011, "step": 108 }, { "epoch": 36.33, "learning_rate": 0.00019999115862694546, "loss": 0.595, "step": 109 }, { "epoch": 36.67, "learning_rate": 0.00019998618546911056, "loss": 0.6427, "step": 110 }, { "epoch": 37.0, "learning_rate": 0.00019998010727705236, "loss": 0.5505, "step": 111 }, { "epoch": 37.33, "learning_rate": 0.00019997292411794618, "loss": 0.617, "step": 112 }, { "epoch": 37.67, "learning_rate": 0.00019996463607117935, "loss": 0.537, "step": 113 }, { "epoch": 38.0, "learning_rate": 0.00019995524322835034, "loss": 0.5147, "step": 114 }, { "epoch": 38.33, "learning_rate": 0.00019994474569326757, "loss": 0.5067, "step": 115 }, { "epoch": 38.67, "learning_rate": 0.00019993314358194843, "loss": 0.5243, "step": 116 }, { "epoch": 39.0, "learning_rate": 0.00019992043702261793, "loss": 0.5144, "step": 117 }, { "epoch": 39.33, "learning_rate": 0.0001999066261557073, "loss": 0.4697, "step": 118 }, { "epoch": 39.67, "learning_rate": 0.0001998917111338525, "loss": 0.4706, "step": 119 }, { "epoch": 40.0, "learning_rate": 0.00019987569212189224, "loss": 0.4908, "step": 120 }, { "epoch": 40.33, "learning_rate": 0.00019985856929686667, "loss": 0.4193, "step": 121 }, { "epoch": 40.67, "learning_rate": 0.00019984034284801502, "loss": 0.4805, "step": 122 }, { "epoch": 41.0, "learning_rate": 0.0001998210129767735, "loss": 0.4101, "step": 123 }, { "epoch": 41.33, "learning_rate": 0.00019980057989677345, "loss": 0.3676, "step": 124 }, { "epoch": 41.67, "learning_rate": 0.0001997790438338385, "loss": 0.442, "step": 125 }, { "epoch": 42.0, "learning_rate": 0.00019975640502598244, "loss": 0.385, "step": 126 }, { "epoch": 42.33, "learning_rate": 0.00019973266372340639, "loss": 0.38, "step": 127 }, { "epoch": 42.67, "learning_rate": 0.0001997078201884961, "loss": 0.3504, "step": 128 }, { "epoch": 43.0, "learning_rate": 0.0001996818746958191, "loss": 0.3622, "step": 129 }, { "epoch": 43.33, "learning_rate": 0.00019965482753212156, "loss": 0.3173, "step": 130 }, { "epoch": 43.67, "learning_rate": 0.00019962667899632518, "loss": 0.3491, "step": 131 }, { "epoch": 44.0, "learning_rate": 0.00019959742939952392, "loss": 0.3067, "step": 132 }, { "epoch": 44.33, "learning_rate": 0.00019956707906498044, "loss": 0.2769, "step": 133 }, { "epoch": 44.67, "learning_rate": 0.00019953562832812272, "loss": 0.306, "step": 134 }, { "epoch": 45.0, "learning_rate": 0.00019950307753654017, "loss": 0.2923, "step": 135 }, { "epoch": 45.33, "learning_rate": 0.00019946942704997982, "loss": 0.2585, "step": 136 }, { "epoch": 45.67, "learning_rate": 0.00019943467724034252, "loss": 0.2664, "step": 137 }, { "epoch": 46.0, "learning_rate": 0.00019939882849167852, "loss": 0.2606, "step": 138 }, { "epoch": 46.33, "learning_rate": 0.0001993618812001836, "loss": 0.2376, "step": 139 }, { "epoch": 46.67, "learning_rate": 0.00019932383577419432, "loss": 0.213, "step": 140 }, { "epoch": 47.0, "learning_rate": 0.00019928469263418374, "loss": 0.2371, "step": 141 }, { "epoch": 47.33, "learning_rate": 0.00019924445221275675, "loss": 0.2132, "step": 142 }, { "epoch": 47.67, "learning_rate": 0.00019920311495464518, "loss": 0.2004, "step": 143 }, { "epoch": 48.0, "learning_rate": 0.00019916068131670302, "loss": 0.2163, "step": 144 }, { "epoch": 48.33, "learning_rate": 0.0001991171517679013, "loss": 0.2106, "step": 145 }, { "epoch": 48.67, "learning_rate": 0.0001990725267893228, "loss": 0.1734, "step": 146 }, { "epoch": 49.0, "learning_rate": 0.00019902680687415705, "loss": 0.1896, "step": 147 }, { "epoch": 49.33, "learning_rate": 0.00019897999252769448, "loss": 0.1678, "step": 148 }, { "epoch": 49.67, "learning_rate": 0.00019893208426732115, "loss": 0.179, "step": 149 }, { "epoch": 50.0, "learning_rate": 0.00019888308262251285, "loss": 0.152, "step": 150 }, { "epoch": 50.33, "learning_rate": 0.00019883298813482938, "loss": 0.1437, "step": 151 }, { "epoch": 50.67, "learning_rate": 0.00019878180135790845, "loss": 0.1491, "step": 152 }, { "epoch": 51.0, "learning_rate": 0.00019872952285745959, "loss": 0.1562, "step": 153 }, { "epoch": 51.33, "learning_rate": 0.00019867615321125795, "loss": 0.137, "step": 154 }, { "epoch": 51.67, "learning_rate": 0.00019862169300913785, "loss": 0.1305, "step": 155 }, { "epoch": 52.0, "learning_rate": 0.0001985661428529863, "loss": 0.1266, "step": 156 }, { "epoch": 52.33, "learning_rate": 0.00019850950335673643, "loss": 0.1205, "step": 157 }, { "epoch": 52.67, "learning_rate": 0.00019845177514636042, "loss": 0.1262, "step": 158 }, { "epoch": 53.0, "learning_rate": 0.00019839295885986296, "loss": 0.1023, "step": 159 }, { "epoch": 53.33, "learning_rate": 0.00019833305514727395, "loss": 0.1071, "step": 160 }, { "epoch": 53.67, "learning_rate": 0.00019827206467064133, "loss": 0.0987, "step": 161 }, { "epoch": 54.0, "learning_rate": 0.0001982099881040239, "loss": 0.0992, "step": 162 }, { "epoch": 54.33, "learning_rate": 0.0001981468261334837, "loss": 0.0812, "step": 163 }, { "epoch": 54.67, "learning_rate": 0.0001980825794570786, "loss": 0.0976, "step": 164 }, { "epoch": 55.0, "learning_rate": 0.00019801724878485438, "loss": 0.0957, "step": 165 }, { "epoch": 55.33, "learning_rate": 0.00019795083483883715, "loss": 0.0867, "step": 166 }, { "epoch": 55.67, "learning_rate": 0.0001978833383530251, "loss": 0.0724, "step": 167 }, { "epoch": 56.0, "learning_rate": 0.00019781476007338058, "loss": 0.0926, "step": 168 }, { "epoch": 56.33, "learning_rate": 0.00019774510075782172, "loss": 0.0717, "step": 169 }, { "epoch": 56.67, "learning_rate": 0.00019767436117621413, "loss": 0.0764, "step": 170 }, { "epoch": 57.0, "learning_rate": 0.00019760254211036244, "loss": 0.073, "step": 171 }, { "epoch": 57.33, "learning_rate": 0.00019752964435400155, "loss": 0.065, "step": 172 }, { "epoch": 57.67, "learning_rate": 0.00019745566871278794, "loss": 0.0695, "step": 173 }, { "epoch": 58.0, "learning_rate": 0.00019738061600429064, "loss": 0.0638, "step": 174 }, { "epoch": 58.33, "learning_rate": 0.00019730448705798239, "loss": 0.0583, "step": 175 }, { "epoch": 58.67, "learning_rate": 0.00019722728271523034, "loss": 0.0594, "step": 176 }, { "epoch": 59.0, "learning_rate": 0.00019714900382928675, "loss": 0.0574, "step": 177 }, { "epoch": 59.33, "learning_rate": 0.00019706965126527963, "loss": 0.0489, "step": 178 }, { "epoch": 59.67, "learning_rate": 0.00019698922590020312, "loss": 0.0602, "step": 179 }, { "epoch": 60.0, "learning_rate": 0.0001969077286229078, "loss": 0.0438, "step": 180 }, { "epoch": 60.33, "learning_rate": 0.00019682516033409092, "loss": 0.047, "step": 181 }, { "epoch": 60.67, "learning_rate": 0.00019674152194628638, "loss": 0.0437, "step": 182 }, { "epoch": 61.0, "learning_rate": 0.00019665681438385473, "loss": 0.0533, "step": 183 }, { "epoch": 61.33, "learning_rate": 0.0001965710385829728, "loss": 0.0463, "step": 184 }, { "epoch": 61.67, "learning_rate": 0.00019648419549162348, "loss": 0.0441, "step": 185 }, { "epoch": 62.0, "learning_rate": 0.00019639628606958533, "loss": 0.0398, "step": 186 }, { "epoch": 62.33, "learning_rate": 0.0001963073112884217, "loss": 0.0396, "step": 187 }, { "epoch": 62.67, "learning_rate": 0.00019621727213147027, "loss": 0.0372, "step": 188 }, { "epoch": 63.0, "learning_rate": 0.0001961261695938319, "loss": 0.0447, "step": 189 }, { "epoch": 63.33, "learning_rate": 0.00019603400468235998, "loss": 0.0366, "step": 190 }, { "epoch": 63.67, "learning_rate": 0.00019594077841564907, "loss": 0.0369, "step": 191 }, { "epoch": 64.0, "learning_rate": 0.00019584649182402357, "loss": 0.0338, "step": 192 }, { "epoch": 64.33, "learning_rate": 0.0001957511459495266, "loss": 0.0337, "step": 193 }, { "epoch": 64.67, "learning_rate": 0.00019565474184590826, "loss": 0.0331, "step": 194 }, { "epoch": 65.0, "learning_rate": 0.0001955572805786141, "loss": 0.0345, "step": 195 }, { "epoch": 65.33, "learning_rate": 0.0001954587632247732, "loss": 0.0294, "step": 196 }, { "epoch": 65.67, "learning_rate": 0.00019535919087318652, "loss": 0.031, "step": 197 }, { "epoch": 66.0, "learning_rate": 0.0001952585646243146, "loss": 0.0303, "step": 198 }, { "epoch": 66.33, "learning_rate": 0.00019515688559026563, "loss": 0.0289, "step": 199 }, { "epoch": 66.67, "learning_rate": 0.0001950541548947829, "loss": 0.0276, "step": 200 }, { "epoch": 67.0, "learning_rate": 0.00019495037367323262, "loss": 0.0272, "step": 201 }, { "epoch": 67.33, "learning_rate": 0.0001948455430725913, "loss": 0.0255, "step": 202 }, { "epoch": 67.67, "learning_rate": 0.00019473966425143292, "loss": 0.0261, "step": 203 }, { "epoch": 68.0, "learning_rate": 0.00019463273837991643, "loss": 0.0278, "step": 204 }, { "epoch": 68.33, "learning_rate": 0.00019452476663977248, "loss": 0.0225, "step": 205 }, { "epoch": 68.67, "learning_rate": 0.00019441575022429065, "loss": 0.0236, "step": 206 }, { "epoch": 69.0, "learning_rate": 0.00019430569033830605, "loss": 0.0261, "step": 207 }, { "epoch": 69.33, "learning_rate": 0.00019419458819818614, "loss": 0.0218, "step": 208 }, { "epoch": 69.67, "learning_rate": 0.00019408244503181724, "loss": 0.0222, "step": 209 }, { "epoch": 70.0, "learning_rate": 0.00019396926207859084, "loss": 0.0224, "step": 210 }, { "epoch": 70.33, "learning_rate": 0.00019385504058939024, "loss": 0.0207, "step": 211 }, { "epoch": 70.67, "learning_rate": 0.00019373978182657625, "loss": 0.0207, "step": 212 }, { "epoch": 71.0, "learning_rate": 0.00019362348706397373, "loss": 0.0194, "step": 213 }, { "epoch": 71.33, "learning_rate": 0.00019350615758685708, "loss": 0.0167, "step": 214 }, { "epoch": 71.67, "learning_rate": 0.00019338779469193639, "loss": 0.02, "step": 215 }, { "epoch": 72.0, "learning_rate": 0.00019326839968734279, "loss": 0.0196, "step": 216 }, { "epoch": 72.33, "learning_rate": 0.00019314797389261424, "loss": 0.0155, "step": 217 }, { "epoch": 72.67, "learning_rate": 0.00019302651863868092, "loss": 0.019, "step": 218 }, { "epoch": 73.0, "learning_rate": 0.00019290403526785025, "loss": 0.0174, "step": 219 }, { "epoch": 73.33, "learning_rate": 0.00019278052513379255, "loss": 0.0172, "step": 220 }, { "epoch": 73.67, "learning_rate": 0.00019265598960152555, "loss": 0.0164, "step": 221 }, { "epoch": 74.0, "learning_rate": 0.00019253043004739968, "loss": 0.0149, "step": 222 }, { "epoch": 74.33, "learning_rate": 0.00019240384785908265, "loss": 0.0147, "step": 223 }, { "epoch": 74.67, "learning_rate": 0.00019227624443554425, "loss": 0.0156, "step": 224 }, { "epoch": 75.0, "learning_rate": 0.00019214762118704076, "loss": 0.0151, "step": 225 }, { "epoch": 75.33, "learning_rate": 0.00019201797953509955, "loss": 0.0153, "step": 226 }, { "epoch": 75.67, "learning_rate": 0.00019188732091250307, "loss": 0.0131, "step": 227 }, { "epoch": 76.0, "learning_rate": 0.00019175564676327339, "loss": 0.0136, "step": 228 }, { "epoch": 76.33, "learning_rate": 0.00019162295854265594, "loss": 0.0142, "step": 229 }, { "epoch": 76.67, "learning_rate": 0.00019148925771710347, "loss": 0.0124, "step": 230 }, { "epoch": 77.0, "learning_rate": 0.0001913545457642601, "loss": 0.0127, "step": 231 }, { "epoch": 77.33, "learning_rate": 0.00019121882417294462, "loss": 0.0119, "step": 232 }, { "epoch": 77.67, "learning_rate": 0.00019108209444313433, "loss": 0.0128, "step": 233 }, { "epoch": 78.0, "learning_rate": 0.00019094435808594823, "loss": 0.0117, "step": 234 }, { "epoch": 78.33, "learning_rate": 0.0001908056166236305, "loss": 0.0122, "step": 235 }, { "epoch": 78.67, "learning_rate": 0.00019066587158953366, "loss": 0.0115, "step": 236 }, { "epoch": 79.0, "learning_rate": 0.0001905251245281015, "loss": 0.0108, "step": 237 }, { "epoch": 79.33, "learning_rate": 0.00019038337699485208, "loss": 0.0104, "step": 238 }, { "epoch": 79.67, "learning_rate": 0.00019024063055636057, "loss": 0.0108, "step": 239 }, { "epoch": 80.0, "learning_rate": 0.0001900968867902419, "loss": 0.0099, "step": 240 }, { "epoch": 80.33, "learning_rate": 0.00018995214728513343, "loss": 0.0099, "step": 241 }, { "epoch": 80.67, "learning_rate": 0.0001898064136406771, "loss": 0.0096, "step": 242 }, { "epoch": 81.0, "learning_rate": 0.0001896596874675021, "loss": 0.0101, "step": 243 }, { "epoch": 81.33, "learning_rate": 0.00018951197038720688, "loss": 0.0105, "step": 244 }, { "epoch": 81.67, "learning_rate": 0.00018936326403234125, "loss": 0.0088, "step": 245 }, { "epoch": 82.0, "learning_rate": 0.00018921357004638835, "loss": 0.0097, "step": 246 }, { "epoch": 82.33, "learning_rate": 0.00018906289008374655, "loss": 0.0084, "step": 247 }, { "epoch": 82.67, "learning_rate": 0.00018891122580971098, "loss": 0.0095, "step": 248 }, { "epoch": 83.0, "learning_rate": 0.00018875857890045543, "loss": 0.0084, "step": 249 }, { "epoch": 83.33, "learning_rate": 0.00018860495104301345, "loss": 0.0074, "step": 250 }, { "epoch": 83.67, "learning_rate": 0.00018845034393526005, "loss": 0.0088, "step": 251 }, { "epoch": 84.0, "learning_rate": 0.00018829475928589271, "loss": 0.0078, "step": 252 }, { "epoch": 84.33, "learning_rate": 0.0001881381988144126, "loss": 0.0081, "step": 253 }, { "epoch": 84.67, "learning_rate": 0.0001879806642511055, "loss": 0.0071, "step": 254 }, { "epoch": 85.0, "learning_rate": 0.00018782215733702286, "loss": 0.0069, "step": 255 }, { "epoch": 85.33, "learning_rate": 0.00018766267982396224, "loss": 0.007, "step": 256 }, { "epoch": 85.67, "learning_rate": 0.00018750223347444828, "loss": 0.0074, "step": 257 }, { "epoch": 86.0, "learning_rate": 0.00018734082006171299, "loss": 0.0065, "step": 258 }, { "epoch": 86.33, "learning_rate": 0.00018717844136967624, "loss": 0.0063, "step": 259 }, { "epoch": 86.67, "learning_rate": 0.00018701509919292613, "loss": 0.0072, "step": 260 }, { "epoch": 87.0, "learning_rate": 0.0001868507953366989, "loss": 0.006, "step": 261 }, { "epoch": 87.33, "learning_rate": 0.00018668553161685933, "loss": 0.0062, "step": 262 }, { "epoch": 87.67, "learning_rate": 0.00018651930985988036, "loss": 0.0059, "step": 263 }, { "epoch": 88.0, "learning_rate": 0.0001863521319028231, "loss": 0.0068, "step": 264 }, { "epoch": 88.33, "learning_rate": 0.0001861839995933164, "loss": 0.0056, "step": 265 }, { "epoch": 88.67, "learning_rate": 0.00018601491478953657, "loss": 0.0058, "step": 266 }, { "epoch": 89.0, "learning_rate": 0.00018584487936018661, "loss": 0.0063, "step": 267 }, { "epoch": 89.33, "learning_rate": 0.0001856738951844759, "loss": 0.0057, "step": 268 }, { "epoch": 89.67, "learning_rate": 0.00018550196415209914, "loss": 0.006, "step": 269 }, { "epoch": 90.0, "learning_rate": 0.00018532908816321558, "loss": 0.0062, "step": 270 }, { "epoch": 90.33, "learning_rate": 0.00018515526912842796, "loss": 0.0059, "step": 271 }, { "epoch": 90.67, "learning_rate": 0.0001849805089687615, "loss": 0.0071, "step": 272 }, { "epoch": 91.0, "learning_rate": 0.0001848048096156426, "loss": 0.0053, "step": 273 }, { "epoch": 91.33, "learning_rate": 0.00018462817301087748, "loss": 0.0061, "step": 274 }, { "epoch": 91.67, "learning_rate": 0.0001844506011066308, "loss": 0.0049, "step": 275 }, { "epoch": 92.0, "learning_rate": 0.0001842720958654039, "loss": 0.0057, "step": 276 }, { "epoch": 92.33, "learning_rate": 0.00018409265926001343, "loss": 0.0052, "step": 277 }, { "epoch": 92.67, "learning_rate": 0.00018391229327356916, "loss": 0.0052, "step": 278 }, { "epoch": 93.0, "learning_rate": 0.00018373099989945236, "loss": 0.0051, "step": 279 }, { "epoch": 93.33, "learning_rate": 0.00018354878114129367, "loss": 0.0054, "step": 280 }, { "epoch": 93.67, "learning_rate": 0.0001833656390129509, "loss": 0.0051, "step": 281 }, { "epoch": 94.0, "learning_rate": 0.0001831815755384869, "loss": 0.0049, "step": 282 }, { "epoch": 94.33, "learning_rate": 0.00018299659275214706, "loss": 0.0043, "step": 283 }, { "epoch": 94.67, "learning_rate": 0.00018281069269833692, "loss": 0.005, "step": 284 }, { "epoch": 95.0, "learning_rate": 0.0001826238774315995, "loss": 0.0058, "step": 285 }, { "epoch": 95.33, "learning_rate": 0.00018243614901659264, "loss": 0.0049, "step": 286 }, { "epoch": 95.67, "learning_rate": 0.00018224750952806624, "loss": 0.0049, "step": 287 }, { "epoch": 96.0, "learning_rate": 0.00018205796105083915, "loss": 0.0047, "step": 288 }, { "epoch": 96.33, "learning_rate": 0.00018186750567977637, "loss": 0.0046, "step": 289 }, { "epoch": 96.67, "learning_rate": 0.00018167614551976567, "loss": 0.0051, "step": 290 }, { "epoch": 97.0, "learning_rate": 0.00018148388268569453, "loss": 0.0052, "step": 291 }, { "epoch": 97.33, "learning_rate": 0.00018129071930242648, "loss": 0.0042, "step": 292 }, { "epoch": 97.67, "learning_rate": 0.00018109665750477806, "loss": 0.0054, "step": 293 }, { "epoch": 98.0, "learning_rate": 0.00018090169943749476, "loss": 0.0054, "step": 294 }, { "epoch": 98.33, "learning_rate": 0.00018070584725522762, "loss": 0.0041, "step": 295 }, { "epoch": 98.67, "learning_rate": 0.00018050910312250931, "loss": 0.0051, "step": 296 }, { "epoch": 99.0, "learning_rate": 0.00018031146921373018, "loss": 0.0047, "step": 297 }, { "epoch": 99.33, "learning_rate": 0.00018011294771311435, "loss": 0.0039, "step": 298 }, { "epoch": 99.67, "learning_rate": 0.00017991354081469538, "loss": 0.0048, "step": 299 }, { "epoch": 100.0, "learning_rate": 0.00017971325072229226, "loss": 0.0048, "step": 300 }, { "epoch": 100.33, "learning_rate": 0.0001795120796494848, "loss": 0.0043, "step": 301 }, { "epoch": 100.67, "learning_rate": 0.00017931002981958933, "loss": 0.0044, "step": 302 }, { "epoch": 101.0, "learning_rate": 0.00017910710346563416, "loss": 0.0041, "step": 303 }, { "epoch": 101.33, "learning_rate": 0.00017890330283033468, "loss": 0.0044, "step": 304 }, { "epoch": 101.67, "learning_rate": 0.0001786986301660689, "loss": 0.0045, "step": 305 }, { "epoch": 102.0, "learning_rate": 0.00017849308773485226, "loss": 0.0035, "step": 306 }, { "epoch": 102.33, "learning_rate": 0.00017828667780831278, "loss": 0.0039, "step": 307 }, { "epoch": 102.67, "learning_rate": 0.00017807940266766593, "loss": 0.0043, "step": 308 }, { "epoch": 103.0, "learning_rate": 0.0001778712646036894, "loss": 0.0041, "step": 309 }, { "epoch": 103.33, "learning_rate": 0.00017766226591669785, "loss": 0.004, "step": 310 }, { "epoch": 103.67, "learning_rate": 0.00017745240891651735, "loss": 0.0036, "step": 311 }, { "epoch": 104.0, "learning_rate": 0.00017724169592245995, "loss": 0.0047, "step": 312 }, { "epoch": 104.33, "learning_rate": 0.00017703012926329815, "loss": 0.0038, "step": 313 }, { "epoch": 104.67, "learning_rate": 0.0001768177112772388, "loss": 0.0039, "step": 314 }, { "epoch": 105.0, "learning_rate": 0.0001766044443118978, "loss": 0.0038, "step": 315 }, { "epoch": 105.33, "learning_rate": 0.00017639033072427366, "loss": 0.004, "step": 316 }, { "epoch": 105.67, "learning_rate": 0.0001761753728807217, "loss": 0.004, "step": 317 }, { "epoch": 106.0, "learning_rate": 0.00017595957315692782, "loss": 0.0033, "step": 318 }, { "epoch": 106.33, "learning_rate": 0.00017574293393788235, "loss": 0.0039, "step": 319 }, { "epoch": 106.67, "learning_rate": 0.0001755254576178535, "loss": 0.0041, "step": 320 }, { "epoch": 107.0, "learning_rate": 0.00017530714660036112, "loss": 0.0033, "step": 321 }, { "epoch": 107.33, "learning_rate": 0.00017508800329814995, "loss": 0.0036, "step": 322 }, { "epoch": 107.67, "learning_rate": 0.000174868030133163, "loss": 0.0036, "step": 323 }, { "epoch": 108.0, "learning_rate": 0.00017464722953651504, "loss": 0.0039, "step": 324 }, { "epoch": 108.33, "learning_rate": 0.00017442560394846516, "loss": 0.0039, "step": 325 }, { "epoch": 108.67, "learning_rate": 0.00017420315581839044, "loss": 0.0036, "step": 326 }, { "epoch": 109.0, "learning_rate": 0.0001739798876047584, "loss": 0.0035, "step": 327 }, { "epoch": 109.33, "learning_rate": 0.00017375580177510016, "loss": 0.0039, "step": 328 }, { "epoch": 109.67, "learning_rate": 0.0001735309008059829, "loss": 0.0032, "step": 329 }, { "epoch": 110.0, "learning_rate": 0.00017330518718298264, "loss": 0.0037, "step": 330 }, { "epoch": 110.33, "learning_rate": 0.00017307866340065685, "loss": 0.0041, "step": 331 }, { "epoch": 110.67, "learning_rate": 0.00017285133196251663, "loss": 0.0033, "step": 332 }, { "epoch": 111.0, "learning_rate": 0.0001726231953809993, "loss": 0.0033, "step": 333 }, { "epoch": 111.33, "learning_rate": 0.00017239425617744048, "loss": 0.0033, "step": 334 }, { "epoch": 111.67, "learning_rate": 0.0001721645168820462, "loss": 0.0038, "step": 335 }, { "epoch": 112.0, "learning_rate": 0.0001719339800338651, "loss": 0.0036, "step": 336 }, { "epoch": 112.33, "learning_rate": 0.00017170264818076026, "loss": 0.0033, "step": 337 }, { "epoch": 112.67, "learning_rate": 0.0001714705238793809, "loss": 0.0035, "step": 338 }, { "epoch": 113.0, "learning_rate": 0.0001712376096951345, "loss": 0.0038, "step": 339 }, { "epoch": 113.33, "learning_rate": 0.00017100390820215804, "loss": 0.0034, "step": 340 }, { "epoch": 113.67, "learning_rate": 0.00017076942198328987, "loss": 0.0034, "step": 341 }, { "epoch": 114.0, "learning_rate": 0.0001705341536300409, "loss": 0.0035, "step": 342 }, { "epoch": 114.33, "learning_rate": 0.0001702981057425662, "loss": 0.0033, "step": 343 }, { "epoch": 114.67, "learning_rate": 0.00017006128092963605, "loss": 0.0036, "step": 344 }, { "epoch": 115.0, "learning_rate": 0.00016982368180860728, "loss": 0.0034, "step": 345 }, { "epoch": 115.33, "learning_rate": 0.00016958531100539427, "loss": 0.0032, "step": 346 }, { "epoch": 115.67, "learning_rate": 0.00016934617115443992, "loss": 0.0033, "step": 347 }, { "epoch": 116.0, "learning_rate": 0.00016910626489868649, "loss": 0.004, "step": 348 }, { "epoch": 116.33, "learning_rate": 0.00016886559488954648, "loss": 0.0032, "step": 349 }, { "epoch": 116.67, "learning_rate": 0.0001686241637868734, "loss": 0.0035, "step": 350 }, { "epoch": 116.67, "eval_loss": 1.2795522212982178, "eval_runtime": 3.5029, "eval_samples_per_second": 5.995, "eval_steps_per_second": 0.856, "step": 350 }, { "epoch": 117.0, "learning_rate": 0.00016838197425893202, "loss": 0.0038, "step": 351 }, { "epoch": 117.33, "learning_rate": 0.00016813902898236939, "loss": 0.0033, "step": 352 }, { "epoch": 117.67, "learning_rate": 0.00016789533064218485, "loss": 0.0035, "step": 353 }, { "epoch": 118.0, "learning_rate": 0.00016765088193170053, "loss": 0.0033, "step": 354 }, { "epoch": 118.33, "learning_rate": 0.00016740568555253155, "loss": 0.0037, "step": 355 }, { "epoch": 118.67, "learning_rate": 0.00016715974421455617, "loss": 0.0032, "step": 356 }, { "epoch": 119.0, "learning_rate": 0.00016691306063588583, "loss": 0.0031, "step": 357 }, { "epoch": 119.33, "learning_rate": 0.00016666563754283515, "loss": 0.0035, "step": 358 }, { "epoch": 119.67, "learning_rate": 0.0001664174776698917, "loss": 0.0031, "step": 359 }, { "epoch": 120.0, "learning_rate": 0.00016616858375968595, "loss": 0.0033, "step": 360 }, { "epoch": 120.33, "learning_rate": 0.00016591895856296073, "loss": 0.0036, "step": 361 }, { "epoch": 120.67, "learning_rate": 0.00016566860483854104, "loss": 0.0032, "step": 362 }, { "epoch": 121.0, "learning_rate": 0.00016541752535330345, "loss": 0.0032, "step": 363 }, { "epoch": 121.33, "learning_rate": 0.00016516572288214552, "loss": 0.003, "step": 364 }, { "epoch": 121.67, "learning_rate": 0.0001649132002079552, "loss": 0.003, "step": 365 }, { "epoch": 122.0, "learning_rate": 0.00016465996012157995, "loss": 0.0036, "step": 366 }, { "epoch": 122.33, "learning_rate": 0.00016440600542179615, "loss": 0.0036, "step": 367 }, { "epoch": 122.67, "learning_rate": 0.0001641513389152777, "loss": 0.0032, "step": 368 }, { "epoch": 123.0, "learning_rate": 0.0001638959634165656, "loss": 0.0027, "step": 369 }, { "epoch": 123.33, "learning_rate": 0.00016363988174803638, "loss": 0.0034, "step": 370 }, { "epoch": 123.67, "learning_rate": 0.00016338309673987101, "loss": 0.0033, "step": 371 }, { "epoch": 124.0, "learning_rate": 0.0001631256112300239, "loss": 0.0027, "step": 372 }, { "epoch": 124.33, "learning_rate": 0.00016286742806419108, "loss": 0.0033, "step": 373 }, { "epoch": 124.67, "learning_rate": 0.0001626085500957791, "loss": 0.0031, "step": 374 }, { "epoch": 125.0, "learning_rate": 0.00016234898018587337, "loss": 0.0032, "step": 375 }, { "epoch": 125.33, "learning_rate": 0.0001620887212032065, "loss": 0.003, "step": 376 }, { "epoch": 125.67, "learning_rate": 0.00016182777602412665, "loss": 0.0034, "step": 377 }, { "epoch": 126.0, "learning_rate": 0.0001615661475325658, "loss": 0.0031, "step": 378 }, { "epoch": 126.33, "learning_rate": 0.0001613038386200078, "loss": 0.0031, "step": 379 }, { "epoch": 126.67, "learning_rate": 0.00016104085218545633, "loss": 0.0034, "step": 380 }, { "epoch": 127.0, "learning_rate": 0.00016077719113540302, "loss": 0.0027, "step": 381 }, { "epoch": 127.33, "learning_rate": 0.00016051285838379525, "loss": 0.0035, "step": 382 }, { "epoch": 127.67, "learning_rate": 0.00016024785685200395, "loss": 0.0029, "step": 383 }, { "epoch": 128.0, "learning_rate": 0.00015998218946879138, "loss": 0.0027, "step": 384 }, { "epoch": 128.33, "learning_rate": 0.00015971585917027862, "loss": 0.0035, "step": 385 }, { "epoch": 128.67, "learning_rate": 0.00015944886889991325, "loss": 0.0029, "step": 386 }, { "epoch": 129.0, "learning_rate": 0.00015918122160843678, "loss": 0.0029, "step": 387 }, { "epoch": 129.33, "learning_rate": 0.000158912920253852, "loss": 0.0032, "step": 388 }, { "epoch": 129.67, "learning_rate": 0.0001586439678013903, "loss": 0.0029, "step": 389 }, { "epoch": 130.0, "learning_rate": 0.000158374367223479, "loss": 0.0032, "step": 390 }, { "epoch": 130.33, "learning_rate": 0.00015810412149970833, "loss": 0.0027, "step": 391 }, { "epoch": 130.67, "learning_rate": 0.00015783323361679864, "loss": 0.0034, "step": 392 }, { "epoch": 131.0, "learning_rate": 0.00015756170656856737, "loss": 0.0032, "step": 393 }, { "epoch": 131.33, "learning_rate": 0.0001572895433558958, "loss": 0.0033, "step": 394 }, { "epoch": 131.67, "learning_rate": 0.0001570167469866962, "loss": 0.0027, "step": 395 }, { "epoch": 132.0, "learning_rate": 0.0001567433204758782, "loss": 0.003, "step": 396 }, { "epoch": 132.33, "learning_rate": 0.00015646926684531585, "loss": 0.003, "step": 397 }, { "epoch": 132.67, "learning_rate": 0.00015619458912381396, "loss": 0.003, "step": 398 }, { "epoch": 133.0, "learning_rate": 0.0001559192903470747, "loss": 0.003, "step": 399 }, { "epoch": 133.33, "learning_rate": 0.00015564337355766412, "loss": 0.0033, "step": 400 }, { "epoch": 133.67, "learning_rate": 0.0001553668418049784, "loss": 0.0027, "step": 401 }, { "epoch": 134.0, "learning_rate": 0.00015508969814521025, "loss": 0.003, "step": 402 }, { "epoch": 134.33, "learning_rate": 0.00015481194564131512, "loss": 0.0028, "step": 403 }, { "epoch": 134.67, "learning_rate": 0.00015453358736297729, "loss": 0.0029, "step": 404 }, { "epoch": 135.0, "learning_rate": 0.00015425462638657595, "loss": 0.0031, "step": 405 }, { "epoch": 135.33, "learning_rate": 0.0001539750657951513, "loss": 0.0027, "step": 406 }, { "epoch": 135.67, "learning_rate": 0.00015369490867837035, "loss": 0.0029, "step": 407 }, { "epoch": 136.0, "learning_rate": 0.00015341415813249288, "loss": 0.0032, "step": 408 }, { "epoch": 136.33, "learning_rate": 0.00015313281726033715, "loss": 0.0031, "step": 409 }, { "epoch": 136.67, "learning_rate": 0.00015285088917124556, "loss": 0.0029, "step": 410 }, { "epoch": 137.0, "learning_rate": 0.00015256837698105047, "loss": 0.0028, "step": 411 }, { "epoch": 137.33, "learning_rate": 0.00015228528381203962, "loss": 0.003, "step": 412 }, { "epoch": 137.67, "learning_rate": 0.00015200161279292155, "loss": 0.0029, "step": 413 }, { "epoch": 138.0, "learning_rate": 0.00015171736705879126, "loss": 0.0028, "step": 414 }, { "epoch": 138.33, "learning_rate": 0.00015143254975109538, "loss": 0.0025, "step": 415 }, { "epoch": 138.67, "learning_rate": 0.0001511471640175974, "loss": 0.0031, "step": 416 }, { "epoch": 139.0, "learning_rate": 0.00015086121301234316, "loss": 0.0029, "step": 417 }, { "epoch": 139.33, "learning_rate": 0.00015057469989562567, "loss": 0.0027, "step": 418 }, { "epoch": 139.67, "learning_rate": 0.00015028762783395034, "loss": 0.0028, "step": 419 }, { "epoch": 140.0, "learning_rate": 0.00015000000000000001, "loss": 0.0031, "step": 420 }, { "epoch": 140.33, "learning_rate": 0.0001497118195725998, "loss": 0.0029, "step": 421 }, { "epoch": 140.67, "learning_rate": 0.0001494230897366821, "loss": 0.003, "step": 422 }, { "epoch": 141.0, "learning_rate": 0.00014913381368325115, "loss": 0.0027, "step": 423 }, { "epoch": 141.33, "learning_rate": 0.00014884399460934805, "loss": 0.003, "step": 424 }, { "epoch": 141.67, "learning_rate": 0.00014855363571801523, "loss": 0.0025, "step": 425 }, { "epoch": 142.0, "learning_rate": 0.0001482627402182611, "loss": 0.003, "step": 426 }, { "epoch": 142.33, "learning_rate": 0.00014797131132502465, "loss": 0.0028, "step": 427 }, { "epoch": 142.67, "learning_rate": 0.00014767935225913975, "loss": 0.0027, "step": 428 }, { "epoch": 143.0, "learning_rate": 0.00014738686624729986, "loss": 0.0031, "step": 429 }, { "epoch": 143.33, "learning_rate": 0.00014709385652202203, "loss": 0.0029, "step": 430 }, { "epoch": 143.67, "learning_rate": 0.0001468003263216113, "loss": 0.0029, "step": 431 }, { "epoch": 144.0, "learning_rate": 0.00014650627889012507, "loss": 0.0025, "step": 432 }, { "epoch": 144.33, "learning_rate": 0.00014621171747733697, "loss": 0.0028, "step": 433 }, { "epoch": 144.67, "learning_rate": 0.00014591664533870118, "loss": 0.0029, "step": 434 }, { "epoch": 145.0, "learning_rate": 0.0001456210657353163, "loss": 0.0026, "step": 435 }, { "epoch": 145.33, "learning_rate": 0.0001453249819338894, "loss": 0.0029, "step": 436 }, { "epoch": 145.67, "learning_rate": 0.00014502839720669989, "loss": 0.0027, "step": 437 }, { "epoch": 146.0, "learning_rate": 0.00014473131483156327, "loss": 0.0027, "step": 438 }, { "epoch": 146.33, "learning_rate": 0.00014443373809179508, "loss": 0.0028, "step": 439 }, { "epoch": 146.67, "learning_rate": 0.0001441356702761744, "loss": 0.0028, "step": 440 }, { "epoch": 147.0, "learning_rate": 0.00014383711467890774, "loss": 0.0026, "step": 441 }, { "epoch": 147.33, "learning_rate": 0.00014353807459959242, "loss": 0.0027, "step": 442 }, { "epoch": 147.67, "learning_rate": 0.00014323855334318026, "loss": 0.0026, "step": 443 }, { "epoch": 148.0, "learning_rate": 0.00014293855421994094, "loss": 0.0028, "step": 444 }, { "epoch": 148.33, "learning_rate": 0.0001426380805454254, "loss": 0.0028, "step": 445 }, { "epoch": 148.67, "learning_rate": 0.00014233713564042937, "loss": 0.0027, "step": 446 }, { "epoch": 149.0, "learning_rate": 0.00014203572283095657, "loss": 0.0026, "step": 447 }, { "epoch": 149.33, "learning_rate": 0.0001417338454481818, "loss": 0.0024, "step": 448 }, { "epoch": 149.67, "learning_rate": 0.00014143150682841438, "loss": 0.0029, "step": 449 }, { "epoch": 150.0, "learning_rate": 0.00014112871031306119, "loss": 0.0028, "step": 450 }, { "epoch": 150.33, "learning_rate": 0.00014082545924858954, "loss": 0.0027, "step": 451 }, { "epoch": 150.67, "learning_rate": 0.00014052175698649053, "loss": 0.0029, "step": 452 }, { "epoch": 151.0, "learning_rate": 0.00014021760688324176, "loss": 0.0023, "step": 453 }, { "epoch": 151.33, "learning_rate": 0.0001399130123002703, "loss": 0.0028, "step": 454 }, { "epoch": 151.67, "learning_rate": 0.0001396079766039157, "loss": 0.0027, "step": 455 }, { "epoch": 152.0, "learning_rate": 0.00013930250316539238, "loss": 0.0025, "step": 456 }, { "epoch": 152.33, "learning_rate": 0.0001389965953607528, "loss": 0.0027, "step": 457 }, { "epoch": 152.67, "learning_rate": 0.00013869025657084995, "loss": 0.0028, "step": 458 }, { "epoch": 153.0, "learning_rate": 0.00013838349018130007, "loss": 0.0024, "step": 459 }, { "epoch": 153.33, "learning_rate": 0.00013807629958244498, "loss": 0.0026, "step": 460 }, { "epoch": 153.67, "learning_rate": 0.00013776868816931502, "loss": 0.0027, "step": 461 }, { "epoch": 154.0, "learning_rate": 0.00013746065934159123, "loss": 0.0025, "step": 462 }, { "epoch": 154.33, "learning_rate": 0.0001371522165035678, "loss": 0.0027, "step": 463 }, { "epoch": 154.67, "learning_rate": 0.00013684336306411468, "loss": 0.0026, "step": 464 }, { "epoch": 155.0, "learning_rate": 0.00013653410243663952, "loss": 0.0026, "step": 465 }, { "epoch": 155.33, "learning_rate": 0.00013622443803905027, "loss": 0.0026, "step": 466 }, { "epoch": 155.67, "learning_rate": 0.00013591437329371736, "loss": 0.0026, "step": 467 }, { "epoch": 156.0, "learning_rate": 0.00013560391162743569, "loss": 0.0027, "step": 468 }, { "epoch": 156.33, "learning_rate": 0.00013529305647138687, "loss": 0.003, "step": 469 }, { "epoch": 156.67, "learning_rate": 0.0001349818112611015, "loss": 0.0024, "step": 470 }, { "epoch": 157.0, "learning_rate": 0.00013467017943642073, "loss": 0.0024, "step": 471 }, { "epoch": 157.33, "learning_rate": 0.0001343581644414587, "loss": 0.0024, "step": 472 }, { "epoch": 157.67, "learning_rate": 0.00013404576972456431, "loss": 0.0027, "step": 473 }, { "epoch": 158.0, "learning_rate": 0.00013373299873828303, "loss": 0.0026, "step": 474 }, { "epoch": 158.33, "learning_rate": 0.00013341985493931877, "loss": 0.0026, "step": 475 }, { "epoch": 158.67, "learning_rate": 0.0001331063417884958, "loss": 0.0024, "step": 476 }, { "epoch": 159.0, "learning_rate": 0.00013279246275072046, "loss": 0.0028, "step": 477 }, { "epoch": 159.33, "learning_rate": 0.00013247822129494266, "loss": 0.0026, "step": 478 }, { "epoch": 159.67, "learning_rate": 0.00013216362089411783, "loss": 0.0026, "step": 479 }, { "epoch": 160.0, "learning_rate": 0.00013184866502516845, "loss": 0.0027, "step": 480 }, { "epoch": 160.33, "learning_rate": 0.00013153335716894544, "loss": 0.0028, "step": 481 }, { "epoch": 160.67, "learning_rate": 0.00013121770081018998, "loss": 0.0026, "step": 482 }, { "epoch": 161.0, "learning_rate": 0.00013090169943749476, "loss": 0.0024, "step": 483 }, { "epoch": 161.33, "learning_rate": 0.00013058535654326554, "loss": 0.0027, "step": 484 }, { "epoch": 161.67, "learning_rate": 0.0001302686756236826, "loss": 0.0025, "step": 485 }, { "epoch": 162.0, "learning_rate": 0.00012995166017866193, "loss": 0.0027, "step": 486 }, { "epoch": 162.33, "learning_rate": 0.00012963431371181672, "loss": 0.0025, "step": 487 }, { "epoch": 162.67, "learning_rate": 0.00012931663973041855, "loss": 0.0026, "step": 488 }, { "epoch": 163.0, "learning_rate": 0.00012899864174535864, "loss": 0.0025, "step": 489 }, { "epoch": 163.33, "learning_rate": 0.00012868032327110904, "loss": 0.0026, "step": 490 }, { "epoch": 163.67, "learning_rate": 0.00012836168782568385, "loss": 0.0027, "step": 491 }, { "epoch": 164.0, "learning_rate": 0.00012804273893060028, "loss": 0.0023, "step": 492 }, { "epoch": 164.33, "learning_rate": 0.00012772348011083973, "loss": 0.0025, "step": 493 }, { "epoch": 164.67, "learning_rate": 0.00012740391489480884, "loss": 0.0026, "step": 494 }, { "epoch": 165.0, "learning_rate": 0.00012708404681430053, "loss": 0.0027, "step": 495 }, { "epoch": 165.33, "learning_rate": 0.0001267638794044549, "loss": 0.0026, "step": 496 }, { "epoch": 165.67, "learning_rate": 0.00012644341620372023, "loss": 0.0026, "step": 497 }, { "epoch": 166.0, "learning_rate": 0.00012612266075381386, "loss": 0.0024, "step": 498 }, { "epoch": 166.33, "learning_rate": 0.00012580161659968294, "loss": 0.0026, "step": 499 }, { "epoch": 166.67, "learning_rate": 0.0001254802872894655, "loss": 0.0025, "step": 500 }, { "epoch": 167.0, "learning_rate": 0.00012515867637445086, "loss": 0.0027, "step": 501 }, { "epoch": 167.33, "learning_rate": 0.00012483678740904082, "loss": 0.0028, "step": 502 }, { "epoch": 167.67, "learning_rate": 0.00012451462395071, "loss": 0.0024, "step": 503 }, { "epoch": 168.0, "learning_rate": 0.00012419218955996676, "loss": 0.0024, "step": 504 }, { "epoch": 168.33, "learning_rate": 0.0001238694878003138, "loss": 0.0024, "step": 505 }, { "epoch": 168.67, "learning_rate": 0.00012354652223820858, "loss": 0.0022, "step": 506 }, { "epoch": 169.0, "learning_rate": 0.00012322329644302426, "loss": 0.0031, "step": 507 }, { "epoch": 169.33, "learning_rate": 0.00012289981398700995, "loss": 0.0022, "step": 508 }, { "epoch": 169.67, "learning_rate": 0.00012257607844525146, "loss": 0.0026, "step": 509 }, { "epoch": 170.0, "learning_rate": 0.00012225209339563145, "loss": 0.0027, "step": 510 }, { "epoch": 170.33, "learning_rate": 0.00012192786241879033, "loss": 0.0024, "step": 511 }, { "epoch": 170.67, "learning_rate": 0.0001216033890980864, "loss": 0.0025, "step": 512 }, { "epoch": 171.0, "learning_rate": 0.00012127867701955622, "loss": 0.0026, "step": 513 }, { "epoch": 171.33, "learning_rate": 0.0001209537297718752, "loss": 0.0026, "step": 514 }, { "epoch": 171.67, "learning_rate": 0.00012062855094631778, "loss": 0.0023, "step": 515 }, { "epoch": 172.0, "learning_rate": 0.00012030314413671762, "loss": 0.0027, "step": 516 }, { "epoch": 172.33, "learning_rate": 0.00011997751293942827, "loss": 0.0027, "step": 517 }, { "epoch": 172.67, "learning_rate": 0.00011965166095328301, "loss": 0.0023, "step": 518 }, { "epoch": 173.0, "learning_rate": 0.00011932559177955533, "loss": 0.0024, "step": 519 }, { "epoch": 173.33, "learning_rate": 0.00011899930902191902, "loss": 0.0024, "step": 520 }, { "epoch": 173.67, "learning_rate": 0.00011867281628640835, "loss": 0.0026, "step": 521 }, { "epoch": 174.0, "learning_rate": 0.00011834611718137824, "loss": 0.0024, "step": 522 }, { "epoch": 174.33, "learning_rate": 0.00011801921531746444, "loss": 0.0023, "step": 523 }, { "epoch": 174.67, "learning_rate": 0.00011769211430754357, "loss": 0.0025, "step": 524 }, { "epoch": 175.0, "learning_rate": 0.00011736481776669306, "loss": 0.0025, "step": 525 }, { "epoch": 175.33, "learning_rate": 0.00011703732931215141, "loss": 0.0024, "step": 526 }, { "epoch": 175.67, "learning_rate": 0.00011670965256327818, "loss": 0.0024, "step": 527 }, { "epoch": 176.0, "learning_rate": 0.00011638179114151377, "loss": 0.0024, "step": 528 }, { "epoch": 176.33, "learning_rate": 0.00011605374867033977, "loss": 0.0024, "step": 529 }, { "epoch": 176.67, "learning_rate": 0.00011572552877523854, "loss": 0.0024, "step": 530 }, { "epoch": 177.0, "learning_rate": 0.00011539713508365335, "loss": 0.0025, "step": 531 }, { "epoch": 177.33, "learning_rate": 0.00011506857122494831, "loss": 0.0024, "step": 532 }, { "epoch": 177.67, "learning_rate": 0.00011473984083036813, "loss": 0.0028, "step": 533 }, { "epoch": 178.0, "learning_rate": 0.00011441094753299801, "loss": 0.0021, "step": 534 }, { "epoch": 178.33, "learning_rate": 0.00011408189496772368, "loss": 0.0027, "step": 535 }, { "epoch": 178.67, "learning_rate": 0.00011375268677119089, "loss": 0.0024, "step": 536 }, { "epoch": 179.0, "learning_rate": 0.00011342332658176555, "loss": 0.0022, "step": 537 }, { "epoch": 179.33, "learning_rate": 0.00011309381803949333, "loss": 0.0027, "step": 538 }, { "epoch": 179.67, "learning_rate": 0.00011276416478605949, "loss": 0.0024, "step": 539 }, { "epoch": 180.0, "learning_rate": 0.00011243437046474853, "loss": 0.0023, "step": 540 }, { "epoch": 180.33, "learning_rate": 0.00011210443872040414, "loss": 0.0022, "step": 541 }, { "epoch": 180.67, "learning_rate": 0.00011177437319938875, "loss": 0.0026, "step": 542 }, { "epoch": 181.0, "learning_rate": 0.0001114441775495432, "loss": 0.0029, "step": 543 }, { "epoch": 181.33, "learning_rate": 0.00011111385542014663, "loss": 0.0025, "step": 544 }, { "epoch": 181.67, "learning_rate": 0.00011078341046187589, "loss": 0.0022, "step": 545 }, { "epoch": 182.0, "learning_rate": 0.00011045284632676536, "loss": 0.0027, "step": 546 }, { "epoch": 182.33, "learning_rate": 0.00011012216666816659, "loss": 0.0025, "step": 547 }, { "epoch": 182.67, "learning_rate": 0.00010979137514070782, "loss": 0.0025, "step": 548 }, { "epoch": 183.0, "learning_rate": 0.00010946047540025372, "loss": 0.0024, "step": 549 }, { "epoch": 183.33, "learning_rate": 0.00010912947110386484, "loss": 0.0024, "step": 550 }, { "epoch": 183.67, "learning_rate": 0.00010879836590975731, "loss": 0.0024, "step": 551 }, { "epoch": 184.0, "learning_rate": 0.00010846716347726233, "loss": 0.0025, "step": 552 }, { "epoch": 184.33, "learning_rate": 0.00010813586746678583, "loss": 0.0026, "step": 553 }, { "epoch": 184.67, "learning_rate": 0.00010780448153976793, "loss": 0.0023, "step": 554 }, { "epoch": 185.0, "learning_rate": 0.00010747300935864243, "loss": 0.0023, "step": 555 }, { "epoch": 185.33, "learning_rate": 0.00010714145458679649, "loss": 0.0027, "step": 556 }, { "epoch": 185.67, "learning_rate": 0.00010680982088853002, "loss": 0.0022, "step": 557 }, { "epoch": 186.0, "learning_rate": 0.00010647811192901518, "loss": 0.0023, "step": 558 }, { "epoch": 186.33, "learning_rate": 0.00010614633137425598, "loss": 0.0022, "step": 559 }, { "epoch": 186.67, "learning_rate": 0.00010581448289104758, "loss": 0.0025, "step": 560 }, { "epoch": 187.0, "learning_rate": 0.00010548257014693601, "loss": 0.0027, "step": 561 }, { "epoch": 187.33, "learning_rate": 0.0001051505968101774, "loss": 0.0023, "step": 562 }, { "epoch": 187.67, "learning_rate": 0.00010481856654969758, "loss": 0.0027, "step": 563 }, { "epoch": 188.0, "learning_rate": 0.00010448648303505151, "loss": 0.0021, "step": 564 }, { "epoch": 188.33, "learning_rate": 0.00010415434993638269, "loss": 0.0026, "step": 565 }, { "epoch": 188.67, "learning_rate": 0.00010382217092438255, "loss": 0.0023, "step": 566 }, { "epoch": 189.0, "learning_rate": 0.00010348994967025012, "loss": 0.0022, "step": 567 }, { "epoch": 189.33, "learning_rate": 0.0001031576898456511, "loss": 0.0022, "step": 568 }, { "epoch": 189.67, "learning_rate": 0.00010282539512267757, "loss": 0.0024, "step": 569 }, { "epoch": 190.0, "learning_rate": 0.0001024930691738073, "loss": 0.0028, "step": 570 }, { "epoch": 190.33, "learning_rate": 0.00010216071567186312, "loss": 0.0022, "step": 571 }, { "epoch": 190.67, "learning_rate": 0.00010182833828997238, "loss": 0.0027, "step": 572 }, { "epoch": 191.0, "learning_rate": 0.00010149594070152638, "loss": 0.0021, "step": 573 }, { "epoch": 191.33, "learning_rate": 0.00010116352658013973, "loss": 0.0024, "step": 574 }, { "epoch": 191.67, "learning_rate": 0.00010083109959960973, "loss": 0.0024, "step": 575 }, { "epoch": 192.0, "learning_rate": 0.00010049866343387581, "loss": 0.0025, "step": 576 }, { "epoch": 192.33, "learning_rate": 0.00010016622175697898, "loss": 0.0024, "step": 577 }, { "epoch": 192.67, "learning_rate": 9.983377824302106e-05, "loss": 0.0024, "step": 578 }, { "epoch": 193.0, "learning_rate": 9.950133656612421e-05, "loss": 0.0022, "step": 579 }, { "epoch": 193.33, "learning_rate": 9.916890040039031e-05, "loss": 0.0023, "step": 580 }, { "epoch": 193.67, "learning_rate": 9.883647341986032e-05, "loss": 0.0023, "step": 581 }, { "epoch": 194.0, "learning_rate": 9.850405929847366e-05, "loss": 0.0025, "step": 582 }, { "epoch": 194.33, "learning_rate": 9.817166171002765e-05, "loss": 0.0023, "step": 583 }, { "epoch": 194.67, "learning_rate": 9.783928432813688e-05, "loss": 0.0026, "step": 584 }, { "epoch": 195.0, "learning_rate": 9.750693082619273e-05, "loss": 0.0022, "step": 585 }, { "epoch": 195.33, "learning_rate": 9.717460487732245e-05, "loss": 0.0023, "step": 586 }, { "epoch": 195.67, "learning_rate": 9.68423101543489e-05, "loss": 0.0025, "step": 587 }, { "epoch": 196.0, "learning_rate": 9.651005032974994e-05, "loss": 0.0026, "step": 588 }, { "epoch": 196.33, "learning_rate": 9.617782907561748e-05, "loss": 0.0025, "step": 589 }, { "epoch": 196.67, "learning_rate": 9.584565006361734e-05, "loss": 0.0023, "step": 590 }, { "epoch": 197.0, "learning_rate": 9.551351696494854e-05, "loss": 0.0024, "step": 591 }, { "epoch": 197.33, "learning_rate": 9.518143345030246e-05, "loss": 0.0022, "step": 592 }, { "epoch": 197.67, "learning_rate": 9.48494031898226e-05, "loss": 0.0026, "step": 593 }, { "epoch": 198.0, "learning_rate": 9.451742985306398e-05, "loss": 0.0022, "step": 594 }, { "epoch": 198.33, "learning_rate": 9.418551710895243e-05, "loss": 0.0021, "step": 595 }, { "epoch": 198.67, "learning_rate": 9.385366862574404e-05, "loss": 0.0026, "step": 596 }, { "epoch": 199.0, "learning_rate": 9.352188807098481e-05, "loss": 0.0025, "step": 597 }, { "epoch": 199.33, "learning_rate": 9.319017911147e-05, "loss": 0.0024, "step": 598 }, { "epoch": 199.67, "learning_rate": 9.285854541320352e-05, "loss": 0.0024, "step": 599 }, { "epoch": 200.0, "learning_rate": 9.252699064135758e-05, "loss": 0.0021, "step": 600 }, { "epoch": 200.33, "learning_rate": 9.219551846023211e-05, "loss": 0.0022, "step": 601 }, { "epoch": 200.67, "learning_rate": 9.186413253321418e-05, "loss": 0.0025, "step": 602 }, { "epoch": 201.0, "learning_rate": 9.153283652273768e-05, "loss": 0.0023, "step": 603 }, { "epoch": 201.33, "learning_rate": 9.120163409024271e-05, "loss": 0.0023, "step": 604 }, { "epoch": 201.67, "learning_rate": 9.087052889613518e-05, "loss": 0.0023, "step": 605 }, { "epoch": 202.0, "learning_rate": 9.05395245997463e-05, "loss": 0.0025, "step": 606 }, { "epoch": 202.33, "learning_rate": 9.020862485929219e-05, "loss": 0.0026, "step": 607 }, { "epoch": 202.67, "learning_rate": 8.987783333183344e-05, "loss": 0.0022, "step": 608 }, { "epoch": 203.0, "learning_rate": 8.954715367323468e-05, "loss": 0.0023, "step": 609 }, { "epoch": 203.33, "learning_rate": 8.921658953812415e-05, "loss": 0.0023, "step": 610 }, { "epoch": 203.67, "learning_rate": 8.888614457985341e-05, "loss": 0.0023, "step": 611 }, { "epoch": 204.0, "learning_rate": 8.855582245045683e-05, "loss": 0.0025, "step": 612 }, { "epoch": 204.33, "learning_rate": 8.822562680061125e-05, "loss": 0.0023, "step": 613 }, { "epoch": 204.67, "learning_rate": 8.789556127959585e-05, "loss": 0.0023, "step": 614 }, { "epoch": 205.0, "learning_rate": 8.756562953525152e-05, "loss": 0.0023, "step": 615 }, { "epoch": 205.33, "learning_rate": 8.723583521394054e-05, "loss": 0.0023, "step": 616 }, { "epoch": 205.67, "learning_rate": 8.690618196050666e-05, "loss": 0.0024, "step": 617 }, { "epoch": 206.0, "learning_rate": 8.657667341823448e-05, "loss": 0.0021, "step": 618 }, { "epoch": 206.33, "learning_rate": 8.624731322880912e-05, "loss": 0.0025, "step": 619 }, { "epoch": 206.67, "learning_rate": 8.591810503227635e-05, "loss": 0.0023, "step": 620 }, { "epoch": 207.0, "learning_rate": 8.558905246700201e-05, "loss": 0.0021, "step": 621 }, { "epoch": 207.33, "learning_rate": 8.526015916963191e-05, "loss": 0.0021, "step": 622 }, { "epoch": 207.67, "learning_rate": 8.49314287750517e-05, "loss": 0.0023, "step": 623 }, { "epoch": 208.0, "learning_rate": 8.460286491634663e-05, "loss": 0.0026, "step": 624 }, { "epoch": 208.33, "learning_rate": 8.427447122476148e-05, "loss": 0.0024, "step": 625 }, { "epoch": 208.67, "learning_rate": 8.394625132966025e-05, "loss": 0.0023, "step": 626 }, { "epoch": 209.0, "learning_rate": 8.361820885848624e-05, "loss": 0.0024, "step": 627 }, { "epoch": 209.33, "learning_rate": 8.329034743672187e-05, "loss": 0.0023, "step": 628 }, { "epoch": 209.67, "learning_rate": 8.296267068784862e-05, "loss": 0.0022, "step": 629 }, { "epoch": 210.0, "learning_rate": 8.263518223330697e-05, "loss": 0.0025, "step": 630 }, { "epoch": 210.33, "learning_rate": 8.230788569245648e-05, "loss": 0.0025, "step": 631 }, { "epoch": 210.67, "learning_rate": 8.198078468253557e-05, "loss": 0.0022, "step": 632 }, { "epoch": 211.0, "learning_rate": 8.165388281862178e-05, "loss": 0.0023, "step": 633 }, { "epoch": 211.33, "learning_rate": 8.132718371359166e-05, "loss": 0.0023, "step": 634 }, { "epoch": 211.67, "learning_rate": 8.100069097808103e-05, "loss": 0.0024, "step": 635 }, { "epoch": 212.0, "learning_rate": 8.067440822044469e-05, "loss": 0.0023, "step": 636 }, { "epoch": 212.33, "learning_rate": 8.034833904671698e-05, "loss": 0.0024, "step": 637 }, { "epoch": 212.67, "learning_rate": 8.002248706057177e-05, "loss": 0.0022, "step": 638 }, { "epoch": 213.0, "learning_rate": 7.96968558632824e-05, "loss": 0.0022, "step": 639 }, { "epoch": 213.33, "learning_rate": 7.937144905368226e-05, "loss": 0.002, "step": 640 }, { "epoch": 213.67, "learning_rate": 7.904627022812483e-05, "loss": 0.0024, "step": 641 }, { "epoch": 214.0, "learning_rate": 7.872132298044382e-05, "loss": 0.0026, "step": 642 }, { "epoch": 214.33, "learning_rate": 7.839661090191362e-05, "loss": 0.0023, "step": 643 }, { "epoch": 214.67, "learning_rate": 7.807213758120966e-05, "loss": 0.0023, "step": 644 }, { "epoch": 215.0, "learning_rate": 7.774790660436858e-05, "loss": 0.0023, "step": 645 }, { "epoch": 215.33, "learning_rate": 7.742392155474858e-05, "loss": 0.0022, "step": 646 }, { "epoch": 215.67, "learning_rate": 7.710018601299004e-05, "loss": 0.0022, "step": 647 }, { "epoch": 216.0, "learning_rate": 7.677670355697577e-05, "loss": 0.0025, "step": 648 }, { "epoch": 216.33, "learning_rate": 7.645347776179144e-05, "loss": 0.0023, "step": 649 }, { "epoch": 216.67, "learning_rate": 7.613051219968623e-05, "loss": 0.0024, "step": 650 }, { "epoch": 217.0, "learning_rate": 7.580781044003324e-05, "loss": 0.0022, "step": 651 }, { "epoch": 217.33, "learning_rate": 7.548537604929001e-05, "loss": 0.0026, "step": 652 }, { "epoch": 217.67, "learning_rate": 7.516321259095921e-05, "loss": 0.0023, "step": 653 }, { "epoch": 218.0, "learning_rate": 7.484132362554915e-05, "loss": 0.0021, "step": 654 }, { "epoch": 218.33, "learning_rate": 7.451971271053455e-05, "loss": 0.0021, "step": 655 }, { "epoch": 218.67, "learning_rate": 7.419838340031708e-05, "loss": 0.0026, "step": 656 }, { "epoch": 219.0, "learning_rate": 7.387733924618617e-05, "loss": 0.0021, "step": 657 }, { "epoch": 219.33, "learning_rate": 7.35565837962798e-05, "loss": 0.0024, "step": 658 }, { "epoch": 219.67, "learning_rate": 7.323612059554513e-05, "loss": 0.0024, "step": 659 }, { "epoch": 220.0, "learning_rate": 7.291595318569951e-05, "loss": 0.002, "step": 660 }, { "epoch": 220.33, "learning_rate": 7.25960851051912e-05, "loss": 0.0023, "step": 661 }, { "epoch": 220.67, "learning_rate": 7.227651988916031e-05, "loss": 0.0023, "step": 662 }, { "epoch": 221.0, "learning_rate": 7.195726106939974e-05, "loss": 0.0022, "step": 663 }, { "epoch": 221.33, "learning_rate": 7.163831217431615e-05, "loss": 0.0022, "step": 664 }, { "epoch": 221.67, "learning_rate": 7.131967672889101e-05, "loss": 0.0022, "step": 665 }, { "epoch": 222.0, "learning_rate": 7.100135825464139e-05, "loss": 0.0025, "step": 666 }, { "epoch": 222.33, "learning_rate": 7.068336026958146e-05, "loss": 0.0021, "step": 667 }, { "epoch": 222.67, "learning_rate": 7.036568628818331e-05, "loss": 0.0023, "step": 668 }, { "epoch": 223.0, "learning_rate": 7.004833982133808e-05, "loss": 0.0026, "step": 669 }, { "epoch": 223.33, "learning_rate": 6.973132437631742e-05, "loss": 0.0022, "step": 670 }, { "epoch": 223.67, "learning_rate": 6.941464345673449e-05, "loss": 0.0023, "step": 671 }, { "epoch": 224.0, "learning_rate": 6.909830056250527e-05, "loss": 0.0024, "step": 672 }, { "epoch": 224.33, "learning_rate": 6.878229918981003e-05, "loss": 0.0024, "step": 673 }, { "epoch": 224.67, "learning_rate": 6.846664283105455e-05, "loss": 0.0021, "step": 674 }, { "epoch": 225.0, "learning_rate": 6.815133497483157e-05, "loss": 0.0022, "step": 675 }, { "epoch": 225.33, "learning_rate": 6.783637910588216e-05, "loss": 0.0021, "step": 676 }, { "epoch": 225.67, "learning_rate": 6.752177870505736e-05, "loss": 0.0023, "step": 677 }, { "epoch": 226.0, "learning_rate": 6.720753724927958e-05, "loss": 0.0024, "step": 678 }, { "epoch": 226.33, "learning_rate": 6.68936582115042e-05, "loss": 0.0021, "step": 679 }, { "epoch": 226.67, "learning_rate": 6.658014506068126e-05, "loss": 0.0023, "step": 680 }, { "epoch": 227.0, "learning_rate": 6.626700126171702e-05, "loss": 0.0024, "step": 681 }, { "epoch": 227.33, "learning_rate": 6.595423027543571e-05, "loss": 0.0024, "step": 682 }, { "epoch": 227.67, "learning_rate": 6.56418355585413e-05, "loss": 0.002, "step": 683 }, { "epoch": 228.0, "learning_rate": 6.532982056357928e-05, "loss": 0.0023, "step": 684 }, { "epoch": 228.33, "learning_rate": 6.501818873889855e-05, "loss": 0.0025, "step": 685 }, { "epoch": 228.67, "learning_rate": 6.470694352861312e-05, "loss": 0.0022, "step": 686 }, { "epoch": 229.0, "learning_rate": 6.439608837256432e-05, "loss": 0.0021, "step": 687 }, { "epoch": 229.33, "learning_rate": 6.408562670628266e-05, "loss": 0.0022, "step": 688 }, { "epoch": 229.67, "learning_rate": 6.377556196094973e-05, "loss": 0.0023, "step": 689 }, { "epoch": 230.0, "learning_rate": 6.34658975633605e-05, "loss": 0.0022, "step": 690 }, { "epoch": 230.33, "learning_rate": 6.315663693588534e-05, "loss": 0.0022, "step": 691 }, { "epoch": 230.67, "learning_rate": 6.28477834964322e-05, "loss": 0.0023, "step": 692 }, { "epoch": 231.0, "learning_rate": 6.25393406584088e-05, "loss": 0.0023, "step": 693 }, { "epoch": 231.33, "learning_rate": 6.223131183068499e-05, "loss": 0.0022, "step": 694 }, { "epoch": 231.67, "learning_rate": 6.192370041755505e-05, "loss": 0.0023, "step": 695 }, { "epoch": 232.0, "learning_rate": 6.161650981869998e-05, "loss": 0.0026, "step": 696 }, { "epoch": 232.33, "learning_rate": 6.130974342915005e-05, "loss": 0.0024, "step": 697 }, { "epoch": 232.67, "learning_rate": 6.100340463924723e-05, "loss": 0.002, "step": 698 }, { "epoch": 233.0, "learning_rate": 6.069749683460765e-05, "loss": 0.0024, "step": 699 }, { "epoch": 233.33, "learning_rate": 6.039202339608432e-05, "loss": 0.0024, "step": 700 }, { "epoch": 233.33, "eval_loss": 1.320059061050415, "eval_runtime": 3.5029, "eval_samples_per_second": 5.995, "eval_steps_per_second": 0.856, "step": 700 }, { "epoch": 233.67, "learning_rate": 6.008698769972967e-05, "loss": 0.0023, "step": 701 }, { "epoch": 234.0, "learning_rate": 5.978239311675826e-05, "loss": 0.002, "step": 702 }, { "epoch": 234.33, "learning_rate": 5.9478243013509505e-05, "loss": 0.0022, "step": 703 }, { "epoch": 234.67, "learning_rate": 5.9174540751410487e-05, "loss": 0.0023, "step": 704 }, { "epoch": 235.0, "learning_rate": 5.887128968693887e-05, "loss": 0.0022, "step": 705 }, { "epoch": 235.33, "learning_rate": 5.856849317158563e-05, "loss": 0.0023, "step": 706 }, { "epoch": 235.67, "learning_rate": 5.8266154551818216e-05, "loss": 0.0021, "step": 707 }, { "epoch": 236.0, "learning_rate": 5.796427716904347e-05, "loss": 0.0024, "step": 708 }, { "epoch": 236.33, "learning_rate": 5.7662864359570624e-05, "loss": 0.0023, "step": 709 }, { "epoch": 236.67, "learning_rate": 5.736191945457463e-05, "loss": 0.0022, "step": 710 }, { "epoch": 237.0, "learning_rate": 5.7061445780059074e-05, "loss": 0.0024, "step": 711 }, { "epoch": 237.33, "learning_rate": 5.676144665681974e-05, "loss": 0.002, "step": 712 }, { "epoch": 237.67, "learning_rate": 5.6461925400407576e-05, "loss": 0.0023, "step": 713 }, { "epoch": 238.0, "learning_rate": 5.616288532109225e-05, "loss": 0.0024, "step": 714 }, { "epoch": 238.33, "learning_rate": 5.58643297238256e-05, "loss": 0.0024, "step": 715 }, { "epoch": 238.67, "learning_rate": 5.5566261908204966e-05, "loss": 0.0023, "step": 716 }, { "epoch": 239.0, "learning_rate": 5.526868516843673e-05, "loss": 0.002, "step": 717 }, { "epoch": 239.33, "learning_rate": 5.497160279330014e-05, "loss": 0.0024, "step": 718 }, { "epoch": 239.67, "learning_rate": 5.467501806611062e-05, "loss": 0.0021, "step": 719 }, { "epoch": 240.0, "learning_rate": 5.43789342646837e-05, "loss": 0.0023, "step": 720 }, { "epoch": 240.33, "learning_rate": 5.4083354661298814e-05, "loss": 0.0025, "step": 721 }, { "epoch": 240.67, "learning_rate": 5.378828252266308e-05, "loss": 0.002, "step": 722 }, { "epoch": 241.0, "learning_rate": 5.349372110987496e-05, "loss": 0.0023, "step": 723 }, { "epoch": 241.33, "learning_rate": 5.3199673678388685e-05, "loss": 0.0024, "step": 724 }, { "epoch": 241.67, "learning_rate": 5.290614347797802e-05, "loss": 0.0021, "step": 725 }, { "epoch": 242.0, "learning_rate": 5.261313375270014e-05, "loss": 0.0022, "step": 726 }, { "epoch": 242.33, "learning_rate": 5.232064774086022e-05, "loss": 0.0022, "step": 727 }, { "epoch": 242.67, "learning_rate": 5.2028688674975415e-05, "loss": 0.0023, "step": 728 }, { "epoch": 243.0, "learning_rate": 5.1737259781738936e-05, "loss": 0.0022, "step": 729 }, { "epoch": 243.33, "learning_rate": 5.1446364281984774e-05, "loss": 0.0022, "step": 730 }, { "epoch": 243.67, "learning_rate": 5.115600539065197e-05, "loss": 0.0024, "step": 731 }, { "epoch": 244.0, "learning_rate": 5.086618631674888e-05, "loss": 0.0021, "step": 732 }, { "epoch": 244.33, "learning_rate": 5.057691026331792e-05, "loss": 0.0023, "step": 733 }, { "epoch": 244.67, "learning_rate": 5.02881804274002e-05, "loss": 0.0022, "step": 734 }, { "epoch": 245.0, "learning_rate": 5.000000000000002e-05, "loss": 0.0023, "step": 735 }, { "epoch": 245.33, "learning_rate": 4.971237216604967e-05, "loss": 0.0022, "step": 736 }, { "epoch": 245.67, "learning_rate": 4.942530010437435e-05, "loss": 0.0023, "step": 737 }, { "epoch": 246.0, "learning_rate": 4.913878698765686e-05, "loss": 0.0022, "step": 738 }, { "epoch": 246.33, "learning_rate": 4.885283598240259e-05, "loss": 0.0023, "step": 739 }, { "epoch": 246.67, "learning_rate": 4.856745024890466e-05, "loss": 0.0023, "step": 740 }, { "epoch": 247.0, "learning_rate": 4.8282632941208725e-05, "loss": 0.0022, "step": 741 }, { "epoch": 247.33, "learning_rate": 4.799838720707846e-05, "loss": 0.0021, "step": 742 }, { "epoch": 247.67, "learning_rate": 4.771471618796043e-05, "loss": 0.0024, "step": 743 }, { "epoch": 248.0, "learning_rate": 4.743162301894952e-05, "loss": 0.0023, "step": 744 }, { "epoch": 248.33, "learning_rate": 4.7149110828754464e-05, "loss": 0.0021, "step": 745 }, { "epoch": 248.67, "learning_rate": 4.686718273966291e-05, "loss": 0.0023, "step": 746 }, { "epoch": 249.0, "learning_rate": 4.658584186750713e-05, "loss": 0.0023, "step": 747 }, { "epoch": 249.33, "learning_rate": 4.6305091321629666e-05, "loss": 0.0021, "step": 748 }, { "epoch": 249.67, "learning_rate": 4.6024934204848745e-05, "loss": 0.0024, "step": 749 }, { "epoch": 250.0, "learning_rate": 4.574537361342407e-05, "loss": 0.0022, "step": 750 }, { "epoch": 250.33, "learning_rate": 4.5466412637022704e-05, "loss": 0.0022, "step": 751 }, { "epoch": 250.67, "learning_rate": 4.518805435868492e-05, "loss": 0.0022, "step": 752 }, { "epoch": 251.0, "learning_rate": 4.491030185478976e-05, "loss": 0.0025, "step": 753 }, { "epoch": 251.33, "learning_rate": 4.4633158195021594e-05, "loss": 0.0025, "step": 754 }, { "epoch": 251.67, "learning_rate": 4.435662644233594e-05, "loss": 0.0021, "step": 755 }, { "epoch": 252.0, "learning_rate": 4.4080709652925336e-05, "loss": 0.0021, "step": 756 }, { "epoch": 252.33, "learning_rate": 4.380541087618606e-05, "loss": 0.0022, "step": 757 }, { "epoch": 252.67, "learning_rate": 4.3530733154684164e-05, "loss": 0.0024, "step": 758 }, { "epoch": 253.0, "learning_rate": 4.3256679524121834e-05, "loss": 0.0021, "step": 759 }, { "epoch": 253.33, "learning_rate": 4.298325301330383e-05, "loss": 0.0024, "step": 760 }, { "epoch": 253.67, "learning_rate": 4.27104566441042e-05, "loss": 0.0021, "step": 761 }, { "epoch": 254.0, "learning_rate": 4.2438293431432665e-05, "loss": 0.0022, "step": 762 }, { "epoch": 254.33, "learning_rate": 4.216676638320135e-05, "loss": 0.0022, "step": 763 }, { "epoch": 254.67, "learning_rate": 4.189587850029169e-05, "loss": 0.0023, "step": 764 }, { "epoch": 255.0, "learning_rate": 4.1625632776521037e-05, "loss": 0.0023, "step": 765 }, { "epoch": 255.33, "learning_rate": 4.1356032198609706e-05, "loss": 0.0023, "step": 766 }, { "epoch": 255.67, "learning_rate": 4.108707974614804e-05, "loss": 0.0022, "step": 767 }, { "epoch": 256.0, "learning_rate": 4.081877839156325e-05, "loss": 0.0021, "step": 768 }, { "epoch": 256.33, "learning_rate": 4.0551131100086745e-05, "loss": 0.0021, "step": 769 }, { "epoch": 256.67, "learning_rate": 4.028414082972141e-05, "loss": 0.0023, "step": 770 }, { "epoch": 257.0, "learning_rate": 4.001781053120863e-05, "loss": 0.0021, "step": 771 }, { "epoch": 257.33, "learning_rate": 3.975214314799607e-05, "loss": 0.0021, "step": 772 }, { "epoch": 257.67, "learning_rate": 3.94871416162048e-05, "loss": 0.0024, "step": 773 }, { "epoch": 258.0, "learning_rate": 3.9222808864597004e-05, "loss": 0.002, "step": 774 }, { "epoch": 258.33, "learning_rate": 3.89591478145437e-05, "loss": 0.002, "step": 775 }, { "epoch": 258.67, "learning_rate": 3.8696161379992225e-05, "loss": 0.0022, "step": 776 }, { "epoch": 259.0, "learning_rate": 3.843385246743417e-05, "loss": 0.0025, "step": 777 }, { "epoch": 259.33, "learning_rate": 3.817222397587336e-05, "loss": 0.0022, "step": 778 }, { "epoch": 259.67, "learning_rate": 3.7911278796793516e-05, "loss": 0.0022, "step": 779 }, { "epoch": 260.0, "learning_rate": 3.7651019814126654e-05, "loss": 0.0023, "step": 780 }, { "epoch": 260.33, "learning_rate": 3.739144990422089e-05, "loss": 0.0021, "step": 781 }, { "epoch": 260.67, "learning_rate": 3.7132571935808924e-05, "loss": 0.0024, "step": 782 }, { "epoch": 261.0, "learning_rate": 3.687438876997612e-05, "loss": 0.0022, "step": 783 }, { "epoch": 261.33, "learning_rate": 3.661690326012897e-05, "loss": 0.0021, "step": 784 }, { "epoch": 261.67, "learning_rate": 3.6360118251963645e-05, "loss": 0.0022, "step": 785 }, { "epoch": 262.0, "learning_rate": 3.610403658343443e-05, "loss": 0.0024, "step": 786 }, { "epoch": 262.33, "learning_rate": 3.58486610847223e-05, "loss": 0.002, "step": 787 }, { "epoch": 262.67, "learning_rate": 3.5593994578203896e-05, "loss": 0.0023, "step": 788 }, { "epoch": 263.0, "learning_rate": 3.534003987842005e-05, "loss": 0.0024, "step": 789 }, { "epoch": 263.33, "learning_rate": 3.508679979204481e-05, "loss": 0.0023, "step": 790 }, { "epoch": 263.67, "learning_rate": 3.483427711785449e-05, "loss": 0.002, "step": 791 }, { "epoch": 264.0, "learning_rate": 3.458247464669657e-05, "loss": 0.0027, "step": 792 }, { "epoch": 264.33, "learning_rate": 3.4331395161458955e-05, "loss": 0.0023, "step": 793 }, { "epoch": 264.67, "learning_rate": 3.408104143703929e-05, "loss": 0.0021, "step": 794 }, { "epoch": 265.0, "learning_rate": 3.383141624031408e-05, "loss": 0.0022, "step": 795 }, { "epoch": 265.33, "learning_rate": 3.35825223301083e-05, "loss": 0.0023, "step": 796 }, { "epoch": 265.67, "learning_rate": 3.333436245716488e-05, "loss": 0.0023, "step": 797 }, { "epoch": 266.0, "learning_rate": 3.308693936411421e-05, "loss": 0.0021, "step": 798 }, { "epoch": 266.33, "learning_rate": 3.2840255785443855e-05, "loss": 0.0023, "step": 799 }, { "epoch": 266.67, "learning_rate": 3.259431444746846e-05, "loss": 0.0022, "step": 800 }, { "epoch": 267.0, "learning_rate": 3.234911806829948e-05, "loss": 0.0022, "step": 801 }, { "epoch": 267.33, "learning_rate": 3.210466935781516e-05, "loss": 0.0024, "step": 802 }, { "epoch": 267.67, "learning_rate": 3.1860971017630604e-05, "loss": 0.0023, "step": 803 }, { "epoch": 268.0, "learning_rate": 3.161802574106799e-05, "loss": 0.002, "step": 804 }, { "epoch": 268.33, "learning_rate": 3.137583621312665e-05, "loss": 0.0023, "step": 805 }, { "epoch": 268.67, "learning_rate": 3.1134405110453515e-05, "loss": 0.0022, "step": 806 }, { "epoch": 269.0, "learning_rate": 3.089373510131354e-05, "loss": 0.0021, "step": 807 }, { "epoch": 269.33, "learning_rate": 3.065382884556012e-05, "loss": 0.0024, "step": 808 }, { "epoch": 269.67, "learning_rate": 3.0414688994605723e-05, "loss": 0.0023, "step": 809 }, { "epoch": 270.0, "learning_rate": 3.0176318191392726e-05, "loss": 0.0019, "step": 810 }, { "epoch": 270.33, "learning_rate": 2.9938719070363952e-05, "loss": 0.0024, "step": 811 }, { "epoch": 270.67, "learning_rate": 2.9701894257433826e-05, "loss": 0.0022, "step": 812 }, { "epoch": 271.0, "learning_rate": 2.9465846369959127e-05, "loss": 0.0022, "step": 813 }, { "epoch": 271.33, "learning_rate": 2.923057801671015e-05, "loss": 0.0022, "step": 814 }, { "epoch": 271.67, "learning_rate": 2.8996091797841973e-05, "loss": 0.0021, "step": 815 }, { "epoch": 272.0, "learning_rate": 2.876239030486554e-05, "loss": 0.0023, "step": 816 }, { "epoch": 272.33, "learning_rate": 2.8529476120619104e-05, "loss": 0.0023, "step": 817 }, { "epoch": 272.67, "learning_rate": 2.829735181923978e-05, "loss": 0.0022, "step": 818 }, { "epoch": 273.0, "learning_rate": 2.8066019966134904e-05, "loss": 0.0022, "step": 819 }, { "epoch": 273.33, "learning_rate": 2.7835483117953788e-05, "loss": 0.0019, "step": 820 }, { "epoch": 273.67, "learning_rate": 2.7605743822559506e-05, "loss": 0.0024, "step": 821 }, { "epoch": 274.0, "learning_rate": 2.7376804619000707e-05, "loss": 0.0024, "step": 822 }, { "epoch": 274.33, "learning_rate": 2.7148668037483372e-05, "loss": 0.0021, "step": 823 }, { "epoch": 274.67, "learning_rate": 2.692133659934315e-05, "loss": 0.0024, "step": 824 }, { "epoch": 275.0, "learning_rate": 2.669481281701739e-05, "loss": 0.0023, "step": 825 }, { "epoch": 275.33, "learning_rate": 2.6469099194017143e-05, "loss": 0.0021, "step": 826 }, { "epoch": 275.67, "learning_rate": 2.624419822489985e-05, "loss": 0.0022, "step": 827 }, { "epoch": 276.0, "learning_rate": 2.6020112395241624e-05, "loss": 0.0023, "step": 828 }, { "epoch": 276.33, "learning_rate": 2.579684418160958e-05, "loss": 0.0022, "step": 829 }, { "epoch": 276.67, "learning_rate": 2.5574396051534832e-05, "loss": 0.0023, "step": 830 }, { "epoch": 277.0, "learning_rate": 2.5352770463484987e-05, "loss": 0.0022, "step": 831 }, { "epoch": 277.33, "learning_rate": 2.5131969866836992e-05, "loss": 0.0022, "step": 832 }, { "epoch": 277.67, "learning_rate": 2.491199670185008e-05, "loss": 0.0024, "step": 833 }, { "epoch": 278.0, "learning_rate": 2.4692853399638917e-05, "loss": 0.0021, "step": 834 }, { "epoch": 278.33, "learning_rate": 2.4474542382146537e-05, "loss": 0.0021, "step": 835 }, { "epoch": 278.67, "learning_rate": 2.425706606211767e-05, "loss": 0.0024, "step": 836 }, { "epoch": 279.0, "learning_rate": 2.4040426843072206e-05, "loss": 0.0024, "step": 837 }, { "epoch": 279.33, "learning_rate": 2.3824627119278342e-05, "loss": 0.0022, "step": 838 }, { "epoch": 279.67, "learning_rate": 2.3609669275726355e-05, "loss": 0.0021, "step": 839 }, { "epoch": 280.0, "learning_rate": 2.339555568810221e-05, "loss": 0.0024, "step": 840 }, { "epoch": 280.33, "learning_rate": 2.318228872276118e-05, "loss": 0.0021, "step": 841 }, { "epoch": 280.67, "learning_rate": 2.2969870736701895e-05, "loss": 0.0021, "step": 842 }, { "epoch": 281.0, "learning_rate": 2.275830407754006e-05, "loss": 0.0023, "step": 843 }, { "epoch": 281.33, "learning_rate": 2.2547591083482665e-05, "loss": 0.0023, "step": 844 }, { "epoch": 281.67, "learning_rate": 2.2337734083302164e-05, "loss": 0.0023, "step": 845 }, { "epoch": 282.0, "learning_rate": 2.212873539631061e-05, "loss": 0.002, "step": 846 }, { "epoch": 282.33, "learning_rate": 2.192059733233408e-05, "loss": 0.0024, "step": 847 }, { "epoch": 282.67, "learning_rate": 2.1713322191687237e-05, "loss": 0.0023, "step": 848 }, { "epoch": 283.0, "learning_rate": 2.1506912265147772e-05, "loss": 0.0019, "step": 849 }, { "epoch": 283.33, "learning_rate": 2.1301369833931117e-05, "loss": 0.0024, "step": 850 }, { "epoch": 283.67, "learning_rate": 2.1096697169665313e-05, "loss": 0.0022, "step": 851 }, { "epoch": 284.0, "learning_rate": 2.0892896534365904e-05, "loss": 0.0019, "step": 852 }, { "epoch": 284.33, "learning_rate": 2.068997018041069e-05, "loss": 0.0022, "step": 853 }, { "epoch": 284.67, "learning_rate": 2.0487920350515212e-05, "loss": 0.0021, "step": 854 }, { "epoch": 285.0, "learning_rate": 2.0286749277707782e-05, "loss": 0.0024, "step": 855 }, { "epoch": 285.33, "learning_rate": 2.0086459185304618e-05, "loss": 0.0021, "step": 856 }, { "epoch": 285.67, "learning_rate": 1.9887052286885655e-05, "loss": 0.0022, "step": 857 }, { "epoch": 286.0, "learning_rate": 1.9688530786269855e-05, "loss": 0.0023, "step": 858 }, { "epoch": 286.33, "learning_rate": 1.9490896877490716e-05, "loss": 0.0022, "step": 859 }, { "epoch": 286.67, "learning_rate": 1.929415274477239e-05, "loss": 0.0024, "step": 860 }, { "epoch": 287.0, "learning_rate": 1.9098300562505266e-05, "loss": 0.0021, "step": 861 }, { "epoch": 287.33, "learning_rate": 1.8903342495221977e-05, "loss": 0.0022, "step": 862 }, { "epoch": 287.67, "learning_rate": 1.870928069757353e-05, "loss": 0.0023, "step": 863 }, { "epoch": 288.0, "learning_rate": 1.8516117314305524e-05, "loss": 0.0021, "step": 864 }, { "epoch": 288.33, "learning_rate": 1.832385448023435e-05, "loss": 0.0022, "step": 865 }, { "epoch": 288.67, "learning_rate": 1.8132494320223638e-05, "loss": 0.0021, "step": 866 }, { "epoch": 289.0, "learning_rate": 1.7942038949160854e-05, "loss": 0.0024, "step": 867 }, { "epoch": 289.33, "learning_rate": 1.775249047193377e-05, "loss": 0.0023, "step": 868 }, { "epoch": 289.67, "learning_rate": 1.756385098340736e-05, "loss": 0.002, "step": 869 }, { "epoch": 290.0, "learning_rate": 1.7376122568400532e-05, "loss": 0.0024, "step": 870 }, { "epoch": 290.33, "learning_rate": 1.7189307301663084e-05, "loss": 0.0021, "step": 871 }, { "epoch": 290.67, "learning_rate": 1.7003407247852943e-05, "loss": 0.0022, "step": 872 }, { "epoch": 291.0, "learning_rate": 1.681842446151313e-05, "loss": 0.0022, "step": 873 }, { "epoch": 291.33, "learning_rate": 1.6634360987049115e-05, "loss": 0.002, "step": 874 }, { "epoch": 291.67, "learning_rate": 1.6451218858706374e-05, "loss": 0.0022, "step": 875 }, { "epoch": 292.0, "learning_rate": 1.6269000100547683e-05, "loss": 0.0024, "step": 876 }, { "epoch": 292.33, "learning_rate": 1.6087706726430873e-05, "loss": 0.0021, "step": 877 }, { "epoch": 292.67, "learning_rate": 1.5907340739986575e-05, "loss": 0.0022, "step": 878 }, { "epoch": 293.0, "learning_rate": 1.5727904134596083e-05, "loss": 0.0024, "step": 879 }, { "epoch": 293.33, "learning_rate": 1.5549398893369216e-05, "loss": 0.0025, "step": 880 }, { "epoch": 293.67, "learning_rate": 1.5371826989122506e-05, "loss": 0.002, "step": 881 }, { "epoch": 294.0, "learning_rate": 1.5195190384357404e-05, "loss": 0.0021, "step": 882 }, { "epoch": 294.33, "learning_rate": 1.501949103123852e-05, "loss": 0.0021, "step": 883 }, { "epoch": 294.67, "learning_rate": 1.4844730871572043e-05, "loss": 0.0024, "step": 884 }, { "epoch": 295.0, "learning_rate": 1.467091183678444e-05, "loss": 0.0023, "step": 885 }, { "epoch": 295.33, "learning_rate": 1.449803584790086e-05, "loss": 0.0022, "step": 886 }, { "epoch": 295.67, "learning_rate": 1.4326104815524088e-05, "loss": 0.0022, "step": 887 }, { "epoch": 296.0, "learning_rate": 1.415512063981339e-05, "loss": 0.0022, "step": 888 }, { "epoch": 296.33, "learning_rate": 1.3985085210463477e-05, "loss": 0.0023, "step": 889 }, { "epoch": 296.67, "learning_rate": 1.3816000406683604e-05, "loss": 0.0023, "step": 890 }, { "epoch": 297.0, "learning_rate": 1.364786809717692e-05, "loss": 0.0019, "step": 891 }, { "epoch": 297.33, "learning_rate": 1.3480690140119657e-05, "loss": 0.0022, "step": 892 }, { "epoch": 297.67, "learning_rate": 1.3314468383140688e-05, "loss": 0.0023, "step": 893 }, { "epoch": 298.0, "learning_rate": 1.3149204663301118e-05, "loss": 0.0021, "step": 894 }, { "epoch": 298.33, "learning_rate": 1.2984900807073919e-05, "loss": 0.002, "step": 895 }, { "epoch": 298.67, "learning_rate": 1.2821558630323772e-05, "loss": 0.0021, "step": 896 }, { "epoch": 299.0, "learning_rate": 1.2659179938287035e-05, "loss": 0.0026, "step": 897 }, { "epoch": 299.33, "learning_rate": 1.2497766525551724e-05, "loss": 0.0023, "step": 898 }, { "epoch": 299.67, "learning_rate": 1.2337320176037759e-05, "loss": 0.0023, "step": 899 }, { "epoch": 300.0, "learning_rate": 1.2177842662977135e-05, "loss": 0.002, "step": 900 }, { "epoch": 300.33, "learning_rate": 1.201933574889449e-05, "loss": 0.0022, "step": 901 }, { "epoch": 300.67, "learning_rate": 1.186180118558743e-05, "loss": 0.0024, "step": 902 }, { "epoch": 301.0, "learning_rate": 1.1705240714107302e-05, "loss": 0.0019, "step": 903 }, { "epoch": 301.33, "learning_rate": 1.1549656064739967e-05, "loss": 0.0024, "step": 904 }, { "epoch": 301.67, "learning_rate": 1.1395048956986575e-05, "loss": 0.0019, "step": 905 }, { "epoch": 302.0, "learning_rate": 1.124142109954459e-05, "loss": 0.0025, "step": 906 }, { "epoch": 302.33, "learning_rate": 1.108877419028902e-05, "loss": 0.0024, "step": 907 }, { "epoch": 302.67, "learning_rate": 1.0937109916253474e-05, "loss": 0.0021, "step": 908 }, { "epoch": 303.0, "learning_rate": 1.0786429953611666e-05, "loss": 0.0024, "step": 909 }, { "epoch": 303.33, "learning_rate": 1.0636735967658784e-05, "loss": 0.0021, "step": 910 }, { "epoch": 303.67, "learning_rate": 1.0488029612793138e-05, "loss": 0.0022, "step": 911 }, { "epoch": 304.0, "learning_rate": 1.034031253249792e-05, "loss": 0.0023, "step": 912 }, { "epoch": 304.33, "learning_rate": 1.0193586359322927e-05, "loss": 0.002, "step": 913 }, { "epoch": 304.67, "learning_rate": 1.004785271486659e-05, "loss": 0.0025, "step": 914 }, { "epoch": 305.0, "learning_rate": 9.903113209758096e-06, "loss": 0.0021, "step": 915 }, { "epoch": 305.33, "learning_rate": 9.759369443639454e-06, "loss": 0.0024, "step": 916 }, { "epoch": 305.67, "learning_rate": 9.616623005147951e-06, "loss": 0.0021, "step": 917 }, { "epoch": 306.0, "learning_rate": 9.474875471898526e-06, "loss": 0.0021, "step": 918 }, { "epoch": 306.33, "learning_rate": 9.334128410466358e-06, "loss": 0.0022, "step": 919 }, { "epoch": 306.67, "learning_rate": 9.194383376369508e-06, "loss": 0.0022, "step": 920 }, { "epoch": 307.0, "learning_rate": 9.055641914051782e-06, "loss": 0.002, "step": 921 }, { "epoch": 307.33, "learning_rate": 8.917905556865713e-06, "loss": 0.0022, "step": 922 }, { "epoch": 307.67, "learning_rate": 8.781175827055389e-06, "loss": 0.0023, "step": 923 }, { "epoch": 308.0, "learning_rate": 8.645454235739903e-06, "loss": 0.0022, "step": 924 }, { "epoch": 308.33, "learning_rate": 8.510742282896544e-06, "loss": 0.0021, "step": 925 }, { "epoch": 308.67, "learning_rate": 8.377041457344103e-06, "loss": 0.0023, "step": 926 }, { "epoch": 309.0, "learning_rate": 8.24435323672661e-06, "loss": 0.0025, "step": 927 }, { "epoch": 309.33, "learning_rate": 8.112679087496933e-06, "loss": 0.0021, "step": 928 }, { "epoch": 309.67, "learning_rate": 7.982020464900486e-06, "loss": 0.0022, "step": 929 }, { "epoch": 310.0, "learning_rate": 7.852378812959227e-06, "loss": 0.0025, "step": 930 }, { "epoch": 310.33, "learning_rate": 7.72375556445577e-06, "loss": 0.0021, "step": 931 }, { "epoch": 310.67, "learning_rate": 7.596152140917368e-06, "loss": 0.0023, "step": 932 }, { "epoch": 311.0, "learning_rate": 7.46956995260033e-06, "loss": 0.0022, "step": 933 }, { "epoch": 311.33, "learning_rate": 7.344010398474455e-06, "loss": 0.0021, "step": 934 }, { "epoch": 311.67, "learning_rate": 7.219474866207465e-06, "loss": 0.0023, "step": 935 }, { "epoch": 312.0, "learning_rate": 7.09596473214974e-06, "loss": 0.0023, "step": 936 }, { "epoch": 312.33, "learning_rate": 6.973481361319123e-06, "loss": 0.0023, "step": 937 }, { "epoch": 312.67, "learning_rate": 6.852026107385756e-06, "loss": 0.0022, "step": 938 }, { "epoch": 313.0, "learning_rate": 6.731600312657238e-06, "loss": 0.0021, "step": 939 }, { "epoch": 313.33, "learning_rate": 6.612205308063646e-06, "loss": 0.0024, "step": 940 }, { "epoch": 313.67, "learning_rate": 6.493842413142914e-06, "loss": 0.0021, "step": 941 }, { "epoch": 314.0, "learning_rate": 6.37651293602628e-06, "loss": 0.0021, "step": 942 }, { "epoch": 314.33, "learning_rate": 6.260218173423749e-06, "loss": 0.0021, "step": 943 }, { "epoch": 314.67, "learning_rate": 6.144959410609785e-06, "loss": 0.0021, "step": 944 }, { "epoch": 315.0, "learning_rate": 6.030737921409169e-06, "loss": 0.0025, "step": 945 }, { "epoch": 315.33, "learning_rate": 5.917554968182803e-06, "loss": 0.0022, "step": 946 }, { "epoch": 315.67, "learning_rate": 5.805411801813865e-06, "loss": 0.0022, "step": 947 }, { "epoch": 316.0, "learning_rate": 5.694309661693942e-06, "loss": 0.0024, "step": 948 }, { "epoch": 316.33, "learning_rate": 5.584249775709371e-06, "loss": 0.0022, "step": 949 }, { "epoch": 316.67, "learning_rate": 5.475233360227516e-06, "loss": 0.0022, "step": 950 }, { "epoch": 317.0, "learning_rate": 5.367261620083575e-06, "loss": 0.0021, "step": 951 }, { "epoch": 317.33, "learning_rate": 5.26033574856708e-06, "loss": 0.0022, "step": 952 }, { "epoch": 317.67, "learning_rate": 5.1544569274087125e-06, "loss": 0.0024, "step": 953 }, { "epoch": 318.0, "learning_rate": 5.049626326767365e-06, "loss": 0.002, "step": 954 }, { "epoch": 318.33, "learning_rate": 4.945845105217117e-06, "loss": 0.0023, "step": 955 }, { "epoch": 318.67, "learning_rate": 4.843114409734384e-06, "loss": 0.0021, "step": 956 }, { "epoch": 319.0, "learning_rate": 4.741435375685377e-06, "loss": 0.0024, "step": 957 }, { "epoch": 319.33, "learning_rate": 4.640809126813484e-06, "loss": 0.0022, "step": 958 }, { "epoch": 319.67, "learning_rate": 4.541236775226809e-06, "loss": 0.0025, "step": 959 }, { "epoch": 320.0, "learning_rate": 4.442719421385922e-06, "loss": 0.002, "step": 960 }, { "epoch": 320.33, "learning_rate": 4.3452581540917465e-06, "loss": 0.0021, "step": 961 }, { "epoch": 320.67, "learning_rate": 4.248854050473405e-06, "loss": 0.0021, "step": 962 }, { "epoch": 321.0, "learning_rate": 4.153508175976428e-06, "loss": 0.0024, "step": 963 }, { "epoch": 321.33, "learning_rate": 4.059221584350958e-06, "loss": 0.002, "step": 964 }, { "epoch": 321.67, "learning_rate": 3.965995317640025e-06, "loss": 0.0021, "step": 965 }, { "epoch": 322.0, "learning_rate": 3.873830406168111e-06, "loss": 0.0025, "step": 966 }, { "epoch": 322.33, "learning_rate": 3.7827278685297785e-06, "loss": 0.0021, "step": 967 }, { "epoch": 322.67, "learning_rate": 3.692688711578296e-06, "loss": 0.0025, "step": 968 }, { "epoch": 323.0, "learning_rate": 3.6037139304146762e-06, "loss": 0.0019, "step": 969 }, { "epoch": 323.33, "learning_rate": 3.515804508376508e-06, "loss": 0.0024, "step": 970 }, { "epoch": 323.67, "learning_rate": 3.428961417027221e-06, "loss": 0.0021, "step": 971 }, { "epoch": 324.0, "learning_rate": 3.3431856161452835e-06, "loss": 0.0022, "step": 972 }, { "epoch": 324.33, "learning_rate": 3.2584780537136207e-06, "loss": 0.0021, "step": 973 }, { "epoch": 324.67, "learning_rate": 3.1748396659090797e-06, "loss": 0.0023, "step": 974 }, { "epoch": 325.0, "learning_rate": 3.092271377092215e-06, "loss": 0.0022, "step": 975 }, { "epoch": 325.33, "learning_rate": 3.010774099796898e-06, "loss": 0.0022, "step": 976 }, { "epoch": 325.67, "learning_rate": 2.9303487347203783e-06, "loss": 0.0022, "step": 977 }, { "epoch": 326.0, "learning_rate": 2.8509961707132494e-06, "loss": 0.0022, "step": 978 }, { "epoch": 326.33, "learning_rate": 2.772717284769677e-06, "loss": 0.0023, "step": 979 }, { "epoch": 326.67, "learning_rate": 2.6955129420176196e-06, "loss": 0.0022, "step": 980 }, { "epoch": 327.0, "learning_rate": 2.619383995709368e-06, "loss": 0.0022, "step": 981 }, { "epoch": 327.33, "learning_rate": 2.5443312872120763e-06, "loss": 0.0025, "step": 982 }, { "epoch": 327.67, "learning_rate": 2.4703556459984456e-06, "loss": 0.002, "step": 983 }, { "epoch": 328.0, "learning_rate": 2.3974578896375553e-06, "loss": 0.0021, "step": 984 }, { "epoch": 328.33, "learning_rate": 2.3256388237858807e-06, "loss": 0.0021, "step": 985 }, { "epoch": 328.67, "learning_rate": 2.25489924217831e-06, "loss": 0.0024, "step": 986 }, { "epoch": 329.0, "learning_rate": 2.1852399266194314e-06, "loss": 0.0022, "step": 987 }, { "epoch": 329.33, "learning_rate": 2.1166616469749044e-06, "loss": 0.002, "step": 988 }, { "epoch": 329.67, "learning_rate": 2.049165161162858e-06, "loss": 0.0022, "step": 989 }, { "epoch": 330.0, "learning_rate": 1.9827512151456173e-06, "loss": 0.0025, "step": 990 }, { "epoch": 330.33, "learning_rate": 1.917420542921433e-06, "loss": 0.0021, "step": 991 }, { "epoch": 330.67, "learning_rate": 1.8531738665163112e-06, "loss": 0.0024, "step": 992 }, { "epoch": 331.0, "learning_rate": 1.790011895976118e-06, "loss": 0.002, "step": 993 }, { "epoch": 331.33, "learning_rate": 1.7279353293586765e-06, "loss": 0.0025, "step": 994 }, { "epoch": 331.67, "learning_rate": 1.66694485272606e-06, "loss": 0.0018, "step": 995 }, { "epoch": 332.0, "learning_rate": 1.6070411401370334e-06, "loss": 0.0025, "step": 996 }, { "epoch": 332.33, "learning_rate": 1.5482248536395905e-06, "loss": 0.0024, "step": 997 }, { "epoch": 332.67, "learning_rate": 1.4904966432635947e-06, "loss": 0.0023, "step": 998 }, { "epoch": 333.0, "learning_rate": 1.4338571470137063e-06, "loss": 0.0018, "step": 999 }, { "epoch": 333.33, "learning_rate": 1.378306990862177e-06, "loss": 0.0023, "step": 1000 }, { "epoch": 333.67, "learning_rate": 1.323846788742078e-06, "loss": 0.0022, "step": 1001 }, { "epoch": 334.0, "learning_rate": 1.2704771425404382e-06, "loss": 0.0021, "step": 1002 }, { "epoch": 334.33, "learning_rate": 1.2181986420915615e-06, "loss": 0.0021, "step": 1003 }, { "epoch": 334.67, "learning_rate": 1.1670118651706197e-06, "loss": 0.0024, "step": 1004 }, { "epoch": 335.0, "learning_rate": 1.1169173774871478e-06, "loss": 0.0024, "step": 1005 }, { "epoch": 335.33, "learning_rate": 1.067915732678859e-06, "loss": 0.0021, "step": 1006 }, { "epoch": 335.67, "learning_rate": 1.0200074723055398e-06, "loss": 0.0024, "step": 1007 }, { "epoch": 336.0, "learning_rate": 9.731931258429638e-07, "loss": 0.0022, "step": 1008 }, { "epoch": 336.33, "learning_rate": 9.274732106771988e-07, "loss": 0.0021, "step": 1009 }, { "epoch": 336.67, "learning_rate": 8.828482320987319e-07, "loss": 0.0023, "step": 1010 }, { "epoch": 337.0, "learning_rate": 8.393186832969746e-07, "loss": 0.0021, "step": 1011 }, { "epoch": 337.33, "learning_rate": 7.968850453548226e-07, "loss": 0.0022, "step": 1012 }, { "epoch": 337.67, "learning_rate": 7.555477872432715e-07, "loss": 0.002, "step": 1013 }, { "epoch": 338.0, "learning_rate": 7.153073658162646e-07, "loss": 0.0024, "step": 1014 }, { "epoch": 338.33, "learning_rate": 6.761642258056978e-07, "loss": 0.002, "step": 1015 }, { "epoch": 338.67, "learning_rate": 6.381187998164229e-07, "loss": 0.0022, "step": 1016 }, { "epoch": 339.0, "learning_rate": 6.011715083214741e-07, "loss": 0.0024, "step": 1017 }, { "epoch": 339.33, "learning_rate": 5.653227596575161e-07, "loss": 0.002, "step": 1018 }, { "epoch": 339.67, "learning_rate": 5.305729500201917e-07, "loss": 0.0019, "step": 1019 }, { "epoch": 340.0, "learning_rate": 4.969224634598591e-07, "loss": 0.0028, "step": 1020 }, { "epoch": 340.33, "learning_rate": 4.6437167187728393e-07, "loss": 0.0023, "step": 1021 }, { "epoch": 340.67, "learning_rate": 4.329209350195651e-07, "loss": 0.0022, "step": 1022 }, { "epoch": 341.0, "learning_rate": 4.025706004760932e-07, "loss": 0.0021, "step": 1023 }, { "epoch": 341.33, "learning_rate": 3.7332100367482024e-07, "loss": 0.0022, "step": 1024 }, { "epoch": 341.67, "learning_rate": 3.451724678784518e-07, "loss": 0.0023, "step": 1025 }, { "epoch": 342.0, "learning_rate": 3.1812530418090513e-07, "loss": 0.0021, "step": 1026 }, { "epoch": 342.33, "learning_rate": 2.921798115039009e-07, "loss": 0.0018, "step": 1027 }, { "epoch": 342.67, "learning_rate": 2.673362765936327e-07, "loss": 0.0023, "step": 1028 }, { "epoch": 343.0, "learning_rate": 2.4359497401758024e-07, "loss": 0.0025, "step": 1029 }, { "epoch": 343.33, "learning_rate": 2.2095616616150115e-07, "loss": 0.0021, "step": 1030 }, { "epoch": 343.67, "learning_rate": 1.9942010322655524e-07, "loss": 0.0022, "step": 1031 }, { "epoch": 344.0, "learning_rate": 1.7898702322648453e-07, "loss": 0.0024, "step": 1032 }, { "epoch": 344.33, "learning_rate": 1.596571519850043e-07, "loss": 0.0023, "step": 1033 }, { "epoch": 344.67, "learning_rate": 1.414307031333273e-07, "loss": 0.0023, "step": 1034 }, { "epoch": 345.0, "learning_rate": 1.2430787810776555e-07, "loss": 0.0021, "step": 1035 }, { "epoch": 345.33, "learning_rate": 1.0828886614754341e-07, "loss": 0.0024, "step": 1036 }, { "epoch": 345.67, "learning_rate": 9.337384429269901e-08, "loss": 0.0022, "step": 1037 }, { "epoch": 346.0, "learning_rate": 7.956297738207497e-08, "loss": 0.0019, "step": 1038 }, { "epoch": 346.33, "learning_rate": 6.685641805158627e-08, "loss": 0.0023, "step": 1039 }, { "epoch": 346.67, "learning_rate": 5.5254306732444025e-08, "loss": 0.0022, "step": 1040 }, { "epoch": 347.0, "learning_rate": 4.475677164966774e-08, "loss": 0.0022, "step": 1041 }, { "epoch": 347.33, "learning_rate": 3.536392882064199e-08, "loss": 0.0022, "step": 1042 }, { "epoch": 347.67, "learning_rate": 2.7075882053828605e-08, "loss": 0.0022, "step": 1043 }, { "epoch": 348.0, "learning_rate": 1.9892722947645326e-08, "loss": 0.0022, "step": 1044 }, { "epoch": 348.33, "learning_rate": 1.3814530889433296e-08, "loss": 0.0021, "step": 1045 }, { "epoch": 348.67, "learning_rate": 8.841373054546686e-09, "loss": 0.0023, "step": 1046 }, { "epoch": 349.0, "learning_rate": 4.973304405697654e-09, "loss": 0.0022, "step": 1047 }, { "epoch": 349.33, "learning_rate": 2.2103676922680117e-09, "loss": 0.0024, "step": 1048 }, { "epoch": 349.67, "learning_rate": 5.525934498651352e-10, "loss": 0.0021, "step": 1049 }, { "epoch": 350.0, "learning_rate": 0.0, "loss": 0.0021, "step": 1050 }, { "epoch": 350.0, "eval_loss": 1.3223165273666382, "eval_runtime": 3.5062, "eval_samples_per_second": 5.989, "eval_steps_per_second": 0.856, "step": 1050 } ], "logging_steps": 1, "max_steps": 1050, "num_input_tokens_seen": 0, "num_train_epochs": 350, "save_steps": 350, "total_flos": 1.277394092556288e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }