{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49975012493753124, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 2.205, "step": 1 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 2.1741, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.2e-05, "loss": 2.3915, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.6000000000000003e-05, "loss": 2.2188, "step": 4 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 2.2271, "step": 5 }, { "epoch": 0.0, "learning_rate": 2.4e-05, "loss": 2.1674, "step": 6 }, { "epoch": 0.0, "learning_rate": 2.8000000000000003e-05, "loss": 2.0138, "step": 7 }, { "epoch": 0.0, "learning_rate": 3.2000000000000005e-05, "loss": 1.8433, "step": 8 }, { "epoch": 0.0, "learning_rate": 3.6e-05, "loss": 2.0383, "step": 9 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 2.1195, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.4000000000000006e-05, "loss": 2.1695, "step": 11 }, { "epoch": 0.01, "learning_rate": 4.8e-05, "loss": 1.8903, "step": 12 }, { "epoch": 0.01, "learning_rate": 5.2000000000000004e-05, "loss": 1.729, "step": 13 }, { "epoch": 0.01, "learning_rate": 5.6000000000000006e-05, "loss": 2.0379, "step": 14 }, { "epoch": 0.01, "learning_rate": 6e-05, "loss": 1.9539, "step": 15 }, { "epoch": 0.01, "learning_rate": 6.400000000000001e-05, "loss": 2.0113, "step": 16 }, { "epoch": 0.01, "learning_rate": 6.800000000000001e-05, "loss": 1.6799, "step": 17 }, { "epoch": 0.01, "learning_rate": 7.2e-05, "loss": 2.0281, "step": 18 }, { "epoch": 0.01, "learning_rate": 7.6e-05, "loss": 1.8322, "step": 19 }, { "epoch": 0.01, "learning_rate": 8e-05, "loss": 1.8084, "step": 20 }, { "epoch": 0.01, "learning_rate": 8.4e-05, "loss": 1.9343, "step": 21 }, { "epoch": 0.01, "learning_rate": 8.800000000000001e-05, "loss": 1.9195, "step": 22 }, { "epoch": 0.01, "learning_rate": 9.200000000000001e-05, "loss": 1.8495, "step": 23 }, { "epoch": 0.01, "learning_rate": 9.6e-05, "loss": 1.8587, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 1.814, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.00010400000000000001, "loss": 1.824, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.00010800000000000001, "loss": 1.7464, "step": 27 }, { "epoch": 0.01, "learning_rate": 0.00011200000000000001, "loss": 1.8182, "step": 28 }, { "epoch": 0.01, "learning_rate": 0.000116, "loss": 1.6595, "step": 29 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 1.8015, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.000124, "loss": 1.825, "step": 31 }, { "epoch": 0.02, "learning_rate": 0.00012800000000000002, "loss": 1.8051, "step": 32 }, { "epoch": 0.02, "learning_rate": 0.000132, "loss": 1.7185, "step": 33 }, { "epoch": 0.02, "learning_rate": 0.00013600000000000003, "loss": 1.7766, "step": 34 }, { "epoch": 0.02, "learning_rate": 0.00014, "loss": 1.7529, "step": 35 }, { "epoch": 0.02, "learning_rate": 0.000144, "loss": 1.6643, "step": 36 }, { "epoch": 0.02, "learning_rate": 0.000148, "loss": 1.8265, "step": 37 }, { "epoch": 0.02, "learning_rate": 0.000152, "loss": 1.6598, "step": 38 }, { "epoch": 0.02, "learning_rate": 0.00015600000000000002, "loss": 1.5992, "step": 39 }, { "epoch": 0.02, "learning_rate": 0.00016, "loss": 1.9037, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.000164, "loss": 1.7461, "step": 41 }, { "epoch": 0.02, "learning_rate": 0.000168, "loss": 1.7369, "step": 42 }, { "epoch": 0.02, "learning_rate": 0.000172, "loss": 1.78, "step": 43 }, { "epoch": 0.02, "learning_rate": 0.00017600000000000002, "loss": 1.8073, "step": 44 }, { "epoch": 0.02, "learning_rate": 0.00018, "loss": 1.6786, "step": 45 }, { "epoch": 0.02, "learning_rate": 0.00018400000000000003, "loss": 1.8868, "step": 46 }, { "epoch": 0.02, "learning_rate": 0.000188, "loss": 1.9341, "step": 47 }, { "epoch": 0.02, "learning_rate": 0.000192, "loss": 1.5922, "step": 48 }, { "epoch": 0.02, "learning_rate": 0.000196, "loss": 1.8224, "step": 49 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 1.8133, "step": 50 }, { "epoch": 0.03, "learning_rate": 0.00020400000000000003, "loss": 1.8373, "step": 51 }, { "epoch": 0.03, "learning_rate": 0.00020800000000000001, "loss": 1.5649, "step": 52 }, { "epoch": 0.03, "learning_rate": 0.00021200000000000003, "loss": 1.7079, "step": 53 }, { "epoch": 0.03, "learning_rate": 0.00021600000000000002, "loss": 1.7249, "step": 54 }, { "epoch": 0.03, "learning_rate": 0.00022000000000000003, "loss": 1.7299, "step": 55 }, { "epoch": 0.03, "learning_rate": 0.00022400000000000002, "loss": 1.7068, "step": 56 }, { "epoch": 0.03, "learning_rate": 0.00022799999999999999, "loss": 1.8436, "step": 57 }, { "epoch": 0.03, "learning_rate": 0.000232, "loss": 1.7897, "step": 58 }, { "epoch": 0.03, "learning_rate": 0.000236, "loss": 1.8142, "step": 59 }, { "epoch": 0.03, "learning_rate": 0.00024, "loss": 1.8176, "step": 60 }, { "epoch": 0.03, "learning_rate": 0.000244, "loss": 1.7742, "step": 61 }, { "epoch": 0.03, "learning_rate": 0.000248, "loss": 1.6512, "step": 62 }, { "epoch": 0.03, "learning_rate": 0.000252, "loss": 1.806, "step": 63 }, { "epoch": 0.03, "learning_rate": 0.00025600000000000004, "loss": 1.8776, "step": 64 }, { "epoch": 0.03, "learning_rate": 0.00026000000000000003, "loss": 1.8309, "step": 65 }, { "epoch": 0.03, "learning_rate": 0.000264, "loss": 1.7217, "step": 66 }, { "epoch": 0.03, "learning_rate": 0.000268, "loss": 1.7578, "step": 67 }, { "epoch": 0.03, "learning_rate": 0.00027200000000000005, "loss": 1.7367, "step": 68 }, { "epoch": 0.03, "learning_rate": 0.000276, "loss": 1.4613, "step": 69 }, { "epoch": 0.03, "learning_rate": 0.00028, "loss": 1.85, "step": 70 }, { "epoch": 0.04, "learning_rate": 0.000284, "loss": 1.7672, "step": 71 }, { "epoch": 0.04, "learning_rate": 0.000288, "loss": 1.6758, "step": 72 }, { "epoch": 0.04, "learning_rate": 0.000292, "loss": 1.6693, "step": 73 }, { "epoch": 0.04, "learning_rate": 0.000296, "loss": 1.6831, "step": 74 }, { "epoch": 0.04, "learning_rate": 0.00030000000000000003, "loss": 1.7338, "step": 75 }, { "epoch": 0.04, "learning_rate": 0.000304, "loss": 1.6858, "step": 76 }, { "epoch": 0.04, "learning_rate": 0.000308, "loss": 1.7453, "step": 77 }, { "epoch": 0.04, "learning_rate": 0.00031200000000000005, "loss": 1.6362, "step": 78 }, { "epoch": 0.04, "learning_rate": 0.00031600000000000004, "loss": 1.875, "step": 79 }, { "epoch": 0.04, "learning_rate": 0.00032, "loss": 1.7411, "step": 80 }, { "epoch": 0.04, "learning_rate": 0.000324, "loss": 1.7638, "step": 81 }, { "epoch": 0.04, "learning_rate": 0.000328, "loss": 1.7488, "step": 82 }, { "epoch": 0.04, "learning_rate": 0.000332, "loss": 1.5624, "step": 83 }, { "epoch": 0.04, "learning_rate": 0.000336, "loss": 1.8976, "step": 84 }, { "epoch": 0.04, "learning_rate": 0.00034, "loss": 1.7016, "step": 85 }, { "epoch": 0.04, "learning_rate": 0.000344, "loss": 1.6986, "step": 86 }, { "epoch": 0.04, "learning_rate": 0.000348, "loss": 1.8508, "step": 87 }, { "epoch": 0.04, "learning_rate": 0.00035200000000000005, "loss": 1.7049, "step": 88 }, { "epoch": 0.04, "learning_rate": 0.00035600000000000003, "loss": 1.7912, "step": 89 }, { "epoch": 0.04, "learning_rate": 0.00036, "loss": 1.6776, "step": 90 }, { "epoch": 0.05, "learning_rate": 0.000364, "loss": 1.7725, "step": 91 }, { "epoch": 0.05, "learning_rate": 0.00036800000000000005, "loss": 1.734, "step": 92 }, { "epoch": 0.05, "learning_rate": 0.00037200000000000004, "loss": 1.8326, "step": 93 }, { "epoch": 0.05, "learning_rate": 0.000376, "loss": 1.7755, "step": 94 }, { "epoch": 0.05, "learning_rate": 0.00038, "loss": 1.5972, "step": 95 }, { "epoch": 0.05, "learning_rate": 0.000384, "loss": 1.7307, "step": 96 }, { "epoch": 0.05, "learning_rate": 0.000388, "loss": 1.7856, "step": 97 }, { "epoch": 0.05, "learning_rate": 0.000392, "loss": 1.7529, "step": 98 }, { "epoch": 0.05, "learning_rate": 0.00039600000000000003, "loss": 1.5194, "step": 99 }, { "epoch": 0.05, "learning_rate": 0.0004, "loss": 1.7358, "step": 100 }, { "epoch": 0.05, "learning_rate": 0.00039999972689137685, "loss": 1.8458, "step": 101 }, { "epoch": 0.05, "learning_rate": 0.00039999890756625326, "loss": 1.7214, "step": 102 }, { "epoch": 0.05, "learning_rate": 0.0003999975420268669, "loss": 1.9563, "step": 103 }, { "epoch": 0.05, "learning_rate": 0.0003999956302769471, "loss": 1.9295, "step": 104 }, { "epoch": 0.05, "learning_rate": 0.0003999931723217151, "loss": 1.7522, "step": 105 }, { "epoch": 0.05, "learning_rate": 0.0003999901681678838, "loss": 1.6478, "step": 106 }, { "epoch": 0.05, "learning_rate": 0.00039998661782365765, "loss": 1.7303, "step": 107 }, { "epoch": 0.05, "learning_rate": 0.00039998252129873314, "loss": 1.7452, "step": 108 }, { "epoch": 0.05, "learning_rate": 0.00039997787860429813, "loss": 1.6586, "step": 109 }, { "epoch": 0.05, "learning_rate": 0.0003999726897530322, "loss": 1.5898, "step": 110 }, { "epoch": 0.06, "learning_rate": 0.0003999669547591067, "loss": 1.5924, "step": 111 }, { "epoch": 0.06, "learning_rate": 0.0003999606736381842, "loss": 1.5336, "step": 112 }, { "epoch": 0.06, "learning_rate": 0.000399953846407419, "loss": 1.6407, "step": 113 }, { "epoch": 0.06, "learning_rate": 0.0003999464730854571, "loss": 1.5072, "step": 114 }, { "epoch": 0.06, "learning_rate": 0.00039993855369243534, "loss": 1.8297, "step": 115 }, { "epoch": 0.06, "learning_rate": 0.00039993008824998246, "loss": 1.8099, "step": 116 }, { "epoch": 0.06, "learning_rate": 0.0003999210767812183, "loss": 1.5861, "step": 117 }, { "epoch": 0.06, "learning_rate": 0.0003999115193107539, "loss": 1.7694, "step": 118 }, { "epoch": 0.06, "learning_rate": 0.0003999014158646916, "loss": 1.8015, "step": 119 }, { "epoch": 0.06, "learning_rate": 0.00039989076647062473, "loss": 1.7514, "step": 120 }, { "epoch": 0.06, "learning_rate": 0.0003998795711576378, "loss": 1.8225, "step": 121 }, { "epoch": 0.06, "learning_rate": 0.00039986782995630603, "loss": 1.7047, "step": 122 }, { "epoch": 0.06, "learning_rate": 0.00039985554289869574, "loss": 1.5168, "step": 123 }, { "epoch": 0.06, "learning_rate": 0.00039984271001836395, "loss": 1.8128, "step": 124 }, { "epoch": 0.06, "learning_rate": 0.0003998293313503583, "loss": 1.8507, "step": 125 }, { "epoch": 0.06, "learning_rate": 0.00039981540693121716, "loss": 1.6675, "step": 126 }, { "epoch": 0.06, "learning_rate": 0.0003998009367989693, "loss": 1.6252, "step": 127 }, { "epoch": 0.06, "learning_rate": 0.00039978592099313386, "loss": 1.75, "step": 128 }, { "epoch": 0.06, "learning_rate": 0.00039977035955472034, "loss": 1.4685, "step": 129 }, { "epoch": 0.06, "learning_rate": 0.0003997542525262284, "loss": 1.6629, "step": 130 }, { "epoch": 0.07, "learning_rate": 0.0003997375999516476, "loss": 1.8121, "step": 131 }, { "epoch": 0.07, "learning_rate": 0.0003997204018764577, "loss": 1.8056, "step": 132 }, { "epoch": 0.07, "learning_rate": 0.000399702658347628, "loss": 1.6775, "step": 133 }, { "epoch": 0.07, "learning_rate": 0.00039968436941361773, "loss": 1.6369, "step": 134 }, { "epoch": 0.07, "learning_rate": 0.0003996655351243755, "loss": 1.6055, "step": 135 }, { "epoch": 0.07, "learning_rate": 0.0003996461555313393, "loss": 1.636, "step": 136 }, { "epoch": 0.07, "learning_rate": 0.0003996262306874366, "loss": 1.739, "step": 137 }, { "epoch": 0.07, "learning_rate": 0.0003996057606470837, "loss": 1.561, "step": 138 }, { "epoch": 0.07, "learning_rate": 0.00039958474546618626, "loss": 1.6892, "step": 139 }, { "epoch": 0.07, "learning_rate": 0.00039956318520213837, "loss": 1.8357, "step": 140 }, { "epoch": 0.07, "learning_rate": 0.0003995410799138231, "loss": 1.8147, "step": 141 }, { "epoch": 0.07, "learning_rate": 0.00039951842966161176, "loss": 1.6856, "step": 142 }, { "epoch": 0.07, "learning_rate": 0.0003994952345073643, "loss": 1.6191, "step": 143 }, { "epoch": 0.07, "learning_rate": 0.0003994714945144286, "loss": 1.5254, "step": 144 }, { "epoch": 0.07, "learning_rate": 0.0003994472097476406, "loss": 1.5864, "step": 145 }, { "epoch": 0.07, "learning_rate": 0.0003994223802733241, "loss": 1.7604, "step": 146 }, { "epoch": 0.07, "learning_rate": 0.0003993970061592906, "loss": 1.7338, "step": 147 }, { "epoch": 0.07, "learning_rate": 0.00039937108747483893, "loss": 1.8078, "step": 148 }, { "epoch": 0.07, "learning_rate": 0.0003993446242907553, "loss": 1.6927, "step": 149 }, { "epoch": 0.07, "learning_rate": 0.00039931761667931287, "loss": 1.6732, "step": 150 }, { "epoch": 0.08, "learning_rate": 0.00039929006471427187, "loss": 1.5369, "step": 151 }, { "epoch": 0.08, "learning_rate": 0.00039926196847087905, "loss": 1.5529, "step": 152 }, { "epoch": 0.08, "learning_rate": 0.0003992333280258676, "loss": 1.6886, "step": 153 }, { "epoch": 0.08, "learning_rate": 0.0003992041434574572, "loss": 1.7383, "step": 154 }, { "epoch": 0.08, "learning_rate": 0.0003991744148453532, "loss": 1.7943, "step": 155 }, { "epoch": 0.08, "learning_rate": 0.0003991441422707472, "loss": 1.6811, "step": 156 }, { "epoch": 0.08, "learning_rate": 0.00039911332581631613, "loss": 1.7442, "step": 157 }, { "epoch": 0.08, "learning_rate": 0.0003990819655662224, "loss": 1.5712, "step": 158 }, { "epoch": 0.08, "learning_rate": 0.00039905006160611357, "loss": 1.5422, "step": 159 }, { "epoch": 0.08, "learning_rate": 0.00039901761402312205, "loss": 1.6689, "step": 160 }, { "epoch": 0.08, "learning_rate": 0.000398984622905865, "loss": 1.8412, "step": 161 }, { "epoch": 0.08, "learning_rate": 0.00039895108834444405, "loss": 1.7318, "step": 162 }, { "epoch": 0.08, "learning_rate": 0.00039891701043044496, "loss": 1.4819, "step": 163 }, { "epoch": 0.08, "learning_rate": 0.0003988823892569375, "loss": 1.4612, "step": 164 }, { "epoch": 0.08, "learning_rate": 0.00039884722491847504, "loss": 1.5737, "step": 165 }, { "epoch": 0.08, "learning_rate": 0.0003988115175110943, "loss": 1.7666, "step": 166 }, { "epoch": 0.08, "learning_rate": 0.0003987752671323155, "loss": 1.4879, "step": 167 }, { "epoch": 0.08, "learning_rate": 0.0003987384738811414, "loss": 1.7151, "step": 168 }, { "epoch": 0.08, "learning_rate": 0.0003987011378580576, "loss": 1.751, "step": 169 }, { "epoch": 0.08, "learning_rate": 0.00039866325916503205, "loss": 1.7216, "step": 170 }, { "epoch": 0.09, "learning_rate": 0.0003986248379055146, "loss": 1.5863, "step": 171 }, { "epoch": 0.09, "learning_rate": 0.00039858587418443715, "loss": 1.793, "step": 172 }, { "epoch": 0.09, "learning_rate": 0.00039854636810821286, "loss": 1.6011, "step": 173 }, { "epoch": 0.09, "learning_rate": 0.0003985063197847363, "loss": 1.7418, "step": 174 }, { "epoch": 0.09, "learning_rate": 0.0003984657293233829, "loss": 1.6862, "step": 175 }, { "epoch": 0.09, "learning_rate": 0.0003984245968350087, "loss": 1.6857, "step": 176 }, { "epoch": 0.09, "learning_rate": 0.00039838292243195013, "loss": 1.7442, "step": 177 }, { "epoch": 0.09, "learning_rate": 0.0003983407062280234, "loss": 1.9183, "step": 178 }, { "epoch": 0.09, "learning_rate": 0.0003982979483385249, "loss": 1.592, "step": 179 }, { "epoch": 0.09, "learning_rate": 0.0003982546488802298, "loss": 1.777, "step": 180 }, { "epoch": 0.09, "learning_rate": 0.00039821080797139283, "loss": 1.7078, "step": 181 }, { "epoch": 0.09, "learning_rate": 0.0003981664257317472, "loss": 1.7803, "step": 182 }, { "epoch": 0.09, "learning_rate": 0.00039812150228250474, "loss": 1.7864, "step": 183 }, { "epoch": 0.09, "learning_rate": 0.0003980760377463552, "loss": 1.7185, "step": 184 }, { "epoch": 0.09, "learning_rate": 0.0003980300322474662, "loss": 1.7885, "step": 185 }, { "epoch": 0.09, "learning_rate": 0.00039798348591148263, "loss": 1.8994, "step": 186 }, { "epoch": 0.09, "learning_rate": 0.00039793639886552665, "loss": 1.713, "step": 187 }, { "epoch": 0.09, "learning_rate": 0.000397888771238197, "loss": 1.5899, "step": 188 }, { "epoch": 0.09, "learning_rate": 0.0003978406031595688, "loss": 1.6652, "step": 189 }, { "epoch": 0.09, "learning_rate": 0.0003977918947611932, "loss": 1.5935, "step": 190 }, { "epoch": 0.1, "learning_rate": 0.0003977426461760972, "loss": 1.6655, "step": 191 }, { "epoch": 0.1, "learning_rate": 0.00039769285753878277, "loss": 1.8054, "step": 192 }, { "epoch": 0.1, "learning_rate": 0.000397642528985227, "loss": 1.6275, "step": 193 }, { "epoch": 0.1, "learning_rate": 0.00039759166065288167, "loss": 1.7446, "step": 194 }, { "epoch": 0.1, "learning_rate": 0.00039754025268067233, "loss": 1.6488, "step": 195 }, { "epoch": 0.1, "learning_rate": 0.00039748830520899874, "loss": 1.7123, "step": 196 }, { "epoch": 0.1, "learning_rate": 0.0003974358183797339, "loss": 1.6823, "step": 197 }, { "epoch": 0.1, "learning_rate": 0.00039738279233622386, "loss": 1.4115, "step": 198 }, { "epoch": 0.1, "learning_rate": 0.00039732922722328725, "loss": 1.3705, "step": 199 }, { "epoch": 0.1, "learning_rate": 0.00039727512318721514, "loss": 1.776, "step": 200 }, { "epoch": 0.1, "learning_rate": 0.00039722048037577024, "loss": 1.7096, "step": 201 }, { "epoch": 0.1, "learning_rate": 0.0003971652989381868, "loss": 1.5852, "step": 202 }, { "epoch": 0.1, "learning_rate": 0.0003971095790251701, "loss": 1.8677, "step": 203 }, { "epoch": 0.1, "learning_rate": 0.00039705332078889596, "loss": 1.4359, "step": 204 }, { "epoch": 0.1, "learning_rate": 0.00039699652438301053, "loss": 1.7743, "step": 205 }, { "epoch": 0.1, "learning_rate": 0.00039693918996262974, "loss": 1.6298, "step": 206 }, { "epoch": 0.1, "learning_rate": 0.0003968813176843387, "loss": 1.7088, "step": 207 }, { "epoch": 0.1, "learning_rate": 0.0003968229077061917, "loss": 1.4698, "step": 208 }, { "epoch": 0.1, "learning_rate": 0.00039676396018771147, "loss": 1.6022, "step": 209 }, { "epoch": 0.1, "learning_rate": 0.0003967044752898886, "loss": 1.6729, "step": 210 }, { "epoch": 0.11, "learning_rate": 0.0003966444531751816, "loss": 1.5742, "step": 211 }, { "epoch": 0.11, "learning_rate": 0.000396583894007516, "loss": 1.437, "step": 212 }, { "epoch": 0.11, "learning_rate": 0.0003965227979522842, "loss": 1.615, "step": 213 }, { "epoch": 0.11, "learning_rate": 0.00039646116517634463, "loss": 1.6582, "step": 214 }, { "epoch": 0.11, "learning_rate": 0.00039639899584802184, "loss": 1.5987, "step": 215 }, { "epoch": 0.11, "learning_rate": 0.00039633629013710554, "loss": 1.8452, "step": 216 }, { "epoch": 0.11, "learning_rate": 0.00039627304821485056, "loss": 1.6249, "step": 217 }, { "epoch": 0.11, "learning_rate": 0.0003962092702539759, "loss": 1.7399, "step": 218 }, { "epoch": 0.11, "learning_rate": 0.0003961449564286648, "loss": 1.5969, "step": 219 }, { "epoch": 0.11, "learning_rate": 0.00039608010691456367, "loss": 1.5766, "step": 220 }, { "epoch": 0.11, "learning_rate": 0.00039601472188878235, "loss": 1.6199, "step": 221 }, { "epoch": 0.11, "learning_rate": 0.0003959488015298929, "loss": 1.773, "step": 222 }, { "epoch": 0.11, "learning_rate": 0.00039588234601792944, "loss": 1.8759, "step": 223 }, { "epoch": 0.11, "learning_rate": 0.0003958153555343878, "loss": 1.7133, "step": 224 }, { "epoch": 0.11, "learning_rate": 0.00039574783026222475, "loss": 1.5905, "step": 225 }, { "epoch": 0.11, "learning_rate": 0.00039567977038585756, "loss": 1.7087, "step": 226 }, { "epoch": 0.11, "learning_rate": 0.0003956111760911637, "loss": 1.5135, "step": 227 }, { "epoch": 0.11, "learning_rate": 0.00039554204756548, "loss": 1.4835, "step": 228 }, { "epoch": 0.11, "learning_rate": 0.00039547238499760255, "loss": 1.7274, "step": 229 }, { "epoch": 0.11, "learning_rate": 0.00039540218857778576, "loss": 1.568, "step": 230 }, { "epoch": 0.12, "learning_rate": 0.0003953314584977421, "loss": 1.5526, "step": 231 }, { "epoch": 0.12, "learning_rate": 0.00039526019495064155, "loss": 1.7895, "step": 232 }, { "epoch": 0.12, "learning_rate": 0.000395188398131111, "loss": 1.6583, "step": 233 }, { "epoch": 0.12, "learning_rate": 0.00039511606823523375, "loss": 1.7053, "step": 234 }, { "epoch": 0.12, "learning_rate": 0.00039504320546054894, "loss": 1.709, "step": 235 }, { "epoch": 0.12, "learning_rate": 0.00039496981000605117, "loss": 1.6779, "step": 236 }, { "epoch": 0.12, "learning_rate": 0.0003948958820721897, "loss": 1.6184, "step": 237 }, { "epoch": 0.12, "learning_rate": 0.0003948214218608681, "loss": 1.6131, "step": 238 }, { "epoch": 0.12, "learning_rate": 0.00039474642957544365, "loss": 1.6341, "step": 239 }, { "epoch": 0.12, "learning_rate": 0.0003946709054207267, "loss": 1.6489, "step": 240 }, { "epoch": 0.12, "learning_rate": 0.00039459484960298026, "loss": 1.6761, "step": 241 }, { "epoch": 0.12, "learning_rate": 0.00039451826232991935, "loss": 1.5096, "step": 242 }, { "epoch": 0.12, "learning_rate": 0.0003944411438107104, "loss": 1.736, "step": 243 }, { "epoch": 0.12, "learning_rate": 0.0003943634942559708, "loss": 1.7403, "step": 244 }, { "epoch": 0.12, "learning_rate": 0.00039428531387776804, "loss": 1.554, "step": 245 }, { "epoch": 0.12, "learning_rate": 0.0003942066028896195, "loss": 1.8338, "step": 246 }, { "epoch": 0.12, "learning_rate": 0.0003941273615064918, "loss": 1.7811, "step": 247 }, { "epoch": 0.12, "learning_rate": 0.00039404758994479984, "loss": 1.6816, "step": 248 }, { "epoch": 0.12, "learning_rate": 0.00039396728842240673, "loss": 1.6188, "step": 249 }, { "epoch": 0.12, "learning_rate": 0.0003938864571586229, "loss": 1.6212, "step": 250 }, { "epoch": 0.13, "learning_rate": 0.00039380509637420533, "loss": 1.8131, "step": 251 }, { "epoch": 0.13, "learning_rate": 0.0003937232062913575, "loss": 1.7621, "step": 252 }, { "epoch": 0.13, "learning_rate": 0.00039364078713372816, "loss": 1.7201, "step": 253 }, { "epoch": 0.13, "learning_rate": 0.00039355783912641126, "loss": 1.6273, "step": 254 }, { "epoch": 0.13, "learning_rate": 0.0003934743624959449, "loss": 1.812, "step": 255 }, { "epoch": 0.13, "learning_rate": 0.0003933903574703109, "loss": 1.7049, "step": 256 }, { "epoch": 0.13, "learning_rate": 0.0003933058242789344, "loss": 1.7132, "step": 257 }, { "epoch": 0.13, "learning_rate": 0.00039322076315268266, "loss": 1.5422, "step": 258 }, { "epoch": 0.13, "learning_rate": 0.000393135174323865, "loss": 1.6279, "step": 259 }, { "epoch": 0.13, "learning_rate": 0.0003930490580262319, "loss": 1.5107, "step": 260 }, { "epoch": 0.13, "learning_rate": 0.00039296241449497443, "loss": 1.6808, "step": 261 }, { "epoch": 0.13, "learning_rate": 0.00039287524396672345, "loss": 1.8189, "step": 262 }, { "epoch": 0.13, "learning_rate": 0.00039278754667954936, "loss": 1.6716, "step": 263 }, { "epoch": 0.13, "learning_rate": 0.00039269932287296083, "loss": 1.7371, "step": 264 }, { "epoch": 0.13, "learning_rate": 0.00039261057278790483, "loss": 1.5354, "step": 265 }, { "epoch": 0.13, "learning_rate": 0.0003925212966667654, "loss": 1.5848, "step": 266 }, { "epoch": 0.13, "learning_rate": 0.0003924314947533633, "loss": 1.6857, "step": 267 }, { "epoch": 0.13, "learning_rate": 0.00039234116729295536, "loss": 1.8029, "step": 268 }, { "epoch": 0.13, "learning_rate": 0.00039225031453223367, "loss": 1.6986, "step": 269 }, { "epoch": 0.13, "learning_rate": 0.00039215893671932497, "loss": 1.5044, "step": 270 }, { "epoch": 0.14, "learning_rate": 0.0003920670341037899, "loss": 1.6712, "step": 271 }, { "epoch": 0.14, "learning_rate": 0.00039197460693662245, "loss": 1.5513, "step": 272 }, { "epoch": 0.14, "learning_rate": 0.00039188165547024916, "loss": 1.4925, "step": 273 }, { "epoch": 0.14, "learning_rate": 0.00039178817995852856, "loss": 1.6916, "step": 274 }, { "epoch": 0.14, "learning_rate": 0.00039169418065675024, "loss": 1.7875, "step": 275 }, { "epoch": 0.14, "learning_rate": 0.00039159965782163453, "loss": 1.5011, "step": 276 }, { "epoch": 0.14, "learning_rate": 0.00039150461171133126, "loss": 1.6323, "step": 277 }, { "epoch": 0.14, "learning_rate": 0.0003914090425854197, "loss": 1.6161, "step": 278 }, { "epoch": 0.14, "learning_rate": 0.00039131295070490727, "loss": 1.7276, "step": 279 }, { "epoch": 0.14, "learning_rate": 0.0003912163363322293, "loss": 1.6082, "step": 280 }, { "epoch": 0.14, "learning_rate": 0.000391119199731248, "loss": 1.732, "step": 281 }, { "epoch": 0.14, "learning_rate": 0.0003910215411672516, "loss": 1.6163, "step": 282 }, { "epoch": 0.14, "learning_rate": 0.0003909233609069542, "loss": 1.8471, "step": 283 }, { "epoch": 0.14, "learning_rate": 0.0003908246592184946, "loss": 1.6584, "step": 284 }, { "epoch": 0.14, "learning_rate": 0.0003907254363714355, "loss": 1.6216, "step": 285 }, { "epoch": 0.14, "learning_rate": 0.00039062569263676307, "loss": 1.6343, "step": 286 }, { "epoch": 0.14, "learning_rate": 0.0003905254282868861, "loss": 1.682, "step": 287 }, { "epoch": 0.14, "learning_rate": 0.00039042464359563523, "loss": 1.7296, "step": 288 }, { "epoch": 0.14, "learning_rate": 0.00039032333883826206, "loss": 1.5631, "step": 289 }, { "epoch": 0.14, "learning_rate": 0.00039022151429143865, "loss": 1.7181, "step": 290 }, { "epoch": 0.15, "learning_rate": 0.00039011917023325655, "loss": 1.7264, "step": 291 }, { "epoch": 0.15, "learning_rate": 0.0003900163069432263, "loss": 1.815, "step": 292 }, { "epoch": 0.15, "learning_rate": 0.00038991292470227636, "loss": 1.5588, "step": 293 }, { "epoch": 0.15, "learning_rate": 0.00038980902379275257, "loss": 1.7129, "step": 294 }, { "epoch": 0.15, "learning_rate": 0.00038970460449841725, "loss": 1.6191, "step": 295 }, { "epoch": 0.15, "learning_rate": 0.0003895996671044485, "loss": 1.6827, "step": 296 }, { "epoch": 0.15, "learning_rate": 0.0003894942118974394, "loss": 1.5777, "step": 297 }, { "epoch": 0.15, "learning_rate": 0.0003893882391653973, "loss": 1.664, "step": 298 }, { "epoch": 0.15, "learning_rate": 0.0003892817491977427, "loss": 1.7806, "step": 299 }, { "epoch": 0.15, "learning_rate": 0.000389174742285309, "loss": 1.6249, "step": 300 }, { "epoch": 0.15, "learning_rate": 0.0003890672187203413, "loss": 1.7223, "step": 301 }, { "epoch": 0.15, "learning_rate": 0.0003889591787964957, "loss": 1.6471, "step": 302 }, { "epoch": 0.15, "learning_rate": 0.0003888506228088385, "loss": 1.5887, "step": 303 }, { "epoch": 0.15, "learning_rate": 0.0003887415510538456, "loss": 1.7015, "step": 304 }, { "epoch": 0.15, "learning_rate": 0.00038863196382940123, "loss": 1.6102, "step": 305 }, { "epoch": 0.15, "learning_rate": 0.00038852186143479764, "loss": 1.5874, "step": 306 }, { "epoch": 0.15, "learning_rate": 0.0003884112441707339, "loss": 1.6151, "step": 307 }, { "epoch": 0.15, "learning_rate": 0.00038830011233931526, "loss": 1.739, "step": 308 }, { "epoch": 0.15, "learning_rate": 0.0003881884662440525, "loss": 1.7366, "step": 309 }, { "epoch": 0.15, "learning_rate": 0.00038807630618986063, "loss": 1.6522, "step": 310 }, { "epoch": 0.16, "learning_rate": 0.0003879636324830584, "loss": 1.71, "step": 311 }, { "epoch": 0.16, "learning_rate": 0.0003878504454313675, "loss": 1.7334, "step": 312 }, { "epoch": 0.16, "learning_rate": 0.00038773674534391144, "loss": 1.613, "step": 313 }, { "epoch": 0.16, "learning_rate": 0.000387622532531215, "loss": 1.7322, "step": 314 }, { "epoch": 0.16, "learning_rate": 0.00038750780730520325, "loss": 1.5234, "step": 315 }, { "epoch": 0.16, "learning_rate": 0.00038739256997920063, "loss": 1.5423, "step": 316 }, { "epoch": 0.16, "learning_rate": 0.0003872768208679302, "loss": 1.6155, "step": 317 }, { "epoch": 0.16, "learning_rate": 0.00038716056028751284, "loss": 1.6176, "step": 318 }, { "epoch": 0.16, "learning_rate": 0.00038704378855546615, "loss": 1.696, "step": 319 }, { "epoch": 0.16, "learning_rate": 0.00038692650599070393, "loss": 1.5135, "step": 320 }, { "epoch": 0.16, "learning_rate": 0.0003868087129135348, "loss": 1.53, "step": 321 }, { "epoch": 0.16, "learning_rate": 0.0003866904096456619, "loss": 1.6036, "step": 322 }, { "epoch": 0.16, "learning_rate": 0.00038657159651018163, "loss": 1.8304, "step": 323 }, { "epoch": 0.16, "learning_rate": 0.0003864522738315829, "loss": 1.6235, "step": 324 }, { "epoch": 0.16, "learning_rate": 0.0003863324419357463, "loss": 1.5371, "step": 325 }, { "epoch": 0.16, "learning_rate": 0.000386212101149943, "loss": 1.6708, "step": 326 }, { "epoch": 0.16, "learning_rate": 0.00038609125180283414, "loss": 1.6137, "step": 327 }, { "epoch": 0.16, "learning_rate": 0.00038596989422446954, "loss": 1.546, "step": 328 }, { "epoch": 0.16, "learning_rate": 0.0003858480287462874, "loss": 1.5953, "step": 329 }, { "epoch": 0.16, "learning_rate": 0.00038572565570111283, "loss": 1.7084, "step": 330 }, { "epoch": 0.17, "learning_rate": 0.0003856027754231571, "loss": 1.6816, "step": 331 }, { "epoch": 0.17, "learning_rate": 0.00038547938824801684, "loss": 1.5167, "step": 332 }, { "epoch": 0.17, "learning_rate": 0.00038535549451267315, "loss": 1.4679, "step": 333 }, { "epoch": 0.17, "learning_rate": 0.0003852310945554904, "loss": 1.6548, "step": 334 }, { "epoch": 0.17, "learning_rate": 0.0003851061887162156, "loss": 1.9461, "step": 335 }, { "epoch": 0.17, "learning_rate": 0.0003849807773359774, "loss": 1.782, "step": 336 }, { "epoch": 0.17, "learning_rate": 0.0003848548607572852, "loss": 1.7077, "step": 337 }, { "epoch": 0.17, "learning_rate": 0.0003847284393240279, "loss": 1.5953, "step": 338 }, { "epoch": 0.17, "learning_rate": 0.00038460151338147333, "loss": 1.5885, "step": 339 }, { "epoch": 0.17, "learning_rate": 0.00038447408327626733, "loss": 1.5709, "step": 340 }, { "epoch": 0.17, "learning_rate": 0.0003843461493564323, "loss": 1.7463, "step": 341 }, { "epoch": 0.17, "learning_rate": 0.00038421771197136696, "loss": 1.7452, "step": 342 }, { "epoch": 0.17, "learning_rate": 0.00038408877147184483, "loss": 1.4542, "step": 343 }, { "epoch": 0.17, "learning_rate": 0.00038395932821001354, "loss": 1.4835, "step": 344 }, { "epoch": 0.17, "learning_rate": 0.00038382938253939385, "loss": 1.6311, "step": 345 }, { "epoch": 0.17, "learning_rate": 0.00038369893481487847, "loss": 1.6245, "step": 346 }, { "epoch": 0.17, "learning_rate": 0.00038356798539273146, "loss": 1.6264, "step": 347 }, { "epoch": 0.17, "learning_rate": 0.00038343653463058705, "loss": 1.5778, "step": 348 }, { "epoch": 0.17, "learning_rate": 0.0003833045828874485, "loss": 1.6097, "step": 349 }, { "epoch": 0.17, "learning_rate": 0.00038317213052368744, "loss": 1.67, "step": 350 }, { "epoch": 0.18, "learning_rate": 0.00038303917790104264, "loss": 1.5424, "step": 351 }, { "epoch": 0.18, "learning_rate": 0.00038290572538261927, "loss": 1.607, "step": 352 }, { "epoch": 0.18, "learning_rate": 0.00038277177333288765, "loss": 1.6334, "step": 353 }, { "epoch": 0.18, "learning_rate": 0.0003826373221176823, "loss": 1.5178, "step": 354 }, { "epoch": 0.18, "learning_rate": 0.0003825023721042012, "loss": 1.5779, "step": 355 }, { "epoch": 0.18, "learning_rate": 0.0003823669236610044, "loss": 1.5685, "step": 356 }, { "epoch": 0.18, "learning_rate": 0.0003822309771580132, "loss": 1.6973, "step": 357 }, { "epoch": 0.18, "learning_rate": 0.00038209453296650944, "loss": 1.8553, "step": 358 }, { "epoch": 0.18, "learning_rate": 0.0003819575914591338, "loss": 1.6619, "step": 359 }, { "epoch": 0.18, "learning_rate": 0.0003818201530098853, "loss": 1.7002, "step": 360 }, { "epoch": 0.18, "learning_rate": 0.0003816822179941204, "loss": 1.7676, "step": 361 }, { "epoch": 0.18, "learning_rate": 0.0003815437867885514, "loss": 1.5902, "step": 362 }, { "epoch": 0.18, "learning_rate": 0.0003814048597712458, "loss": 1.7975, "step": 363 }, { "epoch": 0.18, "learning_rate": 0.0003812654373216254, "loss": 1.5581, "step": 364 }, { "epoch": 0.18, "learning_rate": 0.00038112551982046484, "loss": 1.7062, "step": 365 }, { "epoch": 0.18, "learning_rate": 0.00038098510764989087, "loss": 1.7171, "step": 366 }, { "epoch": 0.18, "learning_rate": 0.0003808442011933814, "loss": 1.5299, "step": 367 }, { "epoch": 0.18, "learning_rate": 0.0003807028008357638, "loss": 1.5733, "step": 368 }, { "epoch": 0.18, "learning_rate": 0.000380560906963215, "loss": 1.8065, "step": 369 }, { "epoch": 0.18, "learning_rate": 0.0003804185199632591, "loss": 1.4458, "step": 370 }, { "epoch": 0.19, "learning_rate": 0.0003802756402247674, "loss": 1.5408, "step": 371 }, { "epoch": 0.19, "learning_rate": 0.00038013226813795686, "loss": 1.476, "step": 372 }, { "epoch": 0.19, "learning_rate": 0.0003799884040943889, "loss": 1.5259, "step": 373 }, { "epoch": 0.19, "learning_rate": 0.00037984404848696873, "loss": 1.6524, "step": 374 }, { "epoch": 0.19, "learning_rate": 0.0003796992017099438, "loss": 1.6925, "step": 375 }, { "epoch": 0.19, "learning_rate": 0.0003795538641589033, "loss": 1.5929, "step": 376 }, { "epoch": 0.19, "learning_rate": 0.0003794080362307766, "loss": 1.5452, "step": 377 }, { "epoch": 0.19, "learning_rate": 0.00037926171832383226, "loss": 1.6099, "step": 378 }, { "epoch": 0.19, "learning_rate": 0.00037911491083767715, "loss": 1.5971, "step": 379 }, { "epoch": 0.19, "learning_rate": 0.00037896761417325524, "loss": 1.8214, "step": 380 }, { "epoch": 0.19, "learning_rate": 0.0003788198287328463, "loss": 1.4613, "step": 381 }, { "epoch": 0.19, "learning_rate": 0.00037867155492006516, "loss": 1.6455, "step": 382 }, { "epoch": 0.19, "learning_rate": 0.00037852279313986044, "loss": 1.4235, "step": 383 }, { "epoch": 0.19, "learning_rate": 0.0003783735437985133, "loss": 1.6516, "step": 384 }, { "epoch": 0.19, "learning_rate": 0.0003782238073036367, "loss": 1.5719, "step": 385 }, { "epoch": 0.19, "learning_rate": 0.00037807358406417374, "loss": 1.5233, "step": 386 }, { "epoch": 0.19, "learning_rate": 0.00037792287449039713, "loss": 1.8381, "step": 387 }, { "epoch": 0.19, "learning_rate": 0.00037777167899390776, "loss": 1.5918, "step": 388 }, { "epoch": 0.19, "learning_rate": 0.0003776199979876335, "loss": 1.7216, "step": 389 }, { "epoch": 0.19, "learning_rate": 0.00037746783188582827, "loss": 1.8514, "step": 390 }, { "epoch": 0.2, "learning_rate": 0.00037731518110407084, "loss": 1.5811, "step": 391 }, { "epoch": 0.2, "learning_rate": 0.00037716204605926367, "loss": 1.6584, "step": 392 }, { "epoch": 0.2, "learning_rate": 0.0003770084271696317, "loss": 1.5818, "step": 393 }, { "epoch": 0.2, "learning_rate": 0.00037685432485472145, "loss": 1.7268, "step": 394 }, { "epoch": 0.2, "learning_rate": 0.0003766997395353995, "loss": 1.6397, "step": 395 }, { "epoch": 0.2, "learning_rate": 0.0003765446716338518, "loss": 1.7954, "step": 396 }, { "epoch": 0.2, "learning_rate": 0.00037638912157358223, "loss": 1.6724, "step": 397 }, { "epoch": 0.2, "learning_rate": 0.00037623308977941124, "loss": 1.5138, "step": 398 }, { "epoch": 0.2, "learning_rate": 0.00037607657667747523, "loss": 1.6065, "step": 399 }, { "epoch": 0.2, "learning_rate": 0.000375919582695225, "loss": 1.6297, "step": 400 }, { "epoch": 0.2, "learning_rate": 0.0003757621082614245, "loss": 1.5817, "step": 401 }, { "epoch": 0.2, "learning_rate": 0.00037560415380615014, "loss": 1.5774, "step": 402 }, { "epoch": 0.2, "learning_rate": 0.00037544571976078913, "loss": 1.6676, "step": 403 }, { "epoch": 0.2, "learning_rate": 0.0003752868065580384, "loss": 1.7233, "step": 404 }, { "epoch": 0.2, "learning_rate": 0.00037512741463190374, "loss": 1.6582, "step": 405 }, { "epoch": 0.2, "learning_rate": 0.0003749675444176983, "loss": 1.648, "step": 406 }, { "epoch": 0.2, "learning_rate": 0.0003748071963520412, "loss": 1.581, "step": 407 }, { "epoch": 0.2, "learning_rate": 0.000374646370872857, "loss": 1.7211, "step": 408 }, { "epoch": 0.2, "learning_rate": 0.00037448506841937393, "loss": 1.567, "step": 409 }, { "epoch": 0.2, "learning_rate": 0.0003743232894321229, "loss": 1.4671, "step": 410 }, { "epoch": 0.21, "learning_rate": 0.00037416103435293616, "loss": 1.6647, "step": 411 }, { "epoch": 0.21, "learning_rate": 0.0003739983036249465, "loss": 1.5772, "step": 412 }, { "epoch": 0.21, "learning_rate": 0.0003738350976925854, "loss": 1.5484, "step": 413 }, { "epoch": 0.21, "learning_rate": 0.00037367141700158247, "loss": 1.5484, "step": 414 }, { "epoch": 0.21, "learning_rate": 0.00037350726199896384, "loss": 1.6113, "step": 415 }, { "epoch": 0.21, "learning_rate": 0.00037334263313305074, "loss": 1.5764, "step": 416 }, { "epoch": 0.21, "learning_rate": 0.000373177530853459, "loss": 1.6048, "step": 417 }, { "epoch": 0.21, "learning_rate": 0.0003730119556110971, "loss": 1.6771, "step": 418 }, { "epoch": 0.21, "learning_rate": 0.00037284590785816534, "loss": 1.6426, "step": 419 }, { "epoch": 0.21, "learning_rate": 0.00037267938804815443, "loss": 1.6309, "step": 420 }, { "epoch": 0.21, "learning_rate": 0.0003725123966358444, "loss": 1.6995, "step": 421 }, { "epoch": 0.21, "learning_rate": 0.00037234493407730307, "loss": 1.8452, "step": 422 }, { "epoch": 0.21, "learning_rate": 0.0003721770008298852, "loss": 1.6109, "step": 423 }, { "epoch": 0.21, "learning_rate": 0.000372008597352231, "loss": 1.6494, "step": 424 }, { "epoch": 0.21, "learning_rate": 0.00037183972410426483, "loss": 1.7364, "step": 425 }, { "epoch": 0.21, "learning_rate": 0.0003716703815471942, "loss": 1.6014, "step": 426 }, { "epoch": 0.21, "learning_rate": 0.00037150057014350796, "loss": 1.6395, "step": 427 }, { "epoch": 0.21, "learning_rate": 0.000371330290356976, "loss": 1.6497, "step": 428 }, { "epoch": 0.21, "learning_rate": 0.000371159542652647, "loss": 1.4915, "step": 429 }, { "epoch": 0.21, "learning_rate": 0.00037098832749684767, "loss": 1.4835, "step": 430 }, { "epoch": 0.22, "learning_rate": 0.0003708166453571813, "loss": 1.8236, "step": 431 }, { "epoch": 0.22, "learning_rate": 0.0003706444967025267, "loss": 1.5515, "step": 432 }, { "epoch": 0.22, "learning_rate": 0.0003704718820030366, "loss": 1.4684, "step": 433 }, { "epoch": 0.22, "learning_rate": 0.0003702988017301368, "loss": 1.8934, "step": 434 }, { "epoch": 0.22, "learning_rate": 0.00037012525635652424, "loss": 1.7834, "step": 435 }, { "epoch": 0.22, "learning_rate": 0.0003699512463561664, "loss": 1.6533, "step": 436 }, { "epoch": 0.22, "learning_rate": 0.00036977677220429963, "loss": 1.5941, "step": 437 }, { "epoch": 0.22, "learning_rate": 0.00036960183437742783, "loss": 1.6756, "step": 438 }, { "epoch": 0.22, "learning_rate": 0.00036942643335332134, "loss": 1.5123, "step": 439 }, { "epoch": 0.22, "learning_rate": 0.00036925056961101537, "loss": 1.5916, "step": 440 }, { "epoch": 0.22, "learning_rate": 0.0003690742436308091, "loss": 1.4561, "step": 441 }, { "epoch": 0.22, "learning_rate": 0.0003688974558942639, "loss": 1.7021, "step": 442 }, { "epoch": 0.22, "learning_rate": 0.0003687202068842024, "loss": 1.4809, "step": 443 }, { "epoch": 0.22, "learning_rate": 0.00036854249708470686, "loss": 1.4896, "step": 444 }, { "epoch": 0.22, "learning_rate": 0.00036836432698111806, "loss": 1.5072, "step": 445 }, { "epoch": 0.22, "learning_rate": 0.000368185697060034, "loss": 1.5797, "step": 446 }, { "epoch": 0.22, "learning_rate": 0.00036800660780930835, "loss": 1.5629, "step": 447 }, { "epoch": 0.22, "learning_rate": 0.00036782705971804923, "loss": 1.64, "step": 448 }, { "epoch": 0.22, "learning_rate": 0.00036764705327661806, "loss": 1.6466, "step": 449 }, { "epoch": 0.22, "learning_rate": 0.00036746658897662793, "loss": 1.6695, "step": 450 }, { "epoch": 0.23, "learning_rate": 0.00036728566731094236, "loss": 1.8699, "step": 451 }, { "epoch": 0.23, "learning_rate": 0.0003671042887736741, "loss": 1.7128, "step": 452 }, { "epoch": 0.23, "learning_rate": 0.00036692245386018353, "loss": 1.6109, "step": 453 }, { "epoch": 0.23, "learning_rate": 0.0003667401630670774, "loss": 1.5786, "step": 454 }, { "epoch": 0.23, "learning_rate": 0.0003665574168922077, "loss": 1.8162, "step": 455 }, { "epoch": 0.23, "learning_rate": 0.00036637421583466995, "loss": 1.7225, "step": 456 }, { "epoch": 0.23, "learning_rate": 0.0003661905603948021, "loss": 1.7744, "step": 457 }, { "epoch": 0.23, "learning_rate": 0.0003660064510741829, "loss": 1.7345, "step": 458 }, { "epoch": 0.23, "learning_rate": 0.0003658218883756308, "loss": 1.6193, "step": 459 }, { "epoch": 0.23, "learning_rate": 0.00036563687280320245, "loss": 1.5931, "step": 460 }, { "epoch": 0.23, "learning_rate": 0.00036545140486219133, "loss": 1.6595, "step": 461 }, { "epoch": 0.23, "learning_rate": 0.0003652654850591264, "loss": 1.7491, "step": 462 }, { "epoch": 0.23, "learning_rate": 0.0003650791139017707, "loss": 1.4703, "step": 463 }, { "epoch": 0.23, "learning_rate": 0.00036489229189911985, "loss": 1.615, "step": 464 }, { "epoch": 0.23, "learning_rate": 0.000364705019561401, "loss": 1.6676, "step": 465 }, { "epoch": 0.23, "learning_rate": 0.00036451729740007084, "loss": 1.5476, "step": 466 }, { "epoch": 0.23, "learning_rate": 0.0003643291259278149, "loss": 1.4557, "step": 467 }, { "epoch": 0.23, "learning_rate": 0.00036414050565854574, "loss": 1.7099, "step": 468 }, { "epoch": 0.23, "learning_rate": 0.00036395143710740143, "loss": 1.7883, "step": 469 }, { "epoch": 0.23, "learning_rate": 0.0003637619207907447, "loss": 1.4658, "step": 470 }, { "epoch": 0.24, "learning_rate": 0.0003635719572261608, "loss": 1.6454, "step": 471 }, { "epoch": 0.24, "learning_rate": 0.0003633815469324566, "loss": 1.6372, "step": 472 }, { "epoch": 0.24, "learning_rate": 0.0003631906904296591, "loss": 1.4782, "step": 473 }, { "epoch": 0.24, "learning_rate": 0.0003629993882390139, "loss": 1.6954, "step": 474 }, { "epoch": 0.24, "learning_rate": 0.0003628076408829836, "loss": 1.7706, "step": 475 }, { "epoch": 0.24, "learning_rate": 0.00036261544888524695, "loss": 1.7135, "step": 476 }, { "epoch": 0.24, "learning_rate": 0.0003624228127706968, "loss": 1.6762, "step": 477 }, { "epoch": 0.24, "learning_rate": 0.000362229733065439, "loss": 1.697, "step": 478 }, { "epoch": 0.24, "learning_rate": 0.0003620362102967909, "loss": 1.5524, "step": 479 }, { "epoch": 0.24, "learning_rate": 0.00036184224499327976, "loss": 1.488, "step": 480 }, { "epoch": 0.24, "learning_rate": 0.0003616478376846417, "loss": 1.5274, "step": 481 }, { "epoch": 0.24, "learning_rate": 0.0003614529889018197, "loss": 1.437, "step": 482 }, { "epoch": 0.24, "learning_rate": 0.0003612576991769627, "loss": 1.6498, "step": 483 }, { "epoch": 0.24, "learning_rate": 0.00036106196904342377, "loss": 1.6727, "step": 484 }, { "epoch": 0.24, "learning_rate": 0.00036086579903575866, "loss": 1.5969, "step": 485 }, { "epoch": 0.24, "learning_rate": 0.0003606691896897248, "loss": 1.6462, "step": 486 }, { "epoch": 0.24, "learning_rate": 0.000360472141542279, "loss": 1.5607, "step": 487 }, { "epoch": 0.24, "learning_rate": 0.0003602746551315769, "loss": 1.687, "step": 488 }, { "epoch": 0.24, "learning_rate": 0.000360076730996971, "loss": 1.3288, "step": 489 }, { "epoch": 0.24, "learning_rate": 0.000359878369679009, "loss": 1.6054, "step": 490 }, { "epoch": 0.25, "learning_rate": 0.0003596795717194328, "loss": 1.6685, "step": 491 }, { "epoch": 0.25, "learning_rate": 0.00035948033766117687, "loss": 1.5, "step": 492 }, { "epoch": 0.25, "learning_rate": 0.00035928066804836653, "loss": 1.7705, "step": 493 }, { "epoch": 0.25, "learning_rate": 0.0003590805634263167, "loss": 1.4625, "step": 494 }, { "epoch": 0.25, "learning_rate": 0.0003588800243415304, "loss": 1.438, "step": 495 }, { "epoch": 0.25, "learning_rate": 0.00035867905134169716, "loss": 1.7041, "step": 496 }, { "epoch": 0.25, "learning_rate": 0.0003584776449756915, "loss": 1.5696, "step": 497 }, { "epoch": 0.25, "learning_rate": 0.0003582758057935717, "loss": 1.571, "step": 498 }, { "epoch": 0.25, "learning_rate": 0.0003580735343465778, "loss": 1.66, "step": 499 }, { "epoch": 0.25, "learning_rate": 0.0003578708311871308, "loss": 1.4366, "step": 500 }, { "epoch": 0.25, "learning_rate": 0.0003576676968688303, "loss": 1.5916, "step": 501 }, { "epoch": 0.25, "learning_rate": 0.0003574641319464537, "loss": 1.5619, "step": 502 }, { "epoch": 0.25, "learning_rate": 0.0003572601369759544, "loss": 1.7142, "step": 503 }, { "epoch": 0.25, "learning_rate": 0.0003570557125144602, "loss": 1.7435, "step": 504 }, { "epoch": 0.25, "learning_rate": 0.00035685085912027197, "loss": 1.5314, "step": 505 }, { "epoch": 0.25, "learning_rate": 0.00035664557735286197, "loss": 1.8234, "step": 506 }, { "epoch": 0.25, "learning_rate": 0.0003564398677728724, "loss": 1.7898, "step": 507 }, { "epoch": 0.25, "learning_rate": 0.0003562337309421139, "loss": 1.5523, "step": 508 }, { "epoch": 0.25, "learning_rate": 0.00035602716742356397, "loss": 1.5225, "step": 509 }, { "epoch": 0.25, "learning_rate": 0.0003558201777813653, "loss": 1.784, "step": 510 }, { "epoch": 0.26, "learning_rate": 0.00035561276258082444, "loss": 1.5623, "step": 511 }, { "epoch": 0.26, "learning_rate": 0.00035540492238841025, "loss": 1.6344, "step": 512 }, { "epoch": 0.26, "learning_rate": 0.0003551966577717522, "loss": 1.7194, "step": 513 }, { "epoch": 0.26, "learning_rate": 0.00035498796929963895, "loss": 1.4878, "step": 514 }, { "epoch": 0.26, "learning_rate": 0.00035477885754201666, "loss": 1.5972, "step": 515 }, { "epoch": 0.26, "learning_rate": 0.00035456932306998765, "loss": 1.4085, "step": 516 }, { "epoch": 0.26, "learning_rate": 0.00035435936645580846, "loss": 1.4897, "step": 517 }, { "epoch": 0.26, "learning_rate": 0.0003541489882728889, "loss": 1.5127, "step": 518 }, { "epoch": 0.26, "learning_rate": 0.00035393818909578985, "loss": 1.6159, "step": 519 }, { "epoch": 0.26, "learning_rate": 0.0003537269695002221, "loss": 1.6481, "step": 520 }, { "epoch": 0.26, "learning_rate": 0.0003535153300630444, "loss": 1.6057, "step": 521 }, { "epoch": 0.26, "learning_rate": 0.0003533032713622625, "loss": 1.6588, "step": 522 }, { "epoch": 0.26, "learning_rate": 0.0003530907939770269, "loss": 1.6406, "step": 523 }, { "epoch": 0.26, "learning_rate": 0.00035287789848763166, "loss": 1.4532, "step": 524 }, { "epoch": 0.26, "learning_rate": 0.0003526645854755128, "loss": 1.4354, "step": 525 }, { "epoch": 0.26, "learning_rate": 0.0003524508555232464, "loss": 1.6292, "step": 526 }, { "epoch": 0.26, "learning_rate": 0.00035223670921454757, "loss": 1.7245, "step": 527 }, { "epoch": 0.26, "learning_rate": 0.0003520221471342682, "loss": 1.5872, "step": 528 }, { "epoch": 0.26, "learning_rate": 0.000351807169868396, "loss": 1.5689, "step": 529 }, { "epoch": 0.26, "learning_rate": 0.0003515917780040522, "loss": 1.719, "step": 530 }, { "epoch": 0.27, "learning_rate": 0.0003513759721294907, "loss": 1.6705, "step": 531 }, { "epoch": 0.27, "learning_rate": 0.00035115975283409593, "loss": 1.6617, "step": 532 }, { "epoch": 0.27, "learning_rate": 0.0003509431207083814, "loss": 1.6712, "step": 533 }, { "epoch": 0.27, "learning_rate": 0.0003507260763439882, "loss": 1.5884, "step": 534 }, { "epoch": 0.27, "learning_rate": 0.0003505086203336831, "loss": 1.7172, "step": 535 }, { "epoch": 0.27, "learning_rate": 0.0003502907532713573, "loss": 1.6537, "step": 536 }, { "epoch": 0.27, "learning_rate": 0.00035007247575202446, "loss": 1.5581, "step": 537 }, { "epoch": 0.27, "learning_rate": 0.0003498537883718194, "loss": 1.632, "step": 538 }, { "epoch": 0.27, "learning_rate": 0.00034963469172799615, "loss": 1.5551, "step": 539 }, { "epoch": 0.27, "learning_rate": 0.0003494151864189266, "loss": 1.7205, "step": 540 }, { "epoch": 0.27, "learning_rate": 0.00034919527304409857, "loss": 1.5633, "step": 541 }, { "epoch": 0.27, "learning_rate": 0.0003489749522041145, "loss": 1.5343, "step": 542 }, { "epoch": 0.27, "learning_rate": 0.00034875422450068963, "loss": 1.6079, "step": 543 }, { "epoch": 0.27, "learning_rate": 0.0003485330905366503, "loss": 1.4977, "step": 544 }, { "epoch": 0.27, "learning_rate": 0.0003483115509159325, "loss": 1.6487, "step": 545 }, { "epoch": 0.27, "learning_rate": 0.00034808960624358, "loss": 1.6665, "step": 546 }, { "epoch": 0.27, "learning_rate": 0.00034786725712574287, "loss": 1.6199, "step": 547 }, { "epoch": 0.27, "learning_rate": 0.0003476445041696757, "loss": 1.5882, "step": 548 }, { "epoch": 0.27, "learning_rate": 0.000347421347983736, "loss": 1.6632, "step": 549 }, { "epoch": 0.27, "learning_rate": 0.00034719778917738256, "loss": 1.4553, "step": 550 }, { "epoch": 0.28, "learning_rate": 0.0003469738283611738, "loss": 1.5907, "step": 551 }, { "epoch": 0.28, "learning_rate": 0.00034674946614676597, "loss": 1.7215, "step": 552 }, { "epoch": 0.28, "learning_rate": 0.0003465247031469117, "loss": 1.6233, "step": 553 }, { "epoch": 0.28, "learning_rate": 0.000346299539975458, "loss": 1.6185, "step": 554 }, { "epoch": 0.28, "learning_rate": 0.000346073977247345, "loss": 1.4927, "step": 555 }, { "epoch": 0.28, "learning_rate": 0.000345848015578604, "loss": 1.6307, "step": 556 }, { "epoch": 0.28, "learning_rate": 0.00034562165558635577, "loss": 1.6981, "step": 557 }, { "epoch": 0.28, "learning_rate": 0.00034539489788880883, "loss": 1.5469, "step": 558 }, { "epoch": 0.28, "learning_rate": 0.0003451677431052582, "loss": 1.6071, "step": 559 }, { "epoch": 0.28, "learning_rate": 0.0003449401918560831, "loss": 1.658, "step": 560 }, { "epoch": 0.28, "learning_rate": 0.0003447122447627456, "loss": 1.6857, "step": 561 }, { "epoch": 0.28, "learning_rate": 0.0003444839024477889, "loss": 1.5205, "step": 562 }, { "epoch": 0.28, "learning_rate": 0.0003442551655348355, "loss": 1.5646, "step": 563 }, { "epoch": 0.28, "learning_rate": 0.00034402603464858564, "loss": 1.7056, "step": 564 }, { "epoch": 0.28, "learning_rate": 0.0003437965104148156, "loss": 1.6276, "step": 565 }, { "epoch": 0.28, "learning_rate": 0.00034356659346037585, "loss": 1.6262, "step": 566 }, { "epoch": 0.28, "learning_rate": 0.00034333628441318936, "loss": 1.6572, "step": 567 }, { "epoch": 0.28, "learning_rate": 0.00034310558390225, "loss": 1.6792, "step": 568 }, { "epoch": 0.28, "learning_rate": 0.0003428744925576208, "loss": 1.4858, "step": 569 }, { "epoch": 0.28, "learning_rate": 0.0003426430110104321, "loss": 1.318, "step": 570 }, { "epoch": 0.29, "learning_rate": 0.00034241113989288003, "loss": 1.3952, "step": 571 }, { "epoch": 0.29, "learning_rate": 0.00034217887983822463, "loss": 1.7068, "step": 572 }, { "epoch": 0.29, "learning_rate": 0.0003419462314807879, "loss": 1.7115, "step": 573 }, { "epoch": 0.29, "learning_rate": 0.0003417131954559529, "loss": 1.4274, "step": 574 }, { "epoch": 0.29, "learning_rate": 0.0003414797724001609, "loss": 1.4477, "step": 575 }, { "epoch": 0.29, "learning_rate": 0.0003412459629509105, "loss": 1.4958, "step": 576 }, { "epoch": 0.29, "learning_rate": 0.0003410117677467553, "loss": 1.513, "step": 577 }, { "epoch": 0.29, "learning_rate": 0.0003407771874273028, "loss": 1.6229, "step": 578 }, { "epoch": 0.29, "learning_rate": 0.00034054222263321194, "loss": 1.5662, "step": 579 }, { "epoch": 0.29, "learning_rate": 0.000340306874006192, "loss": 1.6515, "step": 580 }, { "epoch": 0.29, "learning_rate": 0.0003400711421890001, "loss": 1.762, "step": 581 }, { "epoch": 0.29, "learning_rate": 0.00033983502782544044, "loss": 1.4794, "step": 582 }, { "epoch": 0.29, "learning_rate": 0.0003395985315603615, "loss": 1.5187, "step": 583 }, { "epoch": 0.29, "learning_rate": 0.00033936165403965516, "loss": 1.5355, "step": 584 }, { "epoch": 0.29, "learning_rate": 0.0003391243959102542, "loss": 1.6659, "step": 585 }, { "epoch": 0.29, "learning_rate": 0.00033888675782013113, "loss": 1.5111, "step": 586 }, { "epoch": 0.29, "learning_rate": 0.000338648740418296, "loss": 1.7099, "step": 587 }, { "epoch": 0.29, "learning_rate": 0.0003384103443547948, "loss": 1.6258, "step": 588 }, { "epoch": 0.29, "learning_rate": 0.0003381715702807079, "loss": 1.5812, "step": 589 }, { "epoch": 0.29, "learning_rate": 0.00033793241884814783, "loss": 1.5988, "step": 590 }, { "epoch": 0.3, "learning_rate": 0.0003376928907102578, "loss": 1.4556, "step": 591 }, { "epoch": 0.3, "learning_rate": 0.0003374529865212097, "loss": 1.4119, "step": 592 }, { "epoch": 0.3, "learning_rate": 0.00033721270693620254, "loss": 1.8002, "step": 593 }, { "epoch": 0.3, "learning_rate": 0.00033697205261146076, "loss": 1.6446, "step": 594 }, { "epoch": 0.3, "learning_rate": 0.00033673102420423193, "loss": 1.5731, "step": 595 }, { "epoch": 0.3, "learning_rate": 0.0003364896223727855, "loss": 1.5963, "step": 596 }, { "epoch": 0.3, "learning_rate": 0.00033624784777641067, "loss": 1.603, "step": 597 }, { "epoch": 0.3, "learning_rate": 0.00033600570107541463, "loss": 1.7701, "step": 598 }, { "epoch": 0.3, "learning_rate": 0.00033576318293112103, "loss": 1.4142, "step": 599 }, { "epoch": 0.3, "learning_rate": 0.00033552029400586773, "loss": 1.6056, "step": 600 }, { "epoch": 0.3, "learning_rate": 0.00033527703496300535, "loss": 1.6508, "step": 601 }, { "epoch": 0.3, "learning_rate": 0.00033503340646689534, "loss": 1.6636, "step": 602 }, { "epoch": 0.3, "learning_rate": 0.00033478940918290815, "loss": 1.6291, "step": 603 }, { "epoch": 0.3, "learning_rate": 0.00033454504377742136, "loss": 1.4496, "step": 604 }, { "epoch": 0.3, "learning_rate": 0.000334300310917818, "loss": 1.5698, "step": 605 }, { "epoch": 0.3, "learning_rate": 0.0003340552112724845, "loss": 1.6541, "step": 606 }, { "epoch": 0.3, "learning_rate": 0.0003338097455108093, "loss": 1.5061, "step": 607 }, { "epoch": 0.3, "learning_rate": 0.00033356391430318047, "loss": 1.4854, "step": 608 }, { "epoch": 0.3, "learning_rate": 0.0003333177183209842, "loss": 1.6627, "step": 609 }, { "epoch": 0.3, "learning_rate": 0.0003330711582366031, "loss": 1.535, "step": 610 }, { "epoch": 0.31, "learning_rate": 0.00033282423472341384, "loss": 1.6511, "step": 611 }, { "epoch": 0.31, "learning_rate": 0.0003325769484557859, "loss": 1.5739, "step": 612 }, { "epoch": 0.31, "learning_rate": 0.0003323293001090795, "loss": 1.3858, "step": 613 }, { "epoch": 0.31, "learning_rate": 0.0003320812903596434, "loss": 1.646, "step": 614 }, { "epoch": 0.31, "learning_rate": 0.0003318329198848138, "loss": 1.7639, "step": 615 }, { "epoch": 0.31, "learning_rate": 0.0003315841893629118, "loss": 1.447, "step": 616 }, { "epoch": 0.31, "learning_rate": 0.000331335099473242, "loss": 1.6622, "step": 617 }, { "epoch": 0.31, "learning_rate": 0.00033108565089609034, "loss": 1.5356, "step": 618 }, { "epoch": 0.31, "learning_rate": 0.00033083584431272225, "loss": 1.4241, "step": 619 }, { "epoch": 0.31, "learning_rate": 0.0003305856804053812, "loss": 1.8722, "step": 620 }, { "epoch": 0.31, "learning_rate": 0.00033033515985728633, "loss": 1.4449, "step": 621 }, { "epoch": 0.31, "learning_rate": 0.0003300842833526309, "loss": 1.8373, "step": 622 }, { "epoch": 0.31, "learning_rate": 0.00032983305157658027, "loss": 1.6631, "step": 623 }, { "epoch": 0.31, "learning_rate": 0.00032958146521527, "loss": 1.771, "step": 624 }, { "epoch": 0.31, "learning_rate": 0.00032932952495580425, "loss": 1.6418, "step": 625 }, { "epoch": 0.31, "learning_rate": 0.00032907723148625355, "loss": 1.4662, "step": 626 }, { "epoch": 0.31, "learning_rate": 0.0003288245854956531, "loss": 1.655, "step": 627 }, { "epoch": 0.31, "learning_rate": 0.00032857158767400104, "loss": 1.4315, "step": 628 }, { "epoch": 0.31, "learning_rate": 0.000328318238712256, "loss": 1.716, "step": 629 }, { "epoch": 0.31, "learning_rate": 0.00032806453930233595, "loss": 1.6393, "step": 630 }, { "epoch": 0.32, "learning_rate": 0.0003278104901371159, "loss": 1.7138, "step": 631 }, { "epoch": 0.32, "learning_rate": 0.0003275560919104259, "loss": 1.6158, "step": 632 }, { "epoch": 0.32, "learning_rate": 0.0003273013453170496, "loss": 1.5414, "step": 633 }, { "epoch": 0.32, "learning_rate": 0.0003270462510527218, "loss": 1.5223, "step": 634 }, { "epoch": 0.32, "learning_rate": 0.0003267908098141271, "loss": 1.6761, "step": 635 }, { "epoch": 0.32, "learning_rate": 0.0003265350222988972, "loss": 1.443, "step": 636 }, { "epoch": 0.32, "learning_rate": 0.00032627888920561024, "loss": 1.756, "step": 637 }, { "epoch": 0.32, "learning_rate": 0.0003260224112337876, "loss": 1.506, "step": 638 }, { "epoch": 0.32, "learning_rate": 0.0003257655890838927, "loss": 1.6524, "step": 639 }, { "epoch": 0.32, "learning_rate": 0.00032550842345732917, "loss": 1.6236, "step": 640 }, { "epoch": 0.32, "learning_rate": 0.00032525091505643825, "loss": 1.578, "step": 641 }, { "epoch": 0.32, "learning_rate": 0.0003249930645844978, "loss": 1.2744, "step": 642 }, { "epoch": 0.32, "learning_rate": 0.00032473487274571963, "loss": 1.7205, "step": 643 }, { "epoch": 0.32, "learning_rate": 0.00032447634024524786, "loss": 1.6393, "step": 644 }, { "epoch": 0.32, "learning_rate": 0.0003242174677891571, "loss": 1.5814, "step": 645 }, { "epoch": 0.32, "learning_rate": 0.0003239582560844503, "loss": 1.4664, "step": 646 }, { "epoch": 0.32, "learning_rate": 0.0003236987058390571, "loss": 1.6065, "step": 647 }, { "epoch": 0.32, "learning_rate": 0.0003234388177618314, "loss": 1.5789, "step": 648 }, { "epoch": 0.32, "learning_rate": 0.00032317859256255016, "loss": 1.4482, "step": 649 }, { "epoch": 0.32, "learning_rate": 0.00032291803095191074, "loss": 1.5858, "step": 650 }, { "epoch": 0.33, "learning_rate": 0.00032265713364152933, "loss": 1.6917, "step": 651 }, { "epoch": 0.33, "learning_rate": 0.000322395901343939, "loss": 1.5473, "step": 652 }, { "epoch": 0.33, "learning_rate": 0.00032213433477258776, "loss": 1.574, "step": 653 }, { "epoch": 0.33, "learning_rate": 0.0003218724346418364, "loss": 1.5813, "step": 654 }, { "epoch": 0.33, "learning_rate": 0.0003216102016669568, "loss": 1.4462, "step": 655 }, { "epoch": 0.33, "learning_rate": 0.0003213476365641298, "loss": 1.5667, "step": 656 }, { "epoch": 0.33, "learning_rate": 0.00032108474005044325, "loss": 1.6149, "step": 657 }, { "epoch": 0.33, "learning_rate": 0.0003208215128438904, "loss": 1.5657, "step": 658 }, { "epoch": 0.33, "learning_rate": 0.0003205579556633673, "loss": 1.5153, "step": 659 }, { "epoch": 0.33, "learning_rate": 0.0003202940692286714, "loss": 1.5941, "step": 660 }, { "epoch": 0.33, "learning_rate": 0.00032002985426049925, "loss": 1.6282, "step": 661 }, { "epoch": 0.33, "learning_rate": 0.00031976531148044475, "loss": 1.478, "step": 662 }, { "epoch": 0.33, "learning_rate": 0.0003195004416109971, "loss": 1.4623, "step": 663 }, { "epoch": 0.33, "learning_rate": 0.00031923524537553864, "loss": 1.5935, "step": 664 }, { "epoch": 0.33, "learning_rate": 0.0003189697234983432, "loss": 1.5293, "step": 665 }, { "epoch": 0.33, "learning_rate": 0.000318703876704574, "loss": 1.4841, "step": 666 }, { "epoch": 0.33, "learning_rate": 0.00031843770572028145, "loss": 1.4985, "step": 667 }, { "epoch": 0.33, "learning_rate": 0.0003181712112724015, "loss": 1.604, "step": 668 }, { "epoch": 0.33, "learning_rate": 0.0003179043940887535, "loss": 1.5311, "step": 669 }, { "epoch": 0.33, "learning_rate": 0.0003176372548980381, "loss": 1.4669, "step": 670 }, { "epoch": 0.34, "learning_rate": 0.00031736979442983557, "loss": 1.4565, "step": 671 }, { "epoch": 0.34, "learning_rate": 0.0003171020134146035, "loss": 1.6435, "step": 672 }, { "epoch": 0.34, "learning_rate": 0.00031683391258367484, "loss": 1.6284, "step": 673 }, { "epoch": 0.34, "learning_rate": 0.00031656549266925613, "loss": 1.4404, "step": 674 }, { "epoch": 0.34, "learning_rate": 0.00031629675440442536, "loss": 1.5517, "step": 675 }, { "epoch": 0.34, "learning_rate": 0.00031602769852312983, "loss": 1.6271, "step": 676 }, { "epoch": 0.34, "learning_rate": 0.00031575832576018437, "loss": 1.6055, "step": 677 }, { "epoch": 0.34, "learning_rate": 0.00031548863685126926, "loss": 1.7491, "step": 678 }, { "epoch": 0.34, "learning_rate": 0.00031521863253292814, "loss": 1.6137, "step": 679 }, { "epoch": 0.34, "learning_rate": 0.00031494831354256605, "loss": 1.5847, "step": 680 }, { "epoch": 0.34, "learning_rate": 0.00031467768061844753, "loss": 1.3082, "step": 681 }, { "epoch": 0.34, "learning_rate": 0.0003144067344996944, "loss": 1.6252, "step": 682 }, { "epoch": 0.34, "learning_rate": 0.0003141354759262839, "loss": 1.5987, "step": 683 }, { "epoch": 0.34, "learning_rate": 0.0003138639056390465, "loss": 1.4824, "step": 684 }, { "epoch": 0.34, "learning_rate": 0.0003135920243796641, "loss": 1.3903, "step": 685 }, { "epoch": 0.34, "learning_rate": 0.000313319832890668, "loss": 1.5826, "step": 686 }, { "epoch": 0.34, "learning_rate": 0.0003130473319154365, "loss": 1.5808, "step": 687 }, { "epoch": 0.34, "learning_rate": 0.00031277452219819325, "loss": 1.6244, "step": 688 }, { "epoch": 0.34, "learning_rate": 0.0003125014044840051, "loss": 1.5069, "step": 689 }, { "epoch": 0.34, "learning_rate": 0.00031222797951878026, "loss": 1.6237, "step": 690 }, { "epoch": 0.35, "learning_rate": 0.00031195424804926567, "loss": 1.4301, "step": 691 }, { "epoch": 0.35, "learning_rate": 0.00031168021082304565, "loss": 1.5819, "step": 692 }, { "epoch": 0.35, "learning_rate": 0.0003114058685885396, "loss": 1.8096, "step": 693 }, { "epoch": 0.35, "learning_rate": 0.0003111312220949996, "loss": 1.4657, "step": 694 }, { "epoch": 0.35, "learning_rate": 0.00031085627209250915, "loss": 1.6379, "step": 695 }, { "epoch": 0.35, "learning_rate": 0.00031058101933198023, "loss": 1.4865, "step": 696 }, { "epoch": 0.35, "learning_rate": 0.00031030546456515195, "loss": 1.3837, "step": 697 }, { "epoch": 0.35, "learning_rate": 0.0003100296085445881, "loss": 1.6494, "step": 698 }, { "epoch": 0.35, "learning_rate": 0.0003097534520236754, "loss": 1.7428, "step": 699 }, { "epoch": 0.35, "learning_rate": 0.00030947699575662087, "loss": 1.5992, "step": 700 }, { "epoch": 0.35, "learning_rate": 0.0003092002404984506, "loss": 1.6187, "step": 701 }, { "epoch": 0.35, "learning_rate": 0.00030892318700500703, "loss": 1.7533, "step": 702 }, { "epoch": 0.35, "learning_rate": 0.0003086458360329471, "loss": 1.6341, "step": 703 }, { "epoch": 0.35, "learning_rate": 0.0003083681883397403, "loss": 1.6211, "step": 704 }, { "epoch": 0.35, "learning_rate": 0.00030809024468366635, "loss": 1.5643, "step": 705 }, { "epoch": 0.35, "learning_rate": 0.00030781200582381336, "loss": 1.5116, "step": 706 }, { "epoch": 0.35, "learning_rate": 0.0003075334725200757, "loss": 1.627, "step": 707 }, { "epoch": 0.35, "learning_rate": 0.00030725464553315186, "loss": 1.8024, "step": 708 }, { "epoch": 0.35, "learning_rate": 0.00030697552562454223, "loss": 1.4348, "step": 709 }, { "epoch": 0.35, "learning_rate": 0.00030669611355654743, "loss": 1.4995, "step": 710 }, { "epoch": 0.36, "learning_rate": 0.000306416410092266, "loss": 1.6125, "step": 711 }, { "epoch": 0.36, "learning_rate": 0.0003061364159955921, "loss": 1.6559, "step": 712 }, { "epoch": 0.36, "learning_rate": 0.0003058561320312139, "loss": 1.469, "step": 713 }, { "epoch": 0.36, "learning_rate": 0.00030557555896461086, "loss": 1.404, "step": 714 }, { "epoch": 0.36, "learning_rate": 0.0003052946975620524, "loss": 1.6919, "step": 715 }, { "epoch": 0.36, "learning_rate": 0.0003050135485905951, "loss": 1.5993, "step": 716 }, { "epoch": 0.36, "learning_rate": 0.0003047321128180812, "loss": 1.5386, "step": 717 }, { "epoch": 0.36, "learning_rate": 0.00030445039101313593, "loss": 1.472, "step": 718 }, { "epoch": 0.36, "learning_rate": 0.00030416838394516587, "loss": 1.413, "step": 719 }, { "epoch": 0.36, "learning_rate": 0.0003038860923843567, "loss": 1.6552, "step": 720 }, { "epoch": 0.36, "learning_rate": 0.00030360351710167094, "loss": 1.4909, "step": 721 }, { "epoch": 0.36, "learning_rate": 0.0003033206588688461, "loss": 1.4127, "step": 722 }, { "epoch": 0.36, "learning_rate": 0.0003030375184583923, "loss": 1.4671, "step": 723 }, { "epoch": 0.36, "learning_rate": 0.00030275409664359056, "loss": 1.6795, "step": 724 }, { "epoch": 0.36, "learning_rate": 0.00030247039419849025, "loss": 1.6729, "step": 725 }, { "epoch": 0.36, "learning_rate": 0.0003021864118979071, "loss": 1.4749, "step": 726 }, { "epoch": 0.36, "learning_rate": 0.0003019021505174215, "loss": 1.6014, "step": 727 }, { "epoch": 0.36, "learning_rate": 0.0003016176108333756, "loss": 1.6341, "step": 728 }, { "epoch": 0.36, "learning_rate": 0.00030133279362287187, "loss": 1.823, "step": 729 }, { "epoch": 0.36, "learning_rate": 0.0003010476996637706, "loss": 1.5576, "step": 730 }, { "epoch": 0.37, "learning_rate": 0.0003007623297346881, "loss": 1.729, "step": 731 }, { "epoch": 0.37, "learning_rate": 0.00030047668461499413, "loss": 1.4666, "step": 732 }, { "epoch": 0.37, "learning_rate": 0.0003001907650848103, "loss": 1.5468, "step": 733 }, { "epoch": 0.37, "learning_rate": 0.0002999045719250074, "loss": 1.6001, "step": 734 }, { "epoch": 0.37, "learning_rate": 0.00029961810591720364, "loss": 1.6967, "step": 735 }, { "epoch": 0.37, "learning_rate": 0.00029933136784376235, "loss": 1.5668, "step": 736 }, { "epoch": 0.37, "learning_rate": 0.00029904435848779, "loss": 1.5286, "step": 737 }, { "epoch": 0.37, "learning_rate": 0.0002987570786331339, "loss": 1.6096, "step": 738 }, { "epoch": 0.37, "learning_rate": 0.00029846952906438, "loss": 1.5456, "step": 739 }, { "epoch": 0.37, "learning_rate": 0.00029818171056685103, "loss": 1.6902, "step": 740 }, { "epoch": 0.37, "learning_rate": 0.0002978936239266042, "loss": 1.7138, "step": 741 }, { "epoch": 0.37, "learning_rate": 0.00029760526993042886, "loss": 1.5856, "step": 742 }, { "epoch": 0.37, "learning_rate": 0.00029731664936584473, "loss": 1.464, "step": 743 }, { "epoch": 0.37, "learning_rate": 0.00029702776302109943, "loss": 1.4777, "step": 744 }, { "epoch": 0.37, "learning_rate": 0.00029673861168516634, "loss": 1.6905, "step": 745 }, { "epoch": 0.37, "learning_rate": 0.0002964491961477429, "loss": 1.54, "step": 746 }, { "epoch": 0.37, "learning_rate": 0.00029615951719924783, "loss": 1.6324, "step": 747 }, { "epoch": 0.37, "learning_rate": 0.00029586957563081925, "loss": 1.5705, "step": 748 }, { "epoch": 0.37, "learning_rate": 0.0002955793722343127, "loss": 1.7081, "step": 749 }, { "epoch": 0.37, "learning_rate": 0.0002952889078022985, "loss": 1.5256, "step": 750 }, { "epoch": 0.38, "learning_rate": 0.0002949981831280602, "loss": 1.4953, "step": 751 }, { "epoch": 0.38, "learning_rate": 0.000294707199005592, "loss": 1.51, "step": 752 }, { "epoch": 0.38, "learning_rate": 0.00029441595622959656, "loss": 1.5811, "step": 753 }, { "epoch": 0.38, "learning_rate": 0.000294124455595483, "loss": 1.5375, "step": 754 }, { "epoch": 0.38, "learning_rate": 0.00029383269789936466, "loss": 1.7053, "step": 755 }, { "epoch": 0.38, "learning_rate": 0.0002935406839380571, "loss": 1.4679, "step": 756 }, { "epoch": 0.38, "learning_rate": 0.0002932484145090755, "loss": 1.6812, "step": 757 }, { "epoch": 0.38, "learning_rate": 0.0002929558904106329, "loss": 1.5965, "step": 758 }, { "epoch": 0.38, "learning_rate": 0.00029266311244163784, "loss": 1.7395, "step": 759 }, { "epoch": 0.38, "learning_rate": 0.00029237008140169227, "loss": 1.5782, "step": 760 }, { "epoch": 0.38, "learning_rate": 0.0002920767980910891, "loss": 1.6442, "step": 761 }, { "epoch": 0.38, "learning_rate": 0.00029178326331081043, "loss": 1.5455, "step": 762 }, { "epoch": 0.38, "learning_rate": 0.0002914894778625251, "loss": 1.3559, "step": 763 }, { "epoch": 0.38, "learning_rate": 0.00029119544254858643, "loss": 1.4975, "step": 764 }, { "epoch": 0.38, "learning_rate": 0.0002909011581720302, "loss": 1.6059, "step": 765 }, { "epoch": 0.38, "learning_rate": 0.0002906066255365724, "loss": 1.6428, "step": 766 }, { "epoch": 0.38, "learning_rate": 0.00029031184544660717, "loss": 1.6501, "step": 767 }, { "epoch": 0.38, "learning_rate": 0.00029001681870720434, "loss": 1.4806, "step": 768 }, { "epoch": 0.38, "learning_rate": 0.0002897215461241072, "loss": 1.4095, "step": 769 }, { "epoch": 0.38, "learning_rate": 0.00028942602850373086, "loss": 1.6663, "step": 770 }, { "epoch": 0.39, "learning_rate": 0.0002891302666531592, "loss": 1.8152, "step": 771 }, { "epoch": 0.39, "learning_rate": 0.0002888342613801436, "loss": 1.7046, "step": 772 }, { "epoch": 0.39, "learning_rate": 0.00028853801349309983, "loss": 1.6848, "step": 773 }, { "epoch": 0.39, "learning_rate": 0.00028824152380110645, "loss": 1.6858, "step": 774 }, { "epoch": 0.39, "learning_rate": 0.0002879447931139023, "loss": 1.5137, "step": 775 }, { "epoch": 0.39, "learning_rate": 0.00028764782224188453, "loss": 1.6079, "step": 776 }, { "epoch": 0.39, "learning_rate": 0.00028735061199610626, "loss": 1.7438, "step": 777 }, { "epoch": 0.39, "learning_rate": 0.0002870531631882742, "loss": 1.6403, "step": 778 }, { "epoch": 0.39, "learning_rate": 0.0002867554766307468, "loss": 1.6697, "step": 779 }, { "epoch": 0.39, "learning_rate": 0.0002864575531365316, "loss": 1.513, "step": 780 }, { "epoch": 0.39, "learning_rate": 0.00028615939351928337, "loss": 1.5807, "step": 781 }, { "epoch": 0.39, "learning_rate": 0.00028586099859330183, "loss": 1.5632, "step": 782 }, { "epoch": 0.39, "learning_rate": 0.00028556236917352926, "loss": 1.7058, "step": 783 }, { "epoch": 0.39, "learning_rate": 0.00028526350607554823, "loss": 1.5961, "step": 784 }, { "epoch": 0.39, "learning_rate": 0.0002849644101155797, "loss": 1.6368, "step": 785 }, { "epoch": 0.39, "learning_rate": 0.0002846650821104805, "loss": 1.5342, "step": 786 }, { "epoch": 0.39, "learning_rate": 0.0002843655228777413, "loss": 1.6549, "step": 787 }, { "epoch": 0.39, "learning_rate": 0.0002840657332354841, "loss": 1.5317, "step": 788 }, { "epoch": 0.39, "learning_rate": 0.00028376571400246035, "loss": 1.687, "step": 789 }, { "epoch": 0.39, "learning_rate": 0.0002834654659980484, "loss": 1.421, "step": 790 }, { "epoch": 0.4, "learning_rate": 0.0002831649900422514, "loss": 1.639, "step": 791 }, { "epoch": 0.4, "learning_rate": 0.00028286428695569523, "loss": 1.62, "step": 792 }, { "epoch": 0.4, "learning_rate": 0.00028256335755962584, "loss": 1.5643, "step": 793 }, { "epoch": 0.4, "learning_rate": 0.0002822622026759074, "loss": 1.6884, "step": 794 }, { "epoch": 0.4, "learning_rate": 0.00028196082312701977, "loss": 1.7462, "step": 795 }, { "epoch": 0.4, "learning_rate": 0.0002816592197360566, "loss": 1.6372, "step": 796 }, { "epoch": 0.4, "learning_rate": 0.00028135739332672274, "loss": 1.5019, "step": 797 }, { "epoch": 0.4, "learning_rate": 0.0002810553447233321, "loss": 1.5365, "step": 798 }, { "epoch": 0.4, "learning_rate": 0.0002807530747508056, "loss": 1.6709, "step": 799 }, { "epoch": 0.4, "learning_rate": 0.0002804505842346684, "loss": 1.5888, "step": 800 }, { "epoch": 0.4, "learning_rate": 0.00028014787400104825, "loss": 1.6876, "step": 801 }, { "epoch": 0.4, "learning_rate": 0.00027984494487667296, "loss": 1.5555, "step": 802 }, { "epoch": 0.4, "learning_rate": 0.000279541797688868, "loss": 1.762, "step": 803 }, { "epoch": 0.4, "learning_rate": 0.00027923843326555463, "loss": 1.5098, "step": 804 }, { "epoch": 0.4, "learning_rate": 0.00027893485243524706, "loss": 1.7013, "step": 805 }, { "epoch": 0.4, "learning_rate": 0.0002786310560270509, "loss": 1.5521, "step": 806 }, { "epoch": 0.4, "learning_rate": 0.0002783270448706601, "loss": 1.6102, "step": 807 }, { "epoch": 0.4, "learning_rate": 0.00027802281979635564, "loss": 1.5667, "step": 808 }, { "epoch": 0.4, "learning_rate": 0.00027771838163500223, "loss": 1.3938, "step": 809 }, { "epoch": 0.4, "learning_rate": 0.00027741373121804684, "loss": 1.5571, "step": 810 }, { "epoch": 0.41, "learning_rate": 0.0002771088693775159, "loss": 1.4687, "step": 811 }, { "epoch": 0.41, "learning_rate": 0.0002768037969460135, "loss": 1.6164, "step": 812 }, { "epoch": 0.41, "learning_rate": 0.0002764985147567187, "loss": 1.5342, "step": 813 }, { "epoch": 0.41, "learning_rate": 0.0002761930236433836, "loss": 1.675, "step": 814 }, { "epoch": 0.41, "learning_rate": 0.00027588732444033066, "loss": 1.5154, "step": 815 }, { "epoch": 0.41, "learning_rate": 0.00027558141798245074, "loss": 1.6015, "step": 816 }, { "epoch": 0.41, "learning_rate": 0.0002752753051052007, "loss": 1.6213, "step": 817 }, { "epoch": 0.41, "learning_rate": 0.0002749689866446015, "loss": 1.7362, "step": 818 }, { "epoch": 0.41, "learning_rate": 0.000274662463437235, "loss": 1.5406, "step": 819 }, { "epoch": 0.41, "learning_rate": 0.0002743557363202427, "loss": 1.5625, "step": 820 }, { "epoch": 0.41, "learning_rate": 0.0002740488061313225, "loss": 1.7225, "step": 821 }, { "epoch": 0.41, "learning_rate": 0.00027374167370872746, "loss": 1.5959, "step": 822 }, { "epoch": 0.41, "learning_rate": 0.00027343433989126273, "loss": 1.5027, "step": 823 }, { "epoch": 0.41, "learning_rate": 0.00027312680551828337, "loss": 1.5802, "step": 824 }, { "epoch": 0.41, "learning_rate": 0.0002728190714296923, "loss": 1.6822, "step": 825 }, { "epoch": 0.41, "learning_rate": 0.00027251113846593785, "loss": 1.5289, "step": 826 }, { "epoch": 0.41, "learning_rate": 0.0002722030074680114, "loss": 1.6296, "step": 827 }, { "epoch": 0.41, "learning_rate": 0.0002718946792774455, "loss": 1.6557, "step": 828 }, { "epoch": 0.41, "learning_rate": 0.0002715861547363109, "loss": 1.5834, "step": 829 }, { "epoch": 0.41, "learning_rate": 0.00027127743468721466, "loss": 1.4457, "step": 830 }, { "epoch": 0.42, "learning_rate": 0.00027096851997329794, "loss": 1.5919, "step": 831 }, { "epoch": 0.42, "learning_rate": 0.0002706594114382335, "loss": 1.7794, "step": 832 }, { "epoch": 0.42, "learning_rate": 0.0002703501099262233, "loss": 1.5044, "step": 833 }, { "epoch": 0.42, "learning_rate": 0.00027004061628199645, "loss": 1.4793, "step": 834 }, { "epoch": 0.42, "learning_rate": 0.00026973093135080684, "loss": 1.4537, "step": 835 }, { "epoch": 0.42, "learning_rate": 0.00026942105597843076, "loss": 1.5793, "step": 836 }, { "epoch": 0.42, "learning_rate": 0.00026911099101116444, "loss": 1.5592, "step": 837 }, { "epoch": 0.42, "learning_rate": 0.00026880073729582213, "loss": 1.4737, "step": 838 }, { "epoch": 0.42, "learning_rate": 0.0002684902956797335, "loss": 1.5163, "step": 839 }, { "epoch": 0.42, "learning_rate": 0.0002681796670107413, "loss": 1.3649, "step": 840 }, { "epoch": 0.42, "learning_rate": 0.0002678688521371993, "loss": 1.6616, "step": 841 }, { "epoch": 0.42, "learning_rate": 0.00026755785190796965, "loss": 1.5122, "step": 842 }, { "epoch": 0.42, "learning_rate": 0.0002672466671724208, "loss": 1.6713, "step": 843 }, { "epoch": 0.42, "learning_rate": 0.0002669352987804251, "loss": 1.594, "step": 844 }, { "epoch": 0.42, "learning_rate": 0.00026662374758235655, "loss": 1.5373, "step": 845 }, { "epoch": 0.42, "learning_rate": 0.0002663120144290883, "loss": 1.3893, "step": 846 }, { "epoch": 0.42, "learning_rate": 0.0002660001001719904, "loss": 1.6124, "step": 847 }, { "epoch": 0.42, "learning_rate": 0.00026568800566292763, "loss": 1.5687, "step": 848 }, { "epoch": 0.42, "learning_rate": 0.000265375731754257, "loss": 1.2931, "step": 849 }, { "epoch": 0.42, "learning_rate": 0.0002650632792988255, "loss": 1.5906, "step": 850 }, { "epoch": 0.43, "learning_rate": 0.00026475064914996773, "loss": 1.5324, "step": 851 }, { "epoch": 0.43, "learning_rate": 0.0002644378421615036, "loss": 1.6167, "step": 852 }, { "epoch": 0.43, "learning_rate": 0.00026412485918773595, "loss": 1.6344, "step": 853 }, { "epoch": 0.43, "learning_rate": 0.00026381170108344827, "loss": 1.5958, "step": 854 }, { "epoch": 0.43, "learning_rate": 0.00026349836870390235, "loss": 1.4789, "step": 855 }, { "epoch": 0.43, "learning_rate": 0.00026318486290483593, "loss": 1.7105, "step": 856 }, { "epoch": 0.43, "learning_rate": 0.00026287118454246033, "loss": 1.4852, "step": 857 }, { "epoch": 0.43, "learning_rate": 0.00026255733447345833, "loss": 1.6808, "step": 858 }, { "epoch": 0.43, "learning_rate": 0.0002622433135549814, "loss": 1.5825, "step": 859 }, { "epoch": 0.43, "learning_rate": 0.00026192912264464785, "loss": 1.6492, "step": 860 }, { "epoch": 0.43, "learning_rate": 0.00026161476260054014, "loss": 1.5621, "step": 861 }, { "epoch": 0.43, "learning_rate": 0.0002613002342812026, "loss": 1.6808, "step": 862 }, { "epoch": 0.43, "learning_rate": 0.00026098553854563916, "loss": 1.5637, "step": 863 }, { "epoch": 0.43, "learning_rate": 0.00026067067625331117, "loss": 1.6629, "step": 864 }, { "epoch": 0.43, "learning_rate": 0.0002603556482641345, "loss": 1.3583, "step": 865 }, { "epoch": 0.43, "learning_rate": 0.00026004045543847796, "loss": 1.6123, "step": 866 }, { "epoch": 0.43, "learning_rate": 0.00025972509863716016, "loss": 1.4788, "step": 867 }, { "epoch": 0.43, "learning_rate": 0.0002594095787214478, "loss": 1.5373, "step": 868 }, { "epoch": 0.43, "learning_rate": 0.00025909389655305305, "loss": 1.5852, "step": 869 }, { "epoch": 0.43, "learning_rate": 0.000258778052994131, "loss": 1.5569, "step": 870 }, { "epoch": 0.44, "learning_rate": 0.0002584620489072777, "loss": 1.54, "step": 871 }, { "epoch": 0.44, "learning_rate": 0.00025814588515552753, "loss": 1.4899, "step": 872 }, { "epoch": 0.44, "learning_rate": 0.000257829562602351, "loss": 1.4598, "step": 873 }, { "epoch": 0.44, "learning_rate": 0.00025751308211165223, "loss": 1.5688, "step": 874 }, { "epoch": 0.44, "learning_rate": 0.0002571964445477668, "loss": 1.4195, "step": 875 }, { "epoch": 0.44, "learning_rate": 0.0002568796507754592, "loss": 1.6566, "step": 876 }, { "epoch": 0.44, "learning_rate": 0.0002565627016599205, "loss": 1.406, "step": 877 }, { "epoch": 0.44, "learning_rate": 0.00025624559806676603, "loss": 1.827, "step": 878 }, { "epoch": 0.44, "learning_rate": 0.00025592834086203315, "loss": 1.4952, "step": 879 }, { "epoch": 0.44, "learning_rate": 0.0002556109309121786, "loss": 1.4737, "step": 880 }, { "epoch": 0.44, "learning_rate": 0.0002552933690840762, "loss": 1.4676, "step": 881 }, { "epoch": 0.44, "learning_rate": 0.0002549756562450149, "loss": 1.5686, "step": 882 }, { "epoch": 0.44, "learning_rate": 0.0002546577932626957, "loss": 1.639, "step": 883 }, { "epoch": 0.44, "learning_rate": 0.0002543397810052299, "loss": 1.6325, "step": 884 }, { "epoch": 0.44, "learning_rate": 0.00025402162034113637, "loss": 1.4458, "step": 885 }, { "epoch": 0.44, "learning_rate": 0.00025370331213933926, "loss": 1.4646, "step": 886 }, { "epoch": 0.44, "learning_rate": 0.0002533848572691658, "loss": 1.7105, "step": 887 }, { "epoch": 0.44, "learning_rate": 0.00025306625660034365, "loss": 1.6741, "step": 888 }, { "epoch": 0.44, "learning_rate": 0.0002527475110029988, "loss": 1.6431, "step": 889 }, { "epoch": 0.44, "learning_rate": 0.0002524286213476529, "loss": 1.7294, "step": 890 }, { "epoch": 0.45, "learning_rate": 0.00025210958850522104, "loss": 1.5165, "step": 891 }, { "epoch": 0.45, "learning_rate": 0.0002517904133470095, "loss": 1.5219, "step": 892 }, { "epoch": 0.45, "learning_rate": 0.00025147109674471317, "loss": 1.3967, "step": 893 }, { "epoch": 0.45, "learning_rate": 0.0002511516395704132, "loss": 1.6214, "step": 894 }, { "epoch": 0.45, "learning_rate": 0.00025083204269657467, "loss": 1.6539, "step": 895 }, { "epoch": 0.45, "learning_rate": 0.0002505123069960442, "loss": 1.5878, "step": 896 }, { "epoch": 0.45, "learning_rate": 0.0002501924333420475, "loss": 1.6149, "step": 897 }, { "epoch": 0.45, "learning_rate": 0.0002498724226081872, "loss": 1.6648, "step": 898 }, { "epoch": 0.45, "learning_rate": 0.0002495522756684402, "loss": 1.4552, "step": 899 }, { "epoch": 0.45, "learning_rate": 0.00024923199339715543, "loss": 1.531, "step": 900 }, { "epoch": 0.45, "learning_rate": 0.0002489115766690513, "loss": 1.6431, "step": 901 }, { "epoch": 0.45, "learning_rate": 0.0002485910263592135, "loss": 1.5006, "step": 902 }, { "epoch": 0.45, "learning_rate": 0.00024827034334309265, "loss": 1.4951, "step": 903 }, { "epoch": 0.45, "learning_rate": 0.00024794952849650174, "loss": 1.3887, "step": 904 }, { "epoch": 0.45, "learning_rate": 0.0002476285826956138, "loss": 1.593, "step": 905 }, { "epoch": 0.45, "learning_rate": 0.0002473075068169593, "loss": 1.4254, "step": 906 }, { "epoch": 0.45, "learning_rate": 0.00024698630173742436, "loss": 1.5903, "step": 907 }, { "epoch": 0.45, "learning_rate": 0.0002466649683342477, "loss": 1.5224, "step": 908 }, { "epoch": 0.45, "learning_rate": 0.0002466649683342477, "loss": 1.6739, "step": 909 }, { "epoch": 0.45, "learning_rate": 0.0002463435074850184, "loss": 1.4916, "step": 910 }, { "epoch": 0.46, "learning_rate": 0.000246021920067674, "loss": 1.3713, "step": 911 }, { "epoch": 0.46, "learning_rate": 0.0002457002069604973, "loss": 1.3699, "step": 912 }, { "epoch": 0.46, "learning_rate": 0.0002453783690421146, "loss": 1.3908, "step": 913 }, { "epoch": 0.46, "learning_rate": 0.000245056407191493, "loss": 1.4784, "step": 914 }, { "epoch": 0.46, "learning_rate": 0.00024473432228793807, "loss": 1.5448, "step": 915 }, { "epoch": 0.46, "learning_rate": 0.0002444121152110915, "loss": 1.4938, "step": 916 }, { "epoch": 0.46, "learning_rate": 0.00024408978684092847, "loss": 1.5958, "step": 917 }, { "epoch": 0.46, "learning_rate": 0.00024376733805775574, "loss": 1.5453, "step": 918 }, { "epoch": 0.46, "learning_rate": 0.00024344476974220855, "loss": 1.4917, "step": 919 }, { "epoch": 0.46, "learning_rate": 0.00024312208277524892, "loss": 1.4726, "step": 920 }, { "epoch": 0.46, "learning_rate": 0.00024279927803816276, "loss": 1.7581, "step": 921 }, { "epoch": 0.46, "learning_rate": 0.00024247635641255766, "loss": 1.517, "step": 922 }, { "epoch": 0.46, "learning_rate": 0.00024215331878036037, "loss": 1.5761, "step": 923 }, { "epoch": 0.46, "learning_rate": 0.00024183016602381447, "loss": 1.3919, "step": 924 }, { "epoch": 0.46, "learning_rate": 0.00024150689902547811, "loss": 1.6481, "step": 925 }, { "epoch": 0.46, "learning_rate": 0.00024118351866822137, "loss": 1.5396, "step": 926 }, { "epoch": 0.46, "learning_rate": 0.00024086002583522382, "loss": 1.6246, "step": 927 }, { "epoch": 0.46, "learning_rate": 0.00024053642140997225, "loss": 1.4913, "step": 928 }, { "epoch": 0.46, "learning_rate": 0.00024021270627625825, "loss": 1.6974, "step": 929 }, { "epoch": 0.46, "learning_rate": 0.00023988888131817583, "loss": 1.727, "step": 930 }, { "epoch": 0.47, "learning_rate": 0.0002395649474201189, "loss": 1.4335, "step": 931 }, { "epoch": 0.47, "learning_rate": 0.0002392409054667788, "loss": 1.7088, "step": 932 }, { "epoch": 0.47, "learning_rate": 0.00023891675634314202, "loss": 1.6817, "step": 933 }, { "epoch": 0.47, "learning_rate": 0.00023859250093448783, "loss": 1.7091, "step": 934 }, { "epoch": 0.47, "learning_rate": 0.00023826814012638568, "loss": 1.5044, "step": 935 }, { "epoch": 0.47, "learning_rate": 0.00023794367480469295, "loss": 1.6067, "step": 936 }, { "epoch": 0.47, "learning_rate": 0.0002376191058555524, "loss": 1.6023, "step": 937 }, { "epoch": 0.47, "learning_rate": 0.00023729443416538982, "loss": 1.5024, "step": 938 }, { "epoch": 0.47, "learning_rate": 0.00023696966062091148, "loss": 1.6559, "step": 939 }, { "epoch": 0.47, "learning_rate": 0.00023664478610910207, "loss": 1.6554, "step": 940 }, { "epoch": 0.47, "learning_rate": 0.0002363198115172219, "loss": 1.5379, "step": 941 }, { "epoch": 0.47, "learning_rate": 0.00023599473773280454, "loss": 1.4307, "step": 942 }, { "epoch": 0.47, "learning_rate": 0.0002356695656436546, "loss": 1.6144, "step": 943 }, { "epoch": 0.47, "learning_rate": 0.00023534429613784497, "loss": 1.6197, "step": 944 }, { "epoch": 0.47, "learning_rate": 0.00023501893010371476, "loss": 1.6309, "step": 945 }, { "epoch": 0.47, "learning_rate": 0.00023469346842986677, "loss": 1.5075, "step": 946 }, { "epoch": 0.47, "learning_rate": 0.0002343679120051648, "loss": 1.6614, "step": 947 }, { "epoch": 0.47, "learning_rate": 0.00023404226171873157, "loss": 1.4659, "step": 948 }, { "epoch": 0.47, "learning_rate": 0.00023371651845994603, "loss": 1.3864, "step": 949 }, { "epoch": 0.47, "learning_rate": 0.00023339068311844114, "loss": 1.3556, "step": 950 }, { "epoch": 0.48, "learning_rate": 0.0002330647565841013, "loss": 1.5859, "step": 951 }, { "epoch": 0.48, "learning_rate": 0.0002327387397470601, "loss": 1.6599, "step": 952 }, { "epoch": 0.48, "learning_rate": 0.00023241263349769748, "loss": 1.5984, "step": 953 }, { "epoch": 0.48, "learning_rate": 0.0002320864387266378, "loss": 1.5987, "step": 954 }, { "epoch": 0.48, "learning_rate": 0.00023176015632474703, "loss": 1.727, "step": 955 }, { "epoch": 0.48, "learning_rate": 0.00023143378718313066, "loss": 1.4925, "step": 956 }, { "epoch": 0.48, "learning_rate": 0.00023110733219313087, "loss": 1.5061, "step": 957 }, { "epoch": 0.48, "learning_rate": 0.0002307807922463245, "loss": 1.6809, "step": 958 }, { "epoch": 0.48, "learning_rate": 0.00023045416823452023, "loss": 1.5185, "step": 959 }, { "epoch": 0.48, "learning_rate": 0.00023012746104975632, "loss": 1.5064, "step": 960 }, { "epoch": 0.48, "learning_rate": 0.00022980067158429832, "loss": 1.4743, "step": 961 }, { "epoch": 0.48, "learning_rate": 0.00022947380073063656, "loss": 1.5773, "step": 962 }, { "epoch": 0.48, "learning_rate": 0.00022914684938148342, "loss": 1.3633, "step": 963 }, { "epoch": 0.48, "learning_rate": 0.00022881981842977117, "loss": 1.6052, "step": 964 }, { "epoch": 0.48, "learning_rate": 0.00022849270876864965, "loss": 1.5255, "step": 965 }, { "epoch": 0.48, "learning_rate": 0.00022816552129148354, "loss": 1.7143, "step": 966 }, { "epoch": 0.48, "learning_rate": 0.00022783825689184998, "loss": 1.5166, "step": 967 }, { "epoch": 0.48, "learning_rate": 0.00022751091646353632, "loss": 1.5318, "step": 968 }, { "epoch": 0.48, "learning_rate": 0.00022718350090053752, "loss": 1.512, "step": 969 }, { "epoch": 0.48, "learning_rate": 0.00022685601109705364, "loss": 1.5758, "step": 970 }, { "epoch": 0.49, "learning_rate": 0.00022652844794748765, "loss": 1.4, "step": 971 }, { "epoch": 0.49, "learning_rate": 0.0002262008123464427, "loss": 1.5296, "step": 972 }, { "epoch": 0.49, "learning_rate": 0.0002258731051887199, "loss": 1.7131, "step": 973 }, { "epoch": 0.49, "learning_rate": 0.00022554532736931577, "loss": 1.5665, "step": 974 }, { "epoch": 0.49, "learning_rate": 0.00022521747978341972, "loss": 1.4738, "step": 975 }, { "epoch": 0.49, "learning_rate": 0.00022488956332641192, "loss": 1.6389, "step": 976 }, { "epoch": 0.49, "learning_rate": 0.00022456157889386033, "loss": 1.6043, "step": 977 }, { "epoch": 0.49, "learning_rate": 0.00022423352738151886, "loss": 1.5159, "step": 978 }, { "epoch": 0.49, "learning_rate": 0.00022390540968532442, "loss": 1.5425, "step": 979 }, { "epoch": 0.49, "learning_rate": 0.0002235772267013947, "loss": 1.5404, "step": 980 }, { "epoch": 0.49, "learning_rate": 0.00022324897932602574, "loss": 1.6195, "step": 981 }, { "epoch": 0.49, "learning_rate": 0.0002229206684556895, "loss": 1.5286, "step": 982 }, { "epoch": 0.49, "learning_rate": 0.0002225922949870311, "loss": 1.4833, "step": 983 }, { "epoch": 0.49, "learning_rate": 0.00022226385981686706, "loss": 1.5749, "step": 984 }, { "epoch": 0.49, "learning_rate": 0.00022193536384218195, "loss": 1.718, "step": 985 }, { "epoch": 0.49, "learning_rate": 0.00022160680796012665, "loss": 1.4774, "step": 986 }, { "epoch": 0.49, "learning_rate": 0.00022127819306801567, "loss": 1.4944, "step": 987 }, { "epoch": 0.49, "learning_rate": 0.00022094952006332453, "loss": 1.6471, "step": 988 }, { "epoch": 0.49, "learning_rate": 0.00022062078984368756, "loss": 1.6706, "step": 989 }, { "epoch": 0.49, "learning_rate": 0.00022029200330689545, "loss": 1.5164, "step": 990 }, { "epoch": 0.5, "learning_rate": 0.00021996316135089239, "loss": 1.6634, "step": 991 }, { "epoch": 0.5, "learning_rate": 0.00021963426487377433, "loss": 1.7244, "step": 992 }, { "epoch": 0.5, "learning_rate": 0.00021930531477378572, "loss": 1.4838, "step": 993 }, { "epoch": 0.5, "learning_rate": 0.0002189763119493178, "loss": 1.6398, "step": 994 }, { "epoch": 0.5, "learning_rate": 0.00021864725729890555, "loss": 1.3957, "step": 995 }, { "epoch": 0.5, "learning_rate": 0.0002183181517212256, "loss": 1.6115, "step": 996 }, { "epoch": 0.5, "learning_rate": 0.00021798899611509377, "loss": 1.7738, "step": 997 }, { "epoch": 0.5, "learning_rate": 0.00021765979137946233, "loss": 1.6396, "step": 998 }, { "epoch": 0.5, "learning_rate": 0.00021733053841341775, "loss": 1.5969, "step": 999 }, { "epoch": 0.5, "learning_rate": 0.00021700123811617834, "loss": 1.4756, "step": 1000 } ], "logging_steps": 1, "max_steps": 2001, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.9365667152278323e+17, "trial_name": null, "trial_params": null }