{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 168, "global_step": 671, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1e-05, "loss": 1.3745, "step": 1 }, { "epoch": 0.0, "eval_loss": 1.6296857595443726, "eval_runtime": 2.6662, "eval_samples_per_second": 409.572, "eval_steps_per_second": 25.88, "step": 1 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 1.42, "step": 2 }, { "epoch": 0.0, "learning_rate": 3e-05, "loss": 1.3057, "step": 3 }, { "epoch": 0.01, "learning_rate": 4e-05, "loss": 1.2307, "step": 4 }, { "epoch": 0.01, "learning_rate": 5e-05, "loss": 1.289, "step": 5 }, { "epoch": 0.01, "learning_rate": 6e-05, "loss": 1.4111, "step": 6 }, { "epoch": 0.01, "learning_rate": 7e-05, "loss": 1.3089, "step": 7 }, { "epoch": 0.01, "learning_rate": 8e-05, "loss": 1.3204, "step": 8 }, { "epoch": 0.01, "learning_rate": 9e-05, "loss": 1.3575, "step": 9 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 1.3279, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.00011000000000000002, "loss": 1.3149, "step": 11 }, { "epoch": 0.02, "learning_rate": 0.00012, "loss": 1.2578, "step": 12 }, { "epoch": 0.02, "learning_rate": 0.00013000000000000002, "loss": 1.2849, "step": 13 }, { "epoch": 0.02, "learning_rate": 0.00014, "loss": 1.2971, "step": 14 }, { "epoch": 0.02, "learning_rate": 0.00015000000000000001, "loss": 1.1473, "step": 15 }, { "epoch": 0.02, "learning_rate": 0.00016, "loss": 1.1943, "step": 16 }, { "epoch": 0.03, "learning_rate": 0.00017, "loss": 1.1877, "step": 17 }, { "epoch": 0.03, "learning_rate": 0.00018, "loss": 1.1984, "step": 18 }, { "epoch": 0.03, "learning_rate": 0.00019, "loss": 1.2647, "step": 19 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 1.217, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.00019999993046535236, "loss": 1.0274, "step": 21 }, { "epoch": 0.03, "learning_rate": 0.00019999972186150606, "loss": 1.2122, "step": 22 }, { "epoch": 0.03, "learning_rate": 0.00019999937418875124, "loss": 1.1868, "step": 23 }, { "epoch": 0.04, "learning_rate": 0.00019999888744757143, "loss": 1.2345, "step": 24 }, { "epoch": 0.04, "learning_rate": 0.00019999826163864348, "loss": 1.2127, "step": 25 }, { "epoch": 0.04, "learning_rate": 0.00019999749676283775, "loss": 1.2114, "step": 26 }, { "epoch": 0.04, "learning_rate": 0.00019999659282121792, "loss": 1.2224, "step": 27 }, { "epoch": 0.04, "learning_rate": 0.0001999955498150411, "loss": 1.1517, "step": 28 }, { "epoch": 0.04, "learning_rate": 0.0001999943677457578, "loss": 1.1631, "step": 29 }, { "epoch": 0.04, "learning_rate": 0.0001999930466150119, "loss": 1.0465, "step": 30 }, { "epoch": 0.05, "learning_rate": 0.0001999915864246407, "loss": 1.1847, "step": 31 }, { "epoch": 0.05, "learning_rate": 0.0001999899871766749, "loss": 1.1238, "step": 32 }, { "epoch": 0.05, "learning_rate": 0.0001999882488733385, "loss": 1.1491, "step": 33 }, { "epoch": 0.05, "learning_rate": 0.000199986371517049, "loss": 1.276, "step": 34 }, { "epoch": 0.05, "learning_rate": 0.0001999843551104172, "loss": 1.0911, "step": 35 }, { "epoch": 0.05, "learning_rate": 0.00019998219965624734, "loss": 1.1276, "step": 36 }, { "epoch": 0.06, "learning_rate": 0.00019997990515753693, "loss": 1.0981, "step": 37 }, { "epoch": 0.06, "learning_rate": 0.00019997747161747695, "loss": 1.0901, "step": 38 }, { "epoch": 0.06, "learning_rate": 0.0001999748990394517, "loss": 1.096, "step": 39 }, { "epoch": 0.06, "learning_rate": 0.00019997218742703887, "loss": 1.122, "step": 40 }, { "epoch": 0.06, "learning_rate": 0.00019996933678400946, "loss": 1.1132, "step": 41 }, { "epoch": 0.06, "learning_rate": 0.00019996634711432786, "loss": 1.1498, "step": 42 }, { "epoch": 0.06, "learning_rate": 0.00019996321842215173, "loss": 1.0708, "step": 43 }, { "epoch": 0.07, "learning_rate": 0.0001999599507118322, "loss": 1.1154, "step": 44 }, { "epoch": 0.07, "learning_rate": 0.00019995654398791355, "loss": 1.2118, "step": 45 }, { "epoch": 0.07, "learning_rate": 0.00019995299825513357, "loss": 1.0919, "step": 46 }, { "epoch": 0.07, "learning_rate": 0.00019994931351842327, "loss": 1.1364, "step": 47 }, { "epoch": 0.07, "learning_rate": 0.00019994548978290695, "loss": 1.1442, "step": 48 }, { "epoch": 0.07, "learning_rate": 0.0001999415270539023, "loss": 1.1248, "step": 49 }, { "epoch": 0.07, "learning_rate": 0.00019993742533692022, "loss": 1.1366, "step": 50 }, { "epoch": 0.08, "learning_rate": 0.00019993318463766495, "loss": 1.1437, "step": 51 }, { "epoch": 0.08, "learning_rate": 0.000199928804962034, "loss": 1.1191, "step": 52 }, { "epoch": 0.08, "learning_rate": 0.0001999242863161182, "loss": 1.0786, "step": 53 }, { "epoch": 0.08, "learning_rate": 0.00019991962870620153, "loss": 1.1951, "step": 54 }, { "epoch": 0.08, "learning_rate": 0.00019991483213876134, "loss": 1.1321, "step": 55 }, { "epoch": 0.08, "learning_rate": 0.00019990989662046818, "loss": 1.0876, "step": 56 }, { "epoch": 0.08, "learning_rate": 0.0001999048221581858, "loss": 1.1794, "step": 57 }, { "epoch": 0.09, "learning_rate": 0.00019989960875897126, "loss": 1.1796, "step": 58 }, { "epoch": 0.09, "learning_rate": 0.00019989425643007476, "loss": 1.1165, "step": 59 }, { "epoch": 0.09, "learning_rate": 0.0001998887651789398, "loss": 1.1978, "step": 60 }, { "epoch": 0.09, "learning_rate": 0.00019988313501320297, "loss": 1.1693, "step": 61 }, { "epoch": 0.09, "learning_rate": 0.00019987736594069414, "loss": 1.1553, "step": 62 }, { "epoch": 0.09, "learning_rate": 0.0001998714579694363, "loss": 1.1959, "step": 63 }, { "epoch": 0.1, "learning_rate": 0.00019986541110764565, "loss": 1.1945, "step": 64 }, { "epoch": 0.1, "learning_rate": 0.00019985922536373146, "loss": 1.121, "step": 65 }, { "epoch": 0.1, "learning_rate": 0.00019985290074629627, "loss": 1.122, "step": 66 }, { "epoch": 0.1, "learning_rate": 0.00019984643726413565, "loss": 1.1435, "step": 67 }, { "epoch": 0.1, "learning_rate": 0.00019983983492623833, "loss": 1.0413, "step": 68 }, { "epoch": 0.1, "learning_rate": 0.0001998330937417861, "loss": 1.078, "step": 69 }, { "epoch": 0.1, "learning_rate": 0.0001998262137201539, "loss": 1.0811, "step": 70 }, { "epoch": 0.11, "learning_rate": 0.00019981919487090972, "loss": 1.1639, "step": 71 }, { "epoch": 0.11, "learning_rate": 0.00019981203720381463, "loss": 1.164, "step": 72 }, { "epoch": 0.11, "learning_rate": 0.00019980474072882277, "loss": 1.1006, "step": 73 }, { "epoch": 0.11, "learning_rate": 0.00019979730545608126, "loss": 1.1926, "step": 74 }, { "epoch": 0.11, "learning_rate": 0.0001997897313959303, "loss": 1.1129, "step": 75 }, { "epoch": 0.11, "learning_rate": 0.00019978201855890308, "loss": 1.1367, "step": 76 }, { "epoch": 0.11, "learning_rate": 0.00019977416695572578, "loss": 1.1495, "step": 77 }, { "epoch": 0.12, "learning_rate": 0.0001997661765973176, "loss": 1.1567, "step": 78 }, { "epoch": 0.12, "learning_rate": 0.00019975804749479062, "loss": 1.2102, "step": 79 }, { "epoch": 0.12, "learning_rate": 0.00019974977965945, "loss": 1.1175, "step": 80 }, { "epoch": 0.12, "learning_rate": 0.0001997413731027937, "loss": 1.1243, "step": 81 }, { "epoch": 0.12, "learning_rate": 0.00019973282783651263, "loss": 1.1406, "step": 82 }, { "epoch": 0.12, "learning_rate": 0.00019972414387249072, "loss": 1.09, "step": 83 }, { "epoch": 0.13, "learning_rate": 0.00019971532122280464, "loss": 1.0115, "step": 84 }, { "epoch": 0.13, "learning_rate": 0.00019970635989972402, "loss": 1.0328, "step": 85 }, { "epoch": 0.13, "learning_rate": 0.00019969725991571128, "loss": 1.1226, "step": 86 }, { "epoch": 0.13, "learning_rate": 0.00019968802128342172, "loss": 1.0747, "step": 87 }, { "epoch": 0.13, "learning_rate": 0.00019967864401570343, "loss": 1.119, "step": 88 }, { "epoch": 0.13, "learning_rate": 0.00019966912812559732, "loss": 1.1125, "step": 89 }, { "epoch": 0.13, "learning_rate": 0.00019965947362633708, "loss": 1.0734, "step": 90 }, { "epoch": 0.14, "learning_rate": 0.0001996496805313491, "loss": 1.1798, "step": 91 }, { "epoch": 0.14, "learning_rate": 0.00019963974885425266, "loss": 1.1461, "step": 92 }, { "epoch": 0.14, "learning_rate": 0.0001996296786088596, "loss": 1.0397, "step": 93 }, { "epoch": 0.14, "learning_rate": 0.00019961946980917456, "loss": 1.17, "step": 94 }, { "epoch": 0.14, "learning_rate": 0.00019960912246939485, "loss": 1.0679, "step": 95 }, { "epoch": 0.14, "learning_rate": 0.00019959863660391045, "loss": 1.0839, "step": 96 }, { "epoch": 0.14, "learning_rate": 0.00019958801222730394, "loss": 1.0937, "step": 97 }, { "epoch": 0.15, "learning_rate": 0.00019957724935435063, "loss": 1.1668, "step": 98 }, { "epoch": 0.15, "learning_rate": 0.00019956634800001832, "loss": 1.0858, "step": 99 }, { "epoch": 0.15, "learning_rate": 0.00019955530817946748, "loss": 1.0935, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.00019954412990805107, "loss": 1.1046, "step": 101 }, { "epoch": 0.15, "learning_rate": 0.00019953281320131468, "loss": 1.1319, "step": 102 }, { "epoch": 0.15, "learning_rate": 0.00019952135807499633, "loss": 1.1108, "step": 103 }, { "epoch": 0.15, "learning_rate": 0.0001995097645450266, "loss": 1.0485, "step": 104 }, { "epoch": 0.16, "learning_rate": 0.00019949803262752855, "loss": 1.0862, "step": 105 }, { "epoch": 0.16, "learning_rate": 0.00019948616233881768, "loss": 1.268, "step": 106 }, { "epoch": 0.16, "learning_rate": 0.00019947415369540189, "loss": 1.0926, "step": 107 }, { "epoch": 0.16, "learning_rate": 0.0001994620067139815, "loss": 1.1427, "step": 108 }, { "epoch": 0.16, "learning_rate": 0.00019944972141144928, "loss": 1.0754, "step": 109 }, { "epoch": 0.16, "learning_rate": 0.00019943729780489027, "loss": 1.0044, "step": 110 }, { "epoch": 0.17, "learning_rate": 0.0001994247359115819, "loss": 1.1304, "step": 111 }, { "epoch": 0.17, "learning_rate": 0.00019941203574899393, "loss": 1.1683, "step": 112 }, { "epoch": 0.17, "learning_rate": 0.00019939919733478838, "loss": 1.1559, "step": 113 }, { "epoch": 0.17, "learning_rate": 0.00019938622068681953, "loss": 1.1879, "step": 114 }, { "epoch": 0.17, "learning_rate": 0.00019937310582313392, "loss": 1.0613, "step": 115 }, { "epoch": 0.17, "learning_rate": 0.0001993598527619703, "loss": 1.1196, "step": 116 }, { "epoch": 0.17, "learning_rate": 0.0001993464615217596, "loss": 1.0762, "step": 117 }, { "epoch": 0.18, "learning_rate": 0.00019933293212112495, "loss": 1.1059, "step": 118 }, { "epoch": 0.18, "learning_rate": 0.00019931926457888156, "loss": 1.0831, "step": 119 }, { "epoch": 0.18, "learning_rate": 0.00019930545891403678, "loss": 1.0552, "step": 120 }, { "epoch": 0.18, "learning_rate": 0.00019929151514579008, "loss": 1.15, "step": 121 }, { "epoch": 0.18, "learning_rate": 0.00019927743329353295, "loss": 1.1038, "step": 122 }, { "epoch": 0.18, "learning_rate": 0.0001992632133768489, "loss": 1.067, "step": 123 }, { "epoch": 0.18, "learning_rate": 0.0001992488554155135, "loss": 1.1311, "step": 124 }, { "epoch": 0.19, "learning_rate": 0.00019923435942949426, "loss": 1.1402, "step": 125 }, { "epoch": 0.19, "learning_rate": 0.00019921972543895066, "loss": 1.0453, "step": 126 }, { "epoch": 0.19, "learning_rate": 0.00019920495346423402, "loss": 1.1567, "step": 127 }, { "epoch": 0.19, "learning_rate": 0.00019919004352588767, "loss": 1.137, "step": 128 }, { "epoch": 0.19, "learning_rate": 0.0001991749956446468, "loss": 0.9986, "step": 129 }, { "epoch": 0.19, "learning_rate": 0.00019915980984143832, "loss": 1.083, "step": 130 }, { "epoch": 0.2, "learning_rate": 0.00019914448613738106, "loss": 1.0619, "step": 131 }, { "epoch": 0.2, "learning_rate": 0.00019912902455378556, "loss": 1.1294, "step": 132 }, { "epoch": 0.2, "learning_rate": 0.00019911342511215414, "loss": 1.0965, "step": 133 }, { "epoch": 0.2, "learning_rate": 0.00019909768783418086, "loss": 1.0216, "step": 134 }, { "epoch": 0.2, "learning_rate": 0.00019908181274175138, "loss": 1.0081, "step": 135 }, { "epoch": 0.2, "learning_rate": 0.0001990657998569432, "loss": 1.0246, "step": 136 }, { "epoch": 0.2, "learning_rate": 0.0001990496492020252, "loss": 1.1249, "step": 137 }, { "epoch": 0.21, "learning_rate": 0.00019903336079945804, "loss": 1.0518, "step": 138 }, { "epoch": 0.21, "learning_rate": 0.00019901693467189386, "loss": 1.189, "step": 139 }, { "epoch": 0.21, "learning_rate": 0.00019900037084217637, "loss": 1.1475, "step": 140 }, { "epoch": 0.21, "learning_rate": 0.0001989836693333408, "loss": 1.2259, "step": 141 }, { "epoch": 0.21, "learning_rate": 0.0001989668301686138, "loss": 1.0399, "step": 142 }, { "epoch": 0.21, "learning_rate": 0.0001989498533714135, "loss": 1.128, "step": 143 }, { "epoch": 0.21, "learning_rate": 0.00019893273896534936, "loss": 1.014, "step": 144 }, { "epoch": 0.22, "learning_rate": 0.0001989154869742223, "loss": 1.1552, "step": 145 }, { "epoch": 0.22, "learning_rate": 0.00019889809742202455, "loss": 1.1159, "step": 146 }, { "epoch": 0.22, "learning_rate": 0.0001988805703329396, "loss": 1.0218, "step": 147 }, { "epoch": 0.22, "learning_rate": 0.00019886290573134228, "loss": 1.1723, "step": 148 }, { "epoch": 0.22, "learning_rate": 0.0001988451036417986, "loss": 1.2132, "step": 149 }, { "epoch": 0.22, "learning_rate": 0.00019882716408906585, "loss": 1.112, "step": 150 }, { "epoch": 0.23, "learning_rate": 0.0001988090870980924, "loss": 1.0856, "step": 151 }, { "epoch": 0.23, "learning_rate": 0.0001987908726940178, "loss": 1.0951, "step": 152 }, { "epoch": 0.23, "learning_rate": 0.00019877252090217271, "loss": 1.0218, "step": 153 }, { "epoch": 0.23, "learning_rate": 0.00019875403174807882, "loss": 1.0552, "step": 154 }, { "epoch": 0.23, "learning_rate": 0.00019873540525744887, "loss": 1.1481, "step": 155 }, { "epoch": 0.23, "learning_rate": 0.00019871664145618657, "loss": 1.169, "step": 156 }, { "epoch": 0.23, "learning_rate": 0.00019869774037038665, "loss": 1.0802, "step": 157 }, { "epoch": 0.24, "learning_rate": 0.0001986787020263347, "loss": 1.0871, "step": 158 }, { "epoch": 0.24, "learning_rate": 0.0001986595264505072, "loss": 1.1022, "step": 159 }, { "epoch": 0.24, "learning_rate": 0.00019864021366957147, "loss": 1.0257, "step": 160 }, { "epoch": 0.24, "learning_rate": 0.0001986207637103857, "loss": 1.0986, "step": 161 }, { "epoch": 0.24, "learning_rate": 0.00019860117659999878, "loss": 1.0837, "step": 162 }, { "epoch": 0.24, "learning_rate": 0.00019858145236565037, "loss": 1.1895, "step": 163 }, { "epoch": 0.24, "learning_rate": 0.00019856159103477086, "loss": 1.052, "step": 164 }, { "epoch": 0.25, "learning_rate": 0.00019854159263498123, "loss": 1.1184, "step": 165 }, { "epoch": 0.25, "learning_rate": 0.0001985214571940931, "loss": 1.0895, "step": 166 }, { "epoch": 0.25, "learning_rate": 0.00019850118474010872, "loss": 1.0764, "step": 167 }, { "epoch": 0.25, "learning_rate": 0.00019848077530122083, "loss": 1.1387, "step": 168 }, { "epoch": 0.25, "eval_loss": 1.084919810295105, "eval_runtime": 2.6029, "eval_samples_per_second": 419.538, "eval_steps_per_second": 26.509, "step": 168 }, { "epoch": 0.25, "learning_rate": 0.00019846022890581267, "loss": 1.0826, "step": 169 }, { "epoch": 0.25, "learning_rate": 0.000198439545582458, "loss": 1.1366, "step": 170 }, { "epoch": 0.25, "learning_rate": 0.000198418725359921, "loss": 1.1349, "step": 171 }, { "epoch": 0.26, "learning_rate": 0.00019839776826715614, "loss": 1.0636, "step": 172 }, { "epoch": 0.26, "learning_rate": 0.00019837667433330838, "loss": 1.1216, "step": 173 }, { "epoch": 0.26, "learning_rate": 0.0001983554435877128, "loss": 1.1051, "step": 174 }, { "epoch": 0.26, "learning_rate": 0.00019833407605989494, "loss": 1.1558, "step": 175 }, { "epoch": 0.26, "learning_rate": 0.00019831257177957044, "loss": 1.0364, "step": 176 }, { "epoch": 0.26, "learning_rate": 0.00019829093077664513, "loss": 1.0665, "step": 177 }, { "epoch": 0.27, "learning_rate": 0.00019826915308121504, "loss": 1.1994, "step": 178 }, { "epoch": 0.27, "learning_rate": 0.0001982472387235662, "loss": 1.1434, "step": 179 }, { "epoch": 0.27, "learning_rate": 0.0001982251877341748, "loss": 1.081, "step": 180 }, { "epoch": 0.27, "learning_rate": 0.000198203000143707, "loss": 1.0653, "step": 181 }, { "epoch": 0.27, "learning_rate": 0.0001981806759830189, "loss": 1.0269, "step": 182 }, { "epoch": 0.27, "learning_rate": 0.0001981582152831566, "loss": 1.1167, "step": 183 }, { "epoch": 0.27, "learning_rate": 0.00019813561807535598, "loss": 1.0608, "step": 184 }, { "epoch": 0.28, "learning_rate": 0.0001981128843910428, "loss": 1.0989, "step": 185 }, { "epoch": 0.28, "learning_rate": 0.0001980900142618327, "loss": 1.1405, "step": 186 }, { "epoch": 0.28, "learning_rate": 0.00019806700771953097, "loss": 1.0359, "step": 187 }, { "epoch": 0.28, "learning_rate": 0.0001980438647961327, "loss": 1.1073, "step": 188 }, { "epoch": 0.28, "learning_rate": 0.0001980205855238225, "loss": 1.0338, "step": 189 }, { "epoch": 0.28, "learning_rate": 0.00019799716993497475, "loss": 1.1285, "step": 190 }, { "epoch": 0.28, "learning_rate": 0.00019797361806215332, "loss": 1.1277, "step": 191 }, { "epoch": 0.29, "learning_rate": 0.00019794992993811165, "loss": 1.119, "step": 192 }, { "epoch": 0.29, "learning_rate": 0.00019792610559579265, "loss": 1.1224, "step": 193 }, { "epoch": 0.29, "learning_rate": 0.00019790214506832868, "loss": 1.1438, "step": 194 }, { "epoch": 0.29, "learning_rate": 0.0001978780483890414, "loss": 1.1462, "step": 195 }, { "epoch": 0.29, "learning_rate": 0.00019785381559144196, "loss": 1.042, "step": 196 }, { "epoch": 0.29, "learning_rate": 0.00019782944670923076, "loss": 1.1022, "step": 197 }, { "epoch": 0.3, "learning_rate": 0.00019780494177629735, "loss": 1.0564, "step": 198 }, { "epoch": 0.3, "learning_rate": 0.00019778030082672068, "loss": 1.0471, "step": 199 }, { "epoch": 0.3, "learning_rate": 0.00019775552389476864, "loss": 1.0636, "step": 200 }, { "epoch": 0.3, "learning_rate": 0.0001977306110148984, "loss": 1.0917, "step": 201 }, { "epoch": 0.3, "learning_rate": 0.00019770556222175608, "loss": 1.1965, "step": 202 }, { "epoch": 0.3, "learning_rate": 0.00019768037755017685, "loss": 1.073, "step": 203 }, { "epoch": 0.3, "learning_rate": 0.00019765505703518496, "loss": 1.0636, "step": 204 }, { "epoch": 0.31, "learning_rate": 0.00019762960071199333, "loss": 1.087, "step": 205 }, { "epoch": 0.31, "learning_rate": 0.000197604008616004, "loss": 1.0569, "step": 206 }, { "epoch": 0.31, "learning_rate": 0.00019757828078280766, "loss": 1.08, "step": 207 }, { "epoch": 0.31, "learning_rate": 0.00019755241724818387, "loss": 1.1536, "step": 208 }, { "epoch": 0.31, "learning_rate": 0.00019752641804810084, "loss": 1.1514, "step": 209 }, { "epoch": 0.31, "learning_rate": 0.00019750028321871546, "loss": 1.0691, "step": 210 }, { "epoch": 0.31, "learning_rate": 0.00019747401279637325, "loss": 1.1289, "step": 211 }, { "epoch": 0.32, "learning_rate": 0.00019744760681760832, "loss": 1.0834, "step": 212 }, { "epoch": 0.32, "learning_rate": 0.00019742106531914328, "loss": 1.0762, "step": 213 }, { "epoch": 0.32, "learning_rate": 0.0001973943883378892, "loss": 1.0913, "step": 214 }, { "epoch": 0.32, "learning_rate": 0.00019736757591094558, "loss": 1.132, "step": 215 }, { "epoch": 0.32, "learning_rate": 0.00019734062807560027, "loss": 1.0894, "step": 216 }, { "epoch": 0.32, "learning_rate": 0.00019731354486932944, "loss": 1.0327, "step": 217 }, { "epoch": 0.32, "learning_rate": 0.00019728632632979746, "loss": 1.112, "step": 218 }, { "epoch": 0.33, "learning_rate": 0.00019725897249485704, "loss": 1.0718, "step": 219 }, { "epoch": 0.33, "learning_rate": 0.00019723148340254892, "loss": 1.077, "step": 220 }, { "epoch": 0.33, "learning_rate": 0.00019720385909110198, "loss": 1.0335, "step": 221 }, { "epoch": 0.33, "learning_rate": 0.00019717609959893318, "loss": 1.0483, "step": 222 }, { "epoch": 0.33, "learning_rate": 0.00019714820496464746, "loss": 1.0901, "step": 223 }, { "epoch": 0.33, "learning_rate": 0.00019712017522703764, "loss": 0.9921, "step": 224 }, { "epoch": 0.34, "learning_rate": 0.00019709201042508455, "loss": 1.0829, "step": 225 }, { "epoch": 0.34, "learning_rate": 0.0001970637105979567, "loss": 1.0705, "step": 226 }, { "epoch": 0.34, "learning_rate": 0.0001970352757850105, "loss": 1.0481, "step": 227 }, { "epoch": 0.34, "learning_rate": 0.00019700670602579008, "loss": 0.9846, "step": 228 }, { "epoch": 0.34, "learning_rate": 0.0001969780013600272, "loss": 1.1492, "step": 229 }, { "epoch": 0.34, "learning_rate": 0.00019694916182764113, "loss": 1.1745, "step": 230 }, { "epoch": 0.34, "learning_rate": 0.00019692018746873892, "loss": 1.0451, "step": 231 }, { "epoch": 0.35, "learning_rate": 0.00019689107832361496, "loss": 1.1217, "step": 232 }, { "epoch": 0.35, "learning_rate": 0.00019686183443275116, "loss": 1.0788, "step": 233 }, { "epoch": 0.35, "learning_rate": 0.00019683245583681675, "loss": 1.0703, "step": 234 }, { "epoch": 0.35, "learning_rate": 0.00019680294257666837, "loss": 1.1521, "step": 235 }, { "epoch": 0.35, "learning_rate": 0.0001967732946933499, "loss": 1.0659, "step": 236 }, { "epoch": 0.35, "learning_rate": 0.00019674351222809242, "loss": 1.0321, "step": 237 }, { "epoch": 0.35, "learning_rate": 0.0001967135952223142, "loss": 1.0555, "step": 238 }, { "epoch": 0.36, "learning_rate": 0.00019668354371762066, "loss": 1.0648, "step": 239 }, { "epoch": 0.36, "learning_rate": 0.00019665335775580415, "loss": 1.0723, "step": 240 }, { "epoch": 0.36, "learning_rate": 0.0001966230373788441, "loss": 1.0264, "step": 241 }, { "epoch": 0.36, "learning_rate": 0.00019659258262890683, "loss": 1.0331, "step": 242 }, { "epoch": 0.36, "learning_rate": 0.00019656199354834558, "loss": 1.1514, "step": 243 }, { "epoch": 0.36, "learning_rate": 0.00019653127017970034, "loss": 1.069, "step": 244 }, { "epoch": 0.37, "learning_rate": 0.00019650041256569792, "loss": 0.9623, "step": 245 }, { "epoch": 0.37, "learning_rate": 0.00019646942074925172, "loss": 1.0021, "step": 246 }, { "epoch": 0.37, "learning_rate": 0.00019643829477346188, "loss": 1.1131, "step": 247 }, { "epoch": 0.37, "learning_rate": 0.0001964070346816151, "loss": 1.1426, "step": 248 }, { "epoch": 0.37, "learning_rate": 0.0001963756405171845, "loss": 1.0761, "step": 249 }, { "epoch": 0.37, "learning_rate": 0.00019634411232382978, "loss": 1.1112, "step": 250 }, { "epoch": 0.37, "learning_rate": 0.00019631245014539698, "loss": 1.081, "step": 251 }, { "epoch": 0.38, "learning_rate": 0.00019628065402591845, "loss": 1.1446, "step": 252 }, { "epoch": 0.38, "learning_rate": 0.00019624872400961284, "loss": 1.045, "step": 253 }, { "epoch": 0.38, "learning_rate": 0.00019621666014088494, "loss": 1.0337, "step": 254 }, { "epoch": 0.38, "learning_rate": 0.00019618446246432583, "loss": 1.1764, "step": 255 }, { "epoch": 0.38, "learning_rate": 0.00019615213102471257, "loss": 1.0323, "step": 256 }, { "epoch": 0.38, "learning_rate": 0.00019611966586700823, "loss": 1.0073, "step": 257 }, { "epoch": 0.38, "learning_rate": 0.00019608706703636188, "loss": 1.1615, "step": 258 }, { "epoch": 0.39, "learning_rate": 0.00019605433457810855, "loss": 1.1209, "step": 259 }, { "epoch": 0.39, "learning_rate": 0.00019602146853776894, "loss": 1.0721, "step": 260 }, { "epoch": 0.39, "learning_rate": 0.0001959884689610497, "loss": 1.0967, "step": 261 }, { "epoch": 0.39, "learning_rate": 0.00019595533589384308, "loss": 1.0284, "step": 262 }, { "epoch": 0.39, "learning_rate": 0.00019592206938222703, "loss": 1.0148, "step": 263 }, { "epoch": 0.39, "learning_rate": 0.00019588866947246498, "loss": 1.1434, "step": 264 }, { "epoch": 0.39, "learning_rate": 0.00019585513621100603, "loss": 1.1125, "step": 265 }, { "epoch": 0.4, "learning_rate": 0.0001958214696444846, "loss": 1.0812, "step": 266 }, { "epoch": 0.4, "learning_rate": 0.00019578766981972058, "loss": 1.0611, "step": 267 }, { "epoch": 0.4, "learning_rate": 0.00019575373678371909, "loss": 1.1029, "step": 268 }, { "epoch": 0.4, "learning_rate": 0.00019571967058367064, "loss": 1.0692, "step": 269 }, { "epoch": 0.4, "learning_rate": 0.00019568547126695083, "loss": 1.0581, "step": 270 }, { "epoch": 0.4, "learning_rate": 0.00019565113888112036, "loss": 0.9841, "step": 271 }, { "epoch": 0.41, "learning_rate": 0.00019561667347392508, "loss": 1.0173, "step": 272 }, { "epoch": 0.41, "learning_rate": 0.00019558207509329584, "loss": 1.0805, "step": 273 }, { "epoch": 0.41, "learning_rate": 0.00019554734378734824, "loss": 1.088, "step": 274 }, { "epoch": 0.41, "learning_rate": 0.00019551247960438296, "loss": 1.0481, "step": 275 }, { "epoch": 0.41, "learning_rate": 0.00019547748259288536, "loss": 1.1747, "step": 276 }, { "epoch": 0.41, "learning_rate": 0.0001954423528015255, "loss": 1.0407, "step": 277 }, { "epoch": 0.41, "learning_rate": 0.00019540709027915818, "loss": 1.1412, "step": 278 }, { "epoch": 0.42, "learning_rate": 0.0001953716950748227, "loss": 1.075, "step": 279 }, { "epoch": 0.42, "learning_rate": 0.00019533616723774294, "loss": 0.9863, "step": 280 }, { "epoch": 0.42, "learning_rate": 0.0001953005068173272, "loss": 1.1426, "step": 281 }, { "epoch": 0.42, "learning_rate": 0.0001952647138631682, "loss": 1.0621, "step": 282 }, { "epoch": 0.42, "learning_rate": 0.00019522878842504295, "loss": 1.1007, "step": 283 }, { "epoch": 0.42, "learning_rate": 0.00019519273055291266, "loss": 1.0632, "step": 284 }, { "epoch": 0.42, "learning_rate": 0.00019515654029692278, "loss": 1.126, "step": 285 }, { "epoch": 0.43, "learning_rate": 0.00019512021770740288, "loss": 1.0946, "step": 286 }, { "epoch": 0.43, "learning_rate": 0.0001950837628348665, "loss": 1.0639, "step": 287 }, { "epoch": 0.43, "learning_rate": 0.00019504717573001117, "loss": 1.1432, "step": 288 }, { "epoch": 0.43, "learning_rate": 0.00019501045644371832, "loss": 1.0619, "step": 289 }, { "epoch": 0.43, "learning_rate": 0.0001949736050270532, "loss": 1.0597, "step": 290 }, { "epoch": 0.43, "learning_rate": 0.00019493662153126481, "loss": 1.0743, "step": 291 }, { "epoch": 0.44, "learning_rate": 0.0001948995060077859, "loss": 1.1114, "step": 292 }, { "epoch": 0.44, "learning_rate": 0.00019486225850823266, "loss": 1.1435, "step": 293 }, { "epoch": 0.44, "learning_rate": 0.000194824879084405, "loss": 1.1396, "step": 294 }, { "epoch": 0.44, "learning_rate": 0.00019478736778828624, "loss": 1.1597, "step": 295 }, { "epoch": 0.44, "learning_rate": 0.00019474972467204297, "loss": 1.0976, "step": 296 }, { "epoch": 0.44, "learning_rate": 0.00019471194978802533, "loss": 1.0829, "step": 297 }, { "epoch": 0.44, "learning_rate": 0.0001946740431887665, "loss": 1.0437, "step": 298 }, { "epoch": 0.45, "learning_rate": 0.00019463600492698296, "loss": 1.0835, "step": 299 }, { "epoch": 0.45, "learning_rate": 0.00019459783505557424, "loss": 1.0558, "step": 300 }, { "epoch": 0.45, "learning_rate": 0.0001945595336276229, "loss": 1.0656, "step": 301 }, { "epoch": 0.45, "learning_rate": 0.00019452110069639452, "loss": 1.1487, "step": 302 }, { "epoch": 0.45, "learning_rate": 0.00019448253631533744, "loss": 1.1383, "step": 303 }, { "epoch": 0.45, "learning_rate": 0.00019444384053808288, "loss": 1.1582, "step": 304 }, { "epoch": 0.45, "learning_rate": 0.00019440501341844483, "loss": 0.9999, "step": 305 }, { "epoch": 0.46, "learning_rate": 0.00019436605501041987, "loss": 1.1317, "step": 306 }, { "epoch": 0.46, "learning_rate": 0.00019432696536818717, "loss": 1.0944, "step": 307 }, { "epoch": 0.46, "learning_rate": 0.00019428774454610843, "loss": 1.1624, "step": 308 }, { "epoch": 0.46, "learning_rate": 0.00019424839259872778, "loss": 1.1644, "step": 309 }, { "epoch": 0.46, "learning_rate": 0.00019420890958077167, "loss": 1.0486, "step": 310 }, { "epoch": 0.46, "learning_rate": 0.00019416929554714888, "loss": 1.0705, "step": 311 }, { "epoch": 0.46, "learning_rate": 0.00019412955055295034, "loss": 1.023, "step": 312 }, { "epoch": 0.47, "learning_rate": 0.00019408967465344917, "loss": 1.1144, "step": 313 }, { "epoch": 0.47, "learning_rate": 0.00019404966790410047, "loss": 1.0378, "step": 314 }, { "epoch": 0.47, "learning_rate": 0.00019400953036054138, "loss": 1.036, "step": 315 }, { "epoch": 0.47, "learning_rate": 0.00019396926207859084, "loss": 1.0735, "step": 316 }, { "epoch": 0.47, "learning_rate": 0.00019392886311424973, "loss": 1.0259, "step": 317 }, { "epoch": 0.47, "learning_rate": 0.0001938883335237006, "loss": 1.1603, "step": 318 }, { "epoch": 0.48, "learning_rate": 0.0001938476733633076, "loss": 1.1282, "step": 319 }, { "epoch": 0.48, "learning_rate": 0.0001938068826896166, "loss": 1.063, "step": 320 }, { "epoch": 0.48, "learning_rate": 0.00019376596155935486, "loss": 1.1176, "step": 321 }, { "epoch": 0.48, "learning_rate": 0.00019372491002943112, "loss": 1.1307, "step": 322 }, { "epoch": 0.48, "learning_rate": 0.00019368372815693549, "loss": 1.0412, "step": 323 }, { "epoch": 0.48, "learning_rate": 0.00019364241599913924, "loss": 1.1353, "step": 324 }, { "epoch": 0.48, "learning_rate": 0.00019360097361349494, "loss": 1.1293, "step": 325 }, { "epoch": 0.49, "learning_rate": 0.0001935594010576362, "loss": 1.0885, "step": 326 }, { "epoch": 0.49, "learning_rate": 0.00019351769838937775, "loss": 1.0944, "step": 327 }, { "epoch": 0.49, "learning_rate": 0.00019347586566671512, "loss": 1.1435, "step": 328 }, { "epoch": 0.49, "learning_rate": 0.0001934339029478248, "loss": 1.1217, "step": 329 }, { "epoch": 0.49, "learning_rate": 0.00019339181029106404, "loss": 1.1801, "step": 330 }, { "epoch": 0.49, "learning_rate": 0.00019334958775497083, "loss": 1.1846, "step": 331 }, { "epoch": 0.49, "learning_rate": 0.00019330723539826375, "loss": 1.0897, "step": 332 }, { "epoch": 0.5, "learning_rate": 0.00019326475327984192, "loss": 1.0643, "step": 333 }, { "epoch": 0.5, "learning_rate": 0.00019322214145878487, "loss": 1.0246, "step": 334 }, { "epoch": 0.5, "learning_rate": 0.0001931793999943526, "loss": 1.1108, "step": 335 }, { "epoch": 0.5, "learning_rate": 0.00019313652894598543, "loss": 1.0619, "step": 336 }, { "epoch": 0.5, "eval_loss": 1.048388123512268, "eval_runtime": 2.6045, "eval_samples_per_second": 419.273, "eval_steps_per_second": 26.493, "step": 336 }, { "epoch": 0.5, "learning_rate": 0.00019309352837330372, "loss": 1.0014, "step": 337 }, { "epoch": 0.5, "learning_rate": 0.0001930503983361081, "loss": 1.0786, "step": 338 }, { "epoch": 0.51, "learning_rate": 0.00019300713889437926, "loss": 1.014, "step": 339 }, { "epoch": 0.51, "learning_rate": 0.00019296375010827773, "loss": 1.1233, "step": 340 }, { "epoch": 0.51, "learning_rate": 0.000192920232038144, "loss": 1.1052, "step": 341 }, { "epoch": 0.51, "learning_rate": 0.0001928765847444984, "loss": 1.0138, "step": 342 }, { "epoch": 0.51, "learning_rate": 0.00019283280828804081, "loss": 1.1536, "step": 343 }, { "epoch": 0.51, "learning_rate": 0.00019278890272965096, "loss": 0.992, "step": 344 }, { "epoch": 0.51, "learning_rate": 0.0001927448681303879, "loss": 1.1165, "step": 345 }, { "epoch": 0.52, "learning_rate": 0.0001927007045514903, "loss": 1.0565, "step": 346 }, { "epoch": 0.52, "learning_rate": 0.00019265641205437611, "loss": 1.0664, "step": 347 }, { "epoch": 0.52, "learning_rate": 0.0001926119907006426, "loss": 1.0625, "step": 348 }, { "epoch": 0.52, "learning_rate": 0.00019256744055206622, "loss": 1.0393, "step": 349 }, { "epoch": 0.52, "learning_rate": 0.0001925227616706026, "loss": 1.125, "step": 350 }, { "epoch": 0.52, "learning_rate": 0.00019247795411838627, "loss": 1.0375, "step": 351 }, { "epoch": 0.52, "learning_rate": 0.00019243301795773086, "loss": 1.0648, "step": 352 }, { "epoch": 0.53, "learning_rate": 0.0001923879532511287, "loss": 1.0903, "step": 353 }, { "epoch": 0.53, "learning_rate": 0.000192342760061251, "loss": 1.1219, "step": 354 }, { "epoch": 0.53, "learning_rate": 0.00019229743845094755, "loss": 1.054, "step": 355 }, { "epoch": 0.53, "learning_rate": 0.0001922519884832469, "loss": 1.1206, "step": 356 }, { "epoch": 0.53, "learning_rate": 0.00019220641022135588, "loss": 1.1125, "step": 357 }, { "epoch": 0.53, "learning_rate": 0.00019216070372865996, "loss": 1.064, "step": 358 }, { "epoch": 0.54, "learning_rate": 0.0001921148690687228, "loss": 1.0843, "step": 359 }, { "epoch": 0.54, "learning_rate": 0.00019206890630528634, "loss": 1.1378, "step": 360 }, { "epoch": 0.54, "learning_rate": 0.00019202281550227064, "loss": 1.0399, "step": 361 }, { "epoch": 0.54, "learning_rate": 0.0001919765967237739, "loss": 1.1762, "step": 362 }, { "epoch": 0.54, "learning_rate": 0.0001919302500340722, "loss": 1.0538, "step": 363 }, { "epoch": 0.54, "learning_rate": 0.00019188377549761963, "loss": 1.0343, "step": 364 }, { "epoch": 0.54, "learning_rate": 0.0001918371731790479, "loss": 1.1027, "step": 365 }, { "epoch": 0.55, "learning_rate": 0.00019179044314316664, "loss": 1.036, "step": 366 }, { "epoch": 0.55, "learning_rate": 0.00019174358545496288, "loss": 1.041, "step": 367 }, { "epoch": 0.55, "learning_rate": 0.00019169660017960137, "loss": 1.0762, "step": 368 }, { "epoch": 0.55, "learning_rate": 0.00019164948738242409, "loss": 1.0807, "step": 369 }, { "epoch": 0.55, "learning_rate": 0.00019160224712895055, "loss": 1.037, "step": 370 }, { "epoch": 0.55, "learning_rate": 0.00019155487948487748, "loss": 1.0625, "step": 371 }, { "epoch": 0.55, "learning_rate": 0.0001915073845160786, "loss": 1.062, "step": 372 }, { "epoch": 0.56, "learning_rate": 0.00019145976228860496, "loss": 1.1882, "step": 373 }, { "epoch": 0.56, "learning_rate": 0.00019141201286868435, "loss": 1.1338, "step": 374 }, { "epoch": 0.56, "learning_rate": 0.00019136413632272163, "loss": 1.0174, "step": 375 }, { "epoch": 0.56, "learning_rate": 0.00019131613271729833, "loss": 1.0585, "step": 376 }, { "epoch": 0.56, "learning_rate": 0.00019126800211917276, "loss": 1.0495, "step": 377 }, { "epoch": 0.56, "learning_rate": 0.0001912197445952798, "loss": 1.123, "step": 378 }, { "epoch": 0.56, "learning_rate": 0.00019117136021273075, "loss": 1.0517, "step": 379 }, { "epoch": 0.57, "learning_rate": 0.0001911228490388136, "loss": 1.0545, "step": 380 }, { "epoch": 0.57, "learning_rate": 0.00019107421114099237, "loss": 1.0302, "step": 381 }, { "epoch": 0.57, "learning_rate": 0.00019102544658690748, "loss": 1.0908, "step": 382 }, { "epoch": 0.57, "learning_rate": 0.00019097655544437545, "loss": 1.1425, "step": 383 }, { "epoch": 0.57, "learning_rate": 0.00019092753778138886, "loss": 1.0686, "step": 384 }, { "epoch": 0.57, "learning_rate": 0.0001908783936661162, "loss": 1.06, "step": 385 }, { "epoch": 0.58, "learning_rate": 0.0001908291231669019, "loss": 1.1296, "step": 386 }, { "epoch": 0.58, "learning_rate": 0.00019077972635226604, "loss": 1.1029, "step": 387 }, { "epoch": 0.58, "learning_rate": 0.00019073020329090444, "loss": 1.0469, "step": 388 }, { "epoch": 0.58, "learning_rate": 0.0001906805540516885, "loss": 1.0427, "step": 389 }, { "epoch": 0.58, "learning_rate": 0.000190630778703665, "loss": 1.0075, "step": 390 }, { "epoch": 0.58, "learning_rate": 0.00019058087731605624, "loss": 1.1146, "step": 391 }, { "epoch": 0.58, "learning_rate": 0.0001905308499582597, "loss": 1.1161, "step": 392 }, { "epoch": 0.59, "learning_rate": 0.00019048069669984802, "loss": 1.1419, "step": 393 }, { "epoch": 0.59, "learning_rate": 0.00019043041761056907, "loss": 1.1586, "step": 394 }, { "epoch": 0.59, "learning_rate": 0.00019038001276034557, "loss": 1.0765, "step": 395 }, { "epoch": 0.59, "learning_rate": 0.00019032948221927524, "loss": 1.1225, "step": 396 }, { "epoch": 0.59, "learning_rate": 0.0001902788260576305, "loss": 1.0247, "step": 397 }, { "epoch": 0.59, "learning_rate": 0.00019022804434585852, "loss": 1.135, "step": 398 }, { "epoch": 0.59, "learning_rate": 0.0001901771371545811, "loss": 1.1122, "step": 399 }, { "epoch": 0.6, "learning_rate": 0.00019012610455459446, "loss": 1.075, "step": 400 }, { "epoch": 0.6, "learning_rate": 0.00019007494661686935, "loss": 1.1121, "step": 401 }, { "epoch": 0.6, "learning_rate": 0.0001900236634125507, "loss": 1.0531, "step": 402 }, { "epoch": 0.6, "learning_rate": 0.00018997225501295772, "loss": 1.0561, "step": 403 }, { "epoch": 0.6, "learning_rate": 0.00018992072148958368, "loss": 1.0803, "step": 404 }, { "epoch": 0.6, "learning_rate": 0.00018986906291409595, "loss": 1.0579, "step": 405 }, { "epoch": 0.61, "learning_rate": 0.00018981727935833567, "loss": 1.0614, "step": 406 }, { "epoch": 0.61, "learning_rate": 0.0001897653708943179, "loss": 0.9982, "step": 407 }, { "epoch": 0.61, "learning_rate": 0.00018971333759423142, "loss": 1.1498, "step": 408 }, { "epoch": 0.61, "learning_rate": 0.00018966117953043852, "loss": 1.1165, "step": 409 }, { "epoch": 0.61, "learning_rate": 0.00018960889677547505, "loss": 1.1155, "step": 410 }, { "epoch": 0.61, "learning_rate": 0.00018955648940205028, "loss": 1.0017, "step": 411 }, { "epoch": 0.61, "learning_rate": 0.00018950395748304678, "loss": 1.0556, "step": 412 }, { "epoch": 0.62, "learning_rate": 0.00018945130109152033, "loss": 1.0248, "step": 413 }, { "epoch": 0.62, "learning_rate": 0.00018939852030069981, "loss": 1.0155, "step": 414 }, { "epoch": 0.62, "learning_rate": 0.00018934561518398706, "loss": 1.0248, "step": 415 }, { "epoch": 0.62, "learning_rate": 0.00018929258581495685, "loss": 0.9835, "step": 416 }, { "epoch": 0.62, "learning_rate": 0.0001892394322673568, "loss": 1.1602, "step": 417 }, { "epoch": 0.62, "learning_rate": 0.0001891861546151071, "loss": 1.021, "step": 418 }, { "epoch": 0.62, "learning_rate": 0.00018913275293230069, "loss": 1.0526, "step": 419 }, { "epoch": 0.63, "learning_rate": 0.00018907922729320285, "loss": 1.0585, "step": 420 }, { "epoch": 0.63, "learning_rate": 0.00018902557777225135, "loss": 1.0327, "step": 421 }, { "epoch": 0.63, "learning_rate": 0.00018897180444405614, "loss": 1.0448, "step": 422 }, { "epoch": 0.63, "learning_rate": 0.0001889179073833995, "loss": 1.0776, "step": 423 }, { "epoch": 0.63, "learning_rate": 0.0001888638866652356, "loss": 1.0748, "step": 424 }, { "epoch": 0.63, "learning_rate": 0.0001888097423646907, "loss": 1.0482, "step": 425 }, { "epoch": 0.63, "learning_rate": 0.00018875547455706295, "loss": 1.0394, "step": 426 }, { "epoch": 0.64, "learning_rate": 0.00018870108331782217, "loss": 1.0646, "step": 427 }, { "epoch": 0.64, "learning_rate": 0.00018864656872260985, "loss": 1.0338, "step": 428 }, { "epoch": 0.64, "learning_rate": 0.00018859193084723913, "loss": 0.9848, "step": 429 }, { "epoch": 0.64, "learning_rate": 0.0001885371697676944, "loss": 1.0587, "step": 430 }, { "epoch": 0.64, "learning_rate": 0.0001884822855601316, "loss": 1.0711, "step": 431 }, { "epoch": 0.64, "learning_rate": 0.00018842727830087778, "loss": 1.0964, "step": 432 }, { "epoch": 0.65, "learning_rate": 0.00018837214806643115, "loss": 1.0254, "step": 433 }, { "epoch": 0.65, "learning_rate": 0.00018831689493346095, "loss": 1.0748, "step": 434 }, { "epoch": 0.65, "learning_rate": 0.00018826151897880728, "loss": 1.0797, "step": 435 }, { "epoch": 0.65, "learning_rate": 0.00018820602027948114, "loss": 1.1068, "step": 436 }, { "epoch": 0.65, "learning_rate": 0.00018815039891266418, "loss": 1.081, "step": 437 }, { "epoch": 0.65, "learning_rate": 0.0001880946549557086, "loss": 1.0685, "step": 438 }, { "epoch": 0.65, "learning_rate": 0.00018803878848613716, "loss": 1.0916, "step": 439 }, { "epoch": 0.66, "learning_rate": 0.00018798279958164295, "loss": 1.115, "step": 440 }, { "epoch": 0.66, "learning_rate": 0.00018792668832008936, "loss": 1.0048, "step": 441 }, { "epoch": 0.66, "learning_rate": 0.0001878704547795099, "loss": 1.0386, "step": 442 }, { "epoch": 0.66, "learning_rate": 0.00018781409903810821, "loss": 1.0283, "step": 443 }, { "epoch": 0.66, "learning_rate": 0.00018775762117425777, "loss": 1.085, "step": 444 }, { "epoch": 0.66, "learning_rate": 0.00018770102126650198, "loss": 1.0582, "step": 445 }, { "epoch": 0.66, "learning_rate": 0.00018764429939355392, "loss": 1.0705, "step": 446 }, { "epoch": 0.67, "learning_rate": 0.0001875874556342963, "loss": 1.1426, "step": 447 }, { "epoch": 0.67, "learning_rate": 0.00018753049006778132, "loss": 1.0337, "step": 448 }, { "epoch": 0.67, "learning_rate": 0.0001874734027732306, "loss": 1.0993, "step": 449 }, { "epoch": 0.67, "learning_rate": 0.00018741619383003507, "loss": 1.0661, "step": 450 }, { "epoch": 0.67, "learning_rate": 0.00018735886331775476, "loss": 1.0564, "step": 451 }, { "epoch": 0.67, "learning_rate": 0.00018730141131611882, "loss": 1.0989, "step": 452 }, { "epoch": 0.68, "learning_rate": 0.0001872438379050254, "loss": 1.0984, "step": 453 }, { "epoch": 0.68, "learning_rate": 0.00018718614316454133, "loss": 1.1173, "step": 454 }, { "epoch": 0.68, "learning_rate": 0.00018712832717490235, "loss": 1.1005, "step": 455 }, { "epoch": 0.68, "learning_rate": 0.00018707039001651277, "loss": 1.0008, "step": 456 }, { "epoch": 0.68, "learning_rate": 0.00018701233176994533, "loss": 1.0701, "step": 457 }, { "epoch": 0.68, "learning_rate": 0.00018695415251594123, "loss": 1.0831, "step": 458 }, { "epoch": 0.68, "learning_rate": 0.00018689585233541003, "loss": 1.1165, "step": 459 }, { "epoch": 0.69, "learning_rate": 0.00018683743130942928, "loss": 1.0884, "step": 460 }, { "epoch": 0.69, "learning_rate": 0.00018677888951924474, "loss": 0.9882, "step": 461 }, { "epoch": 0.69, "learning_rate": 0.00018672022704627002, "loss": 1.086, "step": 462 }, { "epoch": 0.69, "learning_rate": 0.00018666144397208668, "loss": 1.0545, "step": 463 }, { "epoch": 0.69, "learning_rate": 0.00018660254037844388, "loss": 1.0274, "step": 464 }, { "epoch": 0.69, "learning_rate": 0.0001865435163472584, "loss": 1.0795, "step": 465 }, { "epoch": 0.69, "learning_rate": 0.00018648437196061462, "loss": 1.022, "step": 466 }, { "epoch": 0.7, "learning_rate": 0.0001864251073007642, "loss": 1.0717, "step": 467 }, { "epoch": 0.7, "learning_rate": 0.00018636572245012606, "loss": 1.1501, "step": 468 }, { "epoch": 0.7, "learning_rate": 0.0001863062174912863, "loss": 1.1034, "step": 469 }, { "epoch": 0.7, "learning_rate": 0.00018624659250699805, "loss": 1.0784, "step": 470 }, { "epoch": 0.7, "learning_rate": 0.00018618684758018136, "loss": 1.1274, "step": 471 }, { "epoch": 0.7, "learning_rate": 0.0001861269827939231, "loss": 1.0643, "step": 472 }, { "epoch": 0.7, "learning_rate": 0.00018606699823147676, "loss": 1.1394, "step": 473 }, { "epoch": 0.71, "learning_rate": 0.00018600689397626246, "loss": 0.9665, "step": 474 }, { "epoch": 0.71, "learning_rate": 0.00018594667011186678, "loss": 1.058, "step": 475 }, { "epoch": 0.71, "learning_rate": 0.00018588632672204264, "loss": 1.0706, "step": 476 }, { "epoch": 0.71, "learning_rate": 0.0001858258638907091, "loss": 1.0414, "step": 477 }, { "epoch": 0.71, "learning_rate": 0.00018576528170195146, "loss": 1.1, "step": 478 }, { "epoch": 0.71, "learning_rate": 0.00018570458024002093, "loss": 1.1114, "step": 479 }, { "epoch": 0.72, "learning_rate": 0.00018564375958933459, "loss": 1.0596, "step": 480 }, { "epoch": 0.72, "learning_rate": 0.0001855828198344753, "loss": 1.0897, "step": 481 }, { "epoch": 0.72, "learning_rate": 0.00018552176106019155, "loss": 1.0316, "step": 482 }, { "epoch": 0.72, "learning_rate": 0.00018546058335139733, "loss": 1.0516, "step": 483 }, { "epoch": 0.72, "learning_rate": 0.0001853992867931721, "loss": 1.0477, "step": 484 }, { "epoch": 0.72, "learning_rate": 0.00018533787147076048, "loss": 1.0432, "step": 485 }, { "epoch": 0.72, "learning_rate": 0.00018527633746957234, "loss": 1.0568, "step": 486 }, { "epoch": 0.73, "learning_rate": 0.00018521468487518264, "loss": 1.114, "step": 487 }, { "epoch": 0.73, "learning_rate": 0.00018515291377333112, "loss": 1.0664, "step": 488 }, { "epoch": 0.73, "learning_rate": 0.0001850910242499225, "loss": 1.0162, "step": 489 }, { "epoch": 0.73, "learning_rate": 0.0001850290163910261, "loss": 1.0829, "step": 490 }, { "epoch": 0.73, "learning_rate": 0.00018496689028287572, "loss": 1.1078, "step": 491 }, { "epoch": 0.73, "learning_rate": 0.0001849046460118698, "loss": 1.0533, "step": 492 }, { "epoch": 0.73, "learning_rate": 0.00018484228366457095, "loss": 1.0923, "step": 493 }, { "epoch": 0.74, "learning_rate": 0.00018477980332770607, "loss": 1.0516, "step": 494 }, { "epoch": 0.74, "learning_rate": 0.00018471720508816614, "loss": 0.9826, "step": 495 }, { "epoch": 0.74, "learning_rate": 0.00018465448903300606, "loss": 1.1581, "step": 496 }, { "epoch": 0.74, "learning_rate": 0.0001845916552494446, "loss": 1.1268, "step": 497 }, { "epoch": 0.74, "learning_rate": 0.00018452870382486432, "loss": 1.0483, "step": 498 }, { "epoch": 0.74, "learning_rate": 0.00018446563484681127, "loss": 1.1792, "step": 499 }, { "epoch": 0.75, "learning_rate": 0.00018440244840299506, "loss": 1.0918, "step": 500 }, { "epoch": 0.75, "learning_rate": 0.0001843391445812886, "loss": 0.9691, "step": 501 }, { "epoch": 0.75, "learning_rate": 0.00018427572346972805, "loss": 1.0581, "step": 502 }, { "epoch": 0.75, "learning_rate": 0.0001842121851565128, "loss": 1.0072, "step": 503 }, { "epoch": 0.75, "learning_rate": 0.00018414852973000503, "loss": 0.9686, "step": 504 }, { "epoch": 0.75, "eval_loss": 1.0276715755462646, "eval_runtime": 2.6054, "eval_samples_per_second": 419.124, "eval_steps_per_second": 26.483, "step": 504 }, { "epoch": 0.75, "learning_rate": 0.00018408475727872995, "loss": 1.1221, "step": 505 }, { "epoch": 0.75, "learning_rate": 0.00018402086789137546, "loss": 1.087, "step": 506 }, { "epoch": 0.76, "learning_rate": 0.00018395686165679202, "loss": 1.0599, "step": 507 }, { "epoch": 0.76, "learning_rate": 0.00018389273866399275, "loss": 1.1844, "step": 508 }, { "epoch": 0.76, "learning_rate": 0.00018382849900215294, "loss": 1.046, "step": 509 }, { "epoch": 0.76, "learning_rate": 0.00018376414276061032, "loss": 0.9691, "step": 510 }, { "epoch": 0.76, "learning_rate": 0.00018369967002886464, "loss": 1.0996, "step": 511 }, { "epoch": 0.76, "learning_rate": 0.0001836350808965776, "loss": 1.083, "step": 512 }, { "epoch": 0.76, "learning_rate": 0.00018357037545357297, "loss": 1.0371, "step": 513 }, { "epoch": 0.77, "learning_rate": 0.00018350555378983608, "loss": 1.018, "step": 514 }, { "epoch": 0.77, "learning_rate": 0.00018344061599551398, "loss": 1.095, "step": 515 }, { "epoch": 0.77, "learning_rate": 0.00018337556216091517, "loss": 1.0871, "step": 516 }, { "epoch": 0.77, "learning_rate": 0.0001833103923765096, "loss": 1.0774, "step": 517 }, { "epoch": 0.77, "learning_rate": 0.00018324510673292842, "loss": 1.0337, "step": 518 }, { "epoch": 0.77, "learning_rate": 0.0001831797053209639, "loss": 1.0059, "step": 519 }, { "epoch": 0.77, "learning_rate": 0.00018311418823156936, "loss": 1.0744, "step": 520 }, { "epoch": 0.78, "learning_rate": 0.00018304855555585894, "loss": 0.9732, "step": 521 }, { "epoch": 0.78, "learning_rate": 0.00018298280738510752, "loss": 1.1176, "step": 522 }, { "epoch": 0.78, "learning_rate": 0.00018291694381075056, "loss": 1.1485, "step": 523 }, { "epoch": 0.78, "learning_rate": 0.00018285096492438424, "loss": 1.1044, "step": 524 }, { "epoch": 0.78, "learning_rate": 0.00018278487081776476, "loss": 0.9812, "step": 525 }, { "epoch": 0.78, "learning_rate": 0.00018271866158280884, "loss": 1.0966, "step": 526 }, { "epoch": 0.79, "learning_rate": 0.0001826523373115931, "loss": 1.2406, "step": 527 }, { "epoch": 0.79, "learning_rate": 0.0001825858980963543, "loss": 1.0727, "step": 528 }, { "epoch": 0.79, "learning_rate": 0.000182519344029489, "loss": 0.9966, "step": 529 }, { "epoch": 0.79, "learning_rate": 0.00018245267520355346, "loss": 1.081, "step": 530 }, { "epoch": 0.79, "learning_rate": 0.00018238589171126353, "loss": 1.1104, "step": 531 }, { "epoch": 0.79, "learning_rate": 0.00018231899364549455, "loss": 1.0535, "step": 532 }, { "epoch": 0.79, "learning_rate": 0.00018225198109928114, "loss": 1.0801, "step": 533 }, { "epoch": 0.8, "learning_rate": 0.00018218485416581726, "loss": 1.0726, "step": 534 }, { "epoch": 0.8, "learning_rate": 0.00018211761293845585, "loss": 1.0923, "step": 535 }, { "epoch": 0.8, "learning_rate": 0.00018205025751070875, "loss": 1.0551, "step": 536 }, { "epoch": 0.8, "learning_rate": 0.00018198278797624675, "loss": 1.0495, "step": 537 }, { "epoch": 0.8, "learning_rate": 0.0001819152044288992, "loss": 1.0589, "step": 538 }, { "epoch": 0.8, "learning_rate": 0.00018184750696265408, "loss": 1.0487, "step": 539 }, { "epoch": 0.8, "learning_rate": 0.0001817796956716578, "loss": 1.0491, "step": 540 }, { "epoch": 0.81, "learning_rate": 0.000181711770650215, "loss": 1.0981, "step": 541 }, { "epoch": 0.81, "learning_rate": 0.00018164373199278856, "loss": 1.1706, "step": 542 }, { "epoch": 0.81, "learning_rate": 0.0001815755797939994, "loss": 1.1024, "step": 543 }, { "epoch": 0.81, "learning_rate": 0.00018150731414862622, "loss": 1.0488, "step": 544 }, { "epoch": 0.81, "learning_rate": 0.00018143893515160564, "loss": 1.165, "step": 545 }, { "epoch": 0.81, "learning_rate": 0.00018137044289803181, "loss": 1.0346, "step": 546 }, { "epoch": 0.82, "learning_rate": 0.00018130183748315645, "loss": 1.1179, "step": 547 }, { "epoch": 0.82, "learning_rate": 0.0001812331190023886, "loss": 1.0027, "step": 548 }, { "epoch": 0.82, "learning_rate": 0.00018116428755129459, "loss": 1.1106, "step": 549 }, { "epoch": 0.82, "learning_rate": 0.00018109534322559783, "loss": 1.0479, "step": 550 }, { "epoch": 0.82, "learning_rate": 0.00018102628612117865, "loss": 1.0046, "step": 551 }, { "epoch": 0.82, "learning_rate": 0.0001809571163340744, "loss": 0.9883, "step": 552 }, { "epoch": 0.82, "learning_rate": 0.00018088783396047893, "loss": 1.1018, "step": 553 }, { "epoch": 0.83, "learning_rate": 0.00018081843909674276, "loss": 1.1389, "step": 554 }, { "epoch": 0.83, "learning_rate": 0.00018074893183937283, "loss": 1.0751, "step": 555 }, { "epoch": 0.83, "learning_rate": 0.00018067931228503246, "loss": 1.1475, "step": 556 }, { "epoch": 0.83, "learning_rate": 0.00018060958053054096, "loss": 1.0829, "step": 557 }, { "epoch": 0.83, "learning_rate": 0.00018053973667287387, "loss": 1.0272, "step": 558 }, { "epoch": 0.83, "learning_rate": 0.00018046978080916252, "loss": 1.0668, "step": 559 }, { "epoch": 0.83, "learning_rate": 0.00018039971303669407, "loss": 1.0988, "step": 560 }, { "epoch": 0.84, "learning_rate": 0.00018032953345291123, "loss": 1.0339, "step": 561 }, { "epoch": 0.84, "learning_rate": 0.0001802592421554123, "loss": 1.0654, "step": 562 }, { "epoch": 0.84, "learning_rate": 0.00018018883924195085, "loss": 1.0157, "step": 563 }, { "epoch": 0.84, "learning_rate": 0.00018011832481043576, "loss": 1.0738, "step": 564 }, { "epoch": 0.84, "learning_rate": 0.0001800476989589309, "loss": 1.0742, "step": 565 }, { "epoch": 0.84, "learning_rate": 0.0001799769617856552, "loss": 0.9861, "step": 566 }, { "epoch": 0.85, "learning_rate": 0.0001799061133889823, "loss": 1.0788, "step": 567 }, { "epoch": 0.85, "learning_rate": 0.00017983515386744061, "loss": 1.0539, "step": 568 }, { "epoch": 0.85, "learning_rate": 0.00017976408331971298, "loss": 1.0875, "step": 569 }, { "epoch": 0.85, "learning_rate": 0.0001796929018446368, "loss": 1.0765, "step": 570 }, { "epoch": 0.85, "learning_rate": 0.00017962160954120354, "loss": 1.1336, "step": 571 }, { "epoch": 0.85, "learning_rate": 0.000179550206508559, "loss": 0.9674, "step": 572 }, { "epoch": 0.85, "learning_rate": 0.00017947869284600282, "loss": 1.0607, "step": 573 }, { "epoch": 0.86, "learning_rate": 0.0001794070686529886, "loss": 0.9959, "step": 574 }, { "epoch": 0.86, "learning_rate": 0.00017933533402912354, "loss": 1.038, "step": 575 }, { "epoch": 0.86, "learning_rate": 0.0001792634890741685, "loss": 1.1342, "step": 576 }, { "epoch": 0.86, "learning_rate": 0.00017919153388803774, "loss": 1.0941, "step": 577 }, { "epoch": 0.86, "learning_rate": 0.00017911946857079888, "loss": 1.1286, "step": 578 }, { "epoch": 0.86, "learning_rate": 0.00017904729322267256, "loss": 1.0354, "step": 579 }, { "epoch": 0.86, "learning_rate": 0.0001789750079440326, "loss": 1.1314, "step": 580 }, { "epoch": 0.87, "learning_rate": 0.00017890261283540562, "loss": 1.0365, "step": 581 }, { "epoch": 0.87, "learning_rate": 0.00017883010799747099, "loss": 1.091, "step": 582 }, { "epoch": 0.87, "learning_rate": 0.00017875749353106062, "loss": 0.9995, "step": 583 }, { "epoch": 0.87, "learning_rate": 0.000178684769537159, "loss": 1.0435, "step": 584 }, { "epoch": 0.87, "learning_rate": 0.00017861193611690287, "loss": 1.0555, "step": 585 }, { "epoch": 0.87, "learning_rate": 0.00017853899337158112, "loss": 1.0637, "step": 586 }, { "epoch": 0.87, "learning_rate": 0.00017846594140263474, "loss": 1.064, "step": 587 }, { "epoch": 0.88, "learning_rate": 0.00017839278031165658, "loss": 0.9879, "step": 588 }, { "epoch": 0.88, "learning_rate": 0.00017831951020039126, "loss": 1.0846, "step": 589 }, { "epoch": 0.88, "learning_rate": 0.000178246131170735, "loss": 1.0373, "step": 590 }, { "epoch": 0.88, "learning_rate": 0.00017817264332473546, "loss": 1.0377, "step": 591 }, { "epoch": 0.88, "learning_rate": 0.00017809904676459177, "loss": 1.0932, "step": 592 }, { "epoch": 0.88, "learning_rate": 0.00017802534159265404, "loss": 1.085, "step": 593 }, { "epoch": 0.89, "learning_rate": 0.0001779515279114236, "loss": 1.0975, "step": 594 }, { "epoch": 0.89, "learning_rate": 0.0001778776058235526, "loss": 1.1283, "step": 595 }, { "epoch": 0.89, "learning_rate": 0.00017780357543184397, "loss": 1.0652, "step": 596 }, { "epoch": 0.89, "learning_rate": 0.00017772943683925122, "loss": 1.0336, "step": 597 }, { "epoch": 0.89, "learning_rate": 0.00017765519014887842, "loss": 0.9761, "step": 598 }, { "epoch": 0.89, "learning_rate": 0.0001775808354639799, "loss": 1.0688, "step": 599 }, { "epoch": 0.89, "learning_rate": 0.00017750637288796016, "loss": 1.1031, "step": 600 }, { "epoch": 0.9, "learning_rate": 0.00017743180252437383, "loss": 1.083, "step": 601 }, { "epoch": 0.9, "learning_rate": 0.00017735712447692538, "loss": 1.1612, "step": 602 }, { "epoch": 0.9, "learning_rate": 0.00017728233884946903, "loss": 1.1618, "step": 603 }, { "epoch": 0.9, "learning_rate": 0.00017720744574600863, "loss": 1.144, "step": 604 }, { "epoch": 0.9, "learning_rate": 0.0001771324452706975, "loss": 1.1174, "step": 605 }, { "epoch": 0.9, "learning_rate": 0.00017705733752783825, "loss": 0.9728, "step": 606 }, { "epoch": 0.9, "learning_rate": 0.0001769821226218827, "loss": 1.0599, "step": 607 }, { "epoch": 0.91, "learning_rate": 0.0001769068006574317, "loss": 1.0639, "step": 608 }, { "epoch": 0.91, "learning_rate": 0.00017683137173923495, "loss": 1.1278, "step": 609 }, { "epoch": 0.91, "learning_rate": 0.00017675583597219095, "loss": 0.9925, "step": 610 }, { "epoch": 0.91, "learning_rate": 0.0001766801934613467, "loss": 1.0457, "step": 611 }, { "epoch": 0.91, "learning_rate": 0.0001766044443118978, "loss": 1.0348, "step": 612 }, { "epoch": 0.91, "learning_rate": 0.000176528588629188, "loss": 1.022, "step": 613 }, { "epoch": 0.92, "learning_rate": 0.00017645262651870926, "loss": 1.0027, "step": 614 }, { "epoch": 0.92, "learning_rate": 0.00017637655808610156, "loss": 1.0491, "step": 615 }, { "epoch": 0.92, "learning_rate": 0.00017630038343715275, "loss": 1.0413, "step": 616 }, { "epoch": 0.92, "learning_rate": 0.00017622410267779834, "loss": 1.0358, "step": 617 }, { "epoch": 0.92, "learning_rate": 0.00017614771591412148, "loss": 1.1125, "step": 618 }, { "epoch": 0.92, "learning_rate": 0.00017607122325235267, "loss": 1.1185, "step": 619 }, { "epoch": 0.92, "learning_rate": 0.00017599462479886974, "loss": 1.0738, "step": 620 }, { "epoch": 0.93, "learning_rate": 0.00017591792066019765, "loss": 1.102, "step": 621 }, { "epoch": 0.93, "learning_rate": 0.00017584111094300827, "loss": 1.065, "step": 622 }, { "epoch": 0.93, "learning_rate": 0.0001757641957541203, "loss": 1.0514, "step": 623 }, { "epoch": 0.93, "learning_rate": 0.0001756871752004992, "loss": 1.0396, "step": 624 }, { "epoch": 0.93, "learning_rate": 0.00017561004938925688, "loss": 1.1027, "step": 625 }, { "epoch": 0.93, "learning_rate": 0.00017553281842765169, "loss": 1.0223, "step": 626 }, { "epoch": 0.93, "learning_rate": 0.00017545548242308816, "loss": 1.1793, "step": 627 }, { "epoch": 0.94, "learning_rate": 0.00017537804148311695, "loss": 1.0642, "step": 628 }, { "epoch": 0.94, "learning_rate": 0.00017530049571543464, "loss": 1.0682, "step": 629 }, { "epoch": 0.94, "learning_rate": 0.00017522284522788353, "loss": 1.0476, "step": 630 }, { "epoch": 0.94, "learning_rate": 0.00017514509012845164, "loss": 1.1064, "step": 631 }, { "epoch": 0.94, "learning_rate": 0.00017506723052527242, "loss": 1.0258, "step": 632 }, { "epoch": 0.94, "learning_rate": 0.00017498926652662476, "loss": 1.1954, "step": 633 }, { "epoch": 0.94, "learning_rate": 0.0001749111982409325, "loss": 1.0637, "step": 634 }, { "epoch": 0.95, "learning_rate": 0.00017483302577676475, "loss": 0.9685, "step": 635 }, { "epoch": 0.95, "learning_rate": 0.00017475474924283536, "loss": 1.0465, "step": 636 }, { "epoch": 0.95, "learning_rate": 0.000174676368748003, "loss": 1.0161, "step": 637 }, { "epoch": 0.95, "learning_rate": 0.00017459788440127083, "loss": 1.0479, "step": 638 }, { "epoch": 0.95, "learning_rate": 0.00017451929631178648, "loss": 1.1166, "step": 639 }, { "epoch": 0.95, "learning_rate": 0.0001744406045888419, "loss": 1.0634, "step": 640 }, { "epoch": 0.96, "learning_rate": 0.00017436180934187308, "loss": 1.0826, "step": 641 }, { "epoch": 0.96, "learning_rate": 0.00017428291068046, "loss": 1.07, "step": 642 }, { "epoch": 0.96, "learning_rate": 0.00017420390871432647, "loss": 1.1167, "step": 643 }, { "epoch": 0.96, "learning_rate": 0.00017412480355334005, "loss": 1.0347, "step": 644 }, { "epoch": 0.96, "learning_rate": 0.00017404559530751162, "loss": 1.0393, "step": 645 }, { "epoch": 0.96, "learning_rate": 0.00017396628408699555, "loss": 1.1108, "step": 646 }, { "epoch": 0.96, "learning_rate": 0.00017388687000208946, "loss": 1.006, "step": 647 }, { "epoch": 0.97, "learning_rate": 0.0001738073531632339, "loss": 1.0932, "step": 648 }, { "epoch": 0.97, "learning_rate": 0.0001737277336810124, "loss": 1.0123, "step": 649 }, { "epoch": 0.97, "learning_rate": 0.00017364801166615124, "loss": 1.1273, "step": 650 }, { "epoch": 0.97, "learning_rate": 0.0001735681872295192, "loss": 0.9893, "step": 651 }, { "epoch": 0.97, "learning_rate": 0.0001734882604821276, "loss": 1.0699, "step": 652 }, { "epoch": 0.97, "learning_rate": 0.00017340823153513002, "loss": 1.0901, "step": 653 }, { "epoch": 0.97, "learning_rate": 0.00017332810049982208, "loss": 1.0212, "step": 654 }, { "epoch": 0.98, "learning_rate": 0.00017324786748764155, "loss": 0.9898, "step": 655 }, { "epoch": 0.98, "learning_rate": 0.00017316753261016783, "loss": 1.0899, "step": 656 }, { "epoch": 0.98, "learning_rate": 0.00017308709597912213, "loss": 1.085, "step": 657 }, { "epoch": 0.98, "learning_rate": 0.00017300655770636708, "loss": 1.091, "step": 658 }, { "epoch": 0.98, "learning_rate": 0.00017292591790390665, "loss": 1.0502, "step": 659 }, { "epoch": 0.98, "learning_rate": 0.0001728451766838861, "loss": 1.2131, "step": 660 }, { "epoch": 0.99, "learning_rate": 0.00017276433415859167, "loss": 1.1256, "step": 661 }, { "epoch": 0.99, "learning_rate": 0.00017268339044045042, "loss": 1.0577, "step": 662 }, { "epoch": 0.99, "learning_rate": 0.00017260234564203032, "loss": 1.0012, "step": 663 }, { "epoch": 0.99, "learning_rate": 0.00017252119987603973, "loss": 1.0611, "step": 664 }, { "epoch": 0.99, "learning_rate": 0.00017243995325532755, "loss": 1.1251, "step": 665 }, { "epoch": 0.99, "learning_rate": 0.00017235860589288277, "loss": 1.0959, "step": 666 }, { "epoch": 0.99, "learning_rate": 0.0001722771579018347, "loss": 1.1413, "step": 667 }, { "epoch": 1.0, "learning_rate": 0.00017219560939545246, "loss": 1.0728, "step": 668 }, { "epoch": 1.0, "learning_rate": 0.00017211396048714498, "loss": 1.0461, "step": 669 }, { "epoch": 1.0, "learning_rate": 0.0001720322112904608, "loss": 1.1084, "step": 670 }, { "epoch": 1.0, "learning_rate": 0.00017195036191908797, "loss": 1.1316, "step": 671 } ], "logging_steps": 1, "max_steps": 2684, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 671, "total_flos": 2.085255620067328e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }