{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8870490833826138, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.4166666666666664e-05, "loss": 2.8488, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.00010833333333333333, "loss": 2.7815, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.0001625, "loss": 2.8002, "step": 3 }, { "epoch": 0.01, "learning_rate": 0.00021666666666666666, "loss": 2.7473, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0002708333333333333, "loss": 2.4233, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.000325, "loss": 1.9676, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.00037916666666666665, "loss": 1.7562, "step": 7 }, { "epoch": 0.01, "learning_rate": 0.0004333333333333333, "loss": 1.3949, "step": 8 }, { "epoch": 0.02, "learning_rate": 0.0004875, "loss": 1.2908, "step": 9 }, { "epoch": 0.02, "learning_rate": 0.0005416666666666666, "loss": 1.2542, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.0005958333333333333, "loss": 1.2959, "step": 11 }, { "epoch": 0.02, "learning_rate": 0.00065, "loss": 1.1706, "step": 12 }, { "epoch": 0.02, "learning_rate": 0.0006499947173877214, "loss": 1.0829, "step": 13 }, { "epoch": 0.02, "learning_rate": 0.0006499788697226147, "loss": 1.128, "step": 14 }, { "epoch": 0.03, "learning_rate": 0.0006499524575198621, "loss": 1.0847, "step": 15 }, { "epoch": 0.03, "learning_rate": 0.0006499154816380815, "loss": 1.1143, "step": 16 }, { "epoch": 0.03, "learning_rate": 0.0006498679432792988, "loss": 1.0751, "step": 17 }, { "epoch": 0.03, "learning_rate": 0.0006498098439889095, "loss": 1.179, "step": 18 }, { "epoch": 0.03, "learning_rate": 0.0006497411856556275, "loss": 1.0327, "step": 19 }, { "epoch": 0.04, "learning_rate": 0.0006496619705114241, "loss": 1.0672, "step": 20 }, { "epoch": 0.04, "learning_rate": 0.0006495722011314557, "loss": 1.1625, "step": 21 }, { "epoch": 0.04, "learning_rate": 0.0006494718804339797, "loss": 1.0751, "step": 22 }, { "epoch": 0.04, "learning_rate": 0.0006493610116802598, "loss": 0.996, "step": 23 }, { "epoch": 0.04, "learning_rate": 0.0006492395984744599, "loss": 1.0478, "step": 24 }, { "epoch": 0.04, "learning_rate": 0.0006491076447635269, "loss": 1.064, "step": 25 }, { "epoch": 0.05, "learning_rate": 0.0006489651548370628, "loss": 0.9393, "step": 26 }, { "epoch": 0.05, "learning_rate": 0.0006488121333271846, "loss": 0.9282, "step": 27 }, { "epoch": 0.05, "learning_rate": 0.0006486485852083744, "loss": 1.0558, "step": 28 }, { "epoch": 0.05, "learning_rate": 0.0006484745157973169, "loss": 1.0015, "step": 29 }, { "epoch": 0.05, "learning_rate": 0.0006482899307527272, "loss": 1.0261, "step": 30 }, { "epoch": 0.05, "learning_rate": 0.0006480948360751669, "loss": 1.0507, "step": 31 }, { "epoch": 0.06, "learning_rate": 0.0006478892381068483, "loss": 1.0225, "step": 32 }, { "epoch": 0.06, "learning_rate": 0.0006476731435314292, "loss": 0.9411, "step": 33 }, { "epoch": 0.06, "learning_rate": 0.0006474465593737948, "loss": 0.9884, "step": 34 }, { "epoch": 0.06, "learning_rate": 0.0006472094929998295, "loss": 0.9892, "step": 35 }, { "epoch": 0.06, "learning_rate": 0.0006469619521161782, "loss": 1.0527, "step": 36 }, { "epoch": 0.07, "learning_rate": 0.0006467039447699945, "loss": 0.969, "step": 37 }, { "epoch": 0.07, "learning_rate": 0.0006464354793486803, "loss": 1.0009, "step": 38 }, { "epoch": 0.07, "learning_rate": 0.0006461565645796124, "loss": 1.0068, "step": 39 }, { "epoch": 0.07, "learning_rate": 0.0006458672095298589, "loss": 0.9626, "step": 40 }, { "epoch": 0.07, "learning_rate": 0.0006455674236058847, "loss": 0.934, "step": 41 }, { "epoch": 0.07, "learning_rate": 0.0006452572165532456, "loss": 1.0217, "step": 42 }, { "epoch": 0.08, "learning_rate": 0.0006449365984562712, "loss": 1.0036, "step": 43 }, { "epoch": 0.08, "learning_rate": 0.0006446055797377376, "loss": 0.9234, "step": 44 }, { "epoch": 0.08, "learning_rate": 0.000644264171158528, "loss": 0.9771, "step": 45 }, { "epoch": 0.08, "learning_rate": 0.0006439123838172836, "loss": 1.013, "step": 46 }, { "epoch": 0.08, "learning_rate": 0.0006435502291500418, "loss": 0.9154, "step": 47 }, { "epoch": 0.09, "learning_rate": 0.0006431777189298656, "loss": 0.9098, "step": 48 }, { "epoch": 0.09, "learning_rate": 0.0006427948652664599, "loss": 0.9243, "step": 49 }, { "epoch": 0.09, "learning_rate": 0.0006424016806057781, "loss": 0.9162, "step": 50 }, { "epoch": 0.09, "learning_rate": 0.0006419981777296182, "loss": 0.9538, "step": 51 }, { "epoch": 0.09, "learning_rate": 0.0006415843697552062, "loss": 0.9454, "step": 52 }, { "epoch": 0.09, "learning_rate": 0.0006411602701347703, "loss": 0.9296, "step": 53 }, { "epoch": 0.1, "learning_rate": 0.0006407258926551036, "loss": 0.929, "step": 54 }, { "epoch": 0.1, "learning_rate": 0.0006402812514371154, "loss": 0.9172, "step": 55 }, { "epoch": 0.1, "learning_rate": 0.0006398263609353731, "loss": 0.9871, "step": 56 }, { "epoch": 0.1, "learning_rate": 0.0006393612359376315, "loss": 0.9279, "step": 57 }, { "epoch": 0.1, "learning_rate": 0.0006388858915643519, "loss": 0.9191, "step": 58 }, { "epoch": 0.1, "learning_rate": 0.0006384003432682119, "loss": 0.9828, "step": 59 }, { "epoch": 0.11, "learning_rate": 0.0006379046068336013, "loss": 0.8912, "step": 60 }, { "epoch": 0.11, "learning_rate": 0.00063739869837611, "loss": 0.9023, "step": 61 }, { "epoch": 0.11, "learning_rate": 0.0006368826343420043, "loss": 0.9978, "step": 62 }, { "epoch": 0.11, "learning_rate": 0.0006363564315076915, "loss": 0.9097, "step": 63 }, { "epoch": 0.11, "learning_rate": 0.0006358201069791749, "loss": 0.8475, "step": 64 }, { "epoch": 0.12, "learning_rate": 0.000635273678191498, "loss": 0.9763, "step": 65 }, { "epoch": 0.12, "learning_rate": 0.000634717162908177, "loss": 0.8673, "step": 66 }, { "epoch": 0.12, "learning_rate": 0.0006341505792206243, "loss": 0.9188, "step": 67 }, { "epoch": 0.12, "learning_rate": 0.0006335739455475594, "loss": 0.865, "step": 68 }, { "epoch": 0.12, "learning_rate": 0.0006329872806344108, "loss": 0.9187, "step": 69 }, { "epoch": 0.12, "learning_rate": 0.0006323906035527062, "loss": 0.887, "step": 70 }, { "epoch": 0.13, "learning_rate": 0.0006317839336994531, "loss": 0.908, "step": 71 }, { "epoch": 0.13, "learning_rate": 0.0006311672907965074, "loss": 0.918, "step": 72 }, { "epoch": 0.13, "learning_rate": 0.0006305406948899329, "loss": 0.9399, "step": 73 }, { "epoch": 0.13, "learning_rate": 0.0006299041663493497, "loss": 0.9741, "step": 74 }, { "epoch": 0.13, "learning_rate": 0.0006292577258672713, "loss": 0.8738, "step": 75 }, { "epoch": 0.13, "learning_rate": 0.0006286013944584328, "loss": 0.9192, "step": 76 }, { "epoch": 0.14, "learning_rate": 0.0006279351934591071, "loss": 0.8589, "step": 77 }, { "epoch": 0.14, "learning_rate": 0.0006272591445264116, "loss": 0.955, "step": 78 }, { "epoch": 0.14, "learning_rate": 0.0006265732696376042, "loss": 0.928, "step": 79 }, { "epoch": 0.14, "learning_rate": 0.0006258775910893685, "loss": 0.8454, "step": 80 }, { "epoch": 0.14, "learning_rate": 0.0006251721314970894, "loss": 0.8709, "step": 81 }, { "epoch": 0.15, "learning_rate": 0.0006244569137941179, "loss": 0.8732, "step": 82 }, { "epoch": 0.15, "learning_rate": 0.0006237319612310249, "loss": 0.9345, "step": 83 }, { "epoch": 0.15, "learning_rate": 0.0006229972973748463, "loss": 0.9342, "step": 84 }, { "epoch": 0.15, "learning_rate": 0.0006222529461083165, "loss": 0.8803, "step": 85 }, { "epoch": 0.15, "learning_rate": 0.0006214989316290914, "loss": 0.8676, "step": 86 }, { "epoch": 0.15, "learning_rate": 0.0006207352784489629, "loss": 0.9195, "step": 87 }, { "epoch": 0.16, "learning_rate": 0.000619962011393061, "loss": 0.9505, "step": 88 }, { "epoch": 0.16, "learning_rate": 0.0006191791555990477, "loss": 0.8778, "step": 89 }, { "epoch": 0.16, "learning_rate": 0.0006183867365162994, "loss": 0.9663, "step": 90 }, { "epoch": 0.16, "learning_rate": 0.0006175847799050789, "loss": 0.9304, "step": 91 }, { "epoch": 0.16, "learning_rate": 0.0006167733118356993, "loss": 0.9233, "step": 92 }, { "epoch": 0.16, "learning_rate": 0.0006159523586876756, "loss": 0.9167, "step": 93 }, { "epoch": 0.17, "learning_rate": 0.0006151219471488673, "loss": 0.882, "step": 94 }, { "epoch": 0.17, "learning_rate": 0.0006142821042146112, "loss": 0.8295, "step": 95 }, { "epoch": 0.17, "learning_rate": 0.0006134328571868428, "loss": 0.7799, "step": 96 }, { "epoch": 0.17, "learning_rate": 0.0006125742336732103, "loss": 0.9368, "step": 97 }, { "epoch": 0.17, "learning_rate": 0.000611706261586176, "loss": 0.8542, "step": 98 }, { "epoch": 0.18, "learning_rate": 0.0006108289691421089, "loss": 0.9263, "step": 99 }, { "epoch": 0.18, "learning_rate": 0.0006099423848603682, "loss": 0.8572, "step": 100 }, { "epoch": 0.18, "learning_rate": 0.0006090465375623755, "loss": 0.905, "step": 101 }, { "epoch": 0.18, "learning_rate": 0.0006081414563706781, "loss": 0.8621, "step": 102 }, { "epoch": 0.18, "learning_rate": 0.0006072271707080021, "loss": 0.8745, "step": 103 }, { "epoch": 0.18, "learning_rate": 0.0006063037102962963, "loss": 0.928, "step": 104 }, { "epoch": 0.19, "learning_rate": 0.0006053711051557658, "loss": 0.908, "step": 105 }, { "epoch": 0.19, "learning_rate": 0.0006044293856038958, "loss": 0.8919, "step": 106 }, { "epoch": 0.19, "learning_rate": 0.0006034785822544665, "loss": 0.8665, "step": 107 }, { "epoch": 0.19, "learning_rate": 0.0006025187260165575, "loss": 0.8645, "step": 108 }, { "epoch": 0.19, "learning_rate": 0.0006015498480935434, "loss": 0.895, "step": 109 }, { "epoch": 0.2, "learning_rate": 0.0006005719799820788, "loss": 0.892, "step": 110 }, { "epoch": 0.2, "learning_rate": 0.0005995851534710752, "loss": 0.8843, "step": 111 }, { "epoch": 0.2, "learning_rate": 0.0005985894006406671, "loss": 0.8114, "step": 112 }, { "epoch": 0.2, "learning_rate": 0.0005975847538611689, "loss": 0.9086, "step": 113 }, { "epoch": 0.2, "learning_rate": 0.0005965712457920233, "loss": 0.8644, "step": 114 }, { "epoch": 0.2, "learning_rate": 0.000595548909380739, "loss": 0.8638, "step": 115 }, { "epoch": 0.21, "learning_rate": 0.00059451777786182, "loss": 0.8856, "step": 116 }, { "epoch": 0.21, "learning_rate": 0.0005934778847556848, "loss": 0.8749, "step": 117 }, { "epoch": 0.21, "learning_rate": 0.0005924292638675769, "loss": 0.8864, "step": 118 }, { "epoch": 0.21, "learning_rate": 0.0005913719492864662, "loss": 0.8317, "step": 119 }, { "epoch": 0.21, "learning_rate": 0.0005903059753839402, "loss": 0.8356, "step": 120 }, { "epoch": 0.21, "learning_rate": 0.0005892313768130872, "loss": 0.784, "step": 121 }, { "epoch": 0.22, "learning_rate": 0.0005881481885073694, "loss": 0.8377, "step": 122 }, { "epoch": 0.22, "learning_rate": 0.0005870564456794872, "loss": 0.7854, "step": 123 }, { "epoch": 0.22, "learning_rate": 0.0005859561838202349, "loss": 0.9538, "step": 124 }, { "epoch": 0.22, "learning_rate": 0.0005848474386973468, "loss": 0.8268, "step": 125 }, { "epoch": 0.22, "learning_rate": 0.0005837302463543341, "loss": 0.9009, "step": 126 }, { "epoch": 0.23, "learning_rate": 0.000582604643109314, "loss": 0.8684, "step": 127 }, { "epoch": 0.23, "learning_rate": 0.0005814706655538279, "loss": 0.7749, "step": 128 }, { "epoch": 0.23, "learning_rate": 0.0005803283505516529, "loss": 0.8931, "step": 129 }, { "epoch": 0.23, "learning_rate": 0.0005791777352376026, "loss": 0.8246, "step": 130 }, { "epoch": 0.23, "learning_rate": 0.0005780188570163211, "loss": 0.7862, "step": 131 }, { "epoch": 0.23, "learning_rate": 0.0005768517535610654, "loss": 0.9168, "step": 132 }, { "epoch": 0.24, "learning_rate": 0.0005756764628124819, "loss": 0.8706, "step": 133 }, { "epoch": 0.24, "learning_rate": 0.000574493022977373, "loss": 0.7976, "step": 134 }, { "epoch": 0.24, "learning_rate": 0.000573301472527454, "loss": 0.814, "step": 135 }, { "epoch": 0.24, "learning_rate": 0.000572101850198104, "loss": 0.8991, "step": 136 }, { "epoch": 0.24, "learning_rate": 0.0005708941949871053, "loss": 0.8539, "step": 137 }, { "epoch": 0.24, "learning_rate": 0.0005696785461533761, "loss": 0.9107, "step": 138 }, { "epoch": 0.25, "learning_rate": 0.0005684549432156948, "loss": 0.9165, "step": 139 }, { "epoch": 0.25, "learning_rate": 0.0005672234259514147, "loss": 0.843, "step": 140 }, { "epoch": 0.25, "learning_rate": 0.000565984034395171, "loss": 0.8328, "step": 141 }, { "epoch": 0.25, "learning_rate": 0.0005647368088375792, "loss": 0.884, "step": 142 }, { "epoch": 0.25, "learning_rate": 0.000563481789823926, "loss": 0.9101, "step": 143 }, { "epoch": 0.26, "learning_rate": 0.0005622190181528502, "loss": 0.8508, "step": 144 }, { "epoch": 0.26, "learning_rate": 0.0005609485348750175, "loss": 0.8575, "step": 145 }, { "epoch": 0.26, "learning_rate": 0.0005596703812917851, "loss": 0.8861, "step": 146 }, { "epoch": 0.26, "learning_rate": 0.0005583845989538596, "loss": 0.8163, "step": 147 }, { "epoch": 0.26, "learning_rate": 0.0005570912296599459, "loss": 0.8583, "step": 148 }, { "epoch": 0.26, "learning_rate": 0.0005557903154553888, "loss": 0.8635, "step": 149 }, { "epoch": 0.27, "learning_rate": 0.000554481898630806, "loss": 0.811, "step": 150 }, { "epoch": 0.27, "learning_rate": 0.0005531660217207126, "loss": 0.9116, "step": 151 }, { "epoch": 0.27, "learning_rate": 0.0005518427275021399, "loss": 0.868, "step": 152 }, { "epoch": 0.27, "learning_rate": 0.0005505120589932435, "loss": 0.8868, "step": 153 }, { "epoch": 0.27, "learning_rate": 0.0005491740594519051, "loss": 0.8816, "step": 154 }, { "epoch": 0.27, "learning_rate": 0.0005478287723743267, "loss": 0.8499, "step": 155 }, { "epoch": 0.28, "learning_rate": 0.0005464762414936163, "loss": 0.8502, "step": 156 }, { "epoch": 0.28, "learning_rate": 0.0005451165107783659, "loss": 0.86, "step": 157 }, { "epoch": 0.28, "learning_rate": 0.0005437496244312228, "loss": 0.8669, "step": 158 }, { "epoch": 0.28, "learning_rate": 0.0005423756268874522, "loss": 0.869, "step": 159 }, { "epoch": 0.28, "learning_rate": 0.000540994562813493, "loss": 0.8476, "step": 160 }, { "epoch": 0.29, "learning_rate": 0.0005396064771055053, "loss": 0.7992, "step": 161 }, { "epoch": 0.29, "learning_rate": 0.0005382114148879113, "loss": 0.8569, "step": 162 }, { "epoch": 0.29, "learning_rate": 0.0005368094215119282, "loss": 0.8367, "step": 163 }, { "epoch": 0.29, "learning_rate": 0.000535400542554094, "loss": 0.8455, "step": 164 }, { "epoch": 0.29, "learning_rate": 0.0005339848238147857, "loss": 0.9209, "step": 165 }, { "epoch": 0.29, "learning_rate": 0.0005325623113167311, "loss": 0.8577, "step": 166 }, { "epoch": 0.3, "learning_rate": 0.0005311330513035111, "loss": 0.8331, "step": 167 }, { "epoch": 0.3, "learning_rate": 0.0005296970902380583, "loss": 0.7925, "step": 168 }, { "epoch": 0.3, "learning_rate": 0.0005282544748011454, "loss": 0.8223, "step": 169 }, { "epoch": 0.3, "learning_rate": 0.0005268052518898676, "loss": 0.8555, "step": 170 }, { "epoch": 0.3, "learning_rate": 0.0005253494686161189, "loss": 0.9448, "step": 171 }, { "epoch": 0.31, "learning_rate": 0.00052388717230506, "loss": 0.8364, "step": 172 }, { "epoch": 0.31, "learning_rate": 0.0005224184104935797, "loss": 0.866, "step": 173 }, { "epoch": 0.31, "learning_rate": 0.0005209432309287499, "loss": 0.8312, "step": 174 }, { "epoch": 0.31, "learning_rate": 0.0005194616815662733, "loss": 0.8725, "step": 175 }, { "epoch": 0.31, "learning_rate": 0.0005179738105689243, "loss": 0.8199, "step": 176 }, { "epoch": 0.31, "learning_rate": 0.0005164796663049834, "loss": 0.8068, "step": 177 }, { "epoch": 0.32, "learning_rate": 0.0005149792973466653, "loss": 0.8533, "step": 178 }, { "epoch": 0.32, "learning_rate": 0.0005134727524685388, "loss": 0.9067, "step": 179 }, { "epoch": 0.32, "learning_rate": 0.0005119600806459426, "loss": 0.8105, "step": 180 }, { "epoch": 0.32, "learning_rate": 0.0005104413310533914, "loss": 0.8426, "step": 181 }, { "epoch": 0.32, "learning_rate": 0.0005089165530629796, "loss": 0.8854, "step": 182 }, { "epoch": 0.32, "learning_rate": 0.0005073857962427743, "loss": 0.8151, "step": 183 }, { "epoch": 0.33, "learning_rate": 0.0005058491103552046, "loss": 0.8467, "step": 184 }, { "epoch": 0.33, "learning_rate": 0.0005043065453554449, "loss": 0.8343, "step": 185 }, { "epoch": 0.33, "learning_rate": 0.0005027581513897888, "loss": 0.8139, "step": 186 }, { "epoch": 0.33, "learning_rate": 0.0005012039787940209, "loss": 0.8632, "step": 187 }, { "epoch": 0.33, "learning_rate": 0.0004996440780917798, "loss": 0.8525, "step": 188 }, { "epoch": 0.34, "learning_rate": 0.0004980784999929151, "loss": 0.9075, "step": 189 }, { "epoch": 0.34, "learning_rate": 0.00049650729539184, "loss": 0.7663, "step": 190 }, { "epoch": 0.34, "learning_rate": 0.0004949305153658755, "loss": 0.82, "step": 191 }, { "epoch": 0.34, "learning_rate": 0.0004933482111735912, "loss": 0.9614, "step": 192 }, { "epoch": 0.34, "learning_rate": 0.0004917604342531381, "loss": 0.8063, "step": 193 }, { "epoch": 0.34, "learning_rate": 0.0004901672362205767, "loss": 0.8729, "step": 194 }, { "epoch": 0.35, "learning_rate": 0.0004885686688681996, "loss": 0.8819, "step": 195 }, { "epoch": 0.35, "learning_rate": 0.0004869647841628468, "loss": 0.7797, "step": 196 }, { "epoch": 0.35, "learning_rate": 0.00048535563424421686, "loss": 0.7435, "step": 197 }, { "epoch": 0.35, "learning_rate": 0.0004837412714231722, "loss": 0.7985, "step": 198 }, { "epoch": 0.35, "learning_rate": 0.00048212174818003796, "loss": 0.8532, "step": 199 }, { "epoch": 0.35, "learning_rate": 0.00048049711716289666, "loss": 0.8559, "step": 200 }, { "epoch": 0.36, "learning_rate": 0.0004788674311858757, "loss": 0.818, "step": 201 }, { "epoch": 0.36, "learning_rate": 0.00047723274322743176, "loss": 0.8423, "step": 202 }, { "epoch": 0.36, "learning_rate": 0.00047559310642862737, "loss": 0.8352, "step": 203 }, { "epoch": 0.36, "learning_rate": 0.00047394857409140383, "loss": 0.8056, "step": 204 }, { "epoch": 0.36, "learning_rate": 0.00047229919967684887, "loss": 0.8196, "step": 205 }, { "epoch": 0.37, "learning_rate": 0.0004706450368034578, "loss": 0.8858, "step": 206 }, { "epoch": 0.37, "learning_rate": 0.00046898613924539154, "loss": 0.8701, "step": 207 }, { "epoch": 0.37, "learning_rate": 0.0004673225609307275, "loss": 0.8912, "step": 208 }, { "epoch": 0.37, "learning_rate": 0.00046565435593970737, "loss": 0.8082, "step": 209 }, { "epoch": 0.37, "learning_rate": 0.000463981578502978, "loss": 0.8062, "step": 210 }, { "epoch": 0.37, "learning_rate": 0.0004623042829998296, "loss": 0.8533, "step": 211 }, { "epoch": 0.38, "learning_rate": 0.00046062252395642723, "loss": 0.7499, "step": 212 }, { "epoch": 0.38, "learning_rate": 0.0004589363560440383, "loss": 0.881, "step": 213 }, { "epoch": 0.38, "learning_rate": 0.00045724583407725556, "loss": 0.9066, "step": 214 }, { "epoch": 0.38, "learning_rate": 0.0004555510130122151, "loss": 0.7825, "step": 215 }, { "epoch": 0.38, "learning_rate": 0.0004538519479448095, "loss": 0.8257, "step": 216 }, { "epoch": 0.38, "learning_rate": 0.00045214869410889724, "loss": 0.7501, "step": 217 }, { "epoch": 0.39, "learning_rate": 0.0004504413068745068, "loss": 0.8188, "step": 218 }, { "epoch": 0.39, "learning_rate": 0.0004487298417460368, "loss": 0.8416, "step": 219 }, { "epoch": 0.39, "learning_rate": 0.00044701435436045133, "loss": 0.7909, "step": 220 }, { "epoch": 0.39, "learning_rate": 0.0004452949004854722, "loss": 0.8351, "step": 221 }, { "epoch": 0.39, "learning_rate": 0.00044357153601776454, "loss": 0.7758, "step": 222 }, { "epoch": 0.4, "learning_rate": 0.0004418443169811211, "loss": 0.8534, "step": 223 }, { "epoch": 0.4, "learning_rate": 0.00044011329952464045, "loss": 0.8572, "step": 224 }, { "epoch": 0.4, "learning_rate": 0.00043837853992090124, "loss": 0.8721, "step": 225 }, { "epoch": 0.4, "learning_rate": 0.00043664009456413367, "loss": 0.7843, "step": 226 }, { "epoch": 0.4, "learning_rate": 0.0004348980199683856, "loss": 0.8812, "step": 227 }, { "epoch": 0.4, "learning_rate": 0.0004331523727656857, "loss": 0.8069, "step": 228 }, { "epoch": 0.41, "learning_rate": 0.0004314032097042021, "loss": 0.7687, "step": 229 }, { "epoch": 0.41, "learning_rate": 0.00042965058764639813, "loss": 0.8415, "step": 230 }, { "epoch": 0.41, "learning_rate": 0.00042789456356718343, "loss": 0.7969, "step": 231 }, { "epoch": 0.41, "learning_rate": 0.0004261351945520616, "loss": 0.7946, "step": 232 }, { "epoch": 0.41, "learning_rate": 0.00042437253779527485, "loss": 0.7948, "step": 233 }, { "epoch": 0.42, "learning_rate": 0.00042260665059794467, "loss": 0.8782, "step": 234 }, { "epoch": 0.42, "learning_rate": 0.0004208375903662087, "loss": 0.7703, "step": 235 }, { "epoch": 0.42, "learning_rate": 0.00041906541460935524, "loss": 0.8175, "step": 236 }, { "epoch": 0.42, "learning_rate": 0.0004172901809379527, "loss": 0.8762, "step": 237 }, { "epoch": 0.42, "learning_rate": 0.0004155119470619779, "loss": 0.7611, "step": 238 }, { "epoch": 0.42, "learning_rate": 0.00041373077078893887, "loss": 0.8024, "step": 239 }, { "epoch": 0.43, "learning_rate": 0.0004119467100219968, "loss": 0.8315, "step": 240 }, { "epoch": 0.43, "learning_rate": 0.0004101598227580827, "loss": 0.8179, "step": 241 }, { "epoch": 0.43, "learning_rate": 0.0004083701670860126, "loss": 0.7895, "step": 242 }, { "epoch": 0.43, "learning_rate": 0.0004065778011845991, "loss": 0.8286, "step": 243 }, { "epoch": 0.43, "learning_rate": 0.0004047827833207597, "loss": 0.7763, "step": 244 }, { "epoch": 0.43, "learning_rate": 0.0004029851718476232, "loss": 0.8347, "step": 245 }, { "epoch": 0.44, "learning_rate": 0.0004011850252026321, "loss": 0.8407, "step": 246 }, { "epoch": 0.44, "learning_rate": 0.0003993824019056437, "loss": 0.6947, "step": 247 }, { "epoch": 0.44, "learning_rate": 0.0003975773605570268, "loss": 0.8137, "step": 248 }, { "epoch": 0.44, "learning_rate": 0.0003957699598357574, "loss": 0.7397, "step": 249 }, { "epoch": 0.44, "learning_rate": 0.00039396025849751105, "loss": 0.7993, "step": 250 }, { "epoch": 0.45, "learning_rate": 0.0003921483153727521, "loss": 0.8487, "step": 251 }, { "epoch": 0.45, "learning_rate": 0.0003903341893648222, "loss": 0.8079, "step": 252 }, { "epoch": 0.45, "learning_rate": 0.00038851793944802497, "loss": 0.8078, "step": 253 }, { "epoch": 0.45, "learning_rate": 0.0003866996246657087, "loss": 0.8757, "step": 254 }, { "epoch": 0.45, "learning_rate": 0.0003848793041283472, "loss": 0.8695, "step": 255 }, { "epoch": 0.45, "learning_rate": 0.0003830570370116183, "loss": 0.8306, "step": 256 }, { "epoch": 0.46, "learning_rate": 0.0003812328825544796, "loss": 0.7502, "step": 257 }, { "epoch": 0.46, "learning_rate": 0.00037940690005724336, "loss": 0.8467, "step": 258 }, { "epoch": 0.46, "learning_rate": 0.0003775791488796486, "loss": 0.7893, "step": 259 }, { "epoch": 0.46, "learning_rate": 0.0003757496884389308, "loss": 0.7922, "step": 260 }, { "epoch": 0.46, "learning_rate": 0.00037391857820789123, "loss": 0.8137, "step": 261 }, { "epoch": 0.46, "learning_rate": 0.00037208587771296326, "loss": 0.8322, "step": 262 }, { "epoch": 0.47, "learning_rate": 0.00037025164653227676, "loss": 0.8077, "step": 263 }, { "epoch": 0.47, "learning_rate": 0.0003684159442937219, "loss": 0.8906, "step": 264 }, { "epoch": 0.47, "learning_rate": 0.0003665788306730106, "loss": 0.8189, "step": 265 }, { "epoch": 0.47, "learning_rate": 0.00036474036539173673, "loss": 0.7934, "step": 266 }, { "epoch": 0.47, "learning_rate": 0.00036290060821543406, "loss": 0.8353, "step": 267 }, { "epoch": 0.48, "learning_rate": 0.00036105961895163387, "loss": 0.8887, "step": 268 }, { "epoch": 0.48, "learning_rate": 0.00035921745744792096, "loss": 0.8221, "step": 269 }, { "epoch": 0.48, "learning_rate": 0.0003573741835899873, "loss": 0.8948, "step": 270 }, { "epoch": 0.48, "learning_rate": 0.0003555298572996861, "loss": 0.8042, "step": 271 }, { "epoch": 0.48, "learning_rate": 0.00035368453853308303, "loss": 0.8296, "step": 272 }, { "epoch": 0.48, "learning_rate": 0.00035183828727850804, "loss": 0.809, "step": 273 }, { "epoch": 0.49, "learning_rate": 0.0003499911635546043, "loss": 0.7516, "step": 274 }, { "epoch": 0.49, "learning_rate": 0.00034814322740837764, "loss": 0.8407, "step": 275 }, { "epoch": 0.49, "learning_rate": 0.0003462945389132449, "loss": 0.7767, "step": 276 }, { "epoch": 0.49, "learning_rate": 0.0003444451581670798, "loss": 0.826, "step": 277 }, { "epoch": 0.49, "learning_rate": 0.0003425951452902607, "loss": 0.8608, "step": 278 }, { "epoch": 0.49, "learning_rate": 0.0003407445604237151, "loss": 0.8421, "step": 279 }, { "epoch": 0.5, "learning_rate": 0.0003388934637269651, "loss": 0.7686, "step": 280 }, { "epoch": 0.5, "learning_rate": 0.0003370419153761715, "loss": 0.7972, "step": 281 }, { "epoch": 0.5, "learning_rate": 0.00033518997556217776, "loss": 0.8238, "step": 282 }, { "epoch": 0.5, "learning_rate": 0.00033333770448855317, "loss": 0.7767, "step": 283 }, { "epoch": 0.5, "learning_rate": 0.0003314851623696355, "loss": 0.8312, "step": 284 }, { "epoch": 0.51, "learning_rate": 0.00032963240942857416, "loss": 0.7618, "step": 285 }, { "epoch": 0.51, "learning_rate": 0.00032777950589537176, "loss": 0.7653, "step": 286 }, { "epoch": 0.51, "learning_rate": 0.00032592651200492634, "loss": 0.8921, "step": 287 }, { "epoch": 0.51, "learning_rate": 0.00032407348799507374, "loss": 0.8412, "step": 288 }, { "epoch": 0.51, "learning_rate": 0.0003222204941046283, "loss": 0.787, "step": 289 }, { "epoch": 0.51, "learning_rate": 0.00032036759057142586, "loss": 0.7834, "step": 290 }, { "epoch": 0.52, "learning_rate": 0.0003185148376303645, "loss": 0.8221, "step": 291 }, { "epoch": 0.52, "learning_rate": 0.00031666229551144685, "loss": 0.8025, "step": 292 }, { "epoch": 0.52, "learning_rate": 0.00031481002443782227, "loss": 0.8136, "step": 293 }, { "epoch": 0.52, "learning_rate": 0.0003129580846238285, "loss": 0.8188, "step": 294 }, { "epoch": 0.52, "learning_rate": 0.000311106536273035, "loss": 0.7241, "step": 295 }, { "epoch": 0.53, "learning_rate": 0.0003092554395762849, "loss": 0.8234, "step": 296 }, { "epoch": 0.53, "learning_rate": 0.0003074048547097393, "loss": 0.8827, "step": 297 }, { "epoch": 0.53, "learning_rate": 0.0003055548418329201, "loss": 0.766, "step": 298 }, { "epoch": 0.53, "learning_rate": 0.00030370546108675513, "loss": 0.753, "step": 299 }, { "epoch": 0.53, "learning_rate": 0.0003018567725916224, "loss": 0.7767, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0003000088364453958, "loss": 0.7809, "step": 301 }, { "epoch": 0.54, "learning_rate": 0.000298161712721492, "loss": 0.8679, "step": 302 }, { "epoch": 0.54, "learning_rate": 0.00029631546146691694, "loss": 0.8464, "step": 303 }, { "epoch": 0.54, "learning_rate": 0.00029447014270031393, "loss": 0.7181, "step": 304 }, { "epoch": 0.54, "learning_rate": 0.0002926258164100127, "loss": 0.9354, "step": 305 }, { "epoch": 0.54, "learning_rate": 0.00029078254255207906, "loss": 0.8151, "step": 306 }, { "epoch": 0.54, "learning_rate": 0.00028894038104836615, "loss": 0.8382, "step": 307 }, { "epoch": 0.55, "learning_rate": 0.000287099391784566, "loss": 0.7495, "step": 308 }, { "epoch": 0.55, "learning_rate": 0.0002852596346082633, "loss": 0.8873, "step": 309 }, { "epoch": 0.55, "learning_rate": 0.0002834211693269893, "loss": 0.7534, "step": 310 }, { "epoch": 0.55, "learning_rate": 0.0002815840557062782, "loss": 0.7908, "step": 311 }, { "epoch": 0.55, "learning_rate": 0.00027974835346772337, "loss": 0.7738, "step": 312 }, { "epoch": 0.56, "learning_rate": 0.00027791412228703676, "loss": 0.7945, "step": 313 }, { "epoch": 0.56, "learning_rate": 0.00027608142179210874, "loss": 0.7678, "step": 314 }, { "epoch": 0.56, "learning_rate": 0.00027425031156106923, "loss": 0.8487, "step": 315 }, { "epoch": 0.56, "learning_rate": 0.00027242085112035144, "loss": 0.7736, "step": 316 }, { "epoch": 0.56, "learning_rate": 0.0002705930999427566, "loss": 0.8068, "step": 317 }, { "epoch": 0.56, "learning_rate": 0.00026876711744552043, "loss": 0.8051, "step": 318 }, { "epoch": 0.57, "learning_rate": 0.00026694296298838174, "loss": 0.7384, "step": 319 }, { "epoch": 0.57, "learning_rate": 0.0002651206958716527, "loss": 0.8203, "step": 320 }, { "epoch": 0.57, "learning_rate": 0.00026330037533429127, "loss": 0.7676, "step": 321 }, { "epoch": 0.57, "learning_rate": 0.000261482060551975, "loss": 0.7505, "step": 322 }, { "epoch": 0.57, "learning_rate": 0.0002596658106351778, "loss": 0.8522, "step": 323 }, { "epoch": 0.57, "learning_rate": 0.000257851684627248, "loss": 0.8943, "step": 324 }, { "epoch": 0.58, "learning_rate": 0.00025603974150248903, "loss": 0.8092, "step": 325 }, { "epoch": 0.58, "learning_rate": 0.0002542300401642426, "loss": 0.8543, "step": 326 }, { "epoch": 0.58, "learning_rate": 0.0002524226394429732, "loss": 0.8052, "step": 327 }, { "epoch": 0.58, "learning_rate": 0.0002506175980943563, "loss": 0.7748, "step": 328 }, { "epoch": 0.58, "learning_rate": 0.00024881497479736786, "loss": 0.784, "step": 329 }, { "epoch": 0.59, "learning_rate": 0.0002470148281523768, "loss": 0.7701, "step": 330 }, { "epoch": 0.59, "learning_rate": 0.0002452172166792403, "loss": 0.7336, "step": 331 }, { "epoch": 0.59, "learning_rate": 0.00024342219881540086, "loss": 0.7608, "step": 332 }, { "epoch": 0.59, "learning_rate": 0.00024162983291398736, "loss": 0.8646, "step": 333 }, { "epoch": 0.59, "learning_rate": 0.00023984017724191725, "loss": 0.7821, "step": 334 }, { "epoch": 0.59, "learning_rate": 0.00023805328997800329, "loss": 0.753, "step": 335 }, { "epoch": 0.6, "learning_rate": 0.0002362692292110612, "loss": 0.7984, "step": 336 }, { "epoch": 0.6, "learning_rate": 0.00023448805293802222, "loss": 0.7831, "step": 337 }, { "epoch": 0.6, "learning_rate": 0.00023270981906204732, "loss": 0.738, "step": 338 }, { "epoch": 0.6, "learning_rate": 0.00023093458539064478, "loss": 0.8652, "step": 339 }, { "epoch": 0.6, "learning_rate": 0.00022916240963379128, "loss": 0.7252, "step": 340 }, { "epoch": 0.6, "learning_rate": 0.0002273933494020554, "loss": 0.7484, "step": 341 }, { "epoch": 0.61, "learning_rate": 0.00022562746220472518, "loss": 0.7634, "step": 342 }, { "epoch": 0.61, "learning_rate": 0.00022386480544793846, "loss": 0.8458, "step": 343 }, { "epoch": 0.61, "learning_rate": 0.00022210543643281656, "loss": 0.7653, "step": 344 }, { "epoch": 0.61, "learning_rate": 0.00022034941235360179, "loss": 0.7125, "step": 345 }, { "epoch": 0.61, "learning_rate": 0.00021859679029579784, "loss": 0.7616, "step": 346 }, { "epoch": 0.62, "learning_rate": 0.0002168476272343144, "loss": 0.7459, "step": 347 }, { "epoch": 0.62, "learning_rate": 0.00021510198003161447, "loss": 0.8287, "step": 348 }, { "epoch": 0.62, "learning_rate": 0.00021335990543586635, "loss": 0.7626, "step": 349 }, { "epoch": 0.62, "learning_rate": 0.00021162146007909881, "loss": 0.6894, "step": 350 }, { "epoch": 0.62, "learning_rate": 0.0002098867004753596, "loss": 0.7187, "step": 351 }, { "epoch": 0.62, "learning_rate": 0.00020815568301887888, "loss": 0.7867, "step": 352 }, { "epoch": 0.63, "learning_rate": 0.00020642846398223548, "loss": 0.7222, "step": 353 }, { "epoch": 0.63, "learning_rate": 0.0002047050995145278, "loss": 0.8687, "step": 354 }, { "epoch": 0.63, "learning_rate": 0.00020298564563954856, "loss": 0.781, "step": 355 }, { "epoch": 0.63, "learning_rate": 0.00020127015825396319, "loss": 0.8546, "step": 356 }, { "epoch": 0.63, "learning_rate": 0.00019955869312549316, "loss": 0.8654, "step": 357 }, { "epoch": 0.64, "learning_rate": 0.00019785130589110279, "loss": 0.8679, "step": 358 }, { "epoch": 0.64, "learning_rate": 0.00019614805205519055, "loss": 0.7049, "step": 359 }, { "epoch": 0.64, "learning_rate": 0.000194448986987785, "loss": 0.7861, "step": 360 }, { "epoch": 0.64, "learning_rate": 0.00019275416592274446, "loss": 0.8216, "step": 361 }, { "epoch": 0.64, "learning_rate": 0.0001910636439559618, "loss": 0.7813, "step": 362 }, { "epoch": 0.64, "learning_rate": 0.0001893774760435728, "loss": 0.7393, "step": 363 }, { "epoch": 0.65, "learning_rate": 0.0001876957170001704, "loss": 0.8036, "step": 364 }, { "epoch": 0.65, "learning_rate": 0.00018601842149702203, "loss": 0.8428, "step": 365 }, { "epoch": 0.65, "learning_rate": 0.00018434564406029258, "loss": 0.7555, "step": 366 }, { "epoch": 0.65, "learning_rate": 0.00018267743906927238, "loss": 0.7521, "step": 367 }, { "epoch": 0.65, "learning_rate": 0.00018101386075460843, "loss": 0.7631, "step": 368 }, { "epoch": 0.65, "learning_rate": 0.0001793549631965421, "loss": 0.7697, "step": 369 }, { "epoch": 0.66, "learning_rate": 0.00017770080032315127, "loss": 0.7109, "step": 370 }, { "epoch": 0.66, "learning_rate": 0.00017605142590859622, "loss": 0.8004, "step": 371 }, { "epoch": 0.66, "learning_rate": 0.00017440689357137265, "loss": 0.7539, "step": 372 }, { "epoch": 0.66, "learning_rate": 0.00017276725677256824, "loss": 0.7791, "step": 373 }, { "epoch": 0.66, "learning_rate": 0.00017113256881412428, "loss": 0.7562, "step": 374 }, { "epoch": 0.67, "learning_rate": 0.00016950288283710345, "loss": 0.7569, "step": 375 }, { "epoch": 0.67, "learning_rate": 0.00016787825181996193, "loss": 0.7316, "step": 376 }, { "epoch": 0.67, "learning_rate": 0.00016625872857682782, "loss": 0.7922, "step": 377 }, { "epoch": 0.67, "learning_rate": 0.00016464436575578314, "loss": 0.779, "step": 378 }, { "epoch": 0.67, "learning_rate": 0.00016303521583715312, "loss": 0.757, "step": 379 }, { "epoch": 0.67, "learning_rate": 0.00016143133113180028, "loss": 0.7411, "step": 380 }, { "epoch": 0.68, "learning_rate": 0.00015983276377942314, "loss": 0.7369, "step": 381 }, { "epoch": 0.68, "learning_rate": 0.00015823956574686192, "loss": 0.754, "step": 382 }, { "epoch": 0.68, "learning_rate": 0.00015665178882640884, "loss": 0.7461, "step": 383 }, { "epoch": 0.68, "learning_rate": 0.00015506948463412447, "loss": 0.7724, "step": 384 }, { "epoch": 0.68, "learning_rate": 0.00015349270460816004, "loss": 0.8327, "step": 385 }, { "epoch": 0.68, "learning_rate": 0.00015192150000708474, "loss": 0.7992, "step": 386 }, { "epoch": 0.69, "learning_rate": 0.0001503559219082202, "loss": 0.7249, "step": 387 }, { "epoch": 0.69, "learning_rate": 0.00014879602120597911, "loss": 0.8001, "step": 388 }, { "epoch": 0.69, "learning_rate": 0.0001472418486102113, "loss": 0.7667, "step": 389 }, { "epoch": 0.69, "learning_rate": 0.00014569345464455512, "loss": 0.8302, "step": 390 }, { "epoch": 0.69, "learning_rate": 0.00014415088964479526, "loss": 0.8082, "step": 391 }, { "epoch": 0.7, "learning_rate": 0.00014261420375722575, "loss": 0.7399, "step": 392 }, { "epoch": 0.7, "learning_rate": 0.00014108344693702026, "loss": 0.7786, "step": 393 }, { "epoch": 0.7, "learning_rate": 0.00013955866894660855, "loss": 0.7507, "step": 394 }, { "epoch": 0.7, "learning_rate": 0.00013803991935405755, "loss": 0.8151, "step": 395 }, { "epoch": 0.7, "learning_rate": 0.00013652724753146102, "loss": 0.7722, "step": 396 }, { "epoch": 0.7, "learning_rate": 0.00013502070265333462, "loss": 0.7904, "step": 397 }, { "epoch": 0.71, "learning_rate": 0.00013352033369501653, "loss": 0.8405, "step": 398 }, { "epoch": 0.71, "learning_rate": 0.00013202618943107576, "loss": 0.7437, "step": 399 }, { "epoch": 0.71, "learning_rate": 0.00013053831843372666, "loss": 0.8447, "step": 400 }, { "epoch": 0.71, "learning_rate": 0.00012905676907125005, "loss": 0.7949, "step": 401 }, { "epoch": 0.71, "learning_rate": 0.00012758158950642028, "loss": 0.7259, "step": 402 }, { "epoch": 0.71, "learning_rate": 0.00012611282769493999, "loss": 0.7557, "step": 403 }, { "epoch": 0.72, "learning_rate": 0.00012465053138388098, "loss": 0.8465, "step": 404 }, { "epoch": 0.72, "learning_rate": 0.0001231947481101325, "loss": 0.7576, "step": 405 }, { "epoch": 0.72, "learning_rate": 0.0001217455251988548, "loss": 0.7945, "step": 406 }, { "epoch": 0.72, "learning_rate": 0.0001203029097619417, "loss": 0.7446, "step": 407 }, { "epoch": 0.72, "learning_rate": 0.00011886694869648893, "loss": 0.8205, "step": 408 }, { "epoch": 0.73, "learning_rate": 0.00011743768868326904, "loss": 0.7842, "step": 409 }, { "epoch": 0.73, "learning_rate": 0.00011601517618521416, "loss": 0.868, "step": 410 }, { "epoch": 0.73, "learning_rate": 0.00011459945744590603, "loss": 0.8414, "step": 411 }, { "epoch": 0.73, "learning_rate": 0.00011319057848807186, "loss": 0.7259, "step": 412 }, { "epoch": 0.73, "learning_rate": 0.00011178858511208877, "loss": 0.8071, "step": 413 }, { "epoch": 0.73, "learning_rate": 0.00011039352289449466, "loss": 0.8765, "step": 414 }, { "epoch": 0.74, "learning_rate": 0.00010900543718650697, "loss": 0.7724, "step": 415 }, { "epoch": 0.74, "learning_rate": 0.00010762437311254777, "loss": 0.7945, "step": 416 }, { "epoch": 0.74, "learning_rate": 0.00010625037556877727, "loss": 0.8228, "step": 417 }, { "epoch": 0.74, "learning_rate": 0.00010488348922163421, "loss": 0.7729, "step": 418 }, { "epoch": 0.74, "learning_rate": 0.00010352375850638382, "loss": 0.7894, "step": 419 }, { "epoch": 0.75, "learning_rate": 0.00010217122762567324, "loss": 0.7933, "step": 420 }, { "epoch": 0.75, "learning_rate": 0.00010082594054809488, "loss": 0.7888, "step": 421 }, { "epoch": 0.75, "learning_rate": 9.948794100675652e-05, "loss": 0.7749, "step": 422 }, { "epoch": 0.75, "learning_rate": 9.815727249786008e-05, "loss": 0.8214, "step": 423 }, { "epoch": 0.75, "learning_rate": 9.683397827928737e-05, "loss": 0.8181, "step": 424 }, { "epoch": 0.75, "learning_rate": 9.55181013691941e-05, "loss": 0.7482, "step": 425 }, { "epoch": 0.76, "learning_rate": 9.420968454461113e-05, "loss": 0.812, "step": 426 }, { "epoch": 0.76, "learning_rate": 9.290877034005409e-05, "loss": 0.7501, "step": 427 }, { "epoch": 0.76, "learning_rate": 9.161540104614033e-05, "loss": 0.8509, "step": 428 }, { "epoch": 0.76, "learning_rate": 9.032961870821493e-05, "loss": 0.7103, "step": 429 }, { "epoch": 0.76, "learning_rate": 8.905146512498254e-05, "loss": 0.7894, "step": 430 }, { "epoch": 0.76, "learning_rate": 8.778098184714977e-05, "loss": 0.7947, "step": 431 }, { "epoch": 0.77, "learning_rate": 8.65182101760741e-05, "loss": 0.7175, "step": 432 }, { "epoch": 0.77, "learning_rate": 8.526319116242084e-05, "loss": 0.774, "step": 433 }, { "epoch": 0.77, "learning_rate": 8.401596560482902e-05, "loss": 0.8459, "step": 434 }, { "epoch": 0.77, "learning_rate": 8.277657404858523e-05, "loss": 0.7551, "step": 435 }, { "epoch": 0.77, "learning_rate": 8.154505678430514e-05, "loss": 0.8399, "step": 436 }, { "epoch": 0.78, "learning_rate": 8.032145384662393e-05, "loss": 0.7648, "step": 437 }, { "epoch": 0.78, "learning_rate": 7.91058050128947e-05, "loss": 0.7743, "step": 438 }, { "epoch": 0.78, "learning_rate": 7.789814980189591e-05, "loss": 0.7609, "step": 439 }, { "epoch": 0.78, "learning_rate": 7.669852747254598e-05, "loss": 0.8661, "step": 440 }, { "epoch": 0.78, "learning_rate": 7.550697702262705e-05, "loss": 0.8407, "step": 441 }, { "epoch": 0.78, "learning_rate": 7.432353718751805e-05, "loss": 0.7823, "step": 442 }, { "epoch": 0.79, "learning_rate": 7.314824643893467e-05, "loss": 0.693, "step": 443 }, { "epoch": 0.79, "learning_rate": 7.198114298367899e-05, "loss": 0.8215, "step": 444 }, { "epoch": 0.79, "learning_rate": 7.08222647623973e-05, "loss": 0.7529, "step": 445 }, { "epoch": 0.79, "learning_rate": 6.967164944834717e-05, "loss": 0.7446, "step": 446 }, { "epoch": 0.79, "learning_rate": 6.852933444617213e-05, "loss": 0.728, "step": 447 }, { "epoch": 0.79, "learning_rate": 6.739535689068595e-05, "loss": 0.7269, "step": 448 }, { "epoch": 0.8, "learning_rate": 6.626975364566577e-05, "loss": 0.772, "step": 449 }, { "epoch": 0.8, "learning_rate": 6.515256130265319e-05, "loss": 0.79, "step": 450 }, { "epoch": 0.8, "learning_rate": 6.404381617976511e-05, "loss": 0.7689, "step": 451 }, { "epoch": 0.8, "learning_rate": 6.29435543205129e-05, "loss": 0.7993, "step": 452 }, { "epoch": 0.8, "learning_rate": 6.18518114926307e-05, "loss": 0.7223, "step": 453 }, { "epoch": 0.81, "learning_rate": 6.07686231869128e-05, "loss": 0.7739, "step": 454 }, { "epoch": 0.81, "learning_rate": 5.9694024616059735e-05, "loss": 0.7768, "step": 455 }, { "epoch": 0.81, "learning_rate": 5.862805071353384e-05, "loss": 0.7749, "step": 456 }, { "epoch": 0.81, "learning_rate": 5.757073613242314e-05, "loss": 0.8183, "step": 457 }, { "epoch": 0.81, "learning_rate": 5.6522115244315215e-05, "loss": 0.7616, "step": 458 }, { "epoch": 0.81, "learning_rate": 5.548222213817996e-05, "loss": 0.7366, "step": 459 }, { "epoch": 0.82, "learning_rate": 5.445109061926091e-05, "loss": 0.8308, "step": 460 }, { "epoch": 0.82, "learning_rate": 5.3428754207976695e-05, "loss": 0.8053, "step": 461 }, { "epoch": 0.82, "learning_rate": 5.2415246138831057e-05, "loss": 0.8413, "step": 462 }, { "epoch": 0.82, "learning_rate": 5.141059935933293e-05, "loss": 0.8198, "step": 463 }, { "epoch": 0.82, "learning_rate": 5.041484652892487e-05, "loss": 0.7564, "step": 464 }, { "epoch": 0.82, "learning_rate": 4.9428020017921235e-05, "loss": 0.783, "step": 465 }, { "epoch": 0.83, "learning_rate": 4.84501519064567e-05, "loss": 0.7834, "step": 466 }, { "epoch": 0.83, "learning_rate": 4.7481273983442496e-05, "loss": 0.7552, "step": 467 }, { "epoch": 0.83, "learning_rate": 4.6521417745533556e-05, "loss": 0.8159, "step": 468 }, { "epoch": 0.83, "learning_rate": 4.557061439610419e-05, "loss": 0.815, "step": 469 }, { "epoch": 0.83, "learning_rate": 4.462889484423422e-05, "loss": 0.7664, "step": 470 }, { "epoch": 0.84, "learning_rate": 4.369628970370372e-05, "loss": 0.8062, "step": 471 }, { "epoch": 0.84, "learning_rate": 4.2772829291997916e-05, "loss": 0.7135, "step": 472 }, { "epoch": 0.84, "learning_rate": 4.1858543629321956e-05, "loss": 0.7692, "step": 473 }, { "epoch": 0.84, "learning_rate": 4.095346243762447e-05, "loss": 0.7481, "step": 474 }, { "epoch": 0.84, "learning_rate": 4.0057615139631764e-05, "loss": 0.7913, "step": 475 }, { "epoch": 0.84, "learning_rate": 3.9171030857891087e-05, "loss": 0.7626, "step": 476 }, { "epoch": 0.85, "learning_rate": 3.8293738413824065e-05, "loss": 0.8227, "step": 477 }, { "epoch": 0.85, "learning_rate": 3.742576632678971e-05, "loss": 0.7575, "step": 478 }, { "epoch": 0.85, "learning_rate": 3.656714281315718e-05, "loss": 0.7689, "step": 479 }, { "epoch": 0.85, "learning_rate": 3.571789578538891e-05, "loss": 0.7818, "step": 480 }, { "epoch": 0.85, "learning_rate": 3.487805285113266e-05, "loss": 0.8032, "step": 481 }, { "epoch": 0.86, "learning_rate": 3.404764131232438e-05, "loss": 0.7684, "step": 482 }, { "epoch": 0.86, "learning_rate": 3.322668816430062e-05, "loss": 0.7989, "step": 483 }, { "epoch": 0.86, "learning_rate": 3.241522009492107e-05, "loss": 0.7373, "step": 484 }, { "epoch": 0.86, "learning_rate": 3.161326348370065e-05, "loss": 0.7821, "step": 485 }, { "epoch": 0.86, "learning_rate": 3.082084440095212e-05, "loss": 0.7874, "step": 486 }, { "epoch": 0.86, "learning_rate": 3.003798860693901e-05, "loss": 0.8324, "step": 487 }, { "epoch": 0.87, "learning_rate": 2.9264721551037228e-05, "loss": 0.784, "step": 488 }, { "epoch": 0.87, "learning_rate": 2.8501068370908602e-05, "loss": 0.7433, "step": 489 }, { "epoch": 0.87, "learning_rate": 2.774705389168354e-05, "loss": 0.7602, "step": 490 }, { "epoch": 0.87, "learning_rate": 2.700270262515363e-05, "loss": 0.8091, "step": 491 }, { "epoch": 0.87, "learning_rate": 2.6268038768975084e-05, "loss": 0.7597, "step": 492 }, { "epoch": 0.87, "learning_rate": 2.554308620588212e-05, "loss": 0.8004, "step": 493 }, { "epoch": 0.88, "learning_rate": 2.482786850291056e-05, "loss": 0.7236, "step": 494 }, { "epoch": 0.88, "learning_rate": 2.4122408910631542e-05, "loss": 0.765, "step": 495 }, { "epoch": 0.88, "learning_rate": 2.342673036239589e-05, "loss": 0.7404, "step": 496 }, { "epoch": 0.88, "learning_rate": 2.2740855473588397e-05, "loss": 0.7428, "step": 497 }, { "epoch": 0.88, "learning_rate": 2.206480654089295e-05, "loss": 0.8271, "step": 498 }, { "epoch": 0.89, "learning_rate": 2.1398605541567217e-05, "loss": 0.779, "step": 499 }, { "epoch": 0.89, "learning_rate": 2.0742274132728692e-05, "loss": 0.7889, "step": 500 } ], "logging_steps": 1, "max_steps": 563, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "total_flos": 7.581847438968422e+17, "train_batch_size": 3, "trial_name": null, "trial_params": null }